From ee03923eb87216875b99de34d23ef65cb7320b49 Mon Sep 17 00:00:00 2001 From: David Date: Wed, 17 Apr 2024 16:20:00 +0800 Subject: [PATCH] feat(metrics): improve metrics --- .github/workflows/docker.yml | 2 +- driver/chain_syncer/calldata/syncer.go | 4 +- driver/state/state.go | 6 +- internal/metrics/metrics.go | 90 +++++++++++--------- proposer/proposer.go | 6 +- prover/event_handler/block_proposed.go | 4 +- prover/event_handler/block_verified.go | 2 +- prover/event_handler/transition_proved.go | 2 +- prover/proof_producer/sgx_producer.go | 2 +- prover/proof_submitter/proof_submitter.go | 10 +-- prover/proof_submitter/transaction/sender.go | 4 +- 11 files changed, 71 insertions(+), 61 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 1b1491c56..0670a1a86 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -2,7 +2,7 @@ name: "Push docker image to GCR" on: push: - branches: [main] + branches: [main,txmgr-metrics] tags: - "v*" diff --git a/driver/chain_syncer/calldata/syncer.go b/driver/chain_syncer/calldata/syncer.go index 4c2e94973..85862c596 100644 --- a/driver/chain_syncer/calldata/syncer.go +++ b/driver/chain_syncer/calldata/syncer.go @@ -148,7 +148,7 @@ func (s *Syncer) processL1Blocks(ctx context.Context) error { // If there is a L1 reorg, we don't update the L1Current cursor. if !s.reorgDetectedFlag { s.state.SetL1Current(l1End) - metrics.DriverL1CurrentHeightGauge.Update(s.state.GetL1Current().Number.Int64()) + metrics.DriverL1CurrentHeightGauge.Set(float64(s.state.GetL1Current().Number.Uint64())) } return nil @@ -300,7 +300,7 @@ func (s *Syncer) onBlockProposed( "withdrawals", len(payloadData.Withdrawals), ) - metrics.DriverL1CurrentHeightGauge.Update(int64(event.Raw.BlockNumber)) + metrics.DriverL1CurrentHeightGauge.Set(float64(event.Raw.BlockNumber)) s.lastInsertedBlockID = event.BlockId if s.progressTracker.Triggered() { diff --git a/driver/state/state.go b/driver/state/state.go index 80ce54045..9afd37e8c 100644 --- a/driver/state/state.go +++ b/driver/state/state.go @@ -161,7 +161,7 @@ func (s *State) setL1Head(l1Head *types.Header) { } log.Debug("New L1 head", "height", l1Head.Number, "hash", l1Head.Hash(), "timestamp", l1Head.Time) - metrics.DriverL1HeadHeightGauge.Update(l1Head.Number.Int64()) + metrics.DriverL1HeadHeightGauge.Set(float64(l1Head.Number.Int64())) s.l1Head.Store(l1Head) } @@ -179,7 +179,7 @@ func (s *State) setL2Head(l2Head *types.Header) { } log.Debug("New L2 head", "height", l2Head.Number, "hash", l2Head.Hash(), "timestamp", l2Head.Time) - metrics.DriverL2HeadHeightGauge.Update(l2Head.Number.Int64()) + metrics.DriverL2HeadHeightGauge.Set(float64(l2Head.Number.Uint64())) s.l2Head.Store(l2Head) } @@ -192,7 +192,7 @@ func (s *State) GetL2Head() *types.Header { // setHeadBlockID sets the last pending block ID concurrent safely. func (s *State) setHeadBlockID(id *big.Int) { log.Debug("New head block ID", "ID", id) - metrics.DriverL2HeadIDGauge.Update(id.Int64()) + metrics.DriverL2HeadIDGauge.Set(float64(id.Uint64())) s.l2HeadBlockID.Store(id) } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 6f1592769..cfe1fec57 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -2,16 +2,12 @@ package metrics import ( "context" - "net" - "net/http" - "strconv" - "time" opMetrics "github.com/ethereum-optimism/optimism/op-service/metrics" + "github.com/ethereum-optimism/optimism/op-service/opio" txmgrMetrics "github.com/ethereum-optimism/optimism/op-service/txmgr/metrics" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" - "github.com/ethereum/go-ethereum/metrics/prometheus" + "github.com/prometheus/client_golang/prometheus" "github.com/urfave/cli/v2" "github.com/taikoxyz/taiko-client/cmd/flags" @@ -19,34 +15,45 @@ import ( // Metrics var ( + registry = opMetrics.NewRegistry() + factory = opMetrics.With(registry) + // Driver - DriverL1HeadHeightGauge = metrics.NewRegisteredGauge("driver/l1Head/height", nil) - DriverL2HeadHeightGauge = metrics.NewRegisteredGauge("driver/l2Head/height", nil) - DriverL1CurrentHeightGauge = metrics.NewRegisteredGauge("driver/l1Current/height", nil) - DriverL2HeadIDGauge = metrics.NewRegisteredGauge("driver/l2Head/id", nil) - DriverL2VerifiedHeightGauge = metrics.NewRegisteredGauge("driver/l2Verified/id", nil) + DriverL1HeadHeightGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "driver/l1Head/height"}) + DriverL2HeadHeightGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "driver/l2Head/height"}) + DriverL1CurrentHeightGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "driver/l1Current/height"}) + DriverL2HeadIDGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "driver/l2Head/id"}) + DriverL2VerifiedHeightGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "driver/l2Verified/id"}) // Proposer - ProposerProposeEpochCounter = metrics.NewRegisteredCounter("proposer/epoch", nil) - ProposerProposedTxListsCounter = metrics.NewRegisteredCounter("proposer/proposed/txLists", nil) - ProposerProposedTxsCounter = metrics.NewRegisteredCounter("proposer/proposed/txs", nil) + ProposerProposeEpochCounter = factory.NewCounter(prometheus.CounterOpts{Name: "proposer/epoch"}) + ProposerProposedTxListsCounter = factory.NewCounter(prometheus.CounterOpts{Name: "proposer/proposed/txLists"}) + ProposerProposedTxsCounter = factory.NewCounter(prometheus.CounterOpts{Name: "proposer/proposed/txs"}) // Prover - ProverLatestVerifiedIDGauge = metrics.NewRegisteredGauge("prover/latestVerified/id", nil) - ProverLatestProvenBlockIDGauge = metrics.NewRegisteredGauge("prover/latestProven/id", nil) - ProverQueuedProofCounter = metrics.NewRegisteredCounter("prover/proof/all/queued", nil) - ProverReceivedProofCounter = metrics.NewRegisteredCounter("prover/proof/all/received", nil) - ProverSentProofCounter = metrics.NewRegisteredCounter("prover/proof/all/sent", nil) - ProverProofsAssigned = metrics.NewRegisteredCounter("prover/proof/assigned", nil) - ProverReceivedProposedBlockGauge = metrics.NewRegisteredGauge("prover/proposed/received", nil) - ProverReceivedProvenBlockGauge = metrics.NewRegisteredGauge("prover/proven/received", nil) - ProverSubmissionAcceptedCounter = metrics.NewRegisteredCounter("prover/proof/submission/accepted", nil) - ProverSubmissionErrorCounter = metrics.NewRegisteredCounter("prover/proof/submission/error", nil) - ProverSgxProofGeneratedCounter = metrics.NewRegisteredCounter("prover/proof/sgx/generated", nil) - ProverSubmissionRevertedCounter = metrics.NewRegisteredCounter("prover/proof/submission/reverted", nil) + ProverLatestVerifiedIDGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "prover/latestVerified/id"}) + ProverLatestProvenBlockIDGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "prover/latestProven/id"}) + ProverQueuedProofCounter = factory.NewCounter(prometheus.CounterOpts{Name: "prover/proof/all/queued"}) + ProverReceivedProofCounter = factory.NewCounter(prometheus.CounterOpts{Name: "prover/proof/all/received"}) + ProverSentProofCounter = factory.NewCounter(prometheus.CounterOpts{Name: "prover/proof/all/sent"}) + ProverProofsAssigned = factory.NewCounter(prometheus.CounterOpts{Name: "prover/proof/assigned"}) + ProverReceivedProposedBlockGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "prover/proposed/received"}) + ProverReceivedProvenBlockGauge = factory.NewGauge(prometheus.GaugeOpts{Name: "prover/proven/received"}) + ProverSubmissionAcceptedCounter = factory.NewCounter(prometheus.CounterOpts{ + Name: "prover/proof/submission/accepted", + }) + ProverSubmissionErrorCounter = factory.NewCounter(prometheus.CounterOpts{ + Name: "prover/proof/submission/error", + }) + ProverSgxProofGeneratedCounter = factory.NewCounter(prometheus.CounterOpts{ + Name: "prover/proof/sgx/generated", + }) + ProverSubmissionRevertedCounter = factory.NewCounter(prometheus.CounterOpts{ + Name: "prover/proof/submission/reverted", + }) - // TxManager Metrics - TxMgrMetrics = txmgrMetrics.MakeTxMetrics("client", opMetrics.With(opMetrics.NewRegistry())) + // TxManager + TxMgrMetrics = txmgrMetrics.MakeTxMetrics("client", factory) ) // Serve starts the metrics server on the given address, will be closed when the given @@ -56,25 +63,28 @@ func Serve(ctx context.Context, c *cli.Context) error { return nil } - address := net.JoinHostPort( - c.String(flags.MetricsAddr.Name), - strconv.Itoa(c.Int(flags.MetricsPort.Name)), + log.Info( + "Starting metrics server", + "host", c.String(flags.MetricsAddr.Name), + "port", c.Int(flags.MetricsPort.Name), ) - server := http.Server{ - ReadHeaderTimeout: time.Minute, - Addr: address, - Handler: prometheus.Handler(metrics.DefaultRegistry), + server, err := opMetrics.StartServer( + registry, + c.String(flags.MetricsAddr.Name), + c.Int(flags.MetricsPort.Name), + ) + if err != nil { + return err } - go func() { - <-ctx.Done() - if err := server.Close(); err != nil { + defer func() { + if err := server.Stop(ctx); err != nil { log.Error("Failed to close metrics server", "error", err) } }() - log.Info("Starting metrics server", "address", address) + opio.BlockOnInterruptsContext(ctx) - return server.ListenAndServe() + return nil } diff --git a/proposer/proposer.go b/proposer/proposer.go index 08a2248d2..46c7ac5e0 100644 --- a/proposer/proposer.go +++ b/proposer/proposer.go @@ -181,7 +181,7 @@ func (p *Proposer) eventLoop() { return // proposing interval timer has been reached case <-p.proposingTimer.C: - metrics.ProposerProposeEpochCounter.Inc(1) + metrics.ProposerProposeEpochCounter.Add(1) // Attempt a proposing operation if err := p.ProposeOp(p.ctx); err != nil { @@ -323,8 +323,8 @@ func (p *Proposer) ProposeOp(ctx context.Context) error { continue } - metrics.ProposerProposedTxListsCounter.Inc(1) - metrics.ProposerProposedTxsCounter.Inc(int64(len(txLists[i]))) + metrics.ProposerProposedTxListsCounter.Add(1) + metrics.ProposerProposedTxsCounter.Add(float64(len(txLists[i]))) log.Info("📝 Propose transactions succeeded", "txs", len(txLists[i])) p.lastProposedAt = time.Now() diff --git a/prover/event_handler/block_proposed.go b/prover/event_handler/block_proposed.go index b7ea8fde7..ec382a582 100644 --- a/prover/event_handler/block_proposed.go +++ b/prover/event_handler/block_proposed.go @@ -124,7 +124,7 @@ func (h *BlockProposedEventHandler) Handle( "minTier", e.Meta.MinTier, "blobUsed", e.Meta.BlobUsed, ) - metrics.ProverReceivedProposedBlockGauge.Update(e.BlockId.Int64()) + metrics.ProverReceivedProposedBlockGauge.Set(float64(e.BlockId.Uint64())) // Move l1Current cursor. newL1Current, err := h.rpc.L1.HeaderByHash(ctx, e.Raw.BlockHash) @@ -323,7 +323,7 @@ func (h *BlockProposedEventHandler) checkExpirationAndSubmitProof( "tier", tier, ) - metrics.ProverProofsAssigned.Inc(1) + metrics.ProverProofsAssigned.Add(1) h.proofSubmissionCh <- &proofProducer.ProofRequestBody{Tier: tier, Event: e} diff --git a/prover/event_handler/block_verified.go b/prover/event_handler/block_verified.go index 70f9c8e32..4a445fc99 100644 --- a/prover/event_handler/block_verified.go +++ b/prover/event_handler/block_verified.go @@ -12,7 +12,7 @@ type BlockVerifiedEventHandler struct{} // Handle handles the BlockVerified event. func (h *BlockVerifiedEventHandler) Handle(e *bindings.TaikoL1ClientBlockVerified) { - metrics.ProverLatestVerifiedIDGauge.Update(e.BlockId.Int64()) + metrics.ProverLatestVerifiedIDGauge.Set(float64(e.BlockId.Uint64())) log.Info( "New verified block", diff --git a/prover/event_handler/transition_proved.go b/prover/event_handler/transition_proved.go index 29972dc19..3f597f3f1 100644 --- a/prover/event_handler/transition_proved.go +++ b/prover/event_handler/transition_proved.go @@ -33,7 +33,7 @@ func (h *TransitionProvedEventHandler) Handle( ctx context.Context, e *bindings.TaikoL1ClientTransitionProved, ) error { - metrics.ProverReceivedProvenBlockGauge.Update(e.BlockId.Int64()) + metrics.ProverReceivedProvenBlockGauge.Set(float64(e.BlockId.Uint64())) // If this prover is in contest mode, we check the validity of this proof and if it's invalid, // contest it with a higher tier proof. diff --git a/prover/proof_producer/sgx_producer.go b/prover/proof_producer/sgx_producer.go index f9efabe07..f306a8dfa 100644 --- a/prover/proof_producer/sgx_producer.go +++ b/prover/proof_producer/sgx_producer.go @@ -99,7 +99,7 @@ func (s *SGXProofProducer) RequestProof( return nil, err } - metrics.ProverSgxProofGeneratedCounter.Inc(1) + metrics.ProverSgxProofGeneratedCounter.Add(1) return &ProofWithHeader{ BlockID: blockID, diff --git a/prover/proof_submitter/proof_submitter.go b/prover/proof_submitter/proof_submitter.go index 9085d881a..c48c3b637 100644 --- a/prover/proof_submitter/proof_submitter.go +++ b/prover/proof_submitter/proof_submitter.go @@ -114,7 +114,7 @@ func (s *ProofSubmitter) RequestProof(ctx context.Context, event *bindings.Taiko } s.resultCh <- result - metrics.ProverQueuedProofCounter.Inc(1) + metrics.ProverQueuedProofCounter.Add(1) return nil } @@ -135,7 +135,7 @@ func (s *ProofSubmitter) SubmitProof( "tier", proofWithHeader.Tier, ) - metrics.ProverReceivedProofCounter.Inc(1) + metrics.ProverReceivedProofCounter.Add(1) // Get the corresponding L2 block. block, err := s.rpc.L2.BlockByHash(ctx, proofWithHeader.Header.Hash()) @@ -176,12 +176,12 @@ func (s *ProofSubmitter) SubmitProof( if err.Error() == transaction.ErrUnretryableSubmission.Error() { return nil } - metrics.ProverSubmissionErrorCounter.Inc(1) + metrics.ProverSubmissionErrorCounter.Add(1) return err } - metrics.ProverSentProofCounter.Inc(1) - metrics.ProverLatestProvenBlockIDGauge.Update(proofWithHeader.BlockID.Int64()) + metrics.ProverSentProofCounter.Add(1) + metrics.ProverLatestProvenBlockIDGauge.Set(float64(proofWithHeader.BlockID.Uint64())) return nil } diff --git a/prover/proof_submitter/transaction/sender.go b/prover/proof_submitter/transaction/sender.go index 83b7f0493..d42ad45b5 100644 --- a/prover/proof_submitter/transaction/sender.go +++ b/prover/proof_submitter/transaction/sender.go @@ -66,7 +66,7 @@ func (s *Sender) Send( "tier", proofWithHeader.Tier, "txHash", receipt.TxHash, ) - metrics.ProverSubmissionRevertedCounter.Inc(1) + metrics.ProverSubmissionRevertedCounter.Add(1) return ErrUnretryableSubmission } @@ -81,7 +81,7 @@ func (s *Sender) Send( "isContest", len(proofWithHeader.Proof) == 0, ) - metrics.ProverSubmissionAcceptedCounter.Inc(1) + metrics.ProverSubmissionAcceptedCounter.Add(1) return nil }