From 55246746f48216935db8a124c134d484b8d72977 Mon Sep 17 00:00:00 2001 From: Baka <48246993+quertc@users.noreply.github.com> Date: Sat, 7 Dec 2024 01:20:49 +0300 Subject: [PATCH] Feat/empty blocks (#89) * feat: add tenderduty_empty_proposed_blocks metric * feat: add empty_blocks_percentage and consecutive_empty alerting * chore: update config * fix: config * fix: EmptyPercentageAlerts formatting * feat: rm lib64 symlink --- example-config.yml | 15 +++++++++ td2/alert.go | 71 ++++++++++++++++++++++++++++++++++++++++++- td2/prometheus.go | 19 ++++++++++-- td2/static/grid.js | 16 ++++++++++ td2/static/index.html | 2 +- td2/types.go | 30 +++++++++++++----- td2/ws.go | 21 ++++++++++++- 7 files changed, 161 insertions(+), 13 deletions(-) diff --git a/example-config.yml b/example-config.yml index b0b9a1c..1aba99f 100644 --- a/example-config.yml +++ b/example-config.yml @@ -95,6 +95,21 @@ chains: # Percentage Missed alert Pagerduty Severity percentage_priority: warning + # Empty blocks notification configuration + consecutive_empty_enabled: yes + # How many consecutive empty blocks should trigger a notification? + consecutive_empty: 3 + # Consecutive Empty alert Pagerduty Severity + consecutive_empty_priority: critical + + # For some Cosmos EVM chains, empty consensus blocks may decrease execution uptime + # since they aren't included in EVM state. Should an alert be sent if empty blocks are detected? + empty_percentage_enabled: yes + # What percentage should trigger the alert + empty_percentage: 2 + # Percentage Empty alert Pagerduty Severity + empty_percentage_priority: warning + # Should an alert be sent if the validator is not in the active set ie, jailed, # tombstoned, unbonding? 
alert_if_inactive: yes diff --git a/td2/alert.go b/td2/alert.go index 014d057..b6079ba 100644 --- a/td2/alert.go +++ b/td2/alert.go @@ -396,7 +396,7 @@ func (c *Config) alert(chainName, message, severity string, resolved bool, id *s // and also updates a few prometheus stats // FIXME: not watching for nodes that are lagging the head block! func (cc *ChainConfig) watch() { - var missedAlarm, pctAlarm, noNodes bool + var missedAlarm, pctAlarm, noNodes, emptyBlocksAlarm, emptyPctAlarm bool inactive := "jailed" nodeAlarms := make(map[string]bool) @@ -572,6 +572,75 @@ func (cc *ChainConfig) watch() { cc.activeAlerts = alarms.getCount(cc.name) } + // empty blocks alarm handling + if !emptyBlocksAlarm && cc.Alerts.ConsecutiveEmptyAlerts && int(cc.statConsecutiveEmpty) >= cc.Alerts.ConsecutiveEmpty { + // alert on empty blocks counter! + emptyBlocksAlarm = true + id := cc.valInfo.Valcons + "empty" + td.alert( + cc.name, + fmt.Sprintf("%s has proposed %d consecutive empty blocks on %s", cc.valInfo.Moniker, cc.Alerts.ConsecutiveEmpty, cc.ChainId), + cc.Alerts.ConsecutiveEmptyPriority, + false, + &id, + ) + cc.activeAlerts = alarms.getCount(cc.name) + } else if emptyBlocksAlarm && int(cc.statConsecutiveEmpty) < cc.Alerts.ConsecutiveEmpty { + // clear the alert + emptyBlocksAlarm = false + id := cc.valInfo.Valcons + "empty" + td.alert( + cc.name, + fmt.Sprintf("%s has proposed %d consecutive empty blocks on %s", cc.valInfo.Moniker, cc.Alerts.ConsecutiveEmpty, cc.ChainId), + "info", + true, + &id, + ) + cc.activeAlerts = alarms.getCount(cc.name) + } + + // window percentage empty block alarms + var emptyBlocksPercent float64 + if cc.statTotalProps > 0 { + emptyBlocksPercent = 100 * float64(cc.statTotalPropsEmpty) / float64(cc.statTotalProps) + } + + if cc.Alerts.EmptyPercentageAlerts && !emptyPctAlarm && emptyBlocksPercent > float64(cc.Alerts.EmptyWindow) { + // alert on empty block percentage! 
+ emptyPctAlarm = true + id := cc.valInfo.Valcons + "empty_percent" + td.alert( + cc.name, + fmt.Sprintf("%s has > %d%% empty blocks (%d of %d proposed blocks) on %s", + cc.valInfo.Moniker, + cc.Alerts.EmptyWindow, + int(cc.statTotalPropsEmpty), + int(cc.statTotalProps), + cc.ChainId), + cc.Alerts.EmptyPercentagePriority, + false, + &id, + ) + cc.activeAlerts = alarms.getCount(cc.name) + } else if cc.Alerts.EmptyPercentageAlerts && emptyPctAlarm && emptyBlocksPercent < float64(cc.Alerts.EmptyWindow) { + // clear the alert + emptyPctAlarm = false + id := cc.valInfo.Valcons + "empty_percent" + td.alert( + cc.name, + fmt.Sprintf("%s has > %d%% empty blocks (%d of %d proposed blocks) on %s", + cc.valInfo.Moniker, + cc.Alerts.EmptyWindow, + int(cc.statTotalPropsEmpty), + int(cc.statTotalProps), + cc.ChainId), + "info", + true, + &id, + ) + cc.activeAlerts = alarms.getCount(cc.name) + } + // node down alarms for _, node := range cc.Nodes { // window percentage missed block alarms diff --git a/td2/prometheus.go b/td2/prometheus.go index 2e53083..659a0bd 100644 --- a/td2/prometheus.go +++ b/td2/prometheus.go @@ -3,13 +3,14 @@ package tenderduty import ( "context" "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/client_golang/prometheus/promhttp" "log" "net/http" "sync" "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/promhttp" ) var ( @@ -25,6 +26,8 @@ const ( metricPrevote metricPrecommit metricConsecutive + metricEmptyBlocks + metricConsecutiveEmpty metricWindowMissed metricWindowSize metricLastBlockSeconds @@ -91,6 +94,14 @@ func prometheusExporter(ctx context.Context, updates chan *promUpdate) { Name: "tenderduty_consecutive_missed_blocks", Help: "the current count of consecutively missed blocks regardless of precommit or prevote status", }, chainLabels) + 
emptyBlocks := promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "tenderduty_empty_proposed_blocks", + Help: "count of empty blocks proposed (blocks with zero transactions) since tenderduty was started", + }, chainLabels) + consecutiveEmpty := promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "tenderduty_consecutive_empty_blocks", + Help: "the current count of consecutively proposed empty blocks", + }, chainLabels) windowSize := promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "tenderduty_missed_block_window", Help: "the missed block aka slashing window", @@ -135,6 +146,8 @@ func prometheusExporter(ctx context.Context, updates chan *promUpdate) { metricPrevote: missedPrevote, metricPrecommit: missedPrecommit, metricConsecutive: missedConsecutive, + metricEmptyBlocks: emptyBlocks, + metricConsecutiveEmpty: consecutiveEmpty, metricWindowMissed: missedWindow, metricWindowSize: windowSize, metricLastBlockSeconds: lastBlockSec, diff --git a/td2/static/grid.js b/td2/static/grid.js index 7bd05d3..d76374c 100644 --- a/td2/static/grid.js +++ b/td2/static/grid.js @@ -73,6 +73,17 @@ function legend() { offset += 65 * scale grad = ctx.createLinearGradient(offset, 0, offset+gridW, gridH) + grad.addColorStop(0, 'rgb(255,215,0)'); + grad.addColorStop(0.3, 'rgb(255,235,100)'); + grad.addColorStop(0.8, 'rgb(255,223,66)'); + ctx.fillStyle = grad + ctx.fillRect(offset, 0, gridW, gridH) + ctx.fillStyle = 'grey' + offset += gridW + gridW/2 + ctx.fillText("proposer/empty",offset, gridH/1.2) + + offset += 110 * scale + grad = ctx.createLinearGradient(offset, 0, offset+gridW, gridH) grad.addColorStop(0, 'rgba(0,0,0,0.2)'); ctx.fillStyle = grad ctx.fillRect(offset, 0, gridW, gridH) @@ -148,6 +159,11 @@ function drawSeries(multiStates) { crossThrough = false const grad = ctx.createLinearGradient((i*gridW)+gridTextW, (gridH*j), (i * gridW) + gridW +gridTextW, (gridH*j)) switch (multiStates.Status[j].blocks[i]) { + case 5: // empty proposed + grad.addColorStop(0, 'rgb(255,215,0)'); + 
grad.addColorStop(0.3, 'rgb(255,235,100)'); + grad.addColorStop(0.8, 'rgb(255,223,66)'); + break case 4: // proposed grad.addColorStop(0, 'rgb(123,255,66)'); grad.addColorStop(0.3, 'rgb(240,255,128)'); diff --git a/td2/static/index.html b/td2/static/index.html index fdffb8c..d66e70d 100644 --- a/td2/static/index.html +++ b/td2/static/index.html @@ -28,7 +28,7 @@