Skip to content

Commit cd39656

Browse files
authored
Add deployment_synced metric (#5816)
* core, graph: add deployment_synced metric * update news
1 parent 4598125 commit cd39656

File tree

3 files changed

+79
-1
lines changed

3 files changed

+79
-1
lines changed

NEWS.md

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
# NEWS
22

3-
## v0.36.1
3+
## v0.38.0
4+
5+
### What's new
6+
7+
- A new `deployment_synced` metric is added [(#5816)](https://github.com/graphprotocol/graph-node/pull/5816)
8+
that indicates whether a deployment has reached the chain head since it was deployed.
9+
10+
**Possible values for the metric:**
11+
- `0` - means that the deployment is not synced;
12+
- `1` - means that the deployment is synced.
13+
14+
_If a deployment is not running, the metric reports no value for that deployment._
15+
16+
## v0.37.0
417

518
### What's new
619

core/src/subgraph/runner.rs

+11
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ where
205205
}
206206

207207
async fn run_inner(mut self, break_on_restart: bool) -> Result<Self, SubgraphRunnerError> {
208+
self.update_deployment_synced_metric();
209+
208210
// If a subgraph failed for deterministic reasons, before start indexing, we first
209211
// revert the deployment head. It should lead to the same result since the error was
210212
// deterministic.
@@ -293,6 +295,8 @@ where
293295
res
294296
})?;
295297

298+
self.update_deployment_synced_metric();
299+
296300
// It is possible that the subgraph was unassigned, but the runner was in
297301
// a retry delay state and did not observe the cancel signal.
298302
if block_stream_cancel_handle.is_canceled() {
@@ -1231,6 +1235,13 @@ where
12311235

12321236
Ok((mods, processed_data_sources, persisted_data_sources))
12331237
}
1238+
1239+
fn update_deployment_synced_metric(&self) {
1240+
self.metrics
1241+
.subgraph
1242+
.deployment_synced
1243+
.record(self.inputs.store.is_deployment_synced());
1244+
}
12341245
}
12351246

12361247
#[derive(Debug)]

graph/src/components/metrics/subgraph.rs

+54
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pub struct SubgraphInstanceMetrics {
1818
pub firehose_connection_errors: Counter,
1919
pub stopwatch: StopwatchMetrics,
2020
pub deployment_status: DeploymentStatusMetric,
21+
pub deployment_synced: DeploymentSyncedMetric,
2122

2223
trigger_processing_duration: Box<Histogram>,
2324
blocks_processed_secs: Box<Counter>,
@@ -91,13 +92,16 @@ impl SubgraphInstanceMetrics {
9192
)
9293
.expect("failed to create blocks_processed_count counter");
9394

95+
let deployment_synced = DeploymentSyncedMetric::register(&registry, subgraph_hash);
96+
9497
Self {
9598
block_trigger_count,
9699
block_processing_duration,
97100
block_ops_transaction_duration,
98101
firehose_connection_errors,
99102
stopwatch,
100103
deployment_status,
104+
deployment_synced,
101105
trigger_processing_duration,
102106
blocks_processed_secs,
103107
blocks_processed_count,
@@ -120,6 +124,7 @@ impl SubgraphInstanceMetrics {
120124
registry.unregister(self.block_trigger_count.clone());
121125
registry.unregister(self.trigger_processing_duration.clone());
122126
registry.unregister(self.block_ops_transaction_duration.clone());
127+
registry.unregister(Box::new(self.deployment_synced.inner.clone()));
123128
}
124129
}
125130

@@ -213,3 +218,52 @@ impl DeploymentStatusMetric {
213218
self.inner.set(Self::STATUS_FAILED);
214219
}
215220
}
221+
222+
/// Indicates whether a deployment has reached the chain head since it was deployed.
223+
pub struct DeploymentSyncedMetric {
224+
inner: IntGauge,
225+
226+
// If, for some reason, a deployment reports that it is synced, and then reports that it is not
227+
// synced during an execution, this prevents the metric from reverting to the not synced state.
228+
previously_synced: std::sync::OnceLock<()>,
229+
}
230+
231+
impl DeploymentSyncedMetric {
232+
const NOT_SYNCED: i64 = 0;
233+
const SYNCED: i64 = 1;
234+
235+
/// Registers the metric.
236+
pub fn register(registry: &MetricsRegistry, deployment_hash: &str) -> Self {
237+
let metric = registry
238+
.new_int_gauge(
239+
"deployment_synced",
240+
"Indicates whether a deployment has reached the chain head since it was deployed.\n\
241+
Possible values:\n\
242+
0 - deployment is not synced;\n\
243+
1 - deployment is synced;",
244+
[("deployment", deployment_hash)],
245+
)
246+
.expect("failed to register `deployment_synced` gauge");
247+
248+
Self {
249+
inner: metric,
250+
previously_synced: std::sync::OnceLock::new(),
251+
}
252+
}
253+
254+
/// Records the current sync status of the deployment.
255+
/// Will ignore all values after the first `true` is received.
256+
pub fn record(&self, synced: bool) {
257+
if self.previously_synced.get().is_some() {
258+
return;
259+
}
260+
261+
if synced {
262+
self.inner.set(Self::SYNCED);
263+
let _ = self.previously_synced.set(());
264+
return;
265+
}
266+
267+
self.inner.set(Self::NOT_SYNCED);
268+
}
269+
}

0 commit comments

Comments
 (0)