From 5e080e156ad820e2e9b7a89a2e70b1010d273bce Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 3 Jun 2022 14:20:51 -0300 Subject: [PATCH 01/47] Initial Prometheus support --- Cargo.lock | 11 ++ prometheus/Cargo.toml | 21 ++ prometheus/src/bank_metrics.rs | 49 +++++ prometheus/src/cluster_metrics.rs | 38 ++++ prometheus/src/lib.rs | 24 +++ prometheus/src/token.rs | 196 +++++++++++++++++++ prometheus/src/utils.rs | 306 ++++++++++++++++++++++++++++++ rpc/Cargo.toml | 2 + rpc/src/rpc_health.rs | 2 +- rpc/src/rpc_service.rs | 25 ++- 10 files changed, 666 insertions(+), 8 deletions(-) create mode 100644 prometheus/Cargo.toml create mode 100644 prometheus/src/bank_metrics.rs create mode 100644 prometheus/src/cluster_metrics.rs create mode 100644 prometheus/src/lib.rs create mode 100644 prometheus/src/token.rs create mode 100644 prometheus/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index f5806f14654309..ec5ab532673d1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5851,6 +5851,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "solana-prometheus" +version = "1.10.28" +dependencies = [ + "jsonrpc-http-server", + "solana-gossip", + "solana-runtime", + "solana-sdk 1.10.28", +] + [[package]] name = "solana-rayon-threadlimit" version = "1.10.32" @@ -5960,6 +5970,7 @@ dependencies = [ "solana-net-utils", "solana-perf", "solana-poh", + "solana-prometheus", "solana-rayon-threadlimit", "solana-runtime", "solana-sdk 1.10.32", diff --git a/prometheus/Cargo.toml b/prometheus/Cargo.toml new file mode 100644 index 00000000000000..06b64e031a6f79 --- /dev/null +++ b/prometheus/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "solana-prometheus" +version = "1.10.28" +description = "Solana Prometheus" +authors = ["ChorusOne "] +repository = "https://github.com/ChorusOne/solana" +license = "Apache-2.0" +edition = "2021" + +[dependencies] +jsonrpc-http-server = "18.0.0" +solana-gossip = { path = "../gossip" } +solana-runtime = { path = "../runtime" } +solana-sdk = { path = "../sdk" } + 
+[lib] +crate-type = ["lib"] +name = "solana_prometheus" + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs new file mode 100644 index 00000000000000..7c0d7de0e6f8c6 --- /dev/null +++ b/prometheus/src/bank_metrics.rs @@ -0,0 +1,49 @@ +use solana_runtime::bank::Bank; + +use crate::utils::{write_metric, Metric, MetricFamily}; +use std::{io, sync::Arc, time::SystemTime}; + +pub fn write_bank_metrics( + at: SystemTime, + bank: &Arc, + out: &mut W, +) -> io::Result<()> { + write_metric( + out, + &MetricFamily { + name: "solana_bank_slot", + help: "Current Slot", + type_: "gauge", + metrics: vec![Metric::new(bank.slot()).at(at)], + }, + )?; + write_metric( + out, + &MetricFamily { + name: "solana_bank_epoch", + help: "Current Epoch", + type_: "gauge", + metrics: vec![Metric::new(bank.epoch()).at(at)], + }, + )?; + write_metric( + out, + &MetricFamily { + name: "solana_bank_successful_transaction_count", + help: "Number of transactions in the block that executed successfully", + type_: "gauge", + metrics: vec![Metric::new(bank.transaction_count()).at(at)], + }, + )?; + write_metric( + out, + &MetricFamily { + name: "solana_bank_error_transaction_count", + help: "Number of transactions in the block that executed with error", + type_: "gauge", + metrics: vec![Metric::new(bank.transaction_error_count()).at(at)], + }, + )?; + + Ok(()) +} diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs new file mode 100644 index 00000000000000..ca9357378bca29 --- /dev/null +++ b/prometheus/src/cluster_metrics.rs @@ -0,0 +1,38 @@ +use solana_gossip::cluster_info::ClusterInfo; +use solana_runtime::bank::Bank; + +use crate::{utils::{write_metric, Metric, MetricFamily}, token::Lamports}; +use std::{io, sync::Arc, time::SystemTime}; + +pub fn write_cluster_metrics( + at: SystemTime, + bank: &Arc, + cluster_info: &Arc, + out: &mut W, +) -> io::Result<()> { + let 
identity_pubkey = cluster_info.id(); + write_metric( + out, + &MetricFamily { + name: "solana_cluster_identity_info", + help: "The current node's identity", + type_: "count", + metrics: vec![Metric::new(1) + .with_label("identity", identity_pubkey.to_string()) + .at(at)], + }, + )?; + + let identity_balance = Lamports(bank.get_balance(&identity_pubkey)); + write_metric( + out, + &MetricFamily { + name: "solana_cluster_identity_balance_total", + help: "The current node's identity balance", + type_: "count", + metrics: vec![Metric::new_sol(identity_balance).at(at)], + }, + )?; + + Ok(()) +} diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs new file mode 100644 index 00000000000000..34373069ba93d8 --- /dev/null +++ b/prometheus/src/lib.rs @@ -0,0 +1,24 @@ +mod bank_metrics; +mod cluster_metrics; +mod token; +mod utils; + +use solana_gossip::cluster_info::ClusterInfo; +use solana_runtime::bank_forks::BankForks; +use std::{ + sync::{Arc, RwLock}, + time::SystemTime, +}; + +pub fn render_prometheus( + bank_forks: &Arc>, + cluster_info: &Arc, +) -> Vec { + let current_bank = bank_forks.read().unwrap().working_bank(); + let now = SystemTime::now(); + let mut out: Vec = Vec::new(); + bank_metrics::write_bank_metrics(now, ¤t_bank, &mut out).expect("IO error"); + cluster_metrics::write_cluster_metrics(now, ¤t_bank, &cluster_info, &mut out) + .expect("IO error"); + out +} diff --git a/prometheus/src/token.rs b/prometheus/src/token.rs new file mode 100644 index 00000000000000..d4b59a300842bf --- /dev/null +++ b/prometheus/src/token.rs @@ -0,0 +1,196 @@ +use std::{ + convert::TryFrom, + fmt, + iter::Sum, + ops::{Add, Div, Mul, Sub}, +}; + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct Rational { + pub numerator: u64, + pub denominator: u64, +} + +impl PartialOrd for Rational { + fn partial_cmp(&self, other: &Self) -> Option { + if self.denominator == 0 || other.denominator == 0 { + None + } else { + let x = self.numerator as u128 * other.denominator as u128; + 
let y = other.numerator as u128 * self.denominator as u128; + Some(x.cmp(&y)) + } + } +} + +impl Div for Rational { + type Output = f64; + + // We do not return a `Rational` here because `self.numerator * + // rhs.denominator` or `rhs.numerator * self.denominator`could overflow. + // Instead we deal with floating point numbers. + fn div(self, rhs: Self) -> Self::Output { + (self.numerator as f64 * rhs.denominator as f64) + / (self.denominator as f64 * rhs.numerator as f64) + } +} + +impl Rational { + pub fn to_f64(&self) -> f64 { + self.numerator as f64 / self.denominator as f64 + } +} + +/// Error returned when a calculation in a token type overflows, underflows, or divides by zero. +#[derive(Debug, Eq, PartialEq)] +pub struct ArithmeticError; + +pub type Result = std::result::Result; + +/// Generate a token type that wraps the minimal unit of the token, it’s +/// “Lamport”. The symbol is for 109 of its minimal units and is +/// only used for `Debug` and `Display` printing. +#[macro_export] +macro_rules! impl_token { + ($TokenLamports:ident, $symbol:expr, decimals = $decimals:expr) => { + #[derive(Copy, Clone, Default, Eq, Ord, PartialEq, PartialOrd)] + pub struct $TokenLamports(pub u64); + + impl fmt::Display for $TokenLamports { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}.{} {}", + self.0 / 10u64.pow($decimals), + &format!("{:0>9}", self.0 % 10u64.pow($decimals))[9 - $decimals..], + $symbol + ) + } + } + + impl fmt::Debug for $TokenLamports { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self, f) + } + } + + impl Mul for $TokenLamports { + type Output = Result<$TokenLamports>; + fn mul(self, other: Rational) -> Result<$TokenLamports> { + // This multiplication cannot overflow, because we expand the + // u64s into u128, and u64::MAX * u64::MAX < u128::MAX. 
+ let result_u128 = ((self.0 as u128) * (other.numerator as u128)) + .checked_div(other.denominator as u128) + .ok_or(ArithmeticError)?; + u64::try_from(result_u128) + .map($TokenLamports) + .map_err(|_| ArithmeticError) + } + } + + impl Mul for $TokenLamports { + type Output = Result<$TokenLamports>; + fn mul(self, other: u64) -> Result<$TokenLamports> { + self.0 + .checked_mul(other) + .map($TokenLamports) + .ok_or(ArithmeticError) + } + } + + impl Div for $TokenLamports { + type Output = Result<$TokenLamports>; + fn div(self, other: u64) -> Result<$TokenLamports> { + self.0 + .checked_div(other) + .map($TokenLamports) + .ok_or(ArithmeticError) + } + } + + impl Sub<$TokenLamports> for $TokenLamports { + type Output = Result<$TokenLamports>; + fn sub(self, other: $TokenLamports) -> Result<$TokenLamports> { + self.0 + .checked_sub(other.0) + .map($TokenLamports) + .ok_or(ArithmeticError) + } + } + + impl Add<$TokenLamports> for $TokenLamports { + type Output = Result<$TokenLamports>; + fn add(self, other: $TokenLamports) -> Result<$TokenLamports> { + self.0 + .checked_add(other.0) + .map($TokenLamports) + .ok_or(ArithmeticError) + } + } + + impl Sum<$TokenLamports> for Result<$TokenLamports> { + fn sum>(iter: I) -> Self { + let mut sum = $TokenLamports(0); + for item in iter { + sum = (sum + item)?; + } + Ok(sum) + } + } + /// Parse a numeric string as an amount of Lamports, i.e., with 9 digit precision. + /// + /// Note that this parses the Lamports amount divided by 109, + /// which can include a decimal point. It does not parse the number of + /// Lamports! This makes this function the semi-inverse of `Display` + /// (only `Display` adds the suffixes, and we do not expect that + /// here). 
+ impl std::str::FromStr for $TokenLamports { + type Err = &'static str; + fn from_str(s: &str) -> std::result::Result { + let mut value = 0_u64; + let mut is_after_decimal = false; + let mut exponent: i32 = $decimals; + let mut had_digit = false; + + // Walk the bytes one by one, we only expect ASCII digits or '.', so bytes + // suffice. We build up the value as we go, and if we get past the decimal + // point, we also track how far we are past it. + for ch in s.as_bytes() { + match ch { + b'0'..=b'9' => { + value = value * 10 + ((ch - b'0') as u64); + if is_after_decimal { + exponent -= 1; + } + had_digit = true; + } + b'.' if !is_after_decimal => is_after_decimal = true, + b'.' => return Err("Value can contain at most one '.' (decimal point)."), + b'_' => { /* As a courtesy, allow numeric underscores for readability. */ } + _ => return Err("Invalid value, only digits, '_', and '.' are allowed."), + } + + if exponent < 0 { + return Err("Value can contain at most 9 digits after the decimal point."); + } + } + + if !had_digit { + return Err("Value must contain at least one digit."); + } + + // If the value contained fewer than 9 digits behind the decimal point + // (or no decimal point at all), scale up the value so it is measured + // in lamports. + while exponent > 0 { + value *= 10; + exponent -= 1; + } + + Ok($TokenLamports(value)) + } + } + }; +} + +impl_token!(Lamports, "SOL", decimals = 9); diff --git a/prometheus/src/utils.rs b/prometheus/src/utils.rs new file mode 100644 index 00000000000000..a48a540ef7c5ce --- /dev/null +++ b/prometheus/src/utils.rs @@ -0,0 +1,306 @@ +// SPDX-FileCopyrightText: 2022 Chorus One AG +// SPDX-License-Identifier: GPL-3.0 + +//! Utilities for formatting Prometheus metrics. +//! +//! See also . + +use std::io; +use std::io::Write; +use std::time::SystemTime; + +use crate::token::Lamports; + +pub struct MetricFamily<'a> { + /// Name of the metric, e.g. [`goats_teleported_total`](https://crbug.com/31482). 
+ pub name: &'a str, + /// HELP line content. + pub help: &'a str, + /// TYPE line content. Most common are `counter`, `gauge`, and `histogram`. + pub type_: &'a str, + /// Values for this metric, possibly with labels or a suffix. + pub metrics: Vec>, +} + +pub enum MetricValue { + /// Render the inner value as-is, as an integer. + Int(u64), + + /// Divide the inner value by 109 and render as fixed-point number. + /// + /// E.g. `Nano(12)` renders as `0.000000012`. + Nano(u64), + + Float(f64), +} + +impl From for MetricValue { + fn from(v: u64) -> MetricValue { + MetricValue::Int(v) + } +} + +impl From for MetricValue { + fn from(v: f64) -> MetricValue { + MetricValue::Float(v) + } +} + +pub struct Metric<'a> { + /// Suffix to append to the metric name, useful for e.g. the `_bucket` suffix on histograms. + pub suffix: &'a str, + + /// Name-value label pairs. + pub labels: Vec<(&'a str, String)>, + + /// Metric value, either an integer, or a fixed-point number. + pub value: MetricValue, + + /// Time at which this metric was observed, when proxying metrics. + pub timestamp: Option, +} + +impl<'a> Metric<'a> { + /// Construct a basic metric with just a value. + /// + /// Can be extended with the builder-style methods below. + pub fn new>(value: T) -> Metric<'a> { + Metric { + labels: Vec::new(), + suffix: "", + value: value.into(), + timestamp: None, + } + } + + /// Construct a metric that measures an amount of SOL. + pub fn new_sol(amount: Lamports) -> Metric<'a> { + // One Lamport is 1e-9 SOL, so we use nano here. + Metric::new(MetricValue::Nano(amount.0)) + } + + /// Set the timestamp. 
+ pub fn at(mut self, at: SystemTime) -> Metric<'a> { + self.timestamp = Some(at); + self + } + + pub fn with_label(mut self, label_key: &'a str, label_value: String) -> Metric<'a> { + self.labels.push((label_key, label_value)); + self + } +} + +pub fn write_metric(out: &mut W, family: &MetricFamily) -> io::Result<()> { + writeln!(out, "# HELP {} {}", family.name, family.help)?; + writeln!(out, "# TYPE {} {}", family.name, family.type_)?; + for metric in &family.metrics { + write!(out, "{}{}", family.name, metric.suffix)?; + + // If there are labels, write the key-value pairs between {}. + // Escaping of the value uses Rust's string syntax, which is + // not exactly what Prometheus wants, but it is identical for + // all of the values that we use it with; this is not a general + // Prometheus formatter, just a quick one for our use. + if !metric.labels.is_empty() { + write!(out, "{{")?; + let mut separator = ""; + for (key, value) in &metric.labels { + write!(out, "{}{}={:?}", separator, key, value)?; + separator = ","; + } + write!(out, "}}")?; + } + + match metric.value { + MetricValue::Int(v) => write!(out, " {}", v)?, + MetricValue::Nano(v) => { + write!(out, " {}.{:0>9}", v / 1_000_000_000, v % 1_000_000_000)? + } + MetricValue::Float(v) => write!(out, " {}", v)?, + } + + if let Some(timestamp) = metric.timestamp { + let unix_time_ms = match timestamp.duration_since(SystemTime::UNIX_EPOCH) { + Ok(duration) => duration.as_millis(), + Err(..) => panic!("Found a metric dated before UNIX_EPOCH."), + }; + // Timestamps in Prometheus are milliseconds since epoch, + // excluding leap seconds. (Which is what you get if your system + // clock tracks UTC.) + write!(out, " {}", unix_time_ms)?; + } + + writeln!(out)?; + } + + // Add a blank line for readability by humans. 
+ writeln!(out) +} + +#[cfg(test)] +mod test { + use std::str; + + use super::{write_metric, Metric, MetricFamily, MetricValue}; + + #[test] + fn write_metric_without_labels() { + let mut out: Vec = Vec::new(); + write_metric( + &mut out, + &MetricFamily { + // The metric names are just for testing purposes. + // See also https://crbug.com/31482. + name: "goats_teleported_total", + help: "Number of goats teleported since launch.", + type_: "counter", + metrics: vec![Metric::new(144)], + }, + ) + .unwrap(); + + assert_eq!( + str::from_utf8(&out[..]), + Ok( + "# HELP goats_teleported_total Number of goats teleported since launch.\n\ + # TYPE goats_teleported_total counter\n\ + goats_teleported_total 144\n\n\ + " + ) + ) + } + + #[test] + fn write_metric_histogram() { + let mut out: Vec = Vec::new(); + write_metric( + &mut out, + &MetricFamily { + name: "teleported_goat_weight_kg", + help: "Histogram of the weight of teleported goats.", + type_: "histogram", + metrics: vec![ + Metric::new(44) + .with_suffix("_bucket") + .with_label("le", "50.0".to_string()), + Metric::new(67) + .with_suffix("_bucket") + .with_label("le", "75.0".to_string()), + Metric::new(144) + .with_suffix("_bucket") + .with_label("le", "+Inf".to_string()), + Metric::new(11520).with_suffix("_sum"), + Metric::new(144).with_suffix("_count"), + ], + }, + ) + .unwrap(); + + assert_eq!( + str::from_utf8(&out[..]), + Ok( + "# HELP teleported_goat_weight_kg Histogram of the weight of teleported goats.\n\ + # TYPE teleported_goat_weight_kg histogram\n\ + teleported_goat_weight_kg_bucket{le=\"50.0\"} 44\n\ + teleported_goat_weight_kg_bucket{le=\"75.0\"} 67\n\ + teleported_goat_weight_kg_bucket{le=\"+Inf\"} 144\n\ + teleported_goat_weight_kg_sum 11520\n\ + teleported_goat_weight_kg_count 144\n\n\ + " + ) + ) + } + + #[test] + fn write_metric_multiple_labels() { + let mut out: Vec = Vec::new(); + write_metric( + &mut out, + &MetricFamily { + name: "goats_teleported_total", + help: "Number of goats teleported 
since launch by departure and arrival.", + type_: "counter", + metrics: vec![ + Metric::new(10) + .with_label("src", "AMS".to_string()) + .with_label("dst", "ZRH".to_string()), + Metric::new(53) + .with_label("src", "ZRH".to_string()) + .with_label("dst", "DXB".to_string()), + ], + }, + ) + .unwrap(); + + assert_eq!( + str::from_utf8(&out[..]), + Ok( + "# HELP goats_teleported_total Number of goats teleported since launch by departure and arrival.\n\ + # TYPE goats_teleported_total counter\n\ + goats_teleported_total{src=\"AMS\",dst=\"ZRH\"} 10\n\ + goats_teleported_total{src=\"ZRH\",dst=\"DXB\"} 53\n\n\ + " + ) + ) + } + + #[test] + fn write_metric_with_timestamp() { + use std::time::{Duration, SystemTime}; + + let mut out: Vec = Vec::new(); + let t = SystemTime::UNIX_EPOCH + Duration::from_secs(77); + write_metric( + &mut out, + &MetricFamily { + name: "goats_teleported_total", + help: "Number of goats teleported since launch.", + type_: "counter", + metrics: vec![Metric::new(10).at(t)], + }, + ) + .unwrap(); + + assert_eq!( + str::from_utf8(&out[..]), + Ok( + "# HELP goats_teleported_total Number of goats teleported since launch.\n\ + # TYPE goats_teleported_total counter\n\ + goats_teleported_total 10 77000\n\n\ + " + ) + ) + } + + #[test] + fn write_metric_nano_micro() { + let mut out: Vec = Vec::new(); + write_metric( + &mut out, + &MetricFamily { + name: "goat_weight_kg", + help: "Weight of the goat in kilograms.", + type_: "gauge", + metrics: vec![ + // One greater than 1, with no need for zero padding. + Metric::new(MetricValue::Nano(67_533_128_017)), + // One smaller than 1, with the need for zero padding. 
+ Metric::new(MetricValue::Nano(128_017)), + ], + }, + ) + .unwrap(); + + assert_eq!( + str::from_utf8(&out[..]), + Ok("# HELP goat_weight_kg Weight of the goat in kilograms.\n\ + # TYPE goat_weight_kg gauge\n\ + goat_weight_kg 67.533128017\n\ + goat_weight_kg 67.533128\n\ + goat_weight_kg 0.000128017\n\ + goat_weight_kg 0.000128\n\n\ + ") + ) + } +} diff --git a/rpc/Cargo.toml b/rpc/Cargo.toml index d5f24e4eddbb10..8af7af78758776 100644 --- a/rpc/Cargo.toml +++ b/rpc/Cargo.toml @@ -55,6 +55,8 @@ thiserror = "1.0" tokio = { version = "~1.14.1", features = ["full"] } tokio-util = { version = "0.6", features = ["codec", "compat"] } +solana-prometheus = { path = "../prometheus" } + [dev-dependencies] serial_test = "0.6.0" solana-address-lookup-table-program = { path = "../programs/address-lookup-table", version = "=1.10.32" } diff --git a/rpc/src/rpc_health.rs b/rpc/src/rpc_health.rs index 8f4b4dfc4c53d8..d5048ce3626840 100644 --- a/rpc/src/rpc_health.rs +++ b/rpc/src/rpc_health.rs @@ -18,7 +18,7 @@ pub enum RpcHealthStatus { } pub struct RpcHealth { - cluster_info: Arc, + pub cluster_info: Arc, known_validators: Option>, health_check_slot_distance: u64, override_health_check: Arc, diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index 7e32bf81ea195d..8f811d34c54fa7 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -28,6 +28,7 @@ use { solana_metrics::inc_new_counter_info, solana_perf::thread::renice_this_thread, solana_poh::poh_recorder::PohRecorder, + solana_prometheus::render_prometheus, solana_runtime::{ bank_forks::BankForks, commitment::BlockCommitmentCache, snapshot_archive_info::SnapshotArchiveInfoGetter, snapshot_config::SnapshotConfig, @@ -282,14 +283,24 @@ impl RequestMiddleware for RpcRequestMiddleware { .into() } else if self.is_file_get_path(request.uri().path()) { self.process_file_get(request.uri().path()) - } else if request.uri().path() == "/health" { - hyper::Response::builder() - .status(hyper::StatusCode::OK) - 
.body(hyper::Body::from(self.health_check())) - .unwrap() - .into() } else { - request.into() + match request.uri().path() { + "/health" => hyper::Response::builder() + .status(hyper::StatusCode::OK) + .body(hyper::Body::from(self.health_check())) + .unwrap() + .into(), + "/prometheus" => hyper::Response::builder() + .status(hyper::StatusCode::OK) + .header("Content-Type", "text/plain; version=0.0.4; charset=UTF-8") + .body(hyper::Body::from(render_prometheus( + &self.bank_forks, + &self.health.cluster_info, + ))) + .unwrap() + .into(), + _ => request.into(), + } } } } From 15ded10dc38d605eb1c06fc4c0301c2cc5738a8e Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 3 Jun 2022 14:41:35 -0300 Subject: [PATCH 02/47] Add Solana node's version --- prometheus/src/cluster_metrics.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index ca9357378bca29..ceb1fa2a6d2012 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -1,7 +1,10 @@ use solana_gossip::cluster_info::ClusterInfo; use solana_runtime::bank::Bank; -use crate::{utils::{write_metric, Metric, MetricFamily}, token::Lamports}; +use crate::{ + token::Lamports, + utils::{write_metric, Metric, MetricFamily}, +}; use std::{io, sync::Arc, time::SystemTime}; pub fn write_cluster_metrics( @@ -11,6 +14,10 @@ pub fn write_cluster_metrics( out: &mut W, ) -> io::Result<()> { let identity_pubkey = cluster_info.id(); + let version = cluster_info + .get_node_version(&identity_pubkey) + .unwrap_or_default(); + write_metric( out, &MetricFamily { @@ -34,5 +41,17 @@ pub fn write_cluster_metrics( }, )?; + write_metric( + out, + &MetricFamily { + name: "solana_cluster_node_version_info", + help: "The current Solana node's version", + type_: "count", + metrics: vec![Metric::new(1) + .with_label("version", version.to_string()) + .at(at)], + }, + )?; + Ok(()) } From 
8b20fafe6392376d456ad922be0cdbf2b12e6ed1 Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 3 Jun 2022 14:43:25 -0300 Subject: [PATCH 03/47] Rename metric --- prometheus/src/cluster_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index ceb1fa2a6d2012..bb6afc9806764c 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -21,7 +21,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_cluster_identity_info", + name: "solana_cluster_identity_public_key_info", help: "The current node's identity", type_: "count", metrics: vec![Metric::new(1) From b0f40a18e9f402a95d8c2c609f6155407d661575 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 7 Jun 2022 17:39:44 -0300 Subject: [PATCH 04/47] Bugfix: balance should be `gauge` --- prometheus/src/cluster_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index bb6afc9806764c..03395f042b0b05 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -36,7 +36,7 @@ pub fn write_cluster_metrics( &MetricFamily { name: "solana_cluster_identity_balance_total", help: "The current node's identity balance", - type_: "count", + type_: "gauge", metrics: vec![Metric::new_sol(identity_balance).at(at)], }, )?; From a7284098af84a7709f4ba80d2f057f4afa7c21fb Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 9 Jun 2022 11:32:04 -0300 Subject: [PATCH 05/47] delete dependabot from our fork --- .github/dependabot.yml | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index c2fc36a3e6a61f..00000000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,41 +0,0 @@ -# To get started with Dependabot version updates, you'll 
need to specify which -# package ecosystems to update and where the package manifests are located. -# Please see the documentation for all configuration options: -# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates - -version: 2 -updates: -- package-ecosystem: cargo - directory: "/" - schedule: - interval: daily - time: "01:00" - timezone: America/Los_Angeles - #labels: - # - "automerge" - open-pull-requests-limit: 3 - -- package-ecosystem: npm - directory: "/web3.js" - schedule: - interval: daily - time: "01:00" - timezone: America/Los_Angeles - labels: - - "automerge" - commit-message: - prefix: "chore:" - open-pull-requests-limit: 3 - -- package-ecosystem: npm - directory: "/explorer" - schedule: - interval: daily - time: "01:00" - timezone: America/Los_Angeles - labels: - - "automerge" - commit-message: - prefix: "chore:" - include: "scope" - open-pull-requests-limit: 3 From 53324708f6d26a817417c4c6c5d4d17c80301361 Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 9 Jun 2022 11:46:24 -0300 Subject: [PATCH 06/47] Remove `at` parameter --- prometheus/src/bank_metrics.rs | 16 ++++++---------- prometheus/src/cluster_metrics.rs | 13 ++++--------- prometheus/src/lib.rs | 10 +++------- prometheus/src/utils.rs | 6 ------ 4 files changed, 13 insertions(+), 32 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 7c0d7de0e6f8c6..9f5bafea47a4d8 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -1,20 +1,16 @@ use solana_runtime::bank::Bank; use crate::utils::{write_metric, Metric, MetricFamily}; -use std::{io, sync::Arc, time::SystemTime}; +use std::{io, sync::Arc}; -pub fn write_bank_metrics( - at: SystemTime, - bank: &Arc, - out: &mut W, -) -> io::Result<()> { +pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Result<()> { write_metric( out, &MetricFamily { name: "solana_bank_slot", help: "Current Slot", type_: "gauge", - metrics: 
vec![Metric::new(bank.slot()).at(at)], + metrics: vec![Metric::new(bank.slot())], }, )?; write_metric( @@ -23,7 +19,7 @@ pub fn write_bank_metrics( name: "solana_bank_epoch", help: "Current Epoch", type_: "gauge", - metrics: vec![Metric::new(bank.epoch()).at(at)], + metrics: vec![Metric::new(bank.epoch())], }, )?; write_metric( @@ -32,7 +28,7 @@ pub fn write_bank_metrics( name: "solana_bank_successful_transaction_count", help: "Number of transactions in the block that executed successfully", type_: "gauge", - metrics: vec![Metric::new(bank.transaction_count()).at(at)], + metrics: vec![Metric::new(bank.transaction_count())], }, )?; write_metric( @@ -41,7 +37,7 @@ pub fn write_bank_metrics( name: "solana_bank_error_transaction_count", help: "Number of transactions in the block that executed with error", type_: "gauge", - metrics: vec![Metric::new(bank.transaction_error_count()).at(at)], + metrics: vec![Metric::new(bank.transaction_error_count())], }, )?; diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 03395f042b0b05..416c3af8a3f095 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -5,10 +5,9 @@ use crate::{ token::Lamports, utils::{write_metric, Metric, MetricFamily}, }; -use std::{io, sync::Arc, time::SystemTime}; +use std::{io, sync::Arc}; pub fn write_cluster_metrics( - at: SystemTime, bank: &Arc, cluster_info: &Arc, out: &mut W, @@ -24,9 +23,7 @@ pub fn write_cluster_metrics( name: "solana_cluster_identity_public_key_info", help: "The current node's identity", type_: "count", - metrics: vec![Metric::new(1) - .with_label("identity", identity_pubkey.to_string()) - .at(at)], + metrics: vec![Metric::new(1).with_label("identity", identity_pubkey.to_string())], }, )?; @@ -37,7 +34,7 @@ pub fn write_cluster_metrics( name: "solana_cluster_identity_balance_total", help: "The current node's identity balance", type_: "gauge", - metrics: vec![Metric::new_sol(identity_balance).at(at)], + metrics: 
vec![Metric::new_sol(identity_balance)], }, )?; @@ -47,9 +44,7 @@ pub fn write_cluster_metrics( name: "solana_cluster_node_version_info", help: "The current Solana node's version", type_: "count", - metrics: vec![Metric::new(1) - .with_label("version", version.to_string()) - .at(at)], + metrics: vec![Metric::new(1).with_label("version", version.to_string())], }, )?; diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index 34373069ba93d8..cb6c941e776772 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -5,20 +5,16 @@ mod utils; use solana_gossip::cluster_info::ClusterInfo; use solana_runtime::bank_forks::BankForks; -use std::{ - sync::{Arc, RwLock}, - time::SystemTime, -}; +use std::sync::{Arc, RwLock}; pub fn render_prometheus( bank_forks: &Arc>, cluster_info: &Arc, ) -> Vec { let current_bank = bank_forks.read().unwrap().working_bank(); - let now = SystemTime::now(); let mut out: Vec = Vec::new(); - bank_metrics::write_bank_metrics(now, ¤t_bank, &mut out).expect("IO error"); - cluster_metrics::write_cluster_metrics(now, ¤t_bank, &cluster_info, &mut out) + bank_metrics::write_bank_metrics(¤t_bank, &mut out).expect("IO error"); + cluster_metrics::write_cluster_metrics(¤t_bank, &cluster_info, &mut out) .expect("IO error"); out } diff --git a/prometheus/src/utils.rs b/prometheus/src/utils.rs index a48a540ef7c5ce..54e607134cb839 100644 --- a/prometheus/src/utils.rs +++ b/prometheus/src/utils.rs @@ -79,12 +79,6 @@ impl<'a> Metric<'a> { Metric::new(MetricValue::Nano(amount.0)) } - /// Set the timestamp. 
- pub fn at(mut self, at: SystemTime) -> Metric<'a> { - self.timestamp = Some(at); - self - } - pub fn with_label(mut self, label_key: &'a str, label_value: String) -> Metric<'a> { self.labels.push((label_key, label_value)); self From 906f67c8d6ac2b71330874df5c9cd7f2deac4a0b Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 9 Jun 2022 14:33:35 -0300 Subject: [PATCH 07/47] Change path from `prometheus` to `metrics` --- rpc/src/rpc_service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index 8f811d34c54fa7..b0c86ccdca83ea 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -290,7 +290,7 @@ impl RequestMiddleware for RpcRequestMiddleware { .body(hyper::Body::from(self.health_check())) .unwrap() .into(), - "/prometheus" => hyper::Response::builder() + "/metrics" => hyper::Response::builder() .status(hyper::StatusCode::OK) .header("Content-Type", "text/plain; version=0.0.4; charset=UTF-8") .body(hyper::Body::from(render_prometheus( From 3a958c38fe4356d056f012dbd2fe1e6fc449761f Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 16:25:23 -0300 Subject: [PATCH 08/47] Change `cluster` -> `node` When referring to our node, we use simply `node` as opposed to `cluster`. 
--- prometheus/src/cluster_metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 416c3af8a3f095..6eaf72dd79cb45 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -20,7 +20,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_cluster_identity_public_key_info", + name: "solana_node_identity_public_key_info", help: "The current node's identity", type_: "count", metrics: vec![Metric::new(1).with_label("identity", identity_pubkey.to_string())], @@ -31,7 +31,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_cluster_identity_balance_total", + name: "solana_node_identity_balance_total", help: "The current node's identity balance", type_: "gauge", metrics: vec![Metric::new_sol(identity_balance)], @@ -41,7 +41,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_cluster_node_version_info", + name: "solana_node_version_info", help: "The current Solana node's version", type_: "count", metrics: vec![Metric::new(1).with_label("version", version.to_string())], From ba6833201bbe78d5ac5e5d6590626ec9ef53701a Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 16:28:08 -0300 Subject: [PATCH 09/47] Rename metrics --- prometheus/src/cluster_metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 6eaf72dd79cb45..47d750fbab807e 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -21,7 +21,7 @@ pub fn write_cluster_metrics( out, &MetricFamily { name: "solana_node_identity_public_key_info", - help: "The current node's identity", + help: "The node's current identity", type_: "count", metrics: vec![Metric::new(1).with_label("identity", identity_pubkey.to_string())], }, @@ -31,8 +31,8 @@ pub fn 
write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_node_identity_balance_total", - help: "The current node's identity balance", + name: "solana_node_identity_balance_sol", + help: "The node's current identity balance", type_: "gauge", metrics: vec![Metric::new_sol(identity_balance)], }, From d7345279fc2956d4d7a954dbca3a90f5a01aa323 Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 16:30:52 -0300 Subject: [PATCH 10/47] Simplify `Lamports` struct --- prometheus/src/cluster_metrics.rs | 2 +- prometheus/src/lib.rs | 3 +- prometheus/src/token.rs | 196 ------------------------------ prometheus/src/utils.rs | 2 +- 4 files changed, 4 insertions(+), 199 deletions(-) delete mode 100644 prometheus/src/token.rs diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 47d750fbab807e..207a1c26b02282 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -2,8 +2,8 @@ use solana_gossip::cluster_info::ClusterInfo; use solana_runtime::bank::Bank; use crate::{ - token::Lamports, utils::{write_metric, Metric, MetricFamily}, + Lamports, }; use std::{io, sync::Arc}; diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index cb6c941e776772..24acd7c2a1c022 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -1,12 +1,13 @@ mod bank_metrics; mod cluster_metrics; -mod token; mod utils; use solana_gossip::cluster_info::ClusterInfo; use solana_runtime::bank_forks::BankForks; use std::sync::{Arc, RwLock}; +pub struct Lamports(pub u64); + pub fn render_prometheus( bank_forks: &Arc>, cluster_info: &Arc, diff --git a/prometheus/src/token.rs b/prometheus/src/token.rs deleted file mode 100644 index d4b59a300842bf..00000000000000 --- a/prometheus/src/token.rs +++ /dev/null @@ -1,196 +0,0 @@ -use std::{ - convert::TryFrom, - fmt, - iter::Sum, - ops::{Add, Div, Mul, Sub}, -}; - -#[derive(Copy, Clone, PartialEq, Debug)] -pub struct Rational { - pub numerator: u64, - pub denominator: 
u64, -} - -impl PartialOrd for Rational { - fn partial_cmp(&self, other: &Self) -> Option { - if self.denominator == 0 || other.denominator == 0 { - None - } else { - let x = self.numerator as u128 * other.denominator as u128; - let y = other.numerator as u128 * self.denominator as u128; - Some(x.cmp(&y)) - } - } -} - -impl Div for Rational { - type Output = f64; - - // We do not return a `Rational` here because `self.numerator * - // rhs.denominator` or `rhs.numerator * self.denominator`could overflow. - // Instead we deal with floating point numbers. - fn div(self, rhs: Self) -> Self::Output { - (self.numerator as f64 * rhs.denominator as f64) - / (self.denominator as f64 * rhs.numerator as f64) - } -} - -impl Rational { - pub fn to_f64(&self) -> f64 { - self.numerator as f64 / self.denominator as f64 - } -} - -/// Error returned when a calculation in a token type overflows, underflows, or divides by zero. -#[derive(Debug, Eq, PartialEq)] -pub struct ArithmeticError; - -pub type Result = std::result::Result; - -/// Generate a token type that wraps the minimal unit of the token, it’s -/// “Lamport”. The symbol is for 109 of its minimal units and is -/// only used for `Debug` and `Display` printing. -#[macro_export] -macro_rules! 
impl_token { - ($TokenLamports:ident, $symbol:expr, decimals = $decimals:expr) => { - #[derive(Copy, Clone, Default, Eq, Ord, PartialEq, PartialOrd)] - pub struct $TokenLamports(pub u64); - - impl fmt::Display for $TokenLamports { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}.{} {}", - self.0 / 10u64.pow($decimals), - &format!("{:0>9}", self.0 % 10u64.pow($decimals))[9 - $decimals..], - $symbol - ) - } - } - - impl fmt::Debug for $TokenLamports { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self, f) - } - } - - impl Mul for $TokenLamports { - type Output = Result<$TokenLamports>; - fn mul(self, other: Rational) -> Result<$TokenLamports> { - // This multiplication cannot overflow, because we expand the - // u64s into u128, and u64::MAX * u64::MAX < u128::MAX. - let result_u128 = ((self.0 as u128) * (other.numerator as u128)) - .checked_div(other.denominator as u128) - .ok_or(ArithmeticError)?; - u64::try_from(result_u128) - .map($TokenLamports) - .map_err(|_| ArithmeticError) - } - } - - impl Mul for $TokenLamports { - type Output = Result<$TokenLamports>; - fn mul(self, other: u64) -> Result<$TokenLamports> { - self.0 - .checked_mul(other) - .map($TokenLamports) - .ok_or(ArithmeticError) - } - } - - impl Div for $TokenLamports { - type Output = Result<$TokenLamports>; - fn div(self, other: u64) -> Result<$TokenLamports> { - self.0 - .checked_div(other) - .map($TokenLamports) - .ok_or(ArithmeticError) - } - } - - impl Sub<$TokenLamports> for $TokenLamports { - type Output = Result<$TokenLamports>; - fn sub(self, other: $TokenLamports) -> Result<$TokenLamports> { - self.0 - .checked_sub(other.0) - .map($TokenLamports) - .ok_or(ArithmeticError) - } - } - - impl Add<$TokenLamports> for $TokenLamports { - type Output = Result<$TokenLamports>; - fn add(self, other: $TokenLamports) -> Result<$TokenLamports> { - self.0 - .checked_add(other.0) - .map($TokenLamports) - .ok_or(ArithmeticError) - } - } - - impl 
Sum<$TokenLamports> for Result<$TokenLamports> { - fn sum>(iter: I) -> Self { - let mut sum = $TokenLamports(0); - for item in iter { - sum = (sum + item)?; - } - Ok(sum) - } - } - /// Parse a numeric string as an amount of Lamports, i.e., with 9 digit precision. - /// - /// Note that this parses the Lamports amount divided by 109, - /// which can include a decimal point. It does not parse the number of - /// Lamports! This makes this function the semi-inverse of `Display` - /// (only `Display` adds the suffixes, and we do not expect that - /// here). - impl std::str::FromStr for $TokenLamports { - type Err = &'static str; - fn from_str(s: &str) -> std::result::Result { - let mut value = 0_u64; - let mut is_after_decimal = false; - let mut exponent: i32 = $decimals; - let mut had_digit = false; - - // Walk the bytes one by one, we only expect ASCII digits or '.', so bytes - // suffice. We build up the value as we go, and if we get past the decimal - // point, we also track how far we are past it. - for ch in s.as_bytes() { - match ch { - b'0'..=b'9' => { - value = value * 10 + ((ch - b'0') as u64); - if is_after_decimal { - exponent -= 1; - } - had_digit = true; - } - b'.' if !is_after_decimal => is_after_decimal = true, - b'.' => return Err("Value can contain at most one '.' (decimal point)."), - b'_' => { /* As a courtesy, allow numeric underscores for readability. */ } - _ => return Err("Invalid value, only digits, '_', and '.' are allowed."), - } - - if exponent < 0 { - return Err("Value can contain at most 9 digits after the decimal point."); - } - } - - if !had_digit { - return Err("Value must contain at least one digit."); - } - - // If the value contained fewer than 9 digits behind the decimal point - // (or no decimal point at all), scale up the value so it is measured - // in lamports. 
- while exponent > 0 { - value *= 10; - exponent -= 1; - } - - Ok($TokenLamports(value)) - } - } - }; -} - -impl_token!(Lamports, "SOL", decimals = 9); diff --git a/prometheus/src/utils.rs b/prometheus/src/utils.rs index 54e607134cb839..45efd00e9fb5e2 100644 --- a/prometheus/src/utils.rs +++ b/prometheus/src/utils.rs @@ -9,7 +9,7 @@ use std::io; use std::io::Write; use std::time::SystemTime; -use crate::token::Lamports; +use crate::Lamports; pub struct MetricFamily<'a> { /// Name of the metric, e.g. [`goats_teleported_total`](https://crbug.com/31482). From f528d3e66c4812fa23111b120bd0c6c707b48d6b Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 16:32:01 -0300 Subject: [PATCH 11/47] Change license to Apache 2.0 --- prometheus/src/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/utils.rs b/prometheus/src/utils.rs index 45efd00e9fb5e2..9ca58aa2810703 100644 --- a/prometheus/src/utils.rs +++ b/prometheus/src/utils.rs @@ -1,5 +1,5 @@ // SPDX-FileCopyrightText: 2022 Chorus One AG -// SPDX-License-Identifier: GPL-3.0 +// SPDX-License-Identifier: Apache-2.0 //! Utilities for formatting Prometheus metrics. //! 
From 253ac1ca2bf436e2dd5bb4199761fca6c9c51108 Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 17:29:19 -0300 Subject: [PATCH 12/47] Add block's timestamp --- prometheus/src/bank_metrics.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 9f5bafea47a4d8..d8d52ca659b22a 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -4,6 +4,8 @@ use crate::utils::{write_metric, Metric, MetricFamily}; use std::{io, sync::Arc}; pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Result<()> { + let clock = bank.clock(); + write_metric( out, &MetricFamily { @@ -34,12 +36,21 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re write_metric( out, &MetricFamily { - name: "solana_bank_error_transaction_count", + name: "solana_block_error_transaction_count", help: "Number of transactions in the block that executed with error", type_: "gauge", metrics: vec![Metric::new(bank.transaction_error_count())], }, )?; + write_metric( + out, + &MetricFamily { + name: "solana_block_timestamp_seconds", + help: "The block's timestamp", + type_: "gauge", + metrics: vec![Metric::new(clock.unix_timestamp)], + }, + )?; Ok(()) } From ecd70c19b26f578acbf3a99902d47fd4fc653d1d Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 17:29:32 -0300 Subject: [PATCH 13/47] Rename metrics --- prometheus/src/bank_metrics.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index d8d52ca659b22a..a382f70d4f9cb5 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -9,25 +9,25 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re write_metric( out, &MetricFamily { - name: "solana_bank_slot", + name: "solana_block_slot", help: "Current Slot", type_: "gauge", - metrics: vec![Metric::new(bank.slot())], + metrics: 
vec![Metric::new(clock.slot)], }, )?; write_metric( out, &MetricFamily { - name: "solana_bank_epoch", + name: "solana_block_epoch", help: "Current Epoch", type_: "gauge", - metrics: vec![Metric::new(bank.epoch())], + metrics: vec![Metric::new(clock.epoch)], }, )?; write_metric( out, &MetricFamily { - name: "solana_bank_successful_transaction_count", + name: "solana_block_successful_transaction_count", help: "Number of transactions in the block that executed successfully", type_: "gauge", metrics: vec![Metric::new(bank.transaction_count())], From 2d09704c91651ab5ed34f820729b67b06432bf08 Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 18:55:27 -0300 Subject: [PATCH 14/47] Turn i64 to u64 timestamp --- prometheus/src/bank_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index a382f70d4f9cb5..b86aa81114fa96 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -48,7 +48,7 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re name: "solana_block_timestamp_seconds", help: "The block's timestamp", type_: "gauge", - metrics: vec![Metric::new(clock.unix_timestamp)], + metrics: vec![Metric::new(clock.unix_timestamp as u64)], }, )?; From bfc1328172345196ae1204eaa93557c9fe2a0dea Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:48:20 -0300 Subject: [PATCH 15/47] Remove information about block's transactions --- prometheus/src/bank_metrics.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index b86aa81114fa96..4c471850b1b12f 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -24,24 +24,6 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re metrics: vec![Metric::new(clock.epoch)], }, )?; - write_metric( - out, - &MetricFamily { - name: "solana_block_successful_transaction_count", - help: "Number of 
transactions in the block that executed successfully", - type_: "gauge", - metrics: vec![Metric::new(bank.transaction_count())], - }, - )?; - write_metric( - out, - &MetricFamily { - name: "solana_block_error_transaction_count", - help: "Number of transactions in the block that executed with error", - type_: "gauge", - metrics: vec![Metric::new(bank.transaction_error_count())], - }, - )?; write_metric( out, &MetricFamily { From ad78d65055c6f90df4e3dd70718eeb044eb6cd0e Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:49:33 -0300 Subject: [PATCH 16/47] Add label about commitment level --- prometheus/src/bank_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 4c471850b1b12f..09d15aed0f5952 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -12,7 +12,7 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re name: "solana_block_slot", help: "Current Slot", type_: "gauge", - metrics: vec![Metric::new(clock.slot)], + metrics: vec![Metric::new(clock.slot).with_label("commitment_level", "finalized")], }, )?; write_metric( From aedcbe4a64dc8f20b2da64db5f92093003378c55 Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:50:58 -0300 Subject: [PATCH 17/47] Change help msg for block's timestamp --- prometheus/src/bank_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 09d15aed0f5952..b918afb4944257 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -28,7 +28,7 @@ pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Re out, &MetricFamily { name: "solana_block_timestamp_seconds", - help: "The block's timestamp", + help: "The block's UNIX timestamp, in seconds since epoch, UTC", type_: "gauge", metrics: vec![Metric::new(clock.unix_timestamp as u64)], }, From 
ddf4cdf99a97215de4a5060350d930c2c5232f14 Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:52:39 -0300 Subject: [PATCH 18/47] Add and rename label --- prometheus/src/cluster_metrics.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 207a1c26b02282..84a465a25fecdd 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -23,7 +23,9 @@ pub fn write_cluster_metrics( name: "solana_node_identity_public_key_info", help: "The node's current identity", type_: "count", - metrics: vec![Metric::new(1).with_label("identity", identity_pubkey.to_string())], + metrics: vec![ + Metric::new(1).with_label("identity_account", identity_pubkey.to_string()) + ], }, )?; @@ -34,7 +36,8 @@ pub fn write_cluster_metrics( name: "solana_node_identity_balance_sol", help: "The node's current identity balance", type_: "gauge", - metrics: vec![Metric::new_sol(identity_balance)], + metrics: vec![Metric::new_sol(identity_balance) + .with_label("identity_account", identity_pubkey.to_string())], }, )?; From 78fe0c8db21be13af946b924f649c6bed045298a Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 16 Jun 2022 11:39:07 -0300 Subject: [PATCH 19/47] Get finalized bank --- prometheus/src/bank_metrics.rs | 31 +++++++++++------- prometheus/src/banks_with_commitments.rs | 23 +++++++++++++ prometheus/src/cluster_metrics.rs | 12 ++++--- prometheus/src/lib.rs | 18 +++++++---- rpc/src/rpc.rs | 2 +- rpc/src/rpc_service.rs | 41 +++++++++++++++++------- 6 files changed, 92 insertions(+), 35 deletions(-) create mode 100644 prometheus/src/banks_with_commitments.rs diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index b918afb4944257..5fb459e680da5a 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -1,36 +1,43 @@ -use solana_runtime::bank::Bank; +use crate::{ + banks_with_commitments::BanksWithCommitments, + 
utils::{write_metric, Metric, MetricFamily}, +}; +use std::io; -use crate::utils::{write_metric, Metric, MetricFamily}; -use std::{io, sync::Arc}; - -pub fn write_bank_metrics(bank: &Arc, out: &mut W) -> io::Result<()> { - let clock = bank.clock(); +pub fn write_bank_metrics( + banks_with_commitments: &BanksWithCommitments, + out: &mut W, +) -> io::Result<()> { + let clock_finalized = banks_with_commitments.finalized_bank.clock(); write_metric( out, &MetricFamily { name: "solana_block_slot", - help: "Current Slot", + help: "Finalized Slot", type_: "gauge", - metrics: vec![Metric::new(clock.slot).with_label("commitment_level", "finalized")], + metrics: vec![Metric::new(clock_finalized.slot) + .with_label("commitment_level", "finalized".to_owned())], }, )?; write_metric( out, &MetricFamily { name: "solana_block_epoch", - help: "Current Epoch", + help: "Finalized Epoch", type_: "gauge", - metrics: vec![Metric::new(clock.epoch)], + metrics: vec![Metric::new(clock_finalized.epoch) + .with_label("commitment_level", "finalized".to_owned())], }, )?; write_metric( out, &MetricFamily { name: "solana_block_timestamp_seconds", - help: "The block's UNIX timestamp, in seconds since epoch, UTC", + help: "The block's finalized UNIX timestamp, in seconds since epoch, UTC", type_: "gauge", - metrics: vec![Metric::new(clock.unix_timestamp as u64)], + metrics: vec![Metric::new(clock_finalized.unix_timestamp as u64) + .with_label("commitment_level", "finalized".to_owned())], }, )?; diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs new file mode 100644 index 00000000000000..df5f352e1d51be --- /dev/null +++ b/prometheus/src/banks_with_commitments.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; + +use solana_runtime::bank::Bank; + +pub struct BanksWithCommitments { + pub finalized_bank: Arc, + pub confirmed_bank: Arc, + pub processed_bank: Arc, +} + +impl BanksWithCommitments { + pub fn new( + finalized_bank: Arc, + confirmed_bank: Arc, + 
processed_bank: Arc, + ) -> Self { + BanksWithCommitments { + finalized_bank, + confirmed_bank, + processed_bank, + } + } +} diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 84a465a25fecdd..dcb9673cd132a1 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -1,17 +1,18 @@ use solana_gossip::cluster_info::ClusterInfo; -use solana_runtime::bank::Bank; use crate::{ + banks_with_commitments::BanksWithCommitments, utils::{write_metric, Metric, MetricFamily}, Lamports, }; use std::{io, sync::Arc}; pub fn write_cluster_metrics( - bank: &Arc, + banks_with_commitments: &BanksWithCommitments, cluster_info: &Arc, out: &mut W, ) -> io::Result<()> { + let bank_finalized = &banks_with_commitments.finalized_bank; let identity_pubkey = cluster_info.id(); let version = cluster_info .get_node_version(&identity_pubkey) @@ -29,15 +30,16 @@ pub fn write_cluster_metrics( }, )?; - let identity_balance = Lamports(bank.get_balance(&identity_pubkey)); + let identity_balance = Lamports(bank_finalized.get_balance(&identity_pubkey)); write_metric( out, &MetricFamily { name: "solana_node_identity_balance_sol", - help: "The node's current identity balance", + help: "The node's finalized identity balance", type_: "gauge", metrics: vec![Metric::new_sol(identity_balance) - .with_label("identity_account", identity_pubkey.to_string())], + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("commitment_level", "finalized".to_owned())], }, )?; diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index 24acd7c2a1c022..02466ef79a6e8b 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -1,21 +1,27 @@ mod bank_metrics; +pub mod banks_with_commitments; mod cluster_metrics; mod utils; +use banks_with_commitments::BanksWithCommitments; use solana_gossip::cluster_info::ClusterInfo; -use solana_runtime::bank_forks::BankForks; -use std::sync::{Arc, RwLock}; +use std::sync::Arc; pub struct 
Lamports(pub u64); pub fn render_prometheus( - bank_forks: &Arc>, + banks_with_commitments: BanksWithCommitments, cluster_info: &Arc, ) -> Vec { - let current_bank = bank_forks.read().unwrap().working_bank(); + // There are 3 levels of commitment for a bank: + // - finalized: most recent block *confirmed* by supermajority of the + // cluster. + // - confirmed: most recent block that has been *voted* on by supermajority + // of the cluster. + // - processed: most recent block. let mut out: Vec = Vec::new(); - bank_metrics::write_bank_metrics(¤t_bank, &mut out).expect("IO error"); - cluster_metrics::write_cluster_metrics(¤t_bank, &cluster_info, &mut out) + bank_metrics::write_bank_metrics(&banks_with_commitments, &mut out).expect("IO error"); + cluster_metrics::write_cluster_metrics(&banks_with_commitments, &cluster_info, &mut out) .expect("IO error"); out } diff --git a/rpc/src/rpc.rs b/rpc/src/rpc.rs index a3edba1792de7a..df627172f6302d 100644 --- a/rpc/src/rpc.rs +++ b/rpc/src/rpc.rs @@ -232,7 +232,7 @@ impl JsonRpcRequestProcessor { } #[allow(deprecated)] - fn bank(&self, commitment: Option) -> Arc { + pub fn bank(&self, commitment: Option) -> Arc { debug!("RPC commitment_config: {:?}", commitment); let commitment = commitment.unwrap_or_default(); diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index b0c86ccdca83ea..bbfdb10e73fd3b 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -1,5 +1,7 @@ //! The `rpc_service` module implements the Solana JSON RPC service. 
+use solana_sdk::commitment_config::CommitmentConfig; + use { crate::{ cluster_tpu_info::ClusterTpuInfo, @@ -28,7 +30,7 @@ use { solana_metrics::inc_new_counter_info, solana_perf::thread::renice_this_thread, solana_poh::poh_recorder::PohRecorder, - solana_prometheus::render_prometheus, + solana_prometheus::{banks_with_commitments::BanksWithCommitments, render_prometheus}, solana_runtime::{ bank_forks::BankForks, commitment::BlockCommitmentCache, snapshot_archive_info::SnapshotArchiveInfoGetter, snapshot_config::SnapshotConfig, @@ -73,6 +75,7 @@ struct RpcRequestMiddleware { snapshot_config: Option, bank_forks: Arc>, health: Arc, + rpc_processor: Option, } impl RpcRequestMiddleware { @@ -81,6 +84,7 @@ impl RpcRequestMiddleware { snapshot_config: Option, bank_forks: Arc>, health: Arc, + rpc_processor: Option, ) -> Self { Self { ledger_path, @@ -95,6 +99,7 @@ impl RpcRequestMiddleware { snapshot_config, bank_forks, health, + rpc_processor, } } @@ -290,15 +295,23 @@ impl RequestMiddleware for RpcRequestMiddleware { .body(hyper::Body::from(self.health_check())) .unwrap() .into(), - "/metrics" => hyper::Response::builder() - .status(hyper::StatusCode::OK) - .header("Content-Type", "text/plain; version=0.0.4; charset=UTF-8") - .body(hyper::Body::from(render_prometheus( - &self.bank_forks, - &self.health.cluster_info, - ))) - .unwrap() - .into(), + "/metrics" => { + let rpc_processor = self.rpc_processor.as_ref().unwrap(); + let banks_with_commitment = BanksWithCommitments::new( + rpc_processor.bank(Some(CommitmentConfig::finalized())), + rpc_processor.bank(Some(CommitmentConfig::confirmed())), + rpc_processor.bank(Some(CommitmentConfig::processed())), + ); + hyper::Response::builder() + .status(hyper::StatusCode::OK) + .header("Content-Type", "text/plain; version=0.0.4; charset=UTF-8") + .body(hyper::Body::from(render_prometheus( + banks_with_commitment, + &self.health.cluster_info, + ))) + .unwrap() + .into() + } _ => request.into(), } } @@ -497,6 +510,7 @@ impl 
JsonRpcService { snapshot_config, bank_forks.clone(), health.clone(), + Some(request_processor.clone()), ); let server = ServerBuilder::with_meta_extractor( io, @@ -678,12 +692,14 @@ mod tests { None, bank_forks.clone(), RpcHealth::stub(), + None, ); let rrm_with_snapshot_config = RpcRequestMiddleware::new( PathBuf::from("/"), Some(SnapshotConfig::default()), bank_forks, RpcHealth::stub(), + None, ); assert!(rrm.is_file_get_path(DEFAULT_GENESIS_DOWNLOAD_PATH)); @@ -754,6 +770,7 @@ mod tests { None, create_bank_forks(), RpcHealth::stub(), + None, ); // File does not exist => request should fail. @@ -809,6 +826,7 @@ mod tests { None, create_bank_forks(), RpcHealth::stub(), + None, ); assert_eq!(rm.health_check(), "ok"); } @@ -835,7 +853,8 @@ mod tests { override_health_check.clone(), )); - let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health); + let rm = + RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health, None); // No account hashes for this node or any known validators assert_eq!(rm.health_check(), "unknown"); From 2a861d9296d462a84be8e9b13c214577acfccd6d Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 16 Jun 2022 13:03:07 -0300 Subject: [PATCH 20/47] Add metrics for each commitment level --- prometheus/src/bank_metrics.rs | 20 +++++++++----------- prometheus/src/banks_with_commitments.rs | 10 ++++++++++ prometheus/src/cluster_metrics.rs | 9 ++++----- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 5fb459e680da5a..01ea58e984c3c5 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -8,36 +8,34 @@ pub fn write_bank_metrics( banks_with_commitments: &BanksWithCommitments, out: &mut W, ) -> io::Result<()> { - let clock_finalized = banks_with_commitments.finalized_bank.clock(); - write_metric( out, &MetricFamily { name: "solana_block_slot", - help: "Finalized Slot", + help: "Block Slot", 
type_: "gauge", - metrics: vec![Metric::new(clock_finalized.slot) - .with_label("commitment_level", "finalized".to_owned())], + metrics: banks_with_commitments + .for_each_commitment(|bank| Metric::new(bank.clock().slot)), }, )?; write_metric( out, &MetricFamily { name: "solana_block_epoch", - help: "Finalized Epoch", + help: "Block Epoch", type_: "gauge", - metrics: vec![Metric::new(clock_finalized.epoch) - .with_label("commitment_level", "finalized".to_owned())], + metrics: banks_with_commitments + .for_each_commitment(|bank| Metric::new(bank.clock().epoch)), }, )?; write_metric( out, &MetricFamily { name: "solana_block_timestamp_seconds", - help: "The block's finalized UNIX timestamp, in seconds since epoch, UTC", + help: "The block's UNIX timestamp, in seconds since epoch, UTC", type_: "gauge", - metrics: vec![Metric::new(clock_finalized.unix_timestamp as u64) - .with_label("commitment_level", "finalized".to_owned())], + metrics: banks_with_commitments + .for_each_commitment(|bank| Metric::new(bank.clock().unix_timestamp as u64)), }, )?; diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index df5f352e1d51be..e121b7a1b48f7c 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -2,6 +2,8 @@ use std::sync::Arc; use solana_runtime::bank::Bank; +use crate::utils::Metric; + pub struct BanksWithCommitments { pub finalized_bank: Arc, pub confirmed_bank: Arc, @@ -20,4 +22,12 @@ impl BanksWithCommitments { processed_bank, } } + + pub fn for_each_commitment Metric>(&self, get: F) -> Vec { + vec![ + get(&self.finalized_bank).with_label("commitment_level", "finalized".to_owned()), + get(&self.confirmed_bank).with_label("commitment_level", "confirmed".to_owned()), + get(&self.processed_bank).with_label("commitment_level", "processed".to_owned()), + ] + } } diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index dcb9673cd132a1..697da136db01f8 
100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -12,7 +12,6 @@ pub fn write_cluster_metrics( cluster_info: &Arc, out: &mut W, ) -> io::Result<()> { - let bank_finalized = &banks_with_commitments.finalized_bank; let identity_pubkey = cluster_info.id(); let version = cluster_info .get_node_version(&identity_pubkey) @@ -30,16 +29,16 @@ pub fn write_cluster_metrics( }, )?; - let identity_balance = Lamports(bank_finalized.get_balance(&identity_pubkey)); write_metric( out, &MetricFamily { name: "solana_node_identity_balance_sol", help: "The node's finalized identity balance", type_: "gauge", - metrics: vec![Metric::new_sol(identity_balance) - .with_label("identity_account", identity_pubkey.to_string()) - .with_label("commitment_level", "finalized".to_owned())], + metrics: banks_with_commitments.for_each_commitment(|bank| { + Metric::new_sol(Lamports(bank.get_balance(&identity_pubkey))) + .with_label("identity_account", identity_pubkey.to_string()) + }), }, )?; From 390a8c5d24b567892d10cbc899cc9b277771cf96 Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 16 Jun 2022 14:10:00 -0300 Subject: [PATCH 21/47] Use `block_commitment_cache` Instead of propagating `rpc_processor`, use the `block_commitment_cache` structure to get different bank confirmation levels --- prometheus/src/banks_with_commitments.rs | 44 +++++++++++++++++++++--- rpc/src/rpc.rs | 2 +- rpc/src/rpc_service.rs | 38 ++++++++++---------- 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index e121b7a1b48f7c..d4e2edb3d4aa96 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -1,6 +1,6 @@ -use std::sync::Arc; +use std::sync::{Arc, RwLock}; -use solana_runtime::bank::Bank; +use solana_runtime::{bank::Bank, bank_forks::BankForks, commitment::BlockCommitmentCache}; use crate::utils::Metric; @@ -12,10 +12,43 @@ pub struct 
BanksWithCommitments { impl BanksWithCommitments { pub fn new( - finalized_bank: Arc, - confirmed_bank: Arc, - processed_bank: Arc, + bank_forks: &Arc>, + block_commitment_cache: &Arc>, ) -> Self { + let block_commitment_cache = block_commitment_cache.read().unwrap(); + let finalized_slot = block_commitment_cache + .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Finalized); + let confirmed_slot = block_commitment_cache + .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Confirmed); + let processed_slot = block_commitment_cache + .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Processed); + + let r_bank_forks = bank_forks.read().unwrap(); + + let default_closure = || { + // From rpc/src/rpc.rs + // We log a warning instead of returning an error, because all known error cases + // are due to known bugs that should be fixed instead. + // + // The slot may not be found as a result of a known bug in snapshot creation, where + // the bank at the given slot was not included in the snapshot. + // Also, it may occur after an old bank has been purged from BankForks and a new + // BlockCommitmentCache has not yet arrived. To make this case impossible, + // BlockCommitmentCache should hold an `Arc` everywhere it currently holds + // a slot. + // + // For more information, see https://github.com/solana-labs/solana/issues/11078 + r_bank_forks.root_bank() + }; + let finalized_bank = r_bank_forks + .get(finalized_slot) + .unwrap_or_else(default_closure); + let confirmed_bank = r_bank_forks + .get(confirmed_slot) + .unwrap_or_else(default_closure); + let processed_bank = r_bank_forks + .get(processed_slot) + .unwrap_or_else(default_closure); BanksWithCommitments { finalized_bank, confirmed_bank, @@ -23,6 +56,7 @@ impl BanksWithCommitments { } } + /// Call function callback for each commitment level, and returns a vector of metrics. 
pub fn for_each_commitment Metric>(&self, get: F) -> Vec { vec![ get(&self.finalized_bank).with_label("commitment_level", "finalized".to_owned()), diff --git a/rpc/src/rpc.rs b/rpc/src/rpc.rs index df627172f6302d..a3edba1792de7a 100644 --- a/rpc/src/rpc.rs +++ b/rpc/src/rpc.rs @@ -232,7 +232,7 @@ impl JsonRpcRequestProcessor { } #[allow(deprecated)] - pub fn bank(&self, commitment: Option) -> Arc { + fn bank(&self, commitment: Option) -> Arc { debug!("RPC commitment_config: {:?}", commitment); let commitment = commitment.unwrap_or_default(); diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index bbfdb10e73fd3b..dc17a366c51b58 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -1,7 +1,5 @@ //! The `rpc_service` module implements the Solana JSON RPC service. -use solana_sdk::commitment_config::CommitmentConfig; - use { crate::{ cluster_tpu_info::ClusterTpuInfo, @@ -75,7 +73,7 @@ struct RpcRequestMiddleware { snapshot_config: Option, bank_forks: Arc>, health: Arc, - rpc_processor: Option, + block_commitment_cache: Arc>, } impl RpcRequestMiddleware { @@ -84,7 +82,7 @@ impl RpcRequestMiddleware { snapshot_config: Option, bank_forks: Arc>, health: Arc, - rpc_processor: Option, + block_commitment_cache: Arc>, ) -> Self { Self { ledger_path, @@ -99,7 +97,7 @@ impl RpcRequestMiddleware { snapshot_config, bank_forks, health, - rpc_processor, + block_commitment_cache, } } @@ -296,12 +294,8 @@ impl RequestMiddleware for RpcRequestMiddleware { .unwrap() .into(), "/metrics" => { - let rpc_processor = self.rpc_processor.as_ref().unwrap(); - let banks_with_commitment = BanksWithCommitments::new( - rpc_processor.bank(Some(CommitmentConfig::finalized())), - rpc_processor.bank(Some(CommitmentConfig::confirmed())), - rpc_processor.bank(Some(CommitmentConfig::processed())), - ); + let banks_with_commitment = + BanksWithCommitments::new(&self.bank_forks, &self.block_commitment_cache); hyper::Response::builder() .status(hyper::StatusCode::OK) 
.header("Content-Type", "text/plain; version=0.0.4; charset=UTF-8") @@ -455,7 +449,7 @@ impl JsonRpcService { config, snapshot_config.clone(), bank_forks.clone(), - block_commitment_cache, + block_commitment_cache.clone(), blockstore, validator_exit.clone(), health.clone(), @@ -510,7 +504,7 @@ impl JsonRpcService { snapshot_config, bank_forks.clone(), health.clone(), - Some(request_processor.clone()), + block_commitment_cache.clone(), ); let server = ServerBuilder::with_meta_extractor( io, @@ -687,19 +681,20 @@ mod tests { #[test] fn test_is_file_get_path() { let bank_forks = create_bank_forks(); + let block_commitment_cache = Arc::new(RwLock::new(BlockCommitmentCache::default())); let rrm = RpcRequestMiddleware::new( PathBuf::from("/"), None, bank_forks.clone(), RpcHealth::stub(), - None, + block_commitment_cache, ); let rrm_with_snapshot_config = RpcRequestMiddleware::new( PathBuf::from("/"), Some(SnapshotConfig::default()), bank_forks, RpcHealth::stub(), - None, + block_commitment_cache, ); assert!(rrm.is_file_get_path(DEFAULT_GENESIS_DOWNLOAD_PATH)); @@ -770,7 +765,7 @@ mod tests { None, create_bank_forks(), RpcHealth::stub(), - None, + Arc::new(RwLock::new(BlockCommitmentCache::default())), ); // File does not exist => request should fail. 
@@ -826,7 +821,7 @@ mod tests { None, create_bank_forks(), RpcHealth::stub(), - None, + Arc::new(RwLock::new(BlockCommitmentCache::default())), ); assert_eq!(rm.health_check(), "ok"); } @@ -853,8 +848,13 @@ mod tests { override_health_check.clone(), )); - let rm = - RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health, None); + let rm = RpcRequestMiddleware::new( + PathBuf::from("/"), + None, + create_bank_forks(), + health, + Arc::new(RwLock::new(BlockCommitmentCache::default())), + ); // No account hashes for this node or any known validators assert_eq!(rm.health_check(), "unknown"); From 399e9e1f5961b77818b875ec9d24793035200d78 Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 17 Jun 2022 09:15:56 -0300 Subject: [PATCH 22/47] Limit `block_commitment_cache` read lock. As ruuda pointed out, there is a potential deadlock if we hold a read lock on both `block_commitment_cache` and `bank_forks` at the same time. In ruuda's words: - We take the read lock on block_commitment_cache. - Some other thread takes a write lock on bank_forks. - We want to take the read lock on bank_forks, but this blocks because the other thread holds a write lock. - The other thread wants to take a write lock on block_commitment_cache, but that blocks because we hold the read lock. - Deadlock! Instead, we limit the `block_commitment_cache` read lock to its own scope, so that it is released before we take the `bank_forks` read lock and we never hold the two read locks concurrently. As ruuda also pointed out, this might lead to an inconsistency between the two; that inconsistency is already known in Solana, and we handle it here the same way it is handled in `rpc/src/rpc.rs`.
--- prometheus/src/banks_with_commitments.rs | 28 +++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index d4e2edb3d4aa96..815ad040a96f2f 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -15,24 +15,26 @@ impl BanksWithCommitments { bank_forks: &Arc>, block_commitment_cache: &Arc>, ) -> Self { - let block_commitment_cache = block_commitment_cache.read().unwrap(); - let finalized_slot = block_commitment_cache - .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Finalized); - let confirmed_slot = block_commitment_cache - .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Confirmed); - let processed_slot = block_commitment_cache - .slot_with_commitment(solana_sdk::commitment_config::CommitmentLevel::Processed); - + let (finalized_slot, confirmed_slot, processed_slot) = { + let block_commitment_cache = block_commitment_cache.read().unwrap(); + ( + block_commitment_cache.slot_with_commitment( + solana_sdk::commitment_config::CommitmentLevel::Finalized, + ), + block_commitment_cache.slot_with_commitment( + solana_sdk::commitment_config::CommitmentLevel::Confirmed, + ), + block_commitment_cache.slot_with_commitment( + solana_sdk::commitment_config::CommitmentLevel::Processed, + ), + ) + }; let r_bank_forks = bank_forks.read().unwrap(); let default_closure = || { // From rpc/src/rpc.rs - // We log a warning instead of returning an error, because all known error cases - // are due to known bugs that should be fixed instead. // - // The slot may not be found as a result of a known bug in snapshot creation, where - // the bank at the given slot was not included in the snapshot. 
- // Also, it may occur after an old bank has been purged from BankForks and a new + // It may occur after an old bank has been purged from BankForks and a new // BlockCommitmentCache has not yet arrived. To make this case impossible, // BlockCommitmentCache should hold an `Arc` everywhere it currently holds // a slot. From 2aebf10b7d0a512e403b6e861870fac2c2c0d2d3 Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 30 Jun 2022 18:04:04 -0300 Subject: [PATCH 23/47] Correct prometheus type Should be "counter", not "count" --- prometheus/src/cluster_metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 697da136db01f8..fb4e306f883a50 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -22,7 +22,7 @@ pub fn write_cluster_metrics( &MetricFamily { name: "solana_node_identity_public_key_info", help: "The node's current identity", - type_: "count", + type_: "counter", metrics: vec![ Metric::new(1).with_label("identity_account", identity_pubkey.to_string()) ], @@ -47,7 +47,7 @@ pub fn write_cluster_metrics( &MetricFamily { name: "solana_node_version_info", help: "The current Solana node's version", - type_: "count", + type_: "counter", metrics: vec![Metric::new(1).with_label("version", version.to_string())], }, )?; From b7e4d9600bc1855981e96f724e53ec78ca8bbdd0 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 7 Jun 2022 17:37:05 -0300 Subject: [PATCH 24/47] Save metrics about the validator's vote account Also update Cargo.lock. 
--- Cargo.lock | 3 +- prometheus/Cargo.toml | 1 + prometheus/src/cluster_metrics.rs | 78 +++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index ec5ab532673d1d..ef35ce256047ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5858,7 +5858,8 @@ dependencies = [ "jsonrpc-http-server", "solana-gossip", "solana-runtime", - "solana-sdk 1.10.28", + "solana-sdk 1.10.32", + "solana-vote-program", ] [[package]] diff --git a/prometheus/Cargo.toml b/prometheus/Cargo.toml index 06b64e031a6f79..9e7451087690d4 100644 --- a/prometheus/Cargo.toml +++ b/prometheus/Cargo.toml @@ -12,6 +12,7 @@ jsonrpc-http-server = "18.0.0" solana-gossip = { path = "../gossip" } solana-runtime = { path = "../runtime" } solana-sdk = { path = "../sdk" } +solana-vote-program = { path = "../programs/vote" } [lib] crate-type = ["lib"] diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index fb4e306f883a50..d60118206ff427 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -1,4 +1,7 @@ use solana_gossip::cluster_info::ClusterInfo; +use solana_runtime::bank::Bank; +use solana_sdk::{clock::Slot, pubkey::Pubkey}; +use solana_vote_program::vote_state::VoteState; use crate::{ banks_with_commitments::BanksWithCommitments, @@ -7,6 +10,76 @@ use crate::{ }; use std::{io, sync::Arc}; +struct ValidatorVoteInfo { + vote_address: Pubkey, + balance: Lamports, + last_vote: Slot, +} + +impl ValidatorVoteInfo { + fn new_from_bank(bank: &Arc, identity_pubkey: &Pubkey) -> Option { + let vote_accounts = bank.vote_accounts(); + let vote_state_default = VoteState::default(); + vote_accounts + .iter() + .filter_map(|(&vote_pubkey, (_activated_stake, account))| { + let vote_state = account.vote_state(); + let vote_state = vote_state.as_ref().unwrap_or(&vote_state_default); + if identity_pubkey != &vote_state.node_pubkey { + return None; + } + let last_vote = if let Some(vote) = 
vote_state.votes.iter().last() { + vote.slot + } else { + 0 + }; + let vote_balance = Lamports(bank.get_balance(&vote_pubkey)); + Some(ValidatorVoteInfo { + vote_address: vote_pubkey, + balance: vote_balance, + last_vote, + }) + }) + .next() + } + + fn write_prometheus(&self, out: &mut W, at: SystemTime) -> io::Result<()> { + write_metric( + out, + &MetricFamily { + name: "solana_cluster_vote_public_key_info", + help: "The current Solana node's vote public key", + type_: "count", + metrics: vec![Metric::new(1) + .with_label("vote", self.vote_address.to_string()) + .at(at)], + }, + )?; + // We can use this metric to track if the validator is making progress + // by voting on the last slots. + write_metric( + out, + &MetricFamily { + name: "solana_cluster_last_vote_slot_count", + help: "The last slot that the validator voted on", + type_: "gauge", + metrics: vec![Metric::new(self.last_vote).at(at)], + }, + )?; + // Validator rewards go to vote account, we use this to track our own + // rewards. 
+ write_metric( + out, + &MetricFamily { + name: "solana_cluster_vote_balance_total", + help: "The current node's vote account balance", + type_: "gauge", + metrics: vec![Metric::new_sol(self.balance).at(at)], + }, + ) + } +} + pub fn write_cluster_metrics( banks_with_commitments: &BanksWithCommitments, cluster_info: &Arc, @@ -52,5 +125,10 @@ pub fn write_cluster_metrics( }, )?; + let validator_vote_info = ValidatorVoteInfo::new_from_bank(bank, &identity_pubkey); + if let Some(vote_info) = validator_vote_info { + vote_info.write_prometheus(out, at)?; + } + Ok(()) } From 9d97a4e3b83d2c93accf3893fd6e5c02081c852d Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 19:14:40 -0300 Subject: [PATCH 25/47] Return last vote slot only if there's some value --- prometheus/src/cluster_metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index d60118206ff427..5055753159b26b 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -29,10 +29,10 @@ impl ValidatorVoteInfo { return None; } let last_vote = if let Some(vote) = vote_state.votes.iter().last() { - vote.slot + Some(vote.slot) } else { - 0 - }; + None + }?; let vote_balance = Lamports(bank.get_balance(&vote_pubkey)); Some(ValidatorVoteInfo { vote_address: vote_pubkey, From f25093edf0526e8a796ec925c76a116f9b872f3f Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 13 Jun 2022 19:25:52 -0300 Subject: [PATCH 26/47] Rename, specify metrics --- prometheus/src/cluster_metrics.rs | 23 ++++++++++++----------- prometheus/src/utils.rs | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 5055753159b26b..06ff674ce19026 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -43,16 +43,14 @@ impl ValidatorVoteInfo { .next() } - fn write_prometheus(&self, out: &mut W, at: 
SystemTime) -> io::Result<()> { + fn write_prometheus(&self, out: &mut W) -> io::Result<()> { write_metric( out, &MetricFamily { - name: "solana_cluster_vote_public_key_info", + name: "solana_node_vote_public_key_info", help: "The current Solana node's vote public key", type_: "count", - metrics: vec![Metric::new(1) - .with_label("vote", self.vote_address.to_string()) - .at(at)], + metrics: vec![Metric::new(1).with_label("vote", self.vote_address.to_string())], }, )?; // We can use this metric to track if the validator is making progress @@ -60,10 +58,13 @@ impl ValidatorVoteInfo { write_metric( out, &MetricFamily { - name: "solana_cluster_last_vote_slot_count", - help: "The last slot that the validator voted on", + name: "solana_node_last_vote_slot", + help: + "The voted-on slot of the validator's last vote that got included in the chain", type_: "gauge", - metrics: vec![Metric::new(self.last_vote).at(at)], + metrics: vec![ + Metric::new(self.last_vote).with_label("pubkey", self.vote_address.to_string()) + ], }, )?; // Validator rewards go to vote account, we use this to track our own @@ -71,10 +72,10 @@ impl ValidatorVoteInfo { write_metric( out, &MetricFamily { - name: "solana_cluster_vote_balance_total", + name: "solana_node_vote_balance_sol", help: "The current node's vote account balance", type_: "gauge", - metrics: vec![Metric::new_sol(self.balance).at(at)], + metrics: vec![Metric::new_sol(self.balance)], }, ) } @@ -127,7 +128,7 @@ pub fn write_cluster_metrics( let validator_vote_info = ValidatorVoteInfo::new_from_bank(bank, &identity_pubkey); if let Some(vote_info) = validator_vote_info { - vote_info.write_prometheus(out, at)?; + vote_info.write_prometheus(out)?; } Ok(()) diff --git a/prometheus/src/utils.rs b/prometheus/src/utils.rs index 9ca58aa2810703..597fd0cf1f13a7 100644 --- a/prometheus/src/utils.rs +++ b/prometheus/src/utils.rs @@ -251,7 +251,7 @@ mod test { name: "goats_teleported_total", help: "Number of goats teleported since launch.", type_: 
"counter", - metrics: vec![Metric::new(10).at(t)], + metrics: vec![Metric::new(10)], }, ) .unwrap(); From e224e5210b688df0181714bdf2c9364d8044842b Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:39:50 -0300 Subject: [PATCH 27/47] Get last vote --- prometheus/src/cluster_metrics.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 06ff674ce19026..4d0eb77340686f 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -28,11 +28,7 @@ impl ValidatorVoteInfo { if identity_pubkey != &vote_state.node_pubkey { return None; } - let last_vote = if let Some(vote) = vote_state.votes.iter().last() { - Some(vote.slot) - } else { - None - }?; + let last_vote = vote_state.votes.back()?.slot; let vote_balance = Lamports(bank.get_balance(&vote_pubkey)); Some(ValidatorVoteInfo { vote_address: vote_pubkey, From ad69c55c081ae43a0c20560c3450b1aa1a15f9ad Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:40:07 -0300 Subject: [PATCH 28/47] Clone Lamports struct --- prometheus/src/cluster_metrics.rs | 2 +- prometheus/src/lib.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 4d0eb77340686f..f1251506cef6a1 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -71,7 +71,7 @@ impl ValidatorVoteInfo { name: "solana_node_vote_balance_sol", help: "The current node's vote account balance", type_: "gauge", - metrics: vec![Metric::new_sol(self.balance)], + metrics: vec![Metric::new_sol(self.balance.clone())], }, ) } diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index 02466ef79a6e8b..ff67b4639339fa 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -7,6 +7,7 @@ use banks_with_commitments::BanksWithCommitments; use solana_gossip::cluster_info::ClusterInfo; use std::sync::Arc; 
+#[derive(Clone)] pub struct Lamports(pub u64); pub fn render_prometheus( From f9727f3bfe55cdc29e5fb3fb3b7193fd36530cda Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:43:33 -0300 Subject: [PATCH 29/47] Add label to vote balance --- prometheus/src/cluster_metrics.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index f1251506cef6a1..f49c729abe09e0 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -46,7 +46,9 @@ impl ValidatorVoteInfo { name: "solana_node_vote_public_key_info", help: "The current Solana node's vote public key", type_: "count", - metrics: vec![Metric::new(1).with_label("vote", self.vote_address.to_string())], + metrics: vec![ + Metric::new(1).with_label("vote_account", self.vote_address.to_string()) + ], }, )?; // We can use this metric to track if the validator is making progress From 11a6f7ff1736ee231e5f4e6ddd0adbc9c74db620 Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 15 Jun 2022 08:43:54 -0300 Subject: [PATCH 30/47] Rename labels --- prometheus/src/cluster_metrics.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index f49c729abe09e0..22c482684c38cf 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -60,9 +60,8 @@ impl ValidatorVoteInfo { help: "The voted-on slot of the validator's last vote that got included in the chain", type_: "gauge", - metrics: vec![ - Metric::new(self.last_vote).with_label("pubkey", self.vote_address.to_string()) - ], + metrics: vec![Metric::new(self.last_vote) + .with_label("vote_account", self.vote_address.to_string())], }, )?; // Validator rewards go to vote account, we use this to track our own @@ -73,7 +72,8 @@ impl ValidatorVoteInfo { name: "solana_node_vote_balance_sol", help: "The current node's vote account balance", type_: "gauge", - 
metrics: vec![Metric::new_sol(self.balance.clone()) + .with_label("vote_account", self.vote_address.to_string())], }, ) } } From 3a1fad84703bea136af4b7ae446d97453a730431 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 21 Jun 2022 12:03:03 -0300 Subject: [PATCH 31/47] Add parameter to observe vote accounts. We add a command-line flag so that we can track multiple vote accounts, and propagate its value from the validator's arguments, through the validator config and the RPC service, down to the Prometheus metrics code. --- core/src/validator.rs | 3 ++ prometheus/src/cluster_metrics.rs | 73 +++++-------------------------- prometheus/src/lib.rs | 13 ++++-- rpc/src/rpc_service.rs | 12 +++++ test-validator/src/lib.rs | 1 + validator/src/main.rs | 27 ++++++++++++ 6 files changed, 64 insertions(+), 65 deletions(-) diff --git a/core/src/validator.rs b/core/src/validator.rs index a3382a6145a343..617364e1b862ea 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -173,6 +173,7 @@ pub struct ValidatorConfig { pub wait_to_vote_slot: Option, pub ledger_column_options: LedgerColumnOptions, pub enable_quic_servers: bool, + pub observable_vote_acounts: Arc>, } impl Default for ValidatorConfig { @@ -237,6 +238,7 @@ impl Default for ValidatorConfig { wait_to_vote_slot: None, ledger_column_options: LedgerColumnOptions::default(), enable_quic_servers: true, + observable_vote_acounts: Arc::new(HashSet::new()), } } } @@ -736,6 +738,7 @@ impl Validator { leader_schedule_cache.clone(), connection_cache.clone(), max_complete_transaction_status_slot, + config.observable_vote_acounts.clone(), )), if !config.rpc_config.full_api { None diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 22c482684c38cf..b6783c6ebc5486 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -8,80 +8,29 @@ use crate::{ utils::{write_metric, Metric, MetricFamily}, Lamports, }; -use std::{io, sync::Arc}; +use std::{collections::HashSet, io, sync::Arc}; struct
ValidatorVoteInfo { - vote_address: Pubkey, balance: Lamports, last_vote: Slot, } -impl ValidatorVoteInfo { - fn new_from_bank(bank: &Arc, identity_pubkey: &Pubkey) -> Option { - let vote_accounts = bank.vote_accounts(); - let vote_state_default = VoteState::default(); - vote_accounts - .iter() - .filter_map(|(&vote_pubkey, (_activated_stake, account))| { - let vote_state = account.vote_state(); - let vote_state = vote_state.as_ref().unwrap_or(&vote_state_default); - if identity_pubkey != &vote_state.node_pubkey { - return None; - } - let last_vote = vote_state.votes.back()?.slot; - let vote_balance = Lamports(bank.get_balance(&vote_pubkey)); - Some(ValidatorVoteInfo { - vote_address: vote_pubkey, - balance: vote_balance, - last_vote, - }) - }) - .next() - } +fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option { + let default_vote_state = VoteState::default(); + let vote_accounts = bank.vote_accounts(); + let (_activated_stake, vote_account) = vote_accounts.get(vote_pubkey)?; + let vote_state = vote_account.vote_state(); + let vote_state = vote_state.as_ref().unwrap_or(&default_vote_state); - fn write_prometheus(&self, out: &mut W) -> io::Result<()> { - write_metric( - out, - &MetricFamily { - name: "solana_node_vote_public_key_info", - help: "The current Solana node's vote public key", - type_: "count", - metrics: vec![ - Metric::new(1).with_label("vote_account", self.vote_address.to_string()) - ], - }, - )?; - // We can use this metric to track if the validator is making progress - // by voting on the last slots. - write_metric( - out, - &MetricFamily { - name: "solana_node_last_vote_slot", - help: - "The voted-on slot of the validator's last vote that got included in the chain", - type_: "gauge", - metrics: vec![Metric::new(self.last_vote) - .with_label("vote_account", self.vote_address.to_string())], - }, - )?; - // Validator rewards go to vote account, we use this to track our own - // rewards. 
- write_metric( - out, - &MetricFamily { - name: "solana_node_vote_balance_sol", - help: "The current node's vote account balance", - type_: "gauge", - metrics: vec![Metric::new_sol(self.balance.clone()) - .with_label("vote_account", self.vote_address.to_string())], - }, - ) - } + let last_vote = vote_state.votes.back()?.slot; + let balance = Lamports(bank.get_balance(&vote_pubkey)); + Some(ValidatorVoteInfo { balance, last_vote }) } pub fn write_cluster_metrics( banks_with_commitments: &BanksWithCommitments, cluster_info: &Arc, + vote_accounts: &Arc>, out: &mut W, ) -> io::Result<()> { let identity_pubkey = cluster_info.id(); diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index ff67b4639339fa..d8ad0964a41b15 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -5,7 +5,8 @@ mod utils; use banks_with_commitments::BanksWithCommitments; use solana_gossip::cluster_info::ClusterInfo; -use std::sync::Arc; +use solana_sdk::pubkey::Pubkey; +use std::{collections::HashSet, sync::Arc}; #[derive(Clone)] pub struct Lamports(pub u64); @@ -13,6 +14,7 @@ pub struct Lamports(pub u64); pub fn render_prometheus( banks_with_commitments: BanksWithCommitments, cluster_info: &Arc, + vote_accounts: &Arc>, ) -> Vec { // There are 3 levels of commitment for a bank: // - finalized: most recent block *confirmed* by supermajority of the @@ -22,7 +24,12 @@ pub fn render_prometheus( // - processed: most recent block. 
let mut out: Vec = Vec::new(); bank_metrics::write_bank_metrics(&banks_with_commitments, &mut out).expect("IO error"); - cluster_metrics::write_cluster_metrics(&banks_with_commitments, &cluster_info, &mut out) - .expect("IO error"); + cluster_metrics::write_cluster_metrics( + &banks_with_commitments, + &cluster_info, + vote_accounts, + &mut out, + ) + .expect("IO error"); out } diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index dc17a366c51b58..6c8d0c64c7ebc8 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -74,6 +74,7 @@ struct RpcRequestMiddleware { bank_forks: Arc>, health: Arc, block_commitment_cache: Arc>, + observable_vote_accounts: Arc>, } impl RpcRequestMiddleware { @@ -83,6 +84,7 @@ impl RpcRequestMiddleware { bank_forks: Arc>, health: Arc, block_commitment_cache: Arc>, + observable_vote_accounts: Arc>, ) -> Self { Self { ledger_path, @@ -98,6 +100,7 @@ impl RpcRequestMiddleware { bank_forks, health, block_commitment_cache, + observable_vote_accounts, } } @@ -302,6 +305,7 @@ impl RequestMiddleware for RpcRequestMiddleware { .body(hyper::Body::from(render_prometheus( banks_with_commitment, &self.health.cluster_info, + &self.observable_vote_accounts, ))) .unwrap() .into() @@ -357,6 +361,7 @@ impl JsonRpcService { leader_schedule_cache: Arc, connection_cache: Arc, current_transaction_status_slot: Arc, + observable_vote_accounts: Arc>, ) -> Self { info!("rpc bound to {:?}", rpc_addr); info!("rpc configuration: {:?}", config); @@ -505,6 +510,7 @@ impl JsonRpcService { bank_forks.clone(), health.clone(), block_commitment_cache.clone(), + observable_vote_accounts, ); let server = ServerBuilder::with_meta_extractor( io, @@ -642,6 +648,7 @@ mod tests { Arc::new(LeaderScheduleCache::default()), connection_cache, Arc::new(AtomicU64::default()), + None, ); let thread = rpc_service.thread_hdl.thread(); assert_eq!(thread.name().unwrap(), "solana-jsonrpc"); @@ -688,6 +695,7 @@ mod tests { bank_forks.clone(), RpcHealth::stub(), 
block_commitment_cache, + None, ); let rrm_with_snapshot_config = RpcRequestMiddleware::new( PathBuf::from("/"), @@ -695,6 +703,7 @@ mod tests { bank_forks, RpcHealth::stub(), block_commitment_cache, + None, ); assert!(rrm.is_file_get_path(DEFAULT_GENESIS_DOWNLOAD_PATH)); @@ -766,6 +775,7 @@ mod tests { create_bank_forks(), RpcHealth::stub(), Arc::new(RwLock::new(BlockCommitmentCache::default())), + None, ); // File does not exist => request should fail. @@ -822,6 +832,7 @@ mod tests { create_bank_forks(), RpcHealth::stub(), Arc::new(RwLock::new(BlockCommitmentCache::default())), + None, ); assert_eq!(rm.health_check(), "ok"); } @@ -854,6 +865,7 @@ mod tests { create_bank_forks(), health, Arc::new(RwLock::new(BlockCommitmentCache::default())), + None, ); // No account hashes for this node or any known validators diff --git a/test-validator/src/lib.rs b/test-validator/src/lib.rs index 6afc8f5df41e37..68afd6aec3ce13 100644 --- a/test-validator/src/lib.rs +++ b/test-validator/src/lib.rs @@ -709,6 +709,7 @@ impl TestValidator { max_ledger_shreds: config.max_ledger_shreds, no_wait_for_vote_to_start_leader: true, accounts_db_config, + observable_vote_acounts: Arc::new(HashSet::from_iter(vec![vote_account_address])), ..ValidatorConfig::default_for_test() }; if let Some(ref tower_storage) = config.tower_storage { diff --git a/validator/src/main.rs b/validator/src/main.rs index 947a0a0e22a2ed..d716075fafecb5 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -378,6 +378,22 @@ fn hardforks_of(matches: &ArgMatches<'_>, name: &str) -> Option> { } } +fn get_observable_vote_accounts(matches: &ArgMatches<'_>) -> HashSet { + let vote_account = if matches.is_present("vote_account") { + vec![pubkey_of(&matches, "vote_account").unwrap()] + } else { + vec![] + }; + let mut monitor_vote_accounts = if matches.is_present("monitor_vote_account") { + let accounts = values_t_or_exit!(matches, "monitor_vote_account", Pubkey); + accounts.into_iter().collect::>() + } else { + 
HashSet::new() + }; + monitor_vote_accounts.extend(vote_account.iter()); + monitor_vote_accounts +} + fn validators_set( identity_pubkey: &Pubkey, matches: &ArgMatches<'_>, @@ -1873,6 +1889,15 @@ pub fn main() { .after_help("Note: If this command exits with a non-zero status \ then this not a good time for a restart") ) + .arg( + Arg::with_name("monitor_vote_account") + .long("monitor-vote-account") + .takes_value(true) + .value_name("MONITOR_VOTE_ACCOUNT") + .validator(is_pubkey) + .multiple(true) + .help("The vote accounts to inspect for prometheus metrics") + ) .get_matches(); let socket_addr_space = SocketAddrSpace::new(matches.is_present("allow_private_addr")); @@ -2591,6 +2616,8 @@ pub fn main() { Keypair::new().pubkey() }); + validator_config.observable_vote_acounts = get_observable_vote_accounts(&matches); + let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) .expect("invalid dynamic_port_range"); From c5421d1d8cb6d254d4a8caa859f48ec1de93c3ef Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 21 Jun 2022 12:04:14 -0300 Subject: [PATCH 32/47] Add vote metrics Modify how metrics are written to account for when there's nothing to report. 
Add metrics about votes: last voted and vote balance --- prometheus/src/bank_metrics.rs | 9 ++-- prometheus/src/banks_with_commitments.rs | 15 +++--- prometheus/src/cluster_metrics.rs | 58 +++++++++++++++++++++--- 3 files changed, 66 insertions(+), 16 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 01ea58e984c3c5..1fac8b8e70cc2a 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -15,7 +15,8 @@ pub fn write_bank_metrics( help: "Block Slot", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Metric::new(bank.clock().slot)), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().slot))) + .unwrap(), }, )?; write_metric( @@ -25,7 +26,8 @@ pub fn write_bank_metrics( help: "Block Epoch", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Metric::new(bank.clock().epoch)), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().epoch))) + .unwrap(), }, )?; write_metric( @@ -35,7 +37,8 @@ pub fn write_bank_metrics( help: "The block's UNIX timestamp, in seconds since epoch, UTC", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Metric::new(bank.clock().unix_timestamp as u64)), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().unix_timestamp as u64))) + .unwrap(), }, )?; diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index 815ad040a96f2f..e42e6a33890036 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -59,11 +59,14 @@ impl BanksWithCommitments { } /// Call function callback for each commitment level, and returns a vector of metrics. 
- pub fn for_each_commitment Metric>(&self, get: F) -> Vec { - vec![ - get(&self.finalized_bank).with_label("commitment_level", "finalized".to_owned()), - get(&self.confirmed_bank).with_label("commitment_level", "confirmed".to_owned()), - get(&self.processed_bank).with_label("commitment_level", "processed".to_owned()), - ] + pub fn for_each_commitment Option>( + &self, + get: F, + ) -> Option> { + Some(vec![ + get(&self.finalized_bank)?.with_label("commitment_level", "finalized".to_owned()), + get(&self.confirmed_bank)?.with_label("commitment_level", "confirmed".to_owned()), + get(&self.processed_bank)?.with_label("commitment_level", "processed".to_owned()), + ]) } } diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index b6783c6ebc5486..b071204a9b1c71 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -56,10 +56,14 @@ pub fn write_cluster_metrics( name: "solana_node_identity_balance_sol", help: "The node's finalized identity balance", type_: "gauge", - metrics: banks_with_commitments.for_each_commitment(|bank| { - Metric::new_sol(Lamports(bank.get_balance(&identity_pubkey))) - .with_label("identity_account", identity_pubkey.to_string()) - }), + metrics: banks_with_commitments + .for_each_commitment(|bank| { + Some( + Metric::new_sol(Lamports(bank.get_balance(&identity_pubkey))) + .with_label("identity_account", identity_pubkey.to_string()), + ) + }) + .unwrap(), }, )?; @@ -73,9 +77,49 @@ pub fn write_cluster_metrics( }, )?; - let validator_vote_info = ValidatorVoteInfo::new_from_bank(bank, &identity_pubkey); - if let Some(vote_info) = validator_vote_info { - vote_info.write_prometheus(out)?; + // Vote accounts information + for vote_account in vote_accounts.iter() { + // We use this metric to track if the validator is making progress by + // voting on the last slots. 
+ let metrics = banks_with_commitments.for_each_commitment(|bank| { + let vote_info = get_vote_state(bank, vote_account)?; + Some( + Metric::new(vote_info.last_vote) + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }); + if let Some(last_voted_slot_metrics) = metrics { + write_metric( + out, + &MetricFamily { + name: "solana_node_last_vote_slot", + help: + "The voted-on slot of the validator's last vote that got included in the chain", + type_: "gauge", + metrics: last_voted_slot_metrics, + }, + )?; + + write_metric( + out, + &MetricFamily { + name: "solana_node_vote_balance_sol", + help: "The current node's vote account balance", + type_: "gauge", + metrics: banks_with_commitments + .for_each_commitment(|bank| { + let vote_info = get_vote_state(bank, vote_account)?; + Some( + Metric::new_sol(vote_info.balance) + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }) + .unwrap(), + }, + )?; + } } Ok(()) From 0888bc451028f4937e7acab5dc6b0d09fda77695 Mon Sep 17 00:00:00 2001 From: Fynn Date: Wed, 22 Jun 2022 11:40:18 -0300 Subject: [PATCH 33/47] Naming changes, add vote credits information Change variable and function names, and add information about vote credits --- core/src/validator.rs | 6 +++--- prometheus/src/cluster_metrics.rs | 10 ++++++++-- prometheus/src/lib.rs | 2 +- test-validator/src/lib.rs | 2 +- validator/src/main.rs | 14 +++++++++----- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/core/src/validator.rs b/core/src/validator.rs index 617364e1b862ea..ab5fbf1ddc9b74 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -173,7 +173,7 @@ pub struct ValidatorConfig { pub wait_to_vote_slot: Option, pub ledger_column_options: LedgerColumnOptions, pub enable_quic_servers: bool, - pub observable_vote_acounts: Arc>, + pub vote_accounts_to_monitor: Arc>, } impl Default for ValidatorConfig 
{ @@ -238,7 +238,7 @@ impl Default for ValidatorConfig { wait_to_vote_slot: None, ledger_column_options: LedgerColumnOptions::default(), enable_quic_servers: true, - observable_vote_acounts: Arc::new(HashSet::new()), + vote_accounts_to_monitor: Arc::new(HashSet::new()), } } } @@ -738,7 +738,7 @@ impl Validator { leader_schedule_cache.clone(), connection_cache.clone(), max_complete_transaction_status_slot, - config.observable_vote_acounts.clone(), + config.vote_accounts_to_monitor.clone(), )), if !config.rpc_config.full_api { None diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index b071204a9b1c71..1ddde0e61a9c38 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -13,6 +13,7 @@ use std::{collections::HashSet, io, sync::Arc}; struct ValidatorVoteInfo { balance: Lamports, last_vote: Slot, + vote_credits: u64, } fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option { @@ -24,7 +25,12 @@ fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option( @@ -54,7 +60,7 @@ pub fn write_cluster_metrics( out, &MetricFamily { name: "solana_node_identity_balance_sol", - help: "The node's finalized identity balance", + help: "The balance of the node's identity account", type_: "gauge", metrics: banks_with_commitments .for_each_commitment(|bank| { diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index d8ad0964a41b15..a0f93ff346d2ab 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -8,7 +8,7 @@ use solana_gossip::cluster_info::ClusterInfo; use solana_sdk::pubkey::Pubkey; use std::{collections::HashSet, sync::Arc}; -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Lamports(pub u64); pub fn render_prometheus( diff --git a/test-validator/src/lib.rs b/test-validator/src/lib.rs index 68afd6aec3ce13..995337e198d7a1 100644 --- a/test-validator/src/lib.rs +++ b/test-validator/src/lib.rs @@ -709,7 +709,7 @@ impl TestValidator { max_ledger_shreds: config.max_ledger_shreds, 
no_wait_for_vote_to_start_leader: true, accounts_db_config, - observable_vote_acounts: Arc::new(HashSet::from_iter(vec![vote_account_address])), + vote_accounts_to_monitor: Arc::new(HashSet::from_iter(vec![vote_account_address])), ..ValidatorConfig::default_for_test() }; if let Some(ref tower_storage) = config.tower_storage { diff --git a/validator/src/main.rs b/validator/src/main.rs index d716075fafecb5..f872cb92974cc6 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -378,9 +378,11 @@ fn hardforks_of(matches: &ArgMatches<'_>, name: &str) -> Option> { } } -fn get_observable_vote_accounts(matches: &ArgMatches<'_>) -> HashSet { +fn get_vote_accounts_to_monitor(matches: &ArgMatches<'_>) -> HashSet { let vote_account = if matches.is_present("vote_account") { - vec![pubkey_of(&matches, "vote_account").unwrap()] + vec![pubkey_of(&matches, "vote_account").expect( + "Does not fail, as this is validated by Clap earlier.", + )()] } else { vec![] }; @@ -1893,10 +1895,12 @@ pub fn main() { Arg::with_name("monitor_vote_account") .long("monitor-vote-account") .takes_value(true) - .value_name("MONITOR_VOTE_ACCOUNT") + .value_name("PUBKEY") .validator(is_pubkey) .multiple(true) - .help("The vote accounts to inspect for prometheus metrics") + .help("Additional vote accounts expose Prometheus metrics about. 
\ + The validator's own vote account is always included implicitly \ + if there is one.") ) .get_matches(); @@ -2616,7 +2620,7 @@ pub fn main() { Keypair::new().pubkey() }); - validator_config.observable_vote_acounts = get_observable_vote_accounts(&matches); + validator_config.vote_accounts_to_monitor = get_vote_accounts_to_monitor(&matches); let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) From d7045e79302a57d474811c6a865c1ea6552a2cdd Mon Sep 17 00:00:00 2001 From: Fynn Date: Mon, 4 Jul 2022 13:49:57 -0300 Subject: [PATCH 34/47] Write vote account metrics Slight change on how metrics are recorded --- prometheus/src/bank_metrics.rs | 9 +- prometheus/src/banks_with_commitments.rs | 24 ++++-- prometheus/src/cluster_metrics.rs | 104 ++++++++++++----------- 3 files changed, 74 insertions(+), 63 deletions(-) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 1fac8b8e70cc2a..2bb28d6caf3689 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -15,8 +15,7 @@ pub fn write_bank_metrics( help: "Block Slot", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Some(Metric::new(bank.clock().slot))) - .unwrap(), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().slot))), }, )?; write_metric( @@ -26,8 +25,7 @@ pub fn write_bank_metrics( help: "Block Epoch", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Some(Metric::new(bank.clock().epoch))) - .unwrap(), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().epoch))), }, )?; write_metric( @@ -37,8 +35,7 @@ pub fn write_bank_metrics( help: "The block's UNIX timestamp, in seconds since epoch, UTC", type_: "gauge", metrics: banks_with_commitments - .for_each_commitment(|bank| Some(Metric::new(bank.clock().unix_timestamp as u64))) - .unwrap(), + .for_each_commitment(|bank| Some(Metric::new(bank.clock().unix_timestamp as u64))), }, )?; diff 
--git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index e42e6a33890036..6224cc4a6133de 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -59,14 +59,20 @@ impl BanksWithCommitments { } /// Call function callback for each commitment level, and returns a vector of metrics. - pub fn for_each_commitment Option>( - &self, - get: F, - ) -> Option> { - Some(vec![ - get(&self.finalized_bank)?.with_label("commitment_level", "finalized".to_owned()), - get(&self.confirmed_bank)?.with_label("commitment_level", "confirmed".to_owned()), - get(&self.processed_bank)?.with_label("commitment_level", "processed".to_owned()), - ]) + pub fn for_each_commitment Option>(&self, get: F) -> Vec { + let mut result = Vec::with_capacity(3); + result.extend( + get(&self.finalized_bank) + .map(|m| m.with_label("commitment_level", "finalized".to_owned())), + ); + result.extend( + get(&self.confirmed_bank) + .map(|m| m.with_label("commitment_level", "confirmed".to_owned())), + ); + result.extend( + get(&self.processed_bank) + .map(|m| m.with_label("commitment_level", "processed".to_owned())), + ); + result } } diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 1ddde0e61a9c38..0b5824d04cc22d 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -62,14 +62,12 @@ pub fn write_cluster_metrics( name: "solana_node_identity_balance_sol", help: "The balance of the node's identity account", type_: "gauge", - metrics: banks_with_commitments - .for_each_commitment(|bank| { - Some( - Metric::new_sol(Lamports(bank.get_balance(&identity_pubkey))) - .with_label("identity_account", identity_pubkey.to_string()), - ) - }) - .unwrap(), + metrics: banks_with_commitments.for_each_commitment(|bank| { + Some( + Metric::new_sol(Lamports(bank.get_balance(&identity_pubkey))) + .with_label("identity_account", identity_pubkey.to_string()), + ) + }), }, )?; @@ 
-85,47 +83,57 @@ pub fn write_cluster_metrics( // Vote accounts information for vote_account in vote_accounts.iter() { - // We use this metric to track if the validator is making progress by - // voting on the last slots. - let metrics = banks_with_commitments.for_each_commitment(|bank| { - let vote_info = get_vote_state(bank, vote_account)?; - Some( - Metric::new(vote_info.last_vote) - .with_label("identity_account", identity_pubkey.to_string()) - .with_label("vote_account", vote_account.to_string()), - ) - }); - if let Some(last_voted_slot_metrics) = metrics { - write_metric( - out, - &MetricFamily { - name: "solana_node_last_vote_slot", - help: - "The voted-on slot of the validator's last vote that got included in the chain", - type_: "gauge", - metrics: last_voted_slot_metrics, - }, - )?; + write_metric( + out, + &MetricFamily { + name: "solana_node_last_vote_slot", + help: + "The voted-on slot of the validator's last vote that got included in the chain", + type_: "gauge", + metrics: banks_with_commitments.for_each_commitment(|bank| { + let vote_info = get_vote_state(bank, vote_account)?; + Some( + Metric::new(vote_info.last_vote) + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }), + }, + )?; - write_metric( - out, - &MetricFamily { - name: "solana_node_vote_balance_sol", - help: "The current node's vote account balance", - type_: "gauge", - metrics: banks_with_commitments - .for_each_commitment(|bank| { - let vote_info = get_vote_state(bank, vote_account)?; - Some( - Metric::new_sol(vote_info.balance) - .with_label("identity_account", identity_pubkey.to_string()) - .with_label("vote_account", vote_account.to_string()), - ) - }) - .unwrap(), - }, - )?; - } + write_metric( + out, + &MetricFamily { + name: "solana_node_vote_balance_sol", + help: "The current node's vote account balance", + type_: "gauge", + metrics: banks_with_commitments.for_each_commitment(|bank| { + let vote_info = 
get_vote_state(bank, vote_account)?; + Some( + Metric::new_sol(vote_info.balance) + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }), + }, + )?; + + write_metric( + out, + &MetricFamily { + name: "solana_node_vote_credits", + help: "The current node's vote vote credits for current epoch", + type_: "gauge", + metrics: banks_with_commitments.for_each_commitment(|bank| { + let vote_info = get_vote_state(bank, vote_account)?; + Some( + Metric::new(vote_info.vote_credits) + .with_label("identity_account", identity_pubkey.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }), + }, + )?; } Ok(()) From da01458308ff51aeea7e2a8c8d9b9707d3f56d65 Mon Sep 17 00:00:00 2001 From: Enrique Fynn Date: Tue, 12 Jul 2022 18:14:48 +0200 Subject: [PATCH 35/47] Rename prometheus metrics help Co-authored-by: Ruud van Asseldonk --- prometheus/src/cluster_metrics.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 0b5824d04cc22d..bb95ebb3823ffe 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -105,7 +105,7 @@ pub fn write_cluster_metrics( out, &MetricFamily { name: "solana_node_vote_balance_sol", - help: "The current node's vote account balance", + help: "The balance of the vote account at the given address", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { let vote_info = get_vote_state(bank, vote_account)?; @@ -122,7 +122,7 @@ pub fn write_cluster_metrics( out, &MetricFamily { name: "solana_node_vote_credits", - help: "The current node's vote vote credits for current epoch", + help: "The total number of vote credits credited to this vote account", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { let vote_info = get_vote_state(bank, vote_account)?; From e432aae2d0baa08586584d626b0beeb6ac096090 Mon 
Sep 17 00:00:00 2001 From: Fynn Date: Tue, 12 Jul 2022 12:59:22 -0300 Subject: [PATCH 36/47] Refer as `validator` metrics --- prometheus/src/cluster_metrics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index bb95ebb3823ffe..9d87ede1321d2d 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -86,7 +86,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_node_last_vote_slot", + name: "solana_validator_last_vote_slot", help: "The voted-on slot of the validator's last vote that got included in the chain", type_: "gauge", @@ -104,7 +104,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_node_vote_balance_sol", + name: "solana_validator_vote_balance_sol", help: "The balance of the vote account at the given address", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { @@ -121,7 +121,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_node_vote_credits", + name: "solana_validator_vote_credits", help: "The total number of vote credits credited to this vote account", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { From b71176635e6cd28dca3debdc7a4ac2e0afa15373 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 12 Jul 2022 13:12:46 -0300 Subject: [PATCH 37/47] Rename variables --- rpc/src/rpc_service.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index 6c8d0c64c7ebc8..907a17458d8b91 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -74,7 +74,7 @@ struct RpcRequestMiddleware { bank_forks: Arc>, health: Arc, block_commitment_cache: Arc>, - observable_vote_accounts: Arc>, + vote_accounts_to_monitor: Arc>, } impl RpcRequestMiddleware { @@ -84,7 +84,7 @@ impl RpcRequestMiddleware { bank_forks: Arc>, 
health: Arc, block_commitment_cache: Arc>, - observable_vote_accounts: Arc>, + vote_accounts_to_monitor: Arc>, ) -> Self { Self { ledger_path, @@ -100,7 +100,7 @@ impl RpcRequestMiddleware { bank_forks, health, block_commitment_cache, - observable_vote_accounts, + vote_accounts_to_monitor, } } @@ -305,7 +305,7 @@ impl RequestMiddleware for RpcRequestMiddleware { .body(hyper::Body::from(render_prometheus( banks_with_commitment, &self.health.cluster_info, - &self.observable_vote_accounts, + &self.vote_accounts_to_monitor, ))) .unwrap() .into() @@ -361,7 +361,7 @@ impl JsonRpcService { leader_schedule_cache: Arc, connection_cache: Arc, current_transaction_status_slot: Arc, - observable_vote_accounts: Arc>, + vote_accounts_to_monitor: Arc>, ) -> Self { info!("rpc bound to {:?}", rpc_addr); info!("rpc configuration: {:?}", config); @@ -510,7 +510,7 @@ impl JsonRpcService { bank_forks.clone(), health.clone(), block_commitment_cache.clone(), - observable_vote_accounts, + vote_accounts_to_monitor, ); let server = ServerBuilder::with_meta_extractor( io, From e45bbacfedd7328c2bc8046575bd116527719651 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 12 Jul 2022 13:13:25 -0300 Subject: [PATCH 38/47] Bugfix: Correct initialization --- validator/src/main.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/validator/src/main.rs b/validator/src/main.rs index f872cb92974cc6..24db1ca7018a58 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -380,9 +380,8 @@ fn hardforks_of(matches: &ArgMatches<'_>, name: &str) -> Option> { fn get_vote_accounts_to_monitor(matches: &ArgMatches<'_>) -> HashSet { let vote_account = if matches.is_present("vote_account") { - vec![pubkey_of(&matches, "vote_account").expect( - "Does not fail, as this is validated by Clap earlier.", - )()] + vec![pubkey_of(&matches, "vote_account") + .expect("Does not fail, as this is validated by Clap earlier.")] } else { vec![] }; @@ -2620,7 +2619,7 @@ pub fn main() { 
Keypair::new().pubkey() }); - validator_config.vote_accounts_to_monitor = get_vote_accounts_to_monitor(&matches); + validator_config.vote_accounts_to_monitor = Arc::new(get_vote_accounts_to_monitor(&matches)); let dynamic_port_range = solana_net_utils::parse_port_range(matches.value_of("dynamic_port_range").unwrap()) From ac132cfb13ac4981e12ede57e4262d9599fc5bd4 Mon Sep 17 00:00:00 2001 From: Fynn Date: Tue, 12 Jul 2022 14:26:08 -0300 Subject: [PATCH 39/47] Fix identity pubkey for each validator to monitor --- prometheus/src/cluster_metrics.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 9d87ede1321d2d..91e5d0ab0436c9 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -14,6 +14,7 @@ struct ValidatorVoteInfo { balance: Lamports, last_vote: Slot, vote_credits: u64, + identity: Pubkey, } fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option { @@ -30,6 +31,7 @@ fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option( let vote_info = get_vote_state(bank, vote_account)?; Some( Metric::new(vote_info.last_vote) - .with_label("identity_account", identity_pubkey.to_string()) + .with_label("identity_account", vote_info.identity.to_string()) .with_label("vote_account", vote_account.to_string()), ) }), @@ -111,7 +113,7 @@ pub fn write_cluster_metrics( let vote_info = get_vote_state(bank, vote_account)?; Some( Metric::new_sol(vote_info.balance) - .with_label("identity_account", identity_pubkey.to_string()) + .with_label("identity_account", vote_info.identity.to_string()) .with_label("vote_account", vote_account.to_string()), ) }), @@ -128,7 +130,7 @@ pub fn write_cluster_metrics( let vote_info = get_vote_state(bank, vote_account)?; Some( Metric::new(vote_info.vote_credits) - .with_label("identity_account", identity_pubkey.to_string()) + .with_label("identity_account", vote_info.identity.to_string())
.with_label("vote_account", vote_account.to_string()), ) }), From 980fb16cf79654980e8b83e63642d77c69066bba Mon Sep 17 00:00:00 2001 From: Enrique Fynn Date: Thu, 14 Jul 2022 15:00:31 +0200 Subject: [PATCH 40/47] Change metric name Co-authored-by: Ruud van Asseldonk --- prometheus/src/cluster_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 91e5d0ab0436c9..1478bb4a3812f8 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -106,7 +106,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_validator_vote_balance_sol", + name: "solana_validator_vote_account_balance_sol", help: "The balance of the vote account at the given address", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { From d46b45a479a2ed2f990d78dc7554e16160c7ee32 Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 14 Jul 2022 10:54:03 -0300 Subject: [PATCH 41/47] Add active stake per monitored validator --- prometheus/src/cluster_metrics.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 1478bb4a3812f8..59e4462d196e82 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -15,12 +15,13 @@ struct ValidatorVoteInfo { last_vote: Slot, vote_credits: u64, identity: Pubkey, + activated_stake: Lamports, } fn get_vote_state(bank: &Bank, vote_pubkey: &Pubkey) -> Option { let default_vote_state = VoteState::default(); let vote_accounts = bank.vote_accounts(); - let (_activated_stake, vote_account) = vote_accounts.get(vote_pubkey)?; + let (activated_stake, vote_account) = vote_accounts.get(vote_pubkey)?; let vote_state = vote_account.vote_state(); let vote_state = vote_state.as_ref().unwrap_or(&default_vote_state); @@ -32,6 +33,7 @@ fn get_vote_state(bank: &Bank, vote_pubkey: 
&Pubkey) -> Option( }), }, )?; + + write_metric( + out, + &MetricFamily { + name: "solana_validator_activated_stake_sol", + help: "The total amount of Sol actively staked to this validator", + type_: "gauge", + metrics: banks_with_commitments.for_each_commitment(|bank| { + let vote_info = get_vote_state(bank, vote_account)?; + Some( + Metric::new_sol(vote_info.activated_stake) + .with_label("identity_account", vote_info.identity.to_string()) + .with_label("vote_account", vote_account.to_string()), + ) + }), + }, + )?; } Ok(()) From 0af8d74f68d5313b34cc4aef24539717f9f537bf Mon Sep 17 00:00:00 2001 From: Fynn Date: Thu, 14 Jul 2022 12:19:07 -0300 Subject: [PATCH 42/47] Add `vote_accounts_to_monitor` to `replica_node` --- local-cluster/src/validator_configs.rs | 1 + replica-node/src/main.rs | 1 + replica-node/src/replica_node.rs | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/local-cluster/src/validator_configs.rs b/local-cluster/src/validator_configs.rs index f8aae532d825b4..437c0bbfaea755 100644 --- a/local-cluster/src/validator_configs.rs +++ b/local-cluster/src/validator_configs.rs @@ -66,6 +66,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig { wait_to_vote_slot: config.wait_to_vote_slot, ledger_column_options: config.ledger_column_options.clone(), enable_quic_servers: config.enable_quic_servers, + vote_accounts_to_monitor: config.vote_accounts_to_monitor.clone(), } } diff --git a/replica-node/src/main.rs b/replica-node/src/main.rs index 07369ebd635c33..06cf8b2add6153 100644 --- a/replica-node/src/main.rs +++ b/replica-node/src/main.rs @@ -401,6 +401,7 @@ pub fn main() { account_indexes: AccountSecondaryIndexes::default(), accounts_db_caching_enabled: false, replica_exit: Arc::new(RwLock::new(Exit::default())), + vote_accounts_to_monitor: Arc::new(HashSet::default()), }; let replica = ReplicaNode::new(config); diff --git a/replica-node/src/replica_node.rs b/replica-node/src/replica_node.rs index 29ba56cf81f1fb..872d96c985842c 
100644 --- a/replica-node/src/replica_node.rs +++ b/replica-node/src/replica_node.rs @@ -1,3 +1,7 @@ +use std::collections::HashSet; + +use solana_sdk::pubkey::Pubkey; + use { crate::accountsdb_repl_service::AccountsDbReplService, crossbeam_channel::unbounded, @@ -58,6 +62,7 @@ pub struct ReplicaNodeConfig { pub accounts_db_caching_enabled: bool, pub replica_exit: Arc>, pub socket_addr_space: SocketAddrSpace, + pub vote_accounts_to_monitor: Arc>, } pub struct ReplicaNode { @@ -251,6 +256,7 @@ fn start_client_rpc_services( leader_schedule_cache.clone(), connection_cache, max_complete_transaction_status_slot, + replica_config.vote_accounts_to_monitor.clone(), )), Some(pubsub_service), Some(OptimisticallyConfirmedBankTracker::new( From eb9f0259656aff4a19fefc12ca1c2c3be9168bdf Mon Sep 17 00:00:00 2001 From: Enrique Fynn Date: Thu, 14 Jul 2022 20:24:11 +0200 Subject: [PATCH 43/47] Update metric name Co-authored-by: Ruud van Asseldonk --- prometheus/src/cluster_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index 59e4462d196e82..ced295a188dc36 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -142,7 +142,7 @@ pub fn write_cluster_metrics( write_metric( out, &MetricFamily { - name: "solana_validator_activated_stake_sol", + name: "solana_validator_active_stake_sol", help: "The total amount of Sol actively staked to this validator", type_: "gauge", metrics: banks_with_commitments.for_each_commitment(|bank| { From db9b7f7d8aa7ff502b1e1a74746258d2b3fb7cf6 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Wed, 13 Jul 2022 12:59:40 +0200 Subject: [PATCH 44/47] Expose epoch schedule metrics in Prometheus With the epoch start slots and the number of slots in the epoch (and the current slot, which we already had), we can infer/estimate: * Epoch progress percentage * Slots left until the next epoch * Time left until the next epoch (from 
slot height increase) These are useful metrics to have about the network. --- prometheus/src/bank_metrics.rs | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index 2bb28d6caf3689..c287b135722adb 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -3,6 +3,8 @@ use crate::{ utils::{write_metric, Metric, MetricFamily}, }; use std::io; +use solana_sdk::sysvar; +use solana_sdk::sysvar::epoch_schedule::EpochSchedule; pub fn write_bank_metrics( banks_with_commitments: &BanksWithCommitments, @@ -28,6 +30,46 @@ pub fn write_bank_metrics( .for_each_commitment(|bank| Some(Metric::new(bank.clock().epoch))), }, )?; + write_metric( + out, + &MetricFamily { + name: "solana_block_epoch_start_slot", + help: "The first slot in the current epoch", + type_: "gauge", + metrics: banks_with_commitments + .for_each_commitment(|bank| { + // Note, the bank actually has a field that holds the EpochSchedule, + // but it is not public, so we can't easily access it here. We could + // make it public, but to make our patches less invasive, load the + // epoch schedule from the sysvar instead. It should always exist. + let epoch_schedule: EpochSchedule = bank + .get_account(&sysvar::epoch_schedule::id())? + .deserialize_data().ok()?; + let clock = bank.clock(); + Some(Metric::new(epoch_schedule.get_first_slot_in_epoch(clock.epoch))) + }), + }, + )?; + write_metric( + out, + &MetricFamily { + name: "solana_block_epoch_slots_total", + help: "The duration of the current epoch, in slots.", + type_: "gauge", + metrics: banks_with_commitments + .for_each_commitment(|bank| { + // Note, the bank actually has a field that holds the EpochSchedule, + // but it is not public, so we can't easily access it here. We could + // make it public, but to make our patches less invasive, load the + // epoch schedule from the sysvar instead. It should always exist. 
+ let epoch_schedule: EpochSchedule = bank + .get_account(&sysvar::epoch_schedule::id())? + .deserialize_data().ok()?; + let clock = bank.clock(); + Some(Metric::new(epoch_schedule.get_slots_in_epoch(clock.epoch))) + }), + }, + )?; write_metric( out, &MetricFamily { From dccf07d50703dce881ce53a8b464b2783d59c6a2 Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 5 Aug 2022 23:27:11 -0300 Subject: [PATCH 45/47] Export gossip metrics with a macro Derive a macro for `GossipStats`, a less invasive and more concise way of writing all metrics from it. Currently, the macro only accepts `Counter` type and will fail if used otherwise, but we can probably expand it in the future to work with more data types. --- Cargo.lock | 19 ++++++++ gossip/Cargo.toml | 2 + gossip/src/cluster_info.rs | 2 +- gossip/src/cluster_info_metrics.rs | 4 +- prometheus/Cargo.toml | 1 + prometheus/macros/Cargo.toml | 23 ++++++++++ prometheus/macros/src/lib.rs | 45 +++++++++++++++++++ prometheus/src/bank_metrics.rs | 8 ++-- prometheus/src/banks_with_commitments.rs | 3 +- prometheus/src/cluster_metrics.rs | 7 +-- prometheus/src/lib.rs | 8 ++-- prometheus/utils/Cargo.toml | 17 +++++++ prometheus/{src/utils.rs => utils/src/lib.rs} | 9 +++- 13 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 prometheus/macros/Cargo.toml create mode 100644 prometheus/macros/src/lib.rs create mode 100644 prometheus/utils/Cargo.toml rename prometheus/{src/utils.rs => utils/src/lib.rs} (98%) diff --git a/Cargo.lock b/Cargo.lock index ef35ce256047ab..999c43ed8d9bce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5358,6 +5358,8 @@ dependencies = [ "solana-metrics", "solana-net-utils", "solana-perf", + "solana-prometheus-macro", + "solana-prometheus-utils", "solana-rayon-threadlimit", "solana-runtime", "solana-sdk 1.10.32", @@ -5857,11 +5859,28 @@ version = "1.10.28" dependencies = [ "jsonrpc-http-server", "solana-gossip", + "solana-prometheus-utils", "solana-runtime", "solana-sdk 1.10.32", "solana-vote-program", ] 
+[[package]] +name = "solana-prometheus-macro" +version = "1.0.0" +dependencies = [ + "bs58", + "proc-macro2 1.0.32", + "quote 1.0.10", + "rustversion", + "solana-prometheus-utils", + "syn 1.0.91", +] + +[[package]] +name = "solana-prometheus-utils" +version = "1.0.0" + [[package]] name = "solana-rayon-threadlimit" version = "1.10.32" diff --git a/gossip/Cargo.toml b/gossip/Cargo.toml index ad9ac0773a0a87..cf078140728aee 100644 --- a/gossip/Cargo.toml +++ b/gossip/Cargo.toml @@ -46,6 +46,8 @@ solana-streamer = { path = "../streamer", version = "=1.10.32" } solana-version = { path = "../version", version = "=1.10.32" } solana-vote-program = { path = "../programs/vote", version = "=1.10.32" } thiserror = "1.0" +solana-prometheus-macro = { path = "../prometheus/macros", version = "=1.0.0" } +solana-prometheus-utils = { path = "../prometheus/utils", version = "=1.0.0" } [dev-dependencies] num_cpus = "1.13.1" diff --git a/gossip/src/cluster_info.rs b/gossip/src/cluster_info.rs index 27652597d180fe..0cbdbe9cd9583d 100644 --- a/gossip/src/cluster_info.rs +++ b/gossip/src/cluster_info.rs @@ -160,7 +160,7 @@ pub struct ClusterInfo { outbound_budget: DataBudget, my_contact_info: RwLock, ping_cache: Mutex, - stats: GossipStats, + pub stats: GossipStats, socket: UdpSocket, local_message_pending_push_queue: Mutex>, contact_debug_interval: u64, // milliseconds, 0 = disabled diff --git a/gossip/src/cluster_info_metrics.rs b/gossip/src/cluster_info_metrics.rs index 27bd8b98b22743..a81326d67ea020 100644 --- a/gossip/src/cluster_info_metrics.rs +++ b/gossip/src/cluster_info_metrics.rs @@ -1,3 +1,5 @@ +use solana_prometheus_macro::ExportPrometheus; + use { crate::crds_gossip::CrdsGossip, itertools::Itertools, @@ -87,7 +89,7 @@ impl<'a, T> Drop for TimedGuard<'a, T> { } } -#[derive(Default)] +#[derive(Default, ExportPrometheus)] pub struct GossipStats { pub(crate) all_tvu_peers: Counter, pub(crate) bad_prune_destination: Counter, diff --git a/prometheus/Cargo.toml 
b/prometheus/Cargo.toml index 9e7451087690d4..b0d5384c853e67 100644 --- a/prometheus/Cargo.toml +++ b/prometheus/Cargo.toml @@ -13,6 +13,7 @@ solana-gossip = { path = "../gossip" } solana-runtime = { path = "../runtime" } solana-sdk = { path = "../sdk" } solana-vote-program = { path = "../programs/vote" } +solana-prometheus-utils = { path = "utils" } [lib] crate-type = ["lib"] diff --git a/prometheus/macros/Cargo.toml b/prometheus/macros/Cargo.toml new file mode 100644 index 00000000000000..ae4a85212b6a8a --- /dev/null +++ b/prometheus/macros/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "solana-prometheus-macro" +version = "1.0.0" +description = "Solana Prometheus" +authors = ["ChorusOne "] +repository = "https://github.com/ChorusOne/solana" +license = "Apache-2.0" +edition = "2021" + + +[lib] +proc-macro = true + +[dependencies] +bs58 = "0.4.0" +proc-macro2 = "1.0.19" +quote = "1.0" +syn = { version = "1.0", features = ["full", "extra-traits"] } +rustversion = "1.0.3" +solana-prometheus-utils = { path = "../utils" } + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] diff --git a/prometheus/macros/src/lib.rs b/prometheus/macros/src/lib.rs new file mode 100644 index 00000000000000..823073667b5ea2 --- /dev/null +++ b/prometheus/macros/src/lib.rs @@ -0,0 +1,45 @@ +extern crate proc_macro2; + +use proc_macro::TokenStream; +#[macro_use] +extern crate quote; + +#[proc_macro_derive(ExportPrometheus)] +pub fn derive_field_count(input: TokenStream) -> TokenStream { + // Parse the input tokens into a syntax tree + let ast = syn::parse(input).unwrap(); + parse(&ast) +} + +fn parse(ast: &syn::DeriveInput) -> TokenStream { + let name = &ast.ident; + let data = &ast.data; + + let idents: Vec<_> = match data { + syn::Data::Struct(struct_data) => struct_data + .fields + .iter() + .filter_map(|field| field.ident.as_ref().map(|ident| ident)) + .collect(), + _ => panic!("Should be derived from struct"), + }; + + let expanded = quote! 
{ + impl #name { + pub fn write_prometheus(&self, out: &mut W) -> std::io::Result<()> { + use core::sync::atomic::Ordering; + #(solana_prometheus_utils::write_metric( + out, + &solana_prometheus_utils::MetricFamily { + name: &format!("solana_gossip_{}", stringify!(#idents)), + help: "Auto generated with Prometheus macro", + type_: "counter", + metrics: vec![solana_prometheus_utils::Metric::new(self.#idents.0.load(Ordering::Relaxed))], + }, + )?;)* + Ok(()) + } + } + }; + expanded.into() +} diff --git a/prometheus/src/bank_metrics.rs b/prometheus/src/bank_metrics.rs index c287b135722adb..7ec2380babc49b 100644 --- a/prometheus/src/bank_metrics.rs +++ b/prometheus/src/bank_metrics.rs @@ -1,10 +1,8 @@ -use crate::{ - banks_with_commitments::BanksWithCommitments, - utils::{write_metric, Metric, MetricFamily}, -}; -use std::io; +use crate::banks_with_commitments::BanksWithCommitments; +use solana_prometheus_utils::{write_metric, Metric, MetricFamily}; use solana_sdk::sysvar; use solana_sdk::sysvar::epoch_schedule::EpochSchedule; +use std::io; pub fn write_bank_metrics( banks_with_commitments: &BanksWithCommitments, diff --git a/prometheus/src/banks_with_commitments.rs b/prometheus/src/banks_with_commitments.rs index 6224cc4a6133de..4ad41620e1e0a6 100644 --- a/prometheus/src/banks_with_commitments.rs +++ b/prometheus/src/banks_with_commitments.rs @@ -1,9 +1,8 @@ use std::sync::{Arc, RwLock}; +use solana_prometheus_utils::Metric; use solana_runtime::{bank::Bank, bank_forks::BankForks, commitment::BlockCommitmentCache}; -use crate::utils::Metric; - pub struct BanksWithCommitments { pub finalized_bank: Arc, pub confirmed_bank: Arc, diff --git a/prometheus/src/cluster_metrics.rs b/prometheus/src/cluster_metrics.rs index ced295a188dc36..6c98e2970ff518 100644 --- a/prometheus/src/cluster_metrics.rs +++ b/prometheus/src/cluster_metrics.rs @@ -1,13 +1,10 @@ use solana_gossip::cluster_info::ClusterInfo; +use solana_prometheus_utils::{write_metric, Lamports, Metric, MetricFamily}; 
use solana_runtime::bank::Bank; use solana_sdk::{clock::Slot, pubkey::Pubkey}; use solana_vote_program::vote_state::VoteState; -use crate::{ - banks_with_commitments::BanksWithCommitments, - utils::{write_metric, Metric, MetricFamily}, - Lamports, -}; +use crate::banks_with_commitments::BanksWithCommitments; use std::{collections::HashSet, io, sync::Arc}; struct ValidatorVoteInfo { diff --git a/prometheus/src/lib.rs b/prometheus/src/lib.rs index a0f93ff346d2ab..f6c07e1be2331a 100644 --- a/prometheus/src/lib.rs +++ b/prometheus/src/lib.rs @@ -1,16 +1,12 @@ mod bank_metrics; pub mod banks_with_commitments; mod cluster_metrics; -mod utils; use banks_with_commitments::BanksWithCommitments; use solana_gossip::cluster_info::ClusterInfo; use solana_sdk::pubkey::Pubkey; use std::{collections::HashSet, sync::Arc}; -#[derive(Clone, Copy)] -pub struct Lamports(pub u64); - pub fn render_prometheus( banks_with_commitments: BanksWithCommitments, cluster_info: &Arc, @@ -31,5 +27,9 @@ pub fn render_prometheus( &mut out, ) .expect("IO error"); + cluster_info + .stats + .write_prometheus(&mut out) + .expect("IO error"); out } diff --git a/prometheus/utils/Cargo.toml b/prometheus/utils/Cargo.toml new file mode 100644 index 00000000000000..2d13fc9a3209be --- /dev/null +++ b/prometheus/utils/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "solana-prometheus-utils" +version = "1.0.0" +description = "Solana Utils Prometheus" +authors = ["ChorusOne "] +repository = "https://github.com/ChorusOne/solana" +license = "Apache-2.0" +edition = "2021" + +[dependencies] + +[lib] +crate-type = ["lib"] +name = "solana_prometheus_utils" + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] diff --git a/prometheus/src/utils.rs b/prometheus/utils/src/lib.rs similarity index 98% rename from prometheus/src/utils.rs rename to prometheus/utils/src/lib.rs index 597fd0cf1f13a7..b384fcdf23a2c1 100644 --- a/prometheus/src/utils.rs +++ b/prometheus/utils/src/lib.rs @@ -9,7 +9,8 @@ use std::io; 
use std::io::Write; use std::time::SystemTime; -use crate::Lamports; +#[derive(Clone, Copy)] +pub struct Lamports(pub u64); pub struct MetricFamily<'a> { /// Name of the metric, e.g. [`goats_teleported_total`](https://crbug.com/31482). @@ -83,6 +84,12 @@ impl<'a> Metric<'a> { self.labels.push((label_key, label_value)); self } + + /// Set the suffix. + pub fn with_suffix(mut self, suffix: &'a str) -> Metric<'a> { + self.suffix = suffix; + self + } } pub fn write_metric(out: &mut W, family: &MetricFamily) -> io::Result<()> { From 3109cbf8e6465ac7260e192a6267592dcf159766 Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 5 Aug 2022 23:35:35 -0300 Subject: [PATCH 46/47] Comment out `submit_gossip_stats` This function clears the gossip stats. --- gossip/src/cluster_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gossip/src/cluster_info.rs b/gossip/src/cluster_info.rs index 0cbdbe9cd9583d..b9ab1c2e7b4670 100644 --- a/gossip/src/cluster_info.rs +++ b/gossip/src/cluster_info.rs @@ -2527,7 +2527,7 @@ impl ClusterInfo { should_check_duplicate_instance, )?; if last_print.elapsed() > SUBMIT_GOSSIP_STATS_INTERVAL { - submit_gossip_stats(&self.stats, &self.gossip, &stakes); + // submit_gossip_stats(&self.stats, &self.gossip, &stakes); *last_print = Instant::now(); } Ok(()) From 8f54039f22d1c04be95831cbe6218b2da8a3e2dc Mon Sep 17 00:00:00 2001 From: Fynn Date: Fri, 5 Aug 2022 23:50:12 -0300 Subject: [PATCH 47/47] Do not update `last_print` Since we commented out the previous line, it doesn't make sense to update this variable.
--- gossip/src/cluster_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gossip/src/cluster_info.rs b/gossip/src/cluster_info.rs index b9ab1c2e7b4670..23bc98154bc4c2 100644 --- a/gossip/src/cluster_info.rs +++ b/gossip/src/cluster_info.rs @@ -2528,7 +2528,7 @@ impl ClusterInfo { )?; if last_print.elapsed() > SUBMIT_GOSSIP_STATS_INTERVAL { // submit_gossip_stats(&self.stats, &self.gossip, &stakes); - *last_print = Instant::now(); + // *last_print = Instant::now(); } Ok(()) }