Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export gossip with macro #73

Draft
wants to merge 47 commits into
base: prometheus
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
5e080e1
Initial Prometheus support
enriquefynn Jun 3, 2022
15ded10
Add Solana node's version
enriquefynn Jun 3, 2022
8b20faf
Rename metric
enriquefynn Jun 3, 2022
b0f40a1
Bugfix: balance should be `gauge`
enriquefynn Jun 7, 2022
a728409
delete dependabot from our fork
enriquefynn Jun 9, 2022
5332470
Remove `at` parameter
enriquefynn Jun 9, 2022
906f67c
Change path from `prometheus` to `metrics`
enriquefynn Jun 9, 2022
3a958c3
Change `cluster` -> `node`
enriquefynn Jun 13, 2022
ba68332
Rename metrics
enriquefynn Jun 13, 2022
d734527
Simplify `Lamports` struct
enriquefynn Jun 13, 2022
f528d3e
Change license to Apache 2.0
enriquefynn Jun 13, 2022
253ac1c
Add block's timestamp
enriquefynn Jun 13, 2022
ecd70c1
Rename metrics
enriquefynn Jun 13, 2022
2d09704
Turn i64 to u64 timestamp
enriquefynn Jun 13, 2022
bfc1328
Remove information about block's transactions
enriquefynn Jun 15, 2022
ad78d65
Add label about commitment level
enriquefynn Jun 15, 2022
aedcbe4
Change help msg for block's timestamp
enriquefynn Jun 15, 2022
ddf4cdf
Add and rename label
enriquefynn Jun 15, 2022
78fe0c8
Get finalized bank
enriquefynn Jun 16, 2022
2a861d9
Add metrics for each commitment level
enriquefynn Jun 16, 2022
390a8c5
Use `block_commitment_cache`
enriquefynn Jun 16, 2022
399e9e1
Limit `block_commitment_cache` read lock.
enriquefynn Jun 17, 2022
2aebf10
Correct prometheus type
enriquefynn Jun 30, 2022
b7e4d96
Save metrics about the validator's vote account
enriquefynn Jun 7, 2022
9d97a4e
Return last vote slot only if there's some value
enriquefynn Jun 13, 2022
f25093e
Rename, specify metrics
enriquefynn Jun 13, 2022
e224e52
Get last vote
enriquefynn Jun 15, 2022
ad69c55
Clone Lamports struct
enriquefynn Jun 15, 2022
f9727f3
Add label to vote balance
enriquefynn Jun 15, 2022
11a6f7f
Rename labels
enriquefynn Jun 15, 2022
3a1fad8
Add parameter to observe vote accounts.
enriquefynn Jun 21, 2022
c5421d1
Add vote metrics
enriquefynn Jun 21, 2022
0888bc4
Naming changes, add vote credits information
enriquefynn Jun 22, 2022
d7045e7
Write vote account metrics
enriquefynn Jul 4, 2022
da01458
Rename prometheus metrics help
enriquefynn Jul 12, 2022
e432aae
Refer as `validator` metrics
enriquefynn Jul 12, 2022
b711766
Rename variables
enriquefynn Jul 12, 2022
e45bbac
Bugfix: Correct initialization
enriquefynn Jul 12, 2022
ac132cf
Fix identity pubkey for each validator to monitor
enriquefynn Jul 12, 2022
980fb16
Change metric name
enriquefynn Jul 14, 2022
d46b45a
Add active stake per monitored validator
enriquefynn Jul 14, 2022
0af8d74
Add `vote_accounts_to_monitor` to `replica_node`
enriquefynn Jul 14, 2022
eb9f025
Update metric name
enriquefynn Jul 14, 2022
db9b7f7
Expose epoch schedule metrics in Prometheus
ruuda Jul 13, 2022
dccf07d
Export gossip metrics with a macro
enriquefynn Aug 6, 2022
3109cbf
Comment `submit_gossip_stats`
enriquefynn Aug 6, 2022
8f54039
Do not update `last_print`
enriquefynn Aug 6, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions .github/dependabot.yml

This file was deleted.

31 changes: 31 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ pub struct ValidatorConfig {
pub wait_to_vote_slot: Option<Slot>,
pub ledger_column_options: LedgerColumnOptions,
pub enable_quic_servers: bool,
pub vote_accounts_to_monitor: Arc<HashSet<Pubkey>>,
}

impl Default for ValidatorConfig {
Expand Down Expand Up @@ -237,6 +238,7 @@ impl Default for ValidatorConfig {
wait_to_vote_slot: None,
ledger_column_options: LedgerColumnOptions::default(),
enable_quic_servers: true,
vote_accounts_to_monitor: Arc::new(HashSet::new()),
}
}
}
Expand Down Expand Up @@ -736,6 +738,7 @@ impl Validator {
leader_schedule_cache.clone(),
connection_cache.clone(),
max_complete_transaction_status_slot,
config.vote_accounts_to_monitor.clone(),
)),
if !config.rpc_config.full_api {
None
Expand Down
2 changes: 2 additions & 0 deletions gossip/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ solana-streamer = { path = "../streamer", version = "=1.10.32" }
solana-version = { path = "../version", version = "=1.10.32" }
solana-vote-program = { path = "../programs/vote", version = "=1.10.32" }
thiserror = "1.0"
solana-prometheus-macro = { path = "../prometheus/macros", version = "=1.0.0" }
solana-prometheus-utils = { path = "../prometheus/utils", version = "=1.0.0" }

[dev-dependencies]
num_cpus = "1.13.1"
Expand Down
6 changes: 3 additions & 3 deletions gossip/src/cluster_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ pub struct ClusterInfo {
outbound_budget: DataBudget,
my_contact_info: RwLock<ContactInfo>,
ping_cache: Mutex<PingCache>,
stats: GossipStats,
pub stats: GossipStats,
socket: UdpSocket,
local_message_pending_push_queue: Mutex<Vec<CrdsValue>>,
contact_debug_interval: u64, // milliseconds, 0 = disabled
Expand Down Expand Up @@ -2527,8 +2527,8 @@ impl ClusterInfo {
should_check_duplicate_instance,
)?;
if last_print.elapsed() > SUBMIT_GOSSIP_STATS_INTERVAL {
submit_gossip_stats(&self.stats, &self.gossip, &stakes);
*last_print = Instant::now();
// submit_gossip_stats(&self.stats, &self.gossip, &stakes);
// *last_print = Instant::now();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So “submit” actually resets the counters?

I can think of two ways to make this work upstream.

  1. Make logging metrics and Prometheus metrics mutually exclusive, and put the choice in a CLI flag. Then if Prometheus metrics are enabled, we would skip the log + reset here.
  2. If GossipStats holds only counters, we could implement std::ops::Sub for it, and keep two instances of it: the current stats, and the last logged stats. Then when it is time to log, we log current - last_logged, and then set last_logged = current. That way the metrics remain increasing for Prometheus, and the log output remains unchanged.

I think option 2 is kind of neat. It’s a bit more tedious in the GossipStats, but it may be simpler in the end than threading a CLI flag through everything to this point. And it’s nicer to not have to choose.

}
Ok(())
}
Expand Down
4 changes: 3 additions & 1 deletion gossip/src/cluster_info_metrics.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use solana_prometheus_macro::ExportPrometheus;

use {
crate::crds_gossip::CrdsGossip,
itertools::Itertools,
Expand Down Expand Up @@ -87,7 +89,7 @@ impl<'a, T> Drop for TimedGuard<'a, T> {
}
}

#[derive(Default)]
#[derive(Default, ExportPrometheus)]
pub struct GossipStats {
pub(crate) all_tvu_peers: Counter,
pub(crate) bad_prune_destination: Counter,
Expand Down
1 change: 1 addition & 0 deletions local-cluster/src/validator_configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
wait_to_vote_slot: config.wait_to_vote_slot,
ledger_column_options: config.ledger_column_options.clone(),
enable_quic_servers: config.enable_quic_servers,
vote_accounts_to_monitor: config.vote_accounts_to_monitor.clone(),
}
}

Expand Down
23 changes: 23 additions & 0 deletions prometheus/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "solana-prometheus"
version = "1.10.28"
description = "Solana Prometheus"
authors = ["ChorusOne <[email protected]>"]
repository = "https://github.com/ChorusOne/solana"
license = "Apache-2.0"
edition = "2021"

[dependencies]
jsonrpc-http-server = "18.0.0"
solana-gossip = { path = "../gossip" }
solana-runtime = { path = "../runtime" }
solana-sdk = { path = "../sdk" }
solana-vote-program = { path = "../programs/vote" }
solana-prometheus-utils = { path = "utils" }

[lib]
crate-type = ["lib"]
name = "solana_prometheus"

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
23 changes: 23 additions & 0 deletions prometheus/macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "solana-prometheus-macro"
version = "1.0.0"
description = "Solana Prometheus"
authors = ["ChorusOne <[email protected]>"]
repository = "https://github.com/ChorusOne/solana"
license = "Apache-2.0"
edition = "2021"


[lib]
proc-macro = true

[dependencies]
bs58 = "0.4.0"
proc-macro2 = "1.0.19"
quote = "1.0"
syn = { version = "1.0", features = ["full", "extra-traits"] }
rustversion = "1.0.3"
solana-prometheus-utils = { path = "../utils" }

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
45 changes: 45 additions & 0 deletions prometheus/macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
extern crate proc_macro2;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool idea to make a macro for this! It’s a nice approach on the definition side, and the macro itself looks pretty simple too, nice.


use proc_macro::TokenStream;
#[macro_use]
extern crate quote;

/// Entry point of the `ExportPrometheus` derive macro.
///
/// Parses the annotated item into a `syn::DeriveInput` and hands it to
/// `parse`, which generates the `write_prometheus` implementation.
///
/// Users refer to this macro as `#[derive(ExportPrometheus)]`; the function
/// name itself is not part of the macro's public surface.
///
/// If the input tokens cannot be parsed, the parse error is returned as a
/// `compile_error!` invocation so the user sees a regular, spanned compiler
/// diagnostic rather than a panic inside the proc macro.
#[proc_macro_derive(ExportPrometheus)]
pub fn derive_field_count(input: TokenStream) -> TokenStream {
    // Parse the input tokens into a syntax tree; never unwrap here, a panic
    // in a proc macro surfaces as an opaque "proc-macro derive panicked".
    match syn::parse::<syn::DeriveInput>(input) {
        Ok(ast) => parse(&ast),
        Err(err) => err.to_compile_error().into(),
    }
}

/// Generates the inherent `write_prometheus` method for the type described by `ast`.
///
/// The generated method has the signature
/// `pub fn write_prometheus<W: std::io::Write>(&self, out: &mut W) -> std::io::Result<()>`
/// and, for every named field of the struct, calls
/// `solana_prometheus_utils::write_metric` with a metric family that is
///
/// * named `solana_gossip_<field name>` (the prefix is hard-coded here),
/// * given the fixed help text "Auto generated with Prometheus macro",
/// * typed as `counter`, and
/// * populated with a single metric read from `self.<field>.0` using a
///   `Relaxed` atomic load.
///
/// The first `write_metric` error aborts the generated method via `?`.
///
/// # Panics
///
/// Panics (at macro expansion time) when the derive is applied to anything
/// other than a struct.
fn parse(ast: &syn::DeriveInput) -> TokenStream {
let name = &ast.ident;
let data = &ast.data;

// Collect the identifiers of the struct's fields. Fields without an
// identifier yield `None` and are dropped by the `filter_map`.
let idents: Vec<_> = match data {
syn::Data::Struct(struct_data) => struct_data
.fields
.iter()
.filter_map(|field| field.ident.as_ref().map(|ident| ident))
.collect(),
_ => panic!("Should be derived from struct"),
};

// The `#( ... )*` repetition below expands once per collected field, so the
// generated method emits one metric family per field, in declaration order.
// NOTE(review): `self.#idents.0.load(..)` presumably relies on every field
// being a newtype around an atomic integer — confirm against the field types.
let expanded = quote! {
impl #name {
pub fn write_prometheus<W: std::io::Write>(&self, out: &mut W) -> std::io::Result<()> {
use core::sync::atomic::Ordering;
#(solana_prometheus_utils::write_metric(
out,
&solana_prometheus_utils::MetricFamily {
name: &format!("solana_gossip_{}", stringify!(#idents)),
help: "Auto generated with Prometheus macro",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to get the doc comment from the field and put it here? Doc comments are secretly attributes, but I don’t know if syn parses them that way.

Or maybe alternatively, we should require a #[metric(help = "blah", type = "counter")] attribute on the fields that we want to export. It’s a more invasive change, but also more explicit about what it does.

type_: "counter",
metrics: vec![solana_prometheus_utils::Metric::new(self.#idents.0.load(Ordering::Relaxed))],
},
)?;)*
Ok(())
}
}
};
expanded.into()
}
83 changes: 83 additions & 0 deletions prometheus/src/bank_metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use crate::banks_with_commitments::BanksWithCommitments;
use solana_prometheus_utils::{write_metric, Metric, MetricFamily};
use solana_sdk::sysvar;
use solana_sdk::sysvar::epoch_schedule::EpochSchedule;
use std::io;

pub fn write_bank_metrics<W: io::Write>(
banks_with_commitments: &BanksWithCommitments,
out: &mut W,
) -> io::Result<()> {
write_metric(
out,
&MetricFamily {
name: "solana_block_slot",
help: "Block Slot",
type_: "gauge",
metrics: banks_with_commitments
.for_each_commitment(|bank| Some(Metric::new(bank.clock().slot))),
},
)?;
write_metric(
out,
&MetricFamily {
name: "solana_block_epoch",
help: "Block Epoch",
type_: "gauge",
metrics: banks_with_commitments
.for_each_commitment(|bank| Some(Metric::new(bank.clock().epoch))),
},
)?;
write_metric(
out,
&MetricFamily {
name: "solana_block_epoch_start_slot",
help: "The first slot in the current epoch",
type_: "gauge",
metrics: banks_with_commitments
.for_each_commitment(|bank| {
// Note, the bank actually has a field that holds the EpochSchedule,
// but it is not public, so we can't easily access it here. We could
// make it public, but to make our patches less invasive, load the
// epoch schedule from the sysvar instead. It should always exist.
let epoch_schedule: EpochSchedule = bank
.get_account(&sysvar::epoch_schedule::id())?
.deserialize_data().ok()?;
let clock = bank.clock();
Some(Metric::new(epoch_schedule.get_first_slot_in_epoch(clock.epoch)))
}),
},
)?;
write_metric(
out,
&MetricFamily {
name: "solana_block_epoch_slots_total",
help: "The duration of the current epoch, in slots.",
type_: "gauge",
metrics: banks_with_commitments
.for_each_commitment(|bank| {
// Note, the bank actually has a field that holds the EpochSchedule,
// but it is not public, so we can't easily access it here. We could
// make it public, but to make our patches less invasive, load the
// epoch schedule from the sysvar instead. It should always exist.
let epoch_schedule: EpochSchedule = bank
.get_account(&sysvar::epoch_schedule::id())?
.deserialize_data().ok()?;
let clock = bank.clock();
Some(Metric::new(epoch_schedule.get_slots_in_epoch(clock.epoch)))
}),
},
)?;
write_metric(
out,
&MetricFamily {
name: "solana_block_timestamp_seconds",
help: "The block's UNIX timestamp, in seconds since epoch, UTC",
type_: "gauge",
metrics: banks_with_commitments
.for_each_commitment(|bank| Some(Metric::new(bank.clock().unix_timestamp as u64))),
},
)?;

Ok(())
}
Loading