Skip to content

Commit

Permalink
metrics: Export process metrics using prometheus-client (#2552)
Browse files Browse the repository at this point in the history
* Move process metrics from linkerd-app-core to linkerd-metrics (with a
  feature flag).
* Add a linkerd_metrics::prom::registry helper that automatically
  configures process metrics when the feature is enabled.
* Add a process_threads metric to help surface when the multi-core proxy
  runtime is in use.
* All of this uses prometheus-client to set up for future reusability.

Before

    # HELP process_start_time_seconds Time that the process started (in seconds since the UNIX epoch)
    # TYPE process_start_time_seconds gauge
    process_start_time_seconds 1701551542
    # HELP process_uptime_seconds_total Total time since the process started (in seconds)
    # TYPE process_uptime_seconds_total counter
    process_uptime_seconds_total 1782.137
    # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
    # TYPE process_cpu_seconds_total counter
    process_cpu_seconds_total 0.72
    # HELP process_virtual_memory_bytes Virtual memory size in bytes.
    # TYPE process_virtual_memory_bytes gauge
    process_virtual_memory_bytes 111042560
    # HELP process_resident_memory_bytes Resident memory size in bytes.
    # TYPE process_resident_memory_bytes gauge
    process_resident_memory_bytes 33910784
    # HELP process_open_fds Number of open file descriptors.
    # TYPE process_open_fds gauge
    process_open_fds 28
    # HELP process_max_fds Maximum number of open file descriptors.
    # TYPE process_max_fds gauge
    process_max_fds 1048576
    # HELP proxy_build_info Proxy build info
    # TYPE proxy_build_info gauge
    proxy_build_info{version="2.213.0",git_sha="9f7e7ac",profile="release",date="2023-11-16T23:24:26Z",vendor="linkerd"} 1

After

    # HELP proxy_build_info Proxy build info.
    # TYPE proxy_build_info gauge
    proxy_build_info{date="2023-12-06T02:15:30Z",git_sha="9c29322d5",profile="release",vendor="code@ver-sea",version="0.0.0-dev.9c29322d5"} 1
    # HELP process_start_time_seconds Time that the process started (in seconds since the UNIX epoch).
    # TYPE process_start_time_seconds gauge
    # UNIT process_start_time_seconds seconds
    process_start_time_seconds 1701829321.4647413
    # HELP process_uptime_seconds Total time since the process started (in seconds)
    # TYPE process_uptime_seconds counter
    # UNIT process_uptime_seconds seconds
    process_uptime_seconds_total 51.986633717
    # HELP process_cpu_seconds Total user and system CPU time spent in seconds
    # TYPE process_cpu_seconds counter
    # UNIT process_cpu_seconds seconds
    process_cpu_seconds_total 0.04
    # HELP process_virtual_memory_bytes Virtual memory size in bytes
    # TYPE process_virtual_memory_bytes gauge
    # UNIT process_virtual_memory_bytes bytes
    process_virtual_memory_bytes 108208128
    # HELP process_resident_memory_bytes Resident memory size in bytes
    # TYPE process_resident_memory_bytes gauge
    # UNIT process_resident_memory_bytes bytes
    process_resident_memory_bytes 27471872
    # HELP process_open_fds Number of open file descriptors
    # TYPE process_open_fds gauge
    process_open_fds 21
    # HELP process_max_fds Maximum number of open file descriptors
    # TYPE process_max_fds gauge
    process_max_fds 1048576
    # HELP process_threads Number of OS threads in the process.
    # TYPE process_threads gauge
    process_threads 2
    # EOF
  • Loading branch information
olix0r authored Dec 6, 2023
1 parent f72cc7f commit 31b2aea
Show file tree
Hide file tree
Showing 16 changed files with 219 additions and 304 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,7 @@ dependencies = [
"http",
"hyper",
"linkerd-stack",
"linkerd-system",
"parking_lot",
"prometheus-client",
"quickcheck",
Expand Down
2 changes: 1 addition & 1 deletion linkerd/app/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ linkerd-identity = { path = "../../identity" }
linkerd-idle-cache = { path = "../../idle-cache" }
linkerd-io = { path = "../../io" }
linkerd-meshtls = { path = "../../meshtls", default-features = false }
linkerd-metrics = { path = "../../metrics", features = ["linkerd-stack"] }
linkerd-metrics = { path = "../../metrics", features = ["process", "stack"] }
linkerd-opencensus = { path = "../../opencensus" }
linkerd-proxy-core = { path = "../../proxy/core" }
linkerd-proxy-api-resolve = { path = "../../proxy/api-resolve" }
Expand Down
1 change: 0 additions & 1 deletion linkerd/app/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ pub mod metrics;
pub mod proxy;
pub mod serve;
pub mod svc;
pub mod telemetry;
pub mod transport;

pub use self::build_info::{BuildInfo, BUILD_INFO};
Expand Down
12 changes: 3 additions & 9 deletions linkerd/app/core/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{
classify::Class,
control, http_metrics, opencensus, profiles, stack_metrics,
svc::Param,
telemetry, tls,
tls,
transport::{self, labels::TlsConnect},
};
use linkerd_addr::Addr;
Expand Down Expand Up @@ -146,13 +146,8 @@ where
// === impl Metrics ===

impl Metrics {
pub fn new(
retain_idle: Duration,
start_time: telemetry::StartTime,
) -> (Self, impl FmtMetrics + Clone + Send + 'static) {
let registry = prom::Registry::default();

let process = telemetry::process::Report::new(start_time);
pub fn new(retain_idle: Duration) -> (Self, impl FmtMetrics + Clone + Send + 'static) {
let registry = prom::registry();

registry.write().register(
"proxy_build_info",
Expand Down Expand Up @@ -223,7 +218,6 @@ impl Metrics {
.and_report(transport_report)
.and_report(opencensus_report)
.and_report(stack)
.and_report(process)
// The prom registry reports an "# EOF" at the end of its export, so
// it should be emitted last.
.and_report(registry);
Expand Down
3 changes: 0 additions & 3 deletions linkerd/app/core/src/telemetry.rs

This file was deleted.

205 changes: 0 additions & 205 deletions linkerd/app/core/src/telemetry/process.rs

This file was deleted.

3 changes: 1 addition & 2 deletions linkerd/app/inbound/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ pub fn default_config() -> Config {
pub fn runtime() -> (ProxyRuntime, drain::Signal) {
let (drain_tx, drain) = drain::channel();
let (tap, _) = tap::new();
let (metrics, _) =
metrics::Metrics::new(std::time::Duration::from_secs(10), Default::default());
let (metrics, _) = metrics::Metrics::new(std::time::Duration::from_secs(10));
let runtime = ProxyRuntime {
identity: rustls::creds::default_for_test().1.into(),
metrics: metrics.proxy,
Expand Down
9 changes: 1 addition & 8 deletions linkerd/app/integration/src/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,14 +460,7 @@ async fn run(proxy: Proxy, mut env: TestEnv, random_ports: bool) -> Listening {
let bind_adm = listen::BindTcp::default();
let (shutdown_tx, mut shutdown_rx) = tokio::sync::mpsc::unbounded_channel();
let main = config
.build(
bind_in,
bind_out,
bind_adm,
shutdown_tx,
trace_handle,
Default::default(),
)
.build(bind_in, bind_out, bind_adm, shutdown_tx, trace_handle)
.await
.expect("config");

Expand Down
3 changes: 1 addition & 2 deletions linkerd/app/outbound/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ pub(crate) fn default_config() -> Config {
pub(crate) fn runtime() -> (ProxyRuntime, drain::Signal) {
let (drain_tx, drain) = drain::channel();
let (tap, _) = tap::new();
let (metrics, _) =
metrics::Metrics::new(std::time::Duration::from_secs(10), Default::default());
let (metrics, _) = metrics::Metrics::new(std::time::Duration::from_secs(10));
let runtime = ProxyRuntime {
identity: linkerd_meshtls_rustls::creds::default_for_test().1.into(),
metrics: metrics.proxy,
Expand Down
4 changes: 1 addition & 3 deletions linkerd/app/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use linkerd_app_core::{
dns, drain,
metrics::FmtMetrics,
svc::Param,
telemetry,
transport::{addrs::*, listen::Bind},
Error, ProxyRuntime,
};
Expand Down Expand Up @@ -98,7 +97,6 @@ impl Config {
bind_admin: BAdmin,
shutdown_tx: mpsc::UnboundedSender<()>,
log_level: trace::Handle,
start_time: telemetry::StartTime,
) -> Result<App, Error>
where
BIn: Bind<ServerConfig> + 'static,
Expand Down Expand Up @@ -128,7 +126,7 @@ impl Config {
..
} = self;
debug!("building app");
let (metrics, report) = Metrics::new(admin.metrics_retain_idle, start_time);
let (metrics, report) = Metrics::new(admin.metrics_retain_idle);

let dns = dns.build();

Expand Down
3 changes: 3 additions & 0 deletions linkerd/metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@ publish = false

[features]
default = []
process = ["linkerd-system"]
stack = ["linkerd-stack"]
test_util = []

[dependencies]
deflate = { version = "1", features = ["gzip"] }
http = "0.2"
hyper = { version = "0.14", features = ["http1", "http2"] }
linkerd-stack = { path = "../stack", optional = true }
linkerd-system = { path = "../system", optional = true }
parking_lot = "0.12"
prometheus-client = "0.22"
tokio = { version = "1", features = ["time"] }
Expand Down
40 changes: 1 addition & 39 deletions linkerd/metrics/src/counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ impl<F: Factor> FmtMetric for Counter<F> {
#[allow(clippy::float_cmp)]
mod tests {
use super::*;
use crate::{MicrosAsSeconds, MillisAsSeconds, MAX_PRECISE_UINT64};
use crate::MAX_PRECISE_UINT64;

#[test]
fn count_simple() {
Expand Down Expand Up @@ -123,42 +123,4 @@ mod tests {
let max = Counter::<()>::from(MAX_PRECISE_UINT64);
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
}

#[test]
fn millis_as_seconds() {
let c = Counter::<MillisAsSeconds>::from(1);
assert_eq!(c.value(), 0.001);

let c = Counter::<MillisAsSeconds>::from((MAX_PRECISE_UINT64 - 1) * 1000);
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64);
c.add(1000);
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64);
c.add(1000);
assert_eq!(c.value(), 0.0);
c.add(1000);
assert_eq!(c.value(), 1.0);

let max = Counter::<MillisAsSeconds>::from(MAX_PRECISE_UINT64 * 1000);
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
}

#[test]
fn micros_as_seconds() {
let c = Counter::<MicrosAsSeconds>::from(1);
assert_eq!(c.value(), 0.000_001);
c.add(110);
assert_eq!(c.value(), 0.000_111);

let c = Counter::<MicrosAsSeconds>::from((MAX_PRECISE_UINT64 - 1) * 1000);
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64 * 0.001);
c.add(1_000);
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64 * 0.001);
c.add(1_000);
assert_eq!(c.value(), 0.0);
c.add(1);
assert_eq!(c.value(), 0.000_001);

let max = Counter::<MicrosAsSeconds>::from(MAX_PRECISE_UINT64 * 1000);
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64 * 0.001);
}
}
Loading

0 comments on commit 31b2aea

Please sign in to comment.