From faaaa55afcd42608bc1471af182027f1d99b8da0 Mon Sep 17 00:00:00 2001
From: David Herberth
Date: Tue, 17 Dec 2024 16:50:14 +0100
Subject: [PATCH] ref(metrics): Rework metrics aggregator to keep internal partitions (#4378)

Replaces the current metrics aggregator, which is based on fixed priorities in a priority queue polled at regular intervals, with a ring-buffer-based aggregator.

Overview:

- The aggregator is now ring-buffer based instead of a priority queue; this minimizes the work needed to merge and flush buckets.
- The aggregator no longer guarantees a minimum delay for backdated buckets, but on average still delays them long enough (real-time buckets still get an accurate delay).
- The aggregator is now driven by flushes, and this is how it tracks and advances time. All operations (merges and flushes) can therefore be done entirely without accessing system time.
- Cost tracking is much more efficient: cost is tracked for the total and per slot, and on flush the slot values are subtracted from the total, which avoids additional iterations and cost recalculations.
- Per-project cost limits are only tracked per slot instead of overall, which greatly reduces the necessary bookkeeping.
- On shutdown, the aggregator is replaced with one that flushes much more aggressively. This massively simplifies the code, still flushes in good time, and yields better overall flush behaviour by keeping partitions consistent.
- Metric name/tag validation is now a concern of the service instead of the aggregator.
- Uses `ahash` with a fixed seed instead of fnv ([it's faster](https://github.com/tkaitchuck/aHash?tab=readme-ov-file#comparison-with-other-hashers)).
- Lots of unused metrics have been reworked or modified (e.g., a histogram that was only used for sum+count is now two counters).

For implementation details see the exhaustive code documentation, especially in the inner aggregator.
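The heart of the rework is the ring-buffer indexing described above: a bucket is mapped to a time slot plus an assigned partition, and every flush advances the head by one partition. Below is a minimal sketch of just that arithmetic, with a placeholder `Slot` type; it mirrors `sub_rem_euclid` and the head bookkeeping in `inner.rs` but is not the actual implementation.

```rust
use std::collections::VecDeque;

/// Simplified stand-in for the real aggregation slot in `inner.rs`.
#[derive(Default, Debug)]
struct Slot;

/// `(a - b).rem_euclid(m)` using only unsigned arithmetic, as in `inner.rs`.
fn sub_rem_euclid(a: u64, b: u64, m: u64) -> u64 {
    ((a + m) - (b % m)) % m
}

struct Ring {
    slots: VecDeque<Slot>,
    num_partitions: u64,
    bucket_interval: u64,
    /// Position of the first slot, measured in partitions since the epoch.
    head: u64,
}

impl Ring {
    /// Maps a bucket timestamp and its assigned partition to an index into the ring.
    fn index(&self, timestamp_secs: u64, partition: u64) -> usize {
        let time_slot = timestamp_secs / self.bucket_interval;
        let slot = time_slot * self.num_partitions + partition;
        // `(slot - head) mod len`, kept entirely in unsigned arithmetic.
        sub_rem_euclid(slot, self.head, self.slots.len() as u64) as usize
    }

    /// Pops the oldest partition, appends a fresh slot at the back, and
    /// effectively advances time by `bucket_interval / num_partitions`.
    fn flush_next(&mut self) -> Slot {
        let flushed = self.slots.pop_front().expect("ring is never empty");
        self.slots.push_back(Slot::default());
        self.head += 1;
        flushed
    }
}

fn main() {
    let mut ring = Ring {
        slots: std::iter::repeat_with(Slot::default).take(12).collect(),
        num_partitions: 2,
        bucket_interval: 10,
        // Head at t = 70s with 2 partitions per 10s slot.
        head: 70 / 10 * 2,
    };
    assert_eq!(ring.index(80, 1), 3);
    ring.flush_next();
    assert_eq!(ring.index(80, 1), 2);
}
```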
Fixes: https://github.com/getsentry/team-ingest/issues/606 --- CHANGELOG.md | 1 + Cargo.lock | 2 + relay-config/src/aggregator.rs | 39 +- relay-config/src/config.rs | 58 - relay-metrics/Cargo.toml | 1 + relay-metrics/benches/benches.rs | 11 +- relay-metrics/src/aggregator/buckets.rs | 228 ---- relay-metrics/src/aggregator/config.rs | 87 +- relay-metrics/src/aggregator/inner.rs | 973 +++++++++++++ relay-metrics/src/aggregator/mod.rs | 1205 +++-------------- ...gregator__inner__tests__merge_flush-3.snap | 150 ++ ...gregator__inner__tests__merge_flush-5.snap | 150 ++ ...aggregator__inner__tests__merge_flush.snap | 153 +++ relay-metrics/src/aggregator/stats.rs | 163 +++ relay-metrics/src/lib.rs | 1 + relay-metrics/src/statsd.rs | 86 +- relay-metrics/src/utils.rs | 140 ++ relay-metrics/src/view.rs | 5 +- relay-server/Cargo.toml | 1 + relay-server/src/metrics/metric_stats.rs | 16 +- .../src/services/metrics/aggregator.rs | 398 ++++-- relay-server/src/services/metrics/router.rs | 2 - relay-server/src/services/processor.rs | 42 +- .../src/services/processor/metrics.rs | 8 +- .../src/services/processor/span/processing.rs | 6 +- relay-server/src/statsd.rs | 22 - relay-server/src/utils/managed_envelope.rs | 6 +- tests/integration/test_healthchecks.py | 14 - tests/integration/test_metrics.py | 45 +- tests/integration/test_spans.py | 1 + tests/integration/test_store.py | 20 +- 31 files changed, 2267 insertions(+), 1767 deletions(-) delete mode 100644 relay-metrics/src/aggregator/buckets.rs create mode 100644 relay-metrics/src/aggregator/inner.rs create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap create mode 100644 relay-metrics/src/aggregator/stats.rs create mode 100644 relay-metrics/src/utils.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 73f8871c0e..ae7dea95c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ **Internal**: +- Rework metrics aggregator to keep internal partitions. ([#4378](https://github.com/getsentry/relay/pull/4378)) - Remove support for metrics with profile namespace. ([#4391](https://github.com/getsentry/relay/pull/4391)) ## 24.11.2 diff --git a/Cargo.lock b/Cargo.lock index e68391a256..f47bb8f8cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3676,6 +3676,7 @@ dependencies = [ name = "relay-metrics" version = "24.11.2" dependencies = [ + "ahash", "bytecount", "chrono", "criterion", @@ -3889,6 +3890,7 @@ dependencies = [ "axum-server", "backoff", "brotli 6.0.0", + "bytecount", "bytes", "bzip2", "chrono", diff --git a/relay-config/src/aggregator.rs b/relay-config/src/aggregator.rs index 80d34e0121..dd52c7fae3 100644 --- a/relay-config/src/aggregator.rs +++ b/relay-config/src/aggregator.rs @@ -1,7 +1,7 @@ //! Metrics aggregator configuration. use relay_metrics::aggregator::AggregatorConfig; -use relay_metrics::MetricNamespace; +use relay_metrics::{MetricNamespace, UnixTimestamp}; use serde::{Deserialize, Serialize}; /// Parameters used for metric aggregation. @@ -12,27 +12,52 @@ pub struct AggregatorServiceConfig { #[serde(flatten)] pub aggregator: AggregatorConfig, + /// The length the name of a metric is allowed to be. + /// + /// Defaults to `200` bytes. + pub max_name_length: usize, + + /// The length the tag key is allowed to be. + /// + /// Defaults to `200` bytes. 
+ pub max_tag_key_length: usize, + + /// The length the tag value is allowed to be. + /// + /// Defaults to `200` chars. + pub max_tag_value_length: usize, + /// The approximate maximum number of bytes submitted within one flush cycle. /// /// This controls how big flushed batches of buckets get, depending on the number of buckets, /// the cumulative length of their keys, and the number of raw values. Since final serialization - /// adds some additional overhead, this number is approxmate and some safety margin should be + /// adds some additional overhead, this number is approximate and some safety margin should be /// left to hard limits. pub max_flush_bytes: usize, +} - /// The flushing interval in milliseconds that determines how often the aggregator is polled for - /// flushing new buckets. +impl AggregatorServiceConfig { + /// Returns the valid range for metrics timestamps. /// - /// Defaults to `100` milliseconds. - pub flush_interval_ms: u64, + /// Metrics or buckets outside of this range should be discarded. + pub fn timestamp_range(&self) -> std::ops::Range { + let now = UnixTimestamp::now().as_secs(); + let min_timestamp = + UnixTimestamp::from_secs(now.saturating_sub(self.aggregator.max_secs_in_past)); + let max_timestamp = + UnixTimestamp::from_secs(now.saturating_add(self.aggregator.max_secs_in_future)); + min_timestamp..max_timestamp + } } impl Default for AggregatorServiceConfig { fn default() -> Self { Self { aggregator: AggregatorConfig::default(), + max_name_length: 200, + max_tag_key_length: 200, + max_tag_value_length: 200, max_flush_bytes: 5_000_000, // 5 MB - flush_interval_ms: 100, // 100 milliseconds } } } diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index b3577376ae..19779067f7 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -15,7 +15,6 @@ use relay_kafka::{ ConfigError as KafkaConfigError, KafkaConfigParam, KafkaParams, KafkaTopic, TopicAssignment, TopicAssignments, }; -use relay_metrics::aggregator::{AggregatorConfig, FlushBatching}; use relay_metrics::MetricNamespace; use serde::de::{DeserializeOwned, Unexpected, Visitor}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -2461,63 +2460,6 @@ impl Config { &self.values.cogs.relay_resource_id } - /// Creates an [`AggregatorConfig`] that is compatible with every other aggregator. - /// - /// A lossless aggregator can be put in front of any of the configured aggregators without losing data that the configured aggregator would keep. - /// This is useful for pre-aggregating metrics together in a single aggregator instance. - pub fn permissive_aggregator_config(&self) -> AggregatorConfig { - let AggregatorConfig { - mut bucket_interval, - mut max_secs_in_past, - mut max_secs_in_future, - mut max_name_length, - mut max_tag_key_length, - mut max_tag_value_length, - mut max_project_key_bucket_bytes, - mut max_total_bucket_bytes, - .. 
- } = self.default_aggregator_config().aggregator; - - for secondary_config in self.secondary_aggregator_configs() { - let agg = &secondary_config.config.aggregator; - - bucket_interval = bucket_interval.min(agg.bucket_interval); - max_secs_in_past = max_secs_in_past.max(agg.max_secs_in_past); - max_secs_in_future = max_secs_in_future.max(agg.max_secs_in_future); - max_name_length = max_name_length.max(agg.max_name_length); - max_tag_key_length = max_tag_key_length.max(agg.max_tag_key_length); - max_tag_value_length = max_tag_value_length.max(agg.max_tag_value_length); - max_project_key_bucket_bytes = - max_project_key_bucket_bytes.max(agg.max_project_key_bucket_bytes); - max_total_bucket_bytes = max_total_bucket_bytes.max(agg.max_total_bucket_bytes); - } - - for agg in self - .secondary_aggregator_configs() - .iter() - .map(|sc| &sc.config) - .chain(std::iter::once(self.default_aggregator_config())) - { - if agg.aggregator.bucket_interval % bucket_interval != 0 { - relay_log::error!("buckets don't align"); - } - } - - AggregatorConfig { - bucket_interval, - max_secs_in_past, - max_secs_in_future, - max_name_length, - max_tag_key_length, - max_tag_value_length, - max_project_key_bucket_bytes, - max_total_bucket_bytes, - initial_delay: 30, - flush_partitions: None, - flush_batching: FlushBatching::Project, - } - } - /// Returns configuration for the default metrics aggregator. pub fn default_aggregator_config(&self) -> &AggregatorServiceConfig { &self.values.aggregator diff --git a/relay-metrics/Cargo.toml b/relay-metrics/Cargo.toml index f9785e709d..ad1acdfada 100644 --- a/relay-metrics/Cargo.toml +++ b/relay-metrics/Cargo.toml @@ -16,6 +16,7 @@ workspace = true redis = ["relay-redis/impl"] [dependencies] +ahash = { workspace = true } bytecount = { workspace = true } chrono = { workspace = true } fnv = { workspace = true } diff --git a/relay-metrics/benches/benches.rs b/relay-metrics/benches/benches.rs index 5bdc3a1527..d6f5a04363 100644 --- a/relay-metrics/benches/benches.rs +++ b/relay-metrics/benches/benches.rs @@ -6,13 +6,14 @@ use rand::SeedableRng; use relay_base_schema::project::ProjectKey; use relay_common::time::UnixTimestamp; use relay_metrics::{ - aggregator::{Aggregator, AggregatorConfig, FlushDecision}, + aggregator::{Aggregator, AggregatorConfig}, Bucket, BucketValue, DistributionValue, FiniteF64, }; use std::cell::RefCell; use std::collections::BTreeMap; use std::fmt; use std::ops::Range; +use std::time::SystemTime; struct NumbersGenerator { min: usize, @@ -188,7 +189,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { b.iter_batched( || { let timestamp = UnixTimestamp::now(); - let aggregator: Aggregator = Aggregator::new(config.clone()); + let aggregator = Aggregator::named("default".to_owned(), &config); (aggregator, input.get_buckets(timestamp)) }, |(mut aggregator, buckets)| { @@ -215,7 +216,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { b.iter_batched( || { let timestamp = UnixTimestamp::now(); - let mut aggregator: Aggregator = Aggregator::new(config.clone()); + let mut aggregator = Aggregator::named("default".to_owned(), &config); for (project_key, bucket) in input.get_buckets(timestamp) { aggregator.merge(project_key, bucket).unwrap(); } @@ -224,9 +225,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { |mut aggregator| { // XXX: Ideally we'd want to test the entire try_flush here, but spawning // a service is too much work here. 
- black_box(aggregator.pop_flush_buckets(black_box(false), |_| { - FlushDecision::Flush(Vec::new()) - })); + let _ = black_box(aggregator.try_flush_next(SystemTime::UNIX_EPOCH)); }, BatchSize::SmallInput, ) diff --git a/relay-metrics/src/aggregator/buckets.rs b/relay-metrics/src/aggregator/buckets.rs deleted file mode 100644 index ac6d06c43a..0000000000 --- a/relay-metrics/src/aggregator/buckets.rs +++ /dev/null @@ -1,228 +0,0 @@ -use std::collections::BTreeMap; -use std::hash::{Hash, Hasher}; - -use fnv::FnvHasher; -use priority_queue::PriorityQueue; -use relay_base_schema::project::ProjectKey; -use relay_common::time::UnixTimestamp; -use tokio::time::Instant; - -use crate::aggregator::{cost, AggregateMetricsError}; -use crate::bucket::BucketValue; -use crate::{BucketMetadata, MetricName, MetricNamespace}; - -/// Holds a collection of buckets and maintains flush order. -#[derive(Debug, Default)] -pub struct Buckets { - queue: PriorityQueue, -} - -impl Buckets { - /// Returns the current amount of buckets stored. - pub fn len(&self) -> usize { - self.queue.len() - } - - /// Merge a new bucket into the existing collection of buckets. - pub fn merge( - &mut self, - key: BucketKey, - value: T, - merge: impl FnOnce(&BucketKey, T, &mut QueuedBucket) -> Result, - create: impl FnOnce(&BucketKey, T) -> QueuedBucket, - ) -> Result { - let mut added_cost = 0; - - // Need to do a bunch of shenanigans to work around the limitations of the priority queue - // API. There is no `entry` like API which would allow us to return errors on update - // as well as make the compiler recognize control flow to recognize that `bucket` - // is only used once. - // - // The following code never panics and prefers `expect` over a silent `if let Some(..)` - // to prevent errors in future refactors. - let mut value = Some(value); - let mut error = None; - - let updated = self.queue.change_priority_by(&key, |existing| { - let value = value.take().expect("expected bucket to not be used"); - - match merge(&key, value, existing) { - Ok(cost) => added_cost = cost, - Err(err) => error = Some(err), - } - }); - - if let Some(error) = error { - return Err(error); - } - - if !updated { - let value = create( - &key, - value.expect("expected bucket not to be used when the value was not updated"), - ); - - added_cost = key.cost() + value.value.cost(); - self.queue.push(key, value); - } - - Ok(added_cost) - } - - /// Tries to pop the next element bucket to be flushed. - /// - /// Returns `None` if there are no more metric buckets or - /// if the passed filter `f` returns `False`. - pub fn try_pop( - &mut self, - f: impl FnOnce(&BucketKey, &QueuedBucket) -> bool, - ) -> Option<(BucketKey, QueuedBucket)> { - let (key, value) = self.queue.peek()?; - if !f(key, value) { - return None; - } - self.queue.pop() - } - - /// Inserts an entry back into the collection. - /// - /// The intended use is to return bucket after [`Self::try_pop`]. - /// - /// Note: this should only be used to return values back to the queue - /// after removing them, otherwise use [`Self::merge`]. 
- pub fn re_add(&mut self, key: BucketKey, value: QueuedBucket) { - let _old = self.queue.push(key, value); - debug_assert!(_old.is_none()); - } -} - -impl IntoIterator for Buckets { - type Item = (BucketKey, QueuedBucket); - type IntoIter = priority_queue::core_iterators::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.queue.into_iter() - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct BucketKey { - pub project_key: ProjectKey, - pub timestamp: UnixTimestamp, - pub metric_name: MetricName, - pub tags: BTreeMap, - pub extracted_from_indexed: bool, -} - -impl BucketKey { - /// Creates a 64-bit hash of the bucket key using FnvHasher. - /// - /// This is used for partition key computation and statsd logging. - pub fn hash64(&self) -> u64 { - let mut hasher = FnvHasher::default(); - std::hash::Hash::hash(self, &mut hasher); - hasher.finish() - } - - /// Estimates the number of bytes needed to encode the bucket key. - /// - /// Note that this does not necessarily match the exact memory footprint of the key, - /// because data structures have a memory overhead. - pub fn cost(&self) -> usize { - std::mem::size_of::() + self.metric_name.len() + cost::tags_cost(&self.tags) - } - - /// Returns the namespace of this bucket. - pub fn namespace(&self) -> MetricNamespace { - self.metric_name.namespace() - } - - /// Computes a stable partitioning key for this [`crate::Bucket`]. - /// - /// The partitioning key is inherently producing collisions, since the output of the hasher is - /// reduced into an interval of size `partitions`. This means that buckets with totally - /// different values might end up in the same partition. - /// - /// The role of partitioning is to let Relays forward the same metric to the same upstream - /// instance with the goal of increasing bucketing efficiency. - pub fn partition_key(&self, partitions: u64) -> u64 { - let key = (self.project_key, &self.metric_name, &self.tags); - - let mut hasher = fnv::FnvHasher::default(); - key.hash(&mut hasher); - - let partitions = partitions.max(1); - hasher.finish() % partitions - } -} - -/// Bucket in the [`crate::aggregator::Aggregator`] with a defined flush time. -/// -/// This type implements an inverted total ordering. The maximum queued bucket has the lowest flush -/// time, which is suitable for using it in a [`BinaryHeap`]. -/// -/// [`BinaryHeap`]: std::collections::BinaryHeap -#[derive(Debug)] -pub struct QueuedBucket { - pub flush_at: Instant, - pub value: BucketValue, - pub metadata: BucketMetadata, -} - -impl QueuedBucket { - /// Creates a new `QueuedBucket` with a given flush time. - pub fn new(flush_at: Instant, value: BucketValue, metadata: BucketMetadata) -> Self { - Self { - flush_at, - value, - metadata, - } - } - - /// Returns `true` if the flush time has elapsed. - pub fn elapsed(&self, now: Instant) -> bool { - now > self.flush_at - } - - /// Merges a bucket into the current queued bucket. - /// - /// Returns the value cost increase on success, - /// otherwise returns an error if the passed bucket value type does not match - /// the contained type. 
- pub fn merge( - &mut self, - value: BucketValue, - metadata: BucketMetadata, - ) -> Result { - let cost_before = self.value.cost(); - - self.value - .merge(value) - .map_err(|_| AggregateMetricsError::InvalidTypes)?; - self.metadata.merge(metadata); - - Ok(self.value.cost().saturating_sub(cost_before)) - } -} - -impl PartialEq for QueuedBucket { - fn eq(&self, other: &Self) -> bool { - self.flush_at.eq(&other.flush_at) - } -} - -impl Eq for QueuedBucket {} - -impl PartialOrd for QueuedBucket { - fn partial_cmp(&self, other: &Self) -> Option { - // Comparing order is reversed to convert the max heap into a min heap - Some(other.flush_at.cmp(&self.flush_at)) - } -} - -impl Ord for QueuedBucket { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // Comparing order is reversed to convert the max heap into a min heap - other.flush_at.cmp(&self.flush_at) - } -} diff --git a/relay-metrics/src/aggregator/config.rs b/relay-metrics/src/aggregator/config.rs index 78c9564b73..0788fba271 100644 --- a/relay-metrics/src/aggregator/config.rs +++ b/relay-metrics/src/aggregator/config.rs @@ -1,6 +1,3 @@ -use std::time::Duration; - -use relay_common::time::UnixTimestamp; use serde::{Deserialize, Serialize}; /// Configuration value for [`AggregatorConfig::flush_batching`]. @@ -43,15 +40,21 @@ pub struct AggregatorConfig { /// /// Defaults to `10` seconds. Every metric is sorted into a bucket of this size based on its /// timestamp. This defines the minimum granularity with which metrics can be queried later. - pub bucket_interval: u64, + pub bucket_interval: u32, + + /// The aggregator size. + /// + /// This determines how many individual bucket intervals, are stored in the aggregator. + /// Increasing this number will add additional delay for backdated and future buckets. + /// + /// Defaults to: 1. + pub aggregator_size: u32, /// The initial delay in seconds to wait before flushing a bucket. /// /// Defaults to `30` seconds. Before sending an aggregated bucket, this is the time Relay waits /// for buckets that are being reported in real time. - /// - /// Relay applies up to a full `bucket_interval` of additional jitter after the initial delay to spread out flushing real time buckets. - pub initial_delay: u64, + pub initial_delay: u32, /// The age in seconds of the oldest allowed bucket timestamp. /// @@ -63,28 +66,13 @@ pub struct AggregatorConfig { /// Defaults to 1 minute. pub max_secs_in_future: u64, - /// The length the name of a metric is allowed to be. - /// - /// Defaults to `200` bytes. - pub max_name_length: usize, - - /// The length the tag key is allowed to be. - /// - /// Defaults to `200` bytes. - pub max_tag_key_length: usize, - - /// The length the tag value is allowed to be. - /// - /// Defaults to `200` chars. - pub max_tag_value_length: usize, - /// Maximum amount of bytes used for metrics aggregation per project key. /// /// Similar measuring technique to [`Self::max_total_bucket_bytes`], but instead of a /// global/process-wide limit, it is enforced per project key. /// /// Defaults to `None`, i.e. no limit. - pub max_project_key_bucket_bytes: Option, + pub max_project_key_bucket_bytes: Option, /// Maximum amount of bytes used for metrics aggregation. /// @@ -93,15 +81,10 @@ pub struct AggregatorConfig { /// in hashmaps. /// /// Defaults to `None`, i.e. no limit. - pub max_total_bucket_bytes: Option, + pub max_total_bucket_bytes: Option, /// The number of logical partitions that can receive flushed buckets. 
- /// - /// If set, buckets are partitioned by (bucket key % flush_partitions), and routed - /// by setting the header `X-Sentry-Relay-Shard`. - /// - /// This setting will take effect only when paired with [`FlushBatching::Partition`]. - pub flush_partitions: Option, + pub flush_partitions: Option, /// The batching mode for the flushing of the aggregator. /// @@ -115,54 +98,14 @@ pub struct AggregatorConfig { pub flush_batching: FlushBatching, } -impl AggregatorConfig { - /// Returns the time width buckets. - pub fn bucket_interval(&self) -> Duration { - Duration::from_secs(self.bucket_interval) - } - - /// Returns the initial flush delay after the end of a bucket's original time window. - pub fn initial_delay(&self) -> Duration { - Duration::from_secs(self.initial_delay) - } - - /// Determines the target bucket for an incoming bucket timestamp and bucket width. - /// - /// We select the output bucket which overlaps with the center of the incoming bucket. - /// Fails if timestamp is too old or too far into the future. - pub fn get_bucket_timestamp( - &self, - timestamp: UnixTimestamp, - bucket_width: u64, - ) -> UnixTimestamp { - // Find middle of the input bucket to select a target - let ts = timestamp.as_secs().saturating_add(bucket_width / 2); - // Align target_timestamp to output bucket width - let ts = (ts / self.bucket_interval) * self.bucket_interval; - UnixTimestamp::from_secs(ts) - } - - /// Returns the valid range for metrics timestamps. - /// - /// Metrics or buckets outside of this range should be discarded. - pub fn timestamp_range(&self) -> std::ops::Range { - let now = UnixTimestamp::now().as_secs(); - let min_timestamp = UnixTimestamp::from_secs(now.saturating_sub(self.max_secs_in_past)); - let max_timestamp = UnixTimestamp::from_secs(now.saturating_add(self.max_secs_in_future)); - min_timestamp..max_timestamp - } -} - impl Default for AggregatorConfig { fn default() -> Self { Self { bucket_interval: 10, + aggregator_size: 1, initial_delay: 30, - max_secs_in_past: 5 * 24 * 60 * 60, // 5 days, as for sessions + max_secs_in_past: 5 * 24 * 60 * 60, // 5 days max_secs_in_future: 60, // 1 minute - max_name_length: 200, - max_tag_key_length: 200, - max_tag_value_length: 200, max_project_key_bucket_bytes: None, max_total_bucket_bytes: None, flush_batching: FlushBatching::default(), diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs new file mode 100644 index 0000000000..c266449ff9 --- /dev/null +++ b/relay-metrics/src/aggregator/inner.rs @@ -0,0 +1,973 @@ +use core::fmt; +use std::collections::{BTreeMap, VecDeque}; +use std::mem; +use std::time::Duration; + +use ahash::RandomState; +use hashbrown::hash_map::Entry; +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricName; +use relay_base_schema::project::ProjectKey; +use relay_common::time::UnixTimestamp; + +use crate::aggregator::stats; +use crate::aggregator::{AggregateMetricsError, FlushBatching}; +use crate::utils::ByNamespace; +use crate::{BucketMetadata, BucketValue, DistributionType, SetType}; + +#[derive(Default)] +pub struct Partition { + pub partition_key: u32, + pub buckets: HashMap, + pub stats: PartitionStats, +} + +impl fmt::Debug for Partition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #[cfg(test)] + let buckets = &self.buckets.iter().collect::>(); + #[cfg(not(test))] + let buckets = &self.buckets; + + f.debug_struct("Partition") + .field("partition_key", &self.partition_key) + .field("stats", &self.stats) + .field("buckets", 
buckets) + .finish() + } +} + +#[derive(Default, Debug)] +pub struct PartitionStats { + /// Amount of unique buckets in the partition. + #[expect(unused, reason = "used for snapshot tests")] + pub count: u64, + /// Amount of unique buckets in the partition by namespace. + pub count_by_namespace: ByNamespace, + /// Amount of times a bucket was merged in the partition. + #[expect(unused, reason = "used for snapshot tests")] + pub merges: u64, + /// Amount of times a bucket was merged in the partition by namespace. + pub merges_by_namespace: ByNamespace, + /// Cost of buckets in the partition. + #[expect(unused, reason = "used for snapshot tests")] + pub cost: u64, + /// Cost of buckets in the partition by namespace. + pub cost_by_namespace: ByNamespace, +} + +impl From<&stats::Slot> for PartitionStats { + fn from(value: &stats::Slot) -> Self { + Self { + count: value.count, + count_by_namespace: value.count_by_namespace, + merges: value.merges, + merges_by_namespace: value.merges_by_namespace, + cost: value.cost, + cost_by_namespace: value.cost_by_namespace, + } + } +} + +#[derive(Default, Debug)] +pub struct Stats { + /// Total amount of buckets in the aggregator. + #[expect(unused, reason = "used for snapshot tests")] + pub count: u64, + /// Total amount of buckets in the aggregator by namespace. + pub count_by_namespace: ByNamespace, + /// Total bucket cost in the aggregator. + #[expect(unused, reason = "used for snapshot tests")] + pub cost: u64, + /// Total bucket cost in the aggregator by namespace. + pub cost_by_namespace: ByNamespace, +} + +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct BucketKey { + pub project_key: ProjectKey, + pub timestamp: UnixTimestamp, + pub metric_name: MetricName, + pub tags: BTreeMap, + pub extracted_from_indexed: bool, +} + +impl BucketKey { + /// Estimates the number of bytes needed to encode the bucket key. + /// + /// Note that this does not necessarily match the exact memory footprint of the key, + /// because data structures have a memory overhead. + pub fn cost(&self) -> usize { + std::mem::size_of::() + self.metric_name.len() + crate::utils::tags_cost(&self.tags) + } +} + +impl fmt::Debug for BucketKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}-{}-{}", + self.timestamp, self.project_key, self.metric_name + ) + } +} + +pub struct BucketData { + pub value: BucketValue, + pub metadata: BucketMetadata, +} + +impl fmt::Debug for BucketData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.value.fmt(f) + } +} + +impl BucketData { + /// Merges another bucket's data into this one. + /// + /// Returns the value cost increase on success. + fn merge(&mut self, other: Self) -> Result { + let cost_before = self.value.cost(); + + self.value + .merge(other.value) + .map_err(|_| AggregateMetricsError::InvalidTypes)?; + self.metadata.merge(other.metadata); + + Ok(self.value.cost().saturating_sub(cost_before)) + } +} + +/// Config used to create a [`Inner`] instance. +#[derive(Debug)] +pub struct Config { + /// Initial position/time of the aggregator. + /// + /// Except in testing, this should always be [`UnixTimestamp::now`]. + pub start: UnixTimestamp, + /// Size of each individual bucket, inputs are truncated to this value. + pub bucket_interval: u32, + /// The amount of time slots to keep track of in the aggregator. + /// + /// The size of a time slot is defined by [`Self::bucket_interval`]. + pub num_time_slots: u32, + /// The amount of partitions per time slot. 
+ pub num_partitions: u32, + /// Delay how long a bucket should remain in the aggregator before being flushed. + /// + /// Ideally the delay is a multiple of [`Self::bucket_interval`]. + pub delay: u32, + /// Maximum amount of bytes the aggregator can grow to. + pub max_total_bucket_bytes: Option, + /// Maximum amount of bytes the aggregator allows per project key. + pub max_project_key_bucket_bytes: Option, + /// The age in seconds of the oldest allowed bucket timestamp. + pub max_secs_in_past: Option, + /// The time in seconds that a timestamp may be in the future. + pub max_secs_in_future: Option, +} + +/// A metrics aggregator. +/// +/// The aggregator is unaware of current time and needs to be driven by periodic flushes using +/// [`Self::flush_next`]. Each flush advances the internal clock by the configured +/// [`Config::bucket_interval`]. +/// +/// The internal time is set on construction to [`Config::start`]. +/// +/// Use [`Self::next_flush_at`] to determine the time when to call [`Self::flush_next`]. +pub struct Inner { + /// Ring-buffer of aggregation slots. + /// + /// This is treated as a ring-buffer of a two dimensional array. The first dimension is a + /// "time slot" and the second dimension is the assigned partition. + /// + /// The total length of the ring-buffer is then determined by the amount of time slots times + /// the amount of partitions. In other words, every time slot has [`Self::num_partitions`] + /// partitions. + /// + /// Layout: + /// Time slots: [ ][ ][ ] + /// Partitions: [ ][ ] [ ][ ] [ ][ ] + /// + /// An item is assigned by first determining its time slot and then assigning it to a partition + /// based on selected [`Self::partition_by`] strategy. + /// + /// The first item in the buffer is tracked by [`Self::head`] which is at any time the + /// current partition since the beginning "zero". The beginning in the aggregator refers to the + /// beginning of the epoch. The [`Self::head`] at time `t` is calculated with + /// `f(t) = t / bucket_interval * num_partitions`. + /// + /// Flushing a partition advances the [`Self::head`] by a single value `+1`. Meaning + /// effectively time is advanced by `bucket_interval / num_partitions`. + slots: VecDeque, + /// The amount of partitions per time slot. + num_partitions: u64, + + /// Position of the first element in [`Self::slots`]. + head: u64, + + /// Size of each individual bucket, inputs are truncated modulo to this value. + bucket_interval: u64, + /// Amount of slots which is added to a bucket as a delay. + /// + /// This is a fixed delay which is added to to the time returned by [`Self::next_flush_at`]. + delay: u64, + + /// Total stats of the aggregator. + stats: stats::Total, + /// Configured limits based on aggregator stats. + limits: stats::Limits, + + /// The maximum amount of slots (size of a `bucket_interval`) the timestamp is allowed to be + /// in the past or future. + slot_range: RelativeRange, + + /// Determines how partitions are assigned based on the input bucket. + partition_by: FlushBatching, + /// Hasher used to calculate partitions. + hasher: ahash::RandomState, +} + +impl Inner { + pub fn new(config: Config) -> Self { + let bucket_interval = config.bucket_interval.max(1); + // Extend the configured time slots with the delay (in slots), to make sure there is + // enough space to satisfy the delay. 
+ // + // We cannot just reserve space for enough partitions to satisfy the delay, because we have + // no control over which partition a bucket is assigned to, so we have to prepare for the + // 'worst' case, and that is full extra time slots. + let num_time_slots = config.num_time_slots.max(1) + config.delay.div_ceil(bucket_interval); + let num_partitions = config.num_partitions.max(1); + + let mut slots = Vec::with_capacity((num_time_slots * num_partitions) as usize); + for _ in 0..num_time_slots { + for partition_key in 0..num_partitions { + slots.push(Slot { + partition_key, + ..Default::default() + }); + } + } + + let bucket_interval = u64::from(bucket_interval); + let num_partitions = u64::from(num_partitions); + + let slot_diff = RelativeRange { + max_in_past: config + .max_secs_in_past + .map_or(u64::MAX, |v| v.div_ceil(bucket_interval)), + max_in_future: config + .max_secs_in_future + .map_or(u64::MAX, |v| v.div_ceil(bucket_interval)), + }; + + let total_slots = slots.len(); + Self { + slots: VecDeque::from(slots), + num_partitions, + head: config.start.as_secs() / bucket_interval * num_partitions, + bucket_interval, + delay: u64::from(config.delay), + stats: stats::Total::default(), + limits: stats::Limits { + max_total: config.max_total_bucket_bytes.unwrap_or(u64::MAX), + // Break down the maximum project cost to a maximum cost per partition. + max_partition_project: config + .max_project_key_bucket_bytes + .map(|c| c.div_ceil(total_slots as u64)) + .unwrap_or(u64::MAX), + }, + slot_range: slot_diff, + partition_by: FlushBatching::Partition, + hasher: build_hasher(), + } + } + + /// Returns the configured bucket interval. + pub fn bucket_interval(&self) -> u64 { + self.bucket_interval + } + + /// Returns total aggregator stats. + pub fn stats(&self) -> Stats { + Stats { + count: self.stats.count, + count_by_namespace: self.stats.count_by_namespace, + cost: self.stats.count, + cost_by_namespace: self.stats.cost_by_namespace, + } + } + + /// Returns `true` if the aggregator contains any metric buckets. + pub fn is_empty(&self) -> bool { + self.stats.count == 0 + } + + /// Returns the time as a duration since epoch when the next flush is supposed to happen. + pub fn next_flush_at(&self) -> Duration { + // `head + 1` to get the end time of the slot not the start, convert `head` to a duration + // first, to have enough precision for the division. + // + // Casts do not wrap, configuration requires them to be `u32`. + let offset = Duration::from_secs(self.head + 1) / self.num_partitions as u32 + * self.bucket_interval as u32; + offset + Duration::from_secs(self.delay) + } + + /// Merges a metric bucket. + pub fn merge( + &mut self, + mut key: BucketKey, + value: BucketData, + ) -> Result<(), AggregateMetricsError> { + let project_key = key.project_key; + let namespace = key.metric_name.namespace(); + + let time_slot = key.timestamp.as_secs() / self.bucket_interval; + // Make sure the timestamp is normalized to the correct interval as well. 
+ key.timestamp = UnixTimestamp::from_secs(time_slot * self.bucket_interval); + + let now_slot = self.head / self.num_partitions; + if !self.slot_range.contains(now_slot, time_slot) { + return Err(AggregateMetricsError::InvalidTimestamp(key.timestamp)); + } + + let assigned_partition = match self.partition_by { + FlushBatching::None => 0, + FlushBatching::Project => self.hasher.hash_one(key.project_key), + FlushBatching::Bucket => self.hasher.hash_one(&key), + FlushBatching::Partition => { + self.hasher + .hash_one((key.project_key, &key.metric_name, &key.tags)) + } + } % self.num_partitions; + + // Calculate the slot of the bucket based on it's time and shift it by its assigned partition. + let slot = time_slot * self.num_partitions + assigned_partition; + // Transform the slot to an offset/index into the ring-buffer, by calculating: + // `(slot - self.head).rem_euclid(self.slots.len())`. + let index = sub_rem_euclid(slot, self.head, self.slots.len() as u64); + + let slot = self + .slots + .get_mut(index as usize) + .expect("index should always be a valid slot index"); + + debug_assert_eq!( + u64::from(slot.partition_key), + assigned_partition, + "assigned partition does not match selected partition" + ); + + let key_cost = key.cost() as u64; + match slot.buckets.entry(key) { + Entry::Occupied(occupied_entry) => { + let estimated_cost = match &value.value { + // Counters and Gauges aggregate without additional costs. + BucketValue::Counter(_) | BucketValue::Gauge(_) => 0, + // Distributions are an accurate estimation, all values will be added. + BucketValue::Distribution(d) => d.len() * mem::size_of::(), + // Sets are an upper bound. + BucketValue::Set(s) => s.len() * mem::size_of::(), + }; + + // Reserve for the upper bound of the value. + let reservation = slot.stats.reserve( + &mut self.stats, + project_key, + namespace, + estimated_cost as u64, + &self.limits, + )?; + + let actual_cost = occupied_entry.into_mut().merge(value)?; + + // Track the actual cost increase, not just the reservation. + reservation.consume_with(actual_cost as u64); + slot.stats.incr_merges(namespace); + } + Entry::Vacant(vacant_entry) => { + let reservation = slot.stats.reserve( + &mut self.stats, + project_key, + namespace, + key_cost + value.value.cost() as u64, + &self.limits, + )?; + + vacant_entry.insert(value); + + reservation.consume(); + slot.stats.incr_count(&mut self.stats, namespace); + } + }; + + debug_assert_eq!(slot.stats.count, slot.buckets.len() as u64); + + Ok(()) + } + + /// FLushes the next partition and advances time. + pub fn flush_next(&mut self) -> Partition { + let mut slot @ Slot { partition_key, .. } = self + .slots + .pop_front() + .expect("there should always be at least one partition"); + + let stats = PartitionStats::from(&slot.stats); + + // Remove the partition cost from the total cost and reset the partition cost. + self.stats.remove_slot(&slot.stats); + slot.stats.reset(); + + // Create a new and empty slot with a similar capacity as the original one, + // but still shrinking a little bit so that over time we free up memory again + // without re-allocating too many times. + // + // Also re-use the original hasher, no need to create a new/expensive one. + self.slots.push_back(Slot { + buckets: HashMap::with_capacity_and_hasher( + slot.buckets.len(), + slot.buckets.hasher().clone(), + ), + ..slot + }); + + // Increment to the next partition. + // + // Effectively advance time by `bucket_interval / num_partitions`. 
+ self.head += 1; + + Partition { + partition_key, + buckets: slot.buckets, + stats, + } + } + + /// Consumes the aggregator and returns an iterator over all contained partitions. + pub fn into_partitions(self) -> impl Iterator { + self.slots.into_iter().map(|slot| Partition { + partition_key: slot.partition_key, + buckets: slot.buckets, + stats: PartitionStats::from(&slot.stats), + }) + } +} + +impl fmt::Debug for Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut list = f.debug_list(); + list.entry(&self.stats); + for (i, v) in self.slots.iter().enumerate() { + let head_partitions = self.head % self.num_partitions; + let head_time = self.head / self.num_partitions; + + let time_offset = (head_partitions + i as u64) / self.num_partitions; + let time = (head_time + time_offset) * self.bucket_interval; + + match v.is_empty() { + // Make the output shorter with a string until `entry_with` is stable. + true => list.entry(&format!("({time}, {v:?})")), + false => list.entry(&(time, v)), + }; + } + list.finish() + } +} + +#[derive(Default)] +struct Slot { + pub partition_key: u32, + pub stats: stats::Slot, + pub buckets: HashMap, +} + +impl Slot { + fn is_empty(&self) -> bool { + self.stats == Default::default() && self.buckets.is_empty() + } +} + +impl fmt::Debug for Slot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_empty() { + write!(f, "Slot({})", self.partition_key) + } else { + #[cfg(test)] + let buckets = &self.buckets.iter().collect::>(); + #[cfg(not(test))] + let buckets = &self.buckets; + + f.debug_struct("Slot") + .field("partition_key", &self.partition_key) + .field("stats", &self.stats) + .field("buckets", buckets) + .finish() + } + } +} + +struct RelativeRange { + max_in_past: u64, + max_in_future: u64, +} + +impl RelativeRange { + fn contains(&self, now: u64, target: u64) -> bool { + if target < now { + // Timestamp/target in the past. + let diff = now - target; + diff <= self.max_in_past + } else { + // Timestamp/target in the future. + let diff = target - now; + diff <= self.max_in_future + } + } +} + +/// Calculates `(a - b).rem_euclid(mod)` for `u64` values. +/// +/// Since `a - b` can be negative, you naively need to temporarily change the number space from +/// unsigned to signed and after the modulo back to unsigned. +/// +/// This function instead operates purely with unsigned arithmetic and makes sure the subtraction +/// is always positive. +fn sub_rem_euclid(a: u64, b: u64, m: u64) -> u64 { + ( + // Shift a by `m`: `[m, inf)`. + (a + m) + - + // Modulo b: `[0, m)`. + (b % m) + ) % m +} + +fn build_hasher() -> RandomState { + // A fixed, consistent seed across all instances of Relay. 
+ const K0: u64 = 0x06459b7d5da84ed8; + const K1: u64 = 0x3321ce2636c567cc; + const K2: u64 = 0x56c94d7107c49765; + const K3: u64 = 0x685bf5f9abbea5ab; + + ahash::RandomState::with_seeds(K0, K1, K2, K3) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn bucket_key(ts: u64, name: &str) -> BucketKey { + BucketKey { + project_key: ProjectKey::parse("00000000000000000000000000000000").unwrap(), + timestamp: UnixTimestamp::from_secs(ts), + metric_name: name.into(), + tags: Default::default(), + extracted_from_indexed: false, + } + } + + fn counter(value: f64) -> BucketData { + BucketData { + value: BucketValue::counter(value.try_into().unwrap()), + metadata: Default::default(), + } + } + + #[test] + fn test_merge_flush() -> Result<(), AggregateMetricsError> { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + // Within the time range. + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + buckets.merge(bucket_key(80, "b"), counter(1.0))?; + buckets.merge(bucket_key(80, "b"), counter(2.0))?; + // Too early. + buckets.merge(bucket_key(32, "c"), counter(1.0))?; + buckets.merge(bucket_key(42, "d"), counter(1.0))?; + // Too late. + buckets.merge(bucket_key(171, "e"), counter(1.0))?; + buckets.merge(bucket_key(181, "f"), counter(1.0))?; + buckets.merge(bucket_key(191, "a"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets); + + let partition = buckets.flush_next(); + insta::assert_debug_snapshot!(partition, @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 2, + count_by_namespace: (unsupported:2), + merges: 0, + merges_by_namespace: (0), + cost: 274, + cost_by_namespace: (unsupported:274), + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + 190-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + } + "###); + + // This was just flushed and now is supposed to be at the end. + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets); + + let partition = buckets.flush_next(); + insta::assert_debug_snapshot!(partition, @r###" + Partition { + partition_key: 1, + stats: PartitionStats { + count: 0, + count_by_namespace: (0), + merges: 0, + merges_by_namespace: (0), + cost: 0, + cost_by_namespace: (0), + }, + buckets: {}, + } + "###); + + insta::assert_debug_snapshot!(buckets); + + insta::assert_debug_snapshot!(buckets.stats(), @r###" + Stats { + count: 6, + count_by_namespace: (unsupported:6), + cost: 6, + cost_by_namespace: (unsupported:822), + } + "###); + + Ok(()) + } + + #[test] + fn test_merge_flush_cost_limits() -> Result<(), AggregateMetricsError> { + const ONE_BUCKET_COST: u64 = 137; + + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 3, + num_partitions: 1, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: Some(ONE_BUCKET_COST * 2), + // Enough for one bucket per partition. + max_project_key_bucket_bytes: Some(ONE_BUCKET_COST * 3), + start: UnixTimestamp::from_secs(70), + }); + + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + // Adding a new bucket exceeds the cost. + assert_eq!( + buckets + .merge(bucket_key(70, "b"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::ProjectLimitExceeded + ); + // Merging a counter bucket is fine. 
+ buckets.merge(bucket_key(70, "a"), counter(2.0))?; + + // There is still room in the total budget and for a different project. + let other_project = BucketKey { + project_key: ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(), + ..bucket_key(70, "a") + }; + buckets.merge(other_project, counter(3.0))?; + + // Add a bucket to a different partition, but the total limit is exceeded. + assert_eq!( + buckets + .merge(bucket_key(80, "c"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::TotalLimitExceeded + ); + // Flush some data and make space. + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 2, + count_by_namespace: (unsupported:2), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 274, + cost_by_namespace: (unsupported:274), + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 3.0, + ), + 70-bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-a: Counter( + 3.0, + ), + }, + } + "###); + buckets.merge(bucket_key(80, "c"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets, @r###" + [ + Total { + count: 1, + count_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(0))", + "(100, Slot(0))", + ] + "###); + + insta::assert_debug_snapshot!(buckets.stats(), @r###" + Stats { + count: 1, + count_by_namespace: (unsupported:1), + cost: 1, + cost_by_namespace: (unsupported:137), + } + "###); + + Ok(()) + } + + #[test] + fn test_merge_flush_with_delay() { + let mut buckets = Inner::new(Config { + // 20 seconds. + bucket_interval: 20, + // Slots for 1 minute. + num_time_slots: 3, + // A partition for every 10 seconds. + num_partitions: 2, + // 20 second delay -> 1 extra time slot. + delay: 20, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + max_secs_in_past: None, + max_secs_in_future: None, + // Truncated to 60 seconds. + start: UnixTimestamp::from_secs(63), + }); + + // Add a bucket now -> should be flushed 30 seconds in the future. + buckets.merge(bucket_key(60, "a"), counter(1.0)).unwrap(); + // Add a bucket 1 minute into the future, this one should still have a delay. + buckets.merge(bucket_key(120, "b"), counter(2.0)).unwrap(); + + // First flush is in 20 seconds + 10 seconds for the end of the partition. + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + buckets: { + 60-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + } + "###); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 100s. + assert_eq!(buckets.next_flush_at(), Duration::from_secs(110)); + assert!(buckets.flush_next().buckets.is_empty()); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 120s. 
+ assert_eq!(buckets.next_flush_at(), Duration::from_secs(130)); + assert!(buckets.flush_next().buckets.is_empty()); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 140s -> our second bucket is ready (120s + 20s delay). + assert_eq!(buckets.next_flush_at(), Duration::from_secs(150)); + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + buckets: { + 120-00000000000000000000000000000000-b: Counter( + 2.0, + ), + }, + } + "###); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at 160s. + assert_eq!(buckets.next_flush_at(), Duration::from_secs(170)); + } + + #[test] + fn test_next_flush() { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + assert_eq!(buckets.next_flush_at(), Duration::from_secs(75)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(80)); + assert_eq!(buckets.flush_next().partition_key, 1); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(85)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); + } + + #[test] + fn test_next_flush_with_delay() { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 3, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + assert_eq!(buckets.next_flush_at(), Duration::from_secs(78)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(83)); + assert_eq!(buckets.flush_next().partition_key, 1); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(88)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(93)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(93)); + } + + #[test] + fn test_merge_flush_time_limits() -> Result<(), AggregateMetricsError> { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: Some(33), // -> Upgraded to 4 slots (40 seconds). + max_secs_in_future: Some(22), // -> Upgraded to 3 slots (30 seconds). + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + + // 4 slots in the past. + buckets.merge(bucket_key(60, "a"), counter(1.0))?; + buckets.merge(bucket_key(50, "a"), counter(1.0))?; + buckets.merge(bucket_key(40, "a"), counter(1.0))?; + buckets.merge(bucket_key(30, "a"), counter(1.0))?; + assert_eq!( + buckets + .merge(bucket_key(29, "a"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::InvalidTimestamp(UnixTimestamp::from_secs(20)) + ); + + // 3 slots in the future. 
+ buckets.merge(bucket_key(80, "a"), counter(1.0))?; + buckets.merge(bucket_key(90, "a"), counter(1.0))?; + buckets.merge(bucket_key(109, "a"), counter(1.0))?; + assert_eq!( + buckets + .merge(bucket_key(110, "a"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::InvalidTimestamp(UnixTimestamp::from_secs(110)) + ); + + Ok(()) + } + + #[test] + fn test_sub_rem_euclid() { + for (head, slot, expected) in [ + // head == slot + (253, 253, 0), + // head < slot + (253, 273, 0), + (253, 274, 1), + (253, 275, 2), + (253, 276, 3), + (253, 277, 4), + // head > slot + (253, 233, 0), + (253, 234, 1), + (253, 235, 2), + (253, 236, 3), + (253, 237, 4), + ] { + assert_eq!(sub_rem_euclid(slot, head, 5), expected); + } + } +} diff --git a/relay-metrics/src/aggregator/mod.rs b/relay-metrics/src/aggregator/mod.rs index b03662a186..fce6fab1ea 100644 --- a/relay-metrics/src/aggregator/mod.rs +++ b/relay-metrics/src/aggregator/mod.rs @@ -1,48 +1,40 @@ //! Core functionality of metrics aggregation. -use std::fmt; -use std::hash::Hasher; -use std::time::Duration; +use std::time::{Duration, SystemTime}; -use fnv::FnvHasher; -use hashbrown::hash_map::Entry; +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricNamespace; use relay_base_schema::project::ProjectKey; use relay_common::time::UnixTimestamp; -use thiserror::Error; -use tokio::time::Instant; - -use crate::bucket::Bucket; -use crate::statsd::{MetricCounters, MetricGauges, MetricHistograms, MetricSets, MetricTimers}; -use crate::MetricName; -use hashbrown::HashMap; +use crate::statsd::{MetricCounters, MetricGauges}; +use crate::Bucket; -mod buckets; mod config; -mod cost; +mod inner; +mod stats; -use self::buckets::{BucketKey, Buckets, QueuedBucket}; pub use self::config::*; -pub(crate) use self::cost::tags_cost; +use self::inner::{BucketData, BucketKey}; + +/// Default amount of partitions per second when there are no partitions configured. +const DEFAULT_PARTITIONS_PER_SECOND: u32 = 64; /// Any error that may occur during aggregation. -#[derive(Debug, Error, PartialEq)] +#[derive(Debug, thiserror::Error, PartialEq)] pub enum AggregateMetricsError { - /// A metric bucket's timestamp was out of the configured acceptable range. - #[error("found invalid timestamp: {0}")] - InvalidTimestamp(UnixTimestamp), /// Internal error: Attempted to merge two metric buckets of different types. #[error("found incompatible metric types")] InvalidTypes, - /// A metric bucket had a too long string (metric name or a tag key/value). - #[error("found invalid string: {0}")] - InvalidStringLength(MetricName), /// A metric bucket is too large for the global bytes limit. #[error("total metrics limit exceeded")] TotalLimitExceeded, /// A metric bucket is too large for the per-project bytes limit. #[error("project metrics limit exceeded")] ProjectLimitExceeded, + /// The timestamp is outside the maximum allowed time range. + #[error("the timestamp '{0}' is outside the maximum allowed time range")] + InvalidTimestamp(UnixTimestamp), } /// A collector of [`Bucket`] submissions. @@ -65,1106 +57,213 @@ pub enum AggregateMetricsError { /// Metrics are uniquely identified by the combination of their name, type and unit. It is allowed /// to send metrics of different types and units under the same name. For example, sending a metric /// once as set and once as distribution will result in two actual metrics being recorded. 
+#[derive(Debug)] pub struct Aggregator { name: String, - config: AggregatorConfig, - buckets: Buckets, - cost_tracker: cost::Tracker, - reference_time: Instant, + inner: inner::Inner, } impl Aggregator { - /// Create a new aggregator. - pub fn new(config: AggregatorConfig) -> Self { - Self::named("default".to_owned(), config) - } + /// Creates a new named [`Self`]. + pub fn named(name: String, config: &AggregatorConfig) -> Self { + let num_partitions = match config.flush_batching { + FlushBatching::Project => config.flush_partitions, + FlushBatching::Bucket => config.flush_partitions, + FlushBatching::Partition => config.flush_partitions, + FlushBatching::None => Some(0), + } + .unwrap_or(DEFAULT_PARTITIONS_PER_SECOND * config.bucket_interval.max(1)); - /// Like [`Self::new`], but with a provided name. - pub fn named(name: String, config: AggregatorConfig) -> Self { Self { name, - config, - buckets: Default::default(), - cost_tracker: Default::default(), - reference_time: Instant::now(), + inner: inner::Inner::new(inner::Config { + start: UnixTimestamp::now(), + bucket_interval: config.bucket_interval, + num_time_slots: config.aggregator_size, + num_partitions, + delay: config.initial_delay, + max_total_bucket_bytes: config.max_total_bucket_bytes, + max_project_key_bucket_bytes: config.max_project_key_bucket_bytes, + max_secs_in_past: Some(config.max_secs_in_past), + max_secs_in_future: Some(config.max_secs_in_future), + }), } } /// Returns the name of the aggregator. pub fn name(&self) -> &str { - self.name.as_str() + &self.name } - /// Returns the number of buckets in the aggregator. - pub fn bucket_count(&self) -> usize { - self.buckets.len() - } - - /// Returns `true` if the cost trackers value is larger than the configured max cost. - pub fn totals_cost_exceeded(&self) -> bool { - self.cost_tracker - .totals_cost_exceeded(self.config.max_total_bucket_bytes) - } - - /// Converts this aggregator into a vector of [`Bucket`]. - pub fn into_buckets(self) -> Vec { - relay_statsd::metric!( - gauge(MetricGauges::Buckets) = self.bucket_count() as u64, - aggregator = &self.name, - ); - - let bucket_interval = self.config.bucket_interval; - - self.buckets - .into_iter() - .map(|(key, entry)| Bucket { - timestamp: key.timestamp, - width: bucket_interval, - name: key.metric_name, - value: entry.value, - tags: key.tags, - metadata: entry.metadata, - }) - .collect() - } - - /// Pop and return the partitions with buckets that are eligible for flushing out according to - /// bucket interval. - /// - /// For each project `flush_decision` is called, which can influence the flush decision - /// for buckets of that project. The decision can also return metadata for the project - /// on which all flushed buckets for the project are merged with the `merge` function. - /// - /// `flush_decision` is only called once if the decision is [`FlushDecision::Flush`], - /// but may be called multiple times otherwise. The `flush_decision` should be consistent - /// for each project key passed. - /// - /// If no partitioning is enabled, the function will return a single `None` partition. - pub fn pop_flush_buckets>( - &mut self, - force: bool, - mut flush_decision: impl FnMut(ProjectKey) -> FlushDecision, - ) -> HashMap, HashMap> { - relay_statsd::metric!( - gauge(MetricGauges::Buckets) = self.bucket_count() as u64, - aggregator = &self.name, - ); - - // We only emit statsd metrics for the cost on flush (and not when merging the buckets), - // assuming that this gives us more than enough data points. 
- for (namespace, cost) in self.cost_tracker.cost_per_namespace() { - relay_statsd::metric!( - gauge(MetricGauges::BucketsCost) = cost as u64, - aggregator = &self.name, - namespace = namespace.as_str() - ); - } - - let mut partitions = HashMap::new(); - let mut stats = HashMap::new(); - - let mut re_add = Vec::new(); - - let now = Instant::now(); - let ts = UnixTimestamp::from_instant(now.into_std()); - - let should_pop = |entry: &QueuedBucket| force || entry.elapsed(now); - - let bucket_interval = self.config.bucket_interval; - let cost_tracker = &mut self.cost_tracker; - - let start_bucket_scan = Instant::now(); - while let Some((key, queued_bucket)) = self.buckets.try_pop(|_, b| should_pop(b)) { - let partition = self.config.flush_partitions.map(|p| key.partition_key(p)); - let partition = partitions.entry(partition).or_insert_with(HashMap::new); - - let cost = key.cost() + queued_bucket.value.cost(); - - let buckets = match partition.entry(key.project_key) { - Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), - Entry::Vacant(vacant_entry) => { - match flush_decision(key.project_key) { - FlushDecision::Flush(v) => vacant_entry.insert(v), - FlushDecision::Delay => { - let mut entry = queued_bucket; - - // No point in re-calculating the flush time when `force` is `true`. - // We can still do it on a flush without force. - if !force { - entry.flush_at = - get_flush_time(&self.config, ts, self.reference_time, &key); - // This should not happen, but may happen with weird - // configs or due to bugs. - debug_assert!(!entry.elapsed(now)); - } - - // Re-use the loop condition for `try_pop`, to make sure we - // will never accidentally create an infinite loop. - match should_pop(&entry) { - // If we would pop the entry again (`force`, a weird flush - // configuration, a bug), re-add it after the loop to - // prevent infinite loops. - true => re_add.push((key, entry)), - // Otherwise it's safe to immediately re-add. - false => self.buckets.re_add(key, entry), - } - - continue; - } - FlushDecision::Drop => { - cost_tracker.subtract_cost(key.namespace(), key.project_key, cost); - continue; - } - } - } - }; - - cost_tracker.subtract_cost(key.namespace(), key.project_key, cost); - - let (bucket_count, item_count) = stats - .entry((queued_bucket.value.ty(), key.namespace())) - .or_insert((0usize, 0usize)); - *bucket_count += 1; - *item_count += queued_bucket.value.len(); - - let bucket = Bucket { - timestamp: key.timestamp, - width: bucket_interval, - name: key.metric_name, - value: queued_bucket.value, - tags: key.tags, - metadata: queued_bucket.metadata, - }; - - buckets.extend(std::iter::once(bucket)); - } - - for (key, entry) in re_add { - self.buckets.re_add(key, entry); - } - - relay_statsd::metric!( - timer(MetricTimers::BucketsScanDuration) = start_bucket_scan.elapsed(), - aggregator = &self.name, - ); - - for ((ty, namespace), (bucket_count, item_count)) in stats.into_iter() { - relay_statsd::metric!( - gauge(MetricGauges::AvgBucketSize) = item_count as f64 / bucket_count as f64, - metric_type = ty.as_str(), - namespace = namespace.as_str(), - aggregator = self.name(), - ); - } - - partitions - } - - /// Wrapper for [`AggregatorConfig::get_bucket_timestamp`]. - /// - /// Logs a statsd metric for invalid timestamps. 
- fn get_bucket_timestamp( - &self, - now: UnixTimestamp, - timestamp: UnixTimestamp, - bucket_width: u64, - ) -> Result { - let bucket_ts = self.config.get_bucket_timestamp(timestamp, bucket_width); - - if !self.config.timestamp_range().contains(&bucket_ts) { - let delta = (bucket_ts.as_secs() as i64) - (now.as_secs() as i64); - relay_statsd::metric!( - histogram(MetricHistograms::InvalidBucketTimestamp) = delta as f64, - aggregator = &self.name, - ); - - return Err(AggregateMetricsError::InvalidTimestamp(timestamp)); - } - - Ok(bucket_ts) + /// Returns `true` if the aggregator contains any metric buckets. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() } /// Merge a bucket into this aggregator. - /// - /// Like [`Self::merge_with_options`] with defaults. pub fn merge( &mut self, project_key: ProjectKey, bucket: Bucket, ) -> Result<(), AggregateMetricsError> { - self.merge_with_options(project_key, bucket, UnixTimestamp::now()) - } - - /// Merge a bucket into this aggregator. - /// - /// If no bucket exists for the given bucket key, a new bucket will be created. - /// - /// Arguments: - /// - `now`: The current timestamp, when merging a large batch of buckets, the timestamp - /// should be created outside of the loop. - pub fn merge_with_options( - &mut self, - project_key: ProjectKey, - bucket: Bucket, - now: UnixTimestamp, - ) -> Result<(), AggregateMetricsError> { - let timestamp = self.get_bucket_timestamp(now, bucket.timestamp, bucket.width)?; let key = BucketKey { project_key, - timestamp, + timestamp: bucket.timestamp, metric_name: bucket.name, tags: bucket.tags, extracted_from_indexed: bucket.metadata.extracted_from_indexed, }; - let key = validate_bucket_key(key, &self.config)?; - let namespace = key.namespace(); - // XXX: This is not a great implementation of cost enforcement. - // - // * it takes two lookups of the project key in the cost tracker to merge a bucket: once in - // `check_limits_exceeded` and once in `add_cost`. - // - // * the limits are not actually enforced consistently - // - // A bucket can be merged that exceeds the cost limit, and only the next bucket will be - // limited because the limit is now reached. This implementation was chosen because it's - // currently not possible to determine cost accurately upfront: The bucket values have to - // be merged together to figure out how costly the merge was. Changing that would force - // us to unravel a lot of abstractions that we have already built. - // - // As a result of that, it is possible to exceed the bucket cost limit significantly - // until we have guaranteed upper bounds on the cost of a single bucket (which we - // currently don't, because a metric can have arbitrary amount of tag values). - // - // Another consequence is that a MergeValue that adds zero cost (such as an existing - // counter bucket being incremented) is currently rejected even though it doesn't have to - // be. - // - // The flipside of this approach is however that there's more optimization potential: If - // the limit is already exceeded, we could implement an optimization that drops envelope - // items before they are parsed, as we can be sure that the new metric bucket will be - // rejected in the aggregator regardless of whether it is merged into existing buckets, - // whether it is just a counter, etc. 
- self.cost_tracker.check_limits_exceeded( - project_key, - self.config.max_total_bucket_bytes, - self.config.max_project_key_bucket_bytes, - )?; - - let added_cost = self.buckets.merge( - key, - (bucket.value, bucket.metadata), - |key, (value, metadata), exisiting| { - relay_statsd::metric!( - counter(MetricCounters::MergeHit) += 1, - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - - exisiting.merge(value, metadata) - }, - |key, (value, metadata)| { - relay_statsd::metric!( - counter(MetricCounters::MergeMiss) += 1, - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - relay_statsd::metric!( - set(MetricSets::UniqueBucketsCreated) = key.hash64() as i64, // 2-complement - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - - let flush_at = get_flush_time(&self.config, now, self.reference_time, key); - QueuedBucket::new(flush_at, value, metadata) - }, - )?; - - self.cost_tracker - .add_cost(namespace, project_key, added_cost); + let value = BucketData { + value: bucket.value, + metadata: bucket.metadata, + }; - Ok(()) + self.inner.merge(key, value) } -} -impl fmt::Debug for Aggregator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct(std::any::type_name::()) - .field("config", &self.config) - .field("buckets", &self.buckets) - .field("receiver", &format_args!("Recipient")) - .finish() - } -} + /// Attempts to flush the next batch from the aggregator. + /// + /// If it is too early to flush the next batch, the error contains the timestamp when the flush should be retried. + /// After a successful flush, retry immediately until an error is returned with the next flush + /// time, this makes sure time is eventually synchronized. + pub fn try_flush_next(&mut self, now: SystemTime) -> Result { + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush_at(); + + if let Err(err) = now.duration_since(next_flush) { + // The flush time is in the future, return the amount of time to wait before the next flush. + return Err(err.duration()); + } -/// Decision what to do with a bucket when flushing. -pub enum FlushDecision { - /// Flush the bucket with the provided metadata. - Flush(T), - /// Drop the bucket. - Drop, - /// Delay flushing the bucket into the future, it's not ready. - Delay, -} + let partition = self.inner.flush_next(); -/// Validates the metric name and its tags are correct. -/// -/// Returns `Err` if the metric should be dropped. -fn validate_bucket_key( - mut key: BucketKey, - aggregator_config: &AggregatorConfig, -) -> Result { - key = validate_metric_name(key, aggregator_config)?; - key = validate_metric_tags(key, aggregator_config); - Ok(key) -} + emit_stats(&self.name, self.inner.stats()); + emit_flush_partition_stats(&self.name, partition.stats); -/// Validates metric name against [`AggregatorConfig`]. -/// -/// Returns `Err` if the metric must be dropped. 
-fn validate_metric_name( - key: BucketKey, - aggregator_config: &AggregatorConfig, -) -> Result { - let metric_name_length = key.metric_name.len(); - if metric_name_length > aggregator_config.max_name_length { - relay_log::debug!( - "Invalid metric name, too long (> {}): {:?}", - aggregator_config.max_name_length, - key.metric_name - ); - return Err(AggregateMetricsError::InvalidStringLength(key.metric_name)); + Ok(Partition { + partition_key: partition.partition_key, + buckets: partition.buckets, + bucket_interval: self.inner.bucket_interval(), + }) } - Ok(key) -} + /// Returns when the next partition is ready to be flushed using [`Self::try_flush_next`]. + pub fn next_flush_at(&mut self, now: SystemTime) -> Duration { + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush_at(); -/// Validates metric tags against [`AggregatorConfig`]. -/// -/// Invalid tags are removed. -fn validate_metric_tags(mut key: BucketKey, aggregator_config: &AggregatorConfig) -> BucketKey { - key.tags.retain(|tag_key, tag_value| { - if tag_key.len() > aggregator_config.max_tag_key_length { - relay_log::debug!("Invalid metric tag key"); - return false; - } - if bytecount::num_chars(tag_value.as_bytes()) > aggregator_config.max_tag_value_length { - relay_log::debug!("Invalid metric tag value"); - return false; + match now.duration_since(next_flush) { + Ok(_) => Duration::ZERO, + Err(err) => err.duration(), } + } - true - }); - key -} - -/// Returns the instant at which a bucket should be flushed. -/// -/// All buckets are flushed after a grace period of `initial_delay`. -fn get_flush_time( - config: &AggregatorConfig, - now: UnixTimestamp, - reference_time: Instant, - bucket_key: &BucketKey, -) -> Instant { - let initial_flush = bucket_key.timestamp + config.bucket_interval() + config.initial_delay(); - let backdated = initial_flush <= now; + /// Consumes the aggregator and returns all contained partitions. + pub fn into_partitions(self) -> impl Iterator { + let bucket_interval = self.inner.bucket_interval(); - let flush_timestamp = if backdated { - // If the initial flush time has passed or can't be represented, we want to treat the - // flush of the bucket as if it came in with the timestamp of current bucket based on - // the now timestamp. - // - // The rationale behind this is that we want to flush this bucket in the earliest slot - // together with buckets that have similar characteristics (e.g., same partition, - // project...). - let floored_timestamp = (now.as_secs() / config.bucket_interval) * config.bucket_interval; - UnixTimestamp::from_secs(floored_timestamp) - + config.bucket_interval() - + config.initial_delay() - } else { - // If the initial flush is still pending, use that. - initial_flush - }; + emit_stats(&self.name, self.inner.stats()); - let instant = if flush_timestamp > now { - Instant::now().checked_add(flush_timestamp - now) - } else { - Instant::now().checked_sub(now - flush_timestamp) + self.inner.into_partitions().map(move |p| Partition { + partition_key: p.partition_key, + buckets: p.buckets, + bucket_interval, + }) } - .unwrap_or_else(Instant::now); - - // Since `Instant` doesn't allow to get directly how many seconds elapsed, we leverage the - // diffing to get a duration and round it to the smallest second to get consistent times. - let instant = reference_time + Duration::from_secs((instant - reference_time).as_secs()); - instant + flush_time_shift(config, bucket_key) } -/// Shift deterministically within one bucket interval based on the configured [`FlushBatching`]. 
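// Editor's note: hedged usage sketch of the flush API defined above (caller-side code
// assumed for illustration, not part of this patch): merge buckets as they arrive, then
// repeatedly call `try_flush_next` until it returns `Err`, whose value is the duration
// to wait before the next partition becomes ready.
use std::time::SystemTime;

use relay_base_schema::project::ProjectKey;
use relay_metrics::aggregator::Aggregator;
use relay_metrics::Bucket;

fn drain_ready_partitions(
    aggregator: &mut Aggregator,
    mut send_upstream: impl FnMut(u32, ProjectKey, Bucket),
) {
    loop {
        match aggregator.try_flush_next(SystemTime::now()) {
            Ok(partition) => {
                let partition_key = partition.partition_key;
                // A flushed partition iterates into (ProjectKey, Bucket) pairs.
                for (project_key, bucket) in partition {
                    send_upstream(partition_key, project_key, bucket);
                }
            }
            // Too early: retry after `wait`, e.g. by sleeping or rearming a timer.
            Err(wait) => {
                let _ = wait;
                break;
            }
        }
    }
}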
+/// A flushed partition from [`Aggregator::try_flush_next`]. /// -/// This distributes buckets over time to prevent peaks. -fn flush_time_shift(config: &AggregatorConfig, bucket: &BucketKey) -> Duration { - let shift_range = config.bucket_interval * 1000; - - // Fall back to default flushing by project if no partitioning is configured. - let (batching, partitions) = match (config.flush_batching, config.flush_partitions) { - (FlushBatching::Partition, None | Some(0)) => (FlushBatching::Project, 0), - (flush_batching, partitions) => (flush_batching, partitions.unwrap_or(0)), - }; - - let shift_millis = match batching { - FlushBatching::Project => { - let mut hasher = FnvHasher::default(); - hasher.write(bucket.project_key.as_str().as_bytes()); - hasher.finish() % shift_range - } - FlushBatching::Bucket => bucket.hash64() % shift_range, - FlushBatching::Partition => shift_range * bucket.partition_key(partitions) / partitions, - FlushBatching::None => 0, - }; - - Duration::from_millis(shift_millis) +/// The partition contains the partition key and all flushed buckets. +pub struct Partition { + /// The partition key. + pub partition_key: u32, + buckets: HashMap, + bucket_interval: u64, } -#[cfg(test)] -mod tests { - use insta::assert_debug_snapshot; - use similar_asserts::assert_eq; - use std::collections::BTreeMap; +impl IntoIterator for Partition { + type Item = (ProjectKey, Bucket); + type IntoIter = PartitionIter; - use super::*; - use crate::{dist, BucketMetadata, BucketValue, GaugeValue}; - - fn test_config() -> AggregatorConfig { - AggregatorConfig { - bucket_interval: 1, - initial_delay: 0, - max_secs_in_past: 50 * 365 * 24 * 60 * 60, - max_secs_in_future: 50 * 365 * 24 * 60 * 60, - max_name_length: 200, - max_tag_key_length: 200, - max_tag_value_length: 200, - max_project_key_bucket_bytes: None, - max_total_bucket_bytes: None, - flush_batching: FlushBatching::default(), - flush_partitions: None, + fn into_iter(self) -> Self::IntoIter { + PartitionIter { + inner: self.buckets.into_iter(), + bucket_interval: self.bucket_interval, } } +} - fn some_bucket(timestamp: Option) -> Bucket { - let timestamp = timestamp.map_or(UnixTimestamp::from_secs(999994711), |t| t); - Bucket { - timestamp, - width: 0, - name: "c:transactions/foo@none".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - } - } - - #[test] - fn test_aggregator_merge_counters() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(test_config()); - - let bucket1 = some_bucket(None); - - let mut bucket2 = bucket1.clone(); - bucket2.value = BucketValue::counter(43.into()); - aggregator.merge(project_key, bucket1).unwrap(); - aggregator.merge(project_key, bucket2).unwrap(); - - let buckets: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(k, e)| (k, e.value)) // skip flush times, they are different every time - .collect(); - - insta::assert_debug_snapshot!(buckets, @r###" - [ - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994711), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 85.0, - ), - ), - ] - "###); - } - - #[test] - fn test_bucket_value_cost() { - // When this test fails, it means that the cost model has changed. - // Check dimensionality limits. 
- let expected_bucket_value_size = 48; - let expected_set_entry_size = 4; - - let counter = BucketValue::Counter(123.into()); - assert_eq!(counter.cost(), expected_bucket_value_size); - let set = BucketValue::Set([1, 2, 3, 4, 5].into()); - assert_eq!( - set.cost(), - expected_bucket_value_size + 5 * expected_set_entry_size - ); - let distribution = BucketValue::Distribution(dist![1, 2, 3]); - assert_eq!(distribution.cost(), expected_bucket_value_size + 3 * 8); - let gauge = BucketValue::Gauge(GaugeValue { - last: 43.into(), - min: 42.into(), - max: 43.into(), - sum: 85.into(), - count: 2, - }); - assert_eq!(gauge.cost(), expected_bucket_value_size); - } - - #[test] - fn test_bucket_key_cost() { - // When this test fails, it means that the cost model has changed. - // Check dimensionality limits. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let metric_name = "12345".into(); - let bucket_key = BucketKey { - timestamp: UnixTimestamp::now(), - project_key, - metric_name, - tags: BTreeMap::from([ - ("hello".to_owned(), "world".to_owned()), - ("answer".to_owned(), "42".to_owned()), - ]), - extracted_from_indexed: false, - }; - assert_eq!( - bucket_key.cost(), - 88 + // BucketKey - 5 + // name - (5 + 5 + 6 + 2) // tags - ); - } - - #[test] - fn test_aggregator_merge_timestamps() { - relay_test::setup(); - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(config); - - let bucket1 = some_bucket(None); - - let mut bucket2 = bucket1.clone(); - bucket2.timestamp = UnixTimestamp::from_secs(999994712); - - let mut bucket3 = bucket1.clone(); - bucket3.timestamp = UnixTimestamp::from_secs(999994721); - - aggregator.merge(project_key, bucket1).unwrap(); - aggregator.merge(project_key, bucket2).unwrap(); - aggregator.merge(project_key, bucket3).unwrap(); - - let mut buckets: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(k, e)| (k, e.value)) // skip flush times, they are different every time - .collect(); - - buckets.sort_by(|a, b| a.0.timestamp.cmp(&b.0.timestamp)); - insta::assert_debug_snapshot!(buckets, @r###" - [ - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994710), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 84.0, - ), - ), - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994720), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 42.0, - ), - ), - ] - "###); - } - - #[test] - fn test_aggregator_mixed_projects() { - relay_test::setup(); - - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let mut aggregator: Aggregator = Aggregator::new(config); - - // It's OK to have same metric with different projects: - aggregator.merge(project_key1, some_bucket(None)).unwrap(); - aggregator.merge(project_key2, some_bucket(None)).unwrap(); - - assert_eq!(aggregator.buckets.len(), 2); - } - - #[test] - fn test_aggregator_cost_tracking() { - // Make sure that the right cost is added / subtracted - let mut aggregator: Aggregator = 
Aggregator::new(test_config()); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); +/// Iterator yielded from [`Partition::into_iter`]. +pub struct PartitionIter { + inner: hashbrown::hash_map::IntoIter, + bucket_interval: u64, +} - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo@none".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - let bucket_key = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/foo@none".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - let fixed_cost = bucket_key.cost() + std::mem::size_of::(); - for (metric_name, metric_value, expected_added_cost) in [ - ( - "c:transactions/foo@none", - BucketValue::counter(42.into()), - fixed_cost, - ), - ( - "c:transactions/foo@none", - BucketValue::counter(42.into()), - 0, - ), // counters have constant size - ( - "s:transactions/foo@none", - BucketValue::set(123), - fixed_cost + 4, - ), // Added a new bucket + 1 element - ("s:transactions/foo@none", BucketValue::set(123), 0), // Same element in set, no change - ("s:transactions/foo@none", BucketValue::set(456), 4), // Different element in set -> +4 - ( - "d:transactions/foo@none", - BucketValue::distribution(1.into()), - fixed_cost + 8, - ), // New bucket + 1 element - ( - "d:transactions/foo@none", - BucketValue::distribution(1.into()), - 8, - ), // duplicate element - ( - "d:transactions/foo@none", - BucketValue::distribution(2.into()), - 8, - ), // 1 new element - ( - "g:transactions/foo@none", - BucketValue::gauge(3.into()), - fixed_cost, - ), // New bucket - ("g:transactions/foo@none", BucketValue::gauge(2.into()), 0), // gauge has constant size - ] { - let mut bucket = bucket.clone(); - bucket.value = metric_value; - bucket.name = metric_name.into(); +impl Iterator for PartitionIter { + type Item = (ProjectKey, Bucket); - let current_cost = aggregator.cost_tracker.total_cost(); - aggregator.merge(project_key, bucket).unwrap(); - let total_cost = aggregator.cost_tracker.total_cost(); - assert_eq!(total_cost, current_cost + expected_added_cost); - } + fn next(&mut self) -> Option { + let (key, data) = self.inner.next()?; - aggregator.pop_flush_buckets(true, |_| FlushDecision::Flush(Vec::new())); - assert_eq!(aggregator.cost_tracker.total_cost(), 0); + Some(( + key.project_key, + Bucket { + timestamp: key.timestamp, + width: self.bucket_interval, + name: key.metric_name, + tags: key.tags, + value: data.value, + metadata: data.metadata, + }, + )) } - #[tokio::test(start_paused = true)] - async fn test_aggregator_flush() { - // Make sure that the right cost is added / subtracted - let mut aggregator: Aggregator = Aggregator::new(AggregatorConfig { - bucket_interval: 10, - ..test_config() - }); - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let now = UnixTimestamp::now(); - - for i in 0..3u32 { - for (name, offset) in [("foo", 30), ("bar", 15)] { - let timestamp = now + Duration::from_secs(60 * u64::from(i) + offset); - let bucket = Bucket { - timestamp, - width: 0, - name: format!("c:transactions/{name}_{i}@none").into(), - value: BucketValue::counter(i.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - aggregator.merge(project_key, bucket).unwrap(); - } - } - - let mut flush_buckets = || { - let mut result = Vec::new(); - - let partitions = - 
aggregator.pop_flush_buckets(false, |_| FlushDecision::Flush(Vec::new())); - for (partition, v) in partitions { - assert!(partition.is_none()); - for (pk, buckets) in v { - assert_eq!(pk, project_key); - for bucket in buckets { - result.push(bucket.name.to_string()); - } - } - } - result.sort(); - result - }; - - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_0@none", - "c:transactions/foo_0@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_1@none", - "c:transactions/foo_1@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_2@none", - "c:transactions/foo_2@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(3600)).await; - assert!(flush_buckets().is_empty()); - - assert!(aggregator.into_buckets().is_empty()); + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() } +} - #[test] - fn test_get_bucket_timestamp_overflow() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; - - let aggregator: Aggregator = Aggregator::new(config); - - assert!(matches!( - aggregator - .get_bucket_timestamp(UnixTimestamp::now(), UnixTimestamp::from_secs(u64::MAX), 2) - .unwrap_err(), - AggregateMetricsError::InvalidTimestamp(_) - )); +impl std::iter::ExactSizeIterator for PartitionIter { + fn len(&self) -> usize { + self.inner.len() } +} - #[test] - fn test_get_bucket_timestamp_zero() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; +impl std::iter::FusedIterator for PartitionIter {} - let now = UnixTimestamp::now().as_secs(); - let rounded_now = UnixTimestamp::from_secs(now / 10 * 10); - assert_eq!( - config.get_bucket_timestamp(UnixTimestamp::from_secs(now), 0), - rounded_now +fn emit_stats(name: &str, stats: inner::Stats) { + for namespace in MetricNamespace::all() { + relay_statsd::metric!( + gauge(MetricGauges::Buckets) = *stats.count_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name ); - } - - #[test] - fn test_get_bucket_timestamp_multiple() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; - - let rounded_now = UnixTimestamp::now().as_secs() / 10 * 10; - let now = rounded_now + 3; - assert_eq!( - config - .get_bucket_timestamp(UnixTimestamp::from_secs(now), 20) - .as_secs(), - rounded_now + 10 + relay_statsd::metric!( + gauge(MetricGauges::BucketsCost) = *stats.cost_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name ); } +} - #[test] - fn test_get_bucket_timestamp_non_multiple() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; +fn emit_flush_partition_stats(name: &str, stats: inner::PartitionStats) { + relay_statsd::metric!(counter(MetricCounters::FlushCount) += 1, aggregator = name); - let rounded_now = UnixTimestamp::now().as_secs() / 10 * 10; - let now = rounded_now + 3; - assert_eq!( - config - .get_bucket_timestamp(UnixTimestamp::from_secs(now), 23) - .as_secs(), - rounded_now + 10 + for namespace in MetricNamespace::all() { + relay_statsd::metric!( + 
counter(MetricCounters::MergeHit) += *stats.count_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); - } - - #[test] - fn test_validate_bucket_key_str_length() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let short_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - assert!(validate_bucket_key(short_metric, &test_config()).is_ok()); - - let long_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(long_metric, &test_config()); - - assert!(matches!( - validation.unwrap_err(), - AggregateMetricsError::InvalidStringLength(_), - )); - - let short_metric_long_tag_key = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric_with_long_tag_key".into(), - tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(short_metric_long_tag_key, &test_config()).unwrap(); - assert_eq!(validation.tags.len(), 0); - - let short_metric_long_tag_value = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric_with_long_tag_value".into(), - tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(short_metric_long_tag_value, &test_config()).unwrap(); - assert_eq!(validation.tags.len(), 0); - } - - #[test] - fn test_validate_tag_values_special_chars() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let tag_value = "x".repeat(199) + "ΓΈ"; - assert_eq!(tag_value.chars().count(), 200); // Should be allowed - let short_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric".into(), - tags: BTreeMap::from([("foo".into(), tag_value.clone())]), - extracted_from_indexed: false, - }; - let validated_bucket = validate_metric_tags(short_metric, &test_config()); - assert_eq!(validated_bucket.tags["foo"], tag_value); - } - - #[test] - fn test_aggregator_cost_enforcement_total() { - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - 
let mut aggregator = Aggregator::new(AggregatorConfig { - max_total_bucket_bytes: Some(1), - ..test_config() - }); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - - aggregator.merge(project_key, bucket.clone()).unwrap(); - - assert_eq!( - aggregator.merge(project_key, bucket).unwrap_err(), - AggregateMetricsError::TotalLimitExceeded + relay_statsd::metric!( + counter(MetricCounters::MergeMiss) += *stats.merges_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); - } - - #[test] - fn test_aggregator_cost_enforcement_project() { - relay_test::setup(); - let mut config = test_config(); - config.max_project_key_bucket_bytes = Some(1); - - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - let mut aggregator: Aggregator = Aggregator::new(config); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - - aggregator.merge(project_key, bucket.clone()).unwrap(); - assert_eq!( - aggregator.merge(project_key, bucket).unwrap_err(), - AggregateMetricsError::ProjectLimitExceeded + relay_statsd::metric!( + counter(MetricCounters::FlushCost) += *stats.cost_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); } - - #[test] - fn test_parse_flush_batching() { - let json = r#"{"shift_key": "partition"}"#; - let parsed: AggregatorConfig = serde_json::from_str(json).unwrap(); - assert!(matches!(parsed.flush_batching, FlushBatching::Partition)); - } - - #[test] - fn test_aggregator_merge_metadata() { - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(config); - - let bucket1 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - let bucket2 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - - // We create a bucket with 3 merges and monotonically increasing timestamps. - let mut bucket3 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - bucket3 - .metadata - .merge(BucketMetadata::new(UnixTimestamp::from_secs(999997811))); - bucket3 - .metadata - .merge(BucketMetadata::new(UnixTimestamp::from_secs(999999811))); - - aggregator.merge(project_key, bucket1.clone()).unwrap(); - aggregator.merge(project_key, bucket2.clone()).unwrap(); - aggregator.merge(project_key, bucket3.clone()).unwrap(); - - let buckets_metadata: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(_, v)| v.metadata) - .collect(); - insta::assert_debug_snapshot!(buckets_metadata, @r###" - [ - BucketMetadata { - merges: 5, - received_at: Some( - UnixTimestamp(999994711), - ), - extracted_from_indexed: false, - }, - ] - "###); - } - - #[test] - fn test_get_flush_time_with_backdated_bucket() { - let mut config = test_config(); - config.bucket_interval = 3600; - config.initial_delay = 1300; - config.flush_partitions = Some(10); - config.flush_batching = FlushBatching::Partition; - - let reference_time = Instant::now(); - - let now_s = - (UnixTimestamp::now().as_secs() / config.bucket_interval) * config.bucket_interval; - - // First bucket has a timestamp two hours ago. 
- let timestamp = UnixTimestamp::from_secs(now_s - 7200); - let bucket_key_1 = BucketKey { - project_key: ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), - timestamp, - metric_name: "c:transactions/foo".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - - // Second bucket has a timestamp in this hour. - let timestamp = UnixTimestamp::from_secs(now_s); - let bucket_key_2 = BucketKey { - project_key: ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), - timestamp, - metric_name: "c:transactions/foo".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - - let now = UnixTimestamp::now(); - let flush_time_1 = get_flush_time(&config, now, reference_time, &bucket_key_1); - let flush_time_2 = get_flush_time(&config, now, reference_time, &bucket_key_2); - - assert_eq!(flush_time_1, flush_time_2); - } } diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap new file mode 100644 index 0000000000..c5044d3e85 --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap @@ -0,0 +1,150 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 6, + count_by_namespace: (unsupported:6), + cost: 822, + cost_by_namespace: (unsupported:822), + }, + "(70, Slot(1))", + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), + ( + 130, + Slot { + partition_key: 0, + stats: Slot { + count: 
1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), +] diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap new file mode 100644 index 0000000000..e55f5c49df --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap @@ -0,0 +1,150 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 6, + count_by_namespace: (unsupported:6), + cost: 822, + cost_by_namespace: (unsupported:822), + }, + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), + ( + 130, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), + "(130, Slot(1))", +] diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap new file mode 100644 index 
0000000000..8b9a2cf0b2 --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap @@ -0,0 +1,153 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 7, + count_by_namespace: (unsupported:7), + cost: 959, + cost_by_namespace: (unsupported:959), + }, + ( + 70, + Slot { + partition_key: 0, + stats: Slot { + count: 2, + count_by_namespace: (unsupported:2), + merges: 0, + merges_by_namespace: (0), + cost: 274, + cost_by_namespace: (unsupported:274), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 274, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + 190-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), + "(70, Slot(1))", + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), +] diff --git a/relay-metrics/src/aggregator/stats.rs b/relay-metrics/src/aggregator/stats.rs new file mode 100644 index 0000000000..b2b69a7163 --- /dev/null +++ b/relay-metrics/src/aggregator/stats.rs @@ -0,0 +1,163 @@ +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricNamespace; +use relay_base_schema::project::ProjectKey; + +use crate::aggregator::AggregateMetricsError; +use crate::utils::ByNamespace; + +/// Total stats tracked for the aggregator. +#[derive(Default, Debug)] +pub struct Total { + /// Total amount of buckets in the aggregator. + pub count: u64, + /// Total amount of buckets in the aggregator by namespace. + pub count_by_namespace: ByNamespace, + + /// Total cost of buckets in the aggregator. 
+ pub cost: u64, + /// Total cost of buckets in the aggregator by namespace. + pub cost_by_namespace: ByNamespace, +} + +impl Total { + pub fn remove_slot(&mut self, slot: &Slot) { + let Self { + count, + count_by_namespace, + cost, + cost_by_namespace, + } = self; + + *count -= slot.count; + *count_by_namespace -= slot.count_by_namespace; + *cost -= slot.cost; + *cost_by_namespace -= slot.cost_by_namespace; + } +} + +/// Stats tracked by slot in the aggregator. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct Slot { + /// Amount of buckets created in this slot. + pub count: u64, + /// Amount of buckets created in this slot by namespace. + pub count_by_namespace: ByNamespace, + /// Amount of merges happened in this slot. + pub merges: u64, + /// Amount of merges happened in this slot by namespace. + pub merges_by_namespace: ByNamespace, + + /// Cost of buckets in this slot. + pub cost: u64, + /// Cost of buckets in this slot by namespace. + pub cost_by_namespace: ByNamespace, + /// Cost of buckets in this slot by project. + pub cost_by_project: HashMap, +} + +impl Slot { + /// Resets the slot to its initial empty state. + pub fn reset(&mut self) { + let Self { + count, + count_by_namespace, + merges, + merges_by_namespace, + cost, + cost_by_namespace, + cost_by_project, + } = self; + + *count = 0; + *count_by_namespace = Default::default(); + *merges = 0; + *merges_by_namespace = Default::default(); + + *cost = 0; + *cost_by_namespace = Default::default(); + // Keep the allocation around but at the same time make it possible for it to shrink. + cost_by_project.shrink_to_fit(); + cost_by_project.clear(); + } + + /// Increments the count by one. + pub fn incr_count(&mut self, total: &mut Total, namespace: MetricNamespace) { + self.count += 1; + *self.count_by_namespace.get_mut(namespace) += 1; + total.count += 1; + *total.count_by_namespace.get_mut(namespace) += 1; + } + + /// Increments the amount of merges in the slot by one. + pub fn incr_merges(&mut self, namespace: MetricNamespace) { + self.merges += 1; + *self.merges_by_namespace.get_mut(namespace) += 1; + } + + /// Tries to reserve a certain amount of cost. + /// + /// Returns an error if there is not enough budget left. 
+ pub fn reserve<'a>( + &'a mut self, + total: &'a mut Total, + project_key: ProjectKey, + namespace: MetricNamespace, + cost: u64, + limits: &Limits, + ) -> Result, AggregateMetricsError> { + if total.cost + cost > limits.max_total { + return Err(AggregateMetricsError::TotalLimitExceeded); + } + let project = self.cost_by_project.entry(project_key).or_insert(0); + if *project + cost > limits.max_partition_project { + return Err(AggregateMetricsError::ProjectLimitExceeded); + } + + Ok(Reservation { + total, + cost_partition: &mut self.cost, + cost_partition_by_namespace: &mut self.cost_by_namespace, + cost_partition_project: project, + namespace, + reserved: cost, + }) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Limits { + pub max_total: u64, + pub max_partition_project: u64, +} + +#[must_use = "a reservation does nothing if it is not used"] +pub struct Reservation<'a> { + total: &'a mut Total, + + cost_partition: &'a mut u64, + cost_partition_by_namespace: &'a mut ByNamespace, + cost_partition_project: &'a mut u64, + + namespace: MetricNamespace, + + reserved: u64, +} + +impl Reservation<'_> { + pub fn consume(self) { + let reserved = self.reserved; + self.consume_with(reserved); + } + + pub fn consume_with(self, cost: u64) { + debug_assert!(cost <= self.reserved, "less reserved than used"); + // Update total costs. + self.total.cost += cost; + *self.total.cost_by_namespace.get_mut(self.namespace) += cost; + + // Update all partition costs. + *self.cost_partition += cost; + *self.cost_partition_by_namespace.get_mut(self.namespace) += cost; + *self.cost_partition_project += cost; + } +} diff --git a/relay-metrics/src/lib.rs b/relay-metrics/src/lib.rs index 4897553525..a6891087ac 100644 --- a/relay-metrics/src/lib.rs +++ b/relay-metrics/src/lib.rs @@ -75,6 +75,7 @@ mod bucket; mod finite; mod protocol; mod statsd; +mod utils; mod view; pub use bucket::*; diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs index 2e4ac42535..7b0caf2779 100644 --- a/relay-metrics/src/statsd.rs +++ b/relay-metrics/src/statsd.rs @@ -1,29 +1,4 @@ -use relay_statsd::{CounterMetric, GaugeMetric, HistogramMetric, SetMetric, TimerMetric}; - -/// Set metrics for Relay Metrics. -pub enum MetricSets { - /// Count the number of unique buckets created. - /// - /// This is a set of bucketing keys. The metric is basically equivalent to - /// `metrics.buckets.merge.miss` for a single Relay, but could be useful to determine how much - /// duplicate buckets there are when multiple instances are running. - /// - /// The hashing is platform-dependent at the moment, so all your relays that send this metric - /// should run on the same CPU architecture, otherwise this metric is not reliable. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - /// - `namespace`: The namespace of the metric for which the bucket was created. - UniqueBucketsCreated, -} - -impl SetMetric for MetricSets { - fn name(&self) -> &'static str { - match *self { - Self::UniqueBucketsCreated => "metrics.buckets.created.unique", - } - } -} +use relay_statsd::{CounterMetric, GaugeMetric}; /// Counter metrics for Relay Metrics. pub enum MetricCounters { @@ -33,58 +8,32 @@ pub enum MetricCounters { /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). /// - `namespace`: The namespace of the metric. MergeHit, - /// Incremented every time a bucket is created. 
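// Editor's note: sketch of the reserve-then-consume pattern implemented above, as the
// inner aggregator is expected to use it. The actual call site is in `inner.rs` and not
// shown in this hunk; `merge_with_budget` and `estimated_cost` are hypothetical names,
// and the imports assume this code lives next to `stats.rs` inside the aggregator module.
// The point: an upper bound is reserved before the merge, and only the cost the merge
// actually added is recorded, keeping slot and total stats in sync.
use relay_base_schema::metrics::MetricNamespace;
use relay_base_schema::project::ProjectKey;

use super::stats::{Limits, Slot, Total};
use super::AggregateMetricsError;

fn merge_with_budget(
    slot: &mut Slot,
    total: &mut Total,
    project_key: ProjectKey,
    namespace: MetricNamespace,
    estimated_cost: u64,
    limits: &Limits,
) -> Result<(), AggregateMetricsError> {
    // Fails with TotalLimitExceeded / ProjectLimitExceeded before anything is merged.
    let reservation = slot.reserve(total, project_key, namespace, estimated_cost, limits)?;

    // ... merge the bucket here and measure how much cost it really added
    // (e.g. 0 for an existing counter, a few bytes per new set entry) ...
    let actual_added_cost = estimated_cost;

    // Record at most the reserved amount; a reservation that is never consumed
    // records nothing.
    reservation.consume_with(actual_added_cost);
    Ok(())
}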
/// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). /// - `namespace`: The namespace of the metric. MergeMiss, -} - -impl CounterMetric for MetricCounters { - fn name(&self) -> &'static str { - match *self { - Self::MergeHit => "metrics.buckets.merge.hit", - Self::MergeMiss => "metrics.buckets.merge.miss", - } - } -} - -/// Timer metrics for Relay Metrics. -pub enum MetricTimers { - /// Time in milliseconds spent scanning metric buckets to flush. - /// - /// Relay scans metric buckets in regular intervals and flushes expired buckets. This timer - /// shows the time it takes to perform this scan and remove the buckets from the internal cache. - /// Sending the metric buckets to upstream is outside of this timer. + /// Incremented for every aggregator flush. /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsScanDuration, -} - -impl TimerMetric for MetricTimers { - fn name(&self) -> &'static str { - match *self { - Self::BucketsScanDuration => "metrics.buckets.scan_duration", - } - } -} - -/// Histogram metrics for Relay Metrics. -#[allow(clippy::enum_variant_names)] -pub enum MetricHistograms { - /// Distribution of invalid bucket timestamps observed, relative to the time of observation. + FlushCount, + /// Incremented with the cost of a partition for every aggregator flush. /// - /// This is a temporary metric to better understand why we see so many invalid timestamp errors. - InvalidBucketTimestamp, + /// This metric is tagged with: + /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). + /// - `namespace`: The namespace of the metric. + FlushCost, } -impl HistogramMetric for MetricHistograms { +impl CounterMetric for MetricCounters { fn name(&self) -> &'static str { match *self { - Self::InvalidBucketTimestamp => "metrics.buckets.invalid_timestamp", + Self::MergeHit => "metrics.buckets.merge.hit", + Self::MergeMiss => "metrics.buckets.merge.miss", + Self::FlushCount => "metrics.buckets.flush.count", + Self::FlushCost => "metrics.buckets.flush.cost", } } } @@ -95,18 +44,14 @@ pub enum MetricGauges { /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). + /// - `namespace`: The namespace of the metric. Buckets, /// The total storage cost of metric buckets in Relay's metrics aggregator. /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsCost, - /// The average number of elements in a bucket when flushed. - /// - /// This metric is tagged with: - /// - `metric_type`: "c", "d", "g" or "s". /// - `namespace`: The namespace of the metric. - AvgBucketSize, + BucketsCost, } impl GaugeMetric for MetricGauges { @@ -114,7 +59,6 @@ impl GaugeMetric for MetricGauges { match *self { Self::Buckets => "metrics.buckets", Self::BucketsCost => "metrics.buckets.cost", - Self::AvgBucketSize => "metrics.buckets.size", } } } diff --git a/relay-metrics/src/utils.rs b/relay-metrics/src/utils.rs new file mode 100644 index 0000000000..a167c03bd9 --- /dev/null +++ b/relay-metrics/src/utils.rs @@ -0,0 +1,140 @@ +use core::fmt; +use std::collections::BTreeMap; +use std::ops::{AddAssign, SubAssign}; + +use relay_base_schema::metrics::MetricNamespace; + +/// Estimates the number of bytes needed to encode the tags. 
+/// +/// Note that this does not necessarily match the exact memory footprint of the tags, +/// because data structures or their serialization have overheads. +pub fn tags_cost(tags: &BTreeMap) -> usize { + tags.iter().map(|(k, v)| k.len() + v.len()).sum() +} + +#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ByNamespace { + pub sessions: T, + pub transactions: T, + pub spans: T, + pub custom: T, + pub stats: T, + pub unsupported: T, +} + +impl ByNamespace { + pub fn get(&self, namespace: MetricNamespace) -> &T { + match namespace { + MetricNamespace::Sessions => &self.sessions, + MetricNamespace::Transactions => &self.transactions, + MetricNamespace::Spans => &self.spans, + MetricNamespace::Custom => &self.custom, + MetricNamespace::Stats => &self.stats, + MetricNamespace::Unsupported => &self.unsupported, + } + } + + pub fn get_mut(&mut self, namespace: MetricNamespace) -> &mut T { + match namespace { + MetricNamespace::Sessions => &mut self.sessions, + MetricNamespace::Transactions => &mut self.transactions, + MetricNamespace::Spans => &mut self.spans, + MetricNamespace::Custom => &mut self.custom, + MetricNamespace::Stats => &mut self.stats, + MetricNamespace::Unsupported => &mut self.unsupported, + } + } +} + +impl fmt::Debug for ByNamespace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // A more compact representation. Mainly for snapshot testing. + write!(f, "(")?; + + let mut values = MetricNamespace::all() + .into_iter() + .map(|ns| (ns, self.get(ns))) + .filter(|(_, v)| v != &&T::default()) + .enumerate() + .peekable(); + + match values.peek() { + None => write!(f, "{:?}", T::default())?, + Some(_) => { + for (i, (namespace, value)) in values { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{namespace}:{value:?}")?; + } + } + } + + write!(f, ")") + } +} + +macro_rules! impl_op { + ($op:tt, $opfn:ident) => { + impl $op for ByNamespace { + fn $opfn(&mut self, rhs: Self) { + let Self { + sessions, + transactions, + spans, + custom, + stats, + unsupported, + } = self; + + $op::$opfn(sessions, rhs.sessions); + $op::$opfn(transactions, rhs.transactions); + $op::$opfn(spans, rhs.spans); + $op::$opfn(custom, rhs.custom); + $op::$opfn(stats, rhs.stats); + $op::$opfn(unsupported, rhs.unsupported); + } + } + }; +} + +impl_op!(AddAssign, add_assign); +impl_op!(SubAssign, sub_assign); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get() { + let mut v = ByNamespace::::default(); + + for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + assert_eq!(*v.get(namespace), 0); + assert_eq!(*v.get_mut(namespace), 0); + *v.get_mut(namespace) += i; + assert_eq!(*v.get(namespace), i); + } + + // Make sure a getter does not override another value by iterating over all values again. 
+ for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + assert_eq!(*v.get(namespace), i); + assert_eq!(*v.get_mut(namespace), i); + } + } + + #[test] + fn test_add_sub() { + let mut v = ByNamespace::::default(); + for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + *v.get_mut(namespace) += i; + } + + let mut v2 = v; + v2 -= v; + assert_eq!(v2, Default::default()); + + v2 += v; + assert_eq!(v2, v); + } +} diff --git a/relay-metrics/src/view.rs b/relay-metrics/src/view.rs index 86862c16aa..5eab80a09e 100644 --- a/relay-metrics/src/view.rs +++ b/relay-metrics/src/view.rs @@ -3,8 +3,7 @@ use serde::ser::{SerializeMap, SerializeSeq}; use serde::Serialize; use crate::{ - aggregator, BucketMetadata, CounterType, DistributionType, GaugeValue, MetricName, SetType, - SetValue, + BucketMetadata, CounterType, DistributionType, GaugeValue, MetricName, SetType, SetValue, }; use relay_base_schema::metrics::MetricType; use std::collections::BTreeMap; @@ -518,7 +517,7 @@ impl<'a> BucketView<'a> { /// Note that this does not match the exact size of the serialized payload. Instead, the size is /// approximated through tags and a static overhead. fn estimated_base_size(&self) -> usize { - BUCKET_SIZE + self.name().len() + aggregator::tags_cost(self.tags()) + BUCKET_SIZE + self.name().len() + crate::utils::tags_cost(self.tags()) } /// Estimates the number of bytes needed to serialize the bucket. diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 9cebca8ef2..d7115ac31a 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -38,6 +38,7 @@ axum-server = { workspace = true } arc-swap = { workspace = true } backoff = { workspace = true } brotli = { workspace = true } +bytecount = { workspace = true } bytes = { workspace = true, features = ["serde"] } bzip2 = { workspace = true } chrono = { workspace = true, features = ["clock"] } diff --git a/relay-server/src/metrics/metric_stats.rs b/relay-server/src/metrics/metric_stats.rs index 4e1aa6aafb..8ebc389852 100644 --- a/relay-server/src/metrics/metric_stats.rs +++ b/relay-server/src/metrics/metric_stats.rs @@ -279,9 +279,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -299,9 +297,7 @@ mod tests { ) ); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -354,9 +350,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -420,9 +414,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index efc960609d..b81b804f91 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ 
@@ -1,18 +1,21 @@
+use std::pin::Pin;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 
+use hashbrown::hash_map::Entry;
 use hashbrown::HashMap;
 use relay_base_schema::project::ProjectKey;
 use relay_config::AggregatorServiceConfig;
-use relay_metrics::aggregator::{self, AggregateMetricsError, FlushDecision};
-use relay_metrics::{Bucket, UnixTimestamp};
+use relay_metrics::aggregator::{self, AggregateMetricsError, AggregatorConfig, Partition};
+use relay_metrics::Bucket;
 use relay_quotas::{RateLimits, Scoping};
-use relay_system::{Controller, FromMessage, Interface, NoResponse, Recipient, Service, Shutdown};
+use relay_system::{Controller, FromMessage, Interface, NoResponse, Recipient, Service};
+use tokio::time::{Instant, Sleep};
 
 use crate::services::projects::cache::ProjectCacheHandle;
 use crate::services::projects::project::{ProjectInfo, ProjectState};
-use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers};
+use crate::statsd::{RelayCounters, RelayTimers};
 
 /// Aggregator for metric buckets.
 ///
@@ -28,10 +31,6 @@ use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers};
 pub enum Aggregator {
     /// Merge the buckets.
     MergeBuckets(MergeBuckets),
-
-    /// Message is used only for tests to get the current number of buckets in `AggregatorService`.
-    #[cfg(test)]
-    BucketCountInquiry(BucketCountInquiry, relay_system::Sender<usize>),
 }
 
 impl Aggregator {
@@ -39,8 +38,6 @@ impl Aggregator {
     pub fn variant(&self) -> &'static str {
         match self {
             Aggregator::MergeBuckets(_) => "MergeBuckets",
-            #[cfg(test)]
-            Aggregator::BucketCountInquiry(_, _) => "BucketCountInquiry",
         }
     }
 }
@@ -54,19 +51,6 @@ impl FromMessage<MergeBuckets> for Aggregator {
     }
 }
 
-#[cfg(test)]
-impl FromMessage<BucketCountInquiry> for Aggregator {
-    type Response = relay_system::AsyncResponse<usize>;
-    fn from_message(message: BucketCountInquiry, sender: relay_system::Sender<usize>) -> Self {
-        Self::BucketCountInquiry(message, sender)
-    }
-}
-
-/// Used only for testing the `AggregatorService`.
-#[cfg(test)]
-#[derive(Debug)]
-pub struct BucketCountInquiry;
-
 /// A message containing a vector of buckets to be flushed.
 ///
 /// Handlers must respond to this message with a `Result`:
@@ -76,9 +60,7 @@ pub struct BucketCountInquiry;
 #[derive(Clone, Debug)]
 pub struct FlushBuckets {
     /// The partition to which the buckets belong.
-    ///
-    /// When set to `Some` it means that partitioning was enabled in the [`Aggregator`].
-    pub partition_key: Option<u64>,
+    pub partition_key: u32,
     /// The buckets to be flushed.
     pub buckets: HashMap<ProjectKey, ProjectBuckets>,
 }
@@ -102,19 +84,14 @@ impl Extend<Bucket> for ProjectBuckets {
     }
 }
 
-enum AggregatorState {
-    Running,
-    ShuttingDown,
-}
-
 /// Service implementing the [`Aggregator`] interface.
 pub struct AggregatorService {
     aggregator: aggregator::Aggregator,
-    state: AggregatorState,
     receiver: Option<Recipient<FlushBuckets, NoResponse>>,
     project_cache: ProjectCacheHandle,
-    flush_interval_ms: u64,
+    config: AggregatorServiceConfig,
     can_accept_metrics: Arc<AtomicBool>,
+    next_flush: Pin<Box<Sleep>>,
 }
 
 impl AggregatorService {
@@ -137,14 +114,14 @@ impl AggregatorService {
         receiver: Option<Recipient<FlushBuckets, NoResponse>>,
         project_cache: ProjectCacheHandle,
     ) -> Self {
-        let aggregator = aggregator::Aggregator::named(name, config.aggregator);
+        let aggregator = aggregator::Aggregator::named(name, &config.aggregator);
         Self {
             receiver,
-            state: AggregatorState::Running,
-            flush_interval_ms: config.flush_interval_ms,
-            can_accept_metrics: Arc::new(AtomicBool::new(!aggregator.totals_cost_exceeded())),
+            config,
+            can_accept_metrics: Arc::new(AtomicBool::new(true)),
             aggregator,
             project_cache,
+            next_flush: Box::pin(tokio::time::sleep(Duration::from_secs(0))),
         }
     }
 
@@ -158,90 +135,100 @@ impl AggregatorService {
     /// to the receiver to send the [`MergeBuckets`] message back if buckets could not be flushed
     /// and we require another re-try.
     ///
-    /// If `force` is true, flush all buckets unconditionally and do not attempt to merge back.
-    fn try_flush(&mut self) {
-        let force_flush = matches!(&self.state, AggregatorState::ShuttingDown);
-
-        let flush_decision = |project_key| {
-            let project = self.project_cache.get(project_key);
-
-            let project_info = match project.state() {
-                ProjectState::Enabled(project_info) => project_info,
-                ProjectState::Disabled => return FlushDecision::Drop,
-                // Delay the flush, project is not yet ready.
-                //
-                // Querying the project will make sure it is eventually fetched.
-                ProjectState::Pending => {
-                    relay_statsd::metric!(
-                        counter(RelayCounters::ProjectStateFlushMetricsNoProject) += 1
-                    );
-                    return FlushDecision::Delay;
-                }
-            };
-
-            let Some(scoping) = project_info.scoping(project_key) else {
-                // This should never happen, at this point we should always have a valid
-                // project with the necessary information to construct a scoping.
-                //
-                // Ideally we enforce this through the type system in the future.
-                relay_log::error!(
-                    tags.project_key = project_key.as_str(),
-                    "dropping buckets because of missing scope",
-                );
-                return FlushDecision::Drop;
-            };
+    /// Returns when the next flush should be attempted.
+    fn try_flush(&mut self) -> Duration {
+        if cfg!(test) {
+            // Tests are running in a single thread / current thread runtime,
+            // which is required for 'fast-forwarding' and `block_in_place`
+            // requires a multi threaded runtime. Relay always requires a multi
+            // threaded runtime.
+ self.do_try_flush() + } else { + tokio::task::block_in_place(|| self.do_try_flush()) + } + } - FlushDecision::Flush(ProjectBuckets { - scoping, - project_info: Arc::clone(project_info), - rate_limits: project.rate_limits().current_limits(), - buckets: Vec::new(), - }) + fn do_try_flush(&mut self) -> Duration { + let partition = match self.aggregator.try_flush_next(SystemTime::now()) { + Ok(partition) => partition, + Err(duration) => return duration, }; + self.can_accept_metrics.store(true, Ordering::Relaxed); - let partitions = self - .aggregator - .pop_flush_buckets(force_flush, flush_decision); + self.flush_partition(partition); - self.can_accept_metrics - .store(!self.aggregator.totals_cost_exceeded(), Ordering::Relaxed); + self.aggregator.next_flush_at(SystemTime::now()) + } - if partitions.is_empty() { + fn flush_partition(&mut self, partition: Partition) { + let Some(receiver) = &self.receiver else { return; - } - - let partitions_count = partitions.len() as u64; - relay_log::trace!("flushing {} partitions to receiver", partitions_count); - relay_statsd::metric!( - histogram(RelayHistograms::PartitionsFlushed) = partitions_count, - aggregator = self.aggregator.name(), - ); + }; - let mut total_bucket_count = 0u64; - for buckets_by_project in partitions.values() { - for buckets in buckets_by_project.values() { - let bucket_count = buckets.buckets.len() as u64; - total_bucket_count += bucket_count; + let mut buckets_by_project = hashbrown::HashMap::new(); + + let partition_key = partition.partition_key; + for (project_key, bucket) in partition { + let s = match buckets_by_project.entry(project_key) { + Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + Entry::Vacant(vacant_entry) => { + let project = self.project_cache.get(project_key); + + let project_info = match project.state() { + ProjectState::Enabled(info) => Arc::clone(info), + ProjectState::Disabled => continue, // Drop the bucket. + ProjectState::Pending => { + // Return to the aggregator, which will assign a new flush time. + if let Err(error) = self.aggregator.merge(project_key, bucket) { + relay_log::error!( + tags.aggregator = self.aggregator.name(), + tags.project_key = project_key.as_str(), + bucket.error = &error as &dyn std::error::Error, + "failed to return metric bucket back to the aggregator" + ); + } + relay_statsd::metric!( + counter(RelayCounters::ProjectStateFlushMetricsNoProject) += 1 + ); + continue; + } + }; + + let rate_limits = project.rate_limits().current_limits(); + let Some(scoping) = project_info.scoping(project_key) else { + // This should never happen, at this point we should always have a valid + // project with the necessary information to construct a scoping. + // + // Ideally we enforce this through the type system in the future. 
+                        relay_log::error!(
+                            tags.project_key = project_key.as_str(),
+                            "dropping buckets because of missing scope",
+                        );
+                        continue;
+                    };
+
+                    vacant_entry.insert(ProjectBuckets {
+                        buckets: Vec::new(),
+                        scoping,
+                        project_info,
+                        rate_limits,
+                    })
+                }
+            };
 
-            relay_statsd::metric!(
-                histogram(RelayHistograms::BucketsFlushedPerProject) = bucket_count,
-                aggregator = self.aggregator.name(),
-            );
-        }
+            s.buckets.push(bucket);
         }
 
-        relay_statsd::metric!(
-            histogram(RelayHistograms::BucketsFlushed) = total_bucket_count,
-            aggregator = self.aggregator.name(),
-        );
+        if !buckets_by_project.is_empty() {
+            relay_log::debug!(
+                "flushing buckets for {} projects in partition {partition_key}",
+                buckets_by_project.len()
+            );
 
-        if let Some(ref receiver) = self.receiver {
-            for (partition_key, buckets_by_project) in partitions {
-                receiver.send(FlushBuckets {
-                    partition_key,
-                    buckets: buckets_by_project,
-                })
-            }
+            receiver.send(FlushBuckets {
+                partition_key,
+                buckets: buckets_by_project,
+            });
         }
     }
 
@@ -251,10 +238,13 @@ impl AggregatorService {
             buckets,
         } = msg;
 
-        let now = UnixTimestamp::now();
-        for bucket in buckets.into_iter() {
-            match self.aggregator.merge_with_options(project_key, bucket, now) {
-                // Ignore invalid timestamp errors.
+        for mut bucket in buckets.into_iter() {
+            if !validate_bucket(&mut bucket, &self.config) {
+                continue;
+            };
+
+            match self.aggregator.merge(project_key, bucket) {
+                // Ignore invalid timestamp errors and drop the bucket.
                 Err(AggregateMetricsError::InvalidTimestamp(_)) => {}
                 Err(AggregateMetricsError::TotalLimitExceeded) => {
                     relay_log::error!(
@@ -288,17 +278,39 @@ impl AggregatorService {
     fn handle_message(&mut self, message: Aggregator) {
         match message {
             Aggregator::MergeBuckets(msg) => self.handle_merge_buckets(msg),
-            #[cfg(test)]
-            Aggregator::BucketCountInquiry(_, sender) => {
-                sender.send(self.aggregator.bucket_count())
-            }
         }
     }
 
-    fn handle_shutdown(&mut self, message: Shutdown) {
-        if message.timeout.is_some() {
-            self.state = AggregatorState::ShuttingDown;
+    fn handle_shutdown(&mut self) {
+        relay_log::info!(
+            "Shutting down metrics aggregator {}",
+            self.aggregator.name()
+        );
+
+        // Create a new aggregator with very aggressive flush parameters.
+        let aggregator = aggregator::Aggregator::named(
+            self.aggregator.name().to_owned(),
+            &AggregatorConfig {
+                bucket_interval: 1,
+                aggregator_size: 1,
+                initial_delay: 0,
+                ..self.config.aggregator
+            },
+        );
+
+        let previous = std::mem::replace(&mut self.aggregator, aggregator);
+
+        let mut partitions = 0;
+        for partition in previous.into_partitions() {
+            self.flush_partition(partition);
+            partitions += 1;
         }
+        relay_log::debug!("Force flushed {partitions} partitions");
+
+        // Reset the next flush time, to the time of the new aggregator.
+        self.next_flush
+            .as_mut()
+            .reset(Instant::now() + self.aggregator.next_flush_at(SystemTime::now()));
     }
 }
 
@@ -306,8 +318,6 @@ impl Service for AggregatorService {
     type Interface = Aggregator;
 
     async fn run(mut self, mut rx: relay_system::Receiver<Self::Interface>) {
-        let mut ticker = tokio::time::interval(Duration::from_millis(self.flush_interval_ms));
-        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
         let mut shutdown = Controller::shutdown_handle();
 
         macro_rules! timed {
@@ -322,26 +332,18 @@ impl Service for AggregatorService {
             }};
         }
 
-        // Note that currently this loop never exits and will run till the tokio runtime shuts
-        // down. This is about to change with the refactoring for the shutdown process.
         loop {
             tokio::select!
{ biased; - _ = ticker.tick() => timed!( - "try_flush", - if cfg!(test) { - // Tests are running in a single thread / current thread runtime, - // which is required for 'fast-forwarding' and `block_in_place` - // requires a multi threaded runtime. Relay always requires a multi - // threaded runtime. - self.try_flush() - } else { - tokio::task::block_in_place(|| self.try_flush()) + _ = &mut self.next_flush => timed!( + "try_flush", { + let next = self.try_flush(); + self.next_flush.as_mut().reset(Instant::now() + next); } ), Some(message) = rx.recv() => timed!(message.variant(), self.handle_message(message)), - shutdown = shutdown.notified() => timed!("shutdown", self.handle_shutdown(shutdown)), + _ = shutdown.notified() => timed!("shutdown", self.handle_shutdown()), else => break, } @@ -351,14 +353,13 @@ impl Service for AggregatorService { impl Drop for AggregatorService { fn drop(&mut self) { - let remaining_buckets = self.aggregator.bucket_count(); - if remaining_buckets > 0 { + if !self.aggregator.is_empty() { relay_log::error!( tags.aggregator = self.aggregator.name(), - "metrics aggregator dropping {remaining_buckets} buckets" + "metrics aggregator dropping buckets" ); relay_statsd::metric!( - counter(RelayCounters::BucketsDropped) += remaining_buckets as i64, + counter(RelayCounters::BucketsDropped) += 1, aggregator = self.aggregator.name(), ); } @@ -395,6 +396,35 @@ impl AggregatorHandle { } } +/// Validates the metric name and its tags are correct. +/// +/// Returns `false` if the metric should be dropped. +fn validate_bucket(bucket: &mut Bucket, config: &AggregatorServiceConfig) -> bool { + if bucket.name.len() > config.max_name_length { + relay_log::debug!( + "Invalid metric name, too long (> {}): {:?}", + config.max_name_length, + bucket.name + ); + return false; + } + + bucket.tags.retain(|tag_key, tag_value| { + if tag_key.len() > config.max_tag_key_length { + relay_log::debug!("Invalid metric tag key {tag_key:?}"); + return false; + } + if bytecount::num_chars(tag_value.as_bytes()) > config.max_tag_value_length { + relay_log::debug!("Invalid metric tag value"); + return false; + } + + true + }); + + true +} + #[cfg(test)] mod tests { use std::collections::BTreeMap; @@ -504,10 +534,8 @@ mod tests { aggregator.send(MergeBuckets::new(project_key, vec![bucket])); - let buckets_count = aggregator.send(BucketCountInquiry).await.unwrap(); - // Let's check the number of buckets in the aggregator just after sending a - // message. - assert_eq!(buckets_count, 1); + // Nothing flushed. + assert_eq!(receiver.bucket_count(), 0); // Wait until flush delay has passed. It is up to 2s: 1s for the current bucket // and 1s for the flush shift. Adding 100ms buffer. 
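The hunks above move metric name and tag validation out of the aggregator into the service via `validate_bucket`: an over-long name drops the whole bucket, while an over-long tag key or value only drops that tag (keys are limited by byte length, values by character count via `bytecount::num_chars`). A standalone sketch of that tag-trimming rule, illustrative only and not part of the patch, with placeholder names and limits:

    use std::collections::BTreeMap;

    // Simplified stand-in for the tag handling in `validate_bucket`.
    // Keys are checked by byte length, values by character count.
    fn trim_tags(tags: &mut BTreeMap<String, String>, max_key_len: usize, max_value_chars: usize) {
        tags.retain(|key, value| {
            key.len() <= max_key_len && value.chars().count() <= max_value_chars
        });
    }

    fn main() {
        let mut tags = BTreeMap::from([
            ("ok".to_owned(), "value".to_owned()),
            ("k".repeat(300), "value".to_owned()), // dropped: key too long
        ]);
        trim_tags(&mut tags, 200, 200);
        assert_eq!(tags.len(), 1);
    }
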
@@ -515,4 +543,84 @@
         // receiver must have 1 bucket flushed
         assert_eq!(receiver.bucket_count(), 1);
     }
+
+    fn test_config() -> AggregatorServiceConfig {
+        AggregatorServiceConfig {
+            max_name_length: 200,
+            max_tag_key_length: 200,
+            max_tag_value_length: 200,
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn test_validate_bucket_key_str_length() {
+        relay_test::setup();
+        let mut short_metric = Bucket {
+            timestamp: UnixTimestamp::now(),
+            name: "c:transactions/a_short_metric".into(),
+            tags: BTreeMap::new(),
+            metadata: Default::default(),
+            width: 0,
+            value: BucketValue::Counter(0.into()),
+        };
+        assert!(validate_bucket(&mut short_metric, &test_config()));
+
+        let mut long_metric = Bucket {
+            timestamp: UnixTimestamp::now(),
+            name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(),
+            tags: BTreeMap::new(),
+            metadata: Default::default(),
+            width: 0,
+            value: BucketValue::Counter(0.into()),
+        };
+        assert!(!validate_bucket(&mut long_metric, &test_config()));
+
+        let mut short_metric_long_tag_key = Bucket {
+            timestamp: UnixTimestamp::now(),
+            name: "c:transactions/a_short_metric_with_long_tag_key".into(),
+            tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]),
+            metadata: Default::default(),
+            width: 0,
+            value: BucketValue::Counter(0.into()),
+        };
+        assert!(validate_bucket(
+            &mut short_metric_long_tag_key,
+            &test_config()
+        ));
+        assert_eq!(short_metric_long_tag_key.tags.len(), 0);
+
+        let mut short_metric_long_tag_value = Bucket {
+            timestamp: UnixTimestamp::now(),
+            name: "c:transactions/a_short_metric_with_long_tag_value".into(),
+            tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]),
+            metadata: Default::default(),
+            width: 0,
+            value: BucketValue::Counter(0.into()),
+        };
+        assert!(validate_bucket(
+            &mut short_metric_long_tag_value,
+            &test_config()
+        ));
+        assert_eq!(short_metric_long_tag_value.tags.len(), 0);
+    }
+
+    #[test]
+    fn test_validate_tag_values_special_chars() {
+        relay_test::setup();
+
+        let tag_value = "x".repeat(199) + "ø";
+        assert_eq!(tag_value.chars().count(), 200); // Should be allowed
+
+        let mut short_metric = Bucket {
+            timestamp: UnixTimestamp::now(),
+            name: "c:transactions/a_short_metric".into(),
+            tags: BTreeMap::from([("foo".into(), tag_value.clone())]),
+            metadata: Default::default(),
+            width: 0,
+            value: BucketValue::Counter(0.into()),
+        };
+        assert!(validate_bucket(&mut short_metric, &test_config()));
+        assert_eq!(short_metric.tags["foo"], tag_value);
+    }
 }
diff --git a/relay-server/src/services/metrics/router.rs b/relay-server/src/services/metrics/router.rs
index 0e98a19d3d..8ae58f172d 100644
--- a/relay-server/src/services/metrics/router.rs
+++ b/relay-server/src/services/metrics/router.rs
@@ -113,8 +113,6 @@ impl StartedRouter {
             {
                 match message {
                    Aggregator::MergeBuckets(msg) => self.handle_merge_buckets(msg),
-                    #[cfg(test)]
-                    Aggregator::BucketCountInquiry(_, _sender) => (), // not supported
                 }
             }
         )
diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs
index 09a04dae46..92c8ee5566 100644
--- a/relay-server/src/services/processor.rs
+++ b/relay-server/src/services/processor.rs
@@ -1357,7 +1357,6 @@ impl EnvelopeProcessorService {
                 self.inner
                     .config
                     .aggregator_config_for(MetricNamespace::Spans)
-                    .aggregator
                     .max_tag_value_length,
                 extract_spans,
             );
@@ -1438,9 +1437,7 @@ impl EnvelopeProcessorService {
             received_at: Some(managed_envelope.received_at()),
             max_secs_in_past: Some(retention_days * 24 * 3600),
             max_secs_in_future: Some(self.inner.config.max_secs_in_future()),
-            transaction_timestamp_range: Some(
-                transaction_aggregator_config.aggregator.timestamp_range(),
-            ),
+            transaction_timestamp_range: Some(transaction_aggregator_config.timestamp_range()),
             is_validated: false,
         };
 
@@ -1471,7 +1468,6 @@ impl EnvelopeProcessorService {
             },
             max_name_and_unit_len: Some(
                 transaction_aggregator_config
-                    .aggregator
                     .max_name_length
                     .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD),
             ),
@@ -1489,7 +1485,6 @@ impl EnvelopeProcessorService {
                 .inner
                 .config
                 .aggregator_config_for(MetricNamespace::Spans)
-                .aggregator
                 .max_tag_value_length,
             is_renormalize: false,
             remove_other: full_normalization,
@@ -2790,9 +2785,9 @@ impl EnvelopeProcessorService {
     {
         let dsn = PartialDsn::outbound(scoping, upstream);
 
-        if let Some(key) = partition_key {
-            relay_statsd::metric!(histogram(RelayHistograms::PartitionKeys) = key);
-        }
+        relay_statsd::metric!(
+            histogram(RelayHistograms::PartitionKeys) = u64::from(partition_key)
+        );
 
         let mut num_batches = 0;
         for batch in BucketsView::from(buckets).by_size(batch_size) {
@@ -2809,7 +2804,9 @@ impl EnvelopeProcessorService {
                 self.inner.addrs.test_store.clone(),
                 ProcessingGroup::Metrics,
             );
-            envelope.set_partition_key(partition_key).scope(*scoping);
+            envelope
+                .set_partition_key(Some(partition_key))
+                .scope(*scoping);
 
             relay_statsd::metric!(
                 histogram(RelayHistograms::BucketsPerBatch) = batch.len() as u64
@@ -2826,7 +2823,7 @@ impl EnvelopeProcessorService {
     }
 
     /// Creates a [`SendMetricsRequest`] and sends it to the upstream relay.
-    fn send_global_partition(&self, partition_key: Option<u64>, partition: &mut Partition<'_>) {
+    fn send_global_partition(&self, partition_key: u32, partition: &mut Partition<'_>) {
         if partition.is_empty() {
             return;
         }
@@ -2843,7 +2840,7 @@ impl EnvelopeProcessorService {
         };
 
         let request = SendMetricsRequest {
-            partition_key: partition_key.map(|k| k.to_string()),
+            partition_key: partition_key.to_string(),
             unencoded,
             encoded,
             project_info,
@@ -3270,7 +3267,7 @@ impl<'a> Partition<'a> {
 #[derive(Debug)]
 struct SendMetricsRequest {
     /// If the partition key is set, the request is marked with `X-Sentry-Relay-Shard`.
-    partition_key: Option<String>,
+    partition_key: String,
     /// Serialized metric buckets without encoding applied, used for signing.
     unencoded: Bytes,
     /// Serialized metric buckets with the stated HTTP encoding applied.
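Since partition keys are now always present, the `X-Sentry-Relay-Shard` header is set unconditionally with the numeric partition key rendered as a string. A tiny illustrative sketch (not part of the patch; the helper name is hypothetical, only the header name comes from the code above):

    // Hypothetical helper: format the shard header for a numeric partition key.
    fn shard_header(partition_key: u32) -> (&'static str, String) {
        ("X-Sentry-Relay-Shard", partition_key.to_string())
    }

    fn main() {
        assert_eq!(shard_header(4), ("X-Sentry-Relay-Shard", "4".to_string()));
    }
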
@@ -3344,7 +3341,7 @@ impl UpstreamRequest for SendMetricsRequest { builder .content_encoding(self.http_encoding) - .header_opt("X-Sentry-Relay-Shard", self.partition_key.as_ref()) + .header("X-Sentry-Relay-Shard", self.partition_key.as_bytes()) .header(header::CONTENT_TYPE, b"application/json") .body(self.encoded.clone()); @@ -3546,7 +3543,7 @@ mod tests { ]); FlushBuckets { - partition_key: None, + partition_key: 0, buckets, } }; @@ -3930,10 +3927,7 @@ mod tests { }; processor.handle_process_metrics(&mut token, message); - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!() - }; + let Aggregator::MergeBuckets(merge_buckets) = aggregator_rx.recv().await.unwrap(); let buckets = merge_buckets.buckets; assert_eq!(buckets.len(), 1); assert_eq!(buckets[0].metadata.received_at, expected_received_at); @@ -3994,14 +3988,8 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb1) = value else { - panic!() - }; - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb2) = value else { - panic!() - }; + let Aggregator::MergeBuckets(mb1) = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb2) = aggregator_rx.recv().await.unwrap(); let mut messages = vec![mb1, mb2]; messages.sort_by_key(|mb| mb.project_key); diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 4a0b025aaf..f793a667b1 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -197,9 +197,7 @@ mod tests { // We assert that one metric is emitted by metric stats. let value = metric_stats_rx.blocking_recv().unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!(); - }; + let Aggregator::MergeBuckets(merge_buckets) = value; assert_eq!(merge_buckets.buckets.len(), 1); assert_eq!( merge_buckets.buckets[0].tags.get("mri").unwrap().as_str(), @@ -234,9 +232,7 @@ mod tests { // We assert that two metrics are emitted by metric stats. 
for _ in 0..2 { let value = metric_stats_rx.blocking_recv().unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!(); - }; + let Aggregator::MergeBuckets(merge_buckets) = value; assert_eq!(merge_buckets.buckets.len(), 1); let BucketValue::Counter(value) = merge_buckets.buckets[0].value else { panic!(); diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 684a2dccd7..21c1255f5b 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -367,7 +367,6 @@ pub fn extract_from_event( event, config .aggregator_config_for(MetricNamespace::Spans) - .aggregator .max_tag_value_length, &[], ) else { @@ -469,8 +468,8 @@ impl<'a> NormalizeSpanConfig<'a> { Self { received_at: managed_envelope.received_at(), - timestamp_range: aggregator_config.aggregator.timestamp_range(), - max_tag_value_size: aggregator_config.aggregator.max_tag_value_length, + timestamp_range: aggregator_config.timestamp_range(), + max_tag_value_size: aggregator_config.max_tag_value_length, performance_score: project_config.performance_score.as_ref(), measurements: Some(CombinedMeasurementsConfig::new( project_config.measurements.as_ref(), @@ -481,7 +480,6 @@ impl<'a> NormalizeSpanConfig<'a> { ErrorBoundary::Ok(costs) => Some(costs), }, max_name_and_unit_len: aggregator_config - .aggregator .max_name_length .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 63e1a4634b..6bf5f7a9d1 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -276,25 +276,6 @@ pub enum RelayHistograms { PartitionKeys, /// Measures how many splits were performed when sending out a partition. PartitionSplits, - /// The total number of metric buckets flushed in a cycle across all projects. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsFlushed, - /// The number of metric buckets flushed in a cycle for each project. - /// - /// Relay scans metric buckets in regular intervals and flushes expired buckets. This histogram - /// is logged for each project that is being flushed. The count of the histogram values is - /// equivalent to the number of projects being flushed. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsFlushedPerProject, - /// The number of metric partitions flushed in a cycle. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). 
-    PartitionsFlushed,
 }
 
 impl HistogramMetric for RelayHistograms {
@@ -331,9 +312,6 @@ impl HistogramMetric for RelayHistograms {
             RelayHistograms::UpstreamMetricsBodySize => "upstream.metrics.body_size",
             RelayHistograms::PartitionKeys => "metrics.buckets.partition_keys",
             RelayHistograms::PartitionSplits => "partition_splits",
-            RelayHistograms::BucketsFlushed => "metrics.buckets.flushed",
-            RelayHistograms::BucketsFlushedPerProject => "metrics.buckets.flushed_per_project",
-            RelayHistograms::PartitionsFlushed => "metrics.partitions.flushed",
         }
     }
 }
diff --git a/relay-server/src/utils/managed_envelope.rs b/relay-server/src/utils/managed_envelope.rs
index de1c6832dc..ad9c623464 100644
--- a/relay-server/src/utils/managed_envelope.rs
+++ b/relay-server/src/utils/managed_envelope.rs
@@ -61,7 +61,7 @@ pub enum ItemAction {
 struct EnvelopeContext {
     summary: EnvelopeSummary,
     scoping: Scoping,
-    partition_key: Option<u64>,
+    partition_key: Option<u32>,
     done: bool,
     group: ProcessingGroup,
 }
@@ -463,11 +463,11 @@ impl ManagedEnvelope {
         self.context.scoping
     }
 
-    pub fn partition_key(&self) -> Option<u64> {
+    pub fn partition_key(&self) -> Option<u32> {
         self.context.partition_key
     }
 
-    pub fn set_partition_key(&mut self, partition_key: Option<u64>) -> &mut Self {
+    pub fn set_partition_key(&mut self, partition_key: Option<u32>) -> &mut Self {
         self.context.partition_key = partition_key;
         self
     }
diff --git a/tests/integration/test_healthchecks.py b/tests/integration/test_healthchecks.py
index 2e02e9173b..84ce0376cf 100644
--- a/tests/integration/test_healthchecks.py
+++ b/tests/integration/test_healthchecks.py
@@ -121,20 +121,6 @@ def test_readiness_not_enough_memory_percent(mini_sentry, relay):
     assert_not_enough_memory(relay, mini_sentry, ">= 1.00%")
 
 
-def test_readiness_depends_on_aggregator_being_full(mini_sentry, relay):
-    relay = relay(
-        mini_sentry,
-        {"relay": {"mode": "proxy"}, "aggregator": {"max_total_bucket_bytes": 0}},
-        wait_health_check=False,
-    )
-
-    response = wait_get(relay, "/api/relay/healthcheck/ready/")
-
-    error = str(mini_sentry.test_failures.get(timeout=1))
-    assert "Health check probe 'aggregator'" in error
-    assert response.status_code == 503
-
-
 def test_readiness_depends_on_aggregator_being_full_after_metrics(mini_sentry, relay):
     relay = relay(
         mini_sentry,
diff --git a/tests/integration/test_metrics.py b/tests/integration/test_metrics.py
index bf22c0a621..2a581c7f95 100644
--- a/tests/integration/test_metrics.py
+++ b/tests/integration/test_metrics.py
@@ -210,14 +210,14 @@ def test_metrics_backdated(mini_sentry, relay):
 @pytest.mark.parametrize(
     "metrics_partitions,expected_header",
     [
-        # With no partitions defined, partitioning will not be performed but bucket shift will still be done.
-        (None, None),
+        # With no partitions defined, partition count is auto assigned.
+        (None, "4"),
         # With zero partitions defined, all the buckets will be forwarded to a single partition.
         (0, "0"),
         # With zero partitions defined, all the buckets will be forwarded to a single partition.
         (1, "0"),
         # With more than zero partitions defined, the buckets will be forwarded to one of the partitions.
- (128, "17"), + (128, "4"), ], ) def test_metrics_partition_key(mini_sentry, relay, metrics_partitions, expected_header): @@ -1241,12 +1241,10 @@ def test_graceful_shutdown(mini_sentry, relay): timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - # Backdated metric will be flushed immediately due to debounce delay - past_timestamp = timestamp - 1000 + past_timestamp = timestamp - 1000 + 30 metrics_payload = f"transactions/past:42|c|T{past_timestamp}" relay.send_metrics(project_id, metrics_payload) - # Future timestamp will not be flushed regularly, only through force flush future_timestamp = timestamp + 30 metrics_payload = f"transactions/future:17|c|T{future_timestamp}" relay.send_metrics(project_id, metrics_payload) @@ -1259,27 +1257,36 @@ def test_graceful_shutdown(mini_sentry, relay): with pytest.raises(requests.ConnectionError): relay.send_metrics(project_id, metrics_payload) - envelope = mini_sentry.captured_events.get(timeout=5) - assert len(envelope.items) == 1 - metrics_item = envelope.items[0] - assert metrics_item.type == "metric_buckets" + received_metrics = list() + for _ in range(2): + envelope = mini_sentry.captured_events.get(timeout=5) + assert len(envelope.items) == 1 + metrics_item = envelope.items[0] + assert metrics_item.type == "metric_buckets" + + received_metrics.extend( + metrics_without_keys( + json.loads(metrics_item.get_bytes().decode()), keys={"metadata"} + ) + ) - received_metrics = metrics_without_keys( - json.loads(metrics_item.get_bytes().decode()), keys={"metadata"} - ) + if len(received_metrics) == 2: + break + + received_metrics.sort(key=lambda x: x["timestamp"]) assert received_metrics == [ { - "timestamp": time_within_delta(future_timestamp, timedelta(seconds=1)), + "timestamp": time_within_delta(past_timestamp, timedelta(seconds=1)), "width": 1, - "name": "c:transactions/future@none", - "value": 17.0, + "name": "c:transactions/past@none", + "value": 42.0, "type": "c", }, { - "timestamp": time_within_delta(past_timestamp, timedelta(seconds=1)), + "timestamp": time_within_delta(future_timestamp, timedelta(seconds=1)), "width": 1, - "name": "c:transactions/past@none", - "value": 42.0, + "name": "c:transactions/future@none", + "value": 17.0, "type": "c", }, ] diff --git a/tests/integration/test_spans.py b/tests/integration/test_spans.py index 4c3bb39ec0..58c27679bb 100644 --- a/tests/integration/test_spans.py +++ b/tests/integration/test_spans.py @@ -484,6 +484,7 @@ def test_span_ingestion( "bucket_interval": 1, "initial_delay": 0, "max_secs_in_past": 2**64 - 1, + "shift_key": "none", } } ) diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 81941f7962..b5be0b6392 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -552,12 +552,11 @@ def test_enforce_bucket_rate_limits( ): metrics_consumer = metrics_consumer() - bucket_interval = 1 # second relay = relay_with_processing( { "processing": {"max_rate_limit": 2 * 86400}, "aggregator": { - "bucket_interval": bucket_interval, + "bucket_interval": 1, "initial_delay": 0, }, } @@ -580,31 +579,22 @@ def test_enforce_bucket_rate_limits( } ] - def generate_ticks(): - # Generate a new timestamp for every bucket, so they do not get merged by the aggregator - tick = int(datetime.now(UTC).timestamp() // bucket_interval * bucket_interval) - while True: - yield tick - tick += bucket_interval - - tick = generate_ticks() - def make_bucket(name, type_, values): return { "org_id": 1, "project_id": project_id, - "timestamp": next(tick), + "timestamp": 
int(datetime.now(UTC).timestamp()), "name": name, "type": type_, "value": values, - "width": bucket_interval, + "width": 1, } relay.send_metrics_buckets( project_id, [ - make_bucket("d:transactions/measurements.lcp@millisecond", "d", [1.0]) - for _ in range(metric_bucket_limit * 2) + make_bucket(f"d:transactions/measurements.lcp{i}@millisecond", "d", [1.0]) + for i in range(metric_bucket_limit * 2) ], )
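For orientation, a minimal sketch of the flush-driven loop shape the reworked service uses: the aggregator is asked for the next expired partition and otherwise reports how long to wait, so no fixed-interval ticker is needed. The trait below is a stand-in for the `try_flush_next`/`next_flush_at` API exercised in this patch; the partition payload and all names here are simplified placeholders, not the real relay_metrics types.

    use std::time::{Duration, SystemTime};

    // Stand-in for the aggregator API driven by the service in this patch.
    trait FlushDriven {
        /// Flushes the next expired partition, or returns how long to wait until one expires.
        fn try_flush_next(&mut self, now: SystemTime) -> Result<Vec<String>, Duration>;
        /// Time until the following partition is due.
        fn next_flush_at(&self, now: SystemTime) -> Duration;
    }

    // One iteration of the loop: flush if something is due, then report when to wake up next.
    fn drive<A: FlushDriven>(aggregator: &mut A) -> Duration {
        match aggregator.try_flush_next(SystemTime::now()) {
            Ok(partition) => {
                // A real service would hand the partition to its receiver here.
                drop(partition);
                aggregator.next_flush_at(SystemTime::now())
            }
            Err(wait) => wait,
        }
    }

    // Trivial implementation so the sketch compiles and runs.
    struct Empty;

    impl FlushDriven for Empty {
        fn try_flush_next(&mut self, _now: SystemTime) -> Result<Vec<String>, Duration> {
            Err(Duration::from_secs(1))
        }
        fn next_flush_at(&self, _now: SystemTime) -> Duration {
            Duration::from_secs(1)
        }
    }

    fn main() {
        let mut aggregator = Empty;
        assert_eq!(drive(&mut aggregator), Duration::from_secs(1));
    }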