From 576014c45cf47c3d031069eb88938a9f50f93899 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 10 Dec 2024 16:47:04 +0100 Subject: [PATCH 1/7] ref(metrics): Rework metrics aggregator to keep internal partitions --- CHANGELOG.md | 1 + Cargo.lock | 2 + relay-config/src/aggregator.rs | 39 +- relay-config/src/config.rs | 58 - relay-metrics/Cargo.toml | 1 + relay-metrics/benches/benches.rs | 11 +- relay-metrics/src/aggregator/buckets.rs | 228 --- relay-metrics/src/aggregator/config.rs | 87 +- relay-metrics/src/aggregator/inner.rs | 926 +++++++++++++ relay-metrics/src/aggregator/mod.rs | 1231 +++-------------- ...gregator__inner__tests__merge_flush-3.snap | 150 ++ ...gregator__inner__tests__merge_flush-5.snap | 150 ++ ...aggregator__inner__tests__merge_flush.snap | 153 ++ relay-metrics/src/aggregator/stats.rs | 163 +++ relay-metrics/src/lib.rs | 1 + relay-metrics/src/statsd.rs | 86 +- relay-metrics/src/utils.rs | 140 ++ relay-metrics/src/view.rs | 5 +- relay-server/Cargo.toml | 1 + relay-server/src/metrics/metric_stats.rs | 16 +- .../src/services/metrics/aggregator.rs | 385 ++++-- relay-server/src/services/metrics/router.rs | 2 - relay-server/src/services/processor.rs | 42 +- .../src/services/processor/metrics.rs | 8 +- .../src/services/processor/span/processing.rs | 6 +- relay-server/src/statsd.rs | 22 - relay-server/src/utils/managed_envelope.rs | 6 +- tests/integration/test_healthchecks.py | 14 - tests/integration/test_metrics.py | 45 +- tests/integration/test_store.py | 20 +- 30 files changed, 2212 insertions(+), 1787 deletions(-) delete mode 100644 relay-metrics/src/aggregator/buckets.rs create mode 100644 relay-metrics/src/aggregator/inner.rs create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap create mode 100644 relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap create mode 100644 relay-metrics/src/aggregator/stats.rs create mode 100644 relay-metrics/src/utils.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 73f8871c0e..ae7dea95c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ **Internal**: +- Rework metrics aggregator to keep internal partitions. ([#4378](https://github.com/getsentry/relay/pull/4378)) - Remove support for metrics with profile namespace. ([#4391](https://github.com/getsentry/relay/pull/4391)) ## 24.11.2 diff --git a/Cargo.lock b/Cargo.lock index e68391a256..f47bb8f8cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3676,6 +3676,7 @@ dependencies = [ name = "relay-metrics" version = "24.11.2" dependencies = [ + "ahash", "bytecount", "chrono", "criterion", @@ -3889,6 +3890,7 @@ dependencies = [ "axum-server", "backoff", "brotli 6.0.0", + "bytecount", "bytes", "bzip2", "chrono", diff --git a/relay-config/src/aggregator.rs b/relay-config/src/aggregator.rs index 80d34e0121..dd52c7fae3 100644 --- a/relay-config/src/aggregator.rs +++ b/relay-config/src/aggregator.rs @@ -1,7 +1,7 @@ //! Metrics aggregator configuration. use relay_metrics::aggregator::AggregatorConfig; -use relay_metrics::MetricNamespace; +use relay_metrics::{MetricNamespace, UnixTimestamp}; use serde::{Deserialize, Serialize}; /// Parameters used for metric aggregation. @@ -12,27 +12,52 @@ pub struct AggregatorServiceConfig { #[serde(flatten)] pub aggregator: AggregatorConfig, + /// The length the name of a metric is allowed to be. + /// + /// Defaults to `200` bytes. + pub max_name_length: usize, + + /// The length the tag key is allowed to be. + /// + /// Defaults to `200` bytes. + pub max_tag_key_length: usize, + + /// The length the tag value is allowed to be. + /// + /// Defaults to `200` chars. + pub max_tag_value_length: usize, + /// The approximate maximum number of bytes submitted within one flush cycle. /// /// This controls how big flushed batches of buckets get, depending on the number of buckets, /// the cumulative length of their keys, and the number of raw values. Since final serialization - /// adds some additional overhead, this number is approxmate and some safety margin should be + /// adds some additional overhead, this number is approximate and some safety margin should be /// left to hard limits. pub max_flush_bytes: usize, +} - /// The flushing interval in milliseconds that determines how often the aggregator is polled for - /// flushing new buckets. +impl AggregatorServiceConfig { + /// Returns the valid range for metrics timestamps. /// - /// Defaults to `100` milliseconds. - pub flush_interval_ms: u64, + /// Metrics or buckets outside of this range should be discarded. + pub fn timestamp_range(&self) -> std::ops::Range { + let now = UnixTimestamp::now().as_secs(); + let min_timestamp = + UnixTimestamp::from_secs(now.saturating_sub(self.aggregator.max_secs_in_past)); + let max_timestamp = + UnixTimestamp::from_secs(now.saturating_add(self.aggregator.max_secs_in_future)); + min_timestamp..max_timestamp + } } impl Default for AggregatorServiceConfig { fn default() -> Self { Self { aggregator: AggregatorConfig::default(), + max_name_length: 200, + max_tag_key_length: 200, + max_tag_value_length: 200, max_flush_bytes: 5_000_000, // 5 MB - flush_interval_ms: 100, // 100 milliseconds } } } diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index b3577376ae..19779067f7 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -15,7 +15,6 @@ use relay_kafka::{ ConfigError as KafkaConfigError, KafkaConfigParam, KafkaParams, KafkaTopic, TopicAssignment, TopicAssignments, }; -use relay_metrics::aggregator::{AggregatorConfig, FlushBatching}; use relay_metrics::MetricNamespace; use serde::de::{DeserializeOwned, Unexpected, Visitor}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -2461,63 +2460,6 @@ impl Config { &self.values.cogs.relay_resource_id } - /// Creates an [`AggregatorConfig`] that is compatible with every other aggregator. - /// - /// A lossless aggregator can be put in front of any of the configured aggregators without losing data that the configured aggregator would keep. - /// This is useful for pre-aggregating metrics together in a single aggregator instance. - pub fn permissive_aggregator_config(&self) -> AggregatorConfig { - let AggregatorConfig { - mut bucket_interval, - mut max_secs_in_past, - mut max_secs_in_future, - mut max_name_length, - mut max_tag_key_length, - mut max_tag_value_length, - mut max_project_key_bucket_bytes, - mut max_total_bucket_bytes, - .. - } = self.default_aggregator_config().aggregator; - - for secondary_config in self.secondary_aggregator_configs() { - let agg = &secondary_config.config.aggregator; - - bucket_interval = bucket_interval.min(agg.bucket_interval); - max_secs_in_past = max_secs_in_past.max(agg.max_secs_in_past); - max_secs_in_future = max_secs_in_future.max(agg.max_secs_in_future); - max_name_length = max_name_length.max(agg.max_name_length); - max_tag_key_length = max_tag_key_length.max(agg.max_tag_key_length); - max_tag_value_length = max_tag_value_length.max(agg.max_tag_value_length); - max_project_key_bucket_bytes = - max_project_key_bucket_bytes.max(agg.max_project_key_bucket_bytes); - max_total_bucket_bytes = max_total_bucket_bytes.max(agg.max_total_bucket_bytes); - } - - for agg in self - .secondary_aggregator_configs() - .iter() - .map(|sc| &sc.config) - .chain(std::iter::once(self.default_aggregator_config())) - { - if agg.aggregator.bucket_interval % bucket_interval != 0 { - relay_log::error!("buckets don't align"); - } - } - - AggregatorConfig { - bucket_interval, - max_secs_in_past, - max_secs_in_future, - max_name_length, - max_tag_key_length, - max_tag_value_length, - max_project_key_bucket_bytes, - max_total_bucket_bytes, - initial_delay: 30, - flush_partitions: None, - flush_batching: FlushBatching::Project, - } - } - /// Returns configuration for the default metrics aggregator. pub fn default_aggregator_config(&self) -> &AggregatorServiceConfig { &self.values.aggregator diff --git a/relay-metrics/Cargo.toml b/relay-metrics/Cargo.toml index f9785e709d..ad1acdfada 100644 --- a/relay-metrics/Cargo.toml +++ b/relay-metrics/Cargo.toml @@ -16,6 +16,7 @@ workspace = true redis = ["relay-redis/impl"] [dependencies] +ahash = { workspace = true } bytecount = { workspace = true } chrono = { workspace = true } fnv = { workspace = true } diff --git a/relay-metrics/benches/benches.rs b/relay-metrics/benches/benches.rs index 5bdc3a1527..d6f5a04363 100644 --- a/relay-metrics/benches/benches.rs +++ b/relay-metrics/benches/benches.rs @@ -6,13 +6,14 @@ use rand::SeedableRng; use relay_base_schema::project::ProjectKey; use relay_common::time::UnixTimestamp; use relay_metrics::{ - aggregator::{Aggregator, AggregatorConfig, FlushDecision}, + aggregator::{Aggregator, AggregatorConfig}, Bucket, BucketValue, DistributionValue, FiniteF64, }; use std::cell::RefCell; use std::collections::BTreeMap; use std::fmt; use std::ops::Range; +use std::time::SystemTime; struct NumbersGenerator { min: usize, @@ -188,7 +189,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { b.iter_batched( || { let timestamp = UnixTimestamp::now(); - let aggregator: Aggregator = Aggregator::new(config.clone()); + let aggregator = Aggregator::named("default".to_owned(), &config); (aggregator, input.get_buckets(timestamp)) }, |(mut aggregator, buckets)| { @@ -215,7 +216,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { b.iter_batched( || { let timestamp = UnixTimestamp::now(); - let mut aggregator: Aggregator = Aggregator::new(config.clone()); + let mut aggregator = Aggregator::named("default".to_owned(), &config); for (project_key, bucket) in input.get_buckets(timestamp) { aggregator.merge(project_key, bucket).unwrap(); } @@ -224,9 +225,7 @@ fn bench_insert_and_flush(c: &mut Criterion) { |mut aggregator| { // XXX: Ideally we'd want to test the entire try_flush here, but spawning // a service is too much work here. - black_box(aggregator.pop_flush_buckets(black_box(false), |_| { - FlushDecision::Flush(Vec::new()) - })); + let _ = black_box(aggregator.try_flush_next(SystemTime::UNIX_EPOCH)); }, BatchSize::SmallInput, ) diff --git a/relay-metrics/src/aggregator/buckets.rs b/relay-metrics/src/aggregator/buckets.rs deleted file mode 100644 index ac6d06c43a..0000000000 --- a/relay-metrics/src/aggregator/buckets.rs +++ /dev/null @@ -1,228 +0,0 @@ -use std::collections::BTreeMap; -use std::hash::{Hash, Hasher}; - -use fnv::FnvHasher; -use priority_queue::PriorityQueue; -use relay_base_schema::project::ProjectKey; -use relay_common::time::UnixTimestamp; -use tokio::time::Instant; - -use crate::aggregator::{cost, AggregateMetricsError}; -use crate::bucket::BucketValue; -use crate::{BucketMetadata, MetricName, MetricNamespace}; - -/// Holds a collection of buckets and maintains flush order. -#[derive(Debug, Default)] -pub struct Buckets { - queue: PriorityQueue, -} - -impl Buckets { - /// Returns the current amount of buckets stored. - pub fn len(&self) -> usize { - self.queue.len() - } - - /// Merge a new bucket into the existing collection of buckets. - pub fn merge( - &mut self, - key: BucketKey, - value: T, - merge: impl FnOnce(&BucketKey, T, &mut QueuedBucket) -> Result, - create: impl FnOnce(&BucketKey, T) -> QueuedBucket, - ) -> Result { - let mut added_cost = 0; - - // Need to do a bunch of shenanigans to work around the limitations of the priority queue - // API. There is no `entry` like API which would allow us to return errors on update - // as well as make the compiler recognize control flow to recognize that `bucket` - // is only used once. - // - // The following code never panics and prefers `expect` over a silent `if let Some(..)` - // to prevent errors in future refactors. - let mut value = Some(value); - let mut error = None; - - let updated = self.queue.change_priority_by(&key, |existing| { - let value = value.take().expect("expected bucket to not be used"); - - match merge(&key, value, existing) { - Ok(cost) => added_cost = cost, - Err(err) => error = Some(err), - } - }); - - if let Some(error) = error { - return Err(error); - } - - if !updated { - let value = create( - &key, - value.expect("expected bucket not to be used when the value was not updated"), - ); - - added_cost = key.cost() + value.value.cost(); - self.queue.push(key, value); - } - - Ok(added_cost) - } - - /// Tries to pop the next element bucket to be flushed. - /// - /// Returns `None` if there are no more metric buckets or - /// if the passed filter `f` returns `False`. - pub fn try_pop( - &mut self, - f: impl FnOnce(&BucketKey, &QueuedBucket) -> bool, - ) -> Option<(BucketKey, QueuedBucket)> { - let (key, value) = self.queue.peek()?; - if !f(key, value) { - return None; - } - self.queue.pop() - } - - /// Inserts an entry back into the collection. - /// - /// The intended use is to return bucket after [`Self::try_pop`]. - /// - /// Note: this should only be used to return values back to the queue - /// after removing them, otherwise use [`Self::merge`]. - pub fn re_add(&mut self, key: BucketKey, value: QueuedBucket) { - let _old = self.queue.push(key, value); - debug_assert!(_old.is_none()); - } -} - -impl IntoIterator for Buckets { - type Item = (BucketKey, QueuedBucket); - type IntoIter = priority_queue::core_iterators::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.queue.into_iter() - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct BucketKey { - pub project_key: ProjectKey, - pub timestamp: UnixTimestamp, - pub metric_name: MetricName, - pub tags: BTreeMap, - pub extracted_from_indexed: bool, -} - -impl BucketKey { - /// Creates a 64-bit hash of the bucket key using FnvHasher. - /// - /// This is used for partition key computation and statsd logging. - pub fn hash64(&self) -> u64 { - let mut hasher = FnvHasher::default(); - std::hash::Hash::hash(self, &mut hasher); - hasher.finish() - } - - /// Estimates the number of bytes needed to encode the bucket key. - /// - /// Note that this does not necessarily match the exact memory footprint of the key, - /// because data structures have a memory overhead. - pub fn cost(&self) -> usize { - std::mem::size_of::() + self.metric_name.len() + cost::tags_cost(&self.tags) - } - - /// Returns the namespace of this bucket. - pub fn namespace(&self) -> MetricNamespace { - self.metric_name.namespace() - } - - /// Computes a stable partitioning key for this [`crate::Bucket`]. - /// - /// The partitioning key is inherently producing collisions, since the output of the hasher is - /// reduced into an interval of size `partitions`. This means that buckets with totally - /// different values might end up in the same partition. - /// - /// The role of partitioning is to let Relays forward the same metric to the same upstream - /// instance with the goal of increasing bucketing efficiency. - pub fn partition_key(&self, partitions: u64) -> u64 { - let key = (self.project_key, &self.metric_name, &self.tags); - - let mut hasher = fnv::FnvHasher::default(); - key.hash(&mut hasher); - - let partitions = partitions.max(1); - hasher.finish() % partitions - } -} - -/// Bucket in the [`crate::aggregator::Aggregator`] with a defined flush time. -/// -/// This type implements an inverted total ordering. The maximum queued bucket has the lowest flush -/// time, which is suitable for using it in a [`BinaryHeap`]. -/// -/// [`BinaryHeap`]: std::collections::BinaryHeap -#[derive(Debug)] -pub struct QueuedBucket { - pub flush_at: Instant, - pub value: BucketValue, - pub metadata: BucketMetadata, -} - -impl QueuedBucket { - /// Creates a new `QueuedBucket` with a given flush time. - pub fn new(flush_at: Instant, value: BucketValue, metadata: BucketMetadata) -> Self { - Self { - flush_at, - value, - metadata, - } - } - - /// Returns `true` if the flush time has elapsed. - pub fn elapsed(&self, now: Instant) -> bool { - now > self.flush_at - } - - /// Merges a bucket into the current queued bucket. - /// - /// Returns the value cost increase on success, - /// otherwise returns an error if the passed bucket value type does not match - /// the contained type. - pub fn merge( - &mut self, - value: BucketValue, - metadata: BucketMetadata, - ) -> Result { - let cost_before = self.value.cost(); - - self.value - .merge(value) - .map_err(|_| AggregateMetricsError::InvalidTypes)?; - self.metadata.merge(metadata); - - Ok(self.value.cost().saturating_sub(cost_before)) - } -} - -impl PartialEq for QueuedBucket { - fn eq(&self, other: &Self) -> bool { - self.flush_at.eq(&other.flush_at) - } -} - -impl Eq for QueuedBucket {} - -impl PartialOrd for QueuedBucket { - fn partial_cmp(&self, other: &Self) -> Option { - // Comparing order is reversed to convert the max heap into a min heap - Some(other.flush_at.cmp(&self.flush_at)) - } -} - -impl Ord for QueuedBucket { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // Comparing order is reversed to convert the max heap into a min heap - other.flush_at.cmp(&self.flush_at) - } -} diff --git a/relay-metrics/src/aggregator/config.rs b/relay-metrics/src/aggregator/config.rs index 78c9564b73..0788fba271 100644 --- a/relay-metrics/src/aggregator/config.rs +++ b/relay-metrics/src/aggregator/config.rs @@ -1,6 +1,3 @@ -use std::time::Duration; - -use relay_common::time::UnixTimestamp; use serde::{Deserialize, Serialize}; /// Configuration value for [`AggregatorConfig::flush_batching`]. @@ -43,15 +40,21 @@ pub struct AggregatorConfig { /// /// Defaults to `10` seconds. Every metric is sorted into a bucket of this size based on its /// timestamp. This defines the minimum granularity with which metrics can be queried later. - pub bucket_interval: u64, + pub bucket_interval: u32, + + /// The aggregator size. + /// + /// This determines how many individual bucket intervals, are stored in the aggregator. + /// Increasing this number will add additional delay for backdated and future buckets. + /// + /// Defaults to: 1. + pub aggregator_size: u32, /// The initial delay in seconds to wait before flushing a bucket. /// /// Defaults to `30` seconds. Before sending an aggregated bucket, this is the time Relay waits /// for buckets that are being reported in real time. - /// - /// Relay applies up to a full `bucket_interval` of additional jitter after the initial delay to spread out flushing real time buckets. - pub initial_delay: u64, + pub initial_delay: u32, /// The age in seconds of the oldest allowed bucket timestamp. /// @@ -63,28 +66,13 @@ pub struct AggregatorConfig { /// Defaults to 1 minute. pub max_secs_in_future: u64, - /// The length the name of a metric is allowed to be. - /// - /// Defaults to `200` bytes. - pub max_name_length: usize, - - /// The length the tag key is allowed to be. - /// - /// Defaults to `200` bytes. - pub max_tag_key_length: usize, - - /// The length the tag value is allowed to be. - /// - /// Defaults to `200` chars. - pub max_tag_value_length: usize, - /// Maximum amount of bytes used for metrics aggregation per project key. /// /// Similar measuring technique to [`Self::max_total_bucket_bytes`], but instead of a /// global/process-wide limit, it is enforced per project key. /// /// Defaults to `None`, i.e. no limit. - pub max_project_key_bucket_bytes: Option, + pub max_project_key_bucket_bytes: Option, /// Maximum amount of bytes used for metrics aggregation. /// @@ -93,15 +81,10 @@ pub struct AggregatorConfig { /// in hashmaps. /// /// Defaults to `None`, i.e. no limit. - pub max_total_bucket_bytes: Option, + pub max_total_bucket_bytes: Option, /// The number of logical partitions that can receive flushed buckets. - /// - /// If set, buckets are partitioned by (bucket key % flush_partitions), and routed - /// by setting the header `X-Sentry-Relay-Shard`. - /// - /// This setting will take effect only when paired with [`FlushBatching::Partition`]. - pub flush_partitions: Option, + pub flush_partitions: Option, /// The batching mode for the flushing of the aggregator. /// @@ -115,54 +98,14 @@ pub struct AggregatorConfig { pub flush_batching: FlushBatching, } -impl AggregatorConfig { - /// Returns the time width buckets. - pub fn bucket_interval(&self) -> Duration { - Duration::from_secs(self.bucket_interval) - } - - /// Returns the initial flush delay after the end of a bucket's original time window. - pub fn initial_delay(&self) -> Duration { - Duration::from_secs(self.initial_delay) - } - - /// Determines the target bucket for an incoming bucket timestamp and bucket width. - /// - /// We select the output bucket which overlaps with the center of the incoming bucket. - /// Fails if timestamp is too old or too far into the future. - pub fn get_bucket_timestamp( - &self, - timestamp: UnixTimestamp, - bucket_width: u64, - ) -> UnixTimestamp { - // Find middle of the input bucket to select a target - let ts = timestamp.as_secs().saturating_add(bucket_width / 2); - // Align target_timestamp to output bucket width - let ts = (ts / self.bucket_interval) * self.bucket_interval; - UnixTimestamp::from_secs(ts) - } - - /// Returns the valid range for metrics timestamps. - /// - /// Metrics or buckets outside of this range should be discarded. - pub fn timestamp_range(&self) -> std::ops::Range { - let now = UnixTimestamp::now().as_secs(); - let min_timestamp = UnixTimestamp::from_secs(now.saturating_sub(self.max_secs_in_past)); - let max_timestamp = UnixTimestamp::from_secs(now.saturating_add(self.max_secs_in_future)); - min_timestamp..max_timestamp - } -} - impl Default for AggregatorConfig { fn default() -> Self { Self { bucket_interval: 10, + aggregator_size: 1, initial_delay: 30, - max_secs_in_past: 5 * 24 * 60 * 60, // 5 days, as for sessions + max_secs_in_past: 5 * 24 * 60 * 60, // 5 days max_secs_in_future: 60, // 1 minute - max_name_length: 200, - max_tag_key_length: 200, - max_tag_value_length: 200, max_project_key_bucket_bytes: None, max_total_bucket_bytes: None, flush_batching: FlushBatching::default(), diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs new file mode 100644 index 0000000000..de149d15f8 --- /dev/null +++ b/relay-metrics/src/aggregator/inner.rs @@ -0,0 +1,926 @@ +use core::fmt; +use std::collections::{BTreeMap, VecDeque}; +use std::mem; +use std::time::Duration; + +use ahash::RandomState; +use hashbrown::hash_map::Entry; +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricName; +use relay_base_schema::project::ProjectKey; +use relay_common::time::UnixTimestamp; + +use crate::aggregator::stats; +use crate::aggregator::{AggregateMetricsError, FlushBatching}; +use crate::utils::ByNamespace; +use crate::{BucketMetadata, BucketValue, DistributionType, SetType}; + +#[derive(Default)] +pub struct Partition { + pub partition_key: u32, + pub buckets: HashMap, + pub stats: PartitionStats, +} + +impl fmt::Debug for Partition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #[cfg(test)] + let buckets = &self.buckets.iter().collect::>(); + #[cfg(not(test))] + let buckets = &self.buckets; + + f.debug_struct("Partition") + .field("partition_key", &self.partition_key) + .field("stats", &self.stats) + .field("buckets", buckets) + .finish() + } +} + +#[derive(Default, Debug)] +pub struct PartitionStats { + /// Amount of unique buckets in the partition. + #[expect(unused)] + pub count: u64, + /// Amount of unique buckets in the partition by namespace. + pub count_by_namespace: ByNamespace, + /// Amount of times a bucket was merged in the partition. + #[expect(unused)] + pub merges: u64, + /// Amount of times a bucket was merged in the partition by namespace. + pub merges_by_namespace: ByNamespace, + /// Cost of buckets in the partition. + #[expect(unused)] + pub cost: u64, + /// Cost of buckets in the partition by namespace. + pub cost_by_namespace: ByNamespace, +} + +impl From<&stats::Slot> for PartitionStats { + fn from(value: &stats::Slot) -> Self { + Self { + count: value.count, + count_by_namespace: value.count_by_namespace, + merges: value.merges, + merges_by_namespace: value.merges_by_namespace, + cost: value.cost, + cost_by_namespace: value.cost_by_namespace, + } + } +} + +#[derive(Default, Debug)] +pub struct Stats { + /// Total amount of buckets in the aggregator. + #[expect(unused)] + pub count: u64, + /// Total amount of buckets in the aggregator by namespace. + pub count_by_namespace: ByNamespace, + /// Total bucket cost in the aggregator. + #[expect(unused)] + pub cost: u64, + /// Total bucket cost in the aggregator by namespace. + pub cost_by_namespace: ByNamespace, +} + +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct BucketKey { + pub project_key: ProjectKey, + pub timestamp: UnixTimestamp, + pub metric_name: MetricName, + pub tags: BTreeMap, + pub extracted_from_indexed: bool, +} + +impl BucketKey { + /// Estimates the number of bytes needed to encode the bucket key. + /// + /// Note that this does not necessarily match the exact memory footprint of the key, + /// because data structures have a memory overhead. + pub fn cost(&self) -> usize { + std::mem::size_of::() + self.metric_name.len() + crate::utils::tags_cost(&self.tags) + } +} + +impl fmt::Debug for BucketKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}-{}-{}", + self.timestamp, self.project_key, self.metric_name + ) + } +} + +pub struct BucketData { + pub value: BucketValue, + pub metadata: BucketMetadata, +} + +impl fmt::Debug for BucketData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.value.fmt(f) + } +} + +impl BucketData { + /// Merges another bucket's data into this one. + /// + /// Returns the value cost increase on success. + fn merge(&mut self, other: Self) -> Result { + let cost_before = self.value.cost(); + + self.value + .merge(other.value) + .map_err(|_| AggregateMetricsError::InvalidTypes)?; + self.metadata.merge(other.metadata); + + Ok(self.value.cost().saturating_sub(cost_before)) + } +} + +/// Config used to create a [`Inner`] instance. +#[derive(Debug)] +pub struct Config { + /// Initial position/time of the aggregator. + /// + /// Except in testing, this should always be [`UnixTimestamp::now`]. + pub start: UnixTimestamp, + /// Size of each individual bucket, inputs are truncated to this value. + pub bucket_interval: u32, + /// The amount of time slots to keep track off in the aggregator. + /// + /// The size of a time slot is defined by [`Self::bucket_interval`]. + pub num_time_slots: u32, + /// The amount of partitions per time slot. + pub num_partitions: u32, + /// Delay how long a bucket should remain in the aggregator before being flushed. + /// + /// Ideally the delay is a multiple of [`Self::bucket_interval`]. + pub delay: u32, + /// Maximum amount of bytes the aggregator can grow to. + pub max_total_bucket_bytes: Option, + /// Maximum amount of bytes the aggregator allows per project key. + pub max_project_key_bucket_bytes: Option, + /// The age in seconds of the oldest allowed bucket timestamp. + pub max_secs_in_past: Option, + /// The time in seconds that a timestamp may be in the future. + pub max_secs_in_future: Option, +} + +/// A metrics aggregator. +/// +/// The aggregator is unaware of current time and needs to be driven by periodic flushes using +/// [`Self::flush_next`]. Each flush advances the internal clock by the configured +/// [`Config::bucket_interval`]. +/// +/// The internal time is set on construction to [`Config::start`]. +/// +/// Use [`Self::next_flush`] to determine the time when to call [`Self::flush_next`]. +pub struct Inner { + /// Ring-buffer of aggregation slots. + /// + /// This is treated as a ring-buffer of a two dimensional array. The first dimension is a + /// "time slot" and the second dimension is the assigned partition. + /// + /// The total length of the ring-buffer is then determined by the amount of time slots times + /// the amount of partition. In other words, every time slot as [`Self::num_partitions`] + /// partitions. + /// + /// Layout: + /// Time slots: [ ][ ][ ] + /// Partitions: [ ][ ] [ ][ ] [ ][ ] + /// + /// An item is assigned by first determining it's time slot and then assigning it a partition + /// based on selected [`Self::partition_by`] strategy. + /// + /// The first item in the buffer is tracked by [`Self::head`] which is at any time the + /// current partition since the beginning "zero". The beginning in the aggregator refers to the + /// beginning of the epoch. The [`Self::head`] is calculated with `f(x) = x / bucket_interval * num_partitions`. + /// + /// Flushing a partition advances the [`Self::head`] by a single value `+1`. Meaning + /// effectively time is advanced by `bucket_interval / num_partitions`. + slots: VecDeque, + /// The amount of partitions per time slot. + num_partitions: u32, + + /// Position of the first element in [`Self::slots`]. + head: u64, + + /// Size of each individual bucket, inputs are truncated to this value. + bucket_interval: u32, + /// Amount of slots which is added to a bucket as a delay. + /// + /// This is a fixed delay which is added to to the time returned by [`Self::next_flush`]. + delay: u32, + + /// Total stats of the aggregator. + stats: stats::Total, + /// Configured limits based on aggregator stats. + limits: stats::Limits, + + /// The maximum amount of slots (size of a `bucket_interval`) the timestamp is allowed to be + /// in the past or future. + slot_diff: SlotDiff, + + /// Determines how partitions are assigned based in the input bucket. + partition_by: FlushBatching, + /// Hasher used to calculate partitions. + hasher: ahash::RandomState, +} + +impl Inner { + pub fn new(config: Config) -> Self { + let bucket_interval = config.bucket_interval.max(1); + // Extend the configured time slots with the delay (in slots), to make sure there is + // enough space to satisfy the delay. + // + // We cannot just reserve space for enough partitions to satisfy the delay, because we have + // no control over which partition a bucket is assigned to, so we have to prepare for the + // 'worst' case, and that is full extra time slots. + let num_time_slots = config.num_time_slots.max(1) + config.delay.div_ceil(bucket_interval); + let num_partitions = config.num_partitions.max(1); + + let mut slots = Vec::new(); + for _ in 0..num_time_slots { + for partition_key in 0..num_partitions { + slots.push(Slot { + partition_key, + ..Default::default() + }); + } + } + + let slot_diff = SlotDiff { + min: config + .max_secs_in_past + .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), + max: config + .max_secs_in_future + .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), + }; + + let total_slots = slots.len(); + Self { + slots: VecDeque::from(slots), + num_partitions, + head: config.start.as_secs() / u64::from(bucket_interval) * u64::from(num_partitions), + bucket_interval, + delay: config.delay, + stats: stats::Total::default(), + limits: stats::Limits { + max_total: config.max_total_bucket_bytes.unwrap_or(u64::MAX), + // Break down the maximum project cost to a maximum cost per partition. + max_partition_project: config + .max_project_key_bucket_bytes + .map(|c| c.div_ceil(total_slots as u64)) + .unwrap_or(u64::MAX), + }, + slot_diff, + partition_by: FlushBatching::Partition, + hasher: build_hasher(), + } + } + + /// Returns the configured bucket interval. + pub fn bucket_interval(&self) -> u32 { + self.bucket_interval + } + + /// Returns total aggregator stats. + pub fn stats(&self) -> Stats { + Stats { + count: self.stats.count, + count_by_namespace: self.stats.count_by_namespace, + cost: self.stats.count, + cost_by_namespace: self.stats.cost_by_namespace, + } + } + + /// Returns `true` if the aggregator contains any metric buckets. + pub fn is_empty(&self) -> bool { + self.stats.cost == 0 + } + + /// Returns the time as a duration since epoch when the next flush is supposed to happen. + pub fn next_flush(&self) -> Duration { + // `head + 1` to get the end time of the slot not the start. + (Duration::from_secs(self.head + 1) / self.num_partitions * self.bucket_interval) + + Duration::from_secs(u64::from(self.delay)) + } + + /// Merges a metric bucket. + /// + /// Returns `true` if the bucket already existed in the aggregator + /// and `false` if the bucket did not exist yet. + pub fn merge( + &mut self, + mut key: BucketKey, + value: BucketData, + ) -> Result<(), AggregateMetricsError> { + let project_key = key.project_key; + let namespace = key.metric_name.namespace(); + + let time_slot = key.timestamp.as_secs() / u64::from(self.bucket_interval); + // Make sure the timestamp is normalized to the correct interval as well. + key.timestamp = UnixTimestamp::from_secs(time_slot * u64::from(self.bucket_interval)); + + let now_slot = self.head / u64::from(self.num_partitions); + if !self.slot_diff.contains(now_slot, time_slot) { + return Err(AggregateMetricsError::InvalidTimestamp(key.timestamp)); + } + + let assigned_partition = match self.partition_by { + FlushBatching::None => 0, + FlushBatching::Project => self.hasher.hash_one(key.project_key), + FlushBatching::Bucket => self.hasher.hash_one(&key), + FlushBatching::Partition => { + self.hasher + .hash_one((key.project_key, &key.metric_name, &key.tags)) + } + } % u64::from(self.num_partitions); + + let slot = time_slot * u64::from(self.num_partitions) + assigned_partition; + + let slots_len = self.slots.len() as u64; + let index = (slot + slots_len).wrapping_sub(self.head % slots_len) % slots_len; + + let slot = self + .slots + .get_mut(index as usize) + .expect("index should always be a valid partition"); + + debug_assert_eq!( + u64::from(slot.partition_key), + assigned_partition, + "assigned partition does not match selected partition" + ); + + let key_cost = key.cost() as u64; + match slot.buckets.entry(key) { + Entry::Occupied(occupied_entry) => { + let estimated_cost = match &value.value { + // Counters and Gauges aggregate without additional costs. + BucketValue::Counter(_) | BucketValue::Gauge(_) => 0, + // Distributions are an accurate estimation, all values will be added. + BucketValue::Distribution(d) => d.len() * mem::size_of::(), + // Sets are an upper bound. + BucketValue::Set(s) => s.len() * mem::size_of::(), + }; + + // Reserve for the upper bound of the value. + let reservation = slot.stats.reserve( + &mut self.stats, + project_key, + namespace, + estimated_cost as u64, + &self.limits, + )?; + + let actual_cost = occupied_entry.into_mut().merge(value)?; + + // Track the actual cost increase, not just the reservation. + reservation.consume_with(actual_cost as u64); + slot.stats.incr_merges(namespace); + } + Entry::Vacant(vacant_entry) => { + let reservation = slot.stats.reserve( + &mut self.stats, + project_key, + namespace, + key_cost + value.value.cost() as u64, + &self.limits, + )?; + + vacant_entry.insert(value); + + reservation.consume(); + slot.stats.incr_count(&mut self.stats, namespace); + } + }; + + debug_assert_eq!(slot.stats.count, slot.buckets.len() as u64); + + Ok(()) + } + + /// FLushes the next partition and advances time. + pub fn flush_next(&mut self) -> Partition { + let mut slot @ Slot { partition_key, .. } = self + .slots + .pop_front() + .expect("there should always be at least one partition"); + + let stats = PartitionStats::from(&slot.stats); + + // Remove the partition cost from the total cost and reset the partition cost. + self.stats.remove_slot(&slot.stats); + slot.stats.reset(); + + // Create a new and empty slot with a similar capacity as the original one, + // but still shrinking a little bit so that over time we free up memory again + // without re-allocating too many times. + // + // Also re-use the original hasher, no need to create a new/expensive one. + self.slots.push_back(Slot { + buckets: HashMap::with_capacity_and_hasher( + slot.buckets.len(), + slot.buckets.hasher().clone(), + ), + ..slot + }); + + // Increment to the next partition. + // + // Effectively advance time by `bucket_interval / num_partitions`. + self.head += 1; + + Partition { + partition_key, + buckets: slot.buckets, + stats, + } + } + + /// Consumes the aggregator and returns an iterator over all contained partitions. + pub fn into_partitions(self) -> impl Iterator { + self.slots.into_iter().map(|slot| Partition { + partition_key: slot.partition_key, + buckets: slot.buckets, + stats: PartitionStats::from(&slot.stats), + }) + } +} + +impl fmt::Debug for Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut list = f.debug_list(); + list.entry(&self.stats); + for (i, v) in self.slots.iter().enumerate() { + let head_partitions = self.head % u64::from(self.num_partitions); + let time_offset = (head_partitions + i as u64) / u64::from(self.num_partitions); + let head_time = self.head / u64::from(self.num_partitions); + let time = (head_time + time_offset) * u64::from(self.bucket_interval); + match v.is_empty() { + // Make the output shorter with a string until `entry_with` is stable. + true => list.entry(&format!("({time}, {v:?})")), + false => list.entry(&(time, v)), + }; + } + list.finish() + } +} + +#[derive(Default)] +struct Slot { + pub partition_key: u32, + pub stats: stats::Slot, + pub buckets: HashMap, +} + +impl Slot { + fn is_empty(&self) -> bool { + self.stats == Default::default() && self.buckets.is_empty() + } +} + +impl fmt::Debug for Slot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_empty() { + write!(f, "Slot({})", self.partition_key) + } else { + #[cfg(test)] + let buckets = &self.buckets.iter().collect::>(); + #[cfg(not(test))] + let buckets = &self.buckets; + + f.debug_struct("Slot") + .field("partition_key", &self.partition_key) + .field("stats", &self.stats) + .field("buckets", buckets) + .finish() + } + } +} + +struct SlotDiff { + min: u64, + max: u64, +} + +impl SlotDiff { + pub fn contains(&self, now: u64, target: u64) -> bool { + if target < now { + // Timestamp/target in the past. + let diff = now - target; + diff <= self.min + } else { + // Timestamp/target in the future. + let diff = target - now; + diff <= self.max + } + } +} + +fn build_hasher() -> RandomState { + // A fixed, consistent seed across all instances of Relay. + const K0: u64 = 0x06459b7d5da84ed8; + const K1: u64 = 0x3321ce2636c567cc; + const K2: u64 = 0x56c94d7107c49765; + const K3: u64 = 0x685bf5f9abbea5ab; + + ahash::RandomState::with_seeds(K0, K1, K2, K3) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn bucket_key(ts: u64, name: &str) -> BucketKey { + BucketKey { + project_key: ProjectKey::parse("00000000000000000000000000000000").unwrap(), + timestamp: UnixTimestamp::from_secs(ts), + metric_name: name.into(), + tags: Default::default(), + extracted_from_indexed: false, + } + } + + fn counter(value: f64) -> BucketData { + BucketData { + value: BucketValue::counter(value.try_into().unwrap()), + metadata: Default::default(), + } + } + + #[test] + fn test_merge_flush() -> Result<(), AggregateMetricsError> { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + // Within the time range. + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + buckets.merge(bucket_key(80, "b"), counter(1.0))?; + buckets.merge(bucket_key(80, "b"), counter(2.0))?; + // Too early. + buckets.merge(bucket_key(32, "c"), counter(1.0))?; + buckets.merge(bucket_key(42, "d"), counter(1.0))?; + // Too late. + buckets.merge(bucket_key(171, "e"), counter(1.0))?; + buckets.merge(bucket_key(181, "f"), counter(1.0))?; + buckets.merge(bucket_key(191, "a"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets); + + let partition = buckets.flush_next(); + insta::assert_debug_snapshot!(partition, @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 2, + count_by_namespace: (unsupported:2), + merges: 0, + merges_by_namespace: (0), + cost: 274, + cost_by_namespace: (unsupported:274), + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + 190-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + } + "###); + + // This was just flushed and now is supposed to be at the end. + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets); + + let partition = buckets.flush_next(); + insta::assert_debug_snapshot!(partition, @r###" + Partition { + partition_key: 1, + stats: PartitionStats { + count: 0, + count_by_namespace: (0), + merges: 0, + merges_by_namespace: (0), + cost: 0, + cost_by_namespace: (0), + }, + buckets: {}, + } + "###); + + insta::assert_debug_snapshot!(buckets); + + insta::assert_debug_snapshot!(buckets.stats(), @r###" + Stats { + count: 6, + count_by_namespace: (unsupported:6), + cost: 6, + cost_by_namespace: (unsupported:822), + } + "###); + + Ok(()) + } + + #[test] + fn test_merge_flush_cost_limits() -> Result<(), AggregateMetricsError> { + const ONE_BUCKET_COST: u64 = 137; + + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 3, + num_partitions: 1, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: Some(ONE_BUCKET_COST * 2), + // Enough for one bucket per partition. + max_project_key_bucket_bytes: Some(ONE_BUCKET_COST * 3), + start: UnixTimestamp::from_secs(70), + }); + + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + // Adding a new bucket exceeds the cost. + assert_eq!( + buckets + .merge(bucket_key(70, "b"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::ProjectLimitExceeded + ); + // Merging a counter bucket is fine. + buckets.merge(bucket_key(70, "a"), counter(2.0))?; + + // There is still room in the total budget and for a different project. + let other_project = BucketKey { + project_key: ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(), + ..bucket_key(70, "a") + }; + buckets.merge(other_project, counter(3.0))?; + + // Add a bucket to a different partition, but the total limit is exceeded. + assert_eq!( + buckets + .merge(bucket_key(80, "c"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::TotalLimitExceeded + ); + // Flush some data and make space. + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 2, + count_by_namespace: (unsupported:2), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 274, + cost_by_namespace: (unsupported:274), + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 3.0, + ), + 70-bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-a: Counter( + 3.0, + ), + }, + } + "###); + buckets.merge(bucket_key(80, "c"), counter(1.0))?; + + insta::assert_debug_snapshot!(buckets, @r###" + [ + Total { + count: 1, + count_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(0))", + "(100, Slot(0))", + ] + "###); + + insta::assert_debug_snapshot!(buckets.stats(), @r###" + Stats { + count: 1, + count_by_namespace: (unsupported:1), + cost: 1, + cost_by_namespace: (unsupported:137), + } + "###); + + Ok(()) + } + + #[test] + fn test_merge_flush_with_delay() { + let mut buckets = Inner::new(Config { + // 20 seconds. + bucket_interval: 20, + // Slots for 1 minute. + num_time_slots: 3, + // A partition for every 10 seconds. + num_partitions: 2, + // 20 second delay -> 1 extra time slot. + delay: 20, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + max_secs_in_past: None, + max_secs_in_future: None, + // Truncated to 60 seconds. + start: UnixTimestamp::from_secs(63), + }); + + // Add a bucket now -> should be flushed 30 seconds in the future. + buckets.merge(bucket_key(60, "a"), counter(1.0)).unwrap(); + // Add a bucket 1 minute into the future, this one should still have a delay. + buckets.merge(bucket_key(120, "b"), counter(2.0)).unwrap(); + + // First flush is in 20 seconds + 10 seconds for the end of the partition. + assert_eq!(buckets.next_flush(), Duration::from_secs(90)); + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + buckets: { + 60-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + } + "###); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 100s. + assert_eq!(buckets.next_flush(), Duration::from_secs(110)); + assert!(buckets.flush_next().buckets.is_empty()); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 120s. + assert_eq!(buckets.next_flush(), Duration::from_secs(130)); + assert!(buckets.flush_next().buckets.is_empty()); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at second 140s -> our second bucket is ready (120s + 20s delay). + assert_eq!(buckets.next_flush(), Duration::from_secs(150)); + insta::assert_debug_snapshot!(buckets.flush_next(), @r###" + Partition { + partition_key: 0, + stats: PartitionStats { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + }, + buckets: { + 120-00000000000000000000000000000000-b: Counter( + 2.0, + ), + }, + } + "###); + assert!(buckets.flush_next().buckets.is_empty()); + + // We're now at 160s. + assert_eq!(buckets.next_flush(), Duration::from_secs(170)); + } + + #[test] + fn test_next_flush() { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + assert_eq!(buckets.next_flush(), Duration::from_secs(75)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush(), Duration::from_secs(80)); + assert_eq!(buckets.flush_next().partition_key, 1); + assert_eq!(buckets.next_flush(), Duration::from_secs(85)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush(), Duration::from_secs(90)); + assert_eq!(buckets.next_flush(), Duration::from_secs(90)); + } + + #[test] + fn test_next_flush_with_delay() { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 3, + max_secs_in_past: None, + max_secs_in_future: None, + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + assert_eq!(buckets.next_flush(), Duration::from_secs(78)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush(), Duration::from_secs(83)); + assert_eq!(buckets.flush_next().partition_key, 1); + assert_eq!(buckets.next_flush(), Duration::from_secs(88)); + assert_eq!(buckets.flush_next().partition_key, 0); + assert_eq!(buckets.next_flush(), Duration::from_secs(93)); + assert_eq!(buckets.next_flush(), Duration::from_secs(93)); + } + + #[test] + fn test_merge_flush_time_limits() -> Result<(), AggregateMetricsError> { + let mut buckets = Inner::new(Config { + bucket_interval: 10, + num_time_slots: 6, + num_partitions: 2, + delay: 0, + max_secs_in_past: Some(33), // -> Upgraded to 4 slots (40 seconds). + max_secs_in_future: Some(22), // -> Upgraded to 3 slots (30 seconds). + max_total_bucket_bytes: None, + max_project_key_bucket_bytes: None, + start: UnixTimestamp::from_secs(70), + }); + + buckets.merge(bucket_key(70, "a"), counter(1.0))?; + + // 4 slots in the past. + buckets.merge(bucket_key(60, "a"), counter(1.0))?; + buckets.merge(bucket_key(50, "a"), counter(1.0))?; + buckets.merge(bucket_key(40, "a"), counter(1.0))?; + buckets.merge(bucket_key(30, "a"), counter(1.0))?; + assert_eq!( + buckets + .merge(bucket_key(29, "a"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::InvalidTimestamp(UnixTimestamp::from_secs(20)) + ); + + // 3 slots in the future. + buckets.merge(bucket_key(80, "a"), counter(1.0))?; + buckets.merge(bucket_key(90, "a"), counter(1.0))?; + buckets.merge(bucket_key(109, "a"), counter(1.0))?; + assert_eq!( + buckets + .merge(bucket_key(110, "a"), counter(1.0)) + .unwrap_err(), + AggregateMetricsError::InvalidTimestamp(UnixTimestamp::from_secs(110)) + ); + + Ok(()) + } +} diff --git a/relay-metrics/src/aggregator/mod.rs b/relay-metrics/src/aggregator/mod.rs index b03662a186..4ce590ba01 100644 --- a/relay-metrics/src/aggregator/mod.rs +++ b/relay-metrics/src/aggregator/mod.rs @@ -1,1170 +1,255 @@ //! Core functionality of metrics aggregation. +//! +//! A new implementation of the [`crate::aggregator::Aggregator`], with slightly +//! different semantics and more optimized aggregation. +//! +//! This module is supposed to replace the [`crate::aggregator`] module, +//! if proven successful. -use std::fmt; -use std::hash::Hasher; -use std::time::Duration; +use std::time::{Duration, SystemTime}; -use fnv::FnvHasher; -use hashbrown::hash_map::Entry; +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricNamespace; use relay_base_schema::project::ProjectKey; use relay_common::time::UnixTimestamp; -use thiserror::Error; -use tokio::time::Instant; - -use crate::bucket::Bucket; -use crate::statsd::{MetricCounters, MetricGauges, MetricHistograms, MetricSets, MetricTimers}; -use crate::MetricName; -use hashbrown::HashMap; +use crate::statsd::{MetricCounters, MetricGauges}; +use crate::Bucket; -mod buckets; mod config; -mod cost; +mod inner; +mod stats; -use self::buckets::{BucketKey, Buckets, QueuedBucket}; pub use self::config::*; -pub(crate) use self::cost::tags_cost; +use self::inner::{BucketData, BucketKey}; + +const DEFAULT_PARTITIONS_PER_SECOND: u32 = 64; /// Any error that may occur during aggregation. -#[derive(Debug, Error, PartialEq)] +#[derive(Debug, thiserror::Error, PartialEq)] pub enum AggregateMetricsError { - /// A metric bucket's timestamp was out of the configured acceptable range. - #[error("found invalid timestamp: {0}")] - InvalidTimestamp(UnixTimestamp), /// Internal error: Attempted to merge two metric buckets of different types. #[error("found incompatible metric types")] InvalidTypes, - /// A metric bucket had a too long string (metric name or a tag key/value). - #[error("found invalid string: {0}")] - InvalidStringLength(MetricName), /// A metric bucket is too large for the global bytes limit. #[error("total metrics limit exceeded")] TotalLimitExceeded, /// A metric bucket is too large for the per-project bytes limit. #[error("project metrics limit exceeded")] ProjectLimitExceeded, + /// The timestamp is outside the maximum allowed time range. + #[error("the timestamp '{0}' is outside the maximum allowed time range")] + InvalidTimestamp(UnixTimestamp), } -/// A collector of [`Bucket`] submissions. -/// -/// # Aggregation -/// -/// Each metric is dispatched into the a [`Bucket`] depending on its project key (DSN), name, type, -/// unit, tags and timestamp. The bucket timestamp is rounded to the precision declared by the -/// `bucket_interval` field on the [AggregatorConfig] configuration. -/// -/// Each bucket stores the accumulated value of submitted metrics: -/// -/// - `Counter`: Sum of values. -/// - `Distribution`: A list of values. -/// - `Set`: A unique set of hashed values. -/// - `Gauge`: A summary of the reported values, see [`GaugeValue`](crate::GaugeValue). -/// -/// # Conflicts -/// -/// Metrics are uniquely identified by the combination of their name, type and unit. It is allowed -/// to send metrics of different types and units under the same name. For example, sending a metric -/// once as set and once as distribution will result in two actual metrics being recorded. +/// An aggregator for metric [`Bucket`]'s. +#[derive(Debug)] pub struct Aggregator { name: String, - config: AggregatorConfig, - buckets: Buckets, - cost_tracker: cost::Tracker, - reference_time: Instant, + inner: inner::Inner, } impl Aggregator { - /// Create a new aggregator. - pub fn new(config: AggregatorConfig) -> Self { - Self::named("default".to_owned(), config) - } + /// Creates a new named [`Self`]. + pub fn named(name: String, config: &AggregatorConfig) -> Self { + let num_partitions = match config.flush_batching { + FlushBatching::Project => config.flush_partitions, + FlushBatching::Bucket => config.flush_partitions, + FlushBatching::Partition => config.flush_partitions, + FlushBatching::None => Some(0), + } + .unwrap_or(DEFAULT_PARTITIONS_PER_SECOND * config.bucket_interval.max(1)); - /// Like [`Self::new`], but with a provided name. - pub fn named(name: String, config: AggregatorConfig) -> Self { Self { name, - config, - buckets: Default::default(), - cost_tracker: Default::default(), - reference_time: Instant::now(), + inner: inner::Inner::new(inner::Config { + start: UnixTimestamp::now(), + bucket_interval: config.bucket_interval, + num_time_slots: config.aggregator_size, + num_partitions, + delay: config.initial_delay, + max_total_bucket_bytes: config.max_total_bucket_bytes, + max_project_key_bucket_bytes: config.max_project_key_bucket_bytes, + max_secs_in_past: Some(config.max_secs_in_past), + max_secs_in_future: Some(config.max_secs_in_future), + }), } } /// Returns the name of the aggregator. pub fn name(&self) -> &str { - self.name.as_str() - } - - /// Returns the number of buckets in the aggregator. - pub fn bucket_count(&self) -> usize { - self.buckets.len() - } - - /// Returns `true` if the cost trackers value is larger than the configured max cost. - pub fn totals_cost_exceeded(&self) -> bool { - self.cost_tracker - .totals_cost_exceeded(self.config.max_total_bucket_bytes) - } - - /// Converts this aggregator into a vector of [`Bucket`]. - pub fn into_buckets(self) -> Vec { - relay_statsd::metric!( - gauge(MetricGauges::Buckets) = self.bucket_count() as u64, - aggregator = &self.name, - ); - - let bucket_interval = self.config.bucket_interval; - - self.buckets - .into_iter() - .map(|(key, entry)| Bucket { - timestamp: key.timestamp, - width: bucket_interval, - name: key.metric_name, - value: entry.value, - tags: key.tags, - metadata: entry.metadata, - }) - .collect() - } - - /// Pop and return the partitions with buckets that are eligible for flushing out according to - /// bucket interval. - /// - /// For each project `flush_decision` is called, which can influence the flush decision - /// for buckets of that project. The decision can also return metadata for the project - /// on which all flushed buckets for the project are merged with the `merge` function. - /// - /// `flush_decision` is only called once if the decision is [`FlushDecision::Flush`], - /// but may be called multiple times otherwise. The `flush_decision` should be consistent - /// for each project key passed. - /// - /// If no partitioning is enabled, the function will return a single `None` partition. - pub fn pop_flush_buckets>( - &mut self, - force: bool, - mut flush_decision: impl FnMut(ProjectKey) -> FlushDecision, - ) -> HashMap, HashMap> { - relay_statsd::metric!( - gauge(MetricGauges::Buckets) = self.bucket_count() as u64, - aggregator = &self.name, - ); - - // We only emit statsd metrics for the cost on flush (and not when merging the buckets), - // assuming that this gives us more than enough data points. - for (namespace, cost) in self.cost_tracker.cost_per_namespace() { - relay_statsd::metric!( - gauge(MetricGauges::BucketsCost) = cost as u64, - aggregator = &self.name, - namespace = namespace.as_str() - ); - } - - let mut partitions = HashMap::new(); - let mut stats = HashMap::new(); - - let mut re_add = Vec::new(); - - let now = Instant::now(); - let ts = UnixTimestamp::from_instant(now.into_std()); - - let should_pop = |entry: &QueuedBucket| force || entry.elapsed(now); - - let bucket_interval = self.config.bucket_interval; - let cost_tracker = &mut self.cost_tracker; - - let start_bucket_scan = Instant::now(); - while let Some((key, queued_bucket)) = self.buckets.try_pop(|_, b| should_pop(b)) { - let partition = self.config.flush_partitions.map(|p| key.partition_key(p)); - let partition = partitions.entry(partition).or_insert_with(HashMap::new); - - let cost = key.cost() + queued_bucket.value.cost(); - - let buckets = match partition.entry(key.project_key) { - Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), - Entry::Vacant(vacant_entry) => { - match flush_decision(key.project_key) { - FlushDecision::Flush(v) => vacant_entry.insert(v), - FlushDecision::Delay => { - let mut entry = queued_bucket; - - // No point in re-calculating the flush time when `force` is `true`. - // We can still do it on a flush without force. - if !force { - entry.flush_at = - get_flush_time(&self.config, ts, self.reference_time, &key); - // This should not happen, but may happen with weird - // configs or due to bugs. - debug_assert!(!entry.elapsed(now)); - } - - // Re-use the loop condition for `try_pop`, to make sure we - // will never accidentally create an infinite loop. - match should_pop(&entry) { - // If we would pop the entry again (`force`, a weird flush - // configuration, a bug), re-add it after the loop to - // prevent infinite loops. - true => re_add.push((key, entry)), - // Otherwise it's safe to immediately re-add. - false => self.buckets.re_add(key, entry), - } - - continue; - } - FlushDecision::Drop => { - cost_tracker.subtract_cost(key.namespace(), key.project_key, cost); - continue; - } - } - } - }; - - cost_tracker.subtract_cost(key.namespace(), key.project_key, cost); - - let (bucket_count, item_count) = stats - .entry((queued_bucket.value.ty(), key.namespace())) - .or_insert((0usize, 0usize)); - *bucket_count += 1; - *item_count += queued_bucket.value.len(); - - let bucket = Bucket { - timestamp: key.timestamp, - width: bucket_interval, - name: key.metric_name, - value: queued_bucket.value, - tags: key.tags, - metadata: queued_bucket.metadata, - }; - - buckets.extend(std::iter::once(bucket)); - } - - for (key, entry) in re_add { - self.buckets.re_add(key, entry); - } - - relay_statsd::metric!( - timer(MetricTimers::BucketsScanDuration) = start_bucket_scan.elapsed(), - aggregator = &self.name, - ); - - for ((ty, namespace), (bucket_count, item_count)) in stats.into_iter() { - relay_statsd::metric!( - gauge(MetricGauges::AvgBucketSize) = item_count as f64 / bucket_count as f64, - metric_type = ty.as_str(), - namespace = namespace.as_str(), - aggregator = self.name(), - ); - } - - partitions + &self.name } - /// Wrapper for [`AggregatorConfig::get_bucket_timestamp`]. - /// - /// Logs a statsd metric for invalid timestamps. - fn get_bucket_timestamp( - &self, - now: UnixTimestamp, - timestamp: UnixTimestamp, - bucket_width: u64, - ) -> Result { - let bucket_ts = self.config.get_bucket_timestamp(timestamp, bucket_width); - - if !self.config.timestamp_range().contains(&bucket_ts) { - let delta = (bucket_ts.as_secs() as i64) - (now.as_secs() as i64); - relay_statsd::metric!( - histogram(MetricHistograms::InvalidBucketTimestamp) = delta as f64, - aggregator = &self.name, - ); - - return Err(AggregateMetricsError::InvalidTimestamp(timestamp)); - } - - Ok(bucket_ts) + /// Returns `true` if the aggregator contains any metric buckets. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() } /// Merge a bucket into this aggregator. - /// - /// Like [`Self::merge_with_options`] with defaults. pub fn merge( &mut self, project_key: ProjectKey, bucket: Bucket, ) -> Result<(), AggregateMetricsError> { - self.merge_with_options(project_key, bucket, UnixTimestamp::now()) - } - - /// Merge a bucket into this aggregator. - /// - /// If no bucket exists for the given bucket key, a new bucket will be created. - /// - /// Arguments: - /// - `now`: The current timestamp, when merging a large batch of buckets, the timestamp - /// should be created outside of the loop. - pub fn merge_with_options( - &mut self, - project_key: ProjectKey, - bucket: Bucket, - now: UnixTimestamp, - ) -> Result<(), AggregateMetricsError> { - let timestamp = self.get_bucket_timestamp(now, bucket.timestamp, bucket.width)?; let key = BucketKey { project_key, - timestamp, + timestamp: bucket.timestamp, metric_name: bucket.name, tags: bucket.tags, extracted_from_indexed: bucket.metadata.extracted_from_indexed, }; - let key = validate_bucket_key(key, &self.config)?; - let namespace = key.namespace(); - - // XXX: This is not a great implementation of cost enforcement. - // - // * it takes two lookups of the project key in the cost tracker to merge a bucket: once in - // `check_limits_exceeded` and once in `add_cost`. - // - // * the limits are not actually enforced consistently - // - // A bucket can be merged that exceeds the cost limit, and only the next bucket will be - // limited because the limit is now reached. This implementation was chosen because it's - // currently not possible to determine cost accurately upfront: The bucket values have to - // be merged together to figure out how costly the merge was. Changing that would force - // us to unravel a lot of abstractions that we have already built. - // - // As a result of that, it is possible to exceed the bucket cost limit significantly - // until we have guaranteed upper bounds on the cost of a single bucket (which we - // currently don't, because a metric can have arbitrary amount of tag values). - // - // Another consequence is that a MergeValue that adds zero cost (such as an existing - // counter bucket being incremented) is currently rejected even though it doesn't have to - // be. - // - // The flipside of this approach is however that there's more optimization potential: If - // the limit is already exceeded, we could implement an optimization that drops envelope - // items before they are parsed, as we can be sure that the new metric bucket will be - // rejected in the aggregator regardless of whether it is merged into existing buckets, - // whether it is just a counter, etc. - self.cost_tracker.check_limits_exceeded( - project_key, - self.config.max_total_bucket_bytes, - self.config.max_project_key_bucket_bytes, - )?; - - let added_cost = self.buckets.merge( - key, - (bucket.value, bucket.metadata), - |key, (value, metadata), exisiting| { - relay_statsd::metric!( - counter(MetricCounters::MergeHit) += 1, - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - exisiting.merge(value, metadata) - }, - |key, (value, metadata)| { - relay_statsd::metric!( - counter(MetricCounters::MergeMiss) += 1, - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - relay_statsd::metric!( - set(MetricSets::UniqueBucketsCreated) = key.hash64() as i64, // 2-complement - aggregator = &self.name, - namespace = key.namespace().as_str(), - ); - - let flush_at = get_flush_time(&self.config, now, self.reference_time, key); - QueuedBucket::new(flush_at, value, metadata) - }, - )?; - - self.cost_tracker - .add_cost(namespace, project_key, added_cost); + let value = BucketData { + value: bucket.value, + metadata: bucket.metadata, + }; - Ok(()) + self.inner.merge(key, value) } -} -impl fmt::Debug for Aggregator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct(std::any::type_name::()) - .field("config", &self.config) - .field("buckets", &self.buckets) - .field("receiver", &format_args!("Recipient")) - .finish() - } -} + /// Attempts to flush the next batch from the aggregator. + /// + /// If it is too early to flush the next batch, the error contains the timestamp when the flush should be retried. + /// After a successful flush, retry immediately until an error is returned with the next flush + /// time, this makes sure time is eventually synchronized. + pub fn try_flush_next(&mut self, now: SystemTime) -> Result { + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush(); + + if let Err(err) = now.duration_since(next_flush) { + // The flush time is in the future, return the amount of time to wait before the next flush. + return Err(err.duration()); + } -/// Decision what to do with a bucket when flushing. -pub enum FlushDecision { - /// Flush the bucket with the provided metadata. - Flush(T), - /// Drop the bucket. - Drop, - /// Delay flushing the bucket into the future, it's not ready. - Delay, -} + let partition = self.inner.flush_next(); -/// Validates the metric name and its tags are correct. -/// -/// Returns `Err` if the metric should be dropped. -fn validate_bucket_key( - mut key: BucketKey, - aggregator_config: &AggregatorConfig, -) -> Result { - key = validate_metric_name(key, aggregator_config)?; - key = validate_metric_tags(key, aggregator_config); - Ok(key) -} + emit_stats(&self.name, self.inner.stats()); + emit_flush_partition_stats(&self.name, partition.stats); -/// Validates metric name against [`AggregatorConfig`]. -/// -/// Returns `Err` if the metric must be dropped. -fn validate_metric_name( - key: BucketKey, - aggregator_config: &AggregatorConfig, -) -> Result { - let metric_name_length = key.metric_name.len(); - if metric_name_length > aggregator_config.max_name_length { - relay_log::debug!( - "Invalid metric name, too long (> {}): {:?}", - aggregator_config.max_name_length, - key.metric_name - ); - return Err(AggregateMetricsError::InvalidStringLength(key.metric_name)); + Ok(Partition { + partition_key: partition.partition_key, + buckets: partition.buckets, + bucket_interval: self.inner.bucket_interval(), + }) } - Ok(key) -} + /// Returns when the next partition is ready to be flushed using [`Self::try_flush_next`]. + pub fn next_flush(&mut self, now: SystemTime) -> Duration { + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush(); -/// Validates metric tags against [`AggregatorConfig`]. -/// -/// Invalid tags are removed. -fn validate_metric_tags(mut key: BucketKey, aggregator_config: &AggregatorConfig) -> BucketKey { - key.tags.retain(|tag_key, tag_value| { - if tag_key.len() > aggregator_config.max_tag_key_length { - relay_log::debug!("Invalid metric tag key"); - return false; + match now.duration_since(next_flush) { + Ok(_) => Duration::ZERO, + Err(err) => err.duration(), } - if bytecount::num_chars(tag_value.as_bytes()) > aggregator_config.max_tag_value_length { - relay_log::debug!("Invalid metric tag value"); - return false; - } - - true - }); - key -} + } -/// Returns the instant at which a bucket should be flushed. -/// -/// All buckets are flushed after a grace period of `initial_delay`. -fn get_flush_time( - config: &AggregatorConfig, - now: UnixTimestamp, - reference_time: Instant, - bucket_key: &BucketKey, -) -> Instant { - let initial_flush = bucket_key.timestamp + config.bucket_interval() + config.initial_delay(); - let backdated = initial_flush <= now; + /// Consumes the aggregator and returns all contained partitions. + pub fn into_partitions(self) -> impl Iterator { + let bucket_interval = self.inner.bucket_interval(); - let flush_timestamp = if backdated { - // If the initial flush time has passed or can't be represented, we want to treat the - // flush of the bucket as if it came in with the timestamp of current bucket based on - // the now timestamp. - // - // The rationale behind this is that we want to flush this bucket in the earliest slot - // together with buckets that have similar characteristics (e.g., same partition, - // project...). - let floored_timestamp = (now.as_secs() / config.bucket_interval) * config.bucket_interval; - UnixTimestamp::from_secs(floored_timestamp) - + config.bucket_interval() - + config.initial_delay() - } else { - // If the initial flush is still pending, use that. - initial_flush - }; + emit_stats(&self.name, self.inner.stats()); - let instant = if flush_timestamp > now { - Instant::now().checked_add(flush_timestamp - now) - } else { - Instant::now().checked_sub(now - flush_timestamp) + self.inner.into_partitions().map(move |p| Partition { + partition_key: p.partition_key, + buckets: p.buckets, + bucket_interval, + }) } - .unwrap_or_else(Instant::now); - - // Since `Instant` doesn't allow to get directly how many seconds elapsed, we leverage the - // diffing to get a duration and round it to the smallest second to get consistent times. - let instant = reference_time + Duration::from_secs((instant - reference_time).as_secs()); - instant + flush_time_shift(config, bucket_key) } -/// Shift deterministically within one bucket interval based on the configured [`FlushBatching`]. +/// A flushed partition from [`Aggregator::try_flush_next`]. /// -/// This distributes buckets over time to prevent peaks. -fn flush_time_shift(config: &AggregatorConfig, bucket: &BucketKey) -> Duration { - let shift_range = config.bucket_interval * 1000; - - // Fall back to default flushing by project if no partitioning is configured. - let (batching, partitions) = match (config.flush_batching, config.flush_partitions) { - (FlushBatching::Partition, None | Some(0)) => (FlushBatching::Project, 0), - (flush_batching, partitions) => (flush_batching, partitions.unwrap_or(0)), - }; - - let shift_millis = match batching { - FlushBatching::Project => { - let mut hasher = FnvHasher::default(); - hasher.write(bucket.project_key.as_str().as_bytes()); - hasher.finish() % shift_range - } - FlushBatching::Bucket => bucket.hash64() % shift_range, - FlushBatching::Partition => shift_range * bucket.partition_key(partitions) / partitions, - FlushBatching::None => 0, - }; - - Duration::from_millis(shift_millis) +/// The partition contains the partition key and all flushed buckets. +pub struct Partition { + /// The partition key. + pub partition_key: u32, + buckets: HashMap, + bucket_interval: u32, } -#[cfg(test)] -mod tests { - use insta::assert_debug_snapshot; - use similar_asserts::assert_eq; - use std::collections::BTreeMap; - - use super::*; - use crate::{dist, BucketMetadata, BucketValue, GaugeValue}; - - fn test_config() -> AggregatorConfig { - AggregatorConfig { - bucket_interval: 1, - initial_delay: 0, - max_secs_in_past: 50 * 365 * 24 * 60 * 60, - max_secs_in_future: 50 * 365 * 24 * 60 * 60, - max_name_length: 200, - max_tag_key_length: 200, - max_tag_value_length: 200, - max_project_key_bucket_bytes: None, - max_total_bucket_bytes: None, - flush_batching: FlushBatching::default(), - flush_partitions: None, - } - } +impl IntoIterator for Partition { + type Item = (ProjectKey, Bucket); + type IntoIter = PartitionIter; - fn some_bucket(timestamp: Option) -> Bucket { - let timestamp = timestamp.map_or(UnixTimestamp::from_secs(999994711), |t| t); - Bucket { - timestamp, - width: 0, - name: "c:transactions/foo@none".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), + fn into_iter(self) -> Self::IntoIter { + PartitionIter { + inner: self.buckets.into_iter(), + bucket_interval: self.bucket_interval, } } +} - #[test] - fn test_aggregator_merge_counters() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(test_config()); - - let bucket1 = some_bucket(None); - - let mut bucket2 = bucket1.clone(); - bucket2.value = BucketValue::counter(43.into()); - aggregator.merge(project_key, bucket1).unwrap(); - aggregator.merge(project_key, bucket2).unwrap(); - - let buckets: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(k, e)| (k, e.value)) // skip flush times, they are different every time - .collect(); - - insta::assert_debug_snapshot!(buckets, @r###" - [ - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994711), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 85.0, - ), - ), - ] - "###); - } - - #[test] - fn test_bucket_value_cost() { - // When this test fails, it means that the cost model has changed. - // Check dimensionality limits. - let expected_bucket_value_size = 48; - let expected_set_entry_size = 4; - - let counter = BucketValue::Counter(123.into()); - assert_eq!(counter.cost(), expected_bucket_value_size); - let set = BucketValue::Set([1, 2, 3, 4, 5].into()); - assert_eq!( - set.cost(), - expected_bucket_value_size + 5 * expected_set_entry_size - ); - let distribution = BucketValue::Distribution(dist![1, 2, 3]); - assert_eq!(distribution.cost(), expected_bucket_value_size + 3 * 8); - let gauge = BucketValue::Gauge(GaugeValue { - last: 43.into(), - min: 42.into(), - max: 43.into(), - sum: 85.into(), - count: 2, - }); - assert_eq!(gauge.cost(), expected_bucket_value_size); - } - - #[test] - fn test_bucket_key_cost() { - // When this test fails, it means that the cost model has changed. - // Check dimensionality limits. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let metric_name = "12345".into(); - let bucket_key = BucketKey { - timestamp: UnixTimestamp::now(), - project_key, - metric_name, - tags: BTreeMap::from([ - ("hello".to_owned(), "world".to_owned()), - ("answer".to_owned(), "42".to_owned()), - ]), - extracted_from_indexed: false, - }; - assert_eq!( - bucket_key.cost(), - 88 + // BucketKey - 5 + // name - (5 + 5 + 6 + 2) // tags - ); - } - - #[test] - fn test_aggregator_merge_timestamps() { - relay_test::setup(); - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(config); - - let bucket1 = some_bucket(None); - - let mut bucket2 = bucket1.clone(); - bucket2.timestamp = UnixTimestamp::from_secs(999994712); - - let mut bucket3 = bucket1.clone(); - bucket3.timestamp = UnixTimestamp::from_secs(999994721); - - aggregator.merge(project_key, bucket1).unwrap(); - aggregator.merge(project_key, bucket2).unwrap(); - aggregator.merge(project_key, bucket3).unwrap(); - - let mut buckets: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(k, e)| (k, e.value)) // skip flush times, they are different every time - .collect(); - - buckets.sort_by(|a, b| a.0.timestamp.cmp(&b.0.timestamp)); - insta::assert_debug_snapshot!(buckets, @r###" - [ - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994710), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 84.0, - ), - ), - ( - BucketKey { - project_key: ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"), - timestamp: UnixTimestamp(999994720), - metric_name: MetricName( - "c:transactions/foo@none", - ), - tags: {}, - extracted_from_indexed: false, - }, - Counter( - 42.0, - ), - ), - ] - "###); - } - - #[test] - fn test_aggregator_mixed_projects() { - relay_test::setup(); - - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key1 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let project_key2 = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let mut aggregator: Aggregator = Aggregator::new(config); - - // It's OK to have same metric with different projects: - aggregator.merge(project_key1, some_bucket(None)).unwrap(); - aggregator.merge(project_key2, some_bucket(None)).unwrap(); - - assert_eq!(aggregator.buckets.len(), 2); - } +/// Iterator yielded from [`Partition::into_iter`]. +pub struct PartitionIter { + inner: hashbrown::hash_map::IntoIter, + bucket_interval: u32, +} - #[test] - fn test_aggregator_cost_tracking() { - // Make sure that the right cost is added / subtracted - let mut aggregator: Aggregator = Aggregator::new(test_config()); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); +impl Iterator for PartitionIter { + type Item = (ProjectKey, Bucket); - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo@none".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - let bucket_key = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/foo@none".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - let fixed_cost = bucket_key.cost() + std::mem::size_of::(); - for (metric_name, metric_value, expected_added_cost) in [ - ( - "c:transactions/foo@none", - BucketValue::counter(42.into()), - fixed_cost, - ), - ( - "c:transactions/foo@none", - BucketValue::counter(42.into()), - 0, - ), // counters have constant size - ( - "s:transactions/foo@none", - BucketValue::set(123), - fixed_cost + 4, - ), // Added a new bucket + 1 element - ("s:transactions/foo@none", BucketValue::set(123), 0), // Same element in set, no change - ("s:transactions/foo@none", BucketValue::set(456), 4), // Different element in set -> +4 - ( - "d:transactions/foo@none", - BucketValue::distribution(1.into()), - fixed_cost + 8, - ), // New bucket + 1 element - ( - "d:transactions/foo@none", - BucketValue::distribution(1.into()), - 8, - ), // duplicate element - ( - "d:transactions/foo@none", - BucketValue::distribution(2.into()), - 8, - ), // 1 new element - ( - "g:transactions/foo@none", - BucketValue::gauge(3.into()), - fixed_cost, - ), // New bucket - ("g:transactions/foo@none", BucketValue::gauge(2.into()), 0), // gauge has constant size - ] { - let mut bucket = bucket.clone(); - bucket.value = metric_value; - bucket.name = metric_name.into(); + fn next(&mut self) -> Option { + let (key, data) = self.inner.next()?; - let current_cost = aggregator.cost_tracker.total_cost(); - aggregator.merge(project_key, bucket).unwrap(); - let total_cost = aggregator.cost_tracker.total_cost(); - assert_eq!(total_cost, current_cost + expected_added_cost); - } - - aggregator.pop_flush_buckets(true, |_| FlushDecision::Flush(Vec::new())); - assert_eq!(aggregator.cost_tracker.total_cost(), 0); + Some(( + key.project_key, + Bucket { + timestamp: key.timestamp, + width: u64::from(self.bucket_interval), + name: key.metric_name, + tags: key.tags, + value: data.value, + metadata: data.metadata, + }, + )) } - #[tokio::test(start_paused = true)] - async fn test_aggregator_flush() { - // Make sure that the right cost is added / subtracted - let mut aggregator: Aggregator = Aggregator::new(AggregatorConfig { - bucket_interval: 10, - ..test_config() - }); - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - let now = UnixTimestamp::now(); - - for i in 0..3u32 { - for (name, offset) in [("foo", 30), ("bar", 15)] { - let timestamp = now + Duration::from_secs(60 * u64::from(i) + offset); - let bucket = Bucket { - timestamp, - width: 0, - name: format!("c:transactions/{name}_{i}@none").into(), - value: BucketValue::counter(i.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - aggregator.merge(project_key, bucket).unwrap(); - } - } - - let mut flush_buckets = || { - let mut result = Vec::new(); - - let partitions = - aggregator.pop_flush_buckets(false, |_| FlushDecision::Flush(Vec::new())); - for (partition, v) in partitions { - assert!(partition.is_none()); - for (pk, buckets) in v { - assert_eq!(pk, project_key); - for bucket in buckets { - result.push(bucket.name.to_string()); - } - } - } - result.sort(); - result - }; - - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_0@none", - "c:transactions/foo_0@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_1@none", - "c:transactions/foo_1@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(60)).await; - assert_debug_snapshot!(flush_buckets(), @r###" - [ - "c:transactions/bar_2@none", - "c:transactions/foo_2@none", - ] - "###); - assert!(flush_buckets().is_empty()); - - tokio::time::advance(Duration::from_secs(3600)).await; - assert!(flush_buckets().is_empty()); - - assert!(aggregator.into_buckets().is_empty()); + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() } +} - #[test] - fn test_get_bucket_timestamp_overflow() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; - - let aggregator: Aggregator = Aggregator::new(config); - - assert!(matches!( - aggregator - .get_bucket_timestamp(UnixTimestamp::now(), UnixTimestamp::from_secs(u64::MAX), 2) - .unwrap_err(), - AggregateMetricsError::InvalidTimestamp(_) - )); +impl std::iter::ExactSizeIterator for PartitionIter { + fn len(&self) -> usize { + self.inner.len() } +} - #[test] - fn test_get_bucket_timestamp_zero() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; +impl std::iter::FusedIterator for PartitionIter {} - let now = UnixTimestamp::now().as_secs(); - let rounded_now = UnixTimestamp::from_secs(now / 10 * 10); - assert_eq!( - config.get_bucket_timestamp(UnixTimestamp::from_secs(now), 0), - rounded_now +fn emit_stats(name: &str, stats: inner::Stats) { + for namespace in MetricNamespace::all() { + relay_statsd::metric!( + gauge(MetricGauges::Buckets) = *stats.count_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name ); - } - - #[test] - fn test_get_bucket_timestamp_multiple() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; - - let rounded_now = UnixTimestamp::now().as_secs() / 10 * 10; - let now = rounded_now + 3; - assert_eq!( - config - .get_bucket_timestamp(UnixTimestamp::from_secs(now), 20) - .as_secs(), - rounded_now + 10 + relay_statsd::metric!( + gauge(MetricGauges::BucketsCost) = *stats.cost_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name ); } +} - #[test] - fn test_get_bucket_timestamp_non_multiple() { - let config = AggregatorConfig { - bucket_interval: 10, - initial_delay: 0, - ..Default::default() - }; +fn emit_flush_partition_stats(name: &str, stats: inner::PartitionStats) { + relay_statsd::metric!(counter(MetricCounters::FlushCount) += 1, aggregator = name); - let rounded_now = UnixTimestamp::now().as_secs() / 10 * 10; - let now = rounded_now + 3; - assert_eq!( - config - .get_bucket_timestamp(UnixTimestamp::from_secs(now), 23) - .as_secs(), - rounded_now + 10 + for namespace in MetricNamespace::all() { + relay_statsd::metric!( + counter(MetricCounters::MergeHit) += *stats.count_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); - } - - #[test] - fn test_validate_bucket_key_str_length() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let short_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - assert!(validate_bucket_key(short_metric, &test_config()).is_ok()); - - let long_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(long_metric, &test_config()); - - assert!(matches!( - validation.unwrap_err(), - AggregateMetricsError::InvalidStringLength(_), - )); - - let short_metric_long_tag_key = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric_with_long_tag_key".into(), - tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(short_metric_long_tag_key, &test_config()).unwrap(); - assert_eq!(validation.tags.len(), 0); - - let short_metric_long_tag_value = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric_with_long_tag_value".into(), - tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]), - extracted_from_indexed: false, - }; - let validation = validate_bucket_key(short_metric_long_tag_value, &test_config()).unwrap(); - assert_eq!(validation.tags.len(), 0); - } - - #[test] - fn test_validate_tag_values_special_chars() { - relay_test::setup(); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let tag_value = "x".repeat(199) + "ø"; - assert_eq!(tag_value.chars().count(), 200); // Should be allowed - let short_metric = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/a_short_metric".into(), - tags: BTreeMap::from([("foo".into(), tag_value.clone())]), - extracted_from_indexed: false, - }; - let validated_bucket = validate_metric_tags(short_metric, &test_config()); - assert_eq!(validated_bucket.tags["foo"], tag_value); - } - - #[test] - fn test_aggregator_cost_enforcement_total() { - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - let mut aggregator = Aggregator::new(AggregatorConfig { - max_total_bucket_bytes: Some(1), - ..test_config() - }); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - - aggregator.merge(project_key, bucket.clone()).unwrap(); - - assert_eq!( - aggregator.merge(project_key, bucket).unwrap_err(), - AggregateMetricsError::TotalLimitExceeded + relay_statsd::metric!( + counter(MetricCounters::MergeMiss) += *stats.merges_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); - } - - #[test] - fn test_aggregator_cost_enforcement_project() { - relay_test::setup(); - let mut config = test_config(); - config.max_project_key_bucket_bytes = Some(1); - - let timestamp = UnixTimestamp::from_secs(999994711); - let bucket = Bucket { - timestamp, - width: 0, - name: "c:transactions/foo".into(), - value: BucketValue::counter(42.into()), - tags: BTreeMap::new(), - metadata: BucketMetadata::new(timestamp), - }; - - let mut aggregator: Aggregator = Aggregator::new(config); - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fed").unwrap(); - - aggregator.merge(project_key, bucket.clone()).unwrap(); - assert_eq!( - aggregator.merge(project_key, bucket).unwrap_err(), - AggregateMetricsError::ProjectLimitExceeded + relay_statsd::metric!( + counter(MetricCounters::FlushCost) += *stats.cost_by_namespace.get(namespace), + namespace = namespace.as_str(), + aggregator = name, ); } - - #[test] - fn test_parse_flush_batching() { - let json = r#"{"shift_key": "partition"}"#; - let parsed: AggregatorConfig = serde_json::from_str(json).unwrap(); - assert!(matches!(parsed.flush_batching, FlushBatching::Partition)); - } - - #[test] - fn test_aggregator_merge_metadata() { - let mut config = test_config(); - config.bucket_interval = 10; - - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut aggregator: Aggregator = Aggregator::new(config); - - let bucket1 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - let bucket2 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - - // We create a bucket with 3 merges and monotonically increasing timestamps. - let mut bucket3 = some_bucket(Some(UnixTimestamp::from_secs(999994711))); - bucket3 - .metadata - .merge(BucketMetadata::new(UnixTimestamp::from_secs(999997811))); - bucket3 - .metadata - .merge(BucketMetadata::new(UnixTimestamp::from_secs(999999811))); - - aggregator.merge(project_key, bucket1.clone()).unwrap(); - aggregator.merge(project_key, bucket2.clone()).unwrap(); - aggregator.merge(project_key, bucket3.clone()).unwrap(); - - let buckets_metadata: Vec<_> = aggregator - .buckets - .into_iter() - .map(|(_, v)| v.metadata) - .collect(); - insta::assert_debug_snapshot!(buckets_metadata, @r###" - [ - BucketMetadata { - merges: 5, - received_at: Some( - UnixTimestamp(999994711), - ), - extracted_from_indexed: false, - }, - ] - "###); - } - - #[test] - fn test_get_flush_time_with_backdated_bucket() { - let mut config = test_config(); - config.bucket_interval = 3600; - config.initial_delay = 1300; - config.flush_partitions = Some(10); - config.flush_batching = FlushBatching::Partition; - - let reference_time = Instant::now(); - - let now_s = - (UnixTimestamp::now().as_secs() / config.bucket_interval) * config.bucket_interval; - - // First bucket has a timestamp two hours ago. - let timestamp = UnixTimestamp::from_secs(now_s - 7200); - let bucket_key_1 = BucketKey { - project_key: ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), - timestamp, - metric_name: "c:transactions/foo".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - - // Second bucket has a timestamp in this hour. - let timestamp = UnixTimestamp::from_secs(now_s); - let bucket_key_2 = BucketKey { - project_key: ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(), - timestamp, - metric_name: "c:transactions/foo".into(), - tags: BTreeMap::new(), - extracted_from_indexed: false, - }; - - let now = UnixTimestamp::now(); - let flush_time_1 = get_flush_time(&config, now, reference_time, &bucket_key_1); - let flush_time_2 = get_flush_time(&config, now, reference_time, &bucket_key_2); - - assert_eq!(flush_time_1, flush_time_2); - } } diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap new file mode 100644 index 0000000000..c5044d3e85 --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-3.snap @@ -0,0 +1,150 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 6, + count_by_namespace: (unsupported:6), + cost: 822, + cost_by_namespace: (unsupported:822), + }, + "(70, Slot(1))", + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), + ( + 130, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), +] diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap new file mode 100644 index 0000000000..e55f5c49df --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush-5.snap @@ -0,0 +1,150 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 6, + count_by_namespace: (unsupported:6), + cost: 822, + cost_by_namespace: (unsupported:822), + }, + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), + ( + 130, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), + "(130, Slot(1))", +] diff --git a/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap new file mode 100644 index 0000000000..8b9a2cf0b2 --- /dev/null +++ b/relay-metrics/src/aggregator/snapshots/relay_metrics__aggregator__inner__tests__merge_flush.snap @@ -0,0 +1,153 @@ +--- +source: relay-metrics/src/aggregator/inner.rs +expression: buckets +--- +[ + Total { + count: 7, + count_by_namespace: (unsupported:7), + cost: 959, + cost_by_namespace: (unsupported:959), + }, + ( + 70, + Slot { + partition_key: 0, + stats: Slot { + count: 2, + count_by_namespace: (unsupported:2), + merges: 0, + merges_by_namespace: (0), + cost: 274, + cost_by_namespace: (unsupported:274), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 274, + }, + }, + buckets: { + 70-00000000000000000000000000000000-a: Counter( + 1.0, + ), + 190-00000000000000000000000000000000-a: Counter( + 1.0, + ), + }, + }, + ), + "(70, Slot(1))", + ( + 80, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 1, + merges_by_namespace: (unsupported:1), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 80-00000000000000000000000000000000-b: Counter( + 3.0, + ), + }, + }, + ), + "(80, Slot(1))", + ( + 90, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 30-00000000000000000000000000000000-c: Counter( + 1.0, + ), + }, + }, + ), + "(90, Slot(1))", + ( + 100, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 40-00000000000000000000000000000000-d: Counter( + 1.0, + ), + }, + }, + ), + "(100, Slot(1))", + ( + 110, + Slot { + partition_key: 0, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 170-00000000000000000000000000000000-e: Counter( + 1.0, + ), + }, + }, + ), + "(110, Slot(1))", + "(120, Slot(0))", + ( + 120, + Slot { + partition_key: 1, + stats: Slot { + count: 1, + count_by_namespace: (unsupported:1), + merges: 0, + merges_by_namespace: (0), + cost: 137, + cost_by_namespace: (unsupported:137), + cost_by_project: { + ProjectKey("00000000000000000000000000000000"): 137, + }, + }, + buckets: { + 180-00000000000000000000000000000000-f: Counter( + 1.0, + ), + }, + }, + ), +] diff --git a/relay-metrics/src/aggregator/stats.rs b/relay-metrics/src/aggregator/stats.rs new file mode 100644 index 0000000000..b2b69a7163 --- /dev/null +++ b/relay-metrics/src/aggregator/stats.rs @@ -0,0 +1,163 @@ +use hashbrown::HashMap; +use relay_base_schema::metrics::MetricNamespace; +use relay_base_schema::project::ProjectKey; + +use crate::aggregator::AggregateMetricsError; +use crate::utils::ByNamespace; + +/// Total stats tracked for the aggregator. +#[derive(Default, Debug)] +pub struct Total { + /// Total amount of buckets in the aggregator. + pub count: u64, + /// Total amount of buckets in the aggregator by namespace. + pub count_by_namespace: ByNamespace, + + /// Total cost of buckets in the aggregator. + pub cost: u64, + /// Total cost of buckets in the aggregator by namespace. + pub cost_by_namespace: ByNamespace, +} + +impl Total { + pub fn remove_slot(&mut self, slot: &Slot) { + let Self { + count, + count_by_namespace, + cost, + cost_by_namespace, + } = self; + + *count -= slot.count; + *count_by_namespace -= slot.count_by_namespace; + *cost -= slot.cost; + *cost_by_namespace -= slot.cost_by_namespace; + } +} + +/// Stats tracked by slot in the aggregator. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct Slot { + /// Amount of buckets created in this slot. + pub count: u64, + /// Amount of buckets created in this slot by namespace. + pub count_by_namespace: ByNamespace, + /// Amount of merges happened in this slot. + pub merges: u64, + /// Amount of merges happened in this slot by namespace. + pub merges_by_namespace: ByNamespace, + + /// Cost of buckets in this slot. + pub cost: u64, + /// Cost of buckets in this slot by namespace. + pub cost_by_namespace: ByNamespace, + /// Cost of buckets in this slot by project. + pub cost_by_project: HashMap, +} + +impl Slot { + /// Resets the slot to its initial empty state. + pub fn reset(&mut self) { + let Self { + count, + count_by_namespace, + merges, + merges_by_namespace, + cost, + cost_by_namespace, + cost_by_project, + } = self; + + *count = 0; + *count_by_namespace = Default::default(); + *merges = 0; + *merges_by_namespace = Default::default(); + + *cost = 0; + *cost_by_namespace = Default::default(); + // Keep the allocation around but at the same time make it possible for it to shrink. + cost_by_project.shrink_to_fit(); + cost_by_project.clear(); + } + + /// Increments the count by one. + pub fn incr_count(&mut self, total: &mut Total, namespace: MetricNamespace) { + self.count += 1; + *self.count_by_namespace.get_mut(namespace) += 1; + total.count += 1; + *total.count_by_namespace.get_mut(namespace) += 1; + } + + /// Increments the amount of merges in the slot by one. + pub fn incr_merges(&mut self, namespace: MetricNamespace) { + self.merges += 1; + *self.merges_by_namespace.get_mut(namespace) += 1; + } + + /// Tries to reserve a certain amount of cost. + /// + /// Returns an error if there is not enough budget left. + pub fn reserve<'a>( + &'a mut self, + total: &'a mut Total, + project_key: ProjectKey, + namespace: MetricNamespace, + cost: u64, + limits: &Limits, + ) -> Result, AggregateMetricsError> { + if total.cost + cost > limits.max_total { + return Err(AggregateMetricsError::TotalLimitExceeded); + } + let project = self.cost_by_project.entry(project_key).or_insert(0); + if *project + cost > limits.max_partition_project { + return Err(AggregateMetricsError::ProjectLimitExceeded); + } + + Ok(Reservation { + total, + cost_partition: &mut self.cost, + cost_partition_by_namespace: &mut self.cost_by_namespace, + cost_partition_project: project, + namespace, + reserved: cost, + }) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Limits { + pub max_total: u64, + pub max_partition_project: u64, +} + +#[must_use = "a reservation does nothing if it is not used"] +pub struct Reservation<'a> { + total: &'a mut Total, + + cost_partition: &'a mut u64, + cost_partition_by_namespace: &'a mut ByNamespace, + cost_partition_project: &'a mut u64, + + namespace: MetricNamespace, + + reserved: u64, +} + +impl Reservation<'_> { + pub fn consume(self) { + let reserved = self.reserved; + self.consume_with(reserved); + } + + pub fn consume_with(self, cost: u64) { + debug_assert!(cost <= self.reserved, "less reserved than used"); + // Update total costs. + self.total.cost += cost; + *self.total.cost_by_namespace.get_mut(self.namespace) += cost; + + // Update all partition costs. + *self.cost_partition += cost; + *self.cost_partition_by_namespace.get_mut(self.namespace) += cost; + *self.cost_partition_project += cost; + } +} diff --git a/relay-metrics/src/lib.rs b/relay-metrics/src/lib.rs index 4897553525..a6891087ac 100644 --- a/relay-metrics/src/lib.rs +++ b/relay-metrics/src/lib.rs @@ -75,6 +75,7 @@ mod bucket; mod finite; mod protocol; mod statsd; +mod utils; mod view; pub use bucket::*; diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs index 2e4ac42535..7b0caf2779 100644 --- a/relay-metrics/src/statsd.rs +++ b/relay-metrics/src/statsd.rs @@ -1,29 +1,4 @@ -use relay_statsd::{CounterMetric, GaugeMetric, HistogramMetric, SetMetric, TimerMetric}; - -/// Set metrics for Relay Metrics. -pub enum MetricSets { - /// Count the number of unique buckets created. - /// - /// This is a set of bucketing keys. The metric is basically equivalent to - /// `metrics.buckets.merge.miss` for a single Relay, but could be useful to determine how much - /// duplicate buckets there are when multiple instances are running. - /// - /// The hashing is platform-dependent at the moment, so all your relays that send this metric - /// should run on the same CPU architecture, otherwise this metric is not reliable. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - /// - `namespace`: The namespace of the metric for which the bucket was created. - UniqueBucketsCreated, -} - -impl SetMetric for MetricSets { - fn name(&self) -> &'static str { - match *self { - Self::UniqueBucketsCreated => "metrics.buckets.created.unique", - } - } -} +use relay_statsd::{CounterMetric, GaugeMetric}; /// Counter metrics for Relay Metrics. pub enum MetricCounters { @@ -33,58 +8,32 @@ pub enum MetricCounters { /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). /// - `namespace`: The namespace of the metric. MergeHit, - /// Incremented every time a bucket is created. /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). /// - `namespace`: The namespace of the metric. MergeMiss, -} - -impl CounterMetric for MetricCounters { - fn name(&self) -> &'static str { - match *self { - Self::MergeHit => "metrics.buckets.merge.hit", - Self::MergeMiss => "metrics.buckets.merge.miss", - } - } -} - -/// Timer metrics for Relay Metrics. -pub enum MetricTimers { - /// Time in milliseconds spent scanning metric buckets to flush. - /// - /// Relay scans metric buckets in regular intervals and flushes expired buckets. This timer - /// shows the time it takes to perform this scan and remove the buckets from the internal cache. - /// Sending the metric buckets to upstream is outside of this timer. + /// Incremented for every aggregator flush. /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsScanDuration, -} - -impl TimerMetric for MetricTimers { - fn name(&self) -> &'static str { - match *self { - Self::BucketsScanDuration => "metrics.buckets.scan_duration", - } - } -} - -/// Histogram metrics for Relay Metrics. -#[allow(clippy::enum_variant_names)] -pub enum MetricHistograms { - /// Distribution of invalid bucket timestamps observed, relative to the time of observation. + FlushCount, + /// Incremented with the cost of a partition for every aggregator flush. /// - /// This is a temporary metric to better understand why we see so many invalid timestamp errors. - InvalidBucketTimestamp, + /// This metric is tagged with: + /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). + /// - `namespace`: The namespace of the metric. + FlushCost, } -impl HistogramMetric for MetricHistograms { +impl CounterMetric for MetricCounters { fn name(&self) -> &'static str { match *self { - Self::InvalidBucketTimestamp => "metrics.buckets.invalid_timestamp", + Self::MergeHit => "metrics.buckets.merge.hit", + Self::MergeMiss => "metrics.buckets.merge.miss", + Self::FlushCount => "metrics.buckets.flush.count", + Self::FlushCost => "metrics.buckets.flush.cost", } } } @@ -95,18 +44,14 @@ pub enum MetricGauges { /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). + /// - `namespace`: The namespace of the metric. Buckets, /// The total storage cost of metric buckets in Relay's metrics aggregator. /// /// This metric is tagged with: /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsCost, - /// The average number of elements in a bucket when flushed. - /// - /// This metric is tagged with: - /// - `metric_type`: "c", "d", "g" or "s". /// - `namespace`: The namespace of the metric. - AvgBucketSize, + BucketsCost, } impl GaugeMetric for MetricGauges { @@ -114,7 +59,6 @@ impl GaugeMetric for MetricGauges { match *self { Self::Buckets => "metrics.buckets", Self::BucketsCost => "metrics.buckets.cost", - Self::AvgBucketSize => "metrics.buckets.size", } } } diff --git a/relay-metrics/src/utils.rs b/relay-metrics/src/utils.rs new file mode 100644 index 0000000000..a167c03bd9 --- /dev/null +++ b/relay-metrics/src/utils.rs @@ -0,0 +1,140 @@ +use core::fmt; +use std::collections::BTreeMap; +use std::ops::{AddAssign, SubAssign}; + +use relay_base_schema::metrics::MetricNamespace; + +/// Estimates the number of bytes needed to encode the tags. +/// +/// Note that this does not necessarily match the exact memory footprint of the tags, +/// because data structures or their serialization have overheads. +pub fn tags_cost(tags: &BTreeMap) -> usize { + tags.iter().map(|(k, v)| k.len() + v.len()).sum() +} + +#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ByNamespace { + pub sessions: T, + pub transactions: T, + pub spans: T, + pub custom: T, + pub stats: T, + pub unsupported: T, +} + +impl ByNamespace { + pub fn get(&self, namespace: MetricNamespace) -> &T { + match namespace { + MetricNamespace::Sessions => &self.sessions, + MetricNamespace::Transactions => &self.transactions, + MetricNamespace::Spans => &self.spans, + MetricNamespace::Custom => &self.custom, + MetricNamespace::Stats => &self.stats, + MetricNamespace::Unsupported => &self.unsupported, + } + } + + pub fn get_mut(&mut self, namespace: MetricNamespace) -> &mut T { + match namespace { + MetricNamespace::Sessions => &mut self.sessions, + MetricNamespace::Transactions => &mut self.transactions, + MetricNamespace::Spans => &mut self.spans, + MetricNamespace::Custom => &mut self.custom, + MetricNamespace::Stats => &mut self.stats, + MetricNamespace::Unsupported => &mut self.unsupported, + } + } +} + +impl fmt::Debug for ByNamespace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // A more compact representation. Mainly for snapshot testing. + write!(f, "(")?; + + let mut values = MetricNamespace::all() + .into_iter() + .map(|ns| (ns, self.get(ns))) + .filter(|(_, v)| v != &&T::default()) + .enumerate() + .peekable(); + + match values.peek() { + None => write!(f, "{:?}", T::default())?, + Some(_) => { + for (i, (namespace, value)) in values { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{namespace}:{value:?}")?; + } + } + } + + write!(f, ")") + } +} + +macro_rules! impl_op { + ($op:tt, $opfn:ident) => { + impl $op for ByNamespace { + fn $opfn(&mut self, rhs: Self) { + let Self { + sessions, + transactions, + spans, + custom, + stats, + unsupported, + } = self; + + $op::$opfn(sessions, rhs.sessions); + $op::$opfn(transactions, rhs.transactions); + $op::$opfn(spans, rhs.spans); + $op::$opfn(custom, rhs.custom); + $op::$opfn(stats, rhs.stats); + $op::$opfn(unsupported, rhs.unsupported); + } + } + }; +} + +impl_op!(AddAssign, add_assign); +impl_op!(SubAssign, sub_assign); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get() { + let mut v = ByNamespace::::default(); + + for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + assert_eq!(*v.get(namespace), 0); + assert_eq!(*v.get_mut(namespace), 0); + *v.get_mut(namespace) += i; + assert_eq!(*v.get(namespace), i); + } + + // Make sure a getter does not override another value by iterating over all values again. + for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + assert_eq!(*v.get(namespace), i); + assert_eq!(*v.get_mut(namespace), i); + } + } + + #[test] + fn test_add_sub() { + let mut v = ByNamespace::::default(); + for (i, namespace) in MetricNamespace::all().into_iter().enumerate() { + *v.get_mut(namespace) += i; + } + + let mut v2 = v; + v2 -= v; + assert_eq!(v2, Default::default()); + + v2 += v; + assert_eq!(v2, v); + } +} diff --git a/relay-metrics/src/view.rs b/relay-metrics/src/view.rs index 86862c16aa..5eab80a09e 100644 --- a/relay-metrics/src/view.rs +++ b/relay-metrics/src/view.rs @@ -3,8 +3,7 @@ use serde::ser::{SerializeMap, SerializeSeq}; use serde::Serialize; use crate::{ - aggregator, BucketMetadata, CounterType, DistributionType, GaugeValue, MetricName, SetType, - SetValue, + BucketMetadata, CounterType, DistributionType, GaugeValue, MetricName, SetType, SetValue, }; use relay_base_schema::metrics::MetricType; use std::collections::BTreeMap; @@ -518,7 +517,7 @@ impl<'a> BucketView<'a> { /// Note that this does not match the exact size of the serialized payload. Instead, the size is /// approximated through tags and a static overhead. fn estimated_base_size(&self) -> usize { - BUCKET_SIZE + self.name().len() + aggregator::tags_cost(self.tags()) + BUCKET_SIZE + self.name().len() + crate::utils::tags_cost(self.tags()) } /// Estimates the number of bytes needed to serialize the bucket. diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 9cebca8ef2..d7115ac31a 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -38,6 +38,7 @@ axum-server = { workspace = true } arc-swap = { workspace = true } backoff = { workspace = true } brotli = { workspace = true } +bytecount = { workspace = true } bytes = { workspace = true, features = ["serde"] } bzip2 = { workspace = true } chrono = { workspace = true, features = ["clock"] } diff --git a/relay-server/src/metrics/metric_stats.rs b/relay-server/src/metrics/metric_stats.rs index 4e1aa6aafb..8ebc389852 100644 --- a/relay-server/src/metrics/metric_stats.rs +++ b/relay-server/src/metrics/metric_stats.rs @@ -279,9 +279,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -299,9 +297,7 @@ mod tests { ) ); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -354,9 +350,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); @@ -420,9 +414,7 @@ mod tests { drop(ms); - let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap() else { - panic!(); - }; + let Aggregator::MergeBuckets(mut mb) = receiver.blocking_recv().unwrap(); assert_eq!(mb.project_key, scoping.project_key); assert_eq!(mb.buckets.len(), 1); diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index efc960609d..9803414e4a 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ b/relay-server/src/services/metrics/aggregator.rs @@ -1,18 +1,21 @@ +use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, SystemTime}; +use hashbrown::hash_map::Entry; use hashbrown::HashMap; use relay_base_schema::project::ProjectKey; use relay_config::AggregatorServiceConfig; -use relay_metrics::aggregator::{self, AggregateMetricsError, FlushDecision}; -use relay_metrics::{Bucket, UnixTimestamp}; +use relay_metrics::aggregator::{self, AggregateMetricsError, AggregatorConfig, Partition}; +use relay_metrics::Bucket; use relay_quotas::{RateLimits, Scoping}; -use relay_system::{Controller, FromMessage, Interface, NoResponse, Recipient, Service, Shutdown}; +use relay_system::{Controller, FromMessage, Interface, NoResponse, Recipient, Service}; +use tokio::time::Sleep; use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; -use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; +use crate::statsd::{RelayCounters, RelayTimers}; /// Aggregator for metric buckets. /// @@ -28,10 +31,6 @@ use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; pub enum Aggregator { /// Merge the buckets. MergeBuckets(MergeBuckets), - - /// Message is used only for tests to get the current number of buckets in `AggregatorService`. - #[cfg(test)] - BucketCountInquiry(BucketCountInquiry, relay_system::Sender), } impl Aggregator { @@ -39,8 +38,6 @@ impl Aggregator { pub fn variant(&self) -> &'static str { match self { Aggregator::MergeBuckets(_) => "MergeBuckets", - #[cfg(test)] - Aggregator::BucketCountInquiry(_, _) => "BucketCountInquiry", } } } @@ -54,19 +51,6 @@ impl FromMessage for Aggregator { } } -#[cfg(test)] -impl FromMessage for Aggregator { - type Response = relay_system::AsyncResponse; - fn from_message(message: BucketCountInquiry, sender: relay_system::Sender) -> Self { - Self::BucketCountInquiry(message, sender) - } -} - -/// Used only for testing the `AggregatorService`. -#[cfg(test)] -#[derive(Debug)] -pub struct BucketCountInquiry; - /// A message containing a vector of buckets to be flushed. /// /// Handlers must respond to this message with a `Result`: @@ -76,9 +60,7 @@ pub struct BucketCountInquiry; #[derive(Clone, Debug)] pub struct FlushBuckets { /// The partition to which the buckets belong. - /// - /// When set to `Some` it means that partitioning was enabled in the [`Aggregator`]. - pub partition_key: Option, + pub partition_key: u32, /// The buckets to be flushed. pub buckets: HashMap, } @@ -102,19 +84,14 @@ impl Extend for ProjectBuckets { } } -enum AggregatorState { - Running, - ShuttingDown, -} - /// Service implementing the [`Aggregator`] interface. pub struct AggregatorService { aggregator: aggregator::Aggregator, - state: AggregatorState, receiver: Option>, project_cache: ProjectCacheHandle, - flush_interval_ms: u64, + config: AggregatorServiceConfig, can_accept_metrics: Arc, + next_flush: Pin>, } impl AggregatorService { @@ -137,14 +114,14 @@ impl AggregatorService { receiver: Option>, project_cache: ProjectCacheHandle, ) -> Self { - let aggregator = aggregator::Aggregator::named(name, config.aggregator); + let aggregator = aggregator::Aggregator::named(name, &config.aggregator); Self { receiver, - state: AggregatorState::Running, - flush_interval_ms: config.flush_interval_ms, - can_accept_metrics: Arc::new(AtomicBool::new(!aggregator.totals_cost_exceeded())), + config, + can_accept_metrics: Arc::new(AtomicBool::new(true)), aggregator, project_cache, + next_flush: Box::pin(tokio::time::sleep(Duration::from_secs(0))), } } @@ -158,90 +135,100 @@ impl AggregatorService { /// to the receiver to send the [`MergeBuckets`] message back if buckets could not be flushed /// and we require another re-try. /// - /// If `force` is true, flush all buckets unconditionally and do not attempt to merge back. - fn try_flush(&mut self) { - let force_flush = matches!(&self.state, AggregatorState::ShuttingDown); - - let flush_decision = |project_key| { - let project = self.project_cache.get(project_key); - - let project_info = match project.state() { - ProjectState::Enabled(project_info) => project_info, - ProjectState::Disabled => return FlushDecision::Drop, - // Delay the flush, project is not yet ready. - // - // Querying the project will make sure it is eventually fetched. - ProjectState::Pending => { - relay_statsd::metric!( - counter(RelayCounters::ProjectStateFlushMetricsNoProject) += 1 - ); - return FlushDecision::Delay; - } - }; - - let Some(scoping) = project_info.scoping(project_key) else { - // This should never happen, at this point we should always have a valid - // project with the necessary information to construct a scoping. - // - // Ideally we enforce this through the type system in the future. - relay_log::error!( - tags.project_key = project_key.as_str(), - "dropping buckets because of missing scope", - ); - return FlushDecision::Drop; - }; + /// Returns when the next flush should be attempted. + fn try_flush(&mut self) -> Duration { + if cfg!(test) { + // Tests are running in a single thread / current thread runtime, + // which is required for 'fast-forwarding' and `block_in_place` + // requires a multi threaded runtime. Relay always requires a multi + // threaded runtime. + self.do_try_flush() + } else { + tokio::task::block_in_place(|| self.do_try_flush()) + } + } - FlushDecision::Flush(ProjectBuckets { - scoping, - project_info: Arc::clone(project_info), - rate_limits: project.rate_limits().current_limits(), - buckets: Vec::new(), - }) + fn do_try_flush(&mut self) -> Duration { + let partition = match self.aggregator.try_flush_next(SystemTime::now()) { + Ok(partition) => partition, + Err(duration) => return duration, }; + self.can_accept_metrics.store(true, Ordering::Relaxed); - let partitions = self - .aggregator - .pop_flush_buckets(force_flush, flush_decision); + self.flush_partition(partition); - self.can_accept_metrics - .store(!self.aggregator.totals_cost_exceeded(), Ordering::Relaxed); + self.aggregator.next_flush(SystemTime::now()) + } - if partitions.is_empty() { + fn flush_partition(&mut self, partition: Partition) { + let Some(receiver) = &self.receiver else { return; - } - - let partitions_count = partitions.len() as u64; - relay_log::trace!("flushing {} partitions to receiver", partitions_count); - relay_statsd::metric!( - histogram(RelayHistograms::PartitionsFlushed) = partitions_count, - aggregator = self.aggregator.name(), - ); + }; - let mut total_bucket_count = 0u64; - for buckets_by_project in partitions.values() { - for buckets in buckets_by_project.values() { - let bucket_count = buckets.buckets.len() as u64; - total_bucket_count += bucket_count; + let mut buckets_by_project = hashbrown::HashMap::new(); + + let partition_key = partition.partition_key; + for (project_key, bucket) in partition { + let s = match buckets_by_project.entry(project_key) { + Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + Entry::Vacant(vacant_entry) => { + let project = self.project_cache.get(project_key); + + let project_info = match project.state() { + ProjectState::Enabled(info) => Arc::clone(info), + ProjectState::Disabled => continue, // Drop the bucket. + ProjectState::Pending => { + // Return to the aggregator, which will assign a new flush time. + if let Err(error) = self.aggregator.merge(project_key, bucket) { + relay_log::error!( + tags.aggregator = self.aggregator.name(), + tags.project_key = project_key.as_str(), + bucket.error = &error as &dyn std::error::Error, + "failed to return metric bucket back to the aggregator" + ); + } + relay_statsd::metric!( + counter(RelayCounters::ProjectStateFlushMetricsNoProject) += 1 + ); + continue; + } + }; + + let rate_limits = project.rate_limits().current_limits(); + let Some(scoping) = project_info.scoping(project_key) else { + // This should never happen, at this point we should always have a valid + // project with the necessary information to construct a scoping. + // + // Ideally we enforce this through the type system in the future. + relay_log::error!( + tags.project_key = project_key.as_str(), + "dropping buckets because of missing scope", + ); + continue; + }; + + vacant_entry.insert(ProjectBuckets { + buckets: Vec::new(), + scoping, + project_info, + rate_limits, + }) + } + }; - relay_statsd::metric!( - histogram(RelayHistograms::BucketsFlushedPerProject) = bucket_count, - aggregator = self.aggregator.name(), - ); - } + s.buckets.push(bucket); } - relay_statsd::metric!( - histogram(RelayHistograms::BucketsFlushed) = total_bucket_count, - aggregator = self.aggregator.name(), - ); + if !buckets_by_project.is_empty() { + relay_log::debug!( + "flushing buckets for {} projects in partition {partition_key}", + buckets_by_project.len() + ); - if let Some(ref receiver) = self.receiver { - for (partition_key, buckets_by_project) in partitions { - receiver.send(FlushBuckets { - partition_key, - buckets: buckets_by_project, - }) - } + receiver.send(FlushBuckets { + partition_key, + buckets: buckets_by_project, + }); } } @@ -251,10 +238,13 @@ impl AggregatorService { buckets, } = msg; - let now = UnixTimestamp::now(); - for bucket in buckets.into_iter() { - match self.aggregator.merge_with_options(project_key, bucket, now) { - // Ignore invalid timestamp errors. + for mut bucket in buckets.into_iter() { + if !validate_bucket(&mut bucket, &self.config) { + continue; + }; + + match self.aggregator.merge(project_key, bucket) { + // Ignore invalid timestamp errors and drop the bucket. Err(AggregateMetricsError::InvalidTimestamp(_)) => {} Err(AggregateMetricsError::TotalLimitExceeded) => { relay_log::error!( @@ -288,17 +278,39 @@ impl AggregatorService { fn handle_message(&mut self, message: Aggregator) { match message { Aggregator::MergeBuckets(msg) => self.handle_merge_buckets(msg), - #[cfg(test)] - Aggregator::BucketCountInquiry(_, sender) => { - sender.send(self.aggregator.bucket_count()) - } } } - fn handle_shutdown(&mut self, message: Shutdown) { - if message.timeout.is_some() { - self.state = AggregatorState::ShuttingDown; + fn handle_shutdown(&mut self) { + relay_log::info!( + "Shutting down metrics aggregator {}", + self.aggregator.name() + ); + + // Create a new aggregator with very aggressive flush parameters. + let aggregator = aggregator::Aggregator::named( + self.aggregator.name().to_owned(), + &AggregatorConfig { + bucket_interval: 1, + aggregator_size: 1, + initial_delay: 0, + ..self.config.aggregator + }, + ); + + let previous = std::mem::replace(&mut self.aggregator, aggregator); + + let mut partitions = 0; + for partition in previous.into_partitions() { + self.flush_partition(partition); + partitions += 1; } + relay_log::debug!("Force flushed {partitions} partitions"); + + // Reset the next flush time, to the time of the new aggregator. + self.next_flush + .as_mut() + .reset(tokio::time::Instant::now() + self.aggregator.next_flush(SystemTime::now())); } } @@ -306,8 +318,6 @@ impl Service for AggregatorService { type Interface = Aggregator; async fn run(mut self, mut rx: relay_system::Receiver) { - let mut ticker = tokio::time::interval(Duration::from_millis(self.flush_interval_ms)); - ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); let mut shutdown = Controller::shutdown_handle(); macro_rules! timed { @@ -322,26 +332,18 @@ impl Service for AggregatorService { }}; } - // Note that currently this loop never exits and will run till the tokio runtime shuts - // down. This is about to change with the refactoring for the shutdown process. loop { tokio::select! { biased; - _ = ticker.tick() => timed!( - "try_flush", - if cfg!(test) { - // Tests are running in a single thread / current thread runtime, - // which is required for 'fast-forwarding' and `block_in_place` - // requires a multi threaded runtime. Relay always requires a multi - // threaded runtime. - self.try_flush() - } else { - tokio::task::block_in_place(|| self.try_flush()) + _ = &mut self.next_flush => timed!( + "try_flush", { + let next = self.try_flush(); + self.next_flush.as_mut().reset(tokio::time::Instant::now() + next); } ), Some(message) = rx.recv() => timed!(message.variant(), self.handle_message(message)), - shutdown = shutdown.notified() => timed!("shutdown", self.handle_shutdown(shutdown)), + _ = shutdown.notified() => timed!("shutdown", self.handle_shutdown()), else => break, } @@ -351,14 +353,13 @@ impl Service for AggregatorService { impl Drop for AggregatorService { fn drop(&mut self) { - let remaining_buckets = self.aggregator.bucket_count(); - if remaining_buckets > 0 { + if !self.aggregator.is_empty() { relay_log::error!( tags.aggregator = self.aggregator.name(), - "metrics aggregator dropping {remaining_buckets} buckets" + "metrics aggregator dropping buckets" ); relay_statsd::metric!( - counter(RelayCounters::BucketsDropped) += remaining_buckets as i64, + counter(RelayCounters::BucketsDropped) += 1, aggregator = self.aggregator.name(), ); } @@ -395,6 +396,35 @@ impl AggregatorHandle { } } +/// Validates the metric name and its tags are correct. +/// +/// Returns `false` if the metric should be dropped. +fn validate_bucket(bucket: &mut Bucket, config: &AggregatorServiceConfig) -> bool { + if bucket.name.len() > config.max_name_length { + relay_log::debug!( + "Invalid metric name, too long (> {}): {:?}", + config.max_name_length, + bucket.name + ); + return false; + } + + bucket.tags.retain(|tag_key, tag_value| { + if tag_key.len() > config.max_tag_key_length { + relay_log::debug!("Invalid metric tag key {tag_key:?}"); + return false; + } + if bytecount::num_chars(tag_value.as_bytes()) > config.max_tag_value_length { + relay_log::debug!("Invalid metric tag value"); + return false; + } + + true + }); + + true +} + #[cfg(test)] mod tests { use std::collections::BTreeMap; @@ -504,10 +534,8 @@ mod tests { aggregator.send(MergeBuckets::new(project_key, vec![bucket])); - let buckets_count = aggregator.send(BucketCountInquiry).await.unwrap(); - // Let's check the number of buckets in the aggregator just after sending a - // message. - assert_eq!(buckets_count, 1); + // Nothing flushed. + assert_eq!(receiver.bucket_count(), 0); // Wait until flush delay has passed. It is up to 2s: 1s for the current bucket // and 1s for the flush shift. Adding 100ms buffer. @@ -515,4 +543,71 @@ mod tests { // receiver must have 1 bucket flushed assert_eq!(receiver.bucket_count(), 1); } + + // #[test] + // fn test_validate_bucket_key_str_length() { + // relay_test::setup(); + // let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + // + // let short_metric = BucketKey { + // project_key, + // timestamp: UnixTimestamp::now(), + // metric_name: "c:transactions/a_short_metric".into(), + // tags: BTreeMap::new(), + // extracted_from_indexed: false, + // }; + // assert!(validate_bucket_key(short_metric, &test_config()).is_ok()); + // + // let long_metric = BucketKey { + // project_key, + // timestamp: UnixTimestamp::now(), + // metric_name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(), + // tags: BTreeMap::new(), + // extracted_from_indexed: false, + // }; + // let validation = validate_bucket_key(long_metric, &test_config()); + // + // assert!(matches!( + // validation.unwrap_err(), + // AggregateMetricsError::InvalidStringLength(_), + // )); + // + // let short_metric_long_tag_key = BucketKey { + // project_key, + // timestamp: UnixTimestamp::now(), + // metric_name: "c:transactions/a_short_metric_with_long_tag_key".into(), + // tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]), + // extracted_from_indexed: false, + // }; + // let validation = validate_bucket_key(short_metric_long_tag_key, &test_config()).unwrap(); + // assert_eq!(validation.tags.len(), 0); + // + // let short_metric_long_tag_value = BucketKey { + // project_key, + // timestamp: UnixTimestamp::now(), + // metric_name: "c:transactions/a_short_metric_with_long_tag_value".into(), + // tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]), + // extracted_from_indexed: false, + // }; + // let validation = validate_bucket_key(short_metric_long_tag_value, &test_config()).unwrap(); + // assert_eq!(validation.tags.len(), 0); + // } + // + // #[test] + // fn test_validate_tag_values_special_chars() { + // relay_test::setup(); + // let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + // + // let tag_value = "x".repeat(199) + "ø"; + // assert_eq!(tag_value.chars().count(), 200); // Should be allowed + // let short_metric = BucketKey { + // project_key, + // timestamp: UnixTimestamp::now(), + // metric_name: "c:transactions/a_short_metric".into(), + // tags: BTreeMap::from([("foo".into(), tag_value.clone())]), + // extracted_from_indexed: false, + // }; + // let validated_bucket = validate_metric_tags(short_metric, &test_config()); + // assert_eq!(validated_bucket.tags["foo"], tag_value); + // } } diff --git a/relay-server/src/services/metrics/router.rs b/relay-server/src/services/metrics/router.rs index 0e98a19d3d..8ae58f172d 100644 --- a/relay-server/src/services/metrics/router.rs +++ b/relay-server/src/services/metrics/router.rs @@ -113,8 +113,6 @@ impl StartedRouter { { match message { Aggregator::MergeBuckets(msg) => self.handle_merge_buckets(msg), - #[cfg(test)] - Aggregator::BucketCountInquiry(_, _sender) => (), // not supported } } ) diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 2c0bb40eaf..5690042cf3 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1362,7 +1362,6 @@ impl EnvelopeProcessorService { self.inner .config .aggregator_config_for(MetricNamespace::Spans) - .aggregator .max_tag_value_length, extract_spans, ); @@ -1443,9 +1442,7 @@ impl EnvelopeProcessorService { received_at: Some(state.managed_envelope.received_at()), max_secs_in_past: Some(retention_days * 24 * 3600), max_secs_in_future: Some(self.inner.config.max_secs_in_future()), - transaction_timestamp_range: Some( - transaction_aggregator_config.aggregator.timestamp_range(), - ), + transaction_timestamp_range: Some(transaction_aggregator_config.timestamp_range()), is_validated: false, }; @@ -1477,7 +1474,6 @@ impl EnvelopeProcessorService { }, max_name_and_unit_len: Some( transaction_aggregator_config - .aggregator .max_name_length .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), ), @@ -1495,7 +1491,6 @@ impl EnvelopeProcessorService { .inner .config .aggregator_config_for(MetricNamespace::Spans) - .aggregator .max_tag_value_length, is_renormalize: false, remove_other: full_normalization, @@ -2693,9 +2688,9 @@ impl EnvelopeProcessorService { { let dsn = PartialDsn::outbound(scoping, upstream); - if let Some(key) = partition_key { - relay_statsd::metric!(histogram(RelayHistograms::PartitionKeys) = key); - } + relay_statsd::metric!( + histogram(RelayHistograms::PartitionKeys) = u64::from(partition_key) + ); let mut num_batches = 0; for batch in BucketsView::from(buckets).by_size(batch_size) { @@ -2712,7 +2707,9 @@ impl EnvelopeProcessorService { self.inner.addrs.test_store.clone(), ProcessingGroup::Metrics, ); - envelope.set_partition_key(partition_key).scope(*scoping); + envelope + .set_partition_key(Some(partition_key)) + .scope(*scoping); relay_statsd::metric!( histogram(RelayHistograms::BucketsPerBatch) = batch.len() as u64 @@ -2729,7 +2726,7 @@ impl EnvelopeProcessorService { } /// Creates a [`SendMetricsRequest`] and sends it to the upstream relay. - fn send_global_partition(&self, partition_key: Option, partition: &mut Partition<'_>) { + fn send_global_partition(&self, partition_key: u32, partition: &mut Partition<'_>) { if partition.is_empty() { return; } @@ -2746,7 +2743,7 @@ impl EnvelopeProcessorService { }; let request = SendMetricsRequest { - partition_key: partition_key.map(|k| k.to_string()), + partition_key: partition_key.to_string(), unencoded, encoded, project_info, @@ -3171,7 +3168,7 @@ impl<'a> Partition<'a> { #[derive(Debug)] struct SendMetricsRequest { /// If the partition key is set, the request is marked with `X-Sentry-Relay-Shard`. - partition_key: Option, + partition_key: String, /// Serialized metric buckets without encoding applied, used for signing. unencoded: Bytes, /// Serialized metric buckets with the stated HTTP encoding applied. @@ -3245,7 +3242,7 @@ impl UpstreamRequest for SendMetricsRequest { builder .content_encoding(self.http_encoding) - .header_opt("X-Sentry-Relay-Shard", self.partition_key.as_ref()) + .header("X-Sentry-Relay-Shard", self.partition_key.as_bytes()) .header(header::CONTENT_TYPE, b"application/json") .body(self.encoded.clone()); @@ -3447,7 +3444,7 @@ mod tests { ]); FlushBuckets { - partition_key: None, + partition_key: 0, buckets, } }; @@ -3831,10 +3828,7 @@ mod tests { }; processor.handle_process_metrics(&mut token, message); - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!() - }; + let Aggregator::MergeBuckets(merge_buckets) = aggregator_rx.recv().await.unwrap(); let buckets = merge_buckets.buckets; assert_eq!(buckets.len(), 1); assert_eq!(buckets[0].metadata.received_at, expected_received_at); @@ -3895,14 +3889,8 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb1) = value else { - panic!() - }; - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb2) = value else { - panic!() - }; + let Aggregator::MergeBuckets(mb1) = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb2) = aggregator_rx.recv().await.unwrap(); let mut messages = vec![mb1, mb2]; messages.sort_by_key(|mb| mb.project_key); diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 4a0b025aaf..f793a667b1 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -197,9 +197,7 @@ mod tests { // We assert that one metric is emitted by metric stats. let value = metric_stats_rx.blocking_recv().unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!(); - }; + let Aggregator::MergeBuckets(merge_buckets) = value; assert_eq!(merge_buckets.buckets.len(), 1); assert_eq!( merge_buckets.buckets[0].tags.get("mri").unwrap().as_str(), @@ -234,9 +232,7 @@ mod tests { // We assert that two metrics are emitted by metric stats. for _ in 0..2 { let value = metric_stats_rx.blocking_recv().unwrap(); - let Aggregator::MergeBuckets(merge_buckets) = value else { - panic!(); - }; + let Aggregator::MergeBuckets(merge_buckets) = value; assert_eq!(merge_buckets.buckets.len(), 1); let BucketValue::Counter(value) = merge_buckets.buckets[0].value else { panic!(); diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index db78c2ef52..8ad522d3c5 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -359,7 +359,6 @@ pub fn extract_from_event( state .config .aggregator_config_for(MetricNamespace::Spans) - .aggregator .max_tag_value_length, &[], ) else { @@ -460,8 +459,8 @@ impl<'a> NormalizeSpanConfig<'a> { Self { received_at: managed_envelope.received_at(), - timestamp_range: aggregator_config.aggregator.timestamp_range(), - max_tag_value_size: aggregator_config.aggregator.max_tag_value_length, + timestamp_range: aggregator_config.timestamp_range(), + max_tag_value_size: aggregator_config.max_tag_value_length, performance_score: project_config.performance_score.as_ref(), measurements: Some(CombinedMeasurementsConfig::new( project_config.measurements.as_ref(), @@ -472,7 +471,6 @@ impl<'a> NormalizeSpanConfig<'a> { ErrorBoundary::Ok(costs) => Some(costs), }, max_name_and_unit_len: aggregator_config - .aggregator .max_name_length .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 63e1a4634b..6bf5f7a9d1 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -276,25 +276,6 @@ pub enum RelayHistograms { PartitionKeys, /// Measures how many splits were performed when sending out a partition. PartitionSplits, - /// The total number of metric buckets flushed in a cycle across all projects. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsFlushed, - /// The number of metric buckets flushed in a cycle for each project. - /// - /// Relay scans metric buckets in regular intervals and flushes expired buckets. This histogram - /// is logged for each project that is being flushed. The count of the histogram values is - /// equivalent to the number of projects being flushed. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - BucketsFlushedPerProject, - /// The number of metric partitions flushed in a cycle. - /// - /// This metric is tagged with: - /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). - PartitionsFlushed, } impl HistogramMetric for RelayHistograms { @@ -331,9 +312,6 @@ impl HistogramMetric for RelayHistograms { RelayHistograms::UpstreamMetricsBodySize => "upstream.metrics.body_size", RelayHistograms::PartitionKeys => "metrics.buckets.partition_keys", RelayHistograms::PartitionSplits => "partition_splits", - RelayHistograms::BucketsFlushed => "metrics.buckets.flushed", - RelayHistograms::BucketsFlushedPerProject => "metrics.buckets.flushed_per_project", - RelayHistograms::PartitionsFlushed => "metrics.partitions.flushed", } } } diff --git a/relay-server/src/utils/managed_envelope.rs b/relay-server/src/utils/managed_envelope.rs index de1c6832dc..ad9c623464 100644 --- a/relay-server/src/utils/managed_envelope.rs +++ b/relay-server/src/utils/managed_envelope.rs @@ -61,7 +61,7 @@ pub enum ItemAction { struct EnvelopeContext { summary: EnvelopeSummary, scoping: Scoping, - partition_key: Option, + partition_key: Option, done: bool, group: ProcessingGroup, } @@ -463,11 +463,11 @@ impl ManagedEnvelope { self.context.scoping } - pub fn partition_key(&self) -> Option { + pub fn partition_key(&self) -> Option { self.context.partition_key } - pub fn set_partition_key(&mut self, partition_key: Option) -> &mut Self { + pub fn set_partition_key(&mut self, partition_key: Option) -> &mut Self { self.context.partition_key = partition_key; self } diff --git a/tests/integration/test_healthchecks.py b/tests/integration/test_healthchecks.py index 2e02e9173b..84ce0376cf 100644 --- a/tests/integration/test_healthchecks.py +++ b/tests/integration/test_healthchecks.py @@ -121,20 +121,6 @@ def test_readiness_not_enough_memory_percent(mini_sentry, relay): assert_not_enough_memory(relay, mini_sentry, ">= 1.00%") -def test_readiness_depends_on_aggregator_being_full(mini_sentry, relay): - relay = relay( - mini_sentry, - {"relay": {"mode": "proxy"}, "aggregator": {"max_total_bucket_bytes": 0}}, - wait_health_check=False, - ) - - response = wait_get(relay, "/api/relay/healthcheck/ready/") - - error = str(mini_sentry.test_failures.get(timeout=1)) - assert "Health check probe 'aggregator'" in error - assert response.status_code == 503 - - def test_readiness_depends_on_aggregator_being_full_after_metrics(mini_sentry, relay): relay = relay( mini_sentry, diff --git a/tests/integration/test_metrics.py b/tests/integration/test_metrics.py index bf22c0a621..2a581c7f95 100644 --- a/tests/integration/test_metrics.py +++ b/tests/integration/test_metrics.py @@ -210,14 +210,14 @@ def test_metrics_backdated(mini_sentry, relay): @pytest.mark.parametrize( "metrics_partitions,expected_header", [ - # With no partitions defined, partitioning will not be performed but bucket shift will still be done. - (None, None), + # With no partitions defined, partition count is auto assigned. + (None, "4"), # With zero partitions defined, all the buckets will be forwarded to a single partition. (0, "0"), # With zero partitions defined, all the buckets will be forwarded to a single partition. (1, "0"), # With more than zero partitions defined, the buckets will be forwarded to one of the partitions. - (128, "17"), + (128, "4"), ], ) def test_metrics_partition_key(mini_sentry, relay, metrics_partitions, expected_header): @@ -1241,12 +1241,10 @@ def test_graceful_shutdown(mini_sentry, relay): timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - # Backdated metric will be flushed immediately due to debounce delay - past_timestamp = timestamp - 1000 + past_timestamp = timestamp - 1000 + 30 metrics_payload = f"transactions/past:42|c|T{past_timestamp}" relay.send_metrics(project_id, metrics_payload) - # Future timestamp will not be flushed regularly, only through force flush future_timestamp = timestamp + 30 metrics_payload = f"transactions/future:17|c|T{future_timestamp}" relay.send_metrics(project_id, metrics_payload) @@ -1259,27 +1257,36 @@ def test_graceful_shutdown(mini_sentry, relay): with pytest.raises(requests.ConnectionError): relay.send_metrics(project_id, metrics_payload) - envelope = mini_sentry.captured_events.get(timeout=5) - assert len(envelope.items) == 1 - metrics_item = envelope.items[0] - assert metrics_item.type == "metric_buckets" + received_metrics = list() + for _ in range(2): + envelope = mini_sentry.captured_events.get(timeout=5) + assert len(envelope.items) == 1 + metrics_item = envelope.items[0] + assert metrics_item.type == "metric_buckets" + + received_metrics.extend( + metrics_without_keys( + json.loads(metrics_item.get_bytes().decode()), keys={"metadata"} + ) + ) - received_metrics = metrics_without_keys( - json.loads(metrics_item.get_bytes().decode()), keys={"metadata"} - ) + if len(received_metrics) == 2: + break + + received_metrics.sort(key=lambda x: x["timestamp"]) assert received_metrics == [ { - "timestamp": time_within_delta(future_timestamp, timedelta(seconds=1)), + "timestamp": time_within_delta(past_timestamp, timedelta(seconds=1)), "width": 1, - "name": "c:transactions/future@none", - "value": 17.0, + "name": "c:transactions/past@none", + "value": 42.0, "type": "c", }, { - "timestamp": time_within_delta(past_timestamp, timedelta(seconds=1)), + "timestamp": time_within_delta(future_timestamp, timedelta(seconds=1)), "width": 1, - "name": "c:transactions/past@none", - "value": 42.0, + "name": "c:transactions/future@none", + "value": 17.0, "type": "c", }, ] diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 81941f7962..b5be0b6392 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -552,12 +552,11 @@ def test_enforce_bucket_rate_limits( ): metrics_consumer = metrics_consumer() - bucket_interval = 1 # second relay = relay_with_processing( { "processing": {"max_rate_limit": 2 * 86400}, "aggregator": { - "bucket_interval": bucket_interval, + "bucket_interval": 1, "initial_delay": 0, }, } @@ -580,31 +579,22 @@ def test_enforce_bucket_rate_limits( } ] - def generate_ticks(): - # Generate a new timestamp for every bucket, so they do not get merged by the aggregator - tick = int(datetime.now(UTC).timestamp() // bucket_interval * bucket_interval) - while True: - yield tick - tick += bucket_interval - - tick = generate_ticks() - def make_bucket(name, type_, values): return { "org_id": 1, "project_id": project_id, - "timestamp": next(tick), + "timestamp": int(datetime.now(UTC).timestamp()), "name": name, "type": type_, "value": values, - "width": bucket_interval, + "width": 1, } relay.send_metrics_buckets( project_id, [ - make_bucket("d:transactions/measurements.lcp@millisecond", "d", [1.0]) - for _ in range(metric_bucket_limit * 2) + make_bucket(f"d:transactions/measurements.lcp{i}@millisecond", "d", [1.0]) + for i in range(metric_bucket_limit * 2) ], ) From ab2dcc13341446fcd4af1e3eab4d2e0708140cb8 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 17 Dec 2024 09:23:18 +0100 Subject: [PATCH 2/7] missing validate tests --- relay-metrics/src/aggregator/mod.rs | 1 + .../src/services/metrics/aggregator.rs | 145 ++++++++++-------- 2 files changed, 80 insertions(+), 66 deletions(-) diff --git a/relay-metrics/src/aggregator/mod.rs b/relay-metrics/src/aggregator/mod.rs index 4ce590ba01..7f61cbbb30 100644 --- a/relay-metrics/src/aggregator/mod.rs +++ b/relay-metrics/src/aggregator/mod.rs @@ -23,6 +23,7 @@ mod stats; pub use self::config::*; use self::inner::{BucketData, BucketKey}; +/// Default amount of partitions per second when there are no partitions configured. const DEFAULT_PARTITIONS_PER_SECOND: u32 = 64; /// Any error that may occur during aggregation. diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index 9803414e4a..961f28c4fc 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ b/relay-server/src/services/metrics/aggregator.rs @@ -544,70 +544,83 @@ mod tests { assert_eq!(receiver.bucket_count(), 1); } - // #[test] - // fn test_validate_bucket_key_str_length() { - // relay_test::setup(); - // let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - // - // let short_metric = BucketKey { - // project_key, - // timestamp: UnixTimestamp::now(), - // metric_name: "c:transactions/a_short_metric".into(), - // tags: BTreeMap::new(), - // extracted_from_indexed: false, - // }; - // assert!(validate_bucket_key(short_metric, &test_config()).is_ok()); - // - // let long_metric = BucketKey { - // project_key, - // timestamp: UnixTimestamp::now(), - // metric_name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(), - // tags: BTreeMap::new(), - // extracted_from_indexed: false, - // }; - // let validation = validate_bucket_key(long_metric, &test_config()); - // - // assert!(matches!( - // validation.unwrap_err(), - // AggregateMetricsError::InvalidStringLength(_), - // )); - // - // let short_metric_long_tag_key = BucketKey { - // project_key, - // timestamp: UnixTimestamp::now(), - // metric_name: "c:transactions/a_short_metric_with_long_tag_key".into(), - // tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]), - // extracted_from_indexed: false, - // }; - // let validation = validate_bucket_key(short_metric_long_tag_key, &test_config()).unwrap(); - // assert_eq!(validation.tags.len(), 0); - // - // let short_metric_long_tag_value = BucketKey { - // project_key, - // timestamp: UnixTimestamp::now(), - // metric_name: "c:transactions/a_short_metric_with_long_tag_value".into(), - // tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]), - // extracted_from_indexed: false, - // }; - // let validation = validate_bucket_key(short_metric_long_tag_value, &test_config()).unwrap(); - // assert_eq!(validation.tags.len(), 0); - // } - // - // #[test] - // fn test_validate_tag_values_special_chars() { - // relay_test::setup(); - // let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - // - // let tag_value = "x".repeat(199) + "ø"; - // assert_eq!(tag_value.chars().count(), 200); // Should be allowed - // let short_metric = BucketKey { - // project_key, - // timestamp: UnixTimestamp::now(), - // metric_name: "c:transactions/a_short_metric".into(), - // tags: BTreeMap::from([("foo".into(), tag_value.clone())]), - // extracted_from_indexed: false, - // }; - // let validated_bucket = validate_metric_tags(short_metric, &test_config()); - // assert_eq!(validated_bucket.tags["foo"], tag_value); - // } + fn test_config() -> AggregatorServiceConfig { + AggregatorServiceConfig { + max_name_length: 200, + max_tag_key_length: 200, + max_tag_value_length: 200, + ..Default::default() + } + } + + #[test] + fn test_validate_bucket_key_str_length() { + relay_test::setup(); + let mut short_metric = Bucket { + timestamp: UnixTimestamp::now(), + name: "c:transactions/a_short_metric".into(), + tags: BTreeMap::new(), + metadata: Default::default(), + width: 0, + value: BucketValue::Counter(0.into()), + }; + assert!(validate_bucket(&mut short_metric, &test_config())); + + let mut long_metric = Bucket { + timestamp: UnixTimestamp::now(), + name: "c:transactions/long_name_a_very_long_name_its_super_long_really_but_like_super_long_probably_the_longest_name_youve_seen_and_even_the_longest_name_ever_its_extremly_long_i_cant_tell_how_long_it_is_because_i_dont_have_that_many_fingers_thus_i_cant_count_the_many_characters_this_long_name_is".into(), + tags: BTreeMap::new(), + metadata: Default::default(), + width: 0, + value: BucketValue::Counter(0.into()), + }; + assert!(!validate_bucket(&mut long_metric, &test_config())); + + let mut short_metric_long_tag_key = Bucket { + timestamp: UnixTimestamp::now(), + name: "c:transactions/a_short_metric_with_long_tag_key".into(), + tags: BTreeMap::from([("i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into(), "tag_value".into())]), + metadata: Default::default(), + width: 0, + value: BucketValue::Counter(0.into()), + }; + assert!(validate_bucket( + &mut short_metric_long_tag_key, + &test_config() + )); + assert_eq!(short_metric_long_tag_key.tags.len(), 0); + + let mut short_metric_long_tag_value = Bucket { + timestamp: UnixTimestamp::now(), + name: "c:transactions/a_short_metric_with_long_tag_value".into(), + tags: BTreeMap::from([("tag_key".into(), "i_run_out_of_creativity_so_here_we_go_Lorem_Ipsum_is_simply_dummy_text_of_the_printing_and_typesetting_industry_Lorem_Ipsum_has_been_the_industrys_standard_dummy_text_ever_since_the_1500s_when_an_unknown_printer_took_a_galley_of_type_and_scrambled_it_to_make_a_type_specimen_book".into())]), + metadata: Default::default(), + width: 0, + value: BucketValue::Counter(0.into()), + }; + assert!(validate_bucket( + &mut short_metric_long_tag_value, + &test_config() + )); + assert_eq!(short_metric_long_tag_value.tags.len(), 0); + } + + #[test] + fn test_validate_tag_values_special_chars() { + relay_test::setup(); + + let tag_value = "x".repeat(199) + "ø"; + assert_eq!(tag_value.chars().count(), 200); // Should be allowed + + let mut short_metric = Bucket { + timestamp: UnixTimestamp::now(), + name: "c:transactions/a_short_metric".into(), + tags: BTreeMap::from([("foo".into(), tag_value.clone())]), + metadata: Default::default(), + width: 0, + value: BucketValue::Counter(0.into()), + }; + assert!(validate_bucket(&mut short_metric, &test_config())); + assert_eq!(short_metric.tags["foo"], tag_value); + } } From d8ea0fb73f8a9a84793ffa93323d62263f4f7f38 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 17 Dec 2024 11:28:07 +0100 Subject: [PATCH 3/7] review changes, small stuff --- relay-metrics/src/aggregator/inner.rs | 46 +++++++++++++-------------- relay-metrics/src/aggregator/mod.rs | 27 ++++++++++++---- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs index de149d15f8..b78d65befe 100644 --- a/relay-metrics/src/aggregator/inner.rs +++ b/relay-metrics/src/aggregator/inner.rs @@ -40,17 +40,17 @@ impl fmt::Debug for Partition { #[derive(Default, Debug)] pub struct PartitionStats { /// Amount of unique buckets in the partition. - #[expect(unused)] + #[expect(unused, reason = "used for snapshot tests")] pub count: u64, /// Amount of unique buckets in the partition by namespace. pub count_by_namespace: ByNamespace, /// Amount of times a bucket was merged in the partition. - #[expect(unused)] + #[expect(unused, reason = "used for snapshot tests")] pub merges: u64, /// Amount of times a bucket was merged in the partition by namespace. pub merges_by_namespace: ByNamespace, /// Cost of buckets in the partition. - #[expect(unused)] + #[expect(unused, reason = "used for snapshot tests")] pub cost: u64, /// Cost of buckets in the partition by namespace. pub cost_by_namespace: ByNamespace, @@ -72,12 +72,12 @@ impl From<&stats::Slot> for PartitionStats { #[derive(Default, Debug)] pub struct Stats { /// Total amount of buckets in the aggregator. - #[expect(unused)] + #[expect(unused, reason = "used for snapshot tests")] pub count: u64, /// Total amount of buckets in the aggregator by namespace. pub count_by_namespace: ByNamespace, /// Total bucket cost in the aggregator. - #[expect(unused)] + #[expect(unused, reason = "used for snapshot tests")] pub cost: u64, /// Total bucket cost in the aggregator by namespace. pub cost_by_namespace: ByNamespace, @@ -184,14 +184,14 @@ pub struct Inner { /// "time slot" and the second dimension is the assigned partition. /// /// The total length of the ring-buffer is then determined by the amount of time slots times - /// the amount of partition. In other words, every time slot as [`Self::num_partitions`] + /// the amount of partitions. In other words, every time slot has [`Self::num_partitions`] /// partitions. /// /// Layout: /// Time slots: [ ][ ][ ] /// Partitions: [ ][ ] [ ][ ] [ ][ ] /// - /// An item is assigned by first determining it's time slot and then assigning it a partition + /// An item is assigned by first determining its time slot and then assigning it to a partition /// based on selected [`Self::partition_by`] strategy. /// /// The first item in the buffer is tracked by [`Self::head`] which is at any time the @@ -207,7 +207,7 @@ pub struct Inner { /// Position of the first element in [`Self::slots`]. head: u64, - /// Size of each individual bucket, inputs are truncated to this value. + /// Size of each individual bucket, inputs are truncated modulo to this value. bucket_interval: u32, /// Amount of slots which is added to a bucket as a delay. /// @@ -221,9 +221,9 @@ pub struct Inner { /// The maximum amount of slots (size of a `bucket_interval`) the timestamp is allowed to be /// in the past or future. - slot_diff: SlotDiff, + slot_range: RelativeRange, - /// Determines how partitions are assigned based in the input bucket. + /// Determines how partitions are assigned based on the input bucket. partition_by: FlushBatching, /// Hasher used to calculate partitions. hasher: ahash::RandomState, @@ -251,11 +251,11 @@ impl Inner { } } - let slot_diff = SlotDiff { - min: config + let slot_diff = RelativeRange { + max_in_past: config .max_secs_in_past .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), - max: config + max_in_future: config .max_secs_in_future .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), }; @@ -276,7 +276,7 @@ impl Inner { .map(|c| c.div_ceil(total_slots as u64)) .unwrap_or(u64::MAX), }, - slot_diff, + slot_range: slot_diff, partition_by: FlushBatching::Partition, hasher: build_hasher(), } @@ -299,7 +299,7 @@ impl Inner { /// Returns `true` if the aggregator contains any metric buckets. pub fn is_empty(&self) -> bool { - self.stats.cost == 0 + self.stats.count == 0 } /// Returns the time as a duration since epoch when the next flush is supposed to happen. @@ -326,7 +326,7 @@ impl Inner { key.timestamp = UnixTimestamp::from_secs(time_slot * u64::from(self.bucket_interval)); let now_slot = self.head / u64::from(self.num_partitions); - if !self.slot_diff.contains(now_slot, time_slot) { + if !self.slot_range.contains(now_slot, time_slot) { return Err(AggregateMetricsError::InvalidTimestamp(key.timestamp)); } @@ -503,21 +503,21 @@ impl fmt::Debug for Slot { } } -struct SlotDiff { - min: u64, - max: u64, +struct RelativeRange { + max_in_past: u64, + max_in_future: u64, } -impl SlotDiff { - pub fn contains(&self, now: u64, target: u64) -> bool { +impl RelativeRange { + fn contains(&self, now: u64, target: u64) -> bool { if target < now { // Timestamp/target in the past. let diff = now - target; - diff <= self.min + diff <= self.max_in_past } else { // Timestamp/target in the future. let diff = target - now; - diff <= self.max + diff <= self.max_in_future } } } diff --git a/relay-metrics/src/aggregator/mod.rs b/relay-metrics/src/aggregator/mod.rs index 7f61cbbb30..801e750c93 100644 --- a/relay-metrics/src/aggregator/mod.rs +++ b/relay-metrics/src/aggregator/mod.rs @@ -1,10 +1,4 @@ //! Core functionality of metrics aggregation. -//! -//! A new implementation of the [`crate::aggregator::Aggregator`], with slightly -//! different semantics and more optimized aggregation. -//! -//! This module is supposed to replace the [`crate::aggregator`] module, -//! if proven successful. use std::time::{Duration, SystemTime}; @@ -43,7 +37,26 @@ pub enum AggregateMetricsError { InvalidTimestamp(UnixTimestamp), } -/// An aggregator for metric [`Bucket`]'s. +/// A collector of [`Bucket`] submissions. +/// +/// # Aggregation +/// +/// Each metric is dispatched into the a [`Bucket`] depending on its project key (DSN), name, type, +/// unit, tags and timestamp. The bucket timestamp is rounded to the precision declared by the +/// `bucket_interval` field on the [AggregatorConfig] configuration. +/// +/// Each bucket stores the accumulated value of submitted metrics: +/// +/// - `Counter`: Sum of values. +/// - `Distribution`: A list of values. +/// - `Set`: A unique set of hashed values. +/// - `Gauge`: A summary of the reported values, see [`GaugeValue`](crate::GaugeValue). +/// +/// # Conflicts +/// +/// Metrics are uniquely identified by the combination of their name, type and unit. It is allowed +/// to send metrics of different types and units under the same name. For example, sending a metric +/// once as set and once as distribution will result in two actual metrics being recorded. #[derive(Debug)] pub struct Aggregator { name: String, From 3f4c91ca2625235bb5fb7dc062914c67add2f33b Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 17 Dec 2024 12:48:58 +0100 Subject: [PATCH 4/7] better to index conversion docs --- relay-metrics/src/aggregator/inner.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs index b78d65befe..65094ff979 100644 --- a/relay-metrics/src/aggregator/inner.rs +++ b/relay-metrics/src/aggregator/inner.rs @@ -340,15 +340,16 @@ impl Inner { } } % u64::from(self.num_partitions); + // Calculate the slot of the bucket based on it's time and shift it by its assigned partition. let slot = time_slot * u64::from(self.num_partitions) + assigned_partition; - - let slots_len = self.slots.len() as u64; - let index = (slot + slots_len).wrapping_sub(self.head % slots_len) % slots_len; + // Transform the slot to an offset/index into the ring-buffer, by calculating: + // `(slot - self.head).rem_euclid(self.slots.len())`. + let index = sub_rem_euclid(slot, self.head, self.slots.len() as u64); let slot = self .slots .get_mut(index as usize) - .expect("index should always be a valid partition"); + .expect("index should always be a valid slot index"); debug_assert_eq!( u64::from(slot.partition_key), @@ -522,6 +523,23 @@ impl RelativeRange { } } +/// Calculates `(a - b).rem_euclid(mod)` for `u64` values. +/// +/// Since `a - b` can be negative, you naively need to temporarily change the number space from +/// unsigned to signed and after the modulo back to unsigned. +/// +/// This function instead operates purely with unsigned arithmetic and makes sure the subtraction +/// is always positive. +fn sub_rem_euclid(a: u64, b: u64, m: u64) -> u64 { + ( + // Shift a by `m`: `[m, inf)`. + (a + m) + - + // Modulo b: `[0, m)`. + (b % m) + ) % m +} + fn build_hasher() -> RandomState { // A fixed, consistent seed across all instances of Relay. const K0: u64 = 0x06459b7d5da84ed8; From 49ac1315d7a96b372265c901a65634623b3421d9 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 17 Dec 2024 13:13:17 +0100 Subject: [PATCH 5/7] more review comments, change some values to u64 --- relay-metrics/src/aggregator/inner.rs | 95 ++++++++++--------- relay-metrics/src/aggregator/mod.rs | 12 +-- .../src/services/metrics/aggregator.rs | 8 +- 3 files changed, 61 insertions(+), 54 deletions(-) diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs index 65094ff979..04b2d246db 100644 --- a/relay-metrics/src/aggregator/inner.rs +++ b/relay-metrics/src/aggregator/inner.rs @@ -148,7 +148,7 @@ pub struct Config { pub start: UnixTimestamp, /// Size of each individual bucket, inputs are truncated to this value. pub bucket_interval: u32, - /// The amount of time slots to keep track off in the aggregator. + /// The amount of time slots to keep track of in the aggregator. /// /// The size of a time slot is defined by [`Self::bucket_interval`]. pub num_time_slots: u32, @@ -176,7 +176,7 @@ pub struct Config { /// /// The internal time is set on construction to [`Config::start`]. /// -/// Use [`Self::next_flush`] to determine the time when to call [`Self::flush_next`]. +/// Use [`Self::next_flush_at`] to determine the time when to call [`Self::flush_next`]. pub struct Inner { /// Ring-buffer of aggregation slots. /// @@ -196,23 +196,24 @@ pub struct Inner { /// /// The first item in the buffer is tracked by [`Self::head`] which is at any time the /// current partition since the beginning "zero". The beginning in the aggregator refers to the - /// beginning of the epoch. The [`Self::head`] is calculated with `f(x) = x / bucket_interval * num_partitions`. + /// beginning of the epoch. The [`Self::head`] at time `t` is calculated with + /// `f(t) = t / bucket_interval * num_partitions`. /// /// Flushing a partition advances the [`Self::head`] by a single value `+1`. Meaning /// effectively time is advanced by `bucket_interval / num_partitions`. slots: VecDeque, /// The amount of partitions per time slot. - num_partitions: u32, + num_partitions: u64, /// Position of the first element in [`Self::slots`]. head: u64, /// Size of each individual bucket, inputs are truncated modulo to this value. - bucket_interval: u32, + bucket_interval: u64, /// Amount of slots which is added to a bucket as a delay. /// - /// This is a fixed delay which is added to to the time returned by [`Self::next_flush`]. - delay: u32, + /// This is a fixed delay which is added to to the time returned by [`Self::next_flush_at`]. + delay: u64, /// Total stats of the aggregator. stats: stats::Total, @@ -241,7 +242,7 @@ impl Inner { let num_time_slots = config.num_time_slots.max(1) + config.delay.div_ceil(bucket_interval); let num_partitions = config.num_partitions.max(1); - let mut slots = Vec::new(); + let mut slots = Vec::with_capacity((num_time_slots * num_partitions) as usize); for _ in 0..num_time_slots { for partition_key in 0..num_partitions { slots.push(Slot { @@ -251,22 +252,25 @@ impl Inner { } } + let bucket_interval = u64::from(bucket_interval); + let num_partitions = u64::from(num_partitions); + let slot_diff = RelativeRange { max_in_past: config .max_secs_in_past - .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), + .map_or(u64::MAX, |v| v.div_ceil(bucket_interval)), max_in_future: config .max_secs_in_future - .map_or(u64::MAX, |v| v.div_ceil(u64::from(bucket_interval))), + .map_or(u64::MAX, |v| v.div_ceil(bucket_interval)), }; let total_slots = slots.len(); Self { slots: VecDeque::from(slots), num_partitions, - head: config.start.as_secs() / u64::from(bucket_interval) * u64::from(num_partitions), + head: config.start.as_secs() / bucket_interval * num_partitions, bucket_interval, - delay: config.delay, + delay: u64::from(config.delay), stats: stats::Total::default(), limits: stats::Limits { max_total: config.max_total_bucket_bytes.unwrap_or(u64::MAX), @@ -283,7 +287,7 @@ impl Inner { } /// Returns the configured bucket interval. - pub fn bucket_interval(&self) -> u32 { + pub fn bucket_interval(&self) -> u64 { self.bucket_interval } @@ -303,16 +307,17 @@ impl Inner { } /// Returns the time as a duration since epoch when the next flush is supposed to happen. - pub fn next_flush(&self) -> Duration { - // `head + 1` to get the end time of the slot not the start. - (Duration::from_secs(self.head + 1) / self.num_partitions * self.bucket_interval) - + Duration::from_secs(u64::from(self.delay)) + pub fn next_flush_at(&self) -> Duration { + // `head + 1` to get the end time of the slot not the start, convert `head` to a duration + // first, to have enough precision for the division. + // + // Casts do not wrap, configuration requires them to be `u32`. + let offset = Duration::from_secs(self.head + 1) / self.num_partitions as u32 + * self.bucket_interval as u32; + offset + Duration::from_secs(self.delay) } /// Merges a metric bucket. - /// - /// Returns `true` if the bucket already existed in the aggregator - /// and `false` if the bucket did not exist yet. pub fn merge( &mut self, mut key: BucketKey, @@ -321,11 +326,11 @@ impl Inner { let project_key = key.project_key; let namespace = key.metric_name.namespace(); - let time_slot = key.timestamp.as_secs() / u64::from(self.bucket_interval); + let time_slot = key.timestamp.as_secs() / self.bucket_interval; // Make sure the timestamp is normalized to the correct interval as well. - key.timestamp = UnixTimestamp::from_secs(time_slot * u64::from(self.bucket_interval)); + key.timestamp = UnixTimestamp::from_secs(time_slot * self.bucket_interval); - let now_slot = self.head / u64::from(self.num_partitions); + let now_slot = self.head / self.num_partitions; if !self.slot_range.contains(now_slot, time_slot) { return Err(AggregateMetricsError::InvalidTimestamp(key.timestamp)); } @@ -338,10 +343,10 @@ impl Inner { self.hasher .hash_one((key.project_key, &key.metric_name, &key.tags)) } - } % u64::from(self.num_partitions); + } % self.num_partitions; // Calculate the slot of the bucket based on it's time and shift it by its assigned partition. - let slot = time_slot * u64::from(self.num_partitions) + assigned_partition; + let slot = time_slot * self.num_partitions + assigned_partition; // Transform the slot to an offset/index into the ring-buffer, by calculating: // `(slot - self.head).rem_euclid(self.slots.len())`. let index = sub_rem_euclid(slot, self.head, self.slots.len() as u64); @@ -458,10 +463,12 @@ impl fmt::Debug for Inner { let mut list = f.debug_list(); list.entry(&self.stats); for (i, v) in self.slots.iter().enumerate() { - let head_partitions = self.head % u64::from(self.num_partitions); - let time_offset = (head_partitions + i as u64) / u64::from(self.num_partitions); - let head_time = self.head / u64::from(self.num_partitions); - let time = (head_time + time_offset) * u64::from(self.bucket_interval); + let head_partitions = self.head % self.num_partitions; + let head_time = self.head / self.num_partitions; + + let time_offset = (head_partitions + i as u64) / self.num_partitions; + let time = (head_time + time_offset) * self.bucket_interval; + match v.is_empty() { // Make the output shorter with a string until `entry_with` is stable. true => list.entry(&format!("({time}, {v:?})")), @@ -795,7 +802,7 @@ mod tests { buckets.merge(bucket_key(120, "b"), counter(2.0)).unwrap(); // First flush is in 20 seconds + 10 seconds for the end of the partition. - assert_eq!(buckets.next_flush(), Duration::from_secs(90)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); insta::assert_debug_snapshot!(buckets.flush_next(), @r###" Partition { partition_key: 0, @@ -817,17 +824,17 @@ mod tests { assert!(buckets.flush_next().buckets.is_empty()); // We're now at second 100s. - assert_eq!(buckets.next_flush(), Duration::from_secs(110)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(110)); assert!(buckets.flush_next().buckets.is_empty()); assert!(buckets.flush_next().buckets.is_empty()); // We're now at second 120s. - assert_eq!(buckets.next_flush(), Duration::from_secs(130)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(130)); assert!(buckets.flush_next().buckets.is_empty()); assert!(buckets.flush_next().buckets.is_empty()); // We're now at second 140s -> our second bucket is ready (120s + 20s delay). - assert_eq!(buckets.next_flush(), Duration::from_secs(150)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(150)); insta::assert_debug_snapshot!(buckets.flush_next(), @r###" Partition { partition_key: 0, @@ -849,7 +856,7 @@ mod tests { assert!(buckets.flush_next().buckets.is_empty()); // We're now at 160s. - assert_eq!(buckets.next_flush(), Duration::from_secs(170)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(170)); } #[test] @@ -866,14 +873,14 @@ mod tests { start: UnixTimestamp::from_secs(70), }); - assert_eq!(buckets.next_flush(), Duration::from_secs(75)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(75)); assert_eq!(buckets.flush_next().partition_key, 0); - assert_eq!(buckets.next_flush(), Duration::from_secs(80)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(80)); assert_eq!(buckets.flush_next().partition_key, 1); - assert_eq!(buckets.next_flush(), Duration::from_secs(85)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(85)); assert_eq!(buckets.flush_next().partition_key, 0); - assert_eq!(buckets.next_flush(), Duration::from_secs(90)); - assert_eq!(buckets.next_flush(), Duration::from_secs(90)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(90)); } #[test] @@ -890,14 +897,14 @@ mod tests { start: UnixTimestamp::from_secs(70), }); - assert_eq!(buckets.next_flush(), Duration::from_secs(78)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(78)); assert_eq!(buckets.flush_next().partition_key, 0); - assert_eq!(buckets.next_flush(), Duration::from_secs(83)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(83)); assert_eq!(buckets.flush_next().partition_key, 1); - assert_eq!(buckets.next_flush(), Duration::from_secs(88)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(88)); assert_eq!(buckets.flush_next().partition_key, 0); - assert_eq!(buckets.next_flush(), Duration::from_secs(93)); - assert_eq!(buckets.next_flush(), Duration::from_secs(93)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(93)); + assert_eq!(buckets.next_flush_at(), Duration::from_secs(93)); } #[test] diff --git a/relay-metrics/src/aggregator/mod.rs b/relay-metrics/src/aggregator/mod.rs index 801e750c93..fce6fab1ea 100644 --- a/relay-metrics/src/aggregator/mod.rs +++ b/relay-metrics/src/aggregator/mod.rs @@ -128,7 +128,7 @@ impl Aggregator { /// After a successful flush, retry immediately until an error is returned with the next flush /// time, this makes sure time is eventually synchronized. pub fn try_flush_next(&mut self, now: SystemTime) -> Result { - let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush(); + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush_at(); if let Err(err) = now.duration_since(next_flush) { // The flush time is in the future, return the amount of time to wait before the next flush. @@ -148,8 +148,8 @@ impl Aggregator { } /// Returns when the next partition is ready to be flushed using [`Self::try_flush_next`]. - pub fn next_flush(&mut self, now: SystemTime) -> Duration { - let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush(); + pub fn next_flush_at(&mut self, now: SystemTime) -> Duration { + let next_flush = SystemTime::UNIX_EPOCH + self.inner.next_flush_at(); match now.duration_since(next_flush) { Ok(_) => Duration::ZERO, @@ -178,7 +178,7 @@ pub struct Partition { /// The partition key. pub partition_key: u32, buckets: HashMap, - bucket_interval: u32, + bucket_interval: u64, } impl IntoIterator for Partition { @@ -196,7 +196,7 @@ impl IntoIterator for Partition { /// Iterator yielded from [`Partition::into_iter`]. pub struct PartitionIter { inner: hashbrown::hash_map::IntoIter, - bucket_interval: u32, + bucket_interval: u64, } impl Iterator for PartitionIter { @@ -209,7 +209,7 @@ impl Iterator for PartitionIter { key.project_key, Bucket { timestamp: key.timestamp, - width: u64::from(self.bucket_interval), + width: self.bucket_interval, name: key.metric_name, tags: key.tags, value: data.value, diff --git a/relay-server/src/services/metrics/aggregator.rs b/relay-server/src/services/metrics/aggregator.rs index 961f28c4fc..b81b804f91 100644 --- a/relay-server/src/services/metrics/aggregator.rs +++ b/relay-server/src/services/metrics/aggregator.rs @@ -11,7 +11,7 @@ use relay_metrics::aggregator::{self, AggregateMetricsError, AggregatorConfig, P use relay_metrics::Bucket; use relay_quotas::{RateLimits, Scoping}; use relay_system::{Controller, FromMessage, Interface, NoResponse, Recipient, Service}; -use tokio::time::Sleep; +use tokio::time::{Instant, Sleep}; use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; @@ -157,7 +157,7 @@ impl AggregatorService { self.flush_partition(partition); - self.aggregator.next_flush(SystemTime::now()) + self.aggregator.next_flush_at(SystemTime::now()) } fn flush_partition(&mut self, partition: Partition) { @@ -310,7 +310,7 @@ impl AggregatorService { // Reset the next flush time, to the time of the new aggregator. self.next_flush .as_mut() - .reset(tokio::time::Instant::now() + self.aggregator.next_flush(SystemTime::now())); + .reset(Instant::now() + self.aggregator.next_flush_at(SystemTime::now())); } } @@ -339,7 +339,7 @@ impl Service for AggregatorService { _ = &mut self.next_flush => timed!( "try_flush", { let next = self.try_flush(); - self.next_flush.as_mut().reset(tokio::time::Instant::now() + next); + self.next_flush.as_mut().reset(Instant::now() + next); } ), Some(message) = rx.recv() => timed!(message.variant(), self.handle_message(message)), From 930eb063331a1ec2e04eeabb602ac1f631c80f05 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 17 Dec 2024 13:34:01 +0100 Subject: [PATCH 6/7] flaky test --- tests/integration/test_spans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_spans.py b/tests/integration/test_spans.py index 4c3bb39ec0..58c27679bb 100644 --- a/tests/integration/test_spans.py +++ b/tests/integration/test_spans.py @@ -484,6 +484,7 @@ def test_span_ingestion( "bucket_interval": 1, "initial_delay": 0, "max_secs_in_past": 2**64 - 1, + "shift_key": "none", } } ) From 1aed8a986eb035c397faf9132907f357b29f8670 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 17 Dec 2024 15:29:27 +0100 Subject: [PATCH 7/7] test --- relay-metrics/src/aggregator/inner.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/relay-metrics/src/aggregator/inner.rs b/relay-metrics/src/aggregator/inner.rs index 04b2d246db..c266449ff9 100644 --- a/relay-metrics/src/aggregator/inner.rs +++ b/relay-metrics/src/aggregator/inner.rs @@ -948,4 +948,26 @@ mod tests { Ok(()) } + + #[test] + fn test_sub_rem_euclid() { + for (head, slot, expected) in [ + // head == slot + (253, 253, 0), + // head < slot + (253, 273, 0), + (253, 274, 1), + (253, 275, 2), + (253, 276, 3), + (253, 277, 4), + // head > slot + (253, 233, 0), + (253, 234, 1), + (253, 235, 2), + (253, 236, 3), + (253, 237, 4), + ] { + assert_eq!(sub_rem_euclid(slot, head, 5), expected); + } + } }