From 397362908dcd111781d39b5d1553fc74cbce07f6 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 24 Sep 2024 13:41:41 +0200 Subject: [PATCH] feat(spans): Infer span.op if not defined (#4056) Add global option that allows inferring the span op, with rules like this: ```json { "span_op_defaults": { "rules": [{ "condition": { "op": "not", "inner": { "op": "eq", "name": "span.data.messaging\\.system", "value": null, }, }, "value": "message" }] } } ``` --- CHANGELOG.md | 1 + py/CHANGELOG.md | 4 + relay-cabi/src/processing.rs | 1 + relay-dynamic-config/src/global.rs | 11 +- relay-event-normalization/src/event.rs | 16 +- .../src/transactions/processor.rs | 144 ++++++++++++++++-- relay-server/src/services/processor.rs | 1 + .../src/services/processor/span/processing.rs | 11 +- 8 files changed, 164 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d437d35c..696b1aa22f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) - Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) +- Configurable span.op inference. ([#4056](https://github.com/getsentry/relay/pull/4056)) **Internal:** diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index e7bbae9a9e..2885917a0d 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +- Add `spanOpDefaults` to global config. ([#4056](https://github.com/getsentry/relay/pull/4056)) + ## 0.9.1 - Add REPLAY_VIDEO entry to DataCategory. ([#3847](https://github.com/getsentry/relay/pull/3847)) diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index c8b86a20f4..45af9564d2 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -275,6 +275,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( replay_id: config.replay_id, span_allowed_hosts: &[], // only supported in relay scrub_mongo_description: ScrubMongoDescription::Disabled, // only supported in relay + span_op_defaults: Default::default(), // only supported in relay }; normalize_event(&mut event, &normalization_config); diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 692c9da82b..e65f40d436 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -5,7 +5,7 @@ use std::io::BufReader; use std::path::Path; use relay_base_schema::metrics::MetricNamespace; -use relay_event_normalization::{MeasurementsConfig, ModelCosts}; +use relay_event_normalization::{MeasurementsConfig, ModelCosts, SpanOpDefaults}; use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; @@ -49,6 +49,13 @@ pub struct GlobalConfig { /// Configuration for AI span measurements. #[serde(skip_serializing_if = "is_missing")] pub ai_model_costs: ErrorBoundary, + + /// Configuration to derive the `span.op` from other span fields. + #[serde( + deserialize_with = "default_on_error", + skip_serializing_if = "is_default" + )] + pub span_op_defaults: SpanOpDefaults, } impl GlobalConfig { @@ -337,7 +344,7 @@ pub enum BucketEncoding { /// Returns `true` if this value is equal to `Default::default()`. fn is_default(t: &T) -> bool { - *t == T::default() + t == &T::default() } fn default_on_error<'de, D, T>(deserializer: D) -> Result diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 08ae145af6..05365e193c 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -34,9 +34,9 @@ use crate::span::tag_extraction::extract_span_tags_from_event; use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS}; use crate::{ breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, - transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, - MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, - TransactionNameConfig, + transactions, trimming, user_agent, BorrowedSpanOpDefaults, BreakdownsConfig, + CombinedMeasurementsConfig, GeoIpLookup, MaxChars, ModelCosts, PerformanceScoreConfig, + RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, }; /// Configuration for [`normalize_event`]. @@ -161,6 +161,9 @@ pub struct NormalizationConfig<'a> { /// Controls whether or not MongoDB span descriptions will be scrubbed. pub scrub_mongo_description: ScrubMongoDescription, + + /// Rules to infer `span.op` from other span fields. + pub span_op_defaults: BorrowedSpanOpDefaults<'a>, } impl<'a> Default for NormalizationConfig<'a> { @@ -194,6 +197,7 @@ impl<'a> Default for NormalizationConfig<'a> { replay_id: Default::default(), span_allowed_hosts: Default::default(), scrub_mongo_description: ScrubMongoDescription::Disabled, + span_op_defaults: Default::default(), } } } @@ -244,8 +248,10 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { // (internally noops for non-transaction events). // TODO: Parts of this processor should probably be a filter so we // can revert some changes to ProcessingAction) - let mut transactions_processor = - transactions::TransactionsProcessor::new(config.transaction_name_config.clone()); + let mut transactions_processor = transactions::TransactionsProcessor::new( + config.transaction_name_config, + config.span_op_defaults, + ); let _ = transactions_processor.process_event(event, meta, ProcessingState::root()); // Process security reports first to ensure all props. diff --git a/relay-event-normalization/src/transactions/processor.rs b/relay-event-normalization/src/transactions/processor.rs index 880bcc6f8a..dc2dee3d2e 100644 --- a/relay-event-normalization/src/transactions/processor.rs +++ b/relay-event-normalization/src/transactions/processor.rs @@ -7,13 +7,14 @@ use relay_event_schema::processor::{ self, ProcessValue, ProcessingResult, ProcessingState, Processor, }; use relay_event_schema::protocol::{Event, Span, SpanStatus, TraceContext, TransactionSource}; -use relay_protocol::{Annotated, Meta, Remark, RemarkType}; +use relay_protocol::{Annotated, Meta, Remark, RemarkType, RuleCondition}; +use serde::{Deserialize, Serialize}; use crate::regexes::TRANSACTION_NAME_NORMALIZER_REGEX; use crate::TransactionNameRule; /// Configuration for sanitizing unparameterized transaction names. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct TransactionNameConfig<'r> { /// Rules for identifier replacement that were discovered by Sentry's transaction clusterer. pub rules: &'r [TransactionNameRule], @@ -76,12 +77,27 @@ pub fn apply_transaction_rename_rules( #[derive(Debug, Default)] pub struct TransactionsProcessor<'r> { name_config: TransactionNameConfig<'r>, + span_op_defaults: BorrowedSpanOpDefaults<'r>, } impl<'r> TransactionsProcessor<'r> { /// Creates a new `TransactionsProcessor` instance. - pub fn new(name_config: TransactionNameConfig<'r>) -> Self { - Self { name_config } + pub fn new( + name_config: TransactionNameConfig<'r>, + span_op_defaults: BorrowedSpanOpDefaults<'r>, + ) -> Self { + Self { + name_config, + span_op_defaults, + } + } + + #[cfg(test)] + fn new_name_config(name_config: TransactionNameConfig<'r>) -> Self { + Self { + name_config, + ..Default::default() + } } /// Returns `true` if the given transaction name should be treated as a URL. @@ -163,14 +179,61 @@ impl Processor for TransactionsProcessor<'_> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - span.op.get_or_insert_with(|| "default".to_owned()); - + if span.op.value().is_none() { + *span.op.value_mut() = Some(self.span_op_defaults.infer(span)); + } span.process_child_values(self, state)?; Ok(()) } } +/// Rules used to infer `span.op` from other span fields. +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SpanOpDefaults { + /// List of rules to apply. First match wins. + pub rules: Vec, +} + +impl SpanOpDefaults { + /// Gets a borrowed version of this config. + pub fn borrow(&self) -> BorrowedSpanOpDefaults { + BorrowedSpanOpDefaults { + rules: self.rules.as_slice(), + } + } +} + +/// Borrowed version of [`SpanOpDefaults`]. +#[derive(Clone, Copy, Debug, Default)] +pub struct BorrowedSpanOpDefaults<'a> { + rules: &'a [SpanOpDefaultRule], +} + +impl BorrowedSpanOpDefaults<'_> { + /// Infer the span op from a set of rules. + /// + /// The first matching rule determines the span op. + /// If no rule matches, `"default"` is returned. + fn infer(&self, span: &Span) -> String { + for rule in self.rules { + if rule.condition.matches(span) { + return rule.value.clone(); + } + } + "default".to_owned() + } +} + +/// A rule to infer [`Span::op`] from other span fields. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct SpanOpDefaultRule { + /// When to set the given value. + pub condition: RuleCondition, + /// Value for the [`Span::op`]. Only set if omitted by the SDK. + pub value: String, +} + /// Span status codes for the Ruby Rack integration that indicate raw URLs being sent as /// transaction names. These cases are considered as high-cardinality. /// @@ -336,7 +399,6 @@ fn scrub_identifiers_with_regex( #[cfg(test)] mod tests { - use chrono::{Duration, TimeZone, Utc}; use insta::assert_debug_snapshot; use itertools::Itertools; @@ -344,6 +406,7 @@ mod tests { use relay_event_schema::processor::process_value; use relay_event_schema::protocol::{ClientSdkInfo, Contexts, SpanId, TraceId}; use relay_protocol::{assert_annotated_snapshot, get_value}; + use serde_json::json; use crate::validation::validate_event; use crate::{EventValidationConfig, RedactionRule}; @@ -911,7 +974,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule1, rule2, rule3], }), ProcessingState::root(), @@ -992,7 +1055,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule1, rule2, rule3], }), ProcessingState::root(), @@ -1059,7 +1122,7 @@ mod tests { let mut event = Annotated::::from_json(json).unwrap(); - let mut processor = TransactionsProcessor::new(TransactionNameConfig { + let mut processor = TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); @@ -1174,7 +1237,7 @@ mod tests { // This must not normalize transaction name, since it's disabled. process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }), ProcessingState::root(), @@ -1231,7 +1294,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }), ProcessingState::root(), @@ -1316,7 +1379,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { rules: &[rule] }), + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule] }), ProcessingState::root(), ) .unwrap(); @@ -1521,7 +1584,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[TransactionNameRule { pattern: LazyGlob::new("/remains/*/1234567890/".to_owned()), expiry: Utc.with_ymd_and_hms(3000, 1, 1, 1, 1, 1).unwrap(), @@ -1587,7 +1650,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[TransactionNameRule { pattern: LazyGlob::new("/remains/*/**".to_owned()), expiry: Utc.with_ymd_and_hms(3000, 1, 1, 1, 1, 1).unwrap(), @@ -1627,4 +1690,55 @@ mod tests { }, ]"#); } + + #[test] + fn test_infer_span_op_default() { + let span = Annotated::from_json(r#"{}"#).unwrap(); + let defaults: SpanOpDefaults = serde_json::from_value(json!({ + "rules": [{ + "condition": { + "op": "not", + "inner": { + "op": "eq", + "name": "span.data.messaging\\.system", + "value": null, + }, + }, + "value": "message" + }] + } + )) + .unwrap(); + let op = defaults.borrow().infer(span.value().unwrap()); + assert_eq!(&op, "default"); + } + + #[test] + fn test_infer_span_op_messaging() { + let span = Annotated::from_json( + r#"{ + "data": { + "messaging.system": "activemq" + } + }"#, + ) + .unwrap(); + let defaults: SpanOpDefaults = serde_json::from_value(json!({ + "rules": [{ + "condition": { + "op": "not", + "inner": { + "op": "eq", + "name": "span.data.messaging\\.system", + "value": null, + }, + }, + "value": "message" + }] + } + )) + .unwrap(); + let op = defaults.borrow().infer(span.value().unwrap()); + assert_eq!(&op, "message"); + } } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 5c88311a6e..1d4c93f17f 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1603,6 +1603,7 @@ impl EnvelopeProcessorService { } else { ScrubMongoDescription::Disabled }, + span_op_defaults: global_config.span_op_defaults.borrow(), }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index fe6d634e1a..b529878c8e 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -16,8 +16,9 @@ use relay_event_normalization::{ TransactionsProcessor, }; use relay_event_normalization::{ - normalize_transaction_name, ClientHints, FromUserAgentInfo, GeoIpLookup, ModelCosts, - SchemaProcessor, TimestampProcessor, TransactionNameRule, TrimmingProcessor, + normalize_transaction_name, BorrowedSpanOpDefaults, ClientHints, FromUserAgentInfo, + GeoIpLookup, ModelCosts, SchemaProcessor, TimestampProcessor, TransactionNameRule, + TrimmingProcessor, }; use relay_event_schema::processor::{process_value, ProcessingState}; use relay_event_schema::protocol::{ @@ -410,6 +411,7 @@ struct NormalizeSpanConfig<'a> { client_ip: Option, /// An initialized GeoIP lookup. geo_lookup: Option<&'a GeoIpLookup>, + span_op_defaults: BorrowedSpanOpDefaults<'a>, } impl<'a> NormalizeSpanConfig<'a> { @@ -459,6 +461,7 @@ impl<'a> NormalizeSpanConfig<'a> { }, client_ip, geo_lookup, + span_op_defaults: global_config.span_op_defaults.borrow(), } } } @@ -514,6 +517,7 @@ fn normalize( scrub_mongo_description, client_ip, geo_lookup, + span_op_defaults, } = config; set_segment_attributes(annotated_span); @@ -537,7 +541,7 @@ fn normalize( } process_value( annotated_span, - &mut TransactionsProcessor::new(Default::default()), + &mut TransactionsProcessor::new(Default::default(), span_op_defaults), ProcessingState::root(), )?; @@ -1176,6 +1180,7 @@ mod tests { scrub_mongo_description: ScrubMongoDescription::Disabled, client_ip: Some(IpAddr("2.125.160.216".to_owned())), geo_lookup: Some(&GEO_LOOKUP), + span_op_defaults: Default::default(), } }