Skip to content

Commit 1ef9619

Browse files
restrict generic flattening for otel data
1 parent 8a7dafd commit 1ef9619

File tree

6 files changed

+58
-20
lines changed

6 files changed

+58
-20
lines changed

src/event/format/json.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,17 @@ impl EventFormat for Event {
5252
static_schema_flag: Option<&String>,
5353
time_partition: Option<&String>,
5454
schema_version: SchemaVersion,
55+
log_source: &str,
5556
) -> Result<(Self::Data, Vec<Arc<Field>>, bool, Tags, Metadata), anyhow::Error> {
56-
let data = flatten_json_body(self.data, None, None, None, schema_version, false)?;
57+
let data = flatten_json_body(
58+
self.data,
59+
None,
60+
None,
61+
None,
62+
schema_version,
63+
false,
64+
log_source,
65+
)?;
5766
let stream_schema = schema;
5867

5968
// incoming event may be a single json or a json array

src/event/format/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ pub trait EventFormat: Sized {
5454
static_schema_flag: Option<&String>,
5555
time_partition: Option<&String>,
5656
schema_version: SchemaVersion,
57+
log_source: &str,
5758
) -> Result<(Self::Data, EventSchema, bool, Tags, Metadata), AnyError>;
5859

5960
fn decode(data: Self::Data, schema: Arc<Schema>) -> Result<RecordBatch, AnyError>;
@@ -64,12 +65,14 @@ pub trait EventFormat: Sized {
6465
static_schema_flag: Option<&String>,
6566
time_partition: Option<&String>,
6667
schema_version: SchemaVersion,
68+
log_source: &str,
6769
) -> Result<(RecordBatch, bool), AnyError> {
6870
let (data, mut schema, is_first, tags, metadata) = self.to_data(
6971
storage_schema,
7072
static_schema_flag,
7173
time_partition,
7274
schema_version,
75+
log_source,
7376
)?;
7477

7578
// DEFAULT_TAGS_KEY, DEFAULT_METADATA_KEY and DEFAULT_TIMESTAMP_KEY are reserved field names

src/handlers/http/ingest.rs

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ pub async fn ingest_internal_stream(stream_name: String, body: Bytes) -> Result<
9595
metadata: String::default(),
9696
};
9797
// For internal streams, use old schema
98-
event.into_recordbatch(&schema, None, None, SchemaVersion::V0)?
98+
event.into_recordbatch(&schema, None, None, SchemaVersion::V0, "")?
9999
};
100100
event::Event {
101101
rb,
@@ -127,7 +127,8 @@ pub async fn handle_otel_logs_ingestion(
127127
let Some(log_source) = req.headers().get(LOG_SOURCE_KEY) else {
128128
return Err(PostError::Header(ParseHeaderError::MissingLogSource));
129129
};
130-
if log_source.to_str().unwrap() != LOG_SOURCE_OTEL_LOGS {
130+
let log_source = log_source.to_str().unwrap();
131+
if log_source != LOG_SOURCE_OTEL_LOGS {
131132
return Err(PostError::Invalid(anyhow::anyhow!(
132133
"Please use x-p-log-source: otel-logs for ingesting otel logs"
133134
)));
@@ -141,7 +142,7 @@ pub async fn handle_otel_logs_ingestion(
141142
let mut json = flatten_otel_logs(&logs);
142143
for record in json.iter_mut() {
143144
let body: Bytes = serde_json::to_vec(record).unwrap().into();
144-
push_logs(&stream_name, &req, &body).await?;
145+
push_logs(&stream_name, &req, &body, log_source).await?;
145146
}
146147

147148
Ok(HttpResponse::Ok().finish())
@@ -160,7 +161,8 @@ pub async fn handle_otel_metrics_ingestion(
160161
let Some(log_source) = req.headers().get(LOG_SOURCE_KEY) else {
161162
return Err(PostError::Header(ParseHeaderError::MissingLogSource));
162163
};
163-
if log_source.to_str().unwrap() != LOG_SOURCE_OTEL_METRICS {
164+
let log_source = log_source.to_str().unwrap();
165+
if log_source != LOG_SOURCE_OTEL_METRICS {
164166
return Err(PostError::Invalid(anyhow::anyhow!(
165167
"Please use x-p-log-source: otel-metrics for ingesting otel metrics"
166168
)));
@@ -173,7 +175,7 @@ pub async fn handle_otel_metrics_ingestion(
173175
let mut json = flatten_otel_metrics(metrics);
174176
for record in json.iter_mut() {
175177
let body: Bytes = serde_json::to_vec(record).unwrap().into();
176-
push_logs(&stream_name, &req, &body).await?;
178+
push_logs(&stream_name, &req, &body, log_source).await?;
177179
}
178180

179181
Ok(HttpResponse::Ok().finish())
@@ -193,7 +195,8 @@ pub async fn handle_otel_traces_ingestion(
193195
let Some(log_source) = req.headers().get(LOG_SOURCE_KEY) else {
194196
return Err(PostError::Header(ParseHeaderError::MissingLogSource));
195197
};
196-
if log_source.to_str().unwrap() != LOG_SOURCE_OTEL_TRACES {
198+
let log_source = log_source.to_str().unwrap();
199+
if log_source != LOG_SOURCE_OTEL_TRACES {
197200
return Err(PostError::Invalid(anyhow::anyhow!(
198201
"Please use x-p-log-source: otel-traces for ingesting otel traces"
199202
)));
@@ -206,7 +209,7 @@ pub async fn handle_otel_traces_ingestion(
206209
let mut json = flatten_otel_traces(&traces);
207210
for record in json.iter_mut() {
208211
let body: Bytes = serde_json::to_vec(record).unwrap().into();
209-
push_logs(&stream_name, &req, &body).await?;
212+
push_logs(&stream_name, &req, &body, log_source).await?;
210213
}
211214

212215
Ok(HttpResponse::Ok().finish())
@@ -417,6 +420,7 @@ mod tests {
417420
None,
418421
None,
419422
SchemaVersion::V0,
423+
"",
420424
)
421425
.unwrap();
422426

@@ -467,6 +471,7 @@ mod tests {
467471
None,
468472
None,
469473
SchemaVersion::V0,
474+
"",
470475
)
471476
.unwrap();
472477

@@ -500,7 +505,8 @@ mod tests {
500505

501506
let req = TestRequest::default().to_http_request();
502507

503-
let (rb, _) = into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0).unwrap();
508+
let (rb, _) =
509+
into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0, "").unwrap();
504510

505511
assert_eq!(rb.num_rows(), 1);
506512
assert_eq!(rb.num_columns(), 5);
@@ -532,7 +538,7 @@ mod tests {
532538

533539
let req = TestRequest::default().to_http_request();
534540

535-
assert!(into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0).is_err());
541+
assert!(into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0, "").is_err());
536542
}
537543

538544
#[test]
@@ -550,7 +556,8 @@ mod tests {
550556

551557
let req = TestRequest::default().to_http_request();
552558

553-
let (rb, _) = into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0).unwrap();
559+
let (rb, _) =
560+
into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0, "").unwrap();
554561

555562
assert_eq!(rb.num_rows(), 1);
556563
assert_eq!(rb.num_columns(), 3);
@@ -568,7 +575,8 @@ mod tests {
568575
HashMap::default(),
569576
None,
570577
None,
571-
SchemaVersion::V0
578+
SchemaVersion::V0,
579+
""
572580
)
573581
.is_err())
574582
}
@@ -600,6 +608,7 @@ mod tests {
600608
None,
601609
None,
602610
SchemaVersion::V0,
611+
"",
603612
)
604613
.unwrap();
605614

@@ -656,6 +665,7 @@ mod tests {
656665
None,
657666
None,
658667
SchemaVersion::V0,
668+
"",
659669
)
660670
.unwrap();
661671

@@ -705,7 +715,8 @@ mod tests {
705715
);
706716
let req = TestRequest::default().to_http_request();
707717

708-
let (rb, _) = into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0).unwrap();
718+
let (rb, _) =
719+
into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0, "").unwrap();
709720

710721
assert_eq!(rb.num_rows(), 3);
711722
assert_eq!(rb.num_columns(), 6);
@@ -754,7 +765,7 @@ mod tests {
754765
.into_iter(),
755766
);
756767

757-
assert!(into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0).is_err());
768+
assert!(into_event_batch(&req, &json, schema, None, None, SchemaVersion::V0, "").is_err());
758769
}
759770

760771
#[test]
@@ -789,6 +800,7 @@ mod tests {
789800
None,
790801
None,
791802
SchemaVersion::V0,
803+
"",
792804
)
793805
.unwrap();
794806

@@ -869,6 +881,7 @@ mod tests {
869881
None,
870882
None,
871883
SchemaVersion::V1,
884+
"",
872885
)
873886
.unwrap();
874887

src/handlers/http/modal/utils/ingest_utils.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,23 +45,23 @@ pub async fn flatten_and_push_logs(
4545
stream_name: &str,
4646
) -> Result<(), PostError> {
4747
let Some(log_source) = req.headers().get(LOG_SOURCE_KEY) else {
48-
push_logs(stream_name, &req, &body).await?;
48+
push_logs(stream_name, &req, &body, "").await?;
4949
return Ok(());
5050
};
5151
let log_source = log_source.to_str().unwrap();
5252
if log_source == LOG_SOURCE_KINESIS {
5353
let json = kinesis::flatten_kinesis_logs(&body);
5454
for record in json.iter() {
5555
let body: Bytes = serde_json::to_vec(record).unwrap().into();
56-
push_logs(stream_name, &req, &body).await?;
56+
push_logs(stream_name, &req, &body, "").await?;
5757
}
5858
} else if log_source.contains("otel") {
5959
return Err(PostError::Invalid(anyhow!(
6060
"Please use endpoints `/v1/logs` for otel logs, `/v1/metrics` for otel metrics and `/v1/traces` for otel traces"
6161
)));
6262
} else {
6363
tracing::warn!("Unknown log source: {}", log_source);
64-
push_logs(stream_name, &req, &body).await?;
64+
push_logs(stream_name, &req, &body, "").await?;
6565
}
6666

6767
Ok(())
@@ -71,6 +71,7 @@ pub async fn push_logs(
7171
stream_name: &str,
7272
req: &HttpRequest,
7373
body: &Bytes,
74+
log_source: &str,
7475
) -> Result<(), PostError> {
7576
let time_partition = STREAM_INFO.get_time_partition(stream_name)?;
7677
let time_partition_limit = STREAM_INFO.get_time_partition_limit(stream_name)?;
@@ -84,6 +85,7 @@ pub async fn push_logs(
8485
time_partition_limit,
8586
custom_partition.as_ref(),
8687
schema_version,
88+
log_source,
8789
)?;
8890

8991
for value in data {
@@ -113,6 +115,7 @@ pub async fn push_logs(
113115
static_schema_flag.as_ref(),
114116
time_partition.as_ref(),
115117
schema_version,
118+
log_source,
116119
)?;
117120

118121
Event {
@@ -140,6 +143,7 @@ pub fn into_event_batch(
140143
static_schema_flag: Option<&String>,
141144
time_partition: Option<&String>,
142145
schema_version: SchemaVersion,
146+
log_source: &str,
143147
) -> Result<(arrow_array::RecordBatch, bool), PostError> {
144148
let tags = collect_labelled_headers(req, PREFIX_TAGS, SEPARATOR)?;
145149
let metadata = collect_labelled_headers(req, PREFIX_META, SEPARATOR)?;
@@ -148,8 +152,13 @@ pub fn into_event_batch(
148152
tags,
149153
metadata,
150154
};
151-
let (rb, is_first) =
152-
event.into_recordbatch(&schema, static_schema_flag, time_partition, schema_version)?;
155+
let (rb, is_first) = event.into_recordbatch(
156+
&schema,
157+
static_schema_flag,
158+
time_partition,
159+
schema_version,
160+
log_source,
161+
)?;
153162
Ok((rb, is_first))
154163
}
155164

src/kafka.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ async fn ingest_message(msg: BorrowedMessage<'_>) -> Result<(), KafkaError> {
198198
static_schema_flag.as_ref(),
199199
time_partition.as_ref(),
200200
schema_version,
201+
"",
201202
)
202203
.map_err(|err| KafkaError::PostError(PostError::CustomError(err.to_string())))?;
203204

src/utils/json/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ pub fn flatten_json_body(
3232
custom_partition: Option<&String>,
3333
schema_version: SchemaVersion,
3434
validation_required: bool,
35+
log_source: &str,
3536
) -> Result<Value, anyhow::Error> {
36-
let mut nested_value = if schema_version == SchemaVersion::V1 {
37+
let mut nested_value = if schema_version == SchemaVersion::V1 && !log_source.contains("otel") {
3738
flatten::generic_flattening(body)?
3839
} else {
3940
body
@@ -57,6 +58,7 @@ pub fn convert_array_to_object(
5758
time_partition_limit: Option<NonZeroU32>,
5859
custom_partition: Option<&String>,
5960
schema_version: SchemaVersion,
61+
log_source: &str,
6062
) -> Result<Vec<Value>, anyhow::Error> {
6163
let data = flatten_json_body(
6264
body,
@@ -65,6 +67,7 @@ pub fn convert_array_to_object(
6567
custom_partition,
6668
schema_version,
6769
true,
70+
log_source,
6871
)?;
6972
let value_arr = match data {
7073
Value::Array(arr) => arr,

0 commit comments

Comments (0)