From 3082d7f1eebaef16420d689d0162bf82a837005c Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Sat, 23 Dec 2023 14:12:26 -0800
Subject: [PATCH 1/9] WIP: completions api

---
 router/src/server.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/router/src/server.rs b/router/src/server.rs
index ea06689f1..02cf0e7e1 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -77,6 +77,53 @@ async fn compat_generate(
     }
 }
 
+/// Generate tokens if `stream == false` or a stream of token if `stream == true`
+#[utoipa::path(
+post,
+tag = "LoRAX",
+path = "/v1/completions",
+request_body = CompatGenerateRequest,
+responses(
+(status = 200, description = "Generated Text",
+content(
+("application/json" = GenerateResponse),
+("text/event-stream" = StreamResponse),
+)),
+(status = 424, description = "Generation Error", body = ErrorResponse,
+example = json ! ({"error": "Request failed during generation"})),
+(status = 429, description = "Model is overloaded", body = ErrorResponse,
+example = json ! ({"error": "Model is overloaded"})),
+(status = 422, description = "Input validation error", body = ErrorResponse,
+example = json ! ({"error": "Input validation error"})),
+(status = 500, description = "Incomplete generation", body = ErrorResponse,
+example = json ! ({"error": "Incomplete generation"})),
+)
+)]
+#[instrument(skip(infer, req))]
+async fn completions_v1(
+    default_return_full_text: Extension<bool>,
+    infer: Extension<Infer>,
+    req: Json<CompatGenerateRequest>,
+) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+    let mut req = req.0;
+
+    // default return_full_text given the pipeline_tag
+    if req.parameters.return_full_text.is_none() {
+        req.parameters.return_full_text = Some(default_return_full_text.0)
+    }
+
+    // switch on stream
+    if req.stream {
+        Ok(generate_stream(infer, Json(req.into()))
+            .await
+            .into_response())
+    } else {
+        let (headers, generation) = generate(infer, Json(req.into())).await?;
+        // wrap generation inside a Vec to match api-inference
+        Ok((headers, Json(vec![generation.0])).into_response())
+    }
+}
+
 /// LoRAX endpoint info
 #[utoipa::path(
 get,
@@ -686,6 +733,7 @@ pub async fn run(
         .route("/info", get(get_model_info))
         .route("/generate", post(generate))
        .route("/generate_stream", post(generate_stream))
+        .route("/v1/completions", post(completions_v1))
         // AWS Sagemaker route
         .route("/invocations", post(compat_generate))
         // Base Health route

From 7c2ad5861abf16d168e2f9ec4a1380de2a58c082 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Mon, 8 Jan 2024 22:19:11 -0800
Subject: [PATCH 2/9] Added structs

---
 router/src/lib.rs | 116 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index 6d1d987d5..bed442fe4 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -308,6 +308,122 @@ pub(crate) struct ErrorResponse {
     pub error_type: String,
 }
 
+// OpenAI compatible structs
+
+#[derive(Serialize, ToSchema)]
+struct UsageInfo {
+    prompt_tokens: u32,
+    total_tokens: u32,
+    completion_tokens: Option<u32>,
+}
+
+#[derive(Serialize, ToSchema)]
+struct ChatCompletionRequest {
+    model: String,
+    messages: Vec<ChatMessage>, // Assuming Vec for Union
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    n: Option<i32>,
+    max_tokens: Option<i32>,
+    stop: Vec<String>, // Assuming Vec for Union
+    stream: Option<bool>,
+    presence_penalty: Option<f32>,
+    frequency_penalty: Option<f32>,
+    logit_bias: Option<HashMap<String, f32>>,
+    user: Option<String>,
+    // Additional parameters
+    // TODO(travis): add other LoRAX params here
+}
+
+#[derive(Serialize, ToSchema)]
+struct CompletionRequest {
+    model: String,
+    prompt: Vec<String>, // Assuming Vec for Union
+    suffix: Option<String>,
+    max_tokens: Option<i32>,
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    n: Option<i32>,
+    stream: Option<bool>,
+    logprobs: Option<i32>,
+    echo: Option<bool>,
+    stop: Vec<String>, // Assuming Vec for Union
+    presence_penalty: Option<f32>,
+    frequency_penalty: Option<f32>,
+    best_of: Option<i32>,
+    logit_bias: Option<HashMap<String, f32>>,
+    user: Option<String>,
+    // Additional parameters
+    // TODO(travis): add other LoRAX params here
+}
+
+#[derive(Serialize, ToSchema)]
+struct LogProbs {
+    text_offset: Vec<i32>,
+    token_logprobs: Vec<Option<f32>>,
+    tokens: Vec<String>,
+    top_logprobs: Option<Vec<Option<HashMap<i32, f32>>>>,
+}
+
+#[derive(Serialize, ToSchema)]
+struct CompletionResponseChoice {
+    index: i32,
+    text: String,
+    logprobs: Option<LogProbs>,
+    finish_reason: Option<String>, // Literal replaced with String
+}
+
+#[derive(Serialize, ToSchema)]
+struct CompletionResponse {
+    id: String,
+    object: String,
+    created: i64,
+    model: String,
+    choices: Vec<CompletionResponseChoice>,
+    usage: UsageInfo,
+}
+
+#[derive(Serialize, ToSchema)]
+struct CompletionResponseStreamChoice {
+    index: i32,
+    text: String,
+    logprobs: Option<LogProbs>,
+    finish_reason: Option<String>, // Literal replaced with String
+}
+
+#[derive(Serialize, ToSchema)]
+struct CompletionStreamResponse {
+    id: String,
+    object: String,
+    created: i64,
+    model: String,
+    choices: Vec<CompletionResponseStreamChoice>,
+    usage: Option<UsageInfo>,
+}
+
+#[derive(Serialize, ToSchema)]
+struct ChatMessage {
+    role: String,
+    content: String,
+}
+
+#[derive(Serialize, ToSchema)]
+struct ChatCompletionResponseChoice {
+    index: i32,
+    message: ChatMessage,
+    finish_reason: Option<String>, // Literal replaced with String
+}
+
+#[derive(Serialize, ToSchema)]
+struct ChatCompletionResponse {
+    id: String,
+    object: String,
+    created: i64,
+    model: String,
+    choices: Vec<ChatCompletionResponseChoice>,
+    usage: UsageInfo,
+}
+
 #[cfg(test)]
 mod tests {
     use std::io::Write;

From 12a2aca9b44cc036f9f97b9ba023f665d643abd3 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 09:25:53 -0800
Subject: [PATCH 3/9] WIP: convert request and response

---
 router/src/lib.rs    | 56 ++++++++++++++++++++++++++++++++++++++++++++
 router/src/server.rs | 23 +++++++++++----------
 2 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index bed442fe4..e38657757 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -424,6 +424,62 @@ struct ChatCompletionResponse {
     usage: UsageInfo,
 }
 
+impl From<CompletionRequest> for CompatGenerateRequest {
+    fn from(req: CompletionRequest) -> Self {
+        CompatGenerateRequest {
+            // TODO(travis): support multiple inputs per request
+            inputs: req.prompt.join(" "),
+            parameters: GenerateParameters {
+                adapter_id: req.model.parse().ok(),
+                adapter_source: None,
+                api_token: None,
+                best_of: req.best_of.map(|x| x as usize),
+                temperature: req.temperature,
+                repetition_penalty: None,
+                top_k: None,
+                top_p: req.top_p,
+                typical_p: None,
+                do_sample: req.n.is_none(),
+                max_new_tokens: req.max_tokens.map(|x| x as u32).unwrap_or(default_max_new_tokens()),
+                return_full_text: req.echo,
+                stop: req.stop,
+                truncate: None,
+                watermark: false,
+                details: req.logprobs.is_some(),
+                decoder_input_details: req.logprobs.is_some(),
+                seed: None,
+            },
+            stream: req.stream.unwrap_or(false),
+        }
+    }
+}
+
+impl From<GenerateResponse> for CompletionResponse {
+    fn from(resp: GenerateResponse) -> Self {
+        let prompt_tokens = resp.details.as_ref().map(|x| x.prompt_tokens).unwrap_or(0);
+        let completion_tokens = resp.details.as_ref().map(|x| x.generated_tokens).unwrap_or(0);
+        let total_tokens = prompt_tokens + completion_tokens;
+
+        CompletionResponse {
+            id: "null".to_string(),
+            object: "text_completion".to_string(),
+            created: 0,
+            model: "null".to_string(),
"null".to_string(), + choices: vec![CompletionResponseChoice { + index: 0, + text: resp.generated_text, + logprobs: None, + finish_reason: None, + }], + usage: UsageInfo { + prompt_tokens: prompt_tokens, + total_tokens: total_tokens, + completion_tokens: Some(completion_tokens), + }, + } + } +} + #[cfg(test)] mod tests { use std::io::Write; diff --git a/router/src/server.rs b/router/src/server.rs index 8424f36a1..045f8b453 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -5,7 +5,7 @@ use crate::validation::ValidationError; use crate::{ BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken, - StreamDetails, StreamResponse, Token, Validation, + StreamDetails, StreamResponse, Token, Validation, CompletionRequest, }; use axum::extract::Extension; use axum::http::{HeaderMap, Method, StatusCode}; @@ -82,12 +82,12 @@ async fn compat_generate( post, tag = "LoRAX", path = "/v1/completions", -request_body = CompatGenerateRequest, +request_body = CompletionRequest, responses( (status = 200, description = "Generated Text", content( -("application/json" = GenerateResponse), -("text/event-stream" = StreamResponse), +("application/json" = CompletionResponse), +("text/event-stream" = CompletionStreamResponse), )), (status = 424, description = "Generation Error", body = ErrorResponse, example = json ! ({"error": "Request failed during generation"})), @@ -103,22 +103,23 @@ example = json ! ({"error": "Incomplete generation"})), async fn completions_v1( default_return_full_text: Extension, infer: Extension, - req: Json, + req: Json, ) -> Result)> { - let mut req = req.0; + let req = req.0; + let mut gen_req = CompatGenerateRequest::from(req); // default return_full_text given the pipeline_tag - if req.parameters.return_full_text.is_none() { - req.parameters.return_full_text = Some(default_return_full_text.0) + if gen_req.parameters.return_full_text.is_none() { + gen_req.parameters.return_full_text = Some(default_return_full_text.0) } // switch on stream - if req.stream { - Ok(generate_stream(infer, Json(req.into())) + if gen_req.stream { + Ok(generate_stream(infer, Json(gen_req.into())) .await .into_response()) } else { - let (headers, generation) = generate(infer, Json(req.into())).await?; + let (headers, generation) = generate(infer, Json(gen_req.into())).await?; // wrap generation inside a Vec to match api-inference Ok((headers, Json(vec![generation.0])).into_response()) } From 274ecc34f20bf03f139ac3cd8ddf8d9766e42e45 Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Tue, 9 Jan 2024 13:06:44 -0800 Subject: [PATCH 4/9] Fixed synchronous completions --- router/src/lib.rs | 19 ++++++++++--------- router/src/server.rs | 15 ++++++++------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/router/src/lib.rs b/router/src/lib.rs index e38657757..a53c6f2ec 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -317,15 +317,16 @@ struct UsageInfo { completion_tokens: Option, } -#[derive(Serialize, ToSchema)] +#[derive(Clone, Debug, Deserialize, ToSchema)] struct ChatCompletionRequest { model: String, - messages: Vec, // Assuming Vec for Union + messages: Vec, temperature: Option, top_p: Option, n: Option, max_tokens: Option, - stop: Vec, // Assuming Vec for Union + #[serde(default)] + stop: Vec, stream: Option, presence_penalty: Option, frequency_penalty: Option, @@ -335,10 +336,10 @@ struct ChatCompletionRequest { // TODO(travis): add other LoRAX params here } 
-#[derive(Serialize, ToSchema)]
+#[derive(Clone, Debug, Deserialize, ToSchema)]
 struct CompletionRequest {
     model: String,
-    prompt: Vec<String>, // Assuming Vec for Union
+    prompt: String,
     suffix: Option<String>,
     max_tokens: Option<i32>,
     temperature: Option<f32>,
     top_p: Option<f32>,
     n: Option<i32>,
     stream: Option<bool>,
     logprobs: Option<i32>,
     echo: Option<bool>,
-    stop: Vec<String>, // Assuming Vec for Union
+    #[serde(default)]
+    stop: Vec<String>,
     presence_penalty: Option<f32>,
     frequency_penalty: Option<f32>,
     best_of: Option<i32>,
     logit_bias: Option<HashMap<String, f32>>,
     user: Option<String>,
     // Additional parameters
     // TODO(travis): add other LoRAX params here
 }
 
 impl From<CompletionRequest> for CompatGenerateRequest {
     fn from(req: CompletionRequest) -> Self {
         CompatGenerateRequest {
-            // TODO(travis): support multiple inputs per request
-            inputs: req.prompt.join(" "),
+            inputs: req.prompt,
             parameters: GenerateParameters {
                 adapter_id: req.model.parse().ok(),
                 adapter_source: None,
                 api_token: None,
                 best_of: req.best_of.map(|x| x as usize),
                 temperature: req.temperature,
                 repetition_penalty: None,
                 top_k: None,
                 top_p: req.top_p,
                 typical_p: None,
                 do_sample: req.n.is_none(),
                 max_new_tokens: req.max_tokens.map(|x| x as u32).unwrap_or(default_max_new_tokens()),
                 return_full_text: req.echo,
                 stop: req.stop,
                 truncate: None,
                 watermark: false,
-                details: req.logprobs.is_some(),
+                details: true,
                 decoder_input_details: req.logprobs.is_some(),
                 seed: None,
             },
diff --git a/router/src/server.rs b/router/src/server.rs
index 045f8b453..2bafd3d50 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -5,7 +5,7 @@ use crate::validation::ValidationError;
 use crate::{
     BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason,
     GenerateParameters, GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken,
-    StreamDetails, StreamResponse, Token, Validation, CompletionRequest,
+    StreamDetails, StreamResponse, Token, Validation, CompletionRequest, CompletionResponse,
 };
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
@@ -114,14 +114,15 @@ async fn completions_v1(
     }
 
     // switch on stream
-    if gen_req.stream {
-        Ok(generate_stream(infer, Json(gen_req.into()))
-            .await
-            .into_response())
-    } else {
+    // if gen_req.stream {
+    //     // let (headers, generation_stream) = generate_stream(infer, Json(gen_req.into())).await;
+    //     // let sse = Sse::new(generation_stream).keep_alive(KeepAlive::default());
+    //     // Ok((headers, sse).into_response())
+    // } else
+    {
         let (headers, generation) = generate(infer, Json(gen_req.into())).await?;
         // wrap generation inside a Vec to match api-inference
-        Ok((headers, Json(vec![generation.0])).into_response())
+        Ok((headers, Json(vec![CompletionResponse::from(generation.0)])).into_response())
     }
 }

From 9796f3157a7cc3e1c292e722f00ee02232f8f752 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 13:15:41 -0800
Subject: [PATCH 5/9] Fixed sampling

---
 router/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index a53c6f2ec..2d2bdce2b 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -440,7 +440,7 @@ impl From<CompletionRequest> for CompatGenerateRequest {
                 top_k: None,
                 top_p: req.top_p,
                 typical_p: None,
-                do_sample: req.n.is_none(),
+                do_sample: !req.n.is_none(),
                 max_new_tokens: req.max_tokens.map(|x| x as u32).unwrap_or(default_max_new_tokens()),
                 return_full_text: req.echo,
                 stop: req.stop,

From 693d89a9eedd1b2a27e645e057e9023913adf296 Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 13:23:07 -0800
Subject: [PATCH 6/9] Use empty adapter_id as base model

---
 router/src/infer.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/src/infer.rs b/router/src/infer.rs
index d46ab0471..b8b4b656c 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -118,7 +118,7 @@ impl Infer {
             })?;
 
         let mut adapter_id = request.parameters.adapter_id.clone();
-        if adapter_id.is_none() {
+        if adapter_id.is_none() || adapter_id.as_ref().unwrap().is_empty() {
             adapter_id = Some(BASE_MODEL_ADAPTER_ID.to_string());
         }
         let mut adapter_source = request.parameters.adapter_source.clone();

From 71db3c2dcaa7443116c824f25e2819751deef3df Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 14:05:17 -0800
Subject: [PATCH 7/9] Streaming

---
 router/src/lib.rs    | 26 ++++++++++++++++++++++++++
 router/src/server.rs | 43 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index 2d2bdce2b..e8350bbd0 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -481,6 +481,32 @@ impl From<GenerateResponse> for CompletionResponse {
     }
 }
 
+impl From<StreamResponse> for CompletionStreamResponse {
+    fn from(resp: StreamResponse) -> Self {
+        let prompt_tokens = resp.details.as_ref().map(|x| x.prompt_tokens).unwrap_or(0);
+        let completion_tokens = resp.details.as_ref().map(|x| x.generated_tokens).unwrap_or(0);
+        let total_tokens = prompt_tokens + completion_tokens;
+
+        CompletionStreamResponse {
+            id: "null".to_string(),
+            object: "text_completion".to_string(),
+            created: 0,
+            model: "null".to_string(),
+            choices: vec![CompletionResponseStreamChoice {
+                index: 0,
+                text: resp.generated_text.unwrap_or_default(),
+                logprobs: None,
+                finish_reason: None,
+            }],
+            usage: Some(UsageInfo {
+                prompt_tokens: prompt_tokens,
+                total_tokens: total_tokens,
+                completion_tokens: Some(completion_tokens),
+            }),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::io::Write;
diff --git a/router/src/server.rs b/router/src/server.rs
index 2bafd3d50..2a8d89cc0 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -5,7 +5,7 @@ use crate::validation::ValidationError;
 use crate::{
     BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason,
     GenerateParameters, GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken,
-    StreamDetails, StreamResponse, Token, Validation, CompletionRequest, CompletionResponse,
+    StreamDetails, StreamResponse, Token, Validation, CompletionRequest, CompletionResponse, CompletionStreamResponse,
 };
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
@@ -114,12 +114,22 @@ async fn completions_v1(
     }
 
     // switch on stream
-    // if gen_req.stream {
-    //     // let (headers, generation_stream) = generate_stream(infer, Json(gen_req.into())).await;
-    //     // let sse = Sse::new(generation_stream).keep_alive(KeepAlive::default());
-    //     // Ok((headers, sse).into_response())
-    // } else
-    {
+    if gen_req.stream {
+        let callback = move |resp: StreamResponse| {
+            Event::default()
+                .json_data(CompletionStreamResponse::from(resp))
+                .map_or_else(
+                    |err| {
+                        println!("Failed to serialize CompletionStreamResponse: {:?}", err);
+                        Event::default()
+                    },
+                    |data| data,
+                )
+        };
+
+        let (headers, stream) = generate_stream_with_callback(infer, Json(gen_req.into()), callback).await;
+        Ok((headers, Sse::new(stream).keep_alive(KeepAlive::default())).into_response())
+    } else {
         let (headers, generation) = generate(infer, Json(gen_req.into())).await?;
         // wrap generation inside a Vec to match api-inference
         Ok((headers, Json(vec![CompletionResponse::from(generation.0)])).into_response())
@@ -399,6 +409,21 @@ async fn generate_stream(
 ) -> (
     HeaderMap,
     Sse<impl Stream<Item = Result<Event, Infallible>>>,
 ) {
+    let callback = |resp: StreamResponse| {
+        Event::default().json_data(resp).unwrap()
+    };
+    let (headers, stream) = generate_stream_with_callback(infer, req, callback).await;
+    (headers, Sse::new(stream).keep_alive(KeepAlive::default()))
+}
+
+async fn generate_stream_with_callback(
+    infer: Extension<Infer>,
+    req: Json<GenerateRequest>,
+    callback: impl Fn(StreamResponse) -> Event,
+) -> (
+    HeaderMap,
+    impl Stream<Item = Result<Event, Infallible>>,
+) {
     let span = tracing::Span::current();
     let start_time = Instant::now();
     metrics::increment_counter!("lorax_request_count");
@@ -528,7 +553,7 @@ async fn generate_stream(
                             details
                         };
 
-                        yield Ok(Event::default().json_data(stream_token).unwrap());
+                        yield Ok(callback(stream_token));
                         break;
                     }
                 }
@@ -559,7 +584,7 @@ async fn generate_stream(
         }
     };
 
-    (headers, Sse::new(stream).keep_alive(KeepAlive::default()))
+    (headers, stream)
 }
 
 /// Prometheus metrics scrape endpoint

From 1d9494bb48b050d0f415de19d64b7a121f193f0e Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 15:08:23 -0800
Subject: [PATCH 8/9] fmt

---
 router/src/lib.rs    | 17 ++++++++++++++---
 router/src/server.rs | 19 ++++++++-----------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index e8350bbd0..ce5003bb6 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -441,7 +441,10 @@ impl From<CompletionRequest> for CompatGenerateRequest {
                 top_p: req.top_p,
                 typical_p: None,
                 do_sample: !req.n.is_none(),
-                max_new_tokens: req.max_tokens.map(|x| x as u32).unwrap_or(default_max_new_tokens()),
+                max_new_tokens: req
+                    .max_tokens
+                    .map(|x| x as u32)
+                    .unwrap_or(default_max_new_tokens()),
                 return_full_text: req.echo,
                 stop: req.stop,
                 truncate: None,
@@ -458,7 +461,11 @@ impl From<GenerateResponse> for CompletionResponse {
     fn from(resp: GenerateResponse) -> Self {
         let prompt_tokens = resp.details.as_ref().map(|x| x.prompt_tokens).unwrap_or(0);
-        let completion_tokens = resp.details.as_ref().map(|x| x.generated_tokens).unwrap_or(0);
+        let completion_tokens = resp
+            .details
+            .as_ref()
+            .map(|x| x.generated_tokens)
+            .unwrap_or(0);
         let total_tokens = prompt_tokens + completion_tokens;
 
         CompletionResponse {
@@ -484,7 +491,11 @@ impl From<StreamResponse> for CompletionStreamResponse {
     fn from(resp: StreamResponse) -> Self {
         let prompt_tokens = resp.details.as_ref().map(|x| x.prompt_tokens).unwrap_or(0);
-        let completion_tokens = resp.details.as_ref().map(|x| x.generated_tokens).unwrap_or(0);
+        let completion_tokens = resp
+            .details
+            .as_ref()
+            .map(|x| x.generated_tokens)
+            .unwrap_or(0);
         let total_tokens = prompt_tokens + completion_tokens;
 
         CompletionStreamResponse {
diff --git a/router/src/server.rs b/router/src/server.rs
index 2a8d89cc0..278bab6fe 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -3,9 +3,10 @@ use crate::health::Health;
 use crate::infer::{InferError, InferResponse, InferStreamResponse};
 use crate::validation::ValidationError;
 use crate::{
-    BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason,
-    GenerateParameters, GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken,
-    StreamDetails, StreamResponse, Token, Validation, CompletionRequest, CompletionResponse, CompletionStreamResponse,
+    BestOfSequence, CompatGenerateRequest, CompletionRequest, CompletionResponse,
+    CompletionStreamResponse, Details, ErrorResponse, FinishReason, GenerateParameters,
+    GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken, StreamDetails,
+    StreamResponse, Token, Validation,
 };
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
@@ -127,7 +128,8 @@ async fn completions_v1(
             )
         };
 
-        let (headers, stream) = generate_stream_with_callback(infer, Json(gen_req.into()), callback).await;
+        let (headers, stream) =
+            generate_stream_with_callback(infer, Json(gen_req.into()), callback).await;
         Ok((headers, Sse::new(stream).keep_alive(KeepAlive::default())).into_response())
     } else {
         let (headers, generation) = generate(infer, Json(gen_req.into())).await?;
@@ -410,9 +412,7 @@ async fn generate_stream(
     HeaderMap,
     Sse<impl Stream<Item = Result<Event, Infallible>>>,
 ) {
-    let callback = |resp: StreamResponse| {
-        Event::default().json_data(resp).unwrap()
-    };
+    let callback = |resp: StreamResponse| Event::default().json_data(resp).unwrap();
     let (headers, stream) = generate_stream_with_callback(infer, req, callback).await;
     (headers, Sse::new(stream).keep_alive(KeepAlive::default()))
 }
@@ -421,10 +421,7 @@ async fn generate_stream_with_callback(
     infer: Extension<Infer>,
     req: Json<GenerateRequest>,
     callback: impl Fn(StreamResponse) -> Event,
-) -> (
-    HeaderMap,
-    impl Stream<Item = Result<Event, Infallible>>,
-) {
+) -> (HeaderMap, impl Stream<Item = Result<Event, Infallible>>) {
     let span = tracing::Span::current();
     let start_time = Instant::now();
     metrics::increment_counter!("lorax_request_count");

From 579e071ea3e9309c766c63e7f141d8a4b417627d Mon Sep 17 00:00:00 2001
From: Travis Addair
Date: Tue, 9 Jan 2024 15:22:32 -0800
Subject: [PATCH 9/9] Tracing

---
 router/src/server.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index 278bab6fe..41f54d4c9 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -121,7 +121,7 @@ async fn completions_v1(
                 .json_data(CompletionStreamResponse::from(resp))
                 .map_or_else(
                     |err| {
-                        println!("Failed to serialize CompletionStreamResponse: {:?}", err);
+                        tracing::error!("Failed to serialize CompletionStreamResponse: {err}");
                         Event::default()
                     },
                     |data| data,