From aa97a78cc601202721e0c6b59df0ba60eb9cfa7c Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Mon, 12 Aug 2024 11:04:52 -0700 Subject: [PATCH 1/3] include prompt and generated tokens as part of logs --- router/src/server.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/router/src/server.rs b/router/src/server.rs index 840bfb353..0a6c109d1 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -528,13 +528,15 @@ async fn generate( let inference_time = Instant::now() - response.start; let time_per_token = inference_time / response.generated_text.generated_tokens; - // Tracing metadata + // Rust Tracing metadata span.record("total_time", format!("{total_time:?}")); span.record("validation_time", format!("{validation_time:?}")); span.record("queue_time", format!("{queue_time:?}")); span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", response.generated_text.seed)); + span.record("prompt_tokens", format!("{prompt_tokens:?}")); + span.record("generated_tokens", format!("{generated_tokens:?}")); // Headers let mut headers = HeaderMap::new(); @@ -849,6 +851,8 @@ async fn generate_stream_with_callback( span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", generated_text.seed)); + span.record("prompt_tokens", format!("{prompt_tokens:?}")); + span.record("generated_tokens", format!("{generated_tokens:?}")); // Metrics metrics::increment_counter!("lorax_request_success"); From cc7a4c31099ef30ff08267044296951757121f1c Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Mon, 12 Aug 2024 11:11:10 -0700 Subject: [PATCH 2/3] fix build --- router/src/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index 0a6c109d1..0f5a63dc1 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -851,8 +851,8 @@ async fn generate_stream_with_callback( span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", generated_text.seed)); - span.record("prompt_tokens", format!("{prompt_tokens:?}")); - span.record("generated_tokens", format!("{generated_tokens:?}")); + span.record("prompt_tokens", format!("{prefill_tokens_length:?}")); + span.record("generated_tokens", format!("{:?}", generated_text.generated_tokens)); // Metrics metrics::increment_counter!("lorax_request_success"); From 0320fb3efb4ad76140bb2c6478a279ce280aa59f Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Mon, 12 Aug 2024 11:16:17 -0700 Subject: [PATCH 3/3] fix format --- router/src/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index 0f5a63dc1..1fed64a7f 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -535,8 +535,8 @@ async fn generate( span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", response.generated_text.seed)); - span.record("prompt_tokens", format!("{prompt_tokens:?}")); - span.record("generated_tokens", format!("{generated_tokens:?}")); + span.record("prompt_tokens", format!("{prompt_tokens:?}")); + span.record("generated_tokens", format!("{generated_tokens:?}")); // Headers let mut headers = HeaderMap::new();