diff --git a/router/src/server.rs b/router/src/server.rs index 840bfb353..1fed64a7f 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -528,13 +528,15 @@ async fn generate( let inference_time = Instant::now() - response.start; let time_per_token = inference_time / response.generated_text.generated_tokens; - // Tracing metadata + // Rust Tracing metadata span.record("total_time", format!("{total_time:?}")); span.record("validation_time", format!("{validation_time:?}")); span.record("queue_time", format!("{queue_time:?}")); span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", response.generated_text.seed)); + span.record("prompt_tokens", format!("{prompt_tokens:?}")); + span.record("generated_tokens", format!("{generated_tokens:?}")); // Headers let mut headers = HeaderMap::new(); @@ -849,6 +851,8 @@ async fn generate_stream_with_callback( span.record("inference_time", format!("{inference_time:?}")); span.record("time_per_token", format!("{time_per_token:?}")); span.record("seed", format!("{:?}", generated_text.seed)); + span.record("prompt_tokens", format!("{prefill_tokens_length:?}")); + span.record("generated_tokens", format!("{:?}", generated_text.generated_tokens)); // Metrics metrics::increment_counter!("lorax_request_success");