From da3c8bd7dd3d1c50bff94053dfdcf51813e87194 Mon Sep 17 00:00:00 2001
From: DearPlanet
Date: Thu, 27 Jun 2024 14:39:02 +0800
Subject: [PATCH] fix(benchmark): fix TTFT calculation error when chat service
 cannot respond with a valid token.

---
 benchmarks/backend_request_func.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 4350b96b04a6a..7a2b4b303429d 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -253,6 +253,7 @@ async def async_request_openai_completions(
             async with session.post(url=api_url, json=payload,
                                     headers=headers) as response:
                 if response.status == 200:
+                    first_chunk_received = False
                     async for chunk_bytes in response.content:
                         chunk_bytes = chunk_bytes.strip()
                         if not chunk_bytes:
@@ -268,7 +269,8 @@ async def async_request_openai_completions(
                             if data["choices"][0]["text"]:
                                 timestamp = time.perf_counter()
                                 # First token
-                                if ttft == 0.0:
+                                if not first_chunk_received:
+                                    first_chunk_received = True
                                     ttft = time.perf_counter() - st
                                     output.ttft = ttft
 
@@ -282,9 +284,14 @@ async def async_request_openai_completions(
 
                                 most_recent_timestamp = timestamp
                                 generated_text += data["choices"][0]["text"]
-
+                    if first_chunk_received:
+                        output.success = True
+                    else:
+                        output.success = False
+                        output.error = (
+                            "Never received a valid chunk to calculate TTFT."
+                            "This response will be marked as failed!")
                     output.generated_text = generated_text
-                    output.success = True
                     output.latency = latency
                 else:
                     output.error = response.reason or ""
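
The change above amounts to tracking whether any valid token chunk ever arrived, instead of testing ttft == 0.0, and marking the request as failed when no token was received. A minimal standalone sketch of that logic follows; Output and consume() are simplified stand-ins for RequestFuncOutput and the streaming loop, not vLLM APIs:

# Sketch of the first-chunk tracking logic; chunks is a stand-in for the
# decoded SSE token payloads of one streamed completion.
import time
from dataclasses import dataclass, field
from typing import List


@dataclass
class Output:
    ttft: float = 0.0
    itl: List[float] = field(default_factory=list)
    generated_text: str = ""
    success: bool = False
    error: str = ""


def consume(chunks: List[str]) -> Output:
    output = Output()
    st = time.perf_counter()
    most_recent_timestamp = st
    first_chunk_received = False
    generated_text = ""

    for text in chunks:
        if not text:
            # Usage-only or empty chunks carry no token; skip them.
            continue
        timestamp = time.perf_counter()
        if not first_chunk_received:
            # First token: record TTFT exactly once.
            first_chunk_received = True
            output.ttft = timestamp - st
        else:
            # Decoding phase: record inter-token latency.
            output.itl.append(timestamp - most_recent_timestamp)
        most_recent_timestamp = timestamp
        generated_text += text

    if first_chunk_received:
        output.success = True
    else:
        # No valid token ever arrived, so TTFT cannot be computed;
        # the request is marked as failed instead of reporting ttft == 0.
        output.success = False
        output.error = "Never received a valid chunk to calculate TTFT."
    output.generated_text = generated_text
    return output


if __name__ == "__main__":
    print(consume(["Hello", " world"]).success)  # True, ttft > 0
    print(consume(["", ""]).success)              # False, marked as failed

With this behavior, a response that never yields a valid token no longer contributes a zero TTFT to the benchmark statistics; it is excluded as a failed request instead.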