
Commit f17a1a8

[Misc] Make Serving Benchmark More User-friendly (vllm-project#5044)
1 parent: d5a1697

File tree

2 files changed (+32, -3 lines)


benchmarks/backend_request_func.py (+6)
@@ -89,6 +89,9 @@ async def async_request_tgi(
                     output.latency = most_recent_timestamp - st
                     output.success = True
                     output.generated_text = data["generated_text"]
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
@@ -276,6 +279,9 @@ async def async_request_openai_completions(
                     output.generated_text = generated_text
                     output.success = True
                     output.latency = latency
+                else:
+                    output.error = response.reason or ""
+                    output.success = False
         except Exception:
             output.success = False
             exc_info = sys.exc_info()
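Both hunks apply the same fix: when the server answers with a non-200 status, the benchmark now records the HTTP reason phrase instead of only reporting a generic failure. The `or ""` guard matters because an aiohttp response's `reason` attribute can be `None`. A minimal, self-contained sketch of the pattern, using stand-in types rather than the real vLLM/aiohttp ones:

from dataclasses import dataclass
from typing import Optional

@dataclass
class RequestFuncOutput:
    # stand-in for the benchmark's per-request output record
    generated_text: str = ""
    success: bool = False
    error: str = ""

class FakeResponse:
    # stand-in for an aiohttp response; `reason` may be None
    def __init__(self, status: int, reason: Optional[str] = None):
        self.status = status
        self.reason = reason

def handle(response: FakeResponse) -> RequestFuncOutput:
    output = RequestFuncOutput()
    if response.status == 200:
        output.success = True
    else:
        output.error = response.reason or ""  # coerce None to ""
        output.success = False
    return output

print(handle(FakeResponse(404, "Not Found")).error)  # -> Not Found
print(handle(FakeResponse(500)).error)               # -> "" (reason was None)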

benchmarks/benchmark_serving.py (+26, -3)
@@ -215,6 +215,11 @@ def calculate_metrics(
         else:
             actual_output_lens.append(0)
 
+    if completed == 0:
+        warnings.warn(
+            "All requests failed. This is likely due to a misconfiguration "
+            "on the benchmark arguments.",
+            stacklevel=2)
     metrics = BenchmarkMetrics(
         completed=completed,
         total_input=total_input,
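This guard warns as soon as every request has failed, rather than silently printing a metrics table of zeros. `stacklevel=2` makes Python attribute the warning to the code that called calculate_metrics, which is the more useful location when debugging. A runnable illustration of that behavior (function names here are illustrative):

import warnings

def calculate_metrics(completed: int) -> None:
    if completed == 0:
        warnings.warn(
            "All requests failed. This is likely due to a misconfiguration "
            "on the benchmark arguments.",
            stacklevel=2)  # report the caller's line, not this one

def main() -> None:
    calculate_metrics(completed=0)  # the UserWarning points at this line

main()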
@@ -226,9 +231,9 @@
             1000,  # ttfts is empty if streaming is not supported by backend
         median_ttft_ms=np.median(ttfts or 0) * 1000,
         p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
-        mean_tpot_ms=np.mean(tpots) * 1000,
-        median_tpot_ms=np.median(tpots) * 1000,
-        p99_tpot_ms=np.percentile(tpots, 99) * 1000,
+        mean_tpot_ms=np.mean(tpots or 0) * 1000,
+        median_tpot_ms=np.median(tpots or 0) * 1000,
+        p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
     )
 
     return metrics, actual_output_lens
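The `tpots or 0` change mirrors what was already done for `ttfts`: when no request succeeds, `tpots` is an empty list, and NumPy's reductions over it return nan with RuntimeWarnings (percentile of an empty input can even raise, depending on the NumPy version). Since an empty list is falsy, `tpots or 0` substitutes the scalar 0, so the summary degrades to zeros instead. A quick demonstration:

import numpy as np

tpots = []  # all requests failed: no time-per-output-token samples
print(np.mean(tpots or 0) * 1000)            # 0.0 instead of nan
print(np.median(tpots or 0) * 1000)          # 0.0
print(np.percentile(tpots or 0, 99) * 1000)  # 0.0

tpots = [0.010, 0.020]  # normal case: a non-empty list passes through `or`
print(np.mean(tpots or 0) * 1000)            # 15.0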
@@ -250,6 +255,24 @@ async def benchmark(
     else:
         raise ValueError(f"Unknown backend: {backend}")
 
+    print("Starting initial single prompt test run...")
+    test_prompt, test_prompt_len, test_output_len = input_requests[0]
+    test_input = RequestFuncInput(
+        model=model_id,
+        prompt=test_prompt,
+        api_url=api_url,
+        prompt_len=test_prompt_len,
+        output_len=test_output_len,
+        best_of=best_of,
+        use_beam_search=use_beam_search,
+    )
+    test_output = await request_func(request_func_input=test_input)
+    if not test_output.success:
+        raise ValueError(
+            "Initial test run failed - Please make sure benchmark arguments "
+            f"are correctly specified. Error: {test_output.error}")
+    else:
+        print("Initial test run completed. Starting main benchmark run...")
     print(f"Traffic request rate: {request_rate}")
 
     pbar = None if disable_tqdm else tqdm(total=len(input_requests))
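The largest change is a single-prompt smoke test before the main run: one request is sent first, and if it fails the benchmark aborts immediately with the server's error message rather than firing off the whole workload and reporting all-failed metrics afterwards. A stripped-down sketch of that fail-fast shape; the request function and output type below are stand-ins, not the vLLM API:

import asyncio
from dataclasses import dataclass

@dataclass
class Output:  # stand-in for RequestFuncOutput
    success: bool
    error: str = ""

async def send_request(prompt: str) -> Output:
    # stand-in request function; pretend the server is unreachable
    return Output(success=False, error="connection refused")

async def benchmark(prompts: list) -> None:
    print("Starting initial single prompt test run...")
    test_output = await send_request(prompts[0])
    if not test_output.success:
        raise ValueError(
            "Initial test run failed - Please make sure benchmark arguments "
            f"are correctly specified. Error: {test_output.error}")
    print("Initial test run completed. Starting main benchmark run...")
    for prompt in prompts:
        await send_request(prompt)

asyncio.run(benchmark(["hello"]))  # raises ValueError before the main loop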
