From bf94d9cdb5f0c793810dc875188404350461f709 Mon Sep 17 00:00:00 2001 From: Richard Liu Date: Sat, 7 Sep 2024 00:46:27 +0000 Subject: [PATCH] fix --- .../tools/profile-generator/container/benchmark_serving.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py index f92ab49dc..5f521058b 100644 --- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py +++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py @@ -353,20 +353,20 @@ def main(args: argparse.Namespace): REQUEST_LATENCY]) output_tokens_per_min = 60 * total_output_tokens / benchmark_time print(f"Output_tokens/min: {output_tokens_per_min:.2f}") - benchmark_result['total_output_token'] = total_output_tokens + benchmark_result['total_output_token'] = int(total_output_tokens) benchmark_result['output_tokens_per_min'] = output_tokens_per_min total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in REQUEST_LATENCY]) input_tokens_per_min = 60 * total_input_tokens / benchmark_time print(f"Input_tokens/min: {input_tokens_per_min:.2f}") - benchmark_result['total_input_tokens'] = total_input_tokens + benchmark_result['total_input_tokens'] = int(total_input_tokens) benchmark_result['input_tokens_per_min'] = input_tokens_per_min total_tokens = total_input_tokens + total_output_tokens tokens_per_min = 60 * total_tokens / benchmark_time print(f"Tokens/min: {tokens_per_min:.2f}") - benchmark_result['total_tokens'] = total_tokens + benchmark_result['total_tokens'] = int(total_tokens) benchmark_result['tokens_per_min'] = tokens_per_min if args.machine_cost: