From 3eee475ee7e4f1614b639d6ab021f5e35e24f9c7 Mon Sep 17 00:00:00 2001
From: Richard Liu
Date: Fri, 6 Sep 2024 23:53:04 +0000
Subject: [PATCH 1/3] write json output in benchmark

---
 .../container/benchmark_serving.py           | 67 ++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index 8ae1109ee..0d32f717c 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -266,6 +266,42 @@ async def benchmark(
   await asyncio.gather(*tasks)
 
 
+def save_json_results(args: argparse.Namespace, benchmark_result):
+  # dimensions values are strings
+  dimensions_json = {}
+  # metrics values are numerical
+  metrics_json = {}
+
+  # Setup
+  current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
+  dimensions_json["date"] = current_dt
+  dimensions_json["backend"] = args.backend
+  dimensions_json["model_id"] = args.model
+  dimensions_json["tokenizer_id"] = args.tokenizer
+  if args.additional_metadata_metrics_to_save is not None:
+    dimensions_json = {
+        **dimensions_json,
+        **json.loads(args.additional_metadata_metrics_to_save),
+    }
+  metrics_json["num_prompts"] = args.num_prompts
+
+  # Traffic
+  metrics_json["request_rate"] = args.request_rate
+  metrics_json = {**metrics_json, **benchmark_result}
+
+  final_json = {}
+  final_json["metrics"] = metrics_json
+  final_json["dimensions"] = dimensions_json
+
+  # Save to file
+  base_model_id = args.model.split("/")[-1]
+  file_name = (
+      f"{args.backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json"
+  )
+  with open(file_name, "w", encoding="utf-8") as outfile:
+    json.dump(final_json, outfile)
+
+
 def main(args: argparse.Namespace):
   print(args)
   random.seed(args.seed)
@@ -305,24 +341,32 @@ def main(args: argparse.Namespace):
           args.model,
       )
   )
+  benchmark_result = {}
   benchmark_end_time = time.time()
   benchmark_time = benchmark_end_time - benchmark_start_time
   print(f"Total time: {benchmark_time:.2f} s")
   print(f"Requests/min: {60 * args.num_prompts / benchmark_time:.2f}")
+  benchmark_result['benchmark_time'] = benchmark_time
 
   total_output_tokens = np.sum([output_len for _, output_len, _ in
                                 REQUEST_LATENCY])
   output_tokens_per_min = 60 * total_output_tokens / benchmark_time
   print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
+  benchmark_result['total_output_token'] = total_output_tokens
+  benchmark_result['output_tokens_per_min'] = output_tokens_per_min
 
   total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in
                                REQUEST_LATENCY])
   input_tokens_per_min = 60 * total_input_tokens / benchmark_time
   print(f"Input_tokens/min: {input_tokens_per_min:.2f}")
+  benchmark_result['total_input_tokens'] = total_input_tokens
+  benchmark_result['input_tokens_per_min'] = input_tokens_per_min
 
   total_tokens = total_input_tokens + total_output_tokens
   tokens_per_min = 60 * total_tokens / benchmark_time
   print(f"Tokens/min: {tokens_per_min:.2f}")
+  benchmark_result['total_tokens'] = total_tokens
+  benchmark_result['tokens_per_min'] = tokens_per_min
 
   if args.machine_cost:
     print(
@@ -336,6 +380,7 @@ def main(args: argparse.Namespace):
       "Average seconds/request (includes waiting time on server):"
       f" {avg_latency:.2f}"
   )
+  benchmark_result['avg_latency'] = avg_latency
 
   avg_per_token_latency = np.mean([
       latency / (prompt_len + output_len)
@@ -345,6 +390,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/token (includes waiting time on server):"
       f" {1000 * avg_per_token_latency:.2f}"
   )
+  benchmark_result['avg_per_token_latency'] = avg_per_token_latency
 
   avg_per_output_token_latency = np.mean(
       [latency / output_len for _, output_len, latency in REQUEST_LATENCY]
   )
@@ -353,6 +399,7 @@ def main(args: argparse.Namespace):
       "Average milliseconds/output_token (includes waiting time on server):"
       f" {1000 * avg_per_output_token_latency:.2f}"
   )
+  benchmark_result['avg_per_output_token_latency'] = avg_per_output_token_latency
 
   avg_input_len = np.mean(
       [prompt_len for prompt_len, _, _ in REQUEST_LATENCY]
   )
@@ -361,6 +408,7 @@ def main(args: argparse.Namespace):
       "Average input length:"
       f" {avg_input_len:.2f}"
   )
+  benchmark_result['avg_input_len'] = avg_input_len
 
   avg_output_len = np.mean(
       [output_len for _, output_len, _ in REQUEST_LATENCY]
   )
@@ -369,6 +417,10 @@ def main(args: argparse.Namespace):
       "Average output length:"
       f" {avg_output_len:.2f}"
   )
+  benchmark_result['avg_output_len'] = avg_output_len
+
+  if args.save_json_results:
+    save_json_results()
 
 
 if __name__ == "__main__":
@@ -479,6 +531,19 @@ def main(args: argparse.Namespace):
           " and max_output_length."
       ),
   )
+  parser.add_argument(
+      "--save-json-results",
+      type=bool,
+      default=False,
+      help="Whether to save benchmark results to a json file.",
+  )
+  parser.add_argument(
+      "--additional-metadata-metrics-to-save",
+      type=str,
+      help=(
+          "Additional metadata about the workload. Should be a dictionary in"
+          " the form of a string."
+      ),
+  )
   cmd_args = parser.parse_args()
   main(cmd_args)
- 
\ No newline at end of file

From a0ebeb585d443f6d425bb250238e6f08f964dcbf Mon Sep 17 00:00:00 2001
From: Richard Liu
Date: Sat, 7 Sep 2024 00:28:58 +0000
Subject: [PATCH 2/3] fix bugs

---
 .../tools/profile-generator/container/benchmark_serving.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index 0d32f717c..f92ab49dc 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -7,6 +7,7 @@
 
 import argparse
 import asyncio
+from datetime import datetime
 import json
 import random
 import time
@@ -420,7 +421,7 @@ def main(args: argparse.Namespace):
   benchmark_result['avg_output_len'] = avg_output_len
 
   if args.save_json_results:
-    save_json_results()
+    save_json_results(args, benchmark_result)
 
 
 if __name__ == "__main__":
@@ -533,8 +534,7 @@ def main(args: argparse.Namespace):
   )
   parser.add_argument(
       "--save-json-results",
-      type=bool,
-      default=False,
+      action="store_true",
       help="Whether to save benchmark results to a json file.",
   )
   parser.add_argument(

From bf94d9cdb5f0c793810dc875188404350461f709 Mon Sep 17 00:00:00 2001
From: Richard Liu
Date: Sat, 7 Sep 2024 00:46:27 +0000
Subject: [PATCH 3/3] fix

---
 .../tools/profile-generator/container/benchmark_serving.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
index f92ab49dc..5f521058b 100644
--- a/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
+++ b/benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
@@ -353,20 +353,20 @@ def main(args: argparse.Namespace):
                                 REQUEST_LATENCY])
   output_tokens_per_min = 60 * total_output_tokens / benchmark_time
   print(f"Output_tokens/min: {output_tokens_per_min:.2f}")
-  benchmark_result['total_output_token'] = total_output_tokens
+  benchmark_result['total_output_token'] = int(total_output_tokens)
   benchmark_result['output_tokens_per_min'] = output_tokens_per_min
 
   total_input_tokens = np.sum([prompt_len for prompt_len, _, _ in
                                REQUEST_LATENCY])
   input_tokens_per_min = 60 * total_input_tokens / benchmark_time
   print(f"Input_tokens/min: {input_tokens_per_min:.2f}")
-  benchmark_result['total_input_tokens'] = total_input_tokens
+  benchmark_result['total_input_tokens'] = int(total_input_tokens)
   benchmark_result['input_tokens_per_min'] = input_tokens_per_min
 
   total_tokens = total_input_tokens + total_output_tokens
   tokens_per_min = 60 * total_tokens / benchmark_time
   print(f"Tokens/min: {tokens_per_min:.2f}")
-  benchmark_result['total_tokens'] = total_tokens
+  benchmark_result['total_tokens'] = int(total_tokens)
   benchmark_result['tokens_per_min'] = tokens_per_min
 
   if args.machine_cost:
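
Note (reviewer sketch, not part of the patches themselves): save_json_results() from PATCH 1 writes one file per run, named {backend}-{request_rate}qps-{base_model_id}-{timestamp}.json, with string-valued metadata under "dimensions" and numeric results under "metrics". The int() casts in PATCH 3 are what make that file writable: np.sum() returns NumPy integer types, which json.dump() cannot serialize, so the token totals are converted to plain Python ints first. A minimal, hypothetical consumer of the output is sketched below; the script name, glob pattern, and field accesses are assumptions that simply mirror what save_json_results() emits, and it presumes the benchmark was run with --save-json-results in the current directory.

# summarize_results.py -- illustrative sketch only, not part of the patch series.
# Reads every result file written by benchmark_serving.py --save-json-results
# in the current directory and prints a one-line summary per run.
import glob
import json

for path in sorted(glob.glob("*qps-*.json")):
  with open(path, "r", encoding="utf-8") as f:
    result = json.load(f)
  dims = result["dimensions"]    # date, backend, model_id, tokenizer_id, extra metadata
  metrics = result["metrics"]    # num_prompts, request_rate, plus the benchmark_result values
  print(
      f"{dims['backend']} {dims['model_id']} @ {metrics['request_rate']} qps: "
      f"{metrics['tokens_per_min']:.1f} tokens/min, "
      f"avg latency {metrics['avg_latency']:.2f} s ({path})"
  )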