diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index b0788cccf0dde..49a1700a8316c 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -174,6 +174,6 @@ jobs: name: benchmark-results compression-level: 9 path: | - examples/server/bench/**/.png - examples/server/bench/**/.json - examples/server/bench/**/.log + examples/server/bench/*.png + examples/server/bench/*.json + examples/server/bench/*.log diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index c0e08ae197227..3a213cce9c43f 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -1,5 +1,4 @@ import argparse -import base64 import json import os import re @@ -13,6 +12,8 @@ from contextlib import closing from datetime import datetime +import matplotlib +import matplotlib.dates import matplotlib.pyplot as plt import requests @@ -109,6 +110,10 @@ def main(args_in: list[str] | None = None) -> None: for metric in metrics: resp = requests.get(f"http://localhost:9090/api/v1/query_range", params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2}) + + with open(f"{metric}.json", 'w') as metric_json: + metric_json.write(resp.text) + if resp.status_code != 200: print(f"bench: unable to extract prometheus metric {metric}: {resp.text}") else: @@ -131,6 +136,8 @@ def main(args_in: list[str] | None = None) -> None: f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n" f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n" f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True) + plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator()) + plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S")) plt.gcf().autofmt_xdate() # Remove borders