Skip to content

Commit

Permalink
ci: test mermaid
Browse files Browse the repository at this point in the history
  • Loading branch information
phymbert committed Mar 25, 2024
1 parent f75166e commit d4bd981
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 44 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
--branch ${{ github.head_ref || github.ref_name }} \
--commit ${{ github.sha }} \
--scenario script.js \
--duration 10m \
--duration 2m \
--hf-repo ggml-org/models \
--hf-file phi-2/ggml-model-q4_0.gguf \
--model-path-prefix /models \
Expand All @@ -116,6 +116,11 @@ jobs:
--max-tokens 2048
cat results.github.env >> $GITHUB_ENV
echo PROMPT_TOKENS_SECONDS_=${PROMPT_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
echo PREDICTED_TOKENS_SECONDS_=${PREDICTED_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//<br>/\n} >> $GITHUB_ENV
echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//<br>/\n} >> $GITHUB_ENV
- uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -156,14 +161,20 @@ jobs:
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
message: |
📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
            - ${{ env.BENCH_GRAPH_TITLE }}
            - ${{ env.BENCH_GRAPH_YLABEL }}
<p align="center">
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
${{ env.PROMPT_TOKENS_SECONDS_ }}
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
${{ env.PREDICTED_TOKENS_SECONDS_ }}
</p>
<details>
<summary>Details</summary>
<p align="center">
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
${{ env.KV_CACHE_USAGE_RATIO_ }}
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
${{ env.REQUESTS_PROCESSING_ }}
</p>
            </details>
105 changes: 62 additions & 43 deletions examples/server/bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,49 +107,63 @@ def main(args_in: list[str] | None = None) -> None:
metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']

for metric in metrics:
resp = requests.get(f"http://localhost:9090/api/v1/query_range",
params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})

with open(f"{metric}.json", 'w') as metric_json:
metric_json.write(resp.text)

if resp.status_code != 200:
print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
else:
metric_data = resp.json()
values = metric_data['data']['result'][0]['values']
timestamps, metric_values = zip(*values)
metric_values = [float(value) for value in metric_values]
timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
plt.figure(figsize=(16, 10), dpi=80)
plt.plot(timestamps, metric_values, label=metric)
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
plt.yticks(fontsize=12, alpha=.7)


plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
f"duration={args.duration} {iterations} iterations",
fontsize=14, wrap=True)
plt.grid(axis='both', alpha=.3)
plt.ylabel(f"llamacpp:{metric}", fontsize=22)
plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
plt.gcf().autofmt_xdate()

# Remove borders
plt.gca().spines["top"].set_alpha(0.0)
plt.gca().spines["bottom"].set_alpha(0.3)
plt.gca().spines["right"].set_alpha(0.0)
plt.gca().spines["left"].set_alpha(0.3)

# Save the plot as a PNG image
plt.savefig(f'{metric}.png')
plt.close()
with open("results.github.env", 'a') as github_env:
for metric in metrics:
resp = requests.get(f"http://localhost:9090/api/v1/query_range",
params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})

with open(f"{metric}.json", 'w') as metric_json:
metric_json.write(resp.text)

if resp.status_code != 200:
print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
else:
metric_data = resp.json()
values = metric_data['data']['result'][0]['values']
timestamps, metric_values = zip(*values)
metric_values = [float(value) for value in metric_values]
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
plt.figure(figsize=(16, 10), dpi=80)
plt.plot(timestamps_dt, metric_values, label=metric)
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
plt.yticks(fontsize=12, alpha=.7)

title = (f"llama.cpp {args.name} on {args.runner_label}\n "
f"duration={args.duration} {iterations} iterations")
ylabel = f"llamacpp:{metric}"
xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
f"branch={args.branch} commit={args.commit}")
plt.title(title,
fontsize=14, wrap=True)
plt.grid(axis='both', alpha=.3)
plt.ylabel(ylabel, fontsize=22)
plt.xlabel(xlabel, fontsize=14, wrap=True)
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
plt.gcf().autofmt_xdate()

# Remove borders
plt.gca().spines["top"].set_alpha(0.0)
plt.gca().spines["bottom"].set_alpha(0.3)
plt.gca().spines["right"].set_alpha(0.0)
plt.gca().spines["left"].set_alpha(0.3)

# Save the plot as a PNG image
plt.savefig(f'{metric}.png')
plt.close()

# Mermaid format in case image failed
mermaid = f"""```mermaid
xychart-beta
title "{title}"
x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y%m%d %H:%M:%S") for ts in timestamps])}"]
y-axis "{ylabel}"
line [{', '.join([str(round(float(value))) for value in metric_values])}]
```
"""
mermaid = mermaid.replace('\n', "<br>")
github_env.write(f"{metric.upper()}={mermaid}\n")

# 140 chars max for commit status description
bench_results = {
Expand All @@ -170,6 +184,11 @@ def main(args_in: list[str] | None = None) -> None:
github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
github_env.write(f"BENCH_ITERATIONS={iterations}\n")

title = title.replace('\n', '<br>')
ylabel = ylabel.replace('\n', '<br>')
github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
github_env.write(f"BENCH_GRAPH_YLABEL={ylabel}\n")


def start_benchmark(args):
k6_path = 'k6'
Expand Down

0 comments on commit d4bd981

Please sign in to comment.