From 82c1e404e2f7a3de1a82b368197a96729c785621 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Mon, 25 Mar 2024 07:51:10 +0100 Subject: [PATCH] server: bench: change max prompt, use pre downloaded models --- .github/workflows/bench.yml | 10 +++++----- examples/server/bench/bench.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 examples/server/bench/bench.py diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 6cca7742a93b8..9b907a3d574d9 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -94,14 +94,14 @@ jobs: --port 8080 \ --hf-repo ggml-org/models \ --hf-file phi-2/ggml-model-q4_0.gguf \ - --model ggml-model.gguf \ + --model /models/phi-2/ggml-model-q4_0.gguf \ --metrics \ --parallel 8 \ --batch-size 2048 \ --ubatch-size 256 \ - --n-predict 4096 \ + --n-predict 2048 \ --ctx-size 16384 \ - --defrag-thold 0.8 \ + --defrag-thold 0.1 \ --log-format text \ --log-format text \ -ngl 33 & @@ -117,6 +117,6 @@ jobs: cd examples/server/bench SERVER_BENCH_N_PROMPTS=1000 \ SERVER_BENCH_MAX_PROMPT_TOKENS=1024 \ - SERVER_BENCH_MAX_CONTEXT=4096 \ - SERVER_BENCH_MAX_TOKENS=4096 \ + SERVER_BENCH_MAX_CONTEXT=2048 \ + SERVER_BENCH_MAX_TOKENS=1024 \ ../../../k6 run script.js --duration 10m --iterations 1000 --vus 8 diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py new file mode 100644 index 0000000000000..aca6eeeb54af3 --- /dev/null +++ b/examples/server/bench/bench.py @@ -0,0 +1,16 @@ +import argparse + + +def main(args_in: list[str] | None = None) -> None: + parser = argparse.ArgumentParser(description="Start a github self-hosted runner using JIT based on a repo events") + parser.add_argument("--token", type=str, help="GitHub token", required=True) + parser.add_argument("--repo", type=str, help="GitHub repository", required=True) + parser.add_argument("--runner-label", type=str, action="append", help="GitHub Runner group", required=True) + + args = parser.parse_args(args_in) + + start_mainloop(args) + + +if __name__ == '__main__': + main()