Skip to content

Commit

Permalink
server: bench: change max prompt, use pre downloaded models
Browse files Browse the repository at this point in the history
  • Loading branch information
phymbert committed Mar 25, 2024
1 parent 48db7da commit 82c1e40
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ jobs:
--port 8080 \
--hf-repo ggml-org/models \
--hf-file phi-2/ggml-model-q4_0.gguf \
--model ggml-model.gguf \
--model /models/phi-2/ggml-model-q4_0.gguf \
--metrics \
--parallel 8 \
--batch-size 2048 \
--ubatch-size 256 \
--n-predict 4096 \
--n-predict 2048 \
--ctx-size 16384 \
--defrag-thold 0.8 \
--defrag-thold 0.1 \
--log-format text \
--log-format text \
-ngl 33 &
Expand All @@ -117,6 +117,6 @@ jobs:
cd examples/server/bench
SERVER_BENCH_N_PROMPTS=1000 \
SERVER_BENCH_MAX_PROMPT_TOKENS=1024 \
SERVER_BENCH_MAX_CONTEXT=4096 \
SERVER_BENCH_MAX_TOKENS=4096 \
SERVER_BENCH_MAX_CONTEXT=2048 \
SERVER_BENCH_MAX_TOKENS=1024 \
../../../k6 run script.js --duration 10m --iterations 1000 --vus 8
16 changes: 16 additions & 0 deletions examples/server/bench/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import argparse


def main(args_in: list[str] | None = None) -> None:
    """Parse the command-line options and pass them to ``start_mainloop``.

    Three options are declared, all of them required: ``--token``,
    ``--repo`` and ``--runner-label``. The last one uses the ``append``
    action, so it may be given several times and is collected into a list.

    Args:
        args_in: Argument strings to parse. When ``None``, argparse falls
            back to the process command line.

    NOTE(review): ``start_mainloop`` is neither defined nor imported in the
    visible part of this file -- confirm it is provided elsewhere.
    """
    cli = argparse.ArgumentParser(
        description="Start a github self-hosted runner using JIT based on a repo events",
    )

    # Single-valued, mandatory options.
    cli.add_argument("--token", required=True, type=str, help="GitHub token")
    cli.add_argument("--repo", required=True, type=str, help="GitHub repository")

    # Repeatable option: each occurrence is appended to the resulting list.
    cli.add_argument(
        "--runner-label",
        required=True,
        type=str,
        action="append",
        help="GitHub Runner group",
    )

    parsed = cli.parse_args(args_in)
    start_mainloop(parsed)


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 82c1e40

Please sign in to comment.