fix: Fix Slurm scripts. Add public image
Hugoch committed Sep 30, 2024
1 parent f98d37c commit 109ccbf
Showing 7 changed files with 24 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
@@ -44,6 +44,7 @@ jobs:
with:
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
ghcr.io/huggingface/text-generation-inference-benchmark
tags: |
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
# If main, release or tag
@@ -56,6 +57,7 @@ jobs:
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
ghcr.io/huggingface/text-generation-inference-benchmark
tags: |
type=semver,pattern={{version}}${{ env.LABEL }}
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
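The workflow now publishes the benchmark image to the public GitHub Container Registry in addition to the internal registry. A minimal sketch of pulling the public image; the tag is illustrative, and the tags actually available follow the rules above (sha-<short>, semver on tags, latest on releases):

    # Pull the newly public benchmark image from GHCR (tag is illustrative).
    docker pull ghcr.io/huggingface/text-generation-inference-benchmark:latest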
3 changes: 2 additions & 1 deletion .gitignore
@@ -169,4 +169,5 @@ Cargo.lock
.idea
*.json
*.txt
results
results
*.parquet
3 changes: 3 additions & 0 deletions Dockerfile
@@ -1,4 +1,7 @@
FROM rust:1-bullseye AS builder
LABEL org.opencontainers.image.source=https://github.com/huggingface/text-generation-inference-benchmark
LABEL org.opencontainers.image.description="A benchmark tool for LLM inference engines"
LABEL org.opencontainers.image.licenses="Apache-2.0"
WORKDIR /usr/src/text-generation-inference-benchmark
COPY . .
RUN cargo install --path .
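The new OCI labels tie the published image back to this repository and record its description and license. A quick sketch for checking them on a locally built image (the local image name is illustrative):

    # Build the image and print its OCI labels.
    docker build -t tgi-benchmark-local .
    docker inspect --format '{{ json .Config.Labels }}' tgi-benchmark-local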
7 changes: 4 additions & 3 deletions extra/dashboard/app.py
@@ -94,8 +94,8 @@ def load_data() -> pd.DataFrame:
{"rate": [1, 2], "inter_token_latency_ms_p90": [10, 20], "engine": ["tgi", "vllm"]})
df = load_data()
models = df["model"].unique()
devices= df["device"].unique()
with gr.Blocks(css=css) as demo:
devices = df["device"].unique()
with gr.Blocks(css=css, title="TGI benchmarks") as demo:
with gr.Row():
header = gr.Markdown("# TGI benchmarks\nBenchmark results for Hugging Face TGI 🤗")
with gr.Row():
@@ -119,7 +119,8 @@ def load_data() -> pd.DataFrame:
gs = stack.enter_context(gr.Row())
line_plots.append(
{"component": gr.LinePlot(default_df, label=f'{v.title}', x="rate", y=k,
color="engine", y_title=v.y_title, color_map={'vLLM':'#2F5BA1','TGI':'#FF9D00'}), "model": model.value,
color="engine", y_title=v.y_title,
color_map={'vLLM': '#2F5BA1', 'TGI': '#FF9D00'}), "model": model.value,
"device": device})
i += 1

1 change: 1 addition & 0 deletions extra/slurm/benchmark.py
@@ -41,6 +41,7 @@ def main():
f'{engine}.slurm']
env = os.environ.copy()
env['MODEL'] = model[0]
env['TP'] = str(gpus)
process = subprocess.run(args, capture_output=True,
env=env)
print(process.stdout.decode())
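The launcher now passes the tensor-parallelism degree to the Slurm scripts through the TP environment variable, alongside MODEL. A rough manual equivalent of what benchmark.py does when submitting a job (the model id and GPU count are illustrative, and the exact submit command in benchmark.py is not shown in this hunk; sbatch exports the caller's environment by default):

    # Run the TGI benchmark job by hand with the same variables the launcher sets.
    export MODEL=meta-llama/Meta-Llama-3-8B-Instruct   # illustrative model id
    export TP=2                                        # tensor-parallel degree, matching the GPU count
    sbatch extra/slurm/tgi.slurm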
8 changes: 6 additions & 2 deletions extra/slurm/tgi.slurm
@@ -13,6 +13,11 @@ if [ -z "$MODEL" ]; then
exit 1
fi

if [ -z "$TP" ]; then
echo "TP environment variable is not set"
exit 1
fi

echo "Starting TGI benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
@@ -66,8 +71,7 @@ if [[ $exit_code != 124 ]]; then
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"TGI\"" \
--extra-meta "version=$VERSION,engine=\"TGI\",tp=$TP" \
--no-console
fi

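The per-key --extra-meta flags are folded into a single comma-separated key=value list that now also records the tensor-parallelism degree. An illustration of how the value expands (the VERSION and TP values below are made up):

    # Show the expanded metadata argument; the escaped quotes around the
    # engine name stay literally in the value.
    VERSION=v1.4.0
    TP=2
    echo --extra-meta "version=$VERSION,engine=\"TGI\",tp=$TP"
    # prints: --extra-meta version=v1.4.0,engine="TGI",tp=2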
8 changes: 6 additions & 2 deletions extra/slurm/vllm.slurm
@@ -14,6 +14,11 @@ if [ -z "$MODEL" ]; then
exit 1
fi

if [ -z "$TP" ]; then
echo "TP environment variable is not set"
exit 1
fi

echo "Starting vLLM benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
@@ -65,8 +70,7 @@ if [[ $exit_code != 124 ]]; then
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"vLLM\"" \
--extra-meta "version=$VERSION,engine=\"vLLM\",tp=$TP" \
--no-console
fi
