fix: Fix Slurm scripts. Add public image
Hugoch committed Sep 30, 2024
1 parent f98d37c commit 109ccbf
Showing 7 changed files with 24 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
@@ -44,6 +44,7 @@ jobs:
with:
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
ghcr.io/huggingface/text-generation-inference-benchmark
tags: |
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
# If main, release or tag
@@ -56,6 +57,7 @@ jobs:
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
ghcr.io/huggingface/text-generation-inference-benchmark
tags: |
type=semver,pattern={{version}}${{ env.LABEL }}
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
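The workflow now publishes the benchmark image to the public GitHub Container Registry in addition to the internal registry. A minimal sketch of pulling the public image; the tag is illustrative, and the tags actually available follow the rules above (sha-<short>, semver on tags, latest on releases):

    # Pull the newly public benchmark image from GHCR (tag is illustrative).
    docker pull ghcr.io/huggingface/text-generation-inference-benchmark:latest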
3 changes: 2 additions & 1 deletion .gitignore
@@ -169,4 +169,5 @@ Cargo.lock
.idea
*.json
*.txt
results
results
*.parquet
3 changes: 3 additions & 0 deletions Dockerfile
@@ -1,4 +1,7 @@
FROM rust:1-bullseye AS builder
LABEL org.opencontainers.image.source=https://github.com/huggingface/text-generation-inference-benchmark
LABEL org.opencontainers.image.description="A benchmark tool for LLM inference engines"
LABEL org.opencontainers.image.licenses="Apache-2.0"
WORKDIR /usr/src/text-generation-inference-benchmark
COPY . .
RUN cargo install --path .
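The new OCI labels tie the published image back to this repository and record its description and license. A quick sketch for checking them on a locally built image (the local image name is illustrative):

    # Build the image and print its OCI labels.
    docker build -t tgi-benchmark-local .
    docker inspect --format '{{ json .Config.Labels }}' tgi-benchmark-local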
7 changes: 4 additions & 3 deletions extra/dashboard/app.py
@@ -94,8 +94,8 @@ def load_data() -> pd.DataFrame:
{"rate": [1, 2], "inter_token_latency_ms_p90": [10, 20], "engine": ["tgi", "vllm"]})
df = load_data()
models = df["model"].unique()
devices= df["device"].unique()
with gr.Blocks(css=css) as demo:
devices = df["device"].unique()
with gr.Blocks(css=css, title="TGI benchmarks") as demo:
with gr.Row():
header = gr.Markdown("# TGI benchmarks\nBenchmark results for Hugging Face TGI 🤗")
with gr.Row():
@@ -119,7 +119,8 @@ def load_data() -> pd.DataFrame:
gs = stack.enter_context(gr.Row())
line_plots.append(
{"component": gr.LinePlot(default_df, label=f'{v.title}', x="rate", y=k,
color="engine", y_title=v.y_title, color_map={'vLLM':'#2F5BA1','TGI':'#FF9D00'}), "model": model.value,
color="engine", y_title=v.y_title,
color_map={'vLLM': '#2F5BA1', 'TGI': '#FF9D00'}), "model": model.value,
"device": device})
i += 1

1 change: 1 addition & 0 deletions extra/slurm/benchmark.py
@@ -41,6 +41,7 @@ def main():
f'{engine}.slurm']
env = os.environ.copy()
env['MODEL'] = model[0]
env['TP'] = str(gpus)
process = subprocess.run(args, capture_output=True,
env=env)
print(process.stdout.decode())
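The launcher now passes the tensor-parallelism degree to the Slurm scripts through the TP environment variable, alongside MODEL. A rough manual equivalent of what benchmark.py does when submitting a job (the model id and GPU count are illustrative, and the exact submit command in benchmark.py is not shown in this hunk; sbatch exports the caller's environment by default):

    # Run the TGI benchmark job by hand with the same variables the launcher sets.
    export MODEL=meta-llama/Meta-Llama-3-8B-Instruct   # illustrative model id
    export TP=2                                        # tensor-parallel degree, matching the GPU count
    sbatch extra/slurm/tgi.slurm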
8 changes: 6 additions & 2 deletions extra/slurm/tgi.slurm
@@ -13,6 +13,11 @@ if [ -z "$MODEL" ]; then
exit 1
fi

if [ -z "$TP" ]; then
echo "TP environment variable is not set"
exit 1
fi

echo "Starting TGI benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
@@ -66,8 +71,7 @@ if [[ $exit_code != 124 ]]; then
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"TGI\"" \
--extra-meta "version=$VERSION,engine=\"TGI\",tp=$TP" \
--no-console
fi

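The per-key --extra-meta flags are folded into a single comma-separated key=value list that now also records the tensor-parallelism degree. An illustration of how the value expands (the VERSION and TP values below are made up):

    # Show the expanded metadata argument; the escaped quotes around the
    # engine name stay literally in the value.
    VERSION=v1.4.0
    TP=2
    echo --extra-meta "version=$VERSION,engine=\"TGI\",tp=$TP"
    # prints: --extra-meta version=v1.4.0,engine="TGI",tp=2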
8 changes: 6 additions & 2 deletions extra/slurm/vllm.slurm
@@ -14,6 +14,11 @@ if [ -z "$MODEL" ]; then
exit 1
fi

if [ -z "$TP" ]; then
echo "TP environment variable is not set"
exit 1
fi

echo "Starting vLLM benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
@@ -65,8 +70,7 @@ if [[ $exit_code != 124 ]]; then
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"vLLM\"" \
--extra-meta "version=$VERSION,engine=\"vLLM\",tp=$TP" \
--no-console
fi
