Add linux.dgx.b200 (#58)

huydhn · web-flow · commit ad3844dd84c0 · 2025-08-05T18:31:03.000-07:00
* Test linux.dgx.b200

Signed-off-by: Huy Do <huydhn@gmail.com>

* Debug

Signed-off-by: Huy Do <huydhn@gmail.com>

* Auth with AWS on B200 DGX runners

Signed-off-by: Huy Do <huydhn@gmail.com>

* Add linux.dgx.b200.8

Signed-off-by: Huy Do <huydhn@gmail.com>

* Another tweak

Signed-off-by: Huy Do <huydhn@gmail.com>

* [no ci] 2.7.1

Signed-off-by: Huy Do <huydhn@gmail.com>

* [no ci] Use cu128

Signed-off-by: Huy Do <huydhn@gmail.com>

* A small tweak

* Keep the name unique

Signed-off-by: Huy Do <huydhn@gmail.com>

* Sanitize the model name

Signed-off-by: Huy Do <huydhn@gmail.com>

* Add sanitized device

Signed-off-by: Huy Do <huydhn@gmail.com>

---------

Signed-off-by: Huy Do <huydhn@gmail.com>
diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -18,6 +18,7 @@
         "linux.aws.h100",
         "linux.rocm.gpu.gfx942.2",  # No single ROCm GPU?
         "linux.24xl.spr-metal",
+        "linux.dgx.b200",
     ],
     # NB: There is no 2xH100 runner at the moment, so let's use the next one
     # in the list here which is 4xH100
@@ -34,6 +35,7 @@
     8: [
         "linux.aws.h100.8",
         "linux.rocm.gpu.gfx942.8",
+        "linux.dgx.b200.8",
     ],
 }
 
@@ -43,6 +45,8 @@
     "linux.aws.h100": "cuda",
     "linux.aws.h100.4": "cuda",
     "linux.aws.h100.8": "cuda",
+    "linux.dgx.b200": "cuda",
+    "linux.dgx.b200.8": "cuda",
     "linux.rocm.gpu.gfx942.2": "rocm",
     "linux.rocm.gpu.gfx942.4": "rocm",
     "linux.rocm.gpu.gfx942.8": "rocm",
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
@@ -4,4 +4,4 @@ psutil==7.0.0
 pynvml==12.0.0
 boto3==1.36.21
 awscli==1.37.21
-torch==2.7.0
+torch==2.7.1
diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
@@ -134,7 +134,8 @@ jobs:
             pip install -r .github/scripts/requirements.txt \
               --extra-index-url https://download.pytorch.org/whl/rocm6.3
           else
-            pip install -r .github/scripts/requirements.txt
+            pip install -r .github/scripts/requirements.txt \
+              --extra-index-url https://download.pytorch.org/whl/cu128
           fi
 
       - name: Set Docker registry
@@ -277,15 +278,9 @@ jobs:
           )
           docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
 
-      # Keep a copy of the benchmark results on GitHub for reference
-      - uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-results
-          path: vllm-benchmarks/vllm/benchmarks/results
-
       - name: Authenticate with AWS
         # AWS CUDA runners already have access to the bucket via its runner IAM role
-        if: env.DEVICE_NAME != 'cuda'
+        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
@@ -304,10 +299,21 @@ jobs:
           ls -lah "${BENCHMARK_RESULTS}"
 
           SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
+          SANITIZED_MODELS="${MODELS//\//_}"
+
           python3 .github/scripts/upload_benchmark_results.py \
             --repo vllm-benchmarks/vllm \
             --benchmark-name "vLLM benchmark" \
             --benchmark-results "${BENCHMARK_RESULTS}" \
             --device-name "${DEVICE_NAME}" \
             --device-type "${SANITIZED_DEVICE_TYPE}" \
-            --model "${MODELS//\//_}"
+            --model "${SANITIZED_MODELS}"
+
+          echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV
+          echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV
+
+      # Keep a copy of the benchmark results on GitHub for reference
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results--${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }}
+          path: vllm-benchmarks/vllm/benchmarks/results