Skip compressed-tensors quant test on ROCm (erroneously introduced in #6549)
vllm-project · Jul 19, 2024 · 675dfdd · 675dfdd
1 parent cf793c0
commit 675dfdd
Showing 1 changed file with 7 additions and 2 deletions.
diff --git a/tests/basic_correctness/test_cpu_offload.py b/tests/basic_correctness/test_cpu_offload.py
@@ -1,8 +1,13 @@
+from vllm.utils import is_hip
+
 from ..utils import compare_two_settings
 
 
 def test_cpu_offload():
     compare_two_settings("meta-llama/Llama-2-7b-hf", [],
                          ["--cpu-offload-gb", "4"])
-    compare_two_settings("nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t",
-                         [], ["--cpu-offload-gb", "1"])
+    if not is_hip():
+        # compressed-tensors quantization is currently not supported in ROCm.
+        compare_two_settings(
+            "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", [],
+            ["--cpu-offload-gb", "1"])