Skip to content

Commit

Permalink
Skip compressed-tensors quant test on ROCm (erroneously introduced in v…
Browse files Browse the repository at this point in the history
  • Loading branch information
mawong-amd committed Jul 19, 2024
1 parent cf793c0 commit 675dfdd
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions tests/basic_correctness/test_cpu_offload.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from vllm.utils import is_hip

from ..utils import compare_two_settings


def test_cpu_offload():
    """Exercise ``--cpu-offload-gb`` against a baseline run for each model.

    Every model is handed to ``compare_two_settings`` with an empty argument
    list and with a ``--cpu-offload-gb`` argument list (presumably the helper
    checks that the two configurations agree; confirm against ``..utils``).

    The compressed-tensors checkpoint is exercised only when ``is_hip()`` is
    false, so it must not also be called outside that guard: an unguarded
    call would run the unsupported quantization path on ROCm and would
    duplicate the comparison everywhere else.
    """
    compare_two_settings("meta-llama/Llama-2-7b-hf", [],
                         ["--cpu-offload-gb", "4"])
    if not is_hip():
        # compressed-tensors quantization is currently not supported in ROCm.
        compare_two_settings(
            "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", [],
            ["--cpu-offload-gb", "1"])

0 comments on commit 675dfdd

Please sign in to comment.