diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
index d77722499d0e9..d89071f30a549 100644
--- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -11,8 +11,7 @@


 def sparse_cutlass_supported() -> bool:
-    # sparse cutlass is not supported on Rocm
-    if current_platform.is_rocm():
+    if not current_platform.is_cuda():
         return False

     capability_tuple = current_platform.get_device_capability()
@@ -22,8 +21,7 @@ def sparse_cutlass_supported() -> bool:


 def cutlass_fp8_supported() -> bool:
-    # cutlass is not supported on Rocm
-    if current_platform.is_rocm():
+    if not current_platform.is_cuda():
         return False

     capability_tuple = current_platform.get_device_capability()
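
Note: the change swaps a ROCm denylist for a CUDA allowlist. Under the old
guard, a platform that was neither CUDA nor ROCm (e.g. CPU, TPU, or XPU) fell
past the early return and reached the device-capability probe, which is only
meaningful on CUDA devices. A minimal standalone sketch of the two guard
semantics follows; FakePlatform, old_guard, and new_guard are hypothetical
names for illustration, not vLLM code.

# Minimal sketch contrasting the old denylist guard with the new allowlist
# guard. FakePlatform stands in for vllm.platforms.current_platform; it is
# a hypothetical illustration, not the real interface. A returned True means
# "proceed past the early return to the capability check".

class FakePlatform:
    def __init__(self, name: str) -> None:
        self.name = name

    def is_rocm(self) -> bool:
        return self.name == "rocm"

    def is_cuda(self) -> bool:
        return self.name == "cuda"


def old_guard(platform: FakePlatform) -> bool:
    # Pre-change semantics: only ROCm bails out early.
    return not platform.is_rocm()


def new_guard(platform: FakePlatform) -> bool:
    # Post-change semantics: anything that is not CUDA bails out early.
    return platform.is_cuda()


if __name__ == "__main__":
    for name in ("cuda", "rocm", "cpu"):
        p = FakePlatform(name)
        print(f"{name}: old={old_guard(p)} new={new_guard(p)}")
    # cuda: old=True  new=True
    # rocm: old=False new=False
    # cpu:  old=True  new=False   <- old guard wrongly kept going on CPU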