Skip to content

Commit

Permalink
Reorder kv dtype check to avoid nvcc not found error on AMD platform (v…
Browse files (browse the repository at this point in the history)
  • Loading branch information
cloudhan authored Mar 2, 2024
1 parent 29e70e3 commit baee28c
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions vllm/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -330,15 +330,14 @@ def _verify_cache_dtype(self) -> None:
if self.cache_dtype == "auto":
pass
elif self.cache_dtype == "fp8_e5m2":
if is_hip():
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
raise ValueError(
"FP8 is not supported when cuda version is lower than 11.8."
)
device_name = torch.cuda.get_device_name()
if "AMD" in device_name:
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
logger.info(
"Using fp8_e5m2 data type to store kv cache. It reduces "
"the GPU memory footprint and boosts the performance. "
Expand Down

0 comments on commit baee28c

Please sign in to comment.