From 0d780176f47aa9095bffd786e681af6fec5b4f74 Mon Sep 17 00:00:00 2001 From: NickLucche Date: Wed, 30 Oct 2024 16:51:00 +0000 Subject: [PATCH] nit Signed-off-by: NickLucche --- tests/utils.py | 27 ++++++++++++++++++--------- vllm/config.py | 4 ++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 8830a22e94f24..f6f588df48810 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -26,7 +26,7 @@ from vllm.platforms import current_platform from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.utils import (FlexibleArgumentParser, GB_bytes, - cuda_device_count_stateless, get_open_port, is_hip) + cuda_device_count_stateless, get_open_port) if current_platform.is_rocm(): from amdsmi import (amdsmi_get_gpu_vram_usage, @@ -487,7 +487,7 @@ def wait_for_gpu_memory_to_clear(devices: List[int], output: Dict[int, str] = {} output_raw: Dict[int, float] = {} for device in devices: - if is_hip(): + if current_platform.is_rocm(): dev_handle = amdsmi_get_processor_handles()[device] mem_info = amdsmi_get_gpu_vram_usage(dev_handle) gb_used = mem_info["vram_used"] / 2**10 @@ -561,11 +561,11 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None: return wrapper -def large_gpu_test(*, min_gb: int): - """ - Decorate a test to be skipped if no GPU is available or it does not have - sufficient memory. - Currently, the CI machine uses L4 GPU which has 24 GB VRAM. +def large_gpu_mark(min_gb: int) -> pytest.MarkDecorator: + """Gets a pytest skipif mark, which triggers if the device doesn't meet a minimum memory requirement in GB; can be leveraged via @large_gpu_test to skip tests in environments without enough resources, or called when filtering tests to run directly. 
""" try: if current_platform.is_cpu(): @@ -577,14 +577,23 @@ def large_gpu_test(*, min_gb: int): f"An error occurred when finding the available memory: {e}", stacklevel=2, ) - memory_gb = 0 - test_skipif = pytest.mark.skipif( + return pytest.mark.skipif( memory_gb < min_gb, reason=f"Need at least {memory_gb}GB GPU memory to run the test.", ) + +def large_gpu_test(*, min_gb: int): + """ + Decorate a test to be skipped if no GPU is available or it does not have + sufficient memory. + + Currently, the CI machine uses L4 GPU which has 24 GB VRAM. + """ + test_skipif = large_gpu_mark(min_gb) + def wrapper(f: Callable[_P, None]) -> Callable[_P, None]: return test_skipif(f) diff --git a/vllm/config.py b/vllm/config.py index a6f17b93cc36d..ef03530994d45 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1303,9 +1303,9 @@ def maybe_create_spec_config( f"num_speculative_tokens={n_predict}, but " f"{num_speculative_tokens=} was provided.") - if enable_chunked_prefill and draft_hf_config.model_type in [ + if enable_chunked_prefill and draft_hf_config.model_type in ( "medusa", "mlp_speculator", "eagle" - ]: + ): raise ValueError( "Chunked prefill and hidden-state based draft models are " "not compatible.")