From 0d780176f47aa9095bffd786e681af6fec5b4f74 Mon Sep 17 00:00:00 2001 From: NickLucche Date: Wed, 30 Oct 2024 16:51:00 +0000 Subject: [PATCH] nit Signed-off-by: NickLucche --- tests/utils.py | 27 ++++++++++++++++++--------- vllm/config.py | 4 ++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 8830a22e94f24..f6f588df48810 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -26,7 +26,7 @@ from vllm.platforms import current_platform from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.utils import (FlexibleArgumentParser, GB_bytes, - cuda_device_count_stateless, get_open_port, is_hip) + cuda_device_count_stateless, get_open_port) if current_platform.is_rocm(): from amdsmi import (amdsmi_get_gpu_vram_usage, @@ -487,7 +487,7 @@ def wait_for_gpu_memory_to_clear(devices: List[int], output: Dict[int, str] = {} output_raw: Dict[int, float] = {} for device in devices: - if is_hip(): + if current_platform.is_rocm(): dev_handle = amdsmi_get_processor_handles()[device] mem_info = amdsmi_get_gpu_vram_usage(dev_handle) gb_used = mem_info["vram_used"] / 2**10 @@ -561,11 +561,11 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None: return wrapper -def large_gpu_test(*, min_gb: int): - """ - Decorate a test to be skipped if no GPU is available or it does not have - sufficient memory. - Currently, the CI machine uses L4 GPU which has 24 GB VRAM. +def large_gpu_mark(min_gb: int) -> pytest.MarkDecorator: + """Gets a pytest skipif mark, which triggers if the device doesn't meet a minimum memory requirement in GB; can be leveraged via @large_gpu_test to skip tests in environments without enough resources, or called when filtering tests to run directly. 
""" try: if current_platform.is_cpu(): @@ -577,14 +577,23 @@ def large_gpu_test(*, min_gb: int): f"An error occurred when finding the available memory: {e}", stacklevel=2, ) - memory_gb = 0 - test_skipif = pytest.mark.skipif( + return pytest.mark.skipif( memory_gb < min_gb, reason=f"Need at least {memory_gb}GB GPU memory to run the test.", ) + +def large_gpu_test(*, min_gb: int): + """ + Decorate a test to be skipped if no GPU is available or it does not have + sufficient memory. + + Currently, the CI machine uses L4 GPU which has 24 GB VRAM. + """ + test_skipif = large_gpu_mark(min_gb) + def wrapper(f: Callable[_P, None]) -> Callable[_P, None]: return test_skipif(f) diff --git a/vllm/config.py b/vllm/config.py index a6f17b93cc36d..ef03530994d45 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1303,9 +1303,9 @@ def maybe_create_spec_config( f"num_speculative_tokens={n_predict}, but " f"{num_speculative_tokens=} was provided.") - if enable_chunked_prefill and draft_hf_config.model_type in [ + if enable_chunked_prefill and draft_hf_config.model_type in ( "medusa", "mlp_speculator", "eagle" - ]: + ): raise ValueError( "Chunked prefill and hidden-state based draft models are " "not compatible.")