diff --git a/vllm/compilation/counter.py b/vllm/compilation/counter.py
index 100a49aba74ac..6385f1c5dbf81 100644
--- a/vllm/compilation/counter.py
+++ b/vllm/compilation/counter.py
@@ -5,6 +5,7 @@
 
 @dataclasses.dataclass
 class CompilationCounter:
+    num_models_seen: int = 0
     num_graphs_seen: int = 0
     # including the splitting ops
     num_piecewise_graphs_seen: int = 0
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index 4b78491bc5a48..8b81a29936989 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -3,6 +3,7 @@
 
 import torch
 
+from vllm.compilation.counter import compilation_counter
 from vllm.compilation.wrapper import TorchCompileWrapperWithCustomDispatcher
 from vllm.config import CompilationLevel, VllmConfig
 from vllm.logger import init_logger
@@ -130,6 +131,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
         ] or not supports_dynamo()
         if self.do_not_compile:
             return
+        compilation_counter.num_models_seen += 1
         TorchCompileWrapperWithCustomDispatcher.__init__(
             self, compilation_level=vllm_config.compilation_config.level)
 
diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py
index bd4764c5cc79c..8b43167693598 100644
--- a/vllm/plugins/__init__.py
+++ b/vllm/plugins/__init__.py
@@ -80,6 +80,9 @@ def set_current_vllm_config(vllm_config: "VllmConfig"):
     """
     global _current_vllm_config
    old_vllm_config = _current_vllm_config
+    from vllm.compilation.counter import compilation_counter
+    from vllm.config import CompilationLevel
+    num_models_seen = compilation_counter.num_models_seen
     try:
         _current_vllm_config = vllm_config
         yield
@@ -88,6 +91,18 @@
                      vllm_config.compilation_config.enabled_custom_ops)
         logger.debug("disabled custom ops: %s",
                      vllm_config.compilation_config.disabled_custom_ops)
+        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \
+                and compilation_counter.num_models_seen == num_models_seen:
+            # If the model supports compilation,
+            # compilation_counter.num_models_seen should be increased
+            # by at least 1.
+            # If it is not increased, it means the model does not support
+            # compilation (does not have @support_torch_compile decorator).
+            logger.warning(
+                "`torch.compile` is turned on, but the model %s"
+                " does not support it. Please open an issue on GitHub"
+                " if you want it to be supported.",
+                vllm_config.model_config.model)
         _current_vllm_config = old_vllm_config
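The detection added to `set_current_vllm_config` is a counter-delta pattern: snapshot `num_models_seen` before the model is constructed, then compare afterwards; a `@support_torch_compile`-decorated model bumps the counter in its `__init__`, so an unchanged counter means the decorator is missing. Below is a minimal, self-contained sketch of that pattern; `ToyCompilationCounter`, `expect_model_registration`, and `CompilableModel` are illustrative names, not vLLM APIs.

```python
import contextlib
import dataclasses
import logging

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class ToyCompilationCounter:
    """Stand-in for vllm.compilation.counter.CompilationCounter."""
    num_models_seen: int = 0


counter = ToyCompilationCounter()


@contextlib.contextmanager
def expect_model_registration(model_name: str):
    # Snapshot the counter before the model is instantiated.
    before = counter.num_models_seen
    yield
    # A @support_torch_compile-style decorator would have bumped the
    # counter during __init__; if it did not, warn as the diff does.
    if counter.num_models_seen == before:
        logger.warning(
            "`torch.compile` is turned on, but the model %s"
            " does not support it.", model_name)


class CompilableModel:
    """Mimics the increment added to decorators.py in this diff."""

    def __init__(self):
        counter.num_models_seen += 1


with expect_model_registration("compilable-model"):
    CompilableModel()  # counter increments -> no warning

with expect_model_registration("plain-model"):
    object()           # counter unchanged -> warning fires
```

Snapshotting a delta rather than checking an absolute value keeps the check correct when several models are built in one process, which is presumably why the diff records `num_models_seen` before the `try` block instead of comparing against zero.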