diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
index 4f0c75b2c6a57..9b4a97abf9b51 100644
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -10,7 +10,6 @@ from .interfaces_base import is_embedding_model
 
 if TYPE_CHECKING:
-    from vllm.config import LoRAConfig, MultiModalConfig, SchedulerConfig
     from vllm.sequence import IntermediateTensors
 
 logger = init_logger(__name__)
 
@@ -29,9 +28,6 @@ class SupportsMultiModal(Protocol):
         MRO of your model class.
     """
 
-    def __init__(self, *, multimodal_config: "MultiModalConfig") -> None:
-        ...
-
 
 # We can't use runtime_checkable with ClassVar for issubclass checks
 # so we need to treat the class as an instance and use isinstance instead
@@ -39,9 +35,6 @@ def __init__(self, *, multimodal_config: "MultiModalConfig") -> None:
 class _SupportsMultiModalType(Protocol):
     supports_multimodal: Literal[True]
 
-    def __call__(self, *, multimodal_config: "MultiModalConfig") -> None:
-        ...
-
 
 @overload
 def supports_multimodal(
@@ -81,10 +74,6 @@ class SupportsLoRA(Protocol):
     embedding_modules: ClassVar[Dict[str, str]]
     embedding_padding_modules: ClassVar[List[str]]
 
-    # lora_config is None when LoRA is not enabled
-    def __init__(self, *, lora_config: Optional["LoRAConfig"] = None) -> None:
-        ...
-
 
 # We can't use runtime_checkable with ClassVar for issubclass checks
 # so we need to treat the class as an instance and use isinstance instead
@@ -97,9 +86,6 @@ class _SupportsLoRAType(Protocol):
     embedding_modules: Dict[str, str]
     embedding_padding_modules: List[str]
 
-    def __call__(self, *, lora_config: Optional["LoRAConfig"] = None) -> None:
-        ...
-
 
 @overload
 def supports_lora(model: Type[object]) -> TypeIs[Type[SupportsLoRA]]:
@@ -276,21 +262,11 @@ class HasInnerState(Protocol):
         for max_num_seqs, etc. True for e.g. both Mamba and Jamba.
     """
 
-    def __init__(self,
-                 *,
-                 scheduler_config: Optional["SchedulerConfig"] = None) -> None:
-        ...
-
 
 @runtime_checkable
 class _HasInnerStateType(Protocol):
     has_inner_state: ClassVar[Literal[True]]
 
-    def __init__(self,
-                 *,
-                 scheduler_config: Optional["SchedulerConfig"] = None) -> None:
-        ...
-
 
 @overload
 def has_inner_state(model: object) -> TypeIs[HasInnerState]:
@@ -323,17 +299,11 @@ class IsAttentionFree(Protocol):
         True for Mamba but not Jamba.
     """
 
-    def __init__(self) -> None:
-        ...
-
 
 @runtime_checkable
 class _IsAttentionFreeType(Protocol):
     is_attention_free: ClassVar[Literal[True]]
 
-    def __init__(self) -> None:
-        ...
-
 
 @overload
 def is_attention_free(model: object) -> TypeIs[IsAttentionFree]:
diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py
index 7bb43beff255c..957a5a6e26b5c 100644
--- a/vllm/model_executor/models/interfaces_base.py
+++ b/vllm/model_executor/models/interfaces_base.py
@@ -71,7 +71,7 @@ def _check_vllm_model_forward(model: Union[Type[object], object]) -> bool:
             and issubclass(model, nn.Module)):
         logger.warning(
             "The model (%s) is missing "
-            "vLLM-specific keywords from its initializer: %s",
+            "vLLM-specific keywords from its `forward` method: %s",
             model, missing_kws,
         )
 
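Reviewer note (not part of the patch): the removed `__init__`/`__call__` stubs never contributed to the runtime checks, because `isinstance()` against a `@runtime_checkable` `Protocol` only verifies attribute presence, not call signatures. Below is a minimal standalone sketch of the pattern these interfaces rely on; `_SupportsMultiModalType` mirrors the class in the patched file, while `MyModel` and its `vocab_size` parameter are hypothetical stand-ins for a real vLLM model.

from typing import Literal, Protocol, runtime_checkable

@runtime_checkable
class _SupportsMultiModalType(Protocol):
    supports_multimodal: Literal[True]

class MyModel:  # hypothetical model class, for illustration only
    supports_multimodal: Literal[True] = True

    def __init__(self, vocab_size: int) -> None:  # arbitrary signature
        self.vocab_size = vocab_size

# ClassVar members rule out issubclass() checks, so (as the comments in
# interfaces.py say) the class object itself is treated as an instance.
# The check passes on attribute presence alone; __init__'s signature is
# never inspected, which is why the stubs could be deleted from the diff
# above without changing behavior.
assert isinstance(MyModel, _SupportsMultiModalType)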