From 584f0ae40d6f64a7097525f04feb236e94ad37fd Mon Sep 17 00:00:00 2001 From: Ricky Xu Date: Fri, 20 Dec 2024 23:14:08 -0800 Subject: [PATCH] [V1] Make AsyncLLMEngine v1-v0 opaque (#11383) Signed-off-by: Ricky Xu --- vllm/engine/async_llm_engine.py | 7 +++++++ vllm/entrypoints/openai/api_server.py | 6 +----- vllm/v1/engine/async_llm.py | 6 +----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index f50e20cf70323..66a5089074ff5 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1256,3 +1256,10 @@ async def stop_profile(self) -> None: self.engine.model_executor.stop_profile() else: self.engine.model_executor._run_workers("stop_profile") + + +# TODO(v1): Remove this class proxy when V1 goes default. +if envs.VLLM_USE_V1: + from vllm.v1.engine.async_llm import AsyncLLM + + AsyncLLMEngine = AsyncLLM # type: ignore diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 00e2d1a56f160..2e5b769a825ce 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -27,6 +27,7 @@ import vllm.envs as envs from vllm.config import ModelConfig from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore from vllm.engine.multiprocessing.client import MQLLMEngineClient from vllm.engine.multiprocessing.engine import run_mp_engine from vllm.engine.protocol import EngineClient @@ -66,11 +67,6 @@ is_valid_ipv6_address) from vllm.version import __version__ as VLLM_VERSION -if envs.VLLM_USE_V1: - from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore -else: - from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore - TIMEOUT_KEEP_ALIVE = 5 # seconds prometheus_multiproc_dir: tempfile.TemporaryDirectory diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 41fb4b25d45bb..cfdbea8004c35 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -98,7 +98,7 @@ def from_engine_args( start_engine_loop: bool = True, usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - ) -> "AsyncLLMEngine": + ) -> "AsyncLLM": """Create an AsyncLLM from the EngineArgs.""" # Create the engine configs. @@ -386,7 +386,3 @@ def errored(self) -> bool: @property def dead_error(self) -> BaseException: return Exception() # TODO: implement - - -# Retain V0 name for backwards compatibility. -AsyncLLMEngine = AsyncLLM