From 0cec41afc16edeb29c00524a52acc463fc96ec39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Fri, 27 Sep 2024 14:11:29 +0200 Subject: [PATCH] deps: bump vllm minimum version to 0.6.2 --- .github/workflows/tests.yaml | 2 +- pyproject.toml | 2 +- src/vllm_tgis_adapter/http.py | 24 +++++------------------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 3933deb1..a544eb62 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -32,7 +32,7 @@ jobs: pyv: ["3.12"] vllm_version: # - "" # skip the pypi version as it will not work on CPU - - "git+https://github.com/vllm-project/vllm@v0.6.1.post2" + - "git+https://github.com/vllm-project/vllm@v0.6.2" - "git+https://github.com/vllm-project/vllm@main" - "git+https://github.com/opendatahub-io/vllm@main" diff --git a/pyproject.toml b/pyproject.toml index 050aebd5..28566f60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ requires-python = ">=3.9" dynamic = ["version"] dependencies = [ - "vllm>=0.5.5", + "vllm>=0.6.2", "prometheus_client==0.20.0", "grpcio==1.62.2", "grpcio-health-checking==1.62.2", diff --git a/src/vllm_tgis_adapter/http.py b/src/vllm_tgis_adapter/http.py index 0715e2c3..9f64c53d 100644 --- a/src/vllm_tgis_adapter/http.py +++ b/src/vllm_tgis_adapter/http.py @@ -3,15 +3,9 @@ from typing import TYPE_CHECKING from vllm.entrypoints.launcher import serve_http -from vllm.entrypoints.openai.api_server import build_app +from vllm.entrypoints.openai.api_server import build_app, init_app_state from vllm.logger import init_logger -try: - from vllm.entrypoints.openai.api_server import init_app -except ImportError: # vllm > 0.6.1.post2 - from vllm.entrypoints.openai.api_server import init_app_state - - if TYPE_CHECKING: import argparse @@ -31,12 +25,9 @@ async def run_http_server( # modified copy of vllm.entrypoints.openai.api_server.run_server that # allows passing of the engine - try: - app = await init_app(engine, args) # type: ignore[arg-type] - except NameError: # vllm > 0.6.1.post2 - app = build_app(args) - model_config = await engine.get_model_config() - init_app_state(engine, model_config, app.state, args) + app = build_app(args) + model_config = await engine.get_model_config() + init_app_state(engine, model_config, app.state, args) serve_kwargs = { "host": args.host, @@ -50,12 +41,7 @@ async def run_http_server( } serve_kwargs.update(uvicorn_kwargs) - try: - shutdown_coro = await serve_http(app, engine, **serve_kwargs) - except TypeError: - # vllm 0.5.4 backwards compatibility - # HTTP server will not shut itself down when the engine dies - shutdown_coro = await serve_http(app, **serve_kwargs) + shutdown_coro = await serve_http(app, **serve_kwargs) # launcher.serve_http returns a shutdown coroutine to await # (The double await is intentional)