From 0cec41afc16edeb29c00524a52acc463fc96ec39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Fri, 27 Sep 2024 14:11:29 +0200
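Subject: [PATCH] deps: bump vllm minimum version to 0.6.2

Raise the minimum required vllm from 0.5.5 to 0.6.2 in pyproject.toml and
test against v0.6.2 instead of v0.6.1.post2 in CI.

With older releases no longer supported, drop the compatibility fallbacks
in http.py: import init_app_state unconditionally, always create the app
with build_app() followed by init_app_state(), and call serve_http()
without the engine argument.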
---
 .github/workflows/tests.yaml  |  2 +-
 pyproject.toml                |  2 +-
 src/vllm_tgis_adapter/http.py | 24 +++++-------------------
 3 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 3933deb1..a544eb62 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -32,7 +32,7 @@ jobs:
         pyv: ["3.12"]
         vllm_version:
           # - "" # skip the pypi version as it will not work on CPU
-          - "git+https://github.com/vllm-project/vllm@v0.6.1.post2"
+          - "git+https://github.com/vllm-project/vllm@v0.6.2"
           - "git+https://github.com/vllm-project/vllm@main"
           - "git+https://github.com/opendatahub-io/vllm@main"
 
diff --git a/pyproject.toml b/pyproject.toml
index 050aebd5..28566f60 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-  "vllm>=0.5.5",
+  "vllm>=0.6.2",
   "prometheus_client==0.20.0",
   "grpcio==1.62.2",
   "grpcio-health-checking==1.62.2",
diff --git a/src/vllm_tgis_adapter/http.py b/src/vllm_tgis_adapter/http.py
index 0715e2c3..9f64c53d 100644
--- a/src/vllm_tgis_adapter/http.py
+++ b/src/vllm_tgis_adapter/http.py
@@ -3,15 +3,9 @@
 from typing import TYPE_CHECKING
 
 from vllm.entrypoints.launcher import serve_http
-from vllm.entrypoints.openai.api_server import build_app
+from vllm.entrypoints.openai.api_server import build_app, init_app_state
 from vllm.logger import init_logger
 
-try:
-    from vllm.entrypoints.openai.api_server import init_app
-except ImportError:  # vllm > 0.6.1.post2
-    from vllm.entrypoints.openai.api_server import init_app_state
-
-
 if TYPE_CHECKING:
     import argparse
 
@@ -31,12 +25,9 @@ async def run_http_server(
     # modified copy of vllm.entrypoints.openai.api_server.run_server that
     # allows passing of the engine
 
-    try:
-        app = await init_app(engine, args)  # type: ignore[arg-type]
-    except NameError:  # vllm > 0.6.1.post2
-        app = build_app(args)
-        model_config = await engine.get_model_config()
-        init_app_state(engine, model_config, app.state, args)
+    app = build_app(args)
+    model_config = await engine.get_model_config()
+    init_app_state(engine, model_config, app.state, args)
 
     serve_kwargs = {
         "host": args.host,
@@ -50,12 +41,7 @@ async def run_http_server(
     }
     serve_kwargs.update(uvicorn_kwargs)
 
-    try:
-        shutdown_coro = await serve_http(app, engine, **serve_kwargs)
-    except TypeError:
-        # vllm 0.5.4 backwards compatibility
-        # HTTP server will not shut itself down when the engine dies
-        shutdown_coro = await serve_http(app, **serve_kwargs)
+    shutdown_coro = await serve_http(app, **serve_kwargs)
 
     # launcher.serve_http returns a shutdown coroutine to await
     # (The double await is intentional)