diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0041ee3..19ac2fd 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -32,7 +32,7 @@ jobs: pyv: ["3.12"] vllm_version: # - "" # skip the pypi version as it will not work on CPU - - "git+https://github.com/vllm-project/vllm@v0.6.4" + - "git+https://github.com/vllm-project/vllm@v0.6.6.post1" - "git+https://github.com/vllm-project/vllm@main" - "git+https://github.com/opendatahub-io/vllm@main" diff --git a/pyproject.toml b/pyproject.toml index bb8e41f..f970b9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ requires-python = ">=3.9" dynamic = ["version"] dependencies = [ - "vllm>=0.6.4", + "vllm>=0.6.6.post1", "prometheus_client==0.21.0", "grpcio==1.67.0", "grpcio-health-checking==1.62.2", diff --git a/src/vllm_tgis_adapter/grpc/grpc_server.py b/src/vllm_tgis_adapter/grpc/grpc_server.py index 4ee4b1c..4479486 100644 --- a/src/vllm_tgis_adapter/grpc/grpc_server.py +++ b/src/vllm_tgis_adapter/grpc/grpc_server.py @@ -62,12 +62,7 @@ from grpc.aio import ServicerContext from vllm import CompletionOutput, RequestOutput from vllm.config import ModelConfig - - try: - from vllm.engine.protocol import EngineClient - except ImportError: - # fallback for versions <=v0.6.1.post2 - from vllm.engine.protocol import AsyncEngineClient as EngineClient + from vllm.engine.protocol import EngineClient from vllm.lora.request import LoRARequest from vllm.sequence import Logprob from vllm.transformers_utils.tokenizer import AnyTokenizer