From 0f2271653a39135e1c938f904f06178394f01e00 Mon Sep 17 00:00:00 2001
From: Joe Runde
Date: Tue, 10 Dec 2024 09:38:23 -0700
Subject: [PATCH] [Bugfix] Backport request id validation to v0 (#11036)

Signed-off-by: Joe Runde
---
 vllm/engine/multiprocessing/client.py | 4 ++++
 vllm/v1/engine/async_llm.py           | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
index 32bd83305bb8f..a729023bc00bb 100644
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -576,6 +576,10 @@ async def _process_request(
         if self._errored_with is not None:
             raise ENGINE_DEAD_ERROR(self._errored_with)
 
+        # Ensure the request id is unique among running requests
+        if request_id in self.output_queues:
+            raise ValueError(f"Request {request_id} already exists")
+
         # Constructing guided decoding logits processors is expensive, so we do
         # it here to avoid contending with cpu resources and the GIL on the
         # backend process.
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 26fd650aee4b7..24cafeff63d1e 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -152,7 +152,7 @@ async def add_request(
         """Add new request to the AsyncLLM."""
 
         if self.detokenizer.is_request_active(request_id):
-            raise KeyError(f"Request {request_id} already exists.")
+            raise ValueError(f"Request {request_id} already exists.")
 
         # 1) Create a new AsyncStream for the request.
         stream = self._add_request_to_streams(request_id)