From 2f08836d5de86a022fc023c62d9e437c1f922cf9 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Mon, 9 Dec 2024 08:27:10 -0800
Subject: [PATCH] [V1] Fix Detokenizer loading in `AsyncLLM` (#10997)

Signed-off-by: Roger Wang
---
 vllm/v1/engine/async_llm.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 4ef372fd8464b..0bcccda2bf329 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -65,7 +65,12 @@ def __init__(
                                    input_registry)
 
         # Detokenizer (converts EngineCoreOutputs --> RequestOutput).
-        self.detokenizer = Detokenizer(vllm_config.model_config.tokenizer)
+        self.detokenizer = Detokenizer(
+            tokenizer_name=vllm_config.model_config.tokenizer,
+            tokenizer_mode=vllm_config.model_config.tokenizer_mode,
+            trust_remote_code=vllm_config.model_config.trust_remote_code,
+            revision=vllm_config.model_config.tokenizer_revision,
+        )
 
         # EngineCore (starts the engine in background process).
         self.engine_core = EngineCoreClient.make_client(
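
The sketch below is a minimal stand-in, not vLLM's actual Detokenizer, and the
model name and config values are hypothetical; it only illustrates the bug the
patch fixes. Passing a single positional argument reached the constructor's
first parameter, so tokenizer_mode, trust_remote_code, and tokenizer_revision
from model_config were silently dropped and fell back to defaults.

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class Detokenizer:
        """Stand-in with the keyword signature the patch targets."""
        tokenizer_name: str
        tokenizer_mode: str = "auto"
        trust_remote_code: bool = False
        revision: Optional[str] = None


    # Before the patch: only the name was forwarded; every other
    # tokenizer setting silently fell back to its default.
    broken = Detokenizer("my-org/my-model")
    assert broken.trust_remote_code is False  # model_config value lost

    # After the patch: the model_config settings actually reach the
    # detokenizer's tokenizer.
    fixed = Detokenizer(
        tokenizer_name="my-org/my-model",
        tokenizer_mode="slow",
        trust_remote_code=True,
        revision="main",
    )

Passing the arguments by keyword, as the patch does, also guards against this
class of bug recurring if the constructor's parameter order ever changes.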