From 4554f6e12aaf78d4322828f4f248b404942a66e5 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Thu, 7 Nov 2024 20:01:48 -0800
Subject: [PATCH] Revert detokenizer

---
 vllm/v1/engine/llm_engine.py                    |  8 ++------
 vllm/v1/tokenizer/__init__.py                   |  0
 vllm/v1/{processor => tokenizer}/detokenizer.py | 17 ++++++-----------
 3 files changed, 8 insertions(+), 17 deletions(-)
 create mode 100644 vllm/v1/tokenizer/__init__.py
 rename vllm/v1/{processor => tokenizer}/detokenizer.py (94%)

diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index 4b443bc6ada5c..9d530a814b361 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -22,9 +22,9 @@
 from vllm.usage.usage_lib import UsageContext
 from vllm.v1.core.scheduler import Scheduler
 from vllm.v1.executor.gpu_executor import GPUExecutor
-from vllm.v1.processor.detokenizer import Detokenizer, DetokenizerInputs
 from vllm.v1.processor.mm_input_mapper import MMInputMapper
 from vllm.v1.request import Request, RequestStatus
+from vllm.v1.tokenizer.detokenizer import Detokenizer, DetokenizerInputs
 from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
@@ -126,11 +126,7 @@ def __init__(
         # Ping the tokenizer to ensure liveness if it runs in a
         # different process.
         self.tokenizer.ping()
-        self.detokenizer = Detokenizer(
-            self.model_config.tokenizer,
-            revision=self.model_config.revision,
-            tokenizer_mode=self.model_config.tokenizer_mode)
-
+        self.detokenizer = Detokenizer(self.model_config.tokenizer)
         self.generation_config_fields = _load_generation_config_dict(
             model_config)
         self.input_preprocessor = InputPreprocessor(model_config,
diff --git a/vllm/v1/tokenizer/__init__.py b/vllm/v1/tokenizer/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/vllm/v1/processor/detokenizer.py b/vllm/v1/tokenizer/detokenizer.py
similarity index 94%
rename from vllm/v1/processor/detokenizer.py
rename to vllm/v1/tokenizer/detokenizer.py
index f3d60d9759ef1..e485fcc3522d9 100644
--- a/vllm/v1/processor/detokenizer.py
+++ b/vllm/v1/tokenizer/detokenizer.py
@@ -42,13 +42,13 @@ class DetokenizerOutputs(msgspec.Struct):
 
 class Detokenizer:
 
-    def __init__(self, tokenizer_name: str, *args, **kwargs):
+    def __init__(self, tokenizer_name: str):
         # FIXME(woosuk): Currently, the detokenizer is just a hacky prototype.
         # For example, it does not terminate properly. We need to improve this.
         self.push_port = get_open_port()
         self.pull_port = get_open_port()
-        self.detokenizer = DetokenizerProc(self.push_port, self.pull_port,
-                                           tokenizer_name, *args, **kwargs)
+        self.detokenizer = DetokenizerProc(tokenizer_name, self.push_port,
+                                           self.pull_port)
         self.detokenizer.start()
 
         self.zmq_context = zmq.Context()
@@ -81,28 +81,23 @@ class DetokenizerProc(multiprocessing.Process):
 
     def __init__(
         self,
+        tokenizer_name: str,
         pull_port: int,
         push_port: int,
-        tokenizer_name: str,
-        *args,
-        **kwargs,
     ):
         super().__init__()
+        self.tokenizer_name = tokenizer_name
         # NOTE: The pull_port of the detokenizer should be the same as the
         # push_port of the engine. Vice versa.
         self.pull_port = pull_port
         self.push_port = push_port
-        self.tokenizer_name = tokenizer_name
-        self.args = args
-        self.kwargs = kwargs
 
     def run(self):
         # Initialize these objects after the process is forked since they are
         # not picklable.
         self.msgpack_encoder = msgpack.Encoder()
         self.msgpack_decoder = msgpack.Decoder(DetokenizerInputs)
-        self.tokenizer = get_tokenizer(self.tokenizer_name, *self.args,
-                                       **self.kwargs)
+        self.tokenizer = get_tokenizer(self.tokenizer_name)
 
         # req_id -> RequestState
         self.request_states: Dict[str, RequestState] = {}
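
Note (editor's sketch, not part of the patch): after this revert, Detokenizer
takes only the tokenizer name, and the DetokenizerProc it spawns takes the
tokenizer name first, followed by the two ZMQ ports; the revision and
tokenizer_mode arguments are no longer forwarded to get_tokenizer(). A minimal
usage sketch under those assumptions follows; the import path matches the
renamed module above, and the model name is purely illustrative:

    from vllm.v1.tokenizer.detokenizer import Detokenizer

    # Constructing the detokenizer starts a background DetokenizerProc.
    # Per the NOTE in the diff, the caller's push port becomes the
    # process's pull port, and vice versa.
    detokenizer = Detokenizer("facebook/opt-125m")  # illustrative model name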