From 710fcc9ce69a4c5183ac067c9f6a5e77ba206bcf Mon Sep 17 00:00:00 2001
From: Wallas Santos
Date: Tue, 10 Dec 2024 14:29:14 -0300
Subject: [PATCH] revert tokenizer.get_vocab_size

Signed-off-by: Wallas Santos
---
 vllm/engine/async_llm_engine.py                          | 4 +++-
 vllm/engine/llm_engine.py                                | 4 +++-
 vllm/model_executor/guided_decoding/__init__.py          | 4 +++-
 vllm/model_executor/guided_decoding/xgrammar_decoding.py | 4 ++++
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 448226dd75952..92329e9d011a3 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -551,7 +551,9 @@ async def build_guided_decoding_logits_processor_async(
     guided_decoding.backend = guided_decoding.backend or default_guided_backend
 
     processor = await get_guided_decoding_logits_processor(
-        guided_params=guided_decoding, tokenizer=tokenizer)
+        guided_params=guided_decoding,
+        tokenizer=tokenizer,
+        model_config=model_config)
 
     if processor:
         if sampling_params.logits_processors is None:
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 387062e50b191..4d477907550c1 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -2010,7 +2010,9 @@ def _build_logits_processors(
                     self.decoding_config.guided_decoding_backend
 
                 processor = get_local_guided_decoding_logits_processor(
-                    guided_params=guided_decoding, tokenizer=tokenizer)
+                    guided_params=guided_decoding,
+                    tokenizer=tokenizer,
+                    model_config=self.model_config)
 
                 if processor:
                     logits_processors.append(processor)
diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py
index a8a96f900fe40..4f33da3677631 100644
--- a/vllm/model_executor/guided_decoding/__init__.py
+++ b/vllm/model_executor/guided_decoding/__init__.py
@@ -9,6 +9,7 @@
 
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
 
+    from vllm.config import ModelConfig
     from vllm.logits_process import LogitsProcessor
     from vllm.sampling_params import GuidedDecodingParams
@@ -87,7 +88,8 @@ def maybe_backend_fallback(
 
 async def get_guided_decoding_logits_processor(
         guided_params: GuidedDecodingParams,
-        tokenizer: PreTrainedTokenizer) -> LogitsProcessor | None:
+        tokenizer: PreTrainedTokenizer,
+        model_config: ModelConfig) -> LogitsProcessor | None:
     guided_params = maybe_backend_fallback(guided_params)
     # CFG grammar not supported by LMFE, so we use outlines instead
     if guided_params.backend == 'outlines':
diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py
index 8bcff329e60a8..b473a20243047 100644
--- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py
+++ b/vllm/model_executor/guided_decoding/xgrammar_decoding.py
@@ -21,6 +21,7 @@
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
 
+    from vllm.config import ModelConfig
     from vllm.sampling_params import GuidedDecodingParams
 
 
@@ -28,8 +29,10 @@
 def get_local_xgrammar_guided_decoding_logits_processor(
         guided_params: GuidedDecodingParams,
         tokenizer: PreTrainedTokenizer,
+        model_config: ModelConfig,
         max_threads: int = 8):
     config = GrammarConfig.from_guided_params(guided_params=guided_params,
+                                              model_config=model_config,
                                               tokenizer=tokenizer,
                                               max_threads=max_threads)
     return XGrammarLogitsProcessor(config)
@@ -142,6 +145,7 @@ class GrammarConfig:
 
     @classmethod
     def from_guided_params(cls,
                            guided_params: GuidedDecodingParams,
+                           model_config: ModelConfig,
                            tokenizer: PreTrainedTokenizer,
                            max_threads: int = 8) -> GrammarConfig:
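
Note on usage (illustrative, not part of the patch): after this change, every
call site must pass the engine's ModelConfig alongside the tokenizer; per the
subject line, this reverts the guided-decoding path to deriving vocabulary
information from the model config rather than from tokenizer.get_vocab_size().
A minimal caller sketch, assuming a vLLM build that includes this patch and
pre-existing guided_params, tokenizer, and model_config objects (all three
names are placeholders here):

    from vllm.model_executor.guided_decoding import (
        get_local_guided_decoding_logits_processor)

    # guided_params (GuidedDecodingParams), tokenizer (PreTrainedTokenizer),
    # and model_config (ModelConfig) are assumed to already exist.
    processor = get_local_guided_decoding_logits_processor(
        guided_params=guided_params,
        tokenizer=tokenizer,           # the tokenizer alone no longer suffices
        model_config=model_config)     # new required keyword argument
    if processor is not None:
        logits_processors = [processor]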