[Bugfix] Fix guided decoding with tokenizer mode mistral #11046
```diff
@@ -96,25 +96,15 @@ async def get_guided_decoding_logits_processor(
             get_outlines_guided_decoding_logits_processor)
         return await get_outlines_guided_decoding_logits_processor(
             guided_params, tokenizer)
-    if guided_params.backend == 'lm-format-enforcer':
-        from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
-            get_local_lm_format_enforcer_guided_decoding_logits_processor)
-        return get_local_lm_format_enforcer_guided_decoding_logits_processor(
-            guided_params, tokenizer)
-    if guided_params.backend == 'xgrammar':
-        from vllm.model_executor.guided_decoding.xgrammar_decoding import (  # noqa
-            get_local_xgrammar_guided_decoding_logits_processor)
-        return get_local_xgrammar_guided_decoding_logits_processor(
-            guided_params, tokenizer, model_config)
-
-    raise ValueError(
-        f"Unknown guided decoding backend '{guided_params.backend}'. "
-        "Must be one of 'outlines, 'lm-format-enforcer', 'xgrammar'")
+    return _get_local_guided_decoding_logits_processor(guided_params,
+                                                       tokenizer, model_config)
 
 
 def get_local_guided_decoding_logits_processor(
         guided_params: GuidedDecodingParams, tokenizer: PreTrainedTokenizer,
         model_config: ModelConfig) -> LogitsProcessor | None:
     guided_params = maybe_backend_fallback(guided_params)
```
Review thread on this hunk:

Reviewer: Can you revert this change? `get_local_guided_decoding_logits_processor` is used for the offline use case, with `LLM`, whereas `get_guided_decoding_logits_processor` is used for the online use case.

Author: I reviewed my change and agree it was not a good one, given how differently the two methods are implemented.

Reviewer: I think this is even more confusing now that there are three functions. I would prefer a revert, as it seems you have no other changes to this file? We can consider a refactor in another PR.
```diff
     # CFG grammar not supported by LMFE, so we use outlines instead
     if guided_params.backend == 'outlines':
```
```diff
@@ -123,6 +113,17 @@ def get_local_guided_decoding_logits_processor(
             get_local_outlines_guided_decoding_logits_processor)
         return get_local_outlines_guided_decoding_logits_processor(
             guided_params, tokenizer)
+
+    return _get_local_guided_decoding_logits_processor(guided_params,
+                                                       tokenizer, model_config)
+
+
+def _get_local_guided_decoding_logits_processor(
+        guided_params: GuidedDecodingParams, tokenizer: PreTrainedTokenizer,
+        model_config: ModelConfig) -> LogitsProcessor | None:
+
+    assert guided_params.backend != 'outlines'
+
     if guided_params.backend == 'lm-format-enforcer':
         from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
             get_local_lm_format_enforcer_guided_decoding_logits_processor)
```
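As background for the revert discussion above: per the reviewer, the offline path enters through the `LLM` class, while the OpenAI-compatible server awaits the async variant. A minimal offline sketch of this PR's scenario, guided decoding with `tokenizer_mode="mistral"` (model name, schema, and prompt are illustrative placeholders, not taken from the PR):

```python
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

# Offline entry point: guided decoding resolves through the local
# (synchronous) function rather than the async server path.
llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.3",
          tokenizer_mode="mistral")

# Placeholder JSON schema; a schema-constrained request goes through
# the backend dispatch shown in the diff above.
params = SamplingParams(guided_decoding=GuidedDecodingParams(
    json={"type": "object", "properties": {"answer": {"type": "string"}}}))

outputs = llm.generate(["Reply with a JSON object."], params)
print(outputs[0].outputs[0].text)
```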
Reviewer: Importing from `xgrammar_decoding.py` will also import and require xgrammar, in addition to your `import xgrammar as xgr` line below. I would recommend making your `test_pickle_xgrammar_tokenizer_data` function skip if xgrammar can't be imported, and simply including these two imports within that function. In fact, it may be best to add a dedicated xgrammar test file.
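A sketch of that suggestion (the test name comes from this thread, but `TokenizerData`'s constructor arguments are assumed for illustration, not confirmed by the diff shown here): `pytest.importorskip` skips the test when xgrammar is missing, and both xgrammar-dependent imports live inside the test body:

```python
import pickle

import pytest


def test_pickle_xgrammar_tokenizer_data():
    # Skip cleanly when the optional xgrammar dependency isn't installed.
    xgr = pytest.importorskip("xgrammar")
    from vllm.model_executor.guided_decoding.xgrammar_decoding import (
        TokenizerData)

    # Field name is assumed for illustration.
    tokenizer_data = TokenizerData(vocab_type=xgr.VocabType.RAW)
    depickled = pickle.loads(pickle.dumps(tokenizer_data))
    assert depickled.vocab_type == xgr.VocabType.RAW
```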
Reviewer: Thanks for the change, but you need to move `from vllm.model_executor.guided_decoding.xgrammar_decoding import TokenizerData` into the try-except as well.
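A sketch of that guard at module level (one plausible wiring; the flag name and skip mark are not from the PR): every xgrammar-dependent import sits inside the try block, so collecting the test module still works without xgrammar installed:

```python
import pytest

try:
    import xgrammar as xgr
    from vllm.model_executor.guided_decoding.xgrammar_decoding import (
        TokenizerData)
    XGRAMMAR_AVAILABLE = True
except ImportError:
    XGRAMMAR_AVAILABLE = False

# Skip every test in this module when xgrammar is unavailable.
pytestmark = pytest.mark.skipif(not XGRAMMAR_AVAILABLE,
                                reason="xgrammar is not installed")
```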