From e39400a4b60d28ff5c0a1a5194068c928adcaf98 Mon Sep 17 00:00:00 2001 From: Maximilien de Bayser Date: Wed, 11 Dec 2024 01:51:40 -0300 Subject: [PATCH] Fix streaming for granite tool call when <|tool_call|> is present (#11069) Signed-off-by: Max de Bayser --- vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py index 00917c866e496..dae481a2154a1 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py @@ -88,7 +88,11 @@ def extract_tool_calls_streaming( ) -> Union[DeltaMessage, None]: start_idx = consume_space(0, current_text) - if not current_text or current_text[start_idx] != '[': + if current_text[start_idx:].startswith(self.bot_token): + start_idx = consume_space(start_idx + len(self.bot_token), + current_text) + if not current_text or start_idx >= len(current_text)\ + or current_text[start_idx] != '[': return DeltaMessage(content=delta_text) # bit mask flags for partial JSON parsing. If the name hasn't been