From 419e315ce9ff7f1c9bbab439aee0289279ea30b3 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Tue, 10 Dec 2024 14:58:08 -0300 Subject: [PATCH] Fix streaming for granite tool call when <|tool_call|> is present Signed-off-by: Max de Bayser --- vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py index 00917c866e496..dae481a2154a1 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py @@ -88,7 +88,11 @@ def extract_tool_calls_streaming( ) -> Union[DeltaMessage, None]: start_idx = consume_space(0, current_text) - if not current_text or current_text[start_idx] != '[': + if current_text[start_idx:].startswith(self.bot_token): + start_idx = consume_space(start_idx + len(self.bot_token), + current_text) + if not current_text or start_idx >= len(current_text)\ + or current_text[start_idx] != '[': return DeltaMessage(content=delta_text) # bit mask flags for partial JSON parsing. If the name hasn't been