From ec10cb8511b7e30b8ff86caab2e4272ff3ceddca Mon Sep 17 00:00:00 2001
From: Maximilien de Bayser
Date: Fri, 11 Oct 2024 22:24:26 -0300
Subject: [PATCH] [BugFix] Fix tool call finish reason in streaming case
 (#9209)

Signed-off-by: Max de Bayser
---
 vllm/entrypoints/openai/serving_chat.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 1e85167ea7619..4931195ae0e02 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -538,10 +538,12 @@ async def chat_completion_stream_generator(
                     # any tokens that were generated but previously
                     # matched by partial json parsing
                     # only happens if we are NOT using guided decoding
+                    auto_tools_called = False
                     if tool_parser:
-                        index = len(
-                            tool_parser.prev_tool_call_arr) - 1 if len(
-                                tool_parser.prev_tool_call_arr) > 0 else 0
+                        auto_tools_called = len(
+                            tool_parser.prev_tool_call_arr) > 0
+                        index = len(tool_parser.prev_tool_call_arr
+                                    ) - 1 if auto_tools_called else 0
                     else:
                         index = 0
 
@@ -576,9 +578,7 @@ async def chat_completion_stream_generator(
                         delta=delta_message,
                         logprobs=logprobs,
                         finish_reason=output.finish_reason
-                        if not (tool_parser
-                                and len(tool_parser.prev_tool_call_arr))
-                        else "tool_calls",
+                        if not auto_tools_called else "tool_calls",
                         stop_reason=output.stop_reason)
                     chunk = ChatCompletionStreamResponse(
                         id=request_id,
@@ -680,8 +680,10 @@ async def chat_completion_full_generator(
             else:
                 logprobs = None
 
-            # by default, tools are not used.
-            tools_called = False
+            # In the OpenAI API the finish_reason is "tool_calls"
+            # if the tool choice is auto and the model produced a tool
+            # call. The same is not true for named function calls
+            auto_tools_called = False
 
             # if auto tools are not enabled, and a named tool choice using
             # outlines is not being used
@@ -703,7 +705,6 @@ async def chat_completion_full_generator(
                             name=request.tool_choice.function.name,
                             arguments=output.text))
                     ])
-                tools_called = True
 
             # if the request doesn't use tool choice
             # OR specifies to not use a tool
@@ -725,7 +726,10 @@ async def chat_completion_full_generator(
                 tool_call_info = tool_parser.extract_tool_calls(
                     output.text, request=request)
 
-                tools_called = tool_call_info.tools_called
+                # In the OpenAI API the finish_reason is "tool_calls"
+                # if the tool choice is auto and the model produced a tool
+                # call. The same is not true for named function calls
+                auto_tools_called = tool_call_info.tools_called
                 if tool_call_info.tools_called:
                     message = ChatMessage(role=role,
                                           content=tool_call_info.content,
@@ -748,7 +752,7 @@ async def chat_completion_full_generator(
                 index=output.index,
                 message=message,
                 logprobs=logprobs,
-                finish_reason="tool_calls" if tools_called else
+                finish_reason="tool_calls" if auto_tools_called else
                 output.finish_reason if output.finish_reason else "stop",
                 stop_reason=output.stop_reason)
             choices.append(choice_data)