From a52610fb19ca32c39fa431559e674ea50c803568 Mon Sep 17 00:00:00 2001 From: randoentity Date: Sun, 24 Nov 2024 13:40:33 +0100 Subject: [PATCH 1/2] workaround for tool calling --- endpoints/OAI/utils/chat_completion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py index 14a2243..9fe9aa8 100644 --- a/endpoints/OAI/utils/chat_completion.py +++ b/endpoints/OAI/utils/chat_completion.py @@ -454,11 +454,11 @@ async def generate_tool_calls( if gen["stop_str"] in tool_data.tool_call_start: if "text" in gen: # non streaming, all generations will have the text they generated - pre_tool_prompt = await apply_chat_template(data, gen["text"]) + pre_tool_prompt, _ = await apply_chat_template(data, gen["text"]) elif current_generations is not None: # streaming, we wont have text in the generation, # we'll have to use the current_generations - pre_tool_prompt = await apply_chat_template(data, current_generations) + pre_tool_prompt, _ = await apply_chat_template(data, current_generations) gen_tasks.append( asyncio.create_task( From 2e06fb01d3703e71075ade2a5c202309f68d52ad Mon Sep 17 00:00:00 2001 From: kingbri Date: Thu, 28 Nov 2024 23:27:59 -0500 Subject: [PATCH 2/2] OAI: Pass mm_embeddings to tool call generation Don't exclude the vision embeddings when regenerating for a tool call. Signed-off-by: kingbri --- endpoints/OAI/utils/chat_completion.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py index 9fe9aa8..a646924 100644 --- a/endpoints/OAI/utils/chat_completion.py +++ b/endpoints/OAI/utils/chat_completion.py @@ -454,16 +454,23 @@ async def generate_tool_calls( if gen["stop_str"] in tool_data.tool_call_start: if "text" in gen: # non streaming, all generations will have the text they generated - pre_tool_prompt, _ = await apply_chat_template(data, gen["text"]) + pre_tool_prompt, mm_embeddings = await apply_chat_template( + data, gen["text"] + ) elif current_generations is not None: # streaming, we wont have text in the generation, # we'll have to use the current_generations - pre_tool_prompt, _ = await apply_chat_template(data, current_generations) + pre_tool_prompt, mm_embeddings = await apply_chat_template( + data, current_generations + ) gen_tasks.append( asyncio.create_task( model.container.generate( - pre_tool_prompt, request.state.id, **gen_params + pre_tool_prompt, + request.state.id, + embeddings=mm_embeddings, + **gen_params, ) ) )