diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py
index 6362cb756..cfde4cfd9 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py
@@ -894,7 +894,61 @@
         "https://huggingface.co/uiuc-convai/CALM-405B",
         "UIUC + Oumi"
         "Meta Llama 3 Community"
-    ]
+    ],
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": [
+        "Llama-3.1-8B-Instruct (Prompt)",
+        "https://llama.meta.com/llama3",
+        "Meta",
+        "Meta Llama 3 Community",
+    ],
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": [
+        "Llama-3.1-70B-Instruct (Prompt)",
+        "https://llama.meta.com/llama3",
+        "Meta",
+        "Meta Llama 3 Community",
+    ],
+    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": [
+        "Llama-3.1-405B-Instruct (Prompt)",
+        "https://llama.meta.com/llama3",
+        "Meta",
+        "Meta Llama 3 Community",
+    ],
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo": [
+        "Llama-3.3-70B-Instruct (Prompt)",
+        "https://llama.meta.com/llama3",
+        "Meta",
+        "Meta Llama 3 Community",
+    ],
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": [
+        "Mixtral-8x7B-Instruct-v0.1 (Prompt)",
+        "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "Mistral AI",
+        "apache-2.0",
+    ],
+    "mistralai/Mistral-7B-Instruct-v0.1": [
+        "Mistral-7B-Instruct-v0.1 (Prompt)",
+        "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
+        "Mistral AI",
+        "apache-2.0",
+    ],
+    "Qwen/Qwen2.5-7B-Instruct-Turbo": [
+        "Qwen2.5-7B-Instruct (Prompt)",
+        "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
+        "Qwen",
+        "apache-2.0",
+    ],
+    "Qwen/Qwen2.5-72B-Instruct-Turbo": [
+        "Qwen2.5-72B-Instruct (Prompt)",
+        "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
+        "Qwen",
+        "qwen",
+    ],
+    "deepseek-ai/DeepSeek-V3": [
+        "DeepSeek-V3 (FC)",
+        "https://api-docs.deepseek.com/news/news1226",
+        "DeepSeek",
+        "DeepSeek License",
+    ],
 }
 
 INPUT_PRICE_PER_MILLION_TOKEN = {
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py
new file mode 100644
index 000000000..33ef7b942
--- /dev/null
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py
@@ -0,0 +1,104 @@
+import os
+import time
+
+from bfcl.model_handler.model_style import ModelStyle
+from bfcl.model_handler.api_inference.openai import OpenAIHandler
+from openai import OpenAI
+
+
+class TogetherHandler(OpenAIHandler):
+    def __init__(self, model_name, temperature) -> None:
+        super().__init__(model_name, temperature)
+        self.model_style = ModelStyle.TOGETHER_AI
+        self.client = OpenAI(
+            base_url="https://api.together.xyz/v1",
+            api_key=os.getenv("TOGETHER_API_KEY"),
+        )
+
+    #### FC methods ####
+
+    def _query_FC(self, inference_data: dict):
+        message: list[dict] = inference_data["message"]
+        tools = inference_data["tools"]
+        inference_data["inference_input_log"] = {"message": message, "tools": tools}
+
+        start_time = time.time()
+        if len(tools) > 0:
+            api_response = self.client.chat.completions.create(
+                messages=message,
+                model=self.model_name,
+                temperature=self.temperature,
+                tools=tools,
+            )
+        else:
+            api_response = self.client.chat.completions.create(
+                messages=message,
+                model=self.model_name,
+                temperature=self.temperature,
+            )
+        end_time = time.time()
+
+        return api_response, end_time - start_time
+
+    def _pre_query_processing_FC(self, inference_data: dict, test_entry: dict) -> dict:
+        return super()._pre_query_processing_FC(inference_data, test_entry)
+
+    def _compile_tools(self, inference_data: dict, test_entry: dict) -> dict:
+        return super()._compile_tools(inference_data, test_entry)
+
+    def _parse_query_response_FC(self, api_response: any) -> dict:
+        try:
+            model_responses = [
+                {func_call.function.name: func_call.function.arguments}
+                for func_call in api_response.choices[0].message.tool_calls
+            ]
+            tool_calls = [
+                tool_call.model_dump()
+                for tool_call in api_response.choices[0].message.tool_calls
+            ]
+        except (TypeError, AttributeError):  # no tool calls in the response; fall back to plain text
+            model_responses = api_response.choices[0].message.content
+            tool_calls = []
+
+        return {
+            "model_responses": model_responses,
+            "model_responses_message_for_chat_history": tool_calls,
+            "input_token": api_response.usage.prompt_tokens,
+            "output_token": api_response.usage.completion_tokens,
+        }
+
+    def add_first_turn_message_FC(
+        self, inference_data: dict, first_turn_message: list[dict]
+    ) -> dict:
+        return super().add_first_turn_message_FC(inference_data, first_turn_message)
+
+    def _add_next_turn_user_message_FC(
+        self, inference_data: dict, user_message: list[dict]
+    ) -> dict:
+        return super()._add_next_turn_user_message_FC(inference_data, user_message)
+
+    def _add_assistant_message_FC(
+        self, inference_data: dict, model_response_data: dict
+    ) -> dict:
+        inference_data["message"].append(
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": model_response_data[
+                    "model_responses_message_for_chat_history"
+                ],
+            }
+        )
+        return inference_data
+
+    def _add_execution_results_FC(
+        self, inference_data: dict, execution_results: list[str], model_response_data: dict
+    ) -> dict:
+        for execution_result in execution_results:
+            tool_message = {
+                "role": "tool",
+                "content": execution_result,
+            }
+            inference_data["message"].append(tool_message)
+
+        return inference_data
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index 236417661..871de6987 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -13,6 +13,7 @@
 from bfcl.model_handler.api_inference.nova import NovaHandler
 from bfcl.model_handler.api_inference.nvidia import NvidiaHandler
 from bfcl.model_handler.api_inference.openai import OpenAIHandler
+from bfcl.model_handler.api_inference.together import TogetherHandler
 from bfcl.model_handler.api_inference.writer import WriterHandler
 from bfcl.model_handler.api_inference.yi import YiHandler
 from bfcl.model_handler.local_inference.bielik import BielikHandler
@@ -102,6 +103,15 @@
     # "yi-large-fc": YiHandler,  # Their API is under maintenance, and will not be back online in the near future
     "palmyra-x-004": WriterHandler,
     "grok-beta": GrokHandler,
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": TogetherHandler,
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": TogetherHandler,
+    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": TogetherHandler,
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo": TogetherHandler,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": TogetherHandler,
+    "mistralai/Mistral-7B-Instruct-v0.1": TogetherHandler,
+    "Qwen/Qwen2.5-7B-Instruct-Turbo": TogetherHandler,
+    "Qwen/Qwen2.5-72B-Instruct-Turbo": TogetherHandler,
+    "deepseek-ai/DeepSeek-V3": TogetherHandler,
 }
 
 # Inference through local hosting
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py b/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py
index 7c26f8e85..21432e10d 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py
@@ -10,6 +10,7 @@ class ModelStyle(Enum):
     Google = "google"
     AMAZON = "amazon"
     FIREWORK_AI = "firework_ai"
+    TOGETHER_AI = "together_ai"
     NEXUS = "nexus"
     OSSMODEL = "ossmodel"
     COHERE = "cohere"
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py
index 59f10593f..e6e9f54f6 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py
@@ -147,6 +147,7 @@ def convert_to_tool(functions, mapping, model_style):
         ModelStyle.Anthropic,
         ModelStyle.Google,
         ModelStyle.FIREWORK_AI,
+        ModelStyle.TOGETHER_AI,
         ModelStyle.WRITER,
         ModelStyle.AMAZON,
     ]:
@@ -166,6 +167,7 @@ def convert_to_tool(functions, mapping, model_style):
         ModelStyle.OpenAI,
         ModelStyle.Mistral,
         ModelStyle.FIREWORK_AI,
+        ModelStyle.TOGETHER_AI,
         ModelStyle.WRITER,
     ]:
         oai_tool.append({"type": "function", "function": item})
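
Reviewer note (a minimal sketch, not part of the diff): one way to smoke-test the new handler, assuming TOGETHER_API_KEY is exported and that the parent OpenAIHandler tolerates construction without OPENAI_API_KEY (if it does not, export a dummy value). The method names, argument shapes, and dict keys below come directly from together.py above; _query_FC and _parse_query_response_FC are private hooks normally driven by the BFCL runner and are called directly here only for illustration.

    import os
    from bfcl.model_handler.api_inference.together import TogetherHandler

    assert os.getenv("TOGETHER_API_KEY"), "export TOGETHER_API_KEY first"

    # Instantiate the handler just as handler_map.py would resolve it.
    handler = TogetherHandler(
        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", temperature=0.001
    )

    # An empty tools list exercises the plain-chat branch of _query_FC.
    inference_data = {
        "message": [{"role": "user", "content": "What is the capital of California?"}],
        "tools": [],
    }
    api_response, latency = handler._query_FC(inference_data)
    parsed = handler._parse_query_response_FC(api_response)
    print(parsed["model_responses"], f"({latency:.2f}s)")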