
Commit 07a0d14

Optimize the Qwen tools ReAct prompt
1 parent b2008dc commit 07a0d14


7 files changed: +176 −156 lines changed


gpt_server/model_handler/__init__.py

Whitespace-only changes.

gpt_server/model_handler/chatglm_react.py

Lines changed: 54 additions & 0 deletions

@@ -0,0 +1,54 @@
from typing import Any, Dict, List, Tuple, Union
import json
import uuid

GLM4_TOOL_SUFFIX_PROMPT = "在调用上述函数时,请使用 Json 格式表示调用的参数。"

GLM4_TOOL_PROMPT = (
    "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持,"
    "{tool_text}"
)


def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str:
    """Render an OpenAI-style tools spec into the GLM-4 tool system prompt."""
    tool_text = ""
    for tool in tools:
        tool = tool["function"]
        tool_name = tool["name"]
        tool_text += f"\n\n## {tool_name}\n\n{json.dumps(tool, ensure_ascii=False, indent=4)}\n{GLM4_TOOL_SUFFIX_PROMPT}"
    return GLM4_TOOL_PROMPT.format(tool_text=tool_text)


def glm4_tool_extractor(content: str) -> Union[str, List[Dict[str, Any]]]:
    """Parse a GLM-4 tool call: tool name on the first line, JSON arguments on the second.

    Returns the original content unchanged when it is not a tool call.
    """
    lines = content.strip().split("\n")
    if len(lines) != 2:
        return content
    tool_name = lines[0].strip()
    tool_input = lines[1].strip()
    try:
        json.loads(tool_input)
    except json.JSONDecodeError:
        return content
    tool_calls = [
        {
            "id": "call_{}".format(uuid.uuid4().hex),
            "function": {"name": tool_name, "arguments": tool_input},
        }
    ]

    return tool_calls


if __name__ == "__main__":
    import json

    tools_str = """[{'type': 'function', 'function': {'name': 'track', 'description': '追踪指定股票的实时价格', 'parameters': {'type': 'object', 'properties': {'symbol': {'description': '需要追踪的股票代码', 'type': 'integer'}}, 'required': ['symbol']}}}, {'type': 'function', 'function': {'name': 'text-to-speech', 'description': '将文本转换为语音', 'parameters': {'type': 'object', 'properties': {'text': {'description': '需要转换成语音的文本', 'type': 'string'}, 'voice': {'description': '要使用的语音类型(男声、女声等', 'default': '男声', 'type': 'string'}, 'speed': {'description': '语音的速度(快、中等、慢等', 'default': '中等', 'type': 'string'}}, 'required': ['text']}}}]"""
    tools_str = tools_str.replace("'", '"')
    tools = json.loads(tools_str)

    res = glm4_tool_formatter(tools=tools)
    print(res)
    print()
    out = 'multiply\n{"first_int": 8, "second_int": 9}'
    r = glm4_tool_extractor(out)
    print(r)
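
For reference, a minimal sketch of how glm4_tool_extractor behaves on the two kinds of replies GLM-4 can produce, run after the definitions above (or with the function imported from gpt_server.model_handler.chatglm_react); the example strings are assumptions, not output captured from the model:

    # A tool call: tool name on the first line, JSON arguments on the second.
    print(glm4_tool_extractor('track\n{"symbol": 600519}'))
    # -> [{"id": "call_...", "function": {"name": "track", "arguments": '{"symbol": 600519}'}}]

    # Anything else (e.g. a plain natural-language answer) is returned unchanged.
    print(glm4_tool_extractor("The stock closed at 1700 today."))
    # -> the input string itself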

gpt_server/model_handler/qwen_react.py

Lines changed: 92 additions & 0 deletions

@@ -0,0 +1,92 @@
import re
from typing import Any, Dict, List, Tuple, Union
import json
import uuid

# default ReAct-style tool prompt used for Qwen models
TOOL_SYSTEM_PROMPT = """Answer the following questions as best you can. You have access to the following tools:

{tool_text}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question:"""


def qwen_tool_formatter(tools: List[Dict[str, Any]]) -> str:
    """Render an OpenAI-style tools spec into the Qwen ReAct system prompt."""
    tool_names = []
    param_text_list = []
    for tool in tools:
        tool = tool["function"]
        param_text = """{tool_name}: Call this tool to interact with the {tool_name} API. What is the {tool_name} API useful for? {description} Parameters: {parameters} Format the arguments as a JSON object."""
        parameters = []
        for name, param in tool["parameters"]["properties"].items():
            parameters.append(
                {
                    "name": name,
                    "description": param.get("description", ""),
                    "required": name in tool["parameters"].get("required", []),
                    "schema": {"type": param["type"]},
                }
            )
        param_text_str = param_text.format(
            tool_name=tool["name"],
            description=tool["description"],
            parameters=parameters,
        )
        param_text_list.append(param_text_str)

        tool_names.append(tool["name"])

    tool_text = "\n\n".join(param_text_list).strip()
    return TOOL_SYSTEM_PROMPT.format(
        tool_text=tool_text,
        tool_names=", ".join(tool_names),
    )


def qwen_tool_extractor(content: str) -> Union[str, List[Dict[str, Any]]]:
    """Extract the last Action / Action Input pair from a ReAct response.

    Returns the original content unchanged when no valid tool call is found.
    """
    i = content.rfind("Action:")
    j = content.rfind("Action Input:")
    if i == -1 or j == -1 or j < i:
        return content
    tool_name = content[i + len("Action:") : j].strip().strip(".")
    tool_input = content[j + len("Action Input:") :].strip()
    try:
        json.loads(tool_input)
    except json.JSONDecodeError:
        return content
    tool_calls = []
    tool_call = {
        "id": "call_{}".format(uuid.uuid4().hex),
        "function": {"name": tool_name, "arguments": tool_input},
    }
    tool_calls.append(tool_call)

    return tool_calls


if __name__ == "__main__":
    import json

    tools_str = """[{'type': 'function', 'function': {'name': 'track', 'description': '追踪指定股票的实时价格', 'parameters': {'type': 'object', 'properties': {'symbol': {'description': '需要追踪的股票代码', 'type': 'integer'}}, 'required': ['symbol']}}}, {'type': 'function', 'function': {'name': 'text-to-speech', 'description': '将文本转换为语音', 'parameters': {'type': 'object', 'properties': {'text': {'description': '需要转换成语音的文本', 'type': 'string'}, 'voice': {'description': '要使用的语音类型(男声、女声等', 'default': '男声', 'type': 'string'}, 'speed': {'description': '语音的速度(快、中等、慢等', 'default': '中等', 'type': 'string'}}, 'required': ['text']}}}]"""
    tools_str = tools_str.replace("'", '"')
    tools = json.loads(tools_str)
    res = qwen_tool_formatter(tools=tools)
    print(res)
    out = 'Action: multiply.\nAction Input: {"first_int": 8, "second_int": 9}\n'
    r = qwen_tool_extractor(out)
    print("\n\n")
    print(r)
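
To show how the formatter, the extractor, and the "Observation:" stop word (added to QwenWorker below) fit together, here is a minimal ReAct driver-loop sketch. call_model and run_tool are hypothetical placeholders, not part of this commit:

    import json

    from gpt_server.model_handler.qwen_react import qwen_tool_extractor, qwen_tool_formatter


    def react_loop(question, tools, call_model, run_tool, max_steps=5):
        # Build the ReAct system prompt from the OpenAI-style tools spec.
        messages = [
            {"role": "system", "content": qwen_tool_formatter(tools)},
            {"role": "user", "content": question},
        ]
        response = ""
        for _ in range(max_steps):
            # call_model (hypothetical) is expected to stop at "Observation:".
            response = call_model(messages, stop=["Observation:"])
            tool_calls = qwen_tool_extractor(response)
            if isinstance(tool_calls, str):  # no Action/Action Input -> final answer text
                return tool_calls
            call = tool_calls[0]["function"]
            observation = run_tool(call["name"], json.loads(call["arguments"]))
            # Feed the real tool result back so the model can continue the Thought/Action cycle.
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "user", "content": f"Observation: {observation}"})
        return response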

gpt_server/model_handler/tools.py

Lines changed: 0 additions & 152 deletions
This file was deleted.

gpt_server/model_handler/utils.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
from gpt_server.model_handler.qwen_react import qwen_tool_formatter
from gpt_server.model_handler.chatglm_react import glm4_tool_formatter


def add_tools2messages(params: dict, model_adapter: str = "default"):
    """Inject the adapter-specific tool system prompt into the message list."""
    messages = params["messages"]
    if params.get("tools", None):  # tools were passed in
        system_content = None
        if model_adapter == "qwen":
            system_content = qwen_tool_formatter(tools=params.get("tools"))

        elif model_adapter == "chatglm4":
            system_content = glm4_tool_formatter(tools=params.get("tools"))

        if system_content is not None:
            if messages[0]["role"] != "system":
                messages.insert(0, {"role": "system", "content": system_content})
            else:  # overwrite an existing system message
                messages[0]["content"] = system_content

    return messages
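
A quick usage sketch of add_tools2messages; the params dict mirrors an OpenAI-style chat request, and the tool spec is the "track" tool from the demos above (the user question is an assumption):

    from gpt_server.model_handler.utils import add_tools2messages

    params = {
        "messages": [{"role": "user", "content": "What is the current price of stock 600519?"}],
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "track",
                    "description": "追踪指定股票的实时价格",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "symbol": {"description": "需要追踪的股票代码", "type": "integer"}
                        },
                        "required": ["symbol"],
                    },
                },
            }
        ],
    }

    messages = add_tools2messages(params=params, model_adapter="qwen")
    # messages[0] is now a system message holding the ReAct prompt built by
    # qwen_tool_formatter; the original user message follows it unchanged.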

gpt_server/model_worker/chatglm.py

Lines changed: 3 additions & 1 deletion
@@ -4,7 +4,8 @@
 import torch
 from loguru import logger
 from gpt_server.model_worker.base import ModelWorkerBase
-from gpt_server.model_handler.tools import add_tools2messages, glm4_tool_extractor
+from gpt_server.model_handler.chatglm_react import glm4_tool_extractor
+from gpt_server.model_handler.utils import add_tools2messages


 class ChatGLMWorker(ModelWorkerBase):
@@ -109,6 +110,7 @@ async def generate_stream_gate(self, params):
             if params.get("tools", False) and isinstance(
                 tool_calls, list
             ):  # tools were passed in
+                logger.debug(f"工具解析成功, tool_calls: {tool_calls}")
                 ret["tool_calls"] = tool_calls
             yield json.dumps(ret).encode() + b"\0"
         except torch.cuda.OutOfMemoryError as e:

gpt_server/model_worker/qwen.py

Lines changed: 7 additions & 3 deletions
@@ -5,7 +5,8 @@
 import torch

 from gpt_server.model_worker.base import ModelWorkerBase
-from gpt_server.model_handler.tools import add_tools2messages, default_tool_extractor
+from gpt_server.model_handler.qwen_react import qwen_tool_extractor
+from gpt_server.model_handler.utils import add_tools2messages


 class QwenWorker(ModelWorkerBase):
@@ -39,6 +40,8 @@ def __init__(
         self.stop = [
             self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
         ]
+        # extend with extra stop words (stop before the model writes its own "Observation:")
+        self.stop.extend(["Observation:"])
         logger.info(f"qwen停用词: {self.stop}")
         self.other_config = {
             "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}"
@@ -50,7 +53,7 @@ async def generate_stream_gate(self, params):
         logger.info(f"worker_id: {self.worker_id}")
         try:
             model_type = getattr(self.model_config, "model_type", "qwen")
-            messages = add_tools2messages(params=params, model_adapter="default")
+            messages = add_tools2messages(params=params, model_adapter="qwen")

             if isinstance(messages, list):
                 task = "chat"
@@ -94,10 +97,11 @@ async def generate_stream_gate(self, params):

             yield json.dumps(ret).encode() + b"\0"
             # ------ add tool_calls ------
-            tool_calls = default_tool_extractor(response)
+            tool_calls = qwen_tool_extractor(response)
             if params.get("tools", False) and isinstance(
                 tool_calls, list
             ):  # tools were passed in
+                logger.debug(f"工具解析成功, tool_calls: {tool_calls}")
                 ret["tool_calls"] = tool_calls
             yield json.dumps(ret).encode() + b"\0"
         except torch.cuda.OutOfMemoryError as e:
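
Both workers stream results as JSON chunks terminated by a null byte (json.dumps(ret).encode() + b"\0"), with tool_calls attached once ReAct parsing succeeds. A minimal client-side decoding sketch, assuming raw_stream is some iterable of bytes and that each chunk carries a "text" field alongside the tool_calls shown above:

    import json


    def iter_chunks(raw_stream):
        # Re-assemble null-byte-delimited JSON chunks from an iterable of byte pieces.
        buffer = b""
        for piece in raw_stream:
            buffer += piece
            while b"\0" in buffer:
                chunk, buffer = buffer.split(b"\0", 1)
                if chunk:
                    yield json.loads(chunk)


    def read_response(raw_stream):
        text, tool_calls = "", None
        for chunk in iter_chunks(raw_stream):
            text = chunk.get("text", text)      # field name assumed, not shown in this diff
            if chunk.get("tool_calls"):         # set by the worker when a tool call is extracted
                tool_calls = chunk["tool_calls"]
        return text, tool_calls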
