优化glm4的 tools

shell-nlp · shell-nlp · commit 72d86c18aaf4 · 2024-06-23T23:02:27.000+08:00
diff --git a/gpt_server/model_handler/chatglm_react.py b/gpt_server/model_handler/chatglm_react.py
@@ -4,37 +4,56 @@
 
 GLM4_TOOL_SUFFIX_PROMPT = "在调用上述函数时，请使用 Json 格式表示调用的参数。"
 
-GLM4_TOOL_PROMPT = (
-    "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持，"
-    "{tool_text}"
-)
+GLM4_TOOL_PROMPT = """你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。
+
+# 可用工具
+{tool_text}
+Use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin!
+
+Question:
+"""
 
 
 def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str:
-    tool_text = ""
+    tool_text = "\n"
+    tool_names = []
     for tool in tools:
         tool = tool["function"]
         tool_name = tool["name"]
-        tool_text += f"\n\n## {tool_name}\n\n{json.dumps(tool, ensure_ascii=False, indent=4)}\n{GLM4_TOOL_SUFFIX_PROMPT}"
-    return GLM4_TOOL_PROMPT.format(tool_text=tool_text)
+        tool_text += f"## {tool_name}\n\n{json.dumps(tool, ensure_ascii=False, indent=4)}\n{GLM4_TOOL_SUFFIX_PROMPT}\n\n"
+        tool_names.append(tool_name)
+    return GLM4_TOOL_PROMPT.format(
+        tool_text=tool_text, tool_names=", ".join(tool_names)
+    ).strip()
 
 
 def glm4_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]:
-    lines = content.strip().split("\n")
-    if len(lines) != 2:
-        return content
-    tool_name = lines[0].strip()
-    tool_input = lines[1].strip()
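+    i, j = content.rfind("Action:"), content.rfind("Action Input:")
+    if i == -1 or j < i:  # no "Action: ... Action Input: ..." pair, so this is a plain reply
+        return content
+    tool_name, tool_input = content[i + len("Action:") : j].strip().strip("."), content[j + len("Action Input:") :].strip()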
     try:
-        json.loads(tool_input)
+        json.loads(tool_input)  # validation only: the raw JSON string is what gets returned
     except json.JSONDecodeError:
         return content
-    tool_calls = [
-        {
-            "id": "call_{}".format(uuid.uuid4().hex),
-            "function": {"name": tool_name, "arguments": tool_input},
-        }
-    ]
+    tool_calls = []
+    tool_call = {
+        "index": 0,
+        "id": "call_{}".format(uuid.uuid4().hex),
+        "function": {"name": tool_name, "arguments": tool_input},
+    }
+    tool_calls.append(tool_call)
 
     return tool_calls
 
diff --git a/gpt_server/model_worker/chatglm.py b/gpt_server/model_worker/chatglm.py
@@ -31,9 +31,14 @@ def __init__(
         )
 
         self.stop = ["<|user|>", "<|observation|>", "<|endoftext|>"]
-        self.stop_words_ids = [
-            self.tokenizer.convert_tokens_to_ids(i) for i in self.stop
-        ]
+        # 拓展额外的stop
+        self.stop.extend(["Observation:"])
+        self.stop_words_ids = []
+        for i in self.stop:
+            try:
+                self.stop_words_ids.append(self.tokenizer.convert_tokens_to_ids(i))
+            except Exception as e:
+                logger.warning(f"chatglm stop word {i!r} could not be converted to a token id: {e}")
 
         logger.info(f"chatglm停用词: {self.stop}")
 
@@ -71,7 +76,7 @@ async def generate_stream_gate(self, params):
             if isinstance(messages, list):
                 task = "chat"
                 for msg in messages:
-                    if msg["role"] == "function":
+                    if msg["role"] == "function" or msg["role"] == "tool":
                         msg["role"] = "observation"
 
                 if messages[-1]["role"] == "user":