From c78bd22fdacdb28a9236a3632aa63c87266895d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=98=8E=E5=A4=A9?= <460342015@qq.com> Date: Tue, 20 Feb 2024 14:28:58 +0800 Subject: [PATCH 1/5] Gpts app v0.4 (#1170) --- assets/schema/dbgpt.sql | 4 ++- dbgpt/agent/agents/base_agent.py | 28 +++++++++---------- dbgpt/agent/agents/base_agent_new.py | 26 ++++++++--------- .../agents/expand/code_assistant_agent.py | 2 +- .../retrieve_summary_assistant_agent.py | 6 ++-- .../agents/expand/summary_assistant_agent.py | 2 +- dbgpt/agent/common/schema.py | 4 +-- dbgpt/agent/memory/base.py | 6 ++-- dbgpt/agent/memory/default_gpts_memory.py | 6 ++-- dbgpt/agent/memory/gpts_memory.py | 4 +-- dbgpt/agent/memory/gpts_memory_storage.py | 12 ++++---- dbgpt/app/scene/chat_agent/chat.py | 2 +- dbgpt/serve/agent/agents/db_gpts_memory.py | 4 +-- dbgpt/serve/agent/db/gpts_messages_db.py | 10 +++---- .../serve/agent/team/layout/agent_operator.py | 6 ++-- .../agent/team/layout/team_awel_layout.py | 2 +- .../agent/team/layout/team_awel_layout_new.py | 2 +- dbgpt/serve/agent/team/plan/team_auto_plan.py | 2 +- 18 files changed, 65 insertions(+), 63 deletions(-) diff --git a/assets/schema/dbgpt.sql b/assets/schema/dbgpt.sql index ca3d3fcae..75139a0cb 100644 --- a/assets/schema/dbgpt.sql +++ b/assets/schema/dbgpt.sql @@ -197,6 +197,8 @@ CREATE TABLE IF NOT EXISTS `prompt_manage` `sys_code` varchar(255) DEFAULT NULL COMMENT 'system app ', `created_at` datetime DEFAULT NULL COMMENT 'create time', `updated_at` datetime DEFAULT NULL COMMENT 'last update time', + `team_mode` varchar(255) NULL COMMENT 'agent team work mode', + PRIMARY KEY (`id`), UNIQUE KEY `uk_gpts_conversations` (`conv_id`), KEY `idx_gpts_name` (`gpts_name`) @@ -230,7 +232,7 @@ CREATE TABLE `gpts_messages` ( `model_name` varchar(255) DEFAULT NULL COMMENT 'message generate model', `rounds` int(11) NOT NULL COMMENT 'dialogue turns', `content` text COMMENT 'Content of the speech', - `current_gogal` text COMMENT 'The target corresponding to the current message', + `current_goal` text COMMENT 'The target corresponding to the current message', `context` text COMMENT 'Current conversation context', `review_info` text COMMENT 'Current conversation review info', `action_report` text COMMENT 'Current conversation action report', diff --git a/dbgpt/agent/agents/base_agent.py b/dbgpt/agent/agents/base_agent.py index 6b03786fd..ea319761f 100644 --- a/dbgpt/agent/agents/base_agent.py +++ b/dbgpt/agent/agents/base_agent.py @@ -220,7 +220,7 @@ def append_message(self, message: Optional[Dict], role, sender: Agent) -> bool: "context", "action_report", "review_info", - "current_gogal", + "current_goal", "model_name", ) if k in message @@ -246,7 +246,7 @@ def append_message(self, message: Optional[Dict], role, sender: Agent) -> bool: receiver=self.name, role=role, rounds=self.consecutive_auto_reply_counter, - current_gogal=oai_message.get("current_gogal", None), + current_goal=oai_message.get("current_goal", None), content=oai_message.get("content", None), context=json.dumps(oai_message["context"], ensure_ascii=False) if "context" in oai_message @@ -458,16 +458,16 @@ def process_now_message( sender, rely_messages: Optional[List[Dict]] = None, ): - current_gogal = current_message.get("current_gogal", None) + current_goal = current_message.get("current_goal", None) ### Convert and tailor the information in collective memory into contextual memory available to the current Agent - current_gogal_messages = self._gpts_message_to_ai_message( + current_goal_messages = 
self._gpts_message_to_ai_message( self.memory.message_memory.get_between_agents( - self.agent_context.conv_id, self.name, sender.name, current_gogal + self.agent_context.conv_id, self.name, sender.name, current_goal ) ) - if current_gogal_messages is None or len(current_gogal_messages) <= 0: + if current_goal_messages is None or len(current_goal_messages) <= 0: current_message["role"] = ModelMessageRoleType.HUMAN - current_gogal_messages = [current_message] + current_goal_messages = [current_message] ### relay messages cut_messages = [] if rely_messages: @@ -479,13 +479,13 @@ def process_now_message( else: cut_messages.extend(self._rely_messages) - if len(current_gogal_messages) < self.dialogue_memory_rounds: - cut_messages.extend(current_gogal_messages) + if len(current_goal_messages) < self.dialogue_memory_rounds: + cut_messages.extend(current_goal_messages) else: # TODO: allocate historical information based on token budget - cut_messages.extend(current_gogal_messages[:2]) + cut_messages.extend(current_goal_messages[:2]) # end_round = self.dialogue_memory_rounds - 2 - cut_messages.extend(current_gogal_messages[-3:]) + cut_messages.extend(current_goal_messages[-3:]) return cut_messages async def a_system_fill_param(self): @@ -502,7 +502,7 @@ async def a_generate_reply( ## 0.New message build new_message = {} new_message["context"] = message.get("context", None) - new_message["current_gogal"] = message.get("current_gogal", None) + new_message["current_goal"] = message.get("current_goal", None) ## 1.LLM Reasonging await self.a_system_fill_param() @@ -576,7 +576,7 @@ async def a_verify_reply( ## Send error messages to yourself for retrieval optimization and increase the number of retrievals retry_message = {} retry_message["context"] = message.get("context", None) - retry_message["current_gogal"] = message.get("current_gogal", None) + retry_message["current_goal"] = message.get("current_goal", None) retry_message["model_name"] = message.get("model_name", None) retry_message["content"] = fail_reason ## Use the original sender to send the retry message to yourself @@ -603,7 +603,7 @@ async def a_retry_chat( "context": json.loads(last_message.context) if last_message.context else None, - "current_gogal": last_message.current_gogal, + "current_goal": last_message.current_goal, "review_info": json.loads(last_message.review_info) if last_message.review_info else None, diff --git a/dbgpt/agent/agents/base_agent_new.py b/dbgpt/agent/agents/base_agent_new.py index 33bed8d59..2109c95e4 100644 --- a/dbgpt/agent/agents/base_agent_new.py +++ b/dbgpt/agent/agents/base_agent_new.py @@ -323,7 +323,7 @@ async def a_initiate_chat( await self.a_send( { "content": context["message"], - "current_gogal": context["message"], + "current_goal": context["message"], }, recipient, reviewer, @@ -352,7 +352,7 @@ async def _a_append_message( "context", "action_report", "review_info", - "current_gogal", + "current_goal", "model_name", ) if k in message @@ -364,7 +364,7 @@ async def _a_append_message( receiver=self.profile, role=role, rounds=self.consecutive_auto_reply_counter, - current_gogal=oai_message.get("current_gogal", None), + current_goal=oai_message.get("current_goal", None), content=oai_message.get("content", None), context=json.dumps(oai_message["context"], ensure_ascii=False) if "context" in oai_message @@ -501,7 +501,7 @@ def _init_reply_message(self, recive_message): """ new_message = {} new_message["context"] = recive_message.get("context", None) - new_message["current_gogal"] = 
recive_message.get("current_gogal", None) + new_message["current_goal"] = recive_message.get("current_goal", None) return new_message def _convert_to_ai_message( @@ -544,19 +544,19 @@ def _load_thinking_messages( sender, rely_messages: Optional[List[Dict]] = None, ) -> Optional[List[Dict]]: - current_gogal = receive_message.get("current_gogal", None) + current_goal = receive_message.get("current_goal", None) ### Convert and tailor the information in collective memory into contextual memory available to the current Agent - current_gogal_messages = self._convert_to_ai_message( + current_goal_messages = self._convert_to_ai_message( self.memory.message_memory.get_between_agents( - self.agent_context.conv_id, self.profile, sender.profile, current_gogal + self.agent_context.conv_id, self.profile, sender.profile, current_goal ) ) # When there is no target and context, the current received message is used as the target problem - if current_gogal_messages is None or len(current_gogal_messages) <= 0: + if current_goal_messages is None or len(current_goal_messages) <= 0: receive_message["role"] = ModelMessageRoleType.HUMAN - current_gogal_messages = [receive_message] + current_goal_messages = [receive_message] ### relay messages cut_messages = [] @@ -572,14 +572,14 @@ def _load_thinking_messages( cut_messages.extend(rely_messages) # TODO: allocate historical information based on token budget - if len(current_gogal_messages) < 5: - cut_messages.extend(current_gogal_messages) + if len(current_goal_messages) < 5: + cut_messages.extend(current_goal_messages) else: # For the time being, the smallest size of historical message records will be used by default. # Use the first two rounds of messages to understand the initial goals - cut_messages.extend(current_gogal_messages[:2]) + cut_messages.extend(current_goal_messages[:2]) # Use information from the last three rounds of communication to ensure that current thinking knows what happened and what to do in the last communication - cut_messages.extend(current_gogal_messages[-3:]) + cut_messages.extend(current_goal_messages[-3:]) return cut_messages def _new_system_message(self, content): diff --git a/dbgpt/agent/agents/expand/code_assistant_agent.py b/dbgpt/agent/agents/expand/code_assistant_agent.py index ed7c2e672..c84e2917f 100644 --- a/dbgpt/agent/agents/expand/code_assistant_agent.py +++ b/dbgpt/agent/agents/expand/code_assistant_agent.py @@ -42,7 +42,7 @@ def __init__(self, **kwargs): self._init_actions([CodeAction]) async def a_correctness_check(self, message: Optional[Dict]): - task_gogal = message.get("current_gogal", None) + task_gogal = message.get("current_goal", None) action_report = message.get("action_report", None) task_result = "" if action_report: diff --git a/dbgpt/agent/agents/expand/retrieve_summary_assistant_agent.py b/dbgpt/agent/agents/expand/retrieve_summary_assistant_agent.py index 21783f3d7..f86553d3a 100644 --- a/dbgpt/agent/agents/expand/retrieve_summary_assistant_agent.py +++ b/dbgpt/agent/agents/expand/retrieve_summary_assistant_agent.py @@ -196,7 +196,7 @@ async def a_generate_reply( ## New message build new_message = {} new_message["context"] = current_messages[-1].get("context", None) - new_message["current_gogal"] = current_messages[-1].get("current_gogal", None) + new_message["current_goal"] = current_messages[-1].get("current_goal", None) new_message["role"] = "assistant" new_message["content"] = user_question new_message["model_name"] = model @@ -206,7 +206,7 @@ async def a_generate_reply( ## Summary message build 
summary_message = {} summary_message["context"] = message.get("context", None) - summary_message["current_gogal"] = message.get("current_gogal", None) + summary_message["current_goal"] = message.get("current_goal", None) summaries = "" count = 0 @@ -262,7 +262,7 @@ async def a_generate_reply( async def a_verify(self, message: Optional[Dict]): self.update_system_message(self.CHECK_RESULT_SYSTEM_MESSAGE) - current_goal = message.get("current_gogal", None) + current_goal = message.get("current_goal", None) action_report = message.get("action_report", None) task_result = "" if action_report: diff --git a/dbgpt/agent/agents/expand/summary_assistant_agent.py b/dbgpt/agent/agents/expand/summary_assistant_agent.py index f663ba614..a7d6055d0 100644 --- a/dbgpt/agent/agents/expand/summary_assistant_agent.py +++ b/dbgpt/agent/agents/expand/summary_assistant_agent.py @@ -35,7 +35,7 @@ def __init__(self, **kwargs): self._init_actions([BlankAction]) # async def a_correctness_check(self, message: Optional[Dict]): - # current_goal = message.get("current_gogal", None) + # current_goal = message.get("current_goal", None) # action_report = message.get("action_report", None) # task_result = "" # if action_report: diff --git a/dbgpt/agent/common/schema.py b/dbgpt/agent/common/schema.py index 48818eca0..35e06ba3d 100644 --- a/dbgpt/agent/common/schema.py +++ b/dbgpt/agent/common/schema.py @@ -43,7 +43,7 @@ class GptsMessage: role: str content: str rounds: Optional[int] - current_gogal: str = None + current_goal: str = None context: Optional[str] = None review_info: Optional[str] = None action_report: Optional[str] = None @@ -61,7 +61,7 @@ def from_dict(d: Dict[str, Any]) -> GptsMessage: content=d["content"], rounds=d["rounds"], model_name=d["model_name"], - current_gogal=d["current_gogal"], + current_goal=d["current_goal"], context=d["context"], review_info=d["review_info"], action_report=d["action_report"], diff --git a/dbgpt/agent/memory/base.py b/dbgpt/agent/memory/base.py index c8e2c0360..25c51b754 100644 --- a/dbgpt/agent/memory/base.py +++ b/dbgpt/agent/memory/base.py @@ -57,7 +57,7 @@ class GptsMessage: role: str content: str rounds: Optional[int] - current_gogal: str = None + current_goal: str = None context: Optional[str] = None review_info: Optional[str] = None action_report: Optional[str] = None @@ -75,7 +75,7 @@ def from_dict(d: Dict[str, Any]) -> GptsMessage: content=d["content"], rounds=d["rounds"], model_name=d["model_name"], - current_gogal=d["current_gogal"], + current_goal=d["current_goal"], context=d["context"], review_info=d["review_info"], action_report=d["action_report"], @@ -203,7 +203,7 @@ def get_between_agents( conv_id: str, agent1: str, agent2: str, - current_gogal: Optional[str] = None, + current_goal: Optional[str] = None, ) -> Optional[List[GptsMessage]]: """ Query information related to an agent diff --git a/dbgpt/agent/memory/default_gpts_memory.py b/dbgpt/agent/memory/default_gpts_memory.py index 0e06078d7..1e50346f8 100644 --- a/dbgpt/agent/memory/default_gpts_memory.py +++ b/dbgpt/agent/memory/default_gpts_memory.py @@ -100,11 +100,11 @@ def get_between_agents( conv_id: str, agent1: str, agent2: str, - current_gogal: Optional[str] = None, + current_goal: Optional[str] = None, ) -> Optional[List[GptsMessage]]: - if current_gogal: + if current_goal: result = self.df.query( - f"conv_id==@conv_id and ((sender==@agent1 and receiver==@agent2) or (sender==@agent2 and receiver==@agent1)) and current_gogal==@current_gogal" + f"conv_id==@conv_id and ((sender==@agent1 and 
receiver==@agent2) or (sender==@agent2 and receiver==@agent1)) and current_goal==@current_goal" ) else: result = self.df.query( diff --git a/dbgpt/agent/memory/gpts_memory.py b/dbgpt/agent/memory/gpts_memory.py index ec2cd5afc..6b5a785de 100644 --- a/dbgpt/agent/memory/gpts_memory.py +++ b/dbgpt/agent/memory/gpts_memory.py @@ -58,7 +58,7 @@ async def one_chat_competions(self, conv_id: str): count = count + 1 if count == 1: continue - if not message.current_gogal or len(message.current_gogal) <= 0: + if not message.current_goal or len(message.current_goal) <= 0: if len(temp_group) > 0: vis_items.append(await self._plan_vis_build(temp_group)) temp_group.clear() @@ -69,7 +69,7 @@ async def one_chat_competions(self, conv_id: str): vis_items.append(await self._messages_to_agents_vis(temp_messages)) temp_messages.clear() - last_gogal = message.current_gogal + last_gogal = message.current_goal temp_group[last_gogal].append(message) if len(temp_group) > 0: diff --git a/dbgpt/agent/memory/gpts_memory_storage.py b/dbgpt/agent/memory/gpts_memory_storage.py index 3b68b374c..3064277a5 100644 --- a/dbgpt/agent/memory/gpts_memory_storage.py +++ b/dbgpt/agent/memory/gpts_memory_storage.py @@ -184,7 +184,7 @@ class GptsMessageStorage(StorageItem): role: str content: str rounds: Optional[int] - current_gogal: str = None + current_goal: str = None context: Optional[str] = None review_info: Optional[str] = None action_report: Optional[str] = None @@ -204,7 +204,7 @@ def from_dict(d: Dict[str, Any]): content=d["content"], rounds=d["rounds"], model_name=d["model_name"], - current_gogal=d["current_gogal"], + current_goal=d["current_goal"], context=d["context"], review_info=d["review_info"], action_report=d["action_report"], @@ -239,7 +239,7 @@ def to_gpts_message(self) -> GptsMessage: role=self.role, content=self.content, rounds=self.rounds, - current_gogal=self.current_gogal, + current_goal=self.current_goal, context=self.context, review_info=self.review_info, action_report=self.action_report, @@ -258,7 +258,7 @@ def from_gpts_message(gpts_message: GptsMessage) -> "StoragePromptTemplate": role=gpts_message.role, content=gpts_message.content, rounds=gpts_message.rounds, - current_gogal=gpts_message.current_gogal, + current_goal=gpts_message.current_goal, context=gpts_message.context, review_info=gpts_message.review_info, action_report=gpts_message.action_report, @@ -344,9 +344,9 @@ def get_between_agents( conv_id: str, agent1: str, agent2: str, - current_gogal: Optional[str] = None, + current_goal: Optional[str] = None, ) -> Optional[List[GptsMessage]]: - return super().get_between_agents(conv_id, agent1, agent2, current_gogal) + return super().get_between_agents(conv_id, agent1, agent2, current_goal) def get_by_conv_id(self, conv_id: str) -> Optional[List[GptsMessage]]: return super().get_by_conv_id(conv_id) diff --git a/dbgpt/app/scene/chat_agent/chat.py b/dbgpt/app/scene/chat_agent/chat.py index 8aa7eb21b..7f29d6019 100644 --- a/dbgpt/app/scene/chat_agent/chat.py +++ b/dbgpt/app/scene/chat_agent/chat.py @@ -40,7 +40,7 @@ def __init__(self, chat_param: Dict): # load select plugin agent_module = CFG.SYSTEM_APP.get_component( - ComponentType.AGENT_HUB, ModulePlugin + ComponentType.PLUGIN_HUB, ModulePlugin ) self.plugins_prompt_generator = agent_module.load_select_plugin( self.plugins_prompt_generator, self.select_plugins diff --git a/dbgpt/serve/agent/agents/db_gpts_memory.py b/dbgpt/serve/agent/agents/db_gpts_memory.py index ec42c53eb..14f7f28f1 100644 --- a/dbgpt/serve/agent/agents/db_gpts_memory.py +++ 
b/dbgpt/serve/agent/agents/db_gpts_memory.py @@ -94,10 +94,10 @@ def get_between_agents( conv_id: str, agent1: str, agent2: str, - current_gogal: Optional[str] = None, + current_goal: Optional[str] = None, ) -> Optional[List[GptsMessage]]: db_results = self.gpts_message.get_between_agents( - conv_id, agent1, agent2, current_gogal + conv_id, agent1, agent2, current_goal ) results = [] db_results = sorted(db_results, key=lambda x: x.rounds) diff --git a/dbgpt/serve/agent/db/gpts_messages_db.py b/dbgpt/serve/agent/db/gpts_messages_db.py index 981cc9525..ab033dc3a 100644 --- a/dbgpt/serve/agent/db/gpts_messages_db.py +++ b/dbgpt/serve/agent/db/gpts_messages_db.py @@ -39,7 +39,7 @@ class GptsMessagesEntity(Model): content = Column( Text(length=2**31 - 1), nullable=True, comment="Content of the speech" ) - current_gogal = Column( + current_goal = Column( Text, nullable=True, comment="The target corresponding to the current message" ) context = Column(Text, nullable=True, comment="Current conversation context") @@ -78,7 +78,7 @@ def append(self, entity: dict): model_name=entity.get("model_name", None), context=entity.get("context", None), rounds=entity.get("rounds", None), - current_gogal=entity.get("current_gogal", None), + current_goal=entity.get("current_goal", None), review_info=entity.get("review_info", None), action_report=entity.get("action_report", None), ) @@ -120,7 +120,7 @@ def get_between_agents( conv_id: str, agent1: str, agent2: str, - current_gogal: Optional[str] = None, + current_goal: Optional[str] = None, ) -> Optional[List[GptsMessagesEntity]]: session = self.get_raw_session() gpts_messages = session.query(GptsMessagesEntity) @@ -139,9 +139,9 @@ def get_between_agents( ), ) ) - if current_gogal: + if current_goal: gpts_messages = gpts_messages.filter( - GptsMessagesEntity.current_gogal == current_gogal + GptsMessagesEntity.current_goal == current_goal ) result = gpts_messages.order_by(GptsMessagesEntity.rounds).all() session.close() diff --git a/dbgpt/serve/agent/team/layout/agent_operator.py b/dbgpt/serve/agent/team/layout/agent_operator.py index b7d401717..5a4c7c207 100644 --- a/dbgpt/serve/agent/team/layout/agent_operator.py +++ b/dbgpt/serve/agent/team/layout/agent_operator.py @@ -48,7 +48,7 @@ async def map(self, input_value: AgentGenerateContext) -> AgentGenerateContext: now_rely_messages: List[Dict] = [] # Isolate the message delivery mechanism and pass it to the operator - input_value.message["current_gogal"] = ( + input_value.message["current_goal"] = ( f"[{self._agent.name if self._agent.name else self._agent.profile}]:" + input_value.message["content"] ) @@ -139,14 +139,14 @@ async def map( agent = await self.get_agent(input_value) if agent.fixed_subgoal and len(agent.fixed_subgoal) > 0: # Isolate the message delivery mechanism and pass it to the operator - input_value.message["current_gogal"] = ( + input_value.message["current_goal"] = ( f"[{agent.name if agent.name else agent.profile}]:" + agent.fixed_subgoal ) now_message["content"] = agent.fixed_subgoal else: # Isolate the message delivery mechanism and pass it to the operator - input_value.message["current_gogal"] = ( + input_value.message["current_goal"] = ( f"[{agent.name if agent.name else agent.profile}]:" + input_value.message["content"] ) diff --git a/dbgpt/serve/agent/team/layout/team_awel_layout.py b/dbgpt/serve/agent/team/layout/team_awel_layout.py index f69322b05..e49127744 100644 --- a/dbgpt/serve/agent/team/layout/team_awel_layout.py +++ b/dbgpt/serve/agent/team/layout/team_awel_layout.py @@ -45,7 
+45,7 @@ async def a_act( start_message_context: AgentGenerateContext = AgentGenerateContext( message={ "content": message, - "current_gogal": message, + "current_goal": message, }, sender=self, reviewer=reviewer, diff --git a/dbgpt/serve/agent/team/layout/team_awel_layout_new.py b/dbgpt/serve/agent/team/layout/team_awel_layout_new.py index 22767bcbd..7c5013a29 100644 --- a/dbgpt/serve/agent/team/layout/team_awel_layout_new.py +++ b/dbgpt/serve/agent/team/layout/team_awel_layout_new.py @@ -58,7 +58,7 @@ async def a_act( start_message_context: AgentGenerateContext = AgentGenerateContext( message={ "content": message, - "current_gogal": message, + "current_goal": message, }, sender=self, reviewer=reviewer, diff --git a/dbgpt/serve/agent/team/plan/team_auto_plan.py b/dbgpt/serve/agent/team/plan/team_auto_plan.py index 175a3efbe..a2808bcd6 100644 --- a/dbgpt/serve/agent/team/plan/team_auto_plan.py +++ b/dbgpt/serve/agent/team/plan/team_auto_plan.py @@ -161,7 +161,7 @@ async def a_act( now_plan: GptsPlan = todo_plans[0] current_goal_message = { "content": now_plan.sub_task_content, - "current_gogal": now_plan.sub_task_content, + "current_goal": now_plan.sub_task_content, "context": { "plan_task": now_plan.sub_task_content, "plan_task_num": now_plan.sub_task_num, From 32e1554282fdc3d60fe91a8926cd9fda02d39c1c Mon Sep 17 00:00:00 2001 From: Aries-ckt <916701291@qq.com> Date: Wed, 21 Feb 2024 10:24:12 +0800 Subject: [PATCH 2/5] feat:add rag awel operator view metadata. (#1174) --- dbgpt/core/awel/flow/base.py | 4 + dbgpt/core/awel/trigger/http_trigger.py | 51 ++++ .../core/interface/operators/llm_operator.py | 39 +++ dbgpt/rag/operators/knowledge.py | 72 +++++- dbgpt/rag/operators/rewrite.py | 54 ++++ dbgpt/rag/operators/summary.py | 65 +++++ dbgpt/serve/rag/operators/knowledge_space.py | 242 ++++++++++++++++++ examples/awel/simple_rag_summary_example.py | 2 +- examples/rag/simple_rag_embedding_example.py | 2 +- examples/rag/simple_rag_retriever_example.py | 2 +- 10 files changed, 527 insertions(+), 6 deletions(-) create mode 100644 dbgpt/serve/rag/operators/knowledge_space.py diff --git a/dbgpt/core/awel/flow/base.py b/dbgpt/core/awel/flow/base.py index 04a3b5c79..835b12632 100644 --- a/dbgpt/core/awel/flow/base.py +++ b/dbgpt/core/awel/flow/base.py @@ -112,6 +112,7 @@ def __init__(self, label: str, description: str): "output_parser": _CategoryDetail("Output Parser", "Parse the output of LLM model"), "common": _CategoryDetail("Common", "The common operator"), "agent": _CategoryDetail("Agent", "The agent operator"), + "rag": _CategoryDetail("RAG", "The RAG operator"), } @@ -124,6 +125,7 @@ class OperatorCategory(str, Enum): OUTPUT_PARSER = "output_parser" COMMON = "common" AGENT = "agent" + RAG = "rag" def label(self) -> str: """Get the label of the category.""" @@ -163,6 +165,7 @@ class OperatorType(str, Enum): "common": _CategoryDetail("Common", "The common resource"), "prompt": _CategoryDetail("Prompt", "The prompt resource"), "agent": _CategoryDetail("Agent", "The agent resource"), + "rag": _CategoryDetail("RAG", "The resource"), } @@ -176,6 +179,7 @@ class ResourceCategory(str, Enum): COMMON = "common" PROMPT = "prompt" AGENT = "agent" + RAG = "rag" def label(self) -> str: """Get the label of the category.""" diff --git a/dbgpt/core/awel/trigger/http_trigger.py b/dbgpt/core/awel/trigger/http_trigger.py index 75fbdb955..03c6edf95 100644 --- a/dbgpt/core/awel/trigger/http_trigger.py +++ b/dbgpt/core/awel/trigger/http_trigger.py @@ -1031,3 +1031,54 @@ def __init__(self, key: str = "user_input", 
**kwargs): async def map(self, request_body: CommonLLMHttpRequestBody) -> Dict[str, Any]: """Map the request body to response body.""" return {self._key: request_body.messages} + + +class RequestedParsedOperator(MapOperator[CommonLLMHttpRequestBody, str]): + """User input parsed operator.""" + + metadata = ViewMetadata( + label="Request Body Parsed To String Operator", + name="request_body_to_str__parsed_operator", + category=OperatorCategory.COMMON, + parameters=[ + Parameter.build_from( + "Key", + "key", + str, + optional=True, + default="", + description="The key of the dict, link 'user_input'", + ) + ], + inputs=[ + IOField.build_from( + "Request Body", + "request_body", + CommonLLMHttpRequestBody, + description="The request body of the API endpoint", + ) + ], + outputs=[ + IOField.build_from( + "User Input String", + "user_input_str", + str, + description="The user input dict of the API endpoint", + ) + ], + description="User input parsed operator", + ) + + def __init__(self, key: str = "user_input", **kwargs): + """Initialize a UserInputParsedOperator.""" + self._key = key + super().__init__(**kwargs) + + async def map(self, request_body: CommonLLMHttpRequestBody) -> str: + """Map the request body to response body.""" + dict_value = request_body.dict() + if not self._key or self._key not in dict_value: + raise ValueError( + f"Prefix key {self._key} is not a valid key of the request body" + ) + return dict_value[self._key] diff --git a/dbgpt/core/interface/operators/llm_operator.py b/dbgpt/core/interface/operators/llm_operator.py index a12624fe8..eb0c2a101 100644 --- a/dbgpt/core/interface/operators/llm_operator.py +++ b/dbgpt/core/interface/operators/llm_operator.py @@ -457,3 +457,42 @@ async def transform_stream(self, output_iter: AsyncIterator[ModelOutput]): decoded_unicode = model_output.text.replace("\ufffd", "") msg = decoded_unicode.replace("\n", "\\n") yield f"data:{msg}\n\n" + + +class StringOutput2ModelOutputOperator(MapOperator[str, ModelOutput]): + """Map String to ModelOutput.""" + + metadata = ViewMetadata( + label="Map String to ModelOutput", + name="string_2_model_output_operator", + category=OperatorCategory.COMMON, + description="Map String to ModelOutput.", + parameters=[], + inputs=[ + IOField.build_from( + "String", + "input_value", + str, + description="The input value of the operator.", + ), + ], + outputs=[ + IOField.build_from( + "Model Output", + "input_value", + ModelOutput, + description="The input value of the operator.", + ), + ], + ) + + def __int__(self, **kwargs): + """Create a new operator.""" + super().__init__(**kwargs) + + async def map(self, input_value: str) -> ModelOutput: + """Map the model output to the common response body.""" + return ModelOutput( + text=input_value, + error_code=500, + ) diff --git a/dbgpt/rag/operators/knowledge.py b/dbgpt/rag/operators/knowledge.py index 02de6a3e2..e7e74a19c 100644 --- a/dbgpt/rag/operators/knowledge.py +++ b/dbgpt/rag/operators/knowledge.py @@ -1,26 +1,92 @@ from typing import Any, List, Optional from dbgpt.core.awel import MapOperator +from dbgpt.core.awel.flow import ( + IOField, + OperatorCategory, + OptionValue, + Parameter, + ViewMetadata, +) from dbgpt.core.awel.task.base import IN from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType from dbgpt.rag.knowledge.factory import KnowledgeFactory class KnowledgeOperator(MapOperator[Any, Any]): - """Knowledge Operator.""" + """Knowledge Factory Operator.""" + + metadata = ViewMetadata( + label="Knowledge Factory Operator", + 
name="knowledge_operator", + category=OperatorCategory.RAG, + description="The knowledge operator.", + inputs=[ + IOField.build_from( + "knowledge datasource", + "knowledge datasource", + dict, + "knowledge datasource", + ) + ], + outputs=[ + IOField.build_from( + "Knowledge", + "Knowledge", + Knowledge, + description="Knowledge", + ) + ], + parameters=[ + Parameter.build_from( + label="datasource", + name="datasource", + type=str, + optional=True, + default="DOCUMENT", + description="datasource", + ), + Parameter.build_from( + label="knowledge_type", + name="knowledge type", + type=str, + optional=True, + options=[ + OptionValue( + label="DOCUMENT", + name="DOCUMENT", + value=KnowledgeType.DOCUMENT.name, + ), + OptionValue(label="URL", name="URL", value=KnowledgeType.URL.name), + OptionValue( + label="TEXT", name="TEXT", value=KnowledgeType.TEXT.name + ), + ], + default=KnowledgeType.DOCUMENT.name, + description="knowledge type", + ), + ], + documentation_url="https://github.com/openai/openai-python", + ) def __init__( - self, knowledge_type: Optional[KnowledgeType] = KnowledgeType.DOCUMENT, **kwargs + self, + datasource: Optional[str] = None, + knowledge_type: Optional[str] = KnowledgeType.DOCUMENT.name, + **kwargs ): """Init the query rewrite operator. Args: knowledge_type: (Optional[KnowledgeType]) The knowledge type. """ super().__init__(**kwargs) - self._knowledge_type = knowledge_type + self._datasource = datasource + self._knowledge_type = KnowledgeType.get_by_value(knowledge_type) async def map(self, datasource: IN) -> Knowledge: """knowledge operator.""" + if self._datasource: + datasource = self._datasource return await self.blocking_func_to_async( KnowledgeFactory.create, datasource, self._knowledge_type ) diff --git a/dbgpt/rag/operators/rewrite.py b/dbgpt/rag/operators/rewrite.py index bade2677a..d911c0b0a 100644 --- a/dbgpt/rag/operators/rewrite.py +++ b/dbgpt/rag/operators/rewrite.py @@ -2,6 +2,7 @@ from dbgpt.core import LLMClient from dbgpt.core.awel import MapOperator +from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata from dbgpt.core.awel.task.base import IN from dbgpt.rag.retriever.rewrite import QueryRewrite @@ -9,6 +10,59 @@ class QueryRewriteOperator(MapOperator[Any, Any]): """The Rewrite Operator.""" + metadata = ViewMetadata( + label="Query Rewrite Operator", + name="query_rewrite_operator", + category=OperatorCategory.RAG, + description="query rewrite operator.", + inputs=[ + IOField.build_from("query_context", "query_context", dict, "query context") + ], + outputs=[ + IOField.build_from( + "rewritten queries", + "queries", + List[str], + description="rewritten queries", + ) + ], + parameters=[ + Parameter.build_from( + "LLM Client", + "llm_client", + LLMClient, + optional=True, + default=None, + description="The LLM Client.", + ), + Parameter.build_from( + label="model name", + name="model_name", + type=str, + optional=True, + default="gpt-3.5-turbo", + description="llm model name", + ), + Parameter.build_from( + label="prompt language", + name="language", + type=str, + optional=True, + default="en", + description="prompt language", + ), + Parameter.build_from( + label="nums", + name="nums", + type=int, + optional=True, + default=5, + description="rewrite query nums", + ), + ], + documentation_url="https://github.com/openai/openai-python", + ) + def __init__( self, llm_client: Optional[LLMClient], diff --git a/dbgpt/rag/operators/summary.py b/dbgpt/rag/operators/summary.py index fefee07fc..4f9ce0ae6 100644 --- 
a/dbgpt/rag/operators/summary.py +++ b/dbgpt/rag/operators/summary.py @@ -1,12 +1,77 @@ from typing import Any, Optional from dbgpt.core import LLMClient +from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata from dbgpt.core.awel.task.base import IN +from dbgpt.rag.knowledge.base import Knowledge from dbgpt.serve.rag.assembler.summary import SummaryAssembler from dbgpt.serve.rag.operators.base import AssemblerOperator class SummaryAssemblerOperator(AssemblerOperator[Any, Any]): + metadata = ViewMetadata( + label="Summary Operator", + name="summary_assembler_operator", + category=OperatorCategory.RAG, + description="The summary assembler operator.", + inputs=[ + IOField.build_from( + "Knowledge", "knowledge", Knowledge, "knowledge datasource" + ) + ], + outputs=[ + IOField.build_from( + "document summary", + "summary", + str, + description="document summary", + ) + ], + parameters=[ + Parameter.build_from( + "LLM Client", + "llm_client", + LLMClient, + optional=True, + default=None, + description="The LLM Client.", + ), + Parameter.build_from( + label="model name", + name="model_name", + type=str, + optional=True, + default="gpt-3.5-turbo", + description="llm model name", + ), + Parameter.build_from( + label="prompt language", + name="language", + type=str, + optional=True, + default="en", + description="prompt language", + ), + Parameter.build_from( + label="max_iteration_with_llm", + name="max_iteration_with_llm", + type=int, + optional=True, + default=5, + description="prompt language", + ), + Parameter.build_from( + label="concurrency_limit_with_llm", + name="concurrency_limit_with_llm", + type=int, + optional=True, + default=3, + description="The concurrency limit with llm", + ), + ], + documentation_url="https://github.com/openai/openai-python", + ) + def __init__( self, llm_client: Optional[LLMClient], diff --git a/dbgpt/serve/rag/operators/knowledge_space.py b/dbgpt/serve/rag/operators/knowledge_space.py new file mode 100644 index 000000000..b1ea66988 --- /dev/null +++ b/dbgpt/serve/rag/operators/knowledge_space.py @@ -0,0 +1,242 @@ +from functools import reduce +from typing import List, Optional + +from dbgpt.app.knowledge.api import knowledge_space_service +from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest +from dbgpt.app.knowledge.service import CFG, KnowledgeService +from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG +from dbgpt.core import ( + BaseMessage, + ChatPromptTemplate, + HumanPromptTemplate, + ModelMessage, +) +from dbgpt.core.awel import JoinOperator, MapOperator +from dbgpt.core.awel.flow import ( + IOField, + OperatorCategory, + OperatorType, + OptionValue, + Parameter, + ViewMetadata, +) +from dbgpt.core.awel.task.base import IN, OUT +from dbgpt.core.interface.operators.prompt_operator import BasePromptBuilderOperator +from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory +from dbgpt.rag.retriever.embedding import EmbeddingRetriever +from dbgpt.storage.vector_store.base import VectorStoreConfig +from dbgpt.storage.vector_store.connector import VectorStoreConnector +from dbgpt.util.function_utils import rearrange_args_by_type + + +class SpaceRetrieverOperator(MapOperator[IN, OUT]): + """knowledge space retriever operator.""" + + metadata = ViewMetadata( + label="Knowledge Space Operator", + name="space_operator", + category=OperatorCategory.RAG, + description="knowledge space retriever operator.", + inputs=[IOField.build_from("query", "query", str, "user query")], + outputs=[ + 
IOField.build_from( + "related chunk content", + "related chunk content", + List, + description="related chunk content", + ) + ], + parameters=[ + Parameter.build_from( + "Space Name", + "space_name", + str, + options=[ + OptionValue(label=space.name, name=space.name, value=space.name) + for space in knowledge_space_service.get_knowledge_space( + KnowledgeSpaceRequest() + ) + ], + optional=False, + default=None, + description="space name.", + ) + ], + documentation_url="https://github.com/openai/openai-python", + ) + + def __init__(self, space_name: str, recall_score: Optional[float] = 0.3, **kwargs): + """ + Args: + space_name (str): The space name. + recall_score (Optional[float], optional): The recall score. Defaults to 0.3. + """ + self._space_name = space_name + self._recall_score = recall_score + self._service = KnowledgeService() + embedding_factory = CFG.SYSTEM_APP.get_component( + "embedding_factory", EmbeddingFactory + ) + embedding_fn = embedding_factory.create( + model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] + ) + config = VectorStoreConfig(name=self._space_name, embedding_fn=embedding_fn) + self._vector_store_connector = VectorStoreConnector( + vector_store_type=CFG.VECTOR_STORE_TYPE, + vector_store_config=config, + ) + + super().__init__(**kwargs) + + async def map(self, query: IN) -> OUT: + """Map input value to output value. + + Args: + input_value (IN): The input value. + + Returns: + OUT: The output value. + """ + space_context = self._service.get_space_context(self._space_name) + top_k = ( + CFG.KNOWLEDGE_SEARCH_TOP_SIZE + if space_context is None + else int(space_context["embedding"]["topk"]) + ) + recall_score = ( + CFG.KNOWLEDGE_SEARCH_RECALL_SCORE + if space_context is None + else float(space_context["embedding"]["recall_score"]) + ) + embedding_retriever = EmbeddingRetriever( + top_k=top_k, + vector_store_connector=self._vector_store_connector, + ) + if isinstance(query, str): + candidates = await embedding_retriever.aretrieve_with_scores( + query, recall_score + ) + elif isinstance(query, list): + candidates = [ + await embedding_retriever.aretrieve_with_scores(q, recall_score) + for q in query + ] + candidates = reduce(lambda x, y: x + y, candidates) + return [candidate.content for candidate in candidates] + + +class KnowledgeSpacePromptBuilderOperator( + BasePromptBuilderOperator, JoinOperator[List[ModelMessage]] +): + """The operator to build the prompt with static prompt. + + The prompt will pass to this operator. 
+ """ + + metadata = ViewMetadata( + label="Knowledge Space Prompt Builder Operator", + name="knowledge_space_prompt_builder_operator", + description="Build messages from prompt template and chat history.", + operator_type=OperatorType.JOIN, + category=OperatorCategory.CONVERSION, + parameters=[ + Parameter.build_from( + "Chat Prompt Template", + "prompt", + ChatPromptTemplate, + description="The chat prompt template.", + ), + Parameter.build_from( + "History Key", + "history_key", + str, + optional=True, + default="chat_history", + description="The key of history in prompt dict.", + ), + Parameter.build_from( + "String History", + "str_history", + bool, + optional=True, + default=False, + description="Whether to convert the history to string.", + ), + ], + inputs=[ + IOField.build_from( + "user input", + "user_input", + str, + is_list=False, + description="user input", + ), + IOField.build_from( + "space related context", + "related_context", + List, + is_list=False, + description="context of knowledge space.", + ), + IOField.build_from( + "History", + "history", + BaseMessage, + is_list=True, + description="The history.", + ), + ], + outputs=[ + IOField.build_from( + "Formatted Messages", + "formatted_messages", + ModelMessage, + is_list=True, + description="The formatted messages.", + ) + ], + ) + + def __init__( + self, + prompt: ChatPromptTemplate, + history_key: str = "chat_history", + check_storage: bool = True, + str_history: bool = False, + **kwargs, + ): + """Create a new history dynamic prompt builder operator. + Args: + + prompt (ChatPromptTemplate): The chat prompt template. + history_key (str, optional): The key of history in prompt dict. Defaults to "chat_history". + check_storage (bool, optional): Whether to check the storage. Defaults to True. + str_history (bool, optional): Whether to convert the history to string. Defaults to False. 
+ """ + + self._prompt = prompt + self._history_key = history_key + self._str_history = str_history + BasePromptBuilderOperator.__init__(self, check_storage=check_storage) + JoinOperator.__init__(self, combine_function=self.merge_context, **kwargs) + + @rearrange_args_by_type + async def merge_context( + self, + user_input: str, + related_context: List[str], + history: Optional[List[BaseMessage]], + ) -> List[ModelMessage]: + """Merge the prompt and history.""" + prompt_dict = dict() + prompt_dict["context"] = related_context + for prompt in self._prompt.messages: + if isinstance(prompt, HumanPromptTemplate): + prompt_dict[prompt.input_variables[0]] = user_input + + if history: + if self._str_history: + prompt_dict[self._history_key] = BaseMessage.messages_to_string(history) + else: + prompt_dict[self._history_key] = history + return await self.format_prompt(self._prompt, prompt_dict) diff --git a/examples/awel/simple_rag_summary_example.py b/examples/awel/simple_rag_summary_example.py index adc3b54ad..eb958934e 100644 --- a/examples/awel/simple_rag_summary_example.py +++ b/examples/awel/simple_rag_summary_example.py @@ -59,7 +59,7 @@ async def map(self, input_value: TriggerReqBody) -> Dict: request_handle_task = RequestHandleOperator() path_operator = MapOperator(lambda request: request["url"]) # build knowledge operator - knowledge_operator = KnowledgeOperator(knowledge_type=KnowledgeType.URL) + knowledge_operator = KnowledgeOperator(knowledge_type=KnowledgeType.URL.name) # build summary assembler operator summary_operator = SummaryAssemblerOperator( llm_client=OpenAILLMClient(), language="en" diff --git a/examples/rag/simple_rag_embedding_example.py b/examples/rag/simple_rag_embedding_example.py index 96d47ccc8..86f248153 100644 --- a/examples/rag/simple_rag_embedding_example.py +++ b/examples/rag/simple_rag_embedding_example.py @@ -76,7 +76,7 @@ async def map(self, chunks: List) -> str: "/examples/rag/embedding", methods="POST", request_body=TriggerReqBody ) request_handle_task = RequestHandleOperator() - knowledge_operator = KnowledgeOperator(knowledge_type=KnowledgeType.URL) + knowledge_operator = KnowledgeOperator(knowledge_type=KnowledgeType.URL.name) vector_connector = _create_vector_connector() url_parser_operator = MapOperator(map_function=lambda x: x["url"]) embedding_operator = EmbeddingAssemblerOperator( diff --git a/examples/rag/simple_rag_retriever_example.py b/examples/rag/simple_rag_retriever_example.py index e04f4ed0c..b9c7ca97f 100644 --- a/examples/rag/simple_rag_retriever_example.py +++ b/examples/rag/simple_rag_retriever_example.py @@ -39,7 +39,7 @@ ..code-block:: shell DBGPT_SERVER="http://127.0.0.1:5555" curl -X POST $DBGPT_SERVER/api/v1/awel/trigger/examples/rag/retrieve \ - -H "Content-Type: application/json" -d '{ + -H "Content-Type: application/json" -d '{ \ "query": "what is awel talk about?" 
}' """ From 02abcb721863d16155ce41c1065a9934d277fb44 Mon Sep 17 00:00:00 2001 From: lcxadml <78339638+lcxadml@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:39:32 +0800 Subject: [PATCH 3/5] fix(web): optimize i18n name --- dbgpt/app/static/404.html | 2 +- dbgpt/app/static/404/index.html | 2 +- .../_buildManifest.js | 2 +- .../_ssgManifest.js | 0 ...a2ef534f93.js => 3353.3ad7804da2e77248.js} | 6 ++--- ...9d82525d5f.js => 4134.182782e7d7f66109.js} | 2 +- ...d6595180a9.js => _app-2d2fe1efcb16f7f3.js} | 2 +- .../chunks/pages/app-75e39485cc4a24b3.js | 1 + .../chunks/pages/app-90415a5fdf367a91.js | 1 - ...393c5f8ad7.js => chat-1434817946faf8ff.js} | 2 +- .../pages/flow/canvas-70f324e20b0113c0.js | 1 - .../pages/flow/canvas-d313d1fe05a1d9e1.js | 1 + ...375a57a.js => webpack-7f29e208c7b75fbc.js} | 2 +- dbgpt/app/static/agent/index.html | 2 +- dbgpt/app/static/app/index.html | 2 +- dbgpt/app/static/chat/index.html | 2 +- dbgpt/app/static/database/index.html | 2 +- dbgpt/app/static/flow/canvas/index.html | 2 +- dbgpt/app/static/flow/index.html | 2 +- dbgpt/app/static/index.html | 2 +- dbgpt/app/static/knowledge/chunk/index.html | 2 +- dbgpt/app/static/knowledge/index.html | 2 +- dbgpt/app/static/models/index.html | 2 +- dbgpt/app/static/prompt/index.html | 2 +- web/app/i18n.ts | 24 ++++++++++++++++++- web/components/app/agent-panel.tsx | 9 ++++--- web/components/app/app-card.tsx | 10 ++++---- web/components/app/app-modal.tsx | 16 ++++++------- web/components/app/resource-card.tsx | 2 +- web/components/flow/add-nodes.tsx | 4 ++-- web/package-lock.json | 9 ++++--- web/pages/app/index.tsx | 4 ++-- 32 files changed, 72 insertions(+), 52 deletions(-) rename dbgpt/app/static/_next/static/{Pz72WCJeXl85v4kN2kstR => Lt1JpSOs1VILN-GjkD676}/_buildManifest.js (75%) rename dbgpt/app/static/_next/static/{Pz72WCJeXl85v4kN2kstR => Lt1JpSOs1VILN-GjkD676}/_ssgManifest.js (100%) rename dbgpt/app/static/_next/static/chunks/{9341.879a24a2ef534f93.js => 3353.3ad7804da2e77248.js} (81%) rename dbgpt/app/static/_next/static/chunks/{4134.5e76ff9d82525d5f.js => 4134.182782e7d7f66109.js} (99%) rename dbgpt/app/static/_next/static/chunks/pages/{_app-4fa488d6595180a9.js => _app-2d2fe1efcb16f7f3.js} (87%) create mode 100644 dbgpt/app/static/_next/static/chunks/pages/app-75e39485cc4a24b3.js delete mode 100644 dbgpt/app/static/_next/static/chunks/pages/app-90415a5fdf367a91.js rename dbgpt/app/static/_next/static/chunks/pages/{chat-b09234393c5f8ad7.js => chat-1434817946faf8ff.js} (98%) delete mode 100644 dbgpt/app/static/_next/static/chunks/pages/flow/canvas-70f324e20b0113c0.js create mode 100644 dbgpt/app/static/_next/static/chunks/pages/flow/canvas-d313d1fe05a1d9e1.js rename dbgpt/app/static/_next/static/chunks/{webpack-6d79785e1375a57a.js => webpack-7f29e208c7b75fbc.js} (61%) diff --git a/dbgpt/app/static/404.html b/dbgpt/app/static/404.html index e5bbdac8a..f63851c67 100644 --- a/dbgpt/app/static/404.html +++ b/dbgpt/app/static/404.html @@ -1 +1 @@ -
[truncated: the minified HTML bodies of dbgpt/app/static/404.html and the remaining hunks of this series, including PATCH 4/5 and PATCH 5/5, are not included]
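
Compatibility note on PATCH 1/5 (a sketch, not part of the series): the rename from "current_gogal" to "current_goal" covers every message dict key, the ORM column, and the fresh-install schema in assets/schema/dbgpt.sql, but the series ships no migration for existing databases, so deployed instances would presumably need a manual ALTER TABLE on gpts_messages. Likewise, any out-of-tree producer that still emits the old key will silently lose the goal, because readers now call message.get("current_goal", None). A minimal shim, assuming plain dict messages as used throughout the patch:

    def normalize_goal_key(message: dict) -> dict:
        """Map the pre-rename 'current_gogal' key onto 'current_goal'.

        Hypothetical helper, not part of this series.
        """
        if "current_gogal" in message and "current_goal" not in message:
            message = dict(message)  # copy so the caller's dict is untouched
            message["current_goal"] = message.pop("current_gogal")
        return message

    # An old-style payload is bridged; new-style payloads pass through unchanged.
    msg = normalize_goal_key({"content": "hi", "current_gogal": "demo goal"})
    assert msg["current_goal"] == "demo goal"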
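
Usage note on PATCH 2/5 (a sketch under stated assumptions): the new ViewMetadata blocks expose the RAG operators to the AWEL flow editor, and KnowledgeOperator now accepts the knowledge type by name (a string) rather than a KnowledgeType member, which is why the examples switch to KnowledgeType.URL.name. Wiring the operators in code follows the updated examples/awel/simple_rag_summary_example.py; the OpenAILLMClient import path is assumed from those examples:

    from dbgpt.core.awel import DAG, MapOperator
    from dbgpt.model.proxy import OpenAILLMClient  # assumed import path
    from dbgpt.rag.knowledge.base import KnowledgeType
    from dbgpt.rag.operators.knowledge import KnowledgeOperator
    from dbgpt.rag.operators.summary import SummaryAssemblerOperator

    with DAG("url_summary_sketch"):
        # Pull the URL out of the request dict, build Knowledge from it,
        # then summarize it with the LLM-backed assembler.
        path_op = MapOperator(lambda request: request["url"])
        knowledge_op = KnowledgeOperator(knowledge_type=KnowledgeType.URL.name)
        summary_op = SummaryAssemblerOperator(
            llm_client=OpenAILLMClient(), language="en"
        )
        path_op >> knowledge_op >> summary_op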