memory injection with filter, mid messages summary
frdel committed Jun 24, 2024
1 parent 2330288 commit 422d5ca
Showing 25 changed files with 666 additions and 161 deletions.
225 changes: 157 additions & 68 deletions agent.py
@@ -1,106 +1,125 @@
import json
import time, importlib, inspect
from typing import Optional, Dict
from tools.helpers import extract_tools, rate_limiter, files
import traceback
from typing import Optional, Dict, TypedDict
from tools.helpers import extract_tools, rate_limiter, files, errors
from tools.helpers.print_style import PrintStyle
from langchain.schema import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.embeddings import Embeddings

# rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly

rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly

class Agent:

paused=False
streaming_agent=None

def __init__(self,
agent_number: int,
chat_llm:BaseChatModel,
embeddings_model:Embeddings,
memory_subdir: str = "",
auto_memory_count: int = 3,
auto_memory_skip: int = 2,
rate_limit_seconds: int = 60,
rate_limit_input_tokens: int = 0,
rate_limit_output_tokens: int = 0,
msgs_keep_max: int =25,
msgs_keep_start: int =5,
msgs_keep_end: int =10,
**kwargs):

@staticmethod
def configure(model_chat, model_embedding, memory_subdir="", memory_results=3):
# agent config
self.agent_number = agent_number
self.chat_model = chat_llm
self.embeddings_model = embeddings_model
self.memory_subdir = memory_subdir
self.auto_memory_count = auto_memory_count
self.auto_memory_skip = auto_memory_skip
self.rate_limit_seconds = rate_limit_seconds
self.rate_limit_input_tokens = rate_limit_input_tokens
self.rate_limit_output_tokens = rate_limit_output_tokens
self.msgs_keep_max = msgs_keep_max
self.msgs_keep_start = msgs_keep_start
self.msgs_keep_end = msgs_keep_end

#save configuration
Agent.model_chat = model_chat
# non-config vars
self.agent_name = f"Agent {self.agent_number}"

# initialize memory tool
from tools import memory_tool
memory_tool.initialize(
embeddings_model=model_embedding,
messages_returned=memory_results,
subdir=memory_subdir )

def __init__(self, system_prompt:Optional[str]=None, tools_prompt:Optional[str]=None, number=0):

self.number = number
self.name = f"Agent {self.number}"

if system_prompt is None: system_prompt = files.read_file("./prompts/agent.system.md")
if tools_prompt is None: tools_prompt = files.read_file("./prompts/agent.tools.md")
self.system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
self.tools_prompt = tools_prompt.replace("{", "{{").replace("}", "}}")
self.system_prompt = files.read_file("./prompts/agent.system.md").replace("{", "{{").replace("}", "}}")
self.tools_prompt = files.read_file("./prompts/agent.tools.md").replace("{", "{{").replace("}", "}}")

self.history = []
self.last_message = ""
self.intervention_message = ""
self.intervention_status = False

self.data = {} # free data object all the tools can use

self.prompt = ChatPromptTemplate.from_messages([
("system", self.system_prompt + "\n\n" + self.tools_prompt),
MessagesPlaceholder(variable_name="messages") ])

def message_loop(self, msg: str):
try:
printer = PrintStyle(italic=True, font_color="#b3ffd9", padding=False)
user_message = files.read_file("./prompts/fw.user_message.md", message=msg)
self.append_message(user_message, human=True) # Append the user's input to the history

self.stop_loop = False
self.loop_result = []

memories = self.fetch_memories(True)

while True: # let the agent iterate on his thoughts until he stops by using a tool
Agent.streaming_agent = self #mark self as current streamer
agent_response = ""
self.intervention_status = False # reset interventon status

try:

system = self.system_prompt + "\n\n" + self.tools_prompt
memories = self.fetch_memories()
if memories: system+= "\n\n"+memories

prompt = ChatPromptTemplate.from_messages([
SystemMessage(content=system),
MessagesPlaceholder(variable_name="messages") ])

inputs = {"messages": self.history}
chain = self.prompt | Agent.model_chat
formatted_inputs = self.prompt.format(**inputs)
chain = prompt | self.chat_model
formatted_inputs = prompt.format(messages=self.history)

rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words).
# rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words).

# output that the agent is starting
PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.name}: Starting a message:")
PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.agent_name}: Starting a message:")

for chunk in chain.stream(inputs):

if self.handle_intervention(agent_response): break # wait for intervention and handle it, if paused

if chunk.content is not None and chunk.content != '':
printer.stream(chunk.content) # output the agent response stream
agent_response += chunk.content # type: ignore | concatenate stream into the response

if isinstance(chunk, str): content = chunk
elif hasattr(chunk, "content"): content = str(chunk.content)
else: content = str(chunk)

if content:
printer.stream(content) # output the agent response stream
agent_response += content # concatenate stream into the response

if not self.handle_intervention(agent_response):
#if assistant_response is the same as last message in history, let him know
if self.last_message == agent_response:
agent_response = files.read_file("./prompts/fw.msg_repeat.md")
PrintStyle(font_color="orange", padding=True).print(agent_response)
self.last_message = agent_response

self.append_message(agent_response) # Append the assistant's response to the history
if self.last_message == agent_response: #if assistant_response is the same as last message in history, let him know
self.append_message(agent_response) # Append the assistant's response to the history
warning_msg = files.read_file("./prompts/fw.msg_repeat.md")
self.append_message(warning_msg, human=True) # Append warning message to the history
PrintStyle(font_color="orange", padding=True).print(warning_msg)

tools_result = self.process_tools(agent_response) # process tools requested in agent message
if tools_result: return tools_result #break the execution if the task is done
else: #otherwise proceed with tool
self.append_message(agent_response) # Append the assistant's response to the history
tools_result = self.process_tools(agent_response) # process tools requested in agent message
if tools_result: return tools_result #break the execution if the task is done

# Forward errors to the LLM, maybe he can fix them
except Exception as e:
msg_response = files.read_file("./prompts/fw.error.md", error=str(e)) # error message template
error_message = errors.format_error(e)
msg_response = files.read_file("./prompts/fw.error.md", error=error_message) # error message template
self.append_message(msg_response, human=True)
PrintStyle(font_color="red", padding=True).print(msg_response)
finally:
if self.get_last_message().type=="ai": #type: ignore
user_message = files.read_file("./prompts/fw.msg_continue.md")
PrintStyle(font_color="yellow", padding=False).print(user_message)

finally:
Agent.streaming_agent = None # unset current streamer
@@ -118,25 +137,74 @@ def append_message(self, msg: str, human: bool = False):
else:
new_message = HumanMessage(content=msg) if human else AIMessage(content=msg)
self.history.append(new_message)
self.cleanup_history(5, 10)
self.cleanup_history(self.msgs_keep_max, self.msgs_keep_start, self.msgs_keep_end)
if message_type=="ai":
self.last_message = msg

def concat_messages(self,messages):
return "\n".join([f"{msg.type}: {msg.content}" for msg in messages])

def send_adhoc_message(self, system: str, msg: str, output_label:str):
prompt = ChatPromptTemplate.from_messages([
SystemMessage(content=system),
HumanMessage(content=msg)])

chain = prompt | self.chat_model
response = ""
printer = None

if output_label:
PrintStyle(bold=True, font_color="orange", padding=True, background_color="white").print(f"{self.agent_name}: {output_label}:")
printer = PrintStyle(italic=True, font_color="orange", padding=False)

for chunk in chain.stream({}):
if self.handle_intervention(response): break # wait for intervention and handle it, if paused

if isinstance(chunk, str): content = chunk
elif hasattr(chunk, "content"): content = str(chunk.content)
else: content = str(chunk)

if printer: printer.stream(content)
response+=content

return response

def get_last_message(self):
if self.history:
return self.history[-1]

def cleanup_history(self,x, y):
if len(self.history) <= x + y:
def replace_middle_messages(self,middle_messages):
cleanup_prompt = files.read_file("./prompts/fw.msg_cleanup.md")
summary = self.send_adhoc_message(system=cleanup_prompt,msg=self.concat_messages(middle_messages), output_label="Mid messages cleanup summary")
new_human_message = HumanMessage(content=summary)
return [new_human_message]

def cleanup_history(self, max:int, keep_start:int, keep_end:int):
if len(self.history) <= max:
return self.history

first_x = self.history[:x]
last_y = self.history[-y:]

cleanup_prompt = files.read_file("./prompts/fw.msg_cleanup.md")
middle_values = [AIMessage(content=cleanup_prompt)]

self.history = first_x + middle_values + last_y
first_x = self.history[:keep_start]
last_y = self.history[-keep_end:]

# Identify the middle part
middle_part = self.history[keep_start:-keep_end]

# Ensure the first message in the middle is "human", if not, move one message back
if middle_part and middle_part[0].type != "human":
if len(first_x) > 0:
middle_part.insert(0, first_x.pop())

# Ensure the middle part has an odd number of messages
if len(middle_part) % 2 == 0:
middle_part = middle_part[:-1]

# Replace the middle part using the replacement function
new_middle_part = self.replace_middle_messages(middle_part)

self.history = first_x + new_middle_part + last_y

return self.history


def handle_intervention(self, progress:str="") -> bool:
while self.paused: time.sleep(0.1) # wait if paused
@@ -151,10 +219,12 @@ def handle_intervention(self, progress:str="") -> bool:
def process_tools(self, msg: str):
# search for tool usage requests in agent message
tool_request = extract_tools.json_parse_dirty(msg)
tool_name = tool_request.get("tool_name", "")
tool_args = tool_request.get("tool_args", {})

tool = self.get_tool(
tool_request["tool_name"],
tool_request["tool_args"],
tool_name,
tool_args,
msg)

if self.handle_intervention(): return # wait if paused and handle intervention message if needed
@@ -179,4 +249,23 @@ def get_tool(self, name: str, args: dict, message: str, **kwargs):
tool_class = cls[1]
break

return tool_class(agent=self, name=name, args=args, message=message, **kwargs)
return tool_class(agent=self, name=name, args=args, message=message, **kwargs)

def fetch_memories(self,reset_skip=False):
if reset_skip: self.memory_skip_counter = 0

if self.memory_skip_counter > 0:
self.memory_skip_counter-=1
return ""
else:
self.memory_skip_counter = self.auto_memory_skip
from tools import memory_tool
messages = self.concat_messages(self.history)
memories = memory_tool.process_query(self,messages,"load")
input = {
"conversation_history" : messages,
"raw_memories": memories
}
cleanup_prompt = files.read_file("./prompts/msg.memory_cleanup.md").replace("{", "{{")
clean_memories = self.send_adhoc_message(cleanup_prompt,json.dumps(input), output_label="Memory cleanup summary")
return clean_memories
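
For context on the "mid messages summary" from the commit title, below is a minimal, self-contained sketch of the idea implemented by `cleanup_history` and `replace_middle_messages` above. It is illustrative only, not the project's code: messages are plain `(role, text)` tuples and `summarize` is a hypothetical stand-in for the ad-hoc LLM summarization call.

~~~python
# Illustrative sketch (not agent-zero's code): keep the first `keep_start` and last
# `keep_end` messages verbatim and collapse everything in between into a single
# summary message produced by a caller-supplied `summarize` function.
from typing import Callable, List, Tuple

Message = Tuple[str, str]  # (role, text), role is "human" or "ai"

def trim_history(history: List[Message], max_len: int, keep_start: int, keep_end: int,
                 summarize: Callable[[List[Message]], str]) -> List[Message]:
    if len(history) <= max_len:
        return history
    first = history[:keep_start]
    last = history[-keep_end:]
    middle = history[keep_start:-keep_end]
    # keep the human/ai alternation intact: the middle should start with a human message
    if middle and middle[0][0] != "human" and first:
        middle.insert(0, first.pop())
    # drop one message if the middle has an even count (the diff above keeps it odd)
    if len(middle) % 2 == 0:
        middle = middle[:-1]
    return first + [("human", summarize(middle))] + last

# usage with a trivial stand-in summarizer and the defaults from the diff above
history = [("human", f"msg {i}") if i % 2 == 0 else ("ai", f"reply {i}") for i in range(30)]
trimmed = trim_history(history, max_len=25, keep_start=5, keep_end=10,
                       summarize=lambda msgs: f"summary of {len(msgs)} earlier messages")
print(len(trimmed))  # 15 messages: 4 kept at the start, 1 summary, 10 kept at the end
~~~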
17 changes: 6 additions & 11 deletions main.py
@@ -23,23 +23,18 @@ def chat():
# chat_llm = models.get_openai_gpt4o(temperature=0)
# chat_llm = models.get_anthropic_opus(temperature=0)
# chat_llm = models.get_anthropic_sonnet(temperature=0)
chat_llm = models.get_anthropic_haiku(temperature=0)
chat_llm = models.get_anthropic_sonnet_35(temperature=0)
# chat_llm = models.get_anthropic_haiku(temperature=0)
# chat_llm = models.get_ollama_dolphin()

# embedding model used for memory
# embedding_llm = models.get_embedding_openai()
embedding_llm = models.get_embedding_hf()

# initial configuration
Agent.configure(
model_chat = chat_llm,
model_embedding = embedding_llm,
#memory_subdir=""
#memory_results=3
)

# create the first agent
agent0 = Agent()
agent0 = Agent(agent_number=0,
chat_llm=chat_llm,
embeddings_model=embedding_llm)

# start the conversation loop
while True:
@@ -73,7 +68,7 @@ def chat():
assistant_response = agent0.message_loop(user_input)

# print agent0 response
PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.name}: reponse:")
PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.agent_name}: reponse:")
PrintStyle(font_color="white").print(f"{assistant_response}")


9 changes: 7 additions & 2 deletions models.py
@@ -22,6 +22,11 @@ def get_anthropic_haiku(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, api_key=api_key) # type: ignore

def get_anthropic_sonnet_35(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-5-sonnet-20240620", temperature=temperature, api_key=api_key) # type: ignore


def get_anthropic_sonnet(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, api_key=api_key) # type: ignore
@@ -68,10 +73,10 @@ def get_groq_gemma(api_key=None, temperature=DEFAULT_TEMPERATURE):
return ChatGroq(model_name="gemma-7b-it", temperature=temperature, api_key=api_key) # type: ignore

def get_ollama_dolphin(api_key=None, temperature=DEFAULT_TEMPERATURE):
return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16")
return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16", temperature=temperature)

def get_ollama_phi(api_key=None, temperature=DEFAULT_TEMPERATURE):
return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16")
return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16",temperature=temperature)

def get_embedding_hf(model_name="sentence-transformers/all-MiniLM-L6-v2"):
return HuggingFaceEmbeddings(model_name=model_name)
5 changes: 5 additions & 0 deletions prompts/agent.memory.md
@@ -0,0 +1,5 @@
# Memories
- following are your memories on the current topic
- you may find some of them helpful to solve the current task

{{memories}}
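
The `{{memories}}` placeholder above is meant to receive memories recalled for the current conversation. The shape of that flow appears in `fetch_memories` in agent.py earlier in this commit: retrieve raw memories, filter them with an ad-hoc LLM call, and append the cleaned result to the system prompt. Below is a rough sketch of that flow; `retrieve` and `ask_llm` are hypothetical stand-ins for the vector-store lookup and the ad-hoc LLM call, and the exact wiring of this particular prompt file is not visible in the shown diff.

~~~python
# Rough sketch of the memory injection flow, with hypothetical `retrieve` and
# `ask_llm` helpers standing in for the vector store and the ad-hoc LLM call.
import json
from typing import Callable, List

def inject_memories(base_system_prompt: str,
                    history_text: str,
                    retrieve: Callable[[str], List[str]],
                    ask_llm: Callable[[str, str], str],
                    cleanup_prompt: str,
                    memory_template: str) -> str:
    raw_memories = retrieve(history_text)  # similarity search over stored memories
    payload = json.dumps({"conversation_history": history_text,
                          "raw_memories": raw_memories})
    cleaned = ask_llm(cleanup_prompt, payload)  # LLM filters out irrelevant memories
    if not cleaned.strip():
        return base_system_prompt
    # fill the {{memories}} slot of a template like the prompt file above
    return base_system_prompt + "\n\n" + memory_template.replace("{{memories}}", cleaned)
~~~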
1 change: 1 addition & 0 deletions prompts/agent.system.md
@@ -9,6 +9,7 @@
- Tools help you gather knowledge and execute actions
3. **tool_args**: Object of arguments that are passed to the tool
- Each tool has specific arguments listed in Available tools section
- No text before or after the JSON object. End message there.

## Response example that must be used every time
~~~json
(Diffs for the remaining changed files are not shown.)