Skip to content

Commit

Permalink
prototype v0.2
Browse files Browse the repository at this point in the history
free JSON format, dirty parsing, tool superclass, memory injection, new rate limiter, agent params
  • Loading branch information
frdel committed Jun 26, 2024
1 parent 422d5ca commit be11dbb
Show file tree
Hide file tree
Showing 21 changed files with 243 additions and 144 deletions.
51 changes: 33 additions & 18 deletions agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json
import time, importlib, inspect
import time, importlib, inspect, os, json
import traceback
from typing import Optional, Dict, TypedDict
from tools.helpers import extract_tools, rate_limiter, files, errors
Expand All @@ -9,6 +8,7 @@
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.embeddings import Embeddings
from tools.helpers.rate_limiter import RateLimiter

# rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly

Expand All @@ -20,32 +20,36 @@ class Agent:

def __init__(self,
agent_number: int,
chat_llm:BaseChatModel,
chat_model:BaseChatModel,
embeddings_model:Embeddings,
memory_subdir: str = "",
auto_memory_count: int = 3,
auto_memory_skip: int = 2,
rate_limit_seconds: int = 60,
rate_limit_requests: int = 30,
rate_limit_input_tokens: int = 0,
rate_limit_output_tokens: int = 0,
msgs_keep_max: int =25,
msgs_keep_start: int =5,
msgs_keep_end: int =10,
msgs_keep_max: int = 25,
msgs_keep_start: int = 5,
msgs_keep_end: int = 10,
max_tool_response_length: int = 3000,
**kwargs):

# agent config
self.agent_number = agent_number
self.chat_model = chat_llm
self.chat_model = chat_model
self.embeddings_model = embeddings_model
self.memory_subdir = memory_subdir
self.auto_memory_count = auto_memory_count
self.auto_memory_skip = auto_memory_skip
self.rate_limit_seconds = rate_limit_seconds
self.rate_limit_requests = rate_limit_requests
self.rate_limit_input_tokens = rate_limit_input_tokens
self.rate_limit_output_tokens = rate_limit_output_tokens
self.msgs_keep_max = msgs_keep_max
self.msgs_keep_start = msgs_keep_start
self.msgs_keep_end = msgs_keep_end
self.max_tool_response_length = max_tool_response_length

# non-config vars
self.agent_name = f"Agent {self.agent_number}"
Expand All @@ -57,9 +61,12 @@ def __init__(self,
self.last_message = ""
self.intervention_message = ""
self.intervention_status = False

self.rate_limiter = RateLimiter(max_calls=rate_limit_requests,max_input_tokens=rate_limit_input_tokens,max_output_tokens=rate_limit_output_tokens,window_seconds=rate_limit_seconds)
self.data = {} # free data object all the tools can use

os.chdir(files.get_abs_path("./work_dir")) #change CWD to work_dir


def message_loop(self, msg: str):
try:
printer = PrintStyle(italic=True, font_color="#b3ffd9", padding=False)
Expand All @@ -84,10 +91,11 @@ def message_loop(self, msg: str):

inputs = {"messages": self.history}
chain = prompt | self.chat_model
formatted_inputs = prompt.format(messages=self.history)

# rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words).

formatted_inputs = prompt.format(messages=self.history)
tokens = int(len(formatted_inputs)/4)
self.rate_limiter.limit_call_and_input(tokens)

# output that the agent is starting
PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.agent_name}: Starting a message:")

Expand All @@ -101,7 +109,9 @@ def message_loop(self, msg: str):
if content:
printer.stream(content) # output the agent response stream
agent_response += content # concatenate stream into the response


self.rate_limiter.set_output_tokens(int(len(agent_response)/4))

if not self.handle_intervention(agent_response):
if self.last_message == agent_response: #if assistant_response is the same as last message in history, let him know
self.append_message(agent_response) # Append the assistant's response to the history
Expand Down Expand Up @@ -156,9 +166,13 @@ def send_adhoc_message(self, system: str, msg: str, output_label:str):
if output_label:
PrintStyle(bold=True, font_color="orange", padding=True, background_color="white").print(f"{self.agent_name}: {output_label}:")
printer = PrintStyle(italic=True, font_color="orange", padding=False)


formatted_inputs = prompt.format()
tokens = int(len(formatted_inputs)/4)
self.rate_limiter.limit_call_and_input(tokens)

for chunk in chain.stream({}):
if self.handle_intervention(response): break # wait for intervention and handle it, if paused
if self.handle_intervention(): break # wait for intervention and handle it, if paused

if isinstance(chunk, str): content = chunk
elif hasattr(chunk, "content"): content = str(chunk.content)
Expand All @@ -167,6 +181,8 @@ def send_adhoc_message(self, system: str, msg: str, output_label:str):
if printer: printer.stream(content)
response+=content

self.rate_limiter.set_output_tokens(int(len(response)/4))

return response

def get_last_message(self):
Expand Down Expand Up @@ -205,7 +221,6 @@ def cleanup_history(self, max:int, keep_start:int, keep_end:int):

return self.history


def handle_intervention(self, progress:str="") -> bool:
while self.paused: time.sleep(0.1) # wait if paused
if self.intervention_message and not self.intervention_status: # if there is an intervention message, but not yet processed
Expand All @@ -229,8 +244,8 @@ def process_tools(self, msg: str):

if self.handle_intervention(): return # wait if paused and handle intervention message if needed

tool.before_execution()
response = tool.execute()
tool.before_execution(**tool_args)
response = tool.execute(**tool_args)
tool.after_execution(response)
if response.break_loop: return response.message

Expand Down Expand Up @@ -267,5 +282,5 @@ def fetch_memories(self,reset_skip=False):
"raw_memories": memories
}
cleanup_prompt = files.read_file("./prompts/msg.memory_cleanup.md").replace("{", "{{")
clean_memories = self.send_adhoc_message(cleanup_prompt,json.dumps(input), output_label="Memory cleanup summary")
clean_memories = self.send_adhoc_message(cleanup_prompt,json.dumps(input), output_label="Memory injection")
return clean_memories
6 changes: 5 additions & 1 deletion example.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
API_KEY_OPENAI=
API_KEY_ANTHROPIC=
API_KEY_GROQ=
API_KEY_PERPLEXITY=
API_KEY_PERPLEXITY=


TOKENIZERS_PARALLELISM=true
PYDEVD_DISABLE_FILE_VALIDATION=1
18 changes: 15 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,21 @@ def chat():
embedding_llm = models.get_embedding_hf()

# create the first agent
agent0 = Agent(agent_number=0,
chat_llm=chat_llm,
embeddings_model=embedding_llm)
agent0 = Agent( agent_number=0,
chat_model=chat_llm,
embeddings_model=embedding_llm,
# memory_subdir = "",
# auto_memory_count = 3,
# auto_memory_skip = 2,
# rate_limit_seconds = 60,
rate_limit_requests = 30,
rate_limit_input_tokens = 160000,
rate_limit_output_tokens = 8000,
# msgs_keep_max = 25,
# msgs_keep_start = 5,
# msgs_keep_end = 10,
# max_tool_response_length = 3000,
)

# start the conversation loop
while True:
Expand Down
55 changes: 48 additions & 7 deletions prompts/agent.system.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Your role
- You are autonomous JSON AI task solver
- You are an autonomous JSON AI task-solving agent enhanced with knowledge and execution tools
- You are given tasks by your superior and you solve them using your subordinates and tools
- You never just talk about solutions and never merely inform the user of your intentions; you are the one to execute actions using your tools and get things done

# Communication
- Your response is a JSON containing the following fields:
Expand All @@ -11,19 +13,58 @@
- Each tool has specific arguments listed in Available tools section
- No text before or after the JSON object. End message there.

## Response example that must be used every time
## Response example
~~~json
{
"thoughts": [
"The user has requested...",
"I need to follow these steps...",
"I need to use the tool...",
"Then I will..."
"The user has requested extracting a zip file downloaded yesterday.",
"Steps to solution are...",
"I will process step by step...",
"Analysis of step..."
],
"tool_name": "name_of_tool",
"tool_args": {
"arg1": "val1",
"arg2": "val2"
}
}
~~~
~~~

# Step by step instruction manual to problem solving
- Do not follow this manual for simple questions; use it only for tasks that need solving.
- Explain each step using your **thoughts** argument.

0. Outline the plan by repeating these instructions.
1. Check the memory output of your **knowledge_tool**. Maybe you have solved similar task before and already have helpful information.
2. Check the online sources output of your **knowledge_tool**.
- Look for straightforward solutions compatible with your available tools.
- Always look for opensource python/nodejs/terminal tools and packages first.
3. Break task into subtasks that can be solved independently.
4. Solution / delegation
- If your role is suitable for the current subtask, use your tools to solve it.
- If a different role would be more suitable for the subtask, use **call_subordinate** tool to delegate the subtask to subordinate agent and instruct him about his role.
5. Completing the task
- Consolidate all subtasks and explain the status.
- Verify the result using your tools if possible (check created files etc.)
- Do not accept failure, search for error solution and try again with fixed input or different ways.
- If there is helpful information discovered during the solution, save it into your memory using tool **memorize** for later.
- Report back to your user using **response** tool, describe the result and provide all necessary information. Do not just output your response, you must use the tool for that.

# General operation manual
- Use your reasoning and process each problem in a step-by-step manner using your **thoughts** argument.
- Always check your previous messages and prevent repetition. Always move towards solution.
- Never assume success. You always need to do a check with a positive result.
- Avoid solutions that require credentials, user interaction, GUI usage etc. All has to be done using code and terminal.
- When asked about your memory, it always refers to **knowledge_tool** and **memorize** tool, never your internal knowledge.

# Cooperation and delegation
- Agents can have roles like scientist, coder, writer etc.
- If your user has assigned you a role in the first message, you have to follow these instructions and play your role.
- Your role will not be suitable for some subtasks, in that case you can delegate the subtask to subordinate agent and instruct him about his role using **call_subordinate** tool.
- Always be very descriptive when explaining your subordinate agent's role and task. Include all necessary details as well as a higher level overview about the goal.
- Communicate back and forth with your subordinate and superior using **call_subordinate** and **response** tools.
- Communication is the key to a successful solution.

# Tips and tricks
- Focus on python/nodejs/linux libraries when searching for solutions. You can use them with your tools and make solutions easy.
- Sometimes you don't need tools, some things can be determined.
21 changes: 21 additions & 0 deletions prompts/agent.tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,27 @@ Always verify memory by online.
}
~~~

### call_subordinate:
Use subordinate agents to solve subtasks.
Use "message" argument to send message. Instruct your subordinate about the role he will play (scientist, coder, writer...) and his task in detail.
Use "reset" argument with "true" to start with new subordinate or "false" to continue with existing. For brand new tasks use "true", for followup conversation use "false".
Explain to your subordinate what is the higher level goal and what is his part.
Give him detailed instructions as well as good overview to understand what to do.
**Example usage**:
~~~json
{
"thoughts": [
"The result seems to be ok but...",
"I will ask my subordinate to fix..."
],
"tool_name": "call_subordinate",
"tool_args": {
"message": "Well done, now edit...",
"reset": "false"
}
}
~~~

### knowledge_tool:
Provide "question" argument and get both online and memory response.
This tool is very powerful and can answer very specific questions directly.
Expand Down
17 changes: 15 additions & 2 deletions prompts/fw.msg_timeout.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
User is not responding to your message.
# User is not responding to your message.
If you have a task in progress, continue on your own.
I you don't have a task, use the <task_done$> message.
If you don't have a task, use the **task_done** tool with **text** argument.

# Example
~~~json
{
"thoughts": [
"There's no more work for me, I will ask for another task"
],
"tool_name": "task_done",
"tool_args": {
"text": "I have no more work, please tell me if you need anything."
}
}
~~~
19 changes: 16 additions & 3 deletions prompts/msg.memory_cleanup.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@

# Expected output format
- Return filtered list of bullet points of key elements in the memories
- Include every important detail relevant to conversation
- Include code snippets if relevant
- Omit any irrelevant information
- Do not include memory contents, only their summaries to inform the user that he has memories of the topic.
- If there are relevant memories, instruct user to use "knowledge_tool" to get more details.

# Example output 1 (relevant memories):
~~~md
1. Guide how to create a web app including code.
2. Javascript snippets from snake game development.
3. SVG image generation for game sprites with examples.

Check your knowledge_tool for more details.
~~~

# Example output 2 (no relevant memories):
~~~text
No relevant memories on the topic found.
~~~
26 changes: 0 additions & 26 deletions test.py

This file was deleted.

10 changes: 6 additions & 4 deletions tools/delegation.py → tools/call_subordinate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@

class Delegation(Tool):

def execute(self, **kwargs):
def execute(self, message="", reset="", **kwargs):
# create subordinate agent using the data object on this agent and set superior agent to his data object
if self.agent.get_data("subordinate") is None or self.args["reset"].lower().strip() == "true":
if self.agent.get_data("subordinate") is None or str(reset).lower().strip() == "true":
# subordinate = Agent(system_prompt=self.agent.system_prompt, tools_prompt=self.agent.tools_prompt, number=self.agent.number+1)
subordinate = Agent(**self.agent.__dict__, agent_number=self.agent.agent_number+1)
config = self.agent.__dict__.copy()
config["agent_number"] = self.agent.agent_number+1
subordinate = Agent(**config)
subordinate.set_data("superior", self.agent)
self.agent.set_data("subordinate", subordinate)
# run subordinate agent message loop
return self.agent.get_data("subordinate").message_loop(self.args["task"])
return Response( message=self.agent.get_data("subordinate").message_loop(message), break_loop=False)
Loading

0 comments on commit be11dbb

Please sign in to comment.