memory injection with filter, mid messages summary
frdel committed Jun 24, 2024
1 parent 2330288 commit 422d5ca
Showing 25 changed files with 666 additions and 161 deletions.
225 changes: 157 additions & 68 deletions agent.py
@@ -1,106 +1,125 @@
import json
import time, importlib, inspect
from typing import Optional, Dict
from tools.helpers import extract_tools, rate_limiter, files
import traceback
from typing import Optional, Dict, TypedDict
from tools.helpers import extract_tools, rate_limiter, files, errors
from tools.helpers.print_style import PrintStyle
from langchain.schema import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.embeddings import Embeddings

# rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly

rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly

class Agent:

paused=False
streaming_agent=None

def __init__(self,
agent_number: int,
chat_llm:BaseChatModel,
embeddings_model:Embeddings,
memory_subdir: str = "",
auto_memory_count: int = 3,
auto_memory_skip: int = 2,
rate_limit_seconds: int = 60,
rate_limit_input_tokens: int = 0,
rate_limit_output_tokens: int = 0,
msgs_keep_max: int =25,
msgs_keep_start: int =5,
msgs_keep_end: int =10,
**kwargs):

@staticmethod
def configure(model_chat, model_embedding, memory_subdir="", memory_results=3):
# agent config
self.agent_number = agent_number
self.chat_model = chat_llm
self.embeddings_model = embeddings_model
self.memory_subdir = memory_subdir
self.auto_memory_count = auto_memory_count
self.auto_memory_skip = auto_memory_skip
self.rate_limit_seconds = rate_limit_seconds
self.rate_limit_input_tokens = rate_limit_input_tokens
self.rate_limit_output_tokens = rate_limit_output_tokens
self.msgs_keep_max = msgs_keep_max
self.msgs_keep_start = msgs_keep_start
self.msgs_keep_end = msgs_keep_end

#save configuration
Agent.model_chat = model_chat
# non-config vars
self.agent_name = f"Agent {self.agent_number}"

# initialize memory tool
from tools import memory_tool
memory_tool.initialize(
embeddings_model=model_embedding,
messages_returned=memory_results,
subdir=memory_subdir )

def __init__(self, system_prompt:Optional[str]=None, tools_prompt:Optional[str]=None, number=0):

self.number = number
self.name = f"Agent {self.number}"

if system_prompt is None: system_prompt = files.read_file("./prompts/agent.system.md")
if tools_prompt is None: tools_prompt = files.read_file("./prompts/agent.tools.md")
self.system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
self.tools_prompt = tools_prompt.replace("{", "{{").replace("}", "}}")
self.system_prompt = files.read_file("./prompts/agent.system.md").replace("{", "{{").replace("}", "}}")
self.tools_prompt = files.read_file("./prompts/agent.tools.md").replace("{", "{{").replace("}", "}}")

self.history = []
self.last_message = ""
self.intervention_message = ""
self.intervention_status = False

self.data = {} # free data object all the tools can use

self.prompt = ChatPromptTemplate.from_messages([
("system", self.system_prompt + "\n\n" + self.tools_prompt),
MessagesPlaceholder(variable_name="messages") ])

def message_loop(self, msg: str):
try:
printer = PrintStyle(italic=True, font_color="#b3ffd9", padding=False)
user_message = files.read_file("./prompts/fw.user_message.md", message=msg)
self.append_message(user_message, human=True) # Append the user's input to the history

self.stop_loop = False
self.loop_result = []

memories = self.fetch_memories(True)

while True: # let the agent iterate on his thoughts until he stops by using a tool
Agent.streaming_agent = self #mark self as current streamer
agent_response = ""
self.intervention_status = False # reset interventon status

try:

system = self.system_prompt + "\n\n" + self.tools_prompt
memories = self.fetch_memories()
if memories: system+= "\n\n"+memories

prompt = ChatPromptTemplate.from_messages([
SystemMessage(content=system),
MessagesPlaceholder(variable_name="messages") ])

inputs = {"messages": self.history}
chain = self.prompt | Agent.model_chat
formatted_inputs = self.prompt.format(**inputs)
chain = prompt | self.chat_model
formatted_inputs = prompt.format(messages=self.history)

rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words).
# rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words).

# output that the agent is starting
PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.name}: Starting a message:")
PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.agent_name}: Starting a message:")

for chunk in chain.stream(inputs):

if self.handle_intervention(agent_response): break # wait for intervention and handle it, if paused

if chunk.content is not None and chunk.content != '':
printer.stream(chunk.content) # output the agent response stream
agent_response += chunk.content # type: ignore | concatenate stream into the response

if isinstance(chunk, str): content = chunk
elif hasattr(chunk, "content"): content = str(chunk.content)
else: content = str(chunk)

if content:
printer.stream(content) # output the agent response stream
agent_response += content # concatenate stream into the response

if not self.handle_intervention(agent_response):
#if assistant_response is the same as last message in history, let him know
if self.last_message == agent_response:
agent_response = files.read_file("./prompts/fw.msg_repeat.md")
PrintStyle(font_color="orange", padding=True).print(agent_response)
self.last_message = agent_response

self.append_message(agent_response) # Append the assistant's response to the history
if self.last_message == agent_response: #if assistant_response is the same as last message in history, let him know
self.append_message(agent_response) # Append the assistant's response to the history
warning_msg = files.read_file("./prompts/fw.msg_repeat.md")
self.append_message(warning_msg, human=True) # Append warning message to the history
PrintStyle(font_color="orange", padding=True).print(warning_msg)

tools_result = self.process_tools(agent_response) # process tools requested in agent message
if tools_result: return tools_result #break the execution if the task is done
else: #otherwise proceed with tool
self.append_message(agent_response) # Append the assistant's response to the history
tools_result = self.process_tools(agent_response) # process tools requested in agent message
if tools_result: return tools_result #break the execution if the task is done

# Forward errors to the LLM, maybe he can fix them
except Exception as e:
msg_response = files.read_file("./prompts/fw.error.md", error=str(e)) # error message template
error_message = errors.format_error(e)
msg_response = files.read_file("./prompts/fw.error.md", error=error_message) # error message template
self.append_message(msg_response, human=True)
PrintStyle(font_color="red", padding=True).print(msg_response)
finally:
if self.get_last_message().type=="ai": #type: ignore
user_message = files.read_file("./prompts/fw.msg_continue.md")
PrintStyle(font_color="yellow", padding=False).print(user_message)

finally:
Agent.streaming_agent = None # unset current streamer
@@ -118,25 +137,74 @@ def append_message(self, msg: str, human: bool = False):
else:
new_message = HumanMessage(content=msg) if human else AIMessage(content=msg)
self.history.append(new_message)
self.cleanup_history(5, 10)
self.cleanup_history(self.msgs_keep_max, self.msgs_keep_start, self.msgs_keep_end)
if message_type=="ai":
self.last_message = msg

def concat_messages(self,messages):
return "\n".join([f"{msg.type}: {msg.content}" for msg in messages])

def send_adhoc_message(self, system: str, msg: str, output_label:str):
prompt = ChatPromptTemplate.from_messages([
SystemMessage(content=system),
HumanMessage(content=msg)])

chain = prompt | self.chat_model
response = ""
printer = None

if output_label:
PrintStyle(bold=True, font_color="orange", padding=True, background_color="white").print(f"{self.agent_name}: {output_label}:")
printer = PrintStyle(italic=True, font_color="orange", padding=False)

for chunk in chain.stream({}):
if self.handle_intervention(response): break # wait for intervention and handle it, if paused

if isinstance(chunk, str): content = chunk
elif hasattr(chunk, "content"): content = str(chunk.content)
else: content = str(chunk)

if printer: printer.stream(content)
response+=content

return response

def get_last_message(self):
if self.history:
return self.history[-1]

def cleanup_history(self,x, y):
if len(self.history) <= x + y:
def replace_middle_messages(self,middle_messages):
cleanup_prompt = files.read_file("./prompts/fw.msg_cleanup.md")
summary = self.send_adhoc_message(system=cleanup_prompt,msg=self.concat_messages(middle_messages), output_label="Mid messages cleanup summary")
new_human_message = HumanMessage(content=summary)
return [new_human_message]

def cleanup_history(self, max:int, keep_start:int, keep_end:int):
if len(self.history) <= max:
return self.history

first_x = self.history[:x]
last_y = self.history[-y:]

cleanup_prompt = files.read_file("./prompts/fw.msg_cleanup.md")
middle_values = [AIMessage(content=cleanup_prompt)]

self.history = first_x + middle_values + last_y
first_x = self.history[:keep_start]
last_y = self.history[-keep_end:]

# Identify the middle part
middle_part = self.history[keep_start:-keep_end]

# Ensure the first message in the middle is "human", if not, move one message back
if middle_part and middle_part[0].type != "human":
if len(first_x) > 0:
middle_part.insert(0, first_x.pop())

# Ensure the middle part has an odd number of messages
if len(middle_part) % 2 == 0:
middle_part = middle_part[:-1]

# Replace the middle part using the replacement function
new_middle_part = self.replace_middle_messages(middle_part)

self.history = first_x + new_middle_part + last_y

return self.history


def handle_intervention(self, progress:str="") -> bool:
while self.paused: time.sleep(0.1) # wait if paused
@@ -151,10 +219,12 @@ def handle_intervention(self, progress:str="") -> bool:
def process_tools(self, msg: str):
# search for tool usage requests in agent message
tool_request = extract_tools.json_parse_dirty(msg)
tool_name = tool_request.get("tool_name", "")
tool_args = tool_request.get("tool_args", {})

tool = self.get_tool(
tool_request["tool_name"],
tool_request["tool_args"],
tool_name,
tool_args,
msg)

if self.handle_intervention(): return # wait if paused and handle intervention message if needed
@@ -179,4 +249,23 @@ def get_tool(self, name: str, args: dict, message: str, **kwargs):
tool_class = cls[1]
break

return tool_class(agent=self, name=name, args=args, message=message, **kwargs)
return tool_class(agent=self, name=name, args=args, message=message, **kwargs)

def fetch_memories(self,reset_skip=False):
if reset_skip: self.memory_skip_counter = 0

if self.memory_skip_counter > 0:
self.memory_skip_counter-=1
return ""
else:
self.memory_skip_counter = self.auto_memory_skip
from tools import memory_tool
messages = self.concat_messages(self.history)
memories = memory_tool.process_query(self,messages,"load")
input = {
"conversation_history" : messages,
"raw_memories": memories
}
cleanup_prompt = files.read_file("./prompts/msg.memory_cleanup.md").replace("{", "{{")
clean_memories = self.send_adhoc_message(cleanup_prompt,json.dumps(input), output_label="Memory cleanup summary")
return clean_memories
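
For context on the "mid messages summary" from the commit title, below is a minimal, self-contained sketch of the idea implemented by `cleanup_history` and `replace_middle_messages` above. It is illustrative only, not the project's code: messages are plain `(role, text)` tuples and `summarize` is a hypothetical stand-in for the ad-hoc LLM summarization call.

~~~python
# Illustrative sketch (not agent-zero's code): keep the first `keep_start` and last
# `keep_end` messages verbatim and collapse everything in between into a single
# summary message produced by a caller-supplied `summarize` function.
from typing import Callable, List, Tuple

Message = Tuple[str, str]  # (role, text), role is "human" or "ai"

def trim_history(history: List[Message], max_len: int, keep_start: int, keep_end: int,
                 summarize: Callable[[List[Message]], str]) -> List[Message]:
    if len(history) <= max_len:
        return history
    first = history[:keep_start]
    last = history[-keep_end:]
    middle = history[keep_start:-keep_end]
    # keep the human/ai alternation intact: the middle should start with a human message
    if middle and middle[0][0] != "human" and first:
        middle.insert(0, first.pop())
    # drop one message if the middle has an even count (the diff above keeps it odd)
    if len(middle) % 2 == 0:
        middle = middle[:-1]
    return first + [("human", summarize(middle))] + last

# usage with a trivial stand-in summarizer and the defaults from the diff above
history = [("human", f"msg {i}") if i % 2 == 0 else ("ai", f"reply {i}") for i in range(30)]
trimmed = trim_history(history, max_len=25, keep_start=5, keep_end=10,
                       summarize=lambda msgs: f"summary of {len(msgs)} earlier messages")
print(len(trimmed))  # 15 messages: 4 kept at the start, 1 summary, 10 kept at the end
~~~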
17 changes: 6 additions & 11 deletions main.py
@@ -23,23 +23,18 @@ def chat():
# chat_llm = models.get_openai_gpt4o(temperature=0)
# chat_llm = models.get_anthropic_opus(temperature=0)
# chat_llm = models.get_anthropic_sonnet(temperature=0)
chat_llm = models.get_anthropic_haiku(temperature=0)
chat_llm = models.get_anthropic_sonnet_35(temperature=0)
# chat_llm = models.get_anthropic_haiku(temperature=0)
# chat_llm = models.get_ollama_dolphin()

# embedding model used for memory
# embedding_llm = models.get_embedding_openai()
embedding_llm = models.get_embedding_hf()

# initial configuration
Agent.configure(
model_chat = chat_llm,
model_embedding = embedding_llm,
#memory_subdir=""
#memory_results=3
)

# create the first agent
agent0 = Agent()
agent0 = Agent(agent_number=0,
chat_llm=chat_llm,
embeddings_model=embedding_llm)

# start the conversation loop
while True:
@@ -73,7 +68,7 @@ def chat():
assistant_response = agent0.message_loop(user_input)

# print agent0 response
PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.name}: reponse:")
PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.agent_name}: reponse:")
PrintStyle(font_color="white").print(f"{assistant_response}")


9 changes: 7 additions & 2 deletions models.py
@@ -22,6 +22,11 @@ def get_anthropic_haiku(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, api_key=api_key) # type: ignore

def get_anthropic_sonnet_35(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-5-sonnet-20240620", temperature=temperature, api_key=api_key) # type: ignore


def get_anthropic_sonnet(api_key=None, temperature=DEFAULT_TEMPERATURE):
api_key = api_key or get_api_key("anthropic")
return ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, api_key=api_key) # type: ignore
@@ -68,10 +73,10 @@ def get_groq_gemma(api_key=None, temperature=DEFAULT_TEMPERATURE):
return ChatGroq(model_name="gemma-7b-it", temperature=temperature, api_key=api_key) # type: ignore

def get_ollama_dolphin(api_key=None, temperature=DEFAULT_TEMPERATURE):
return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16")
return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16", temperature=temperature)

def get_ollama_phi(api_key=None, temperature=DEFAULT_TEMPERATURE):
return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16")
return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16",temperature=temperature)

def get_embedding_hf(model_name="sentence-transformers/all-MiniLM-L6-v2"):
return HuggingFaceEmbeddings(model_name=model_name)
5 changes: 5 additions & 0 deletions prompts/agent.memory.md
@@ -0,0 +1,5 @@
# Memories
- following are your memories on the current topic
- you may find some of them helpful to solve the current task

{{memories}}
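
The `{{memories}}` placeholder above is meant to receive memories recalled for the current conversation. The shape of that flow appears in `fetch_memories` in agent.py earlier in this commit: retrieve raw memories, filter them with an ad-hoc LLM call, and append the cleaned result to the system prompt. Below is a rough sketch of that flow; `retrieve` and `ask_llm` are hypothetical stand-ins for the vector-store lookup and the ad-hoc LLM call, and the exact wiring of this particular prompt file is not visible in the shown diff.

~~~python
# Rough sketch of the memory injection flow, with hypothetical `retrieve` and
# `ask_llm` helpers standing in for the vector store and the ad-hoc LLM call.
import json
from typing import Callable, List

def inject_memories(base_system_prompt: str,
                    history_text: str,
                    retrieve: Callable[[str], List[str]],
                    ask_llm: Callable[[str, str], str],
                    cleanup_prompt: str,
                    memory_template: str) -> str:
    raw_memories = retrieve(history_text)  # similarity search over stored memories
    payload = json.dumps({"conversation_history": history_text,
                          "raw_memories": raw_memories})
    cleaned = ask_llm(cleanup_prompt, payload)  # LLM filters out irrelevant memories
    if not cleaned.strip():
        return base_system_prompt
    # fill the {{memories}} slot of a template like the prompt file above
    return base_system_prompt + "\n\n" + memory_template.replace("{{memories}}", cleaned)
~~~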
1 change: 1 addition & 0 deletions prompts/agent.system.md
@@ -9,6 +9,7 @@
- Tools help you gather knowledge and execute actions
3. **tool_args**: Object of arguments that are passed to the tool
- Each tool has specific arguments listed in Available tools section
- No text before or after the JSON object. End message there.

## Response example that must be used every time
~~~json
(Diffs for the remaining changed files are not shown.)