From 66535764cced8253c24106465133326dd823c4dd Mon Sep 17 00:00:00 2001
From: Ashpreet <ashpreetbedi@users.noreply.github.com>
Date: Fri, 31 Jan 2025 11:46:42 +0000
Subject: [PATCH] Cache model clients (#1949)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Description

- Cache model clients

---

## Type of change

Please check the options that are relevant:

- [x] Model update (Addition or modification of models)

---

## Checklist

- [x] Adherence to standards: Code complies with Agno’s style guidelines
and best practices.
- [x] Formatting and validation: You have run `./scripts/format.sh` and
`./scripts/validate.sh` to ensure code is formatted and linted.
- [x] Self-review completed: A thorough review has been performed by the
contributor(s).
- [x] Documentation: Docstrings and comments have been added or updated
for any complex logic.
- [x] Examples and guides: Relevant cookbook examples have been included
or updated (if applicable).
- [x] Tested in a clean environment: Changes have been tested in a clean
environment to confirm expected behavior.
- [x] Tests (optional): Tests have been added or updated to cover any
new or changed functionality.

---------

Co-authored-by: Dirk Brand <dirkbrnd@gmail.com>
---
 cookbook/models/anthropic/README.md           |  8 +++-
 cookbook/models/anthropic/memory.py           | 44 +++++++++++++++++++
 cookbook/models/cohere/README.md              |  6 +++
 cookbook/models/cohere/memory.py              | 43 ++++++++++++++++++
 cookbook/models/mistral/README.md             |  6 +++
 cookbook/models/mistral/basic.py              |  8 +---
 cookbook/models/mistral/memory.py             | 43 ++++++++++++++++++
 cookbook/models/mistral/mistral_small.py      |  2 +-
 cookbook/models/ollama/README.md              | 13 ++++--
 cookbook/models/ollama/memory.py              | 43 ++++++++++++++++++
 cookbook/models/openai/memory.py              |  2 +-
 libs/agno/agno/models/anthropic/claude.py     |  4 +-
 libs/agno/agno/models/cohere/chat.py          | 10 +++--
 libs/agno/agno/models/google/gemini.py        |  4 +-
 libs/agno/agno/models/groq/groq.py            |  4 +-
 .../agno/models/huggingface/huggingface.py    |  3 +-
 libs/agno/agno/models/mistral/mistral.py      |  1 +
 libs/agno/agno/models/ollama/chat.py          |  3 +-
 libs/agno/agno/models/openai/chat.py          |  4 +-
 libs/agno/pyproject.toml                      |  2 +-
 20 files changed, 228 insertions(+), 25 deletions(-)
 create mode 100644 cookbook/models/anthropic/memory.py
 create mode 100644 cookbook/models/cohere/memory.py
 create mode 100644 cookbook/models/mistral/memory.py
 create mode 100644 cookbook/models/ollama/memory.py

diff --git a/cookbook/models/anthropic/README.md b/cookbook/models/anthropic/README.md
index 18d647ca80..c8d8f2f11a 100644
--- a/cookbook/models/anthropic/README.md
+++ b/cookbook/models/anthropic/README.md
@@ -68,7 +68,13 @@ export OPENAI_API_KEY=***
 python cookbook/models/anthropic/knowledge.py
 ```
 
-### 9. Run Agent that analyzes an image
+### 9. Run Agent that uses memory
+
+```shell
+python cookbook/models/anthropic/memory.py
+```
+
+### 10. Run Agent that analyzes an image
 
 ```shell
 python cookbook/models/anthropic/image_agent.py
diff --git a/cookbook/models/anthropic/memory.py b/cookbook/models/anthropic/memory.py
new file mode 100644
index 0000000000..9821634fdc
--- /dev/null
+++ b/cookbook/models/anthropic/memory.py
@@ -0,0 +1,44 @@
+"""
+This recipe shows how to use personalized memories and summaries in an agent.
+Steps:
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
+2. Run: `pip install anthropic sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
+3. Run: `python cookbook/models/anthropic/memory.py` to run the agent
+"""
+
+from agno.agent import Agent, AgentMemory
+from agno.memory.db.postgres import PgMemoryDb
+from agno.models.anthropic import Claude
+from agno.storage.agent.postgres import PostgresAgentStorage
+from rich.pretty import pprint
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+agent = Agent(
+    model=Claude(id="claude-3-5-sonnet-20241022"),
+    # Store the memories and summary in a database
+    memory=AgentMemory(
+        db=PgMemoryDb(table_name="agent_memory", db_url=db_url),
+        create_user_memories=True,
+        create_session_summary=True,
+    ),
+    # Store agent sessions in a database
+    storage=PostgresAgentStorage(
+        table_name="personalized_agent_sessions", db_url=db_url
+    ),
+    # Show debug logs so you can see the memory being created
+    # debug_mode=True,
+)
+
+# -*- Share personal information
+agent.print_response("My name is john billings?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I live in nyc?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I'm going to a concert tomorrow?", stream=True)
+
+# Ask about the conversation
+agent.print_response(
+    "What have we been talking about, do you know my name?", stream=True
+)
diff --git a/cookbook/models/cohere/README.md b/cookbook/models/cohere/README.md
index 62a39d52f8..b84cca3495 100644
--- a/cookbook/models/cohere/README.md
+++ b/cookbook/models/cohere/README.md
@@ -60,3 +60,9 @@ python cookbook/models/cohere/storage.py
 ```shell
 python cookbook/models/cohere/knowledge.py
 ```
+
+### 9. Run Agent that uses memory
+
+```shell
+python cookbook/models/cohere/memory.py
+```
diff --git a/cookbook/models/cohere/memory.py b/cookbook/models/cohere/memory.py
new file mode 100644
index 0000000000..c2e2b80d4b
--- /dev/null
+++ b/cookbook/models/cohere/memory.py
@@ -0,0 +1,43 @@
+"""
+This recipe shows how to use personalized memories and summaries in an agent.
+Steps:
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
+2. Run: `pip install cohere sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
+3. Run: `python cookbook/models/cohere/memory.py` to run the agent
+"""
+
+from agno.agent import Agent, AgentMemory
+from agno.memory.db.postgres import PgMemoryDb
+from agno.models.cohere import Cohere
+from agno.storage.agent.postgres import PostgresAgentStorage
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+agent = Agent(
+    model=Cohere(id="command-r-08-2024"),
+    # Store the memories and summary in a database
+    memory=AgentMemory(
+        db=PgMemoryDb(table_name="agent_memory", db_url=db_url),
+        create_user_memories=True,
+        create_session_summary=True,
+    ),
+    # Store agent sessions in a database
+    storage=PostgresAgentStorage(
+        table_name="personalized_agent_sessions", db_url=db_url
+    ),
+    # Show debug logs so you can see the memory being created
+    # debug_mode=True,
+)
+
+# -*- Share personal information
+agent.print_response("My name is john billings?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I live in nyc?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I'm going to a concert tomorrow?", stream=True)
+
+# Ask about the conversation
+agent.print_response(
+    "What have we been talking about, do you know my name?", stream=True
+)
diff --git a/cookbook/models/mistral/README.md b/cookbook/models/mistral/README.md
index 96a51dd4e0..0e1c224569 100644
--- a/cookbook/models/mistral/README.md
+++ b/cookbook/models/mistral/README.md
@@ -49,3 +49,9 @@ python cookbook/models/mistral/tool_use.py
 ```shell
 python cookbook/models/mistral/structured_output.py
 ```
+
+### 7. Run Agent that uses memory
+
+```shell
+python cookbook/models/mistral/memory.py
+```
diff --git a/cookbook/models/mistral/basic.py b/cookbook/models/mistral/basic.py
index 9e197606de..578efc5419 100644
--- a/cookbook/models/mistral/basic.py
+++ b/cookbook/models/mistral/basic.py
@@ -1,15 +1,9 @@
-import os
-
 from agno.agent import Agent, RunResponse  # noqa
 from agno.models.mistral import MistralChat
 
-mistral_api_key = os.getenv("MISTRAL_API_KEY")
 
 agent = Agent(
-    model=MistralChat(
-        id="mistral-large-latest",
-        api_key=mistral_api_key,
-    ),
+    model=MistralChat(id="mistral-large-latest"),
     markdown=True,
 )
 
diff --git a/cookbook/models/mistral/memory.py b/cookbook/models/mistral/memory.py
new file mode 100644
index 0000000000..4608a3fd81
--- /dev/null
+++ b/cookbook/models/mistral/memory.py
@@ -0,0 +1,43 @@
+"""
+This recipe shows how to use personalized memories and summaries in an agent.
+Steps:
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
+2. Run: `pip install mistralai sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
+3. Run: `python cookbook/models/mistral/memory.py` to run the agent
+"""
+
+from agno.agent import Agent, AgentMemory
+from agno.memory.db.postgres import PgMemoryDb
+from agno.models.mistral.mistral import MistralChat
+from agno.storage.agent.postgres import PostgresAgentStorage
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+agent = Agent(
+    model=MistralChat(id="mistral-large-latest"),
+    # Store the memories and summary in a database
+    memory=AgentMemory(
+        db=PgMemoryDb(table_name="agent_memory", db_url=db_url),
+        create_user_memories=True,
+        create_session_summary=True,
+    ),
+    # Store agent sessions in a database
+    storage=PostgresAgentStorage(
+        table_name="personalized_agent_sessions", db_url=db_url
+    ),
+    # Show debug logs so you can see the memory being created
+    # debug_mode=True,
+)
+
+# -*- Share personal information
+agent.print_response("My name is john billings?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I live in nyc?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I'm going to a concert tomorrow?", stream=True)
+
+# Ask about the conversation
+agent.print_response(
+    "What have we been talking about, do you know my name?", stream=True
+)
diff --git a/cookbook/models/mistral/mistral_small.py b/cookbook/models/mistral/mistral_small.py
index 87bf1ee19a..c5e8a7e1a4 100644
--- a/cookbook/models/mistral/mistral_small.py
+++ b/cookbook/models/mistral/mistral_small.py
@@ -10,4 +10,4 @@
     show_tool_calls=True,
     markdown=True,
 )
-agent.print_response("Whats happening in France?", stream=True)
+agent.print_response("Tell me about mistrall small, any news", stream=True)
diff --git a/cookbook/models/ollama/README.md b/cookbook/models/ollama/README.md
index 8e1f37fd42..a661502326 100644
--- a/cookbook/models/ollama/README.md
+++ b/cookbook/models/ollama/README.md
@@ -63,7 +63,13 @@ python cookbook/models/ollama/storage.py
 python cookbook/models/ollama/knowledge.py
 ```
 
-### 9. Run Agent that interprets an image
+### 9. Run Agent that uses memory
+
+```shell
+python cookbook/models/ollama/memory.py
+```
+
+### 10. Run Agent that interprets an image
 
 Pull the llama3.2 vision model
 
@@ -75,14 +81,13 @@ ollama pull llama3.2-vision
 python cookbook/models/ollama/image_agent.py
 ```
 
-### 10. Run Agent that manually sets the Ollama client
+### 11. Run Agent that manually sets the Ollama client
 
 ```shell
 python cookbook/models/ollama/set_client.py
 ```
 
-
-### 11. See demos of some well-known Ollama models
+### 12. See demos of some well-known Ollama models
 
 ```shell
 python cookbook/models/ollama/demo_deepseek_r1.py
diff --git a/cookbook/models/ollama/memory.py b/cookbook/models/ollama/memory.py
new file mode 100644
index 0000000000..ce31066c82
--- /dev/null
+++ b/cookbook/models/ollama/memory.py
@@ -0,0 +1,43 @@
+"""
+This recipe shows how to use personalized memories and summaries in an agent.
+Steps:
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
+2. Run: `pip install ollama sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
+3. Run: `python cookbook/models/ollama/memory.py` to run the agent
+"""
+
+from agno.agent import Agent, AgentMemory
+from agno.memory.db.postgres import PgMemoryDb
+from agno.models.ollama.chat import Ollama
+from agno.storage.agent.postgres import PostgresAgentStorage
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+agent = Agent(
+    model=Ollama(id="qwen2.5:latest"),
+    # Store the memories and summary in a database
+    memory=AgentMemory(
+        db=PgMemoryDb(table_name="agent_memory", db_url=db_url),
+        create_user_memories=True,
+        create_session_summary=True,
+    ),
+    # Store agent sessions in a database
+    storage=PostgresAgentStorage(
+        table_name="personalized_agent_sessions", db_url=db_url
+    ),
+    # Show debug logs so you can see the memory being created
+    # debug_mode=True,
+)
+
+# -*- Share personal information
+agent.print_response("My name is john billings?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I live in nyc?", stream=True)
+
+# -*- Share personal information
+agent.print_response("I'm going to a concert tomorrow?", stream=True)
+
+# Ask about the conversation
+agent.print_response(
+    "What have we been talking about, do you know my name?", stream=True
+)
diff --git a/cookbook/models/openai/memory.py b/cookbook/models/openai/memory.py
index 9e0131eebd..890f77278c 100644
--- a/cookbook/models/openai/memory.py
+++ b/cookbook/models/openai/memory.py
@@ -1,7 +1,7 @@
 """
 This recipe shows how to use personalized memories and summaries in an agent.
 Steps:
-1. Run: `./cookbook/run_pgvector.sh` to start a postgres container with pgvector
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
 2. Run: `pip install openai sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
 3. Run: `python cookbook/agents/personalized_memories_and_summaries.py` to run the agent
 """
diff --git a/libs/agno/agno/models/anthropic/claude.py b/libs/agno/agno/models/anthropic/claude.py
index d6f2ac8446..398a3e3fe4 100644
--- a/libs/agno/agno/models/anthropic/claude.py
+++ b/libs/agno/agno/models/anthropic/claude.py
@@ -134,7 +134,9 @@ def get_client(self) -> AnthropicClient:
             _client_params["api_key"] = self.api_key
         if self.client_params:
             _client_params.update(self.client_params)
-        return AnthropicClient(**_client_params)
+
+        self.client = AnthropicClient(**_client_params)
+        return self.client
 
     @property
     def request_kwargs(self) -> Dict[str, Any]:
diff --git a/libs/agno/agno/models/cohere/chat.py b/libs/agno/agno/models/cohere/chat.py
index c064420652..376f78dd15 100644
--- a/libs/agno/agno/models/cohere/chat.py
+++ b/libs/agno/agno/models/cohere/chat.py
@@ -54,11 +54,11 @@ class Cohere(Model):
     api_key: Optional[str] = None
     client_params: Optional[Dict[str, Any]] = None
     # -*- Provide the Cohere client manually
-    cohere_client: Optional[CohereClient] = None
+    client: Optional[CohereClient] = None
 
     def get_client(self) -> CohereClient:
-        if self.cohere_client:
-            return self.cohere_client
+        if self.client:
+            return self.client
 
         _client_params: Dict[str, Any] = {}
 
@@ -68,7 +68,9 @@ def get_client(self) -> CohereClient:
 
         if self.api_key:
             _client_params["api_key"] = self.api_key
-        return CohereClient(**_client_params)
+
+        self.client = CohereClient(**_client_params)
+        return self.client
 
     @property
     def request_kwargs(self) -> Dict[str, Any]:
diff --git a/libs/agno/agno/models/google/gemini.py b/libs/agno/agno/models/google/gemini.py
index 392360e5a9..6ab3fe1839 100644
--- a/libs/agno/agno/models/google/gemini.py
+++ b/libs/agno/agno/models/google/gemini.py
@@ -404,7 +404,9 @@ def get_client(self) -> GenerativeModel:
         if self.client_params:
             client_params.update(self.client_params)
         genai.configure(**client_params)
-        return genai.GenerativeModel(model_name=self.id, **self.request_kwargs)
+
+        self.client = genai.GenerativeModel(model_name=self.id, **self.request_kwargs)
+        return self.client
 
     @property
     def request_kwargs(self) -> Dict[str, Any]:
diff --git a/libs/agno/agno/models/groq/groq.py b/libs/agno/agno/models/groq/groq.py
index 5a26d9df0e..3370f98b85 100644
--- a/libs/agno/agno/models/groq/groq.py
+++ b/libs/agno/agno/models/groq/groq.py
@@ -136,7 +136,9 @@ def get_client(self) -> GroqClient:
         client_params: Dict[str, Any] = self.get_client_params()
         if self.http_client is not None:
             client_params["http_client"] = self.http_client
-        return GroqClient(**client_params)
+
+        self.client = GroqClient(**client_params)
+        return self.client
 
     def get_async_client(self) -> AsyncGroqClient:
         """
diff --git a/libs/agno/agno/models/huggingface/huggingface.py b/libs/agno/agno/models/huggingface/huggingface.py
index a7ebe843c3..ea3fc1634b 100644
--- a/libs/agno/agno/models/huggingface/huggingface.py
+++ b/libs/agno/agno/models/huggingface/huggingface.py
@@ -135,7 +135,8 @@ def get_client(self) -> InferenceClient:
         _client_params: Dict[str, Any] = self.get_client_params()
         if self.http_client is not None:
             _client_params["http_client"] = self.http_client
-        return InferenceClient(**_client_params)
+        self.client = InferenceClient(**_client_params)
+        return self.client
 
     def get_async_client(self) -> AsyncInferenceClient:
         """
diff --git a/libs/agno/agno/models/mistral/mistral.py b/libs/agno/agno/models/mistral/mistral.py
index 660ff60466..4146ed5266 100644
--- a/libs/agno/agno/models/mistral/mistral.py
+++ b/libs/agno/agno/models/mistral/mistral.py
@@ -96,6 +96,7 @@ def client(self) -> MistralClient:
             _client_params["timeout"] = self.timeout
         if self.client_params:
             _client_params.update(self.client_params)
+
         self.mistral_client = MistralClient(**_client_params)
         return self.mistral_client
 
diff --git a/libs/agno/agno/models/ollama/chat.py b/libs/agno/agno/models/ollama/chat.py
index f608c84d33..024b95915f 100644
--- a/libs/agno/agno/models/ollama/chat.py
+++ b/libs/agno/agno/models/ollama/chat.py
@@ -81,7 +81,8 @@ def get_client(self) -> OllamaClient:
         if self.client is not None:
             return self.client
 
-        return OllamaClient(**self.get_client_params())
+        self.client = OllamaClient(**self.get_client_params())
+        return self.client
 
     def get_async_client(self) -> AsyncOllamaClient:
         """
diff --git a/libs/agno/agno/models/openai/chat.py b/libs/agno/agno/models/openai/chat.py
index 3e74a21539..24bab8779a 100644
--- a/libs/agno/agno/models/openai/chat.py
+++ b/libs/agno/agno/models/openai/chat.py
@@ -131,7 +131,9 @@ def get_client(self) -> OpenAIClient:
         client_params: Dict[str, Any] = self._get_client_params()
         if self.http_client is not None:
             client_params["http_client"] = self.http_client
-        return OpenAIClient(**client_params)
+
+        self.client = OpenAIClient(**client_params)
+        return self.client
 
     def get_async_client(self) -> AsyncOpenAIClient:
         """
diff --git a/libs/agno/pyproject.toml b/libs/agno/pyproject.toml
index bbe1e0813c..cf4972cb0c 100644
--- a/libs/agno/pyproject.toml
+++ b/libs/agno/pyproject.toml
@@ -61,7 +61,7 @@ firecrawl = ["firecrawl-py"]
 
 # Dependencies for Storage
 sql = ["sqlalchemy"]
-postgres = ["psycopg"]
+postgres = ["psycopg-binary"]
 
 # Dependencies for Vector databases
 pgvector = ["pgvector"]