Skip to content

Commit

Permalink
Merge branch 'main' into fix-invalid-json-responses
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkbrnd authored Feb 21, 2025
2 parents 7938748 + 6a78810 commit ba7f986
Show file tree
Hide file tree
Showing 42 changed files with 5,460 additions and 14 deletions.
4 changes: 3 additions & 1 deletion libs/agno/agno/models/anthropic/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,11 @@ def _format_messages(messages: List[Message]) -> Tuple[List[Dict[str, str]], str
chat_messages: List[Dict[str, str]] = []
system_messages: List[str] = []

print()
for idx, message in enumerate(messages):

content = message.content or ""
if message.role == "system" or (message.role != "user" and idx in [0, 1]):
if message.role == "system":
if content is not None:
system_messages.append(content) # type: ignore
continue
Expand Down
144 changes: 144 additions & 0 deletions libs/agno/tests/integration/models/anthropic/test_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import pytest
from pydantic import BaseModel, Field

from agno.agent import Agent, RunResponse # noqa
from agno.models.anthropic import Claude
from agno.storage.agent.postgres import PostgresAgentStorage


def _assert_metrics(response: RunResponse):
    """Check that the token counters recorded on ``response`` are positive and consistent.

    Each of the ``input_tokens``, ``output_tokens`` and ``total_tokens`` entries of
    ``response.metrics`` is summed (a missing entry counts as an empty list), every sum
    must be strictly positive, and the total must equal input plus output.
    """
    recorded = response.metrics
    prompt_sum, completion_sum, overall_sum = (
        sum(recorded.get(key, [])) for key in ("input_tokens", "output_tokens", "total_tokens")
    )

    assert prompt_sum > 0
    assert completion_sum > 0
    assert overall_sum > 0
    assert overall_sum == prompt_sum + completion_sum


def test_basic():
    """Run a single non-streaming prompt and check content, transcript roles and token metrics."""
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    result: RunResponse = claude_agent.run("Share a 2 sentence horror story")

    assert result.content is not None
    # Expected transcript: one system message, the user prompt, and the model's reply.
    assert len(result.messages) == 3
    roles = [message.role for message in result.messages]
    assert roles == ["system", "user", "assistant"]

    _assert_metrics(result)


def test_basic_stream():
    """Run a streaming prompt and check every streamed chunk plus the final token metrics."""
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    stream = claude_agent.run("Share a 2 sentence horror story", stream=True)

    # With stream=True the call must hand back something iterable.
    assert hasattr(stream, "__iter__")

    chunks = list(stream)
    assert len(chunks) > 0
    for chunk in chunks:
        assert isinstance(chunk, RunResponse)
        assert chunk.content is not None

    # Metrics are read from the response the agent keeps after the stream is drained.
    _assert_metrics(claude_agent.run_response)


@pytest.mark.asyncio
async def test_async_basic():
    """Async counterpart of the basic run: check content, transcript roles and token metrics."""
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    result = await claude_agent.arun("Share a 2 sentence horror story")

    assert result.content is not None
    # Expected transcript: one system message, the user prompt, and the model's reply.
    assert len(result.messages) == 3
    roles = [message.role for message in result.messages]
    assert roles == ["system", "user", "assistant"]
    _assert_metrics(result)


@pytest.mark.asyncio
async def test_async_basic_stream():
    """Async streaming run: every yielded item is a RunResponse with content; metrics are recorded."""
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    event_stream = await claude_agent.arun("Share a 2 sentence horror story", stream=True)

    async for event in event_stream:
        assert isinstance(event, RunResponse)
        assert event.content is not None

    # Metrics are read from the response the agent keeps after the stream is drained.
    _assert_metrics(claude_agent.run_response)


def test_with_memory():
    """Two-turn conversation with history enabled: the second reply must recall the first turn.

    Also checks the messages accumulated in ``agent.memory`` and that the first entry of each
    token metric on the second response is a positive ``int`` with total == input + output.
    """
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        add_history_to_messages=True,
        num_history_responses=5,
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    # Turn 1: state a fact the model has to recall later.
    intro_reply = claude_agent.run("My name is John Smith")
    assert intro_reply.content is not None

    # Turn 2: the answer is only available through the conversation history.
    recall_reply = claude_agent.run("What's my name?")
    assert "John Smith" in recall_reply.content

    # Memory should now hold one system message followed by both user/assistant exchanges.
    remembered = claude_agent.memory.messages
    assert len(remembered) == 5
    assert [message.role for message in remembered] == ["system", "user", "assistant", "user", "assistant"]

    # Metrics structure and types on the second response.
    usage = recall_reply.metrics
    prompt_counts = usage["input_tokens"]
    completion_counts = usage["output_tokens"]
    overall_counts = usage["total_tokens"]

    first_prompt = prompt_counts[0]
    assert isinstance(first_prompt, int)
    assert first_prompt > 0
    first_completion = completion_counts[0]
    assert isinstance(first_completion, int)
    assert first_completion > 0
    first_overall = overall_counts[0]
    assert isinstance(first_overall, int)
    assert first_overall > 0
    assert first_overall == first_prompt + first_completion


def test_structured_output():
    """Request a reply parsed into a pydantic model and check that every field is populated."""

    class MovieScript(BaseModel):
        title: str = Field(..., description="Movie title")
        genre: str = Field(..., description="Movie genre")
        plot: str = Field(..., description="Brief plot summary")

    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        response_model=MovieScript,
        telemetry=False,
        monitoring=False,
    )

    result = claude_agent.run("Create a movie about time travel")

    # The content must come back as the requested model, not as raw text.
    script = result.content
    assert isinstance(script, MovieScript)
    for field_name in ("title", "genre", "plot"):
        assert getattr(script, field_name) is not None


def test_history():
    """With storage and history enabled, each run's message list grows by one user/assistant pair.

    NOTE(review): presumably needs the Postgres instance at ``db_url`` to be reachable — confirm
    against the integration-test setup.
    """
    db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
    session_store = PostgresAgentStorage(table_name="agent_sessions", db_url=db_url)
    claude_agent = Agent(
        model=Claude(id="claude-3-5-haiku-20241022"),
        storage=session_store,
        add_history_to_messages=True,
        telemetry=False,
        monitoring=False,
    )

    # After the n-th run the response is expected to carry 2 * n messages: 2, 4, 6, 8.
    for turn, prompt in enumerate(("Hello", "Hello 2", "Hello 3", "Hello 4"), start=1):
        claude_agent.run(prompt)
        assert len(claude_agent.run_response.messages) == 2 * turn
15 changes: 15 additions & 0 deletions libs/agno/tests/integration/models/anthropic/test_multimodal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from agno.agent.agent import Agent
from agno.media import Image
from agno.models.anthropic import Claude


def test_image_input():
    """Send an image URL with a prompt and check the description mentions the Golden Gate Bridge."""
    bridge_photo = Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")
    vision_agent = Agent(
        model=Claude(id="claude-3-5-sonnet-20241022"),
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    result = vision_agent.run("Tell me about this image.", images=[bridge_photo])

    # Case-insensitive keyword check on the model's description.
    description = result.content.lower()
    assert "golden" in description
    assert "bridge" in description
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
import pytest

from agno.agent import Agent, RunResponse # noqa
from agno.models.google import Gemini
from agno.models.anthropic import Claude
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.tools.exa import ExaTools
from agno.tools.yfinance import YFinanceTools


def test_tool_use():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools()],
show_tool_calls=True,
markdown=True,
Expand All @@ -29,7 +29,7 @@ def test_tool_use():

def test_tool_use_stream():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools()],
show_tool_calls=True,
markdown=True,
Expand Down Expand Up @@ -57,7 +57,7 @@ def test_tool_use_stream():
@pytest.mark.asyncio
async def test_async_tool_use():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools()],
show_tool_calls=True,
markdown=True,
Expand All @@ -68,15 +68,15 @@ async def test_async_tool_use():
response = await agent.arun("What is the current price of TSLA?")

# Verify tool usage
assert any(msg.tool_calls for msg in response.messages if msg.role == "model")
assert any(msg.tool_calls for msg in response.messages if msg.role == "assistant")
assert response.content is not None
assert "TSLA" in response.content


@pytest.mark.asyncio
async def test_async_tool_use_stream():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools()],
show_tool_calls=True,
markdown=True,
Expand All @@ -103,7 +103,7 @@ async def test_async_tool_use_stream():

def test_parallel_tool_calls():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools()],
show_tool_calls=True,
markdown=True,
Expand All @@ -125,7 +125,7 @@ def test_parallel_tool_calls():

def test_multiple_tool_calls():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[YFinanceTools(), DuckDuckGoTools()],
show_tool_calls=True,
markdown=True,
Expand Down Expand Up @@ -153,7 +153,7 @@ def get_the_weather_in_tokyo():
return "It is currently 70 degrees and cloudy in Tokyo"

agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[get_the_weather_in_tokyo],
show_tool_calls=True,
markdown=True,
Expand Down Expand Up @@ -183,7 +183,7 @@ def get_the_weather(city: Optional[str] = None):
return f"It is currently 70 degrees and cloudy in {city}"

agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[get_the_weather],
show_tool_calls=True,
markdown=True,
Expand All @@ -201,7 +201,7 @@ def get_the_weather(city: Optional[str] = None):

def test_tool_call_list_parameters():
agent = Agent(
model=Gemini(id="gemini-2.0-flash-lite-preview-02-05"),
model=Claude(id="claude-3-5-haiku-20241022"),
tools=[ExaTools()],
instructions="Use a single tool call if possible",
show_tool_calls=True,
Expand Down
119 changes: 119 additions & 0 deletions libs/agno/tests/integration/models/aws/bedrock/test_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from pydantic import BaseModel, Field

from agno.agent import Agent, RunResponse # noqa
from agno.models.aws import AwsBedrock
from agno.storage.agent.postgres import PostgresAgentStorage


def _assert_metrics(response: RunResponse):
    """Check that the token counters recorded on ``response`` are positive and consistent.

    The ``input_tokens``, ``output_tokens`` and ``total_tokens`` lists in ``response.metrics``
    are summed (a missing key is treated as an empty list); each sum must be strictly positive
    and the total must equal input plus output.
    """
    recorded = response.metrics
    sums = {key: sum(recorded.get(key, [])) for key in ("input_tokens", "output_tokens", "total_tokens")}

    assert sums["input_tokens"] > 0
    assert sums["output_tokens"] > 0
    assert sums["total_tokens"] > 0
    assert sums["total_tokens"] == sums["input_tokens"] + sums["output_tokens"]


def test_basic():
    """Run a single non-streaming prompt on Bedrock and check content, roles and token metrics."""
    bedrock_model = AwsBedrock(id="anthropic.claude-3-sonnet-20240229-v1:0")
    bedrock_agent = Agent(model=bedrock_model, markdown=True, telemetry=False, monitoring=False)

    result: RunResponse = bedrock_agent.run("Share a 2 sentence horror story")

    assert result.content is not None
    # Expected transcript: one system message, the user prompt, and the model's reply.
    assert len(result.messages) == 3
    roles = [message.role for message in result.messages]
    assert roles == ["system", "user", "assistant"]

    _assert_metrics(result)


def test_basic_stream():
    """Run a streaming prompt on Bedrock and check every streamed chunk plus the final metrics."""
    bedrock_model = AwsBedrock(id="anthropic.claude-3-sonnet-20240229-v1:0")
    bedrock_agent = Agent(model=bedrock_model, markdown=True, telemetry=False, monitoring=False)

    stream = bedrock_agent.run("Share a 2 sentence horror story", stream=True)

    # With stream=True the call must hand back something iterable.
    assert hasattr(stream, "__iter__")

    chunks = list(stream)
    assert len(chunks) > 0
    for chunk in chunks:
        assert isinstance(chunk, RunResponse)
        assert chunk.content is not None

    # Metrics are read from the response the agent keeps after the stream is drained.
    _assert_metrics(bedrock_agent.run_response)


def test_with_memory():
    """Two-turn conversation with history enabled: the second reply must recall the first turn.

    Also checks the messages accumulated in ``agent.memory`` and that the first entry of each
    token metric on the second response is a positive ``int`` with total == input + output.
    """
    agent = Agent(
        model=AwsBedrock(id="anthropic.claude-3-sonnet-20240229-v1:0"),
        # Without history the second prompt carries no trace of the first one, so the
        # "John Smith" assertion below could not be satisfied.
        add_history_to_messages=True,
        num_history_responses=5,
        markdown=True,
        telemetry=False,
        monitoring=False,
    )

    # First interaction
    response1 = agent.run("My name is John Smith")
    assert response1.content is not None

    # Second interaction should remember the name
    response2 = agent.run("What's my name?")
    assert "John Smith" in response2.content

    # Verify memories were created
    assert len(agent.memory.messages) == 5
    assert [m.role for m in agent.memory.messages] == ["system", "user", "assistant", "user", "assistant"]

    # Test metrics structure and types
    input_tokens = response2.metrics["input_tokens"]
    output_tokens = response2.metrics["output_tokens"]
    total_tokens = response2.metrics["total_tokens"]

    assert isinstance(input_tokens[0], int)
    assert input_tokens[0] > 0
    assert isinstance(output_tokens[0], int)
    assert output_tokens[0] > 0
    assert isinstance(total_tokens[0], int)
    assert total_tokens[0] > 0
    assert total_tokens[0] == input_tokens[0] + output_tokens[0]


def test_structured_output():
    """Request a reply parsed into a pydantic model and check that every field is populated."""

    class MovieScript(BaseModel):
        title: str = Field(..., description="Movie title")
        genre: str = Field(..., description="Movie genre")
        plot: str = Field(..., description="Brief plot summary")

    agent = Agent(
        model=AwsBedrock(id="anthropic.claude-3-sonnet-20240229-v1:0"),
        # The schema has to be handed to the agent; otherwise the content comes back as
        # plain text and the isinstance check below can never hold. Markdown formatting
        # is not requested because the reply is parsed into a model, not rendered.
        response_model=MovieScript,
        telemetry=False,
        monitoring=False,
    )

    response = agent.run("Create a movie about time travel")

    # Verify structured output
    assert isinstance(response.content, MovieScript)
    assert response.content.title is not None
    assert response.content.genre is not None
    assert response.content.plot is not None


def test_history():
    """With storage and history enabled, each run's message list grows by one user/assistant pair.

    NOTE(review): presumably needs the Postgres instance at ``db_url`` to be reachable — confirm
    against the integration-test setup.
    """
    db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
    session_store = PostgresAgentStorage(table_name="agent_sessions", db_url=db_url)
    bedrock_agent = Agent(
        model=AwsBedrock(id="anthropic.claude-3-sonnet-20240229-v1:0"),
        storage=session_store,
        add_history_to_messages=True,
        telemetry=False,
        monitoring=False,
    )

    # After the n-th run the response is expected to carry 2 * n messages: 2, 4, 6, 8.
    for turn, prompt in enumerate(("Hello", "Hello 2", "Hello 3", "Hello 4"), start=1):
        bedrock_agent.run(prompt)
        assert len(bedrock_agent.run_response.messages) == 2 * turn
Loading

0 comments on commit ba7f986

Please sign in to comment.