Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DeepInfra model with Reasoning support for OpenAILike deepseek models #2197

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions cookbook/agent_concepts/reasoning/deepinfra/9_11_or_9_9.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Be sure to set your DEEPSEEK_API_KEY environment variable before running this script.
"""

from agno.agent import Agent
from agno.cli.console import console
from agno.models.anthropic import Claude
from agno.models.deepinfra import DeepInfra
from agno.models.openai import OpenAIChat

task = "9.11 and 9.9 -- which is bigger?"

regular_agent_claude = Agent(model=Claude("claude-3-5-sonnet-20241022"))
reasoning_agent_claude = Agent(
model=Claude("claude-3-5-sonnet-20241022"),
reasoning_model=DeepInfra(
id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
temperature=0.6,
max_tokens=1024,
top_p=0.95,
),
)

regular_agent_openai = Agent(model=OpenAIChat(id="gpt-4o"))
reasoning_agent_openai = Agent(
model=OpenAIChat(id="gpt-4o-mini"),
reasoning_model=DeepInfra(
id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
temperature=0.6,
max_tokens=1024,
top_p=0.95,
),
)

console.rule("[bold blue]Regular Claude Agent[/bold blue]")
regular_agent_claude.print_response(task, stream=True)

console.rule("[bold green]Claude Reasoning Agent[/bold green]")
reasoning_agent_claude.print_response(task, stream=True)

console.rule("[bold red]Regular OpenAI Agent[/bold red]")
regular_agent_openai.print_response(task, stream=True)

console.rule("[bold yellow]OpenAI Reasoning Agent[/bold yellow]")
reasoning_agent_openai.print_response(task, stream=True)
Empty file.
72 changes: 72 additions & 0 deletions cookbook/models/deepinfra/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# DeepInfra Cookbook

> Note: Fork and clone this repository if needed

> Note: DeepInfra does not appear to include models that support structured outputs.

### 1. Create and activate a virtual environment

```shell
python3 -m venv ~/.venvs/aienv
source ~/.venvs/aienv/bin/activate
```

### 2. Export your `DEEPINFRA_API_KEY`

```shell
export DEEPINFRA_API_KEY=***
```

### 3. Install libraries

```shell
pip install -U openai duckduckgo-search agno
```

### 4. Run basic Agent

- Streaming on

```shell
python cookbook/models/deepinfra/basic_stream.py
```

- Streaming off

```shell
python cookbook/models/deepinfra/basic.py
```

### 5. Run Async Agent

- Streaming on

```shell
python cookbook/models/deepinfra/async_basic_stream.py
```

- Streaming off

```shell
python cookbook/models/deepinfra/async_basic.py
```

### 6. Run Agent with Tools

- DuckDuckGo Search

```shell
python cookbook/models/deepinfra/tool_use.py
```

- Async DuckDuckGo Search

```shell
python cookbook/models/deepinfra/async_tool_use.py
```

### 7. Run Agent that returns JSON output defined by the response model

```shell
python cookbook/models/deepinfra/json_output.py
```
18 changes: 18 additions & 0 deletions cookbook/models/deepinfra/async_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Async (non-streaming) example of a DeepInfra-backed agent."""

import asyncio

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa

agent = Agent(
    model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
    markdown=True,
)

# To capture the response instead of printing it:
#   response = asyncio.run(agent.arun("Share a 2 sentence horror story"))
#   print(response.content)

# Print the response in the terminal.
asyncio.run(agent.aprint_response("Share a 2 sentence horror story"))
13 changes: 13 additions & 0 deletions cookbook/models/deepinfra/async_basic_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Async streaming example of a DeepInfra-backed agent."""

import asyncio
from typing import Iterator  # noqa

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa

agent = Agent(
    model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
    markdown=True,
)

# Stream the response to the terminal as it is generated.
asyncio.run(agent.aprint_response("Share a 2 sentence horror story", stream=True))
16 changes: 16 additions & 0 deletions cookbook/models/deepinfra/async_tool_use.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Run `pip install duckduckgo-search` to install dependencies."""

import asyncio

from agno.agent import Agent # noqa
from agno.models.deepinfra import DeepInfra # noqa
from agno.tools.duckduckgo import DuckDuckGoTools # noqa

agent = Agent(
model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
tools=[DuckDuckGoTools()],
show_tool_calls=True,
markdown=True,
)

asyncio.run(agent.aprint_response("What's the latest news about AI?", stream=True))
15 changes: 15 additions & 0 deletions cookbook/models/deepinfra/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Basic (non-streaming) example of a DeepInfra-backed agent."""

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa

deepinfra_model = DeepInfra(id="meta-llama/Llama-2-70b-chat-hf")
agent = Agent(model=deepinfra_model, markdown=True)

# To capture the response instead of printing it:
#   run: RunResponse = agent.run("Share a 2 sentence horror story")
#   print(run.content)

# Print the response in the terminal.
agent.print_response("Share a 2 sentence horror story")
19 changes: 19 additions & 0 deletions cookbook/models/deepinfra/basic_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Streaming example of a DeepInfra-backed agent."""

from typing import Iterator  # noqa

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa

agent = Agent(
    model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
    markdown=True,
)

# Consume the streamed response chunk by chunk.
run_response: Iterator[RunResponse] = agent.run("Share a 2 sentence horror story", stream=True)
for event in run_response:
    print(event.content)

# Or stream straight to the terminal.
agent.print_response("Share a 2 sentence horror story", stream=True)
39 changes: 39 additions & 0 deletions cookbook/models/deepinfra/json_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""JSON-mode example: a DeepInfra model fills in a pydantic response model."""

from typing import List

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa
from pydantic import BaseModel, Field
from rich.pretty import pprint  # noqa


class MovieScript(BaseModel):
    """Schema the agent must populate when writing a movie script."""

    setting: str = Field(..., description="Provide a nice setting for a blockbuster movie.")
    ending: str = Field(..., description="Ending of the movie. If not available, provide a happy ending.")
    genre: str = Field(..., description="Genre of the movie. If not available, select action, thriller or romantic comedy.")
    name: str = Field(..., description="Give a name to this movie")
    characters: List[str] = Field(..., description="Name of characters for this movie.")
    storyline: str = Field(..., description="3 sentence storyline for the movie. Make it exciting!")


# Agent that uses JSON mode to return a MovieScript.
agent = Agent(
    model=DeepInfra(id="microsoft/phi-4"),
    description="You write movie scripts.",
    response_model=MovieScript,
)

# To capture the structured response instead of printing it:
#   response: RunResponse = agent.run("New York")
#   pprint(response.content)

agent.print_response("New York")
11 changes: 11 additions & 0 deletions cookbook/models/deepinfra/sync_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Minimal synchronous, non-streaming DeepInfra agent example."""

from agno.agent import Agent, RunResponse  # noqa
from agno.models.deepinfra import DeepInfra  # noqa

agent = Agent(
    model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
    markdown=True,
)

# To capture the response instead of printing it:
#   run: RunResponse = agent.run("Share a 2 sentence horror story")
#   print(run.content)

# Print the response in the terminal.
agent.print_response("Share a 2 sentence horror story")
14 changes: 14 additions & 0 deletions cookbook/models/deepinfra/tool_use.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Run `pip install duckduckgo-search` to install dependencies."""

from agno.agent import Agent # noqa
from agno.models.deepinfra import DeepInfra # noqa
from agno.tools.duckduckgo import DuckDuckGoTools # noqa

agent = Agent(
model=DeepInfra(id="meta-llama/Llama-2-70b-chat-hf"),
tools=[DuckDuckGoTools()],
show_tool_calls=True,
markdown=True,
)

agent.print_response("Whats happening in France?", stream=True)
12 changes: 9 additions & 3 deletions libs/agno/agno/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from agno.memory.agent import AgentMemory, AgentRun
from agno.models.base import Model
from agno.models.message import Message, MessageReferences
from agno.models.openai.like import OpenAILike
from agno.models.response import ModelResponse, ModelResponseEvent
from agno.reasoning.step import NextAction, ReasoningStep, ReasoningSteps
from agno.run.messages import RunMessages
Expand Down Expand Up @@ -2768,8 +2769,10 @@ def reason(self, run_messages: RunMessages) -> Iterator[RunResponse]:
reasoning_steps=[ReasoningStep(result=groq_reasoning_message.content)],
reasoning_agent_messages=[groq_reasoning_message],
)
# Use o-3 for reasoning
elif reasoning_model.__class__.__name__ == "OpenAIChat" and reasoning_model.id.startswith("o3"):
# Use o-3 or OpenAILike with deepseek model for reasoning
elif (reasoning_model.__class__.__name__ == "OpenAIChat" and reasoning_model.id.startswith("o3")) or (
isinstance(reasoning_model, OpenAILike) and "deepseek" in reasoning_model.id.lower()
):
from agno.reasoning.openai import get_openai_reasoning, get_openai_reasoning_agent

openai_reasoning_agent = self.reasoning_agent or get_openai_reasoning_agent(
Expand Down Expand Up @@ -2948,7 +2951,10 @@ async def areason(self, run_messages: RunMessages) -> Any:
reasoning_agent_messages=[groq_reasoning_message],
)
# Use o3 or OpenAILike with deepseek model for reasoning
elif reasoning_model.__class__.__name__ == "OpenAIChat" and reasoning_model.id.startswith("o"):
elif (reasoning_model.__class__.__name__ == "OpenAIChat" and reasoning_model.id.startswith("o3")) or (
isinstance(reasoning_model, OpenAILike) and "deepseek" in reasoning_model.id.lower()
):
from agno.reasoning.openai import aget_openai_reasoning, get_openai_reasoning_agent

openai_reasoning_agent = self.reasoning_agent or get_openai_reasoning_agent(
Expand Down
1 change: 1 addition & 0 deletions libs/agno/agno/models/deepinfra/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from agno.models.deepinfra.deepinfra import DeepInfra
28 changes: 28 additions & 0 deletions libs/agno/agno/models/deepinfra/deepinfra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from dataclasses import dataclass
from os import getenv
from typing import Optional

from agno.models.openai.like import OpenAILike


@dataclass
class DeepInfra(OpenAILike):
    """
    A class for interacting with DeepInfra models through their
    OpenAI-compatible API.

    For more information, see: https://deepinfra.com/docs/

    Attributes:
        id (str): The id of the DeepInfra model to use. Default is "meta-llama/Llama-2-70b-chat-hf".
        name (str): The name of this chat model instance. Default is "DeepInfra".
        provider (str): The provider of the model. Default is "DeepInfra".
        api_key (Optional[str]): The api key used to authorize requests to DeepInfra.
            Defaults to the DEEPINFRA_API_KEY environment variable (read at import time).
        base_url (str): The base url to which the requests are sent.
    """

    id: str = "meta-llama/Llama-2-70b-chat-hf"
    name: str = "DeepInfra"
    # Fixed provider label. The previous default, `"DeepInfra " + id`, was
    # evaluated in the class body, so it baked the class-level default id
    # (with a stray trailing space) into every instance's provider string,
    # even when a different id was passed at construction time.
    provider: str = "DeepInfra"

    api_key: Optional[str] = getenv("DEEPINFRA_API_KEY", None)
    base_url: str = "https://api.deepinfra.com/v1/openai"