Commit

Merge branch 'main' of https://github.com/phidatahq/phidata

ashpreetbedi committed Dec 19, 2024
2 parents 7d5ceee + e4113a5 commit ff1bca0
Showing 30 changed files with 849 additions and 75 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -48,4 +48,6 @@ data.db
 
 .ipynb_checkpoints
 
+audio_generations
+
 *.db
7 changes: 3 additions & 4 deletions cookbook/agents/37_audio_input_output.py
@@ -2,6 +2,7 @@
 import requests
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 # Fetch the audio file and convert it to a base64 encoded string
 url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
@@ -22,7 +23,5 @@
     audio={"data": encoded_string, "format": "wav"},
 )
 
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("dog.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/dog.wav")
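For reference, the removed lines show exactly what the new helper replaces. A minimal sketch of what `phi.utils.audio.write_audio_to_file` presumably does, reconstructed from that inlined code (the directory creation is an assumption, suggested by paths like `tmp/dog.wav`):

import base64
from pathlib import Path


def write_audio_to_file(audio: str, filename: str) -> None:
    """Decode a base64-encoded audio payload and write it to disk."""
    path = Path(filename)
    # Assumption: the helper creates missing parent directories such as tmp/.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(base64.b64decode(audio))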
15 changes: 6 additions & 9 deletions cookbook/agents/38_audio_multi_turn.py
@@ -1,22 +1,19 @@
-import base64
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 agent = Agent(
     model=OpenAIChat(
         id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}
     ),
     debug_mode=True,
     add_history_to_messages=True,
 )
 
 agent.run("Is a golden retriever a good family dog?")
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("tmp/answer_1.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_1.wav")
 
 agent.run("Why do you say they are loyal?")
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("tmp/answer_2.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_2.wav")
8 changes: 3 additions & 5 deletions cookbook/agents/42_image_to_audio.py
@@ -1,10 +1,10 @@
-import base64
 from pathlib import Path
 from rich import print
 from rich.text import Text
 
 from phi.agent import Agent, RunResponse
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 cwd = Path(__file__).parent.resolve()
 
@@ -23,7 +23,5 @@
 )
 
 audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}")
-if audio_story.audio is not None and "data" in audio_story.audio:
-    wav_bytes = base64.b64decode(audio_story.audio["data"])
-    with open(cwd.joinpath("tmp/multimodal-agents.wav"), "wb") as f:
-        f.write(wav_bytes)
+if audio_story.response_audio is not None and "data" in audio_story.response_audio:
+    write_audio_to_file(audio=audio_story.response_audio["data"], filename="tmp/multimodal-agents.wav")
16 changes: 16 additions & 0 deletions cookbook/playground/gemini_agents.py
@@ -0,0 +1,16 @@
from phi.agent import Agent
from phi.tools.yfinance import YFinanceTools
from phi.playground import Playground, serve_playground_app
from phi.model.google import Gemini

finance_agent = Agent(
name="Finance Agent",
model=Gemini(id="gemini-2.0-flash-exp"),
tools=[YFinanceTools(stock_price=True)],
debug_mode=True,
)

app = Playground(agents=[finance_agent]).get_app(use_async=False)

if __name__ == "__main__":
serve_playground_app("gemini_agents:app", reload=True)
30 changes: 29 additions & 1 deletion cookbook/playground/multimodal_agent.py
@@ -9,6 +9,7 @@
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
 from phi.tools.dalle import Dalle
+from phi.tools.eleven_labs_tools import ElevenLabsTools
 from phi.tools.giphy import GiphyTools
 from phi.tools.models_labs import ModelsLabs
 from phi.model.response import FileType
@@ -88,6 +89,7 @@
 
 gif_agent = Agent(
     name="Gif Generator Agent",
+    agent_id="gif_agent",
     model=OpenAIChat(id="gpt-4o"),
     tools=[GiphyTools()],
     description="You are an AI agent that can generate gifs using Giphy.",
@@ -102,8 +104,34 @@
     storage=SqlAgentStorage(table_name="gif_agent", db_file=image_agent_storage_file),
 )
 
+audio_agent = Agent(
+    name="Audio Generator Agent",
+    agent_id="audio_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[
+        ElevenLabsTools(
+            voice_id="JBFqnCBsd6RMkjVDRZzb", model_id="eleven_multilingual_v2", target_directory="audio_generations"
+        )
+    ],
+    description="You are an AI agent that can generate audio using the ElevenLabs API.",
+    instructions=[
+        "When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
+        "You'll generate the appropriate prompt to send to the tool to generate audio.",
+        "You don't need to find the appropriate voice first, I already specified the voice to use.",
+        "Don't return the file name or file URL in your response or any markdown, just tell the user the audio was created successfully.",
+        "The audio should be long and detailed.",
+    ],
+    markdown=True,
+    debug_mode=True,
+    add_history_to_messages=True,
+    add_datetime_to_instructions=True,
+    storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
+)
+
-app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent]).get_app(use_async=False)
 
+app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
+    use_async=False
+)
 
 if __name__ == "__main__":
     serve_playground_app("multimodal_agent:app", reload=True)
8 changes: 8 additions & 0 deletions cookbook/providers/ollama/agent_stream.py
@@ -3,6 +3,7 @@
 from typing import Iterator  # noqa
 from phi.agent import Agent, RunResponse  # noqa
 from phi.model.ollama import Ollama
+from phi.tools.crawl4ai_tools import Crawl4aiTools
 from phi.tools.yfinance import YFinanceTools
 
 agent = Agent(
@@ -20,3 +21,10 @@
 
 # Print the response in the terminal
 agent.print_response("What are analyst recommendations for NVDA and TSLA", stream=True)
+
+
+agent = Agent(model=Ollama(id="llama3.1:8b"), tools=[Crawl4aiTools(max_length=1000)], show_tool_calls=True)
+agent.print_response(
+    "Summarize the key points of this page in bullet points: https://blog.google/products/gemini/google-gemini-deep-research/",
+    stream=True,
+)
13 changes: 13 additions & 0 deletions cookbook/storage/json_storage.py
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.json import JsonFileAgentStorage

agent = Agent(
storage=JsonFileAgentStorage(dir_path="tmp/agent_sessions_json"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
13 changes: 13 additions & 0 deletions cookbook/storage/yaml_storage.py
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.yaml import YamlFileAgentStorage

agent = Agent(
storage=YamlFileAgentStorage(dir_path="tmp/agent_sessions_yaml"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
22 changes: 22 additions & 0 deletions cookbook/tools/confluence_tools.py
@@ -0,0 +1,22 @@
from phi.agent import Agent
from phi.tools.confluence import ConfluenceTools


agent = Agent(
name="Confluence agent",
tools=[ConfluenceTools()],
show_tool_calls=True,
markdown=True,
)

## getting space details
agent.print_response("How many spaces are there and what are their names?")

## getting page content
agent.print_response("What is the content present in page 'Large language model in LLM space'")

## getting page details in a particular space
agent.print_response("Can you extract all the page names from 'LLM' space")

## creating a new page in a space
agent.print_response("Can you create a new page named 'TESTING' in 'LLM' space")
32 changes: 32 additions & 0 deletions cookbook/tools/elevenlabs_tools.py
@@ -0,0 +1,32 @@
"""
pip install elevenlabs
"""

from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.eleven_labs_tools import ElevenLabsTools

audio_agent = Agent(
model=OpenAIChat(id="gpt-4o"),
tools=[
ElevenLabsTools(
voice_id="21m00Tcm4TlvDq8ikWAM", model_id="eleven_multilingual_v2", target_directory="audio_generations"
)
],
description="You are an AI agent that can generate audio using the ElevenLabs API.",
instructions=[
"When the user asks you to generate audio, use the `generate_audio` tool to generate the audio.",
"You'll generate the appropriate prompt to send to the tool to generate audio.",
"You don't need to find the appropriate voice first, I already specified the voice to user."
"Return the audio file name in your response. Don't convert it to markdown.",
"The audio should be long and detailed.",
],
markdown=True,
debug_mode=True,
show_tool_calls=True,
)

audio_agent.print_response("Generate a very long audio of history of french revolution")


audio_agent.print_response("Generate a kick sound effect")
33 changes: 31 additions & 2 deletions phi/agent/agent.py
@@ -28,7 +28,7 @@
 
 from phi.document import Document
 from phi.agent.session import AgentSession
-from phi.model.content import Image, Video
+from phi.model.content import Image, Video, Audio
 from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction
 from phi.run.response import RunEvent, RunResponse, RunResponseExtraData
 from phi.knowledge.agent import AgentKnowledge
@@ -61,6 +61,8 @@ class Agent(BaseModel):
     images: Optional[List[Image]] = None
     # Videos associated with this agent
     videos: Optional[List[Video]] = None
+    # Audio associated with this agent
+    audio: Optional[List[Audio]] = None
 
     # Data associated with this agent
     # name, model, images and videos are automatically added to the agent_data
@@ -577,6 +579,8 @@ def get_agent_data(self) -> Dict[str, Any]:
             agent_data["images"] = [img if isinstance(img, dict) else img.model_dump() for img in self.images]
         if self.videos is not None:
             agent_data["videos"] = [vid if isinstance(vid, dict) else vid.model_dump() for vid in self.videos]
+        if self.audio is not None:
+            agent_data["audio"] = [aud if isinstance(aud, dict) else aud.model_dump() for aud in self.audio]
         return agent_data
 
     def get_session_data(self) -> Dict[str, Any]:
@@ -641,6 +645,12 @@ def from_agent_session(self, session: AgentSession):
                self.videos.extend([Video.model_validate(vid) for vid in videos_from_db])
            else:
                self.videos = videos_from_db
+        if "audio" in session.agent_data:
+            audio_from_db = session.agent_data.get("audio")
+            if self.audio is not None and isinstance(self.audio, list):
+                self.audio.extend([Audio.model_validate(aud) for aud in audio_from_db])
+            else:
+                self.audio = audio_from_db
 
        # If agent_data is set in the agent, update the database agent_data with the agent's agent_data
        if self.agent_data is not None:
@@ -1706,8 +1716,10 @@ def generic_run_response(
             agent_id=self.agent_id,
             content=content,
             tools=self.run_response.tools,
+            audio=self.run_response.audio,
             images=self.run_response.images,
             videos=self.run_response.videos,
+            response_audio=self.run_response.response_audio,
             model=self.run_response.model,
             messages=self.run_response.messages,
             extra_data=self.run_response.extra_data,
@@ -1798,6 +1810,7 @@ def _run(
                     self.run_response.content = model_response_chunk.content
                     self.run_response.created_at = model_response_chunk.created_at
                     yield self.run_response
+
                 elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value:
                     # Add tool call to the run_response
                     tool_call_dict = model_response_chunk.tool_call
@@ -1834,7 +1847,7 @@
         else:
             self.run_response.content = model_response.content
         if model_response.audio is not None:
-            self.run_response.audio = model_response.audio
+            self.run_response.response_audio = model_response.audio
         self.run_response.messages = messages_for_model
         self.run_response.created_at = model_response.created_at
 
@@ -1848,6 +1861,8 @@
         # Update the run_response content if streaming as run_response will only contain the last chunk
         if self.stream:
             self.run_response.content = model_response.content
+            if model_response.audio is not None:
+                self.run_response.response_audio = model_response.audio
 
         # 6. Update Memory
         if self.stream_intermediate_steps:
@@ -2186,6 +2201,8 @@ async def _arun(
         # Update the run_response content if streaming as run_response will only contain the last chunk
         if self.stream:
             self.run_response.content = model_response.content
+            if model_response.audio is not None:
+                self.run_response.response_audio = model_response.audio
 
         # 6. Update Memory
         if self.stream_intermediate_steps:
@@ -2469,12 +2486,24 @@ def add_video(self, video: Video) -> None:
             self.run_response.videos = []
         self.run_response.videos.append(video)
 
+    def add_audio(self, audio: Audio) -> None:
+        if self.audio is None:
+            self.audio = []
+        self.audio.append(audio)
+        if self.run_response is not None:
+            if self.run_response.audio is None:
+                self.run_response.audio = []
+            self.run_response.audio.append(audio)
+
     def get_images(self) -> Optional[List[Image]]:
         return self.images
 
     def get_videos(self) -> Optional[List[Video]]:
         return self.videos
 
+    def get_audio(self) -> Optional[List[Audio]]:
+        return self.audio
+
     ###########################################################################
     # Default Tools
     ###########################################################################
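A minimal usage sketch for the new audio accessors, mirroring the existing image/video pattern; the `Audio` constructor field below is hypothetical — check `phi.model.content.Audio` for its real fields:

from phi.agent import Agent
from phi.model.content import Audio

agent = Agent(name="audio-demo")
# "url" is a hypothetical field name borrowed from Image/Video; adjust to the
# actual Audio model definition in your phidata version.
agent.add_audio(Audio(url="https://example.com/clip.wav"))
print(agent.get_audio())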
28 changes: 18 additions & 10 deletions phi/agent/session.py
@@ -27,17 +27,25 @@ class AgentSession(BaseModel):
     model_config = ConfigDict(from_attributes=True)
 
     def monitoring_data(self) -> Dict[str, Any]:
-        monitoring_data = self.model_dump(exclude={"memory"})
         # Google Gemini adds a "parts" field to the messages, which is not serializable
-        # If there are runs in the memory, remove the "parts" from the messages
-        if self.memory is not None and "runs" in self.memory:
-            _runs = self.memory["runs"]
-            if len(_runs) > 0:
-                for _run in _runs:
-                    if "messages" in _run:
-                        for m in _run["messages"]:
-                            if isinstance(m, dict):
-                                m.pop("parts", None)
+        # If the provider is Google, remove the "parts" from the messages
+        if self.agent_data is not None:
+            if self.agent_data.get("model", {}).get("provider") == "Google" and self.memory is not None:
+                # Remove parts from runs' response messages
+                if "runs" in self.memory:
+                    for _run in self.memory["runs"]:
+                        if "response" in _run and "messages" in _run["response"]:
+                            for m in _run["response"]["messages"]:
+                                if isinstance(m, dict):
+                                    m.pop("parts", None)
+
+                # Remove parts from top-level memory messages
+                if "messages" in self.memory:
+                    for m in self.memory["messages"]:
+                        if isinstance(m, dict):
+                            m.pop("parts", None)
+
+        monitoring_data = self.model_dump()
         return monitoring_data
 
     def telemetry_data(self) -> Dict[str, Any]:
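To make the new sanitization concrete, a self-contained sketch of the two structures `monitoring_data()` now walks, with invented values; only the `parts` keys (Gemini's non-serializable payload) are removed:

# Invented example data shaped like AgentSession.memory.
memory = {
    "runs": [
        {"response": {"messages": [{"role": "model", "content": "hi", "parts": [{"text": "hi"}]}]}}
    ],
    "messages": [{"role": "model", "content": "hi", "parts": [{"text": "hi"}]}],
}

# Per-run response messages.
for run in memory.get("runs", []):
    for m in run.get("response", {}).get("messages", []):
        m.pop("parts", None)

# Top-level memory messages.
for m in memory.get("messages", []):
    m.pop("parts", None)

print(memory)  # no "parts" keys remain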
