Commit: Update run_response to be structured

dirkbrnd committed Dec 20, 2024
1 parent 0163d64 commit 178001f
Showing 8 changed files with 41 additions and 21 deletions.
2 changes: 1 addition & 1 deletion cookbook/agents/37_audio_input_output.py
@@ -24,4 +24,4 @@
 )

 if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
-    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/dog.wav")
+    write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/dog.wav")
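The change above is the pattern repeated across the cookbook in this commit: `response_audio` is now a typed object, so dict indexing (`response_audio["data"]`) becomes attribute access (`response_audio.base64_audio`). A minimal, self-contained sketch of the decode-and-write step, assuming the payload is base64-encoded WAV bytes; `save_wav` here is a hypothetical stand-in for phidata's `write_audio_to_file` helper, not part of the commit:

import base64
from pathlib import Path

def save_wav(base64_audio: str, filename: str) -> None:
    # Hypothetical stand-in for write_audio_to_file: decode the base64
    # payload from response_audio.base64_audio and write it to disk.
    path = Path(filename)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(base64.b64decode(base64_audio))

# Attribute-style access after this commit (previously response_audio["data"]):
# if agent.run_response.response_audio is not None:
#     save_wav(agent.run_response.response_audio.base64_audio, "tmp/dog.wav")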
4 changes: 2 additions & 2 deletions cookbook/agents/38_audio_multi_turn.py
@@ -12,8 +12,8 @@

 agent.run("Is a golden retriever a good family dog?")
 if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
-    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_1.wav")
+    write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/answer_1.wav")

 agent.run("Why do you say they are loyal?")
 if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
-    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_2.wav")
+    write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/answer_2.wav")
2 changes: 1 addition & 1 deletion cookbook/agents/42_image_to_audio.py
@@ -24,4 +24,4 @@

 audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}")
 if audio_story.response_audio is not None and "data" in audio_story.response_audio:
-    write_audio_to_file(audio=audio_story.response_audio["data"], filename="tmp/multimodal-agents.wav")
+    write_audio_to_file(audio=audio_story.response_audio.base64_audio, filename="tmp/multimodal-agents.wav")
4 changes: 2 additions & 2 deletions cookbook/workflows/startup_idea_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
1. Install dependencies using: `pip install openai exa_py sqlalchemy phidata`
2. Run the script using: `python cookbook/workflows/blog_post_generator.py`
1. Install dependencies using: `pip install openai googlesearch-python pycountry phidata`
2. Run the script using: `python cookbook/workflows/startup_idea_validator.py`
"""

import json
Expand Down
40 changes: 27 additions & 13 deletions phi/agent/agent.py
@@ -28,7 +28,7 @@

 from phi.document import Document
 from phi.agent.session import AgentSession
-from phi.model.content import Image, Video, Audio
+from phi.model.content import Image, Video, Audio, ModelResponseAudio
 from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction
 from phi.run.response import RunEvent, RunResponse, RunResponseExtraData
 from phi.knowledge.agent import AgentKnowledge
@@ -1815,11 +1815,8 @@ def _run(
                         if model_response.audio is None:
                             model_response.audio = {"data": "", "transcript": ""}

-                        model_response.audio["data"] += model_response.audio.get("data", "")
-                        model_response.audio["transcript"] += model_response.audio.get("transcript", "")
-                        self.run_response.response_audio = model_response_chunk.audio
-                        self.run_response.created_at = model_response_chunk.created_at
-                        # TODO add all to final event
+                        model_response.audio["data"] += model_response_chunk.audio.get("data", "")
+                        model_response.audio["transcript"] += model_response_chunk.audio.get("transcript", "")
                         yield self.run_response

                 elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value:
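The deleted lines in this hunk contained a self-append bug: the streaming accumulator read "data" and "transcript" back out of itself instead of out of the incoming chunk, so audio from each chunk was silently dropped. A toy reproduction of the fixed accumulation loop, with plain dicts standing in for the ModelResponse objects in agent.py:

accumulated = {"data": "", "transcript": ""}
chunks = [
    {"data": "QUJD", "transcript": "Hello "},
    {"data": "REVG", "transcript": "world"},
]

for chunk in chunks:
    # Pre-commit bug: accumulated["data"] += accumulated.get("data", "")
    # appended the accumulator to itself and ignored the chunk entirely.
    # The fix reads from the chunk instead:
    accumulated["data"] += chunk.get("data", "")
    accumulated["transcript"] += chunk.get("transcript", "")

assert accumulated == {"data": "QUJDREVG", "transcript": "Hello world"}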
@@ -1858,7 +1855,10 @@ def _run(
             else:
                 self.run_response.content = model_response.content
                 if model_response.audio is not None:
-                    self.run_response.response_audio = model_response.audio
+                    self.run_response.response_audio = ModelResponseAudio(
+                        base64_audio=model_response.audio.get("data"),
+                        transcript=model_response.audio.get("transcript"),
+                    )
             self.run_response.messages = messages_for_model
             self.run_response.created_at = model_response.created_at

@@ -1873,7 +1873,10 @@ def _run(
         if self.stream:
             self.run_response.content = model_response.content
             if model_response.audio is not None:
-                self.run_response.response_audio = model_response.audio
+                self.run_response.response_audio = ModelResponseAudio(
+                    base64_audio=model_response.audio.get("data"),
+                    transcript=model_response.audio.get("transcript"),
+                )

         # 6. Update Memory
         if self.stream_intermediate_steps:
Expand Down Expand Up @@ -2164,14 +2167,15 @@ async def _arun(
self.run_response.content = model_response_chunk.content
self.run_response.created_at = model_response_chunk.created_at
yield self.run_response

if model_response_chunk.audio is not None:
if model_response.audio is None:
model_response.audio = {"data": "", "transcript": ""}

model_response.audio["data"] += model_response.audio.get("data", "")
model_response.audio["transcript"] += model_response.audio.get("transcript", "")
self.run_response.response_audio = model_response_chunk.audio
self.run_response.created_at = model_response_chunk.created_at
model_response.audio["data"] += model_response_chunk.audio.get("data", "")
model_response.audio["transcript"] += model_response_chunk.audio.get("transcript", "")
# self.run_response.response_audio = model_response_chunk.audio
# self.run_response.created_at = model_response_chunk.created_at
yield self.run_response
elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value:
# Add tool call to the run_response
@@ -2208,21 +2212,31 @@ async def _arun(
                 self.run_response.content_type = self.response_model.__name__
             else:
                 self.run_response.content = model_response.content
+                if model_response.audio is not None:
+                    self.run_response.response_audio = ModelResponseAudio(
+                        base64_audio=model_response.audio.get("data"),
+                        transcript=model_response.audio.get("transcript"),
+                    )
+            self.run_response.messages = messages_for_model
+            self.run_response.created_at = model_response.created_at

         # Build a list of messages that belong to this particular run
         run_messages = user_messages + messages_for_model[num_input_messages:]
         if system_message is not None:
             run_messages.insert(0, system_message)

         # Update the run_response
         self.run_response.messages = run_messages
         self.run_response.metrics = self._aggregate_metrics_from_run_messages(run_messages)

         # Update the run_response content if streaming as run_response will only contain the last chunk
         if self.stream:
             self.run_response.content = model_response.content
             if model_response.audio is not None:
-                self.run_response.response_audio = model_response.audio
+                self.run_response.response_audio = ModelResponseAudio(
+                    base64_audio=model_response.audio.get("data"),
+                    transcript=model_response.audio.get("transcript"),
+                )

         # 6. Update Memory
         if self.stream_intermediate_steps:
5 changes: 5 additions & 0 deletions phi/model/content.py
@@ -36,3 +36,8 @@ def validate_exclusive_audio(cls, data: Any):
         if not data.get("url") and not data.get("base64_audio"):
             raise ValueError("Either `url` or `base64_audio` must be provided.")
         return data
+
+
+class ModelResponseAudio(BaseModel):
+    base64_audio: str
+    transcript: Optional[str] = None
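One consequence of the new model for the agent.py call sites above: `base64_audio` is a required `str` with no default, so if the upstream audio dict has no "data" key, `model_response.audio.get("data")` returns None and construction fails validation. A small sketch of the model's behavior in isolation (assumes pydantic v2; the fields mirror the diff above):

from typing import Optional

from pydantic import BaseModel, ValidationError

class ModelResponseAudio(BaseModel):
    base64_audio: str
    transcript: Optional[str] = None

audio = ModelResponseAudio(base64_audio="UklGRiQAAABXQVZF", transcript="Woof!")
print(audio.transcript)    # Woof!
print(audio.model_dump())  # nests cleanly when RunResponse is serialized

# No default on base64_audio, so a missing "data" key upstream fails loudly:
try:
    ModelResponseAudio(base64_audio=None)
except ValidationError as err:
    print("rejected:", len(err.errors()), "error(s)")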
1 change: 1 addition & 0 deletions phi/playground/router.py
@@ -399,6 +399,7 @@ async def chat_response_streamer(
     run_response = await agent.arun(message, images=images, stream=True, stream_intermediate_steps=True)
     async for run_response_chunk in run_response:
         run_response_chunk = cast(RunResponse, run_response_chunk)
+        # print(run_response_chunk.event, "|", run_response_chunk.content, "|", run_response_chunk.response_audio)
         yield run_response_chunk.to_json()

 async def process_image(file: UploadFile) -> List[Union[str, Dict]]:
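Because `chat_response_streamer` yields one `RunResponse.to_json()` document per chunk, playground clients now see `response_audio` as a nested JSON object rather than a flat dict. A hedged sketch of a consumer under that assumption (field names follow phi/run/response.py below; transport framing is omitted):

import json
from typing import Iterable

def consume_stream(chunks: Iterable[str]) -> None:
    # Assumes each chunk is one standalone JSON document from RunResponse.to_json().
    for raw in chunks:
        event = json.loads(raw)
        if event.get("content"):
            print(event["content"], end="", flush=True)
        audio = event.get("response_audio")
        # After this commit: {"base64_audio": ..., "transcript": ...} or null.
        if audio and audio.get("base64_audio"):
            print(f"\n[{len(audio['base64_audio'])} base64 chars of audio]")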
4 changes: 2 additions & 2 deletions phi/run/response.py
@@ -5,7 +5,7 @@

 from pydantic import BaseModel, ConfigDict, Field

-from phi.model.content import Video, Image, Audio
+from phi.model.content import Video, Image, Audio, ModelResponseAudio
 from phi.reasoning.step import ReasoningStep
 from phi.model.message import Message, MessageReferences

@@ -53,7 +53,7 @@ class RunResponse(BaseModel):
     images: Optional[List[Image]] = None  # Images attached to the response
     videos: Optional[List[Video]] = None  # Videos attached to the response
     audio: Optional[List[Audio]] = None  # Audio attached to the response
-    response_audio: Optional[Dict] = None  # Model audio response
+    response_audio: Optional[ModelResponseAudio] = None  # Model audio response
     extra_data: Optional[RunResponseExtraData] = None
     created_at: int = Field(default_factory=lambda: int(time()))
Expand Down
