diff --git a/cookbook/agents/37_audio_input_output.py b/cookbook/agents/37_audio_input_output.py index a1038b6bff..9e0907041c 100644 --- a/cookbook/agents/37_audio_input_output.py +++ b/cookbook/agents/37_audio_input_output.py @@ -23,5 +23,5 @@ audio={"data": encoded_string, "format": "wav"}, ) -if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio: +if agent.run_response.response_audio is not None: write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/dog.wav") diff --git a/cookbook/agents/38_audio_multi_turn.py b/cookbook/agents/38_audio_multi_turn.py index b8ad72053c..44ba5d8949 100644 --- a/cookbook/agents/38_audio_multi_turn.py +++ b/cookbook/agents/38_audio_multi_turn.py @@ -11,9 +11,9 @@ ) agent.run("Is a golden retriever a good family dog?") -if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio: +if agent.run_response.response_audio is not None: write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/answer_1.wav") agent.run("Why do you say they are loyal?") -if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio: +if agent.run_response.response_audio is not None: write_audio_to_file(audio=agent.run_response.response_audio.base64_audio, filename="tmp/answer_2.wav") diff --git a/cookbook/agents/42_image_to_audio.py b/cookbook/agents/42_image_to_audio.py index ba0b3400ea..745059d108 100644 --- a/cookbook/agents/42_image_to_audio.py +++ b/cookbook/agents/42_image_to_audio.py @@ -23,5 +23,5 @@ ) audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}") -if audio_story.response_audio is not None and "data" in audio_story.response_audio: +if audio_story.response_audio is not None: write_audio_to_file(audio=audio_story.response_audio.base64_audio, filename="tmp/multimodal-agents.wav") diff --git 
a/cookbook/playground/audio_conversation_agent.py b/cookbook/playground/audio_conversation_agent.py index ab2959db3f..0c3a0246a0 100644 --- a/cookbook/playground/audio_conversation_agent.py +++ b/cookbook/playground/audio_conversation_agent.py @@ -7,7 +7,9 @@ audio_agent = Agent( name="Audio Chat Agent", model=OpenAIChat( - id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "pcm16"} # Wav not supported for streaming + id="gpt-4o-audio-preview", + modalities=["text", "audio"], + audio={"voice": "alloy", "format": "pcm16"}, # Wav not supported for streaming ), debug_mode=True, add_history_to_messages=True, diff --git a/cookbook/workflows/startup_idea_validator.py b/cookbook/workflows/startup_idea_validator.py index 52bbb4cc64..dad40afff5 100644 --- a/cookbook/workflows/startup_idea_validator.py +++ b/cookbook/workflows/startup_idea_validator.py @@ -205,7 +205,7 @@ def run(self, startup_idea: str) -> Iterator[RunResponse]: table_name="validate_startup_ideas_workflow", db_file="tmp/workflows.db", ), - debug_mode=True + debug_mode=True, ) final_report: Iterator[RunResponse] = startup_idea_validator.run(startup_idea=idea) diff --git a/phi/agent/agent.py b/phi/agent/agent.py index 5a928020d2..84d09d4f0a 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -1856,9 +1856,9 @@ def _run( self.run_response.content = model_response.content if model_response.audio is not None: self.run_response.response_audio = ModelResponseAudio( - base64_audio=model_response.audio.get("data"), - transcript=model_response.audio.get("transcript"), - ) + base64_audio=model_response.audio.get("data"), + transcript=model_response.audio.get("transcript"), + ) self.run_response.messages = messages_for_model self.run_response.created_at = model_response.created_at @@ -1874,9 +1874,9 @@ def _run( self.run_response.content = model_response.content if model_response.audio is not None: self.run_response.response_audio = ModelResponseAudio( - 
base64_audio=model_response.audio.get("data"), - transcript=model_response.audio.get("transcript"), - ) + base64_audio=model_response.audio.get("data"), + transcript=model_response.audio.get("transcript"), + ) # 6. Update Memory if self.stream_intermediate_steps: @@ -2234,9 +2234,9 @@ async def _arun( self.run_response.content = model_response.content if model_response.audio is not None: self.run_response.response_audio = ModelResponseAudio( - base64_audio=model_response.audio.get("data"), - transcript=model_response.audio.get("transcript"), - ) + base64_audio=model_response.audio.get("data"), + transcript=model_response.audio.get("transcript"), + ) # 6. Update Memory if self.stream_intermediate_steps: diff --git a/phi/playground/router.py b/phi/playground/router.py index 211115279c..bcfb9584db 100644 --- a/phi/playground/router.py +++ b/phi/playground/router.py @@ -398,7 +398,13 @@ async def chat_response_streamer( ) -> AsyncGenerator: run_response = await agent.arun(message, images=images, stream=True, stream_intermediate_steps=True) async for run_response_chunk in run_response: - print(run_response_chunk.event, "|", run_response_chunk.content, "|", run_response_chunk.response_audio.base64_audio[:10] if run_response_chunk.response_audio else "-") + print( + run_response_chunk.event, + "|", + run_response_chunk.content, + "|", + run_response_chunk.response_audio.base64_audio[:10] if run_response_chunk.response_audio else "-", + ) run_response_chunk = cast(RunResponse, run_response_chunk) yield run_response_chunk.to_json() diff --git a/phi/tools/eleven_labs_tools.py b/phi/tools/eleven_labs_tools.py index 109b6809f6..2ce37030fb 100644 --- a/phi/tools/eleven_labs_tools.py +++ b/phi/tools/eleven_labs_tools.py @@ -1,4 +1,3 @@ - from base64 import b64encode from io import BytesIO from pathlib import Path @@ -63,14 +62,10 @@ def get_voices(self) -> str: """ -<<<<<<< HEAD - Use this function to generate sound effect audio from a text prompt. 
-======= - Use this function to get all the voices available. ->>>>>>> 48addb496442892c21382ff27d03578b3f9d7ac6 - - Returns: - result (list): A list of voices that have an ID, name and description. + Use this function to get all the voices available. + + Returns: + result (list): A list of voices that have an ID, name and description. """ try: voices = self.eleven_labs_client.voices.get_all() @@ -157,17 +156,13 @@ def generate_sound_effect(self, agent: Agent, prompt: str, duration_seconds: Opt def text_to_speech(self, agent: Agent, prompt: str, voice_id: Optional[str] = None) -> str: """ - Use this function to convert text to speech audio. - - Args: - prompt (str): Text to generate audio from. -<<<<<<< HEAD - voice_id (str): The ID of the voice to use for audio generation. -======= - voice_id (Optional[str]): The ID of the voice to use for audio generation. Uses default if none is specified. ->>>>>>> 48addb496442892c21382ff27d03578b3f9d7ac6 - Returns: - str: Return the path to the generated audio file. + Use this function to convert text to speech audio. + + Args: + prompt (str): Text to generate audio from. + voice_id (Optional[str]): The ID of the voice to use for audio generation. Uses default if none is specified. + Returns: + str: Return the path to the generated audio file. """ try: audio_generator = self.eleven_labs_client.text_to_speech.convert(