diff --git a/cookbook/playground/audio_conversation_agent.py b/cookbook/playground/audio_conversation_agent.py
new file mode 100644
index 000000000..ab2959db3
--- /dev/null
+++ b/cookbook/playground/audio_conversation_agent.py
@@ -0,0 +1,32 @@
+"""Playground cookbook: a chat agent configured with text and audio modalities.
+
+Run this module directly to serve the playground app with reload enabled.
+"""
+
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.playground import Playground, serve_playground_app
+from phi.storage.agent.sqlite import SqlAgentStorage
+
+
+audio_agent = Agent(
+    name="Audio Chat Agent",
+    model=OpenAIChat(
+        id="gpt-4o-audio-preview",
+        modalities=["text", "audio"],
+        # pcm16 is requested because wav is not supported for streaming.
+        audio={"voice": "alloy", "format": "pcm16"},
+    ),
+    debug_mode=True,
+    add_history_to_messages=True,
+    add_datetime_to_instructions=True,
+    # Agent storage kept in a local SQLite database file.
+    storage=SqlAgentStorage(table_name="audio_agent", db_file="tmp/audio_agent.db"),
+)
+
+
+# App object referenced below by the "audio_conversation_agent:app" import string.
+app = Playground(agents=[audio_agent]).get_app()
+
+if __name__ == "__main__":
+    serve_playground_app("audio_conversation_agent:app", reload=True)
diff --git a/phi/agent/agent.py b/phi/agent/agent.py
index eaed62116..bbac753ba 100644
--- a/phi/agent/agent.py
+++ b/phi/agent/agent.py
@@ -1811,6 +1811,17 @@ def _run(
                         self.run_response.created_at = model_response_chunk.created_at
                         yield self.run_response
 
+                    if model_response_chunk.audio is not None:
+                        if model_response.audio is None:
+                            model_response.audio = {"data": "", "transcript": ""}
+                        # Append this chunk's audio (not the running total); a None value counts as "".
+                        model_response.audio["data"] += model_response_chunk.audio.get("data") or ""
+                        model_response.audio["transcript"] += model_response_chunk.audio.get("transcript") or ""
+                        self.run_response.response_audio = model_response_chunk.audio
+                        self.run_response.created_at = model_response_chunk.created_at
+                        # TODO add all to final event
+                        yield self.run_response
+
                 elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value:
                     # Add tool call to the run_response
                     tool_call_dict = model_response_chunk.tool_call
@@ -2153,6 +2164,15 @@ async def _arun(
                         self.run_response.content = model_response_chunk.content
                         self.run_response.created_at = model_response_chunk.created_at
                         yield self.run_response
+                    if model_response_chunk.audio is not None:
+                        if model_response.audio is None:
+                            model_response.audio = {"data": "", "transcript": ""}
+                        # Append this chunk's audio (not the running total); a None value counts as "".
+                        model_response.audio["data"] += model_response_chunk.audio.get("data") or ""
+                        model_response.audio["transcript"] += model_response_chunk.audio.get("transcript") or ""
+                        self.run_response.response_audio = model_response_chunk.audio
+                        self.run_response.created_at = model_response_chunk.created_at
+                        yield self.run_response
                 elif model_response_chunk.event == ModelResponseEvent.tool_call_started.value:
                     # Add tool call to the run_response
                     tool_call_dict = model_response_chunk.tool_call