Gemini multimodal update

phidatahq · Dec 12, 2024 · a1e9065
1 parent ab3ead2
commit a1e9065
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 3 deletions.
diff --git a/cookbook/providers/google/audio_agent.py b/cookbook/providers/google/audio_agent.py
@@ -1,14 +1,19 @@
 from phi.agent import Agent
 from phi.model.google import Gemini
+from google.generativeai import upload_file
 
 agent = Agent(
     model=Gemini(id="gemini-2.0-flash-exp"),
     markdown=True,
 )
 
+# Upload the audio file with google.generativeai.upload_file before passing it to the Agent
+audio_file = upload_file("sample_audio.mp3")
+print(f"Uploaded audio: {audio_file}")
+
 # Please download a sample audio file to test this Agent
 agent.print_response(
     "Tell me about this audio",
-    audio={"data": "cookbook/providers/google/sample_audio.mp3"},
+    audio=[audio_file],
     stream=True,
 )
diff --git a/cookbook/providers/google/image_agent_file_upload.py b/cookbook/providers/google/image_agent_file_upload.py
@@ -3,7 +3,6 @@
 from phi.agent import Agent
 from phi.model.google import Gemini
 from phi.tools.duckduckgo import DuckDuckGo
-
 from google.generativeai import upload_file
 
 agent = Agent(

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "phidata"
-version = "2.7.1"
+version = "2.7.2"
 description = "Build multi-modal Agents with memory, knowledge and tools."
 requires-python = ">=3.7"
 readme = "README.md"