Merge branch 'main' into example/html5-game
dirkbrnd authored Dec 20, 2024
2 parents 19aff9a + 3a7077b commit 7d03d8a
Showing 42 changed files with 1,278 additions and 87 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -48,4 +48,6 @@ data.db

.ipynb_checkpoints

audio_generations

*.db
7 changes: 3 additions & 4 deletions cookbook/agents/37_audio_input_output.py
@@ -2,6 +2,7 @@
import requests
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

# Fetch the audio file and convert it to a base64 encoded string
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
@@ -22,7 +23,5 @@
audio={"data": encoded_string, "format": "wav"},
)

if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("dog.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/dog.wav")
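
Note: the `write_audio_to_file` helper imported above replaces the inline base64 handling that was removed. A minimal sketch of an equivalent helper, assuming the same base64-encoded payload the old code decoded (the real implementation lives in `phi.utils.audio` and may differ):

import base64
from pathlib import Path

def write_audio_to_file(audio: str, filename: str) -> None:
    # Sketch only: decode the base64-encoded audio payload and write it to disk,
    # creating parent directories (e.g. tmp/) if they do not exist.
    path = Path(filename)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(base64.b64decode(audio))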
15 changes: 6 additions & 9 deletions cookbook/agents/38_audio_multi_turn.py
@@ -1,22 +1,19 @@
import base64
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

agent = Agent(
model=OpenAIChat(
id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}
),
debug_mode=True,
add_history_to_messages=True,
)

agent.run("Is a golden retriever a good family dog?")
if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("tmp/answer_1.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_1.wav")

agent.run("Why do you say they are loyal?")
if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("tmp/answer_2.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_2.wav")
8 changes: 3 additions & 5 deletions cookbook/agents/42_image_to_audio.py
@@ -1,10 +1,10 @@
import base64
from pathlib import Path
from rich import print
from rich.text import Text

from phi.agent import Agent, RunResponse
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

cwd = Path(__file__).parent.resolve()

@@ -23,7 +23,5 @@
)

audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}")
if audio_story.audio is not None and "data" in audio_story.audio:
wav_bytes = base64.b64decode(audio_story.audio["data"])
with open(cwd.joinpath("tmp/multimodal-agents.wav"), "wb") as f:
f.write(wav_bytes)
if audio_story.response_audio is not None and "data" in audio_story.response_audio:
write_audio_to_file(audio=audio_story.response_audio["data"], filename="tmp/multimodal-agents.wav")
18 changes: 18 additions & 0 deletions cookbook/embedders/cohere_embedder.py
@@ -0,0 +1,18 @@
from phi.agent import AgentKnowledge
from phi.vectordb.pgvector import PgVector
from phi.embedder.cohere import CohereEmbedder

embeddings = CohereEmbedder().get_embedding("The quick brown fox jumps over the lazy dog.")
# Print the first five embedding values and the total number of dimensions
print(f"Embeddings: {embeddings[:5]}")
print(f"Dimensions: {len(embeddings)}")

# Example usage:
knowledge_base = AgentKnowledge(
vector_db=PgVector(
db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
table_name="cohere_embeddings",
embedder=CohereEmbedder(),
),
num_documents=2,
)
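
A possible follow-up, not part of this commit, showing how the Cohere-backed knowledge base could be loaded and attached to an agent; `load_text`, `knowledge`, and `search_knowledge` follow the pattern of other phidata cookbook examples and are assumptions here:

from phi.agent import Agent

# Load a document into the vector store (requires the pgvector database on localhost:5532).
knowledge_base.load_text("The quick brown fox jumps over the lazy dog.")

# Attach the knowledge base so the agent can search it when answering.
agent = Agent(knowledge=knowledge_base, search_knowledge=True)
agent.print_response("What does the quick brown fox do?")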
File renamed without changes.
21 changes: 21 additions & 0 deletions cookbook/examples/agents/05_shopping_partner.py
@@ -0,0 +1,21 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.firecrawl import FirecrawlTools

agent = Agent(
name="shopping partner",
model=OpenAIChat(id="gpt-4o"),
instructions=[
"You are a product recommender agent specializing in finding products that match user preferences.",
"Prioritize finding products that satisfy as many user requirements as possible, but ensure a minimum match of 50%.",
"Search for products only from authentic and trusted e-commerce websites such as Amazon, Flipkart, Myntra, Meesho, Google Shopping, Nike, and other reputable platforms.",
"Verify that each product recommendation is in stock and available for purchase.",
"Avoid suggesting counterfeit or unverified products.",
"Clearly mention the key attributes of each product (e.g., price, brand, features) in the response.",
"Format the recommendations neatly and ensure clarity for ease of user understanding.",
],
tools=[FirecrawlTools()],
)
agent.print_response(
"I am looking for running shoes with the following preferences: Color: Black Purpose: Comfortable for long-distance running Budget: Under Rs. 10,000"
)
24 changes: 24 additions & 0 deletions cookbook/examples/agents/06_book_recommendation.py
@@ -0,0 +1,24 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.exa import ExaTools

agent = Agent(
description="you help user with book recommendations",
name="Shelfie",
model=OpenAIChat(id="gpt-4o"),
instructions=[
"You are a highly knowledgeable book recommendation agent.",
"Your goal is to help the user discover books based on their preferences, reading history, and interests.",
"If the user mentions a specific genre, suggest books that span both classics and modern hits.",
"When the user mentions an author, recommend similar authors or series they may enjoy.",
"Highlight notable accomplishments of the book, such as awards, best-seller status, or critical acclaim.",
"Provide a short summary or teaser for each book recommended.",
"Offer up to 5 book recommendations for each request, ensuring they are diverse and relevant.",
"Leverage online resources like Goodreads, StoryGraph, and LibraryThing for accurate and varied suggestions.",
"Focus on being concise, relevant, and thoughtful in your recommendations.",
],
tools=[ExaTools()],
)
agent.print_response(
"I really found anxious people and lessons in chemistry interesting, can you suggest me more such books"
)
24 changes: 24 additions & 0 deletions cookbook/examples/agents/07_weekend_planner.py
@@ -0,0 +1,24 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.exa import ExaTools

agent = Agent(
description="you help the user plan their weekends",
name="TimeOut",
model=OpenAIChat(id="gpt-4o"),
instructions=[
"You are a weekend planning assistant that helps users create a personalized weekend itinerary.",
"Always mention the timeframe, location, and year provided by the user (e.g., '16–17 December 2023 in Bangalore'). Recommendations should align with the specified dates.",
"Provide responses in these sections: Events, Activities, Dining Options.",
"- **Events**: Include name, date, time, location, a brief description, and booking links from platforms like BookMyShow or Insider.in.",
"- **Activities**: Suggest engaging options with estimated time required, location, and additional tips (e.g., best time to visit).",
"- **Dining Options**: Recommend restaurants or cafés with cuisine highlights and links to platforms like Zomato or Google Maps.",
"Ensure all recommendations are for the current or future dates relevant to the query. Avoid past events.",
"If no specific data is available for the dates, suggest general activities or evergreen attractions in the city.",
"Keep responses concise, clear, and formatted for easy reading.",
],
tools=[ExaTools()],
)
agent.print_response(
"I want to plan my coming weekend filled with fun activities and christmas themed activities in Bangalore for 21 and 22 Dec 2024."
)
16 changes: 16 additions & 0 deletions cookbook/playground/gemini_agents.py
@@ -0,0 +1,16 @@
from phi.agent import Agent
from phi.tools.yfinance import YFinanceTools
from phi.playground import Playground, serve_playground_app
from phi.model.google import Gemini

finance_agent = Agent(
name="Finance Agent",
model=Gemini(id="gemini-2.0-flash-exp"),
tools=[YFinanceTools(stock_price=True)],
debug_mode=True,
)

app = Playground(agents=[finance_agent]).get_app(use_async=False)

if __name__ == "__main__":
serve_playground_app("gemini_agents:app", reload=True)
30 changes: 29 additions & 1 deletion cookbook/playground/multimodal_agent.py
@@ -9,6 +9,7 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.dalle import Dalle
from phi.tools.eleven_labs_tools import ElevenLabsTools
from phi.tools.giphy import GiphyTools
from phi.tools.models_labs import ModelsLabs
from phi.model.response import FileType
@@ -88,6 +89,7 @@

gif_agent = Agent(
name="Gif Generator Agent",
agent_id="gif_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[GiphyTools()],
description="You are an AI agent that can generate gifs using Giphy.",
@@ -102,8 +104,34 @@
storage=SqlAgentStorage(table_name="gif_agent", db_file=image_agent_storage_file),
)

audio_agent = Agent(
name="Audio Generator Agent",
agent_id="audio_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[
ElevenLabsTools(
voice_id="JBFqnCBsd6RMkjVDRZzb", model_id="eleven_multilingual_v2", target_directory="audio_generations"
)
],
description="You are an AI agent that can generate audio using the ElevenLabs API.",
instructions=[
"When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
"You'll generate the appropriate prompt to send to the tool to generate audio.",
"You don't need to find the appropriate voice first, I already specified the voice to user."
"Don't return file name or file url in your response or markdown just tell the audio was created successfully.",
"The audio should be long and detailed.",
],
markdown=True,
debug_mode=True,
add_history_to_messages=True,
add_datetime_to_instructions=True,
storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
)

app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent]).get_app(use_async=False)

app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
use_async=False
)

if __name__ == "__main__":
serve_playground_app("multimodal_agent:app", reload=True)
12 changes: 12 additions & 0 deletions cookbook/providers/google/flash_thinking.py
@@ -0,0 +1,12 @@
from phi.agent import Agent
from phi.model.google import Gemini

task = (
"Three missionaries and three cannibals need to cross a river. "
"They have a boat that can carry up to two people at a time. "
"If, at any time, the cannibals outnumber the missionaries on either side of the river, the cannibals will eat the missionaries. "
"How can all six people get across the river safely? Provide a step-by-step solution and show the solutions as an ascii diagram"
)

agent = Agent(model=Gemini(id="gemini-2.0-flash-thinking-exp-1219"), markdown=True)
agent.print_response(task, stream=True)
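
If the answer needs to be captured rather than streamed to the terminal, the same agent can be driven with `run`, as other cookbook examples in this commit do; a sketch continuing the file above:

from phi.agent import RunResponse

# Capture the full response object instead of streaming to stdout.
response: RunResponse = agent.run(task)
print(response.content)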
8 changes: 8 additions & 0 deletions cookbook/providers/ollama/agent_stream.py
@@ -3,6 +3,7 @@
from typing import Iterator # noqa
from phi.agent import Agent, RunResponse # noqa
from phi.model.ollama import Ollama
from phi.tools.crawl4ai_tools import Crawl4aiTools
from phi.tools.yfinance import YFinanceTools

agent = Agent(
@@ -20,3 +21,10 @@

# Print the response in the terminal
agent.print_response("What are analyst recommendations for NVDA and TSLA", stream=True)


agent = Agent(model=Ollama(id="llama3.1:8b"), tools=[Crawl4aiTools(max_length=1000)], show_tool_calls=True)
agent.print_response(
"Summarize me the key points in bullet points of this: https://blog.google/products/gemini/google-gemini-deep-research/",
stream=True,
)
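
A companion sketch, not in this commit, showing how the stream could be consumed programmatically — the `Iterator` and `RunResponse` imports kept at the top of this file exist for exactly this pattern:

run_stream: Iterator[RunResponse] = agent.run(
    "Summarize the key points of https://blog.google/products/gemini/google-gemini-deep-research/ in bullet points",
    stream=True,
)
for chunk in run_stream:
    # Each chunk is a partial RunResponse; content can be empty for tool-call events.
    if chunk.content:
        print(chunk.content, end="")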
13 changes: 13 additions & 0 deletions cookbook/storage/json_storage.py
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.json import JsonFileAgentStorage

agent = Agent(
storage=JsonFileAgentStorage(dir_path="tmp/agent_sessions_json"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
13 changes: 13 additions & 0 deletions cookbook/storage/yaml_storage.py
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.yaml import YamlFileAgentStorage

agent = Agent(
storage=YamlFileAgentStorage(dir_path="tmp/agent_sessions_yaml"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
22 changes: 22 additions & 0 deletions cookbook/tools/confluence_tools.py
@@ -0,0 +1,22 @@
from phi.agent import Agent
from phi.tools.confluence import ConfluenceTools


agent = Agent(
name="Confluence agent",
tools=[ConfluenceTools()],
show_tool_calls=True,
markdown=True,
)

## getting space details
agent.print_response("How many spaces are there and what are their names?")

## getting page_content
agent.print_response("What is the content present in page 'Large language model in LLM space'")

## getting page details in a particular space
agent.print_response("Can you extract all the page names from 'LLM' space")

## creating a new page in a space
agent.print_response("Can you create a new page named 'TESTING' in 'LLM' space")
32 changes: 32 additions & 0 deletions cookbook/tools/elevenlabs_tools.py
@@ -0,0 +1,32 @@
"""
pip install elevenlabs
"""

from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.eleven_labs_tools import ElevenLabsTools

audio_agent = Agent(
model=OpenAIChat(id="gpt-4o"),
tools=[
ElevenLabsTools(
voice_id="21m00Tcm4TlvDq8ikWAM", model_id="eleven_multilingual_v2", target_directory="audio_generations"
)
],
description="You are an AI agent that can generate audio using the ElevenLabs API.",
instructions=[
"When the user asks you to generate audio, use the `generate_audio` tool to generate the audio.",
"You'll generate the appropriate prompt to send to the tool to generate audio.",
"You don't need to find the appropriate voice first, I already specified the voice to user."
"Return the audio file name in your response. Don't convert it to markdown.",
"The audio should be long and detailed.",
],
markdown=True,
debug_mode=True,
show_tool_calls=True,
)

audio_agent.print_response("Generate a very long audio of history of french revolution")


audio_agent.print_response("Generate a kick sound effect")