Skip to content

Commit

Permalink
Merge branch 'main' into feat/image_to_image_agent
Browse files Browse the repository at this point in the history
  • Loading branch information
manthanguptaa authored Dec 30, 2024
2 parents 928ecf1 + a4cf57f commit 733ebb8
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 3 deletions.
6 changes: 6 additions & 0 deletions cookbook/providers/openai/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.jpg
*.png
*.mp3
*.wav
*.mp4
*.mp3
18 changes: 18 additions & 0 deletions cookbook/providers/openai/audio_input_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import base64
import requests
from phi.agent import Agent, RunResponse # noqa
from phi.model.openai import OpenAIChat

# Fetch the sample audio file and convert it to a base64 encoded string.
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
# requests applies no timeout by default, so set one explicitly to keep the
# example from hanging forever if the host never responds.
response = requests.get(url, timeout=30)
# Raise on HTTP error statuses instead of base64-encoding an error page.
response.raise_for_status()
wav_data = response.content
encoded_string = base64.b64encode(wav_data).decode("utf-8")

# Provide the agent with the audio file and get the result as text.
agent = Agent(
    model=OpenAIChat(id="gpt-4o-audio-preview", modalities=["text"]),
    markdown=True,
)
agent.print_response("What is in this audio?", audio={"data": encoded_string, "format": "wav"})
25 changes: 25 additions & 0 deletions cookbook/providers/openai/audio_output_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import base64
import requests
from phi.agent import Agent, RunResponse # noqa
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

# Fetch the sample audio file and convert it to a base64 encoded string.
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
# requests applies no timeout by default, so set one explicitly to keep the
# example from hanging forever if the host never responds.
response = requests.get(url, timeout=30)
# Raise on HTTP error statuses instead of base64-encoding an error page.
response.raise_for_status()
wav_data = response.content
encoded_string = base64.b64encode(wav_data).decode("utf-8")

# Provide the agent with the audio file and audio configuration and get the
# result as text + audio.
agent = Agent(
    model=OpenAIChat(
        id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}
    ),
    markdown=True,
)
agent.print_response("What is in this audio?", audio={"data": encoded_string, "format": "wav"})

# Save the response audio to a file, but only when the run actually produced
# audio data.
# NOTE(review): presumably write_audio_to_file does not create the "tmp"
# directory itself; confirm, or create it before running this example.
response_audio = agent.run_response.response_audio
if response_audio is not None and "data" in response_audio:
    write_audio_to_file(audio=response_audio["data"], filename="tmp/dog.wav")
2 changes: 1 addition & 1 deletion phi/document/reader/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def read(self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str
else:
logger.info(f"Reading uploaded file: {file.name}")
file.seek(0)
file_content = io.StringIO(file.read().decode("utf-8"))
file_content = io.StringIO(file.read().decode("utf-8")) # type: ignore

csv_name = Path(file.name).stem if isinstance(file, Path) else file.name.split(".")[0]
csv_content = ""
Expand Down
2 changes: 1 addition & 1 deletion phi/llm/ollama/hermes.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:
# logger.info(f"Ollama partial response: {response}")
# logger.info(f"Ollama partial response type: {type(response)}")
response_message: Optional[dict] = response.get("message")
response_content = response_message.get("content") if response_message else None
response_content: str = response_message.get("content", "") if response_message else ""
# logger.info(f"Ollama partial response content: {response_content}")

# Add response content to assistant message
Expand Down
2 changes: 1 addition & 1 deletion phi/llm/ollama/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:
# logger.info(f"Ollama partial response: {response}")
# logger.info(f"Ollama partial response type: {type(response)}")
response_message: Optional[dict] = response.get("message")
response_content = response_message.get("content") if response_message else None
response_content: str = response_message.get("content", "") if response_message else ""
# logger.info(f"Ollama partial response content: {response_content}")

# Add response content to assistant message
Expand Down

0 comments on commit 733ebb8

Please sign in to comment.