Skip to content

Commit

Permalink
fix: get voices
Browse files Browse the repository at this point in the history
  • Loading branch information
anuragts committed Dec 19, 2024
1 parent c88f629 commit 829394a
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 18 deletions.
2 changes: 1 addition & 1 deletion cookbook/agents/47_desi_vocal_audio_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
show_tool_calls=True,
)

audio_agent.print_response("Generate a very small audio of history of french revolution")
audio_agent.print_response("Generate a very small audio of history of french revolution")
28 changes: 25 additions & 3 deletions cookbook/playground/multimodal_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from phi.playground import Playground, serve_playground_app
from phi.storage.agent.sqlite import SqlAgentStorage
from phi.tools.fal_tools import FalTools
from phi.tools.desi_vocal_tools import DesiVocalTools

image_agent_storage_file: str = "tmp/image_agent.db"

Expand Down Expand Up @@ -128,10 +129,31 @@
storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
)


app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
use_async=False
# Agent that generates short Hindi audio through the DesiVocal text-to-speech tool.
hindi_audio_agent = Agent(
    name="Hindi Audio Generator Agent",
    agent_id="hindi_audio_agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[DesiVocalTools()],
    description="You are an AI agent that can generate audio using the DesiVocal API.",
    # Every instruction must be its own list item: adjacent string literals with no
    # comma between them are silently concatenated by Python, which would fuse two
    # sentences together without even a separating space.
    instructions=[
        "When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
        "Send the prompt in hindi language.",
        "You'll generate the appropriate prompt to send to the tool to generate audio.",
        "You don't need to find the appropriate voice first, I already specified the voice to user.",
        "Don't return file name or file url in your response or markdown just tell the audio was created successfully.",
        "The audio should be short.",
    ],
    markdown=True,
    debug_mode=True,
    add_history_to_messages=True,
    add_datetime_to_instructions=True,
    # Stored in the same SQLite file as the audio agent, under a dedicated table.
    storage=SqlAgentStorage(table_name="hindi_audio_agent", db_file=image_agent_storage_file),
)


# Playground application exposing the listed agents, including the Hindi audio agent.
app = Playground(
    agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent, hindi_audio_agent]
).get_app(use_async=False)

# Serve the app only when this file is executed directly as a script.
# "multimodal_agent:app" names this module and its `app` object — presumably an
# import string resolved by the server; TODO confirm against the phi playground docs.
if __name__ == "__main__":
    serve_playground_app("multimodal_agent:app", reload=True)
55 changes: 41 additions & 14 deletions phi/tools/desi_vocal_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,51 @@ def __init__(
self.register(self.get_voices)
self.register(self.text_to_speech)

# def get_voices(self) -> str:
# try:
# url = "https://prod-api2.desivocal.com/dv/api/v0/tts_api/voices"
# response = requests.get(url)
# for voice in response.json():

# return str(response.text)
# except Exception as e:
# logger.error(f"Failed to get voices: {e}")
# return f"Error: {e}"

def text_to_speech(self, agent: Agent, text: str) -> str:
def get_voices(self) -> str:
    """
    Use this function to get all the voices available.

    Returns:
        result (str): The string form of a list of voices, each with an ID, name,
            description and preview URL. If the request or the parsing fails,
            a message of the form "Error: <reason>" is returned instead.
    """
    url = "https://prod-api2.desivocal.com/dv/api/v0/tts_api/voices"
    try:
        # Bound the wait so an unresponsive API cannot hang the caller forever, and
        # surface HTTP error statuses explicitly instead of trying to parse an
        # error body as if it were the voice catalogue.
        api_response = requests.get(url, timeout=30)
        api_response.raise_for_status()
        voices_data = api_response.json()

        voices = []
        for voice_id, voice_info in voices_data.items():
            preview_paths = voice_info["preview_path"]
            voices.append(
                {
                    "id": voice_id,
                    "name": voice_info["name"],
                    "description": f"Gender: {voice_info['audio_gender']}, Type: {voice_info['voice_type']}, Languages: {', '.join(voice_info['languages'])}",
                    # Use the first available preview, if the voice has any.
                    "preview_url": next(iter(preview_paths.values())) if preview_paths else None,
                }
            )

        return str(voices)
    except Exception as e:
        # Tool functions report failures as text rather than raising.
        logger.error(f"Failed to get voices: {e}")
        return f"Error: {e}"

def text_to_speech(self, agent: Agent, prompt: str, voice_id: Optional[str] = None) -> str:
"""
Use this function to generate audio from text.
Args:
prompt (str): The text to generate audio from.
Returns:
result (str): The URL of the generated audio.
"""
try:
url = "https://prod-api2.desivocal.com/dv/api/v0/tts_api/generate"

payload = {
"text": text,
"voice_id": self.voice_id,
"text": prompt,
"voice_id": voice_id or self.voice_id,
}

headers = {
Expand Down

0 comments on commit 829394a

Please sign in to comment.