From ee44116c42fd63349adb3d818cf500e03c74baae Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Fri, 24 Oct 2025 22:00:31 -0700 Subject: [PATCH 1/3] use sonic-3 --- src/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent.py b/src/agent.py index b846dd7..eed1c45 100644 --- a/src/agent.py +++ b/src/agent.py @@ -68,7 +68,7 @@ async def entrypoint(ctx: JobContext): llm="openai/gpt-4.1-mini", # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/ - tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc", + tts="cartesia/sonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc", # VAD and turn detection are used to determine when the user is speaking and when the agent should respond # See more at https://docs.livekit.io/agents/build/turns turn_detection=MultilingualModel(), From d2f1e50a5cb46e68becd0053043e9840d808c94e Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Tue, 28 Oct 2025 14:11:31 -0700 Subject: [PATCH 2/3] fix --- src/agent.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/agent.py b/src/agent.py index eed1c45..d4e717f 100644 --- a/src/agent.py +++ b/src/agent.py @@ -10,6 +10,7 @@ RoomInputOptions, WorkerOptions, cli, + inference, metrics, ) from livekit.plugins import noise_cancellation, silero @@ -28,7 +29,7 @@ def __init__(self) -> None: Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols. You are curious, friendly, and have a sense of humor.""", ) - + # To add tools, use the @function_tool decorator. # Here's an example that adds a simple weather tool. # You also have to add `from livekit.agents import function_tool, RunContext` to the top of this file @@ -62,13 +63,15 @@ async def entrypoint(ctx: JobContext): session = AgentSession( # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand # See all available models at https://docs.livekit.io/agents/models/stt/ - stt="assemblyai/universal-streaming:en", + stt=inference.STT(model="assemblyai/universal-streaming", language="en"), # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response # See all available models at https://docs.livekit.io/agents/models/llm/ - llm="openai/gpt-4.1-mini", + llm=inference.LLM(model="openai/gpt-4.1-mini"), # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/ - tts="cartesia/sonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc", + tts=inference.TTS( + model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc" + ), # VAD and turn detection are used to determine when the user is speaking and when the agent should respond # See more at https://docs.livekit.io/agents/build/turns turn_detection=MultilingualModel(), From d80ee18d2fa1a5f57fb03486a02b10a3ad9b83c4 Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Tue, 28 Oct 2025 14:11:55 -0700 Subject: [PATCH 3/3] fmt --- src/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent.py b/src/agent.py index d4e717f..d85c794 100644 --- a/src/agent.py +++ b/src/agent.py @@ -29,7 +29,7 @@ def __init__(self) -> None: Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols. You are curious, friendly, and have a sense of humor.""", ) - + # To add tools, use the @function_tool decorator. # Here's an example that adds a simple weather tool. # You also have to add `from livekit.agents import function_tool, RunContext` to the top of this file