-
Notifications
You must be signed in to change notification settings - Fork 493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adds twilio dtmf action #623
Changes from 7 commits
7d320cc
0336b7c
9e09592
6d731d5
8adbef0
a9b52cc
766259d
6a39c4b
19e191b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
from typing import Type | ||
from typing import List, Type | ||
|
||
from loguru import logger | ||
from pydantic.v1 import BaseModel, Field | ||
|
@@ -9,6 +9,7 @@ | |
) | ||
from vocode.streaming.models.actions import ActionConfig as VocodeActionConfig | ||
from vocode.streaming.models.actions import ActionInput, ActionOutput | ||
from vocode.streaming.utils.dtmf_utils import DTMFToneGenerator, KeypadEntry | ||
from vocode.streaming.utils.state_manager import ( | ||
TwilioPhoneConversationStateManager, | ||
VonagePhoneConversationStateManager, | ||
|
@@ -76,8 +77,20 @@ def __init__(self, action_config: DTMFVocodeActionConfig): | |
) | ||
|
||
async def run(self, action_input: ActionInput[DTMFParameters]) -> ActionOutput[DTMFResponse]:
    """Play the requested keypad presses as DTMF tones on the Twilio call.

    Args:
        action_input: Action input whose ``params.buttons`` holds the keys to
            press, one character per key.

    Returns:
        An ``ActionOutput`` wrapping ``DTMFResponse(success=True)`` once the
        tones have been handed to the output device, or
        ``DTMFResponse(success=False)`` when any button is not a valid
        ``KeypadEntry``.
    """
    buttons = action_input.params.buttons
    keypad_entries: List[KeypadEntry]
    try:
        keypad_entries = [KeypadEntry(button) for button in buttons]
    except ValueError:
        # Report the failure through the response instead of raising, so the
        # caller can surface "invalid DTMF buttons" downstream.
        # NOTE(review): the formatting of this failure for the LLM lives in
        # action_result_to_string(), which is not visible here - confirm it
        # handles success=False.
        logger.warning(f"Invalid DTMF buttons: {buttons}")
        return ActionOutput(
            action_type=action_input.action_config.type,
            response=DTMFResponse(success=False),
        )

    # All buttons validated: queue the tones on the Twilio output device.
    self.conversation_state_manager._twilio_phone_conversation.output_device.send_dtmf_tones(
        keypad_entries=keypad_entries
    )
    return ActionOutput(
        action_type=action_input.action_config.type,
        response=DTMFResponse(success=True),
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
from __future__ import annotations | ||
|
||
import asyncio | ||
import audioop | ||
import base64 | ||
import json | ||
import uuid | ||
from typing import Optional, Union | ||
from typing import List, Optional, Union | ||
|
||
from fastapi import WebSocket | ||
from fastapi.websockets import WebSocketState | ||
|
@@ -15,6 +15,7 @@ | |
from vocode.streaming.output_device.audio_chunk import AudioChunk, ChunkState | ||
from vocode.streaming.telephony.constants import DEFAULT_AUDIO_ENCODING, DEFAULT_SAMPLING_RATE | ||
from vocode.streaming.utils.create_task import asyncio_create_task | ||
from vocode.streaming.utils.dtmf_utils import DTMFToneGenerator, KeypadEntry | ||
from vocode.streaming.utils.worker import InterruptibleEvent | ||
|
||
|
||
|
@@ -55,6 +56,20 @@ def interrupt(self): | |
def enqueue_mark_message(self, mark_message: MarkMessage): | ||
self._mark_message_queue.put_nowait(mark_message) | ||
|
||
def send_dtmf_tones(self, keypad_entries: List[KeypadEntry]):
    """Queue one Twilio ``media`` event per keypad entry, in order.

    Each entry is rendered to audio at this device's sampling rate and
    encoding, base64-encoded, wrapped in a JSON ``media`` message for the
    current stream, and placed on the outgoing Twilio events queue. No mark
    message is enqueued for these chunks.

    Args:
        keypad_entries: Keys to play, in the order they should be heard.
    """
    generator = DTMFToneGenerator()
    for entry in keypad_entries:
        logger.info(f"Sending DTMF tone {entry.value}")
        tone_bytes = generator.generate(
            entry,
            sampling_rate=self.sampling_rate,
            audio_encoding=self.audio_encoding,
        )
        encoded_payload = base64.b64encode(tone_bytes).decode("utf-8")
        media_event = json.dumps(
            {
                "event": "media",
                "streamSid": self.stream_sid,
                "media": {"payload": encoded_payload},
            }
        )
        self._twilio_events_queue.put_nowait(media_event)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can these get interrupted? And if so, is a mark necessary as well, so using [inline code reference lost in extraction]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These cannot get interrupted (and thus mirror the Vonage implementation). If we send an event to [inline code reference lost in extraction; remainder of comment truncated] |
||
|
||
async def _send_twilio_messages(self): | ||
while True: | ||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import audioop | ||
from enum import Enum | ||
from typing import Dict, Tuple | ||
|
||
import numpy as np | ||
|
||
from vocode.streaming.models.audio import AudioEncoding | ||
from vocode.streaming.utils.singleton import Singleton | ||
|
||
DEFAULT_DTMF_TONE_LENGTH_SECONDS = 0.3 | ||
MAX_INT = 32767 | ||
|
||
|
||
class KeypadEntry(str, Enum):
    """A key on a telephone keypad, valued by the character printed on it.

    ``KeypadEntry("5")`` returns ``KeypadEntry.FIVE``; a character that is not
    a supported key raises ``ValueError``. The ``*`` and ``#`` keys are
    included because phone menus commonly ask for them (for example, "enter
    your number followed by the pound key").
    """

    ONE = "1"
    TWO = "2"
    THREE = "3"
    FOUR = "4"
    FIVE = "5"
    SIX = "6"
    SEVEN = "7"
    EIGHT = "8"
    NINE = "9"
    ZERO = "0"
    STAR = "*"
    POUND = "#"


# (low-group Hz, high-group Hz) tone pair for each key. Rows of the keypad share
# the low frequency (697/770/852/941) and columns share the high frequency
# (1209/1336/1477). Every KeypadEntry member must have an entry here.
DTMF_FREQUENCIES = {
    KeypadEntry.ONE: (697, 1209),
    KeypadEntry.TWO: (697, 1336),
    KeypadEntry.THREE: (697, 1477),
    KeypadEntry.FOUR: (770, 1209),
    KeypadEntry.FIVE: (770, 1336),
    KeypadEntry.SIX: (770, 1477),
    KeypadEntry.SEVEN: (852, 1209),
    KeypadEntry.EIGHT: (852, 1336),
    KeypadEntry.NINE: (852, 1477),
    KeypadEntry.STAR: (941, 1209),
    KeypadEntry.ZERO: (941, 1336),
    KeypadEntry.POUND: (941, 1477),
}
|
||
|
||
class DTMFToneGenerator(Singleton):
    """Synthesizes the dual-tone audio for keypad presses and caches the result.

    The class derives from ``Singleton`` so that generated tones can be reused
    for as long as the process (e.g. a long-running server) stays up, rather
    than only within a single action.
    """

    def __init__(self):
        # NOTE(review): Singleton is defined elsewhere. If it returns the same
        # instance from __new__, Python still re-runs __init__ on every
        # DTMFToneGenerator() call, which would wipe the cache each time.
        # Creating the cache only when it is missing is safe either way.
        if not hasattr(self, "tone_cache"):
            self.tone_cache: Dict[Tuple[KeypadEntry, int, AudioEncoding, float], bytes] = {}

    def generate(
        self,
        keypad_entry: KeypadEntry,
        sampling_rate: int,
        audio_encoding: AudioEncoding,
        duration_seconds: float = DEFAULT_DTMF_TONE_LENGTH_SECONDS,
    ) -> bytes:
        """Return the audio bytes for one keypad press.

        The tone is the sum of the two sine waves listed for the key in
        ``DTMF_FREQUENCIES``, normalized to full scale and quantized to
        16-bit samples.

        Args:
            keypad_entry: The key to synthesize.
            sampling_rate: Samples per second of the generated audio.
            audio_encoding: ``AudioEncoding.MULAW`` yields mu-law bytes; any
                other value yields the raw 16-bit PCM bytes.
            duration_seconds: Length of the tone. Kept configurable because a
                shorter tone lets the action finish sooner.

        Returns:
            The encoded tone. Results are cached per
            (key, sampling rate, encoding, duration).

        Raises:
            KeyError: If ``keypad_entry`` has no entry in ``DTMF_FREQUENCIES``.
            ValueError: If ``sampling_rate * duration_seconds`` yields no samples.
        """
        # The duration is part of the key: tones of different lengths must not
        # be served from the same cache slot.
        cache_key = (keypad_entry, sampling_rate, audio_encoding, duration_seconds)
        cached = self.tone_cache.get(cache_key)
        if cached is not None:
            return cached

        f1, f2 = DTMF_FREQUENCIES[keypad_entry]

        num_samples = int(sampling_rate * duration_seconds)
        if num_samples <= 0:
            # Without this check the failure surfaces as an opaque NumPy error
            # from linspace / max on an empty array.
            raise ValueError(
                f"DTMF tone needs at least one sample; got sampling_rate={sampling_rate}, "
                f"duration_seconds={duration_seconds}"
            )

        t = np.linspace(0, duration_seconds, num_samples, endpoint=False)
        tone = np.sin(2 * np.pi * f1 * t) + np.sin(2 * np.pi * f2 * t)
        peak = np.max(np.abs(tone))
        if peak > 0:
            # Normalize to [-1, 1]; an all-zero signal is left as silence
            # instead of dividing by zero.
            tone = tone / peak
        pcm = (tone * MAX_INT).astype(np.int16).tobytes()
        if audio_encoding == AudioEncoding.MULAW:
            # 2 = sample width in bytes of the int16 PCM produced above.
            output = audioop.lin2ulaw(pcm, 2)
        else:
            output = pcm
        self.tone_cache[cache_key] = output
        return output
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any situation where the queue could remain empty and this causes an infinite loop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good point, updated the while loop