Skip to content

Commit

Permalink
TTS Voices (#365)
Browse files Browse the repository at this point in the history
  • Loading branch information
dwilkie committed Oct 20, 2023
1 parent eed27ce commit 24820a1
Show file tree
Hide file tree
Showing 39 changed files with 395 additions and 185 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/switch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Ruby
uses: ruby/setup-ruby@v1

- name: Configure AWS credentials
id: aws-login
uses: aws-actions/configure-aws-credentials@v4
Expand Down Expand Up @@ -116,7 +119,7 @@ jobs:

- name: Export Polly Voices
run: |
components/freeswitch/bin/export_aws_polly_voices components/freeswitch/conf/autoload_configs/polly_voices.xml
components/freeswitch/bin/export_tts_voices > components/freeswitch/conf/autoload_configs/tts_voices.xml
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
Expand Down
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ require:
- rubocop-performance
- rubocop-rspec

AllCops:
NewCops: enable

Style/FrozenStringLiteralComment:
Enabled: false

Expand Down
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
terraform 1.5.6
ruby 3.2.2
4 changes: 4 additions & 0 deletions components/app/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ source "https://rubygems.org"

gem "adhearsion", github: "somleng/adhearsion"
gem "aws-sdk-lambda"
gem "aws-sdk-polly"
gem "blather", github: "adhearsion/blather", branch: "develop"
gem "faraday"
gem "http"
gem "okcomputer"
Expand All @@ -11,6 +13,8 @@ gem "sentry-ruby"
gem "sinatra"
gem "sinatra-contrib", require: false
gem "skylight"
gem "sucker_punch"
gem "tts_voices", github: "somleng/tts_voices"

group :development, :test do
gem "rubocop"
Expand Down
47 changes: 38 additions & 9 deletions components/app/Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
GIT
remote: https://github.com/adhearsion/blather.git
revision: 03f9cda14ed3f93a80504c2538cd514d6b025d04
branch: develop
specs:
blather (2.0.0)
activesupport (>= 2.3.11)
eventmachine (~> 1.2, >= 1.2.6)
niceogiri (~> 1.0)
nokogiri (~> 1.8, >= 1.8.3)

GIT
remote: https://github.com/samnang/reel-rack.git
revision: 615a9c9219aa00675a4290c4f7245684c701ea0e
Expand Down Expand Up @@ -36,20 +47,32 @@ GIT
thor
virtus (~> 1.0)

GIT
remote: https://github.com/somleng/tts_voices.git
revision: afb3b77d74a0bea12b1c8eaa1303e20076a43912
specs:
tts_voices (0.1.0)
aws-sdk-polly

GEM
remote: https://rubygems.org/
specs:
activesupport (7.0.8)
activesupport (7.1.1)
base64
bigdecimal
concurrent-ruby (~> 1.0, >= 1.0.2)
connection_pool (>= 2.2.5)
drb
i18n (>= 1.6, < 2)
minitest (>= 5.1)
mutex_m
tzinfo (~> 2.0)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
adhearsion-loquacious (1.9.3)
ast (2.4.2)
aws-eventstream (1.2.0)
aws-partitions (1.835.0)
aws-partitions (1.838.0)
aws-sdk-core (3.185.1)
aws-eventstream (~> 1, >= 1.0.2)
aws-partitions (~> 1, >= 1.651.0)
Expand All @@ -58,19 +81,17 @@ GEM
aws-sdk-lambda (1.106.0)
aws-sdk-core (~> 3, >= 3.184.0)
aws-sigv4 (~> 1.1)
aws-sdk-polly (1.76.0)
aws-sdk-core (~> 3, >= 3.184.0)
aws-sigv4 (~> 1.1)
aws-sigv4 (1.6.0)
aws-eventstream (~> 1, >= 1.0.2)
axiom-types (0.1.1)
descendants_tracker (~> 0.0.4)
ice_nine (~> 0.11.0)
thread_safe (~> 0.3, >= 0.3.1)
base64 (0.1.1)
blather (2.0.0)
activesupport (>= 2.3.11)
eventmachine (~> 1.2, >= 1.2.6)
niceogiri (~> 1.0)
nokogiri (~> 1.8, >= 1.8.3)
sucker_punch (~> 2.0)
bigdecimal (3.1.4)
celluloid (0.16.0)
timers (~> 4.0.0)
celluloid-io (0.16.2)
Expand All @@ -80,6 +101,7 @@ GEM
coercible (1.0.0)
descendants_tracker (~> 0.0.1)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
countdownlatch (1.0.0)
crack (0.4.5)
rexml
Expand All @@ -90,6 +112,8 @@ GEM
docile (1.4.0)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
drb (2.1.1)
ruby2_keywords
equalizer (0.0.11)
eventmachine (1.2.7)
faraday (2.7.11)
Expand Down Expand Up @@ -134,6 +158,7 @@ GEM
multi_json (1.15.0)
mustermann (3.0.0)
ruby2_keywords (~> 0.0.1)
mutex_m (0.1.2)
niceogiri (1.1.2)
nokogiri (~> 1.5)
nio4r (2.5.9)
Expand Down Expand Up @@ -237,7 +262,7 @@ GEM
skylight (6.0.1)
activesupport (>= 5.2.0)
state_machine (1.2.0)
sucker_punch (2.1.2)
sucker_punch (3.1.0)
concurrent-ruby (~> 1.0)
thor (1.2.2)
thread_safe (0.3.6)
Expand Down Expand Up @@ -274,6 +299,8 @@ PLATFORMS
DEPENDENCIES
adhearsion!
aws-sdk-lambda
aws-sdk-polly
blather!
faraday
http
okcomputer
Expand All @@ -290,6 +317,8 @@ DEPENDENCIES
sinatra
sinatra-contrib
skylight
sucker_punch
tts_voices!
twilio-ruby
vcr
webmock
Expand Down
1 change: 1 addition & 0 deletions components/app/app/call_controllers/call_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def build_call_properties
api_version: response.api_version,
to: response.to,
from: response.from,
default_tts_voice: response.default_tts_voice,
sip_headers: SIPHeaders.new(call_sid: response.call_sid, account_sid: response.account_sid)
)
end
Expand Down
7 changes: 7 additions & 0 deletions components/app/app/jobs/notify_tts_event_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Background job that hands a TTS event payload to a call platform client.
#
# Including SuckerPunch::Job is what lets callers enqueue this job with
# `NotifyTTSEventJob.perform_async(client, ...)` instead of calling
# `perform` inline.
class NotifyTTSEventJob
include SuckerPunch::Job

# @param client [#notify_tts_event] object that delivers the event
# @param data [Hash] event payload, passed through to the client untouched
def perform(client, data)
client.notify_tts_event(data)
end
end
1 change: 1 addition & 0 deletions components/app/app/models/call_properties.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
:to,
:from,
:sip_headers,
:default_tts_voice,
keyword_init: true
) do
def inbound?
Expand Down
55 changes: 46 additions & 9 deletions components/app/app/models/execute_twiml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ class ExecuteTwiML
DEFAULT_TWILIO_VOICE = "man".freeze
DEFAULT_TWILIO_LANGUAGE = "en".freeze
FINISH_ON_KEY_PATTERN = /\A(?:\d|\*|\#)\z/.freeze
BASIC_TTS_MAPPING = {
"man" => "Basic.Kal",
"woman" => "Basic.Slt"
}.freeze

DIAL_CALL_STATUSES = {
no_answer: "no-answer",
answer: "completed",
Expand Down Expand Up @@ -105,9 +110,17 @@ def execute_say(verb)
answer unless answered?

attributes = twiml_attributes(verb)
tts_voice = resolve_tts_voice(attributes)

NotifyTTSEventJob.perform_async(
call_platform_client,
phone_call: call_properties.call_sid,
tts_voice: tts_voice.identifier,
num_chars: verb.content.length
)

twiml_loop(attributes).each do
say(say_options(verb.content, attributes))
say(say_options(verb.content, tts_voice))
end
end

Expand All @@ -133,7 +146,12 @@ def execute_gather(verb)
end

nested_verb_attributes = twiml_attributes(nested_verb)
content = nested_verb.name == "Say" ? say_options(nested_verb.content, nested_verb_attributes) : nested_verb.content
content = if nested_verb.name == "Say"
tts_voice = resolve_tts_voice(nested_verb_attributes)
say_options(nested_verb.content, tts_voice)
else
nested_verb.content
end
result.concat(Array.new(twiml_loop(nested_verb_attributes).count, content))
end

Expand Down Expand Up @@ -247,14 +265,9 @@ def execute_record(verb)
)
end

def say_options(content, attributes)
voice_params = {
name: attributes.fetch("voice", DEFAULT_TWILIO_VOICE),
language: attributes.fetch("language", DEFAULT_TWILIO_LANGUAGE)
}

def say_options(content, tts_voice)
ssml = RubySpeech::SSML.draw do
voice(voice_params) do
voice(name: tts_voice.identifier, language: tts_voice.language) do
# mod ssml doesn't support non-ascii characters
# https://github.com/signalwire/freeswitch/issues/1348
string(content + ".")
Expand Down Expand Up @@ -302,4 +315,28 @@ def sip_headers
def normalize_recording_url(raw_recording_url)
URL_PATTERN.match(raw_recording_url)[0]
end

# Chooses the TTS voice for a verb from its TwiML attributes.
#
# Resolution order:
#   1. the "voice" attribute, with the basic names in BASIC_TTS_MAPPING
#      ("man" / "woman") translated to their voice identifiers first
#   2. when no voice is given, the voice picked by
#      resolve_tts_voice_by_language for the "language" attribute
#   3. the call's default voice (call_properties.default_tts_voice)
#
# @param attributes [Hash] verb attributes, read via the "voice" and "language" keys
# @return the voice found by TTSVoices::Voice.find, or the call's default voice
def resolve_tts_voice(attributes)
  requested_voice = attributes["voice"]
  requested_language = attributes["language"]
  fallback_voice = TTSVoices::Voice.find(call_properties.default_tts_voice)

  # Names outside the basic mapping pass through unchanged.
  identifier = BASIC_TTS_MAPPING.fetch(requested_voice, requested_voice)

  if identifier.blank?
    language_match = resolve_tts_voice_by_language(fallback_voice, requested_language)
    identifier = language_match&.identifier
  end

  TTSVoices::Voice.find(identifier) || fallback_voice
end

# Finds a voice that speaks the requested language.
#
# The default voice is returned as-is when no language is requested or when
# it already speaks that language (compared case-insensitively). Otherwise
# the first voice in TTSVoices::Voice.all with the default voice's provider
# and the requested language is returned.
#
# @param default_tts_voice the call's default voice (responds to #language and #provider)
# @param language_attribute [String, nil] value of the verb's "language" attribute
# @return the matching voice, or nil when no voice matches
def resolve_tts_voice_by_language(default_tts_voice, language_attribute)
  speaks_language = ->(voice) { voice.language.casecmp(language_attribute).zero? }

  return default_tts_voice if language_attribute.blank? || speaks_language.call(default_tts_voice)

  TTSVoices::Voice.all.find do |candidate|
    candidate.provider == default_tts_voice.provider && speaks_language.call(candidate)
  end
end
end
1 change: 1 addition & 0 deletions components/app/app/models/outbound_call.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def initiate
api_version: call_params.fetch("api_version"),
from: call_params.fetch("from"),
to: call_params.fetch("to"),
default_tts_voice: call_params.fetch("default_tts_voice"),
sip_headers:
)
},
Expand Down
12 changes: 12 additions & 0 deletions components/app/config/initializers/aws_stubs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,16 @@ def to_h
}
}
}

Aws.config[:polly] ||= {
stub_responses: {
describe_voices: {
voices: [
{ gender: "Female", id: "Joanna", language_code: "en-US", supported_engines: ["standard"] },
{ gender: "Female", id: "Lotte", language_code: "nl-NL", supported_engines: ["neural"] },
{ gender: "Female", id: "Vitoria", language_code: "pt-BR", supported_engines: %w[neural standard] }
]
}
}
}
end
12 changes: 11 additions & 1 deletion components/app/lib/call_platform/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class InvalidRequestError < StandardError; end
:api_version,
:to,
:from,
:default_tts_voice,
keyword_init: true
)

Expand All @@ -32,6 +33,14 @@ def notify_call_event(params)
end
end

# Reports a TTS event by POSTing it as JSON to /services/tts_events.
#
# An unsuccessful response is not raised; it is reported to Sentry together
# with the response body.
#
# @param params [Hash] event payload, serialized with #to_json
# @return [nil] on success, otherwise whatever Sentry.capture_message returns
def notify_tts_event(params)
  response = http_client.post("/services/tts_events", params.to_json)
  return if response.success?

  Sentry.capture_message("Invalid TTS event", extra: { response_body: response.body })
end

def build_routing_parameters(params)
make_request("/services/routing_parameters", params: params)
end
Expand All @@ -48,7 +57,8 @@ def create_call(params)
direction: json_response.fetch("direction"),
to: json_response.fetch("to"),
from: json_response.fetch("from"),
api_version: json_response.fetch("api_version")
api_version: json_response.fetch("api_version"),
default_tts_voice: json_response.fetch("default_tts_voice")
)
end

Expand Down
6 changes: 4 additions & 2 deletions components/app/lib/call_platform/fake_client.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module CallPlatform
class FakeClient < Client
def notify_call_event(_params); end
def notify_call_event(params); end
def notify_tts_event(params); end

def create_call(params)
validate_gateway_headers(params)
Expand All @@ -21,7 +22,8 @@ def create_call(params)
direction: "inbound",
to: params.fetch(:to),
from: params.fetch(:from),
api_version: "2010-04-01"
api_version: "2010-04-01",
default_tts_voice: "Basic.Kal"
)
end

Expand Down
12 changes: 7 additions & 5 deletions components/app/spec/call_controllers/gather_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,9 @@
it "handles nested <Say>" do
controller = build_controller(
stub_voice_commands: { ask: build_input_result(nil) },
call_properties: { voice_url: "https://www.example.com/gather.xml" }
call_properties: {
voice_url: "https://www.example.com/gather.xml"
}
)

stub_twiml_request(controller, response: <<~TWIML)
Expand All @@ -464,15 +466,15 @@
outputs.first(3).each do |ssml|
node = ssml.voice.children.first
expect(node.content).to eq("Hello World.")
expect(node.attributes.fetch("name").value).to eq("woman")
expect(node.attributes.fetch("lang").value).to eq("de")
expect(node.attributes.fetch("name").value).to eq("Basic.Slt")
expect(node.attributes.fetch("lang").value).to eq("en-US")
end

outputs.last(5).each do |ssml|
node = ssml.voice.children.first
expect(node.content).to eq("Foobar.")
expect(node.attributes.fetch("name").value).to eq("man")
expect(node.attributes.fetch("lang").value).to eq("en")
expect(node.attributes.fetch("name").value).to eq("Basic.Kal")
expect(node.attributes.fetch("lang").value).to eq("en-US")
end
end
end
Expand Down
Loading

0 comments on commit 24820a1

Please sign in to comment.