Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

makes synthesizer methods static #394

Merged
merged 1 commit on Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vocode/streaming/synthesizer/azure_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def get_message_up_to(
self,
message: str,
ssml: str,
seconds: int,
seconds: float,
word_boundary_event_pool: WordBoundaryEventPool,
) -> str:
events = word_boundary_event_pool.get_events_sorted()
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/bark_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ async def create_speech(
write_wav(output_bytes_io, self.SAMPLE_RATE, int_audio_arr)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
35 changes: 20 additions & 15 deletions vocode/streaming/synthesizer/base_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def __init__(self, chunk: bytes, is_last_chunk: bool):
def __init__(
self,
chunk_generator: AsyncGenerator[ChunkResult, None],
get_message_up_to: Callable[[int], str],
get_message_up_to: Callable[[float], str],
):
self.chunk_generator = chunk_generator
self.get_message_up_to = get_message_up_to
Expand Down Expand Up @@ -172,20 +172,23 @@ def ready_synthesizer(self):
pass

# given the number of seconds the message was allowed to go until, where did we get in the message?
@staticmethod
def get_message_cutoff_from_total_response_length(
self, message: BaseMessage, seconds: int, size_of_output: int
synthesizer_config: SynthesizerConfig,
message: BaseMessage,
seconds: float,
size_of_output: int,
) -> str:
estimated_output_seconds = (
size_of_output / self.synthesizer_config.sampling_rate
)
estimated_output_seconds = size_of_output / synthesizer_config.sampling_rate
if not message.text:
return message.text

estimated_output_seconds_per_char = estimated_output_seconds / len(message.text)
return message.text[: int(seconds / estimated_output_seconds_per_char)]

@staticmethod
def get_message_cutoff_from_voice_speed(
self, message: BaseMessage, seconds: int, words_per_minute: int
message: BaseMessage, seconds: float, words_per_minute: int
) -> str:
words_per_second = words_per_minute / 60
estimated_words_spoken = math.floor(words_per_second * seconds)
Expand All @@ -203,19 +206,21 @@ async def create_speech(
raise NotImplementedError

# @param file - a file-like object in wav format
@staticmethod
def create_synthesis_result_from_wav(
self, file: Any, message: BaseMessage, chunk_size: int
synthesizer_config: SynthesizerConfig,
file: Any,
message: BaseMessage,
chunk_size: int,
) -> SynthesisResult:
output_bytes = convert_wav(
file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=self.synthesizer_config.audio_encoding,
output_sample_rate=synthesizer_config.sampling_rate,
output_encoding=synthesizer_config.audio_encoding,
)

if self.synthesizer_config.should_encode_as_wav:
chunk_transform = lambda chunk: encode_as_wav(
chunk, self.synthesizer_config
)
if synthesizer_config.should_encode_as_wav:
chunk_transform = lambda chunk: encode_as_wav(chunk, synthesizer_config)
else:
chunk_transform = lambda chunk: chunk

Expand All @@ -232,8 +237,8 @@ async def chunk_generator(output_bytes):

return SynthesisResult(
chunk_generator(output_bytes),
lambda seconds: self.get_message_cutoff_from_total_response_length(
message, seconds, len(output_bytes)
lambda seconds: BaseSynthesizer.get_message_cutoff_from_total_response_length(
synthesizer_config, message, seconds, len(output_bytes)
),
)

Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/coqui_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ async def create_speech(
)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=io.BytesIO(read_response),
message=message,
chunk_size=chunk_size,
Expand Down
5 changes: 4 additions & 1 deletion vocode/streaming/synthesizer/coqui_tts_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ async def create_speech(
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
file=output_bytes_io, message=message, chunk_size=chunk_size
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
)

convert_span.end()
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/eleven_labs_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ async def create_speech(
output_bytes_io = decode_mp3(audio_data)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/google_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ async def create_speech(
output_bytes_io.seek(0)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/gtts_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def thread():
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/play_ht_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ async def create_speech(
output_bytes_io = decode_mp3(read_response)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
2 changes: 1 addition & 1 deletion vocode/streaming/synthesizer/polly_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def get_speech_marks(self, message: str) -> Any:
def get_message_up_to(
self,
message: str,
seconds: int,
seconds: float,
word_events,
) -> str:
for event in word_events:
Expand Down
5 changes: 4 additions & 1 deletion vocode/streaming/synthesizer/rime_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ async def create_speech(
audio_file = io.BytesIO(base64.b64decode(data.get("audioContent")))

result = self.create_synthesis_result_from_wav(
file=audio_file, message=message, chunk_size=chunk_size
synthesizer_config=self.synthesizer_config,
file=audio_file,
message=message,
chunk_size=chunk_size,
)
convert_span.end()
return result
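(file header not captured — the following hunk belongs to a different file than rime_synthesizer.py)
Original file line number Diff line number Diff line change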
Expand Up @@ -64,6 +64,7 @@ async def create_speech(
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
Loading