Skip to content

Commit

Permalink
* Fixed the issue where sentences were broken up by index commands
Browse files Browse the repository at this point in the history
* Fixed some bugs with voice preview
* Installing voices from local files no longer requires a specific file name format
* Several housekeeping tasks.
  • Loading branch information
mush42 committed Oct 22, 2023
1 parent c113b3a commit 1ae1fba
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 129 deletions.
2 changes: 1 addition & 1 deletion 2.0-beta.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "Piper Neural Voices",
"URL": "",
"description": "Neural voices for NVDA based on Piper",
"sha256": "6b202b1f1c89aec714d72721005f0deeef0dec6f53eeaf18a66406ef51a9da81",
"sha256": "63ff8c129618085cf7b77dc503fd879d4b4523cd2f58a7eada9b2bd27e43af9a",
"homepage": null,
"addonVersionName": "2.0-beta",
"addonVersionNumber": {
Expand Down
3 changes: 2 additions & 1 deletion addon/globalPlugins/piper_voices_globalPlugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
_ADDON_ROOT = os.path.abspath(os.path.join(_DIR, os.pardir, os.pardir))
_TTS_MODULE_DIR = os.path.join(_ADDON_ROOT, "synthDrivers")
sys.path.insert(0, _TTS_MODULE_DIR)
from piper_neural_voices import helpers as helpers
from piper_neural_voices import helpers
from piper_neural_voices import aio
from piper_neural_voices.tts_system import (
PiperTextToSpeechSystem,
PIPER_VOICES_DIR,
Expand Down
120 changes: 78 additions & 42 deletions addon/globalPlugins/piper_voices_globalPlugin/voice_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,39 @@
import functools
import operator
import os
import re
import shutil
import tarfile
import tempfile
import threading
import winsound

import wx
from wx.adv import CommandLinkButton
import gui
import nvwave
import synthDriverHandler
from languageHandler import normalizeLanguage
from logHandler import log

from . import PiperTextToSpeechSystem, PIPER_VOICES_DIR
from . import voice_download
from . import aio
from . import helpers
from .components import AsyncSnakDialog, ColumnDefn, ImmutableObjectListView, SimpleDialog, make_sized_static_box
from .sized_controls import SizedPanel


with helpers.import_bundled_library():
import miniaudio
from pathlib import Path


# Pattern matched against the stem of a voice model file name to recover
# the three components of a voice key: language, voice name and quality.
# Components may be separated by either "-" or "_", and matching is
# case-insensitive (re.I).
# NOTE(review): the ``name`` group accepts ASCII letters only, so stems whose
# voice name contains digits or underscores may fail to match -- confirm
# that this restriction is intended.
VOICE_INFO_REGEX = re.compile(
# Language code with an optional second part, e.g. "en", "en_US" or "en-us".
r"(?P<language>[a-z]+(_|-)?([a-z]+)?)(-|_)"
# Voice name: one or more letters.
r"(?P<name>[a-z]+)(-|_)"
# Quality level; "x-low" and "x_low" are both accepted spellings.
r"(?P<quality>(high|medium|low|x-low|x_low))",
re.I
)


class InstalledPiperVoicesPanel(SizedPanel):
Expand Down Expand Up @@ -197,7 +210,7 @@ def _get_process_tar_archive(self, dialog, res):
if not filepath:
return
try:
voice_key = PiperTextToSpeechSystem.install_voice(
voice_key = self.install_voice_from_tar_archive(
filepath, PIPER_VOICES_DIR
)
except:
Expand All @@ -219,6 +232,45 @@ def _get_process_tar_archive(self, dialog, res):
_("Voice installed successfully"),
style=wx.ICON_INFORMATION,
)
self.update_voices_list(set_focus=True, invalidate_synth_voices_cache=True)

@staticmethod
def install_voice_from_tar_archive(tar_path, voices_dir):
    """Install a voice from a local tar archive into ``voices_dir``.

    The archive must contain a ``.onnx`` model file and a ``.json`` config
    file. The voice key (``<language>-<name>-<quality>``) is derived from
    the stem of the model file name using ``VOICE_INFO_REGEX``, and the
    files are extracted into a folder named after that key. A member
    called ``MODEL_CARD`` is extracted as well when the archive has one.

    Args:
        tar_path: Path of the tar archive to install from.
        voices_dir: Directory under which the voice folder is created.

    Returns:
        The voice key, which is also the name of the created folder.

    Raises:
        FileNotFoundError: If the model or config file is missing, or if
            the model file name does not encode language, name and quality.
        ValueError: If an archive member would be written outside of the
            voice folder.
    """
    # The context manager releases the archive handle even when validation
    # or extraction fails part-way through.
    with tarfile.open(tar_path) as tar:
        # Only regular files are candidates: directories, links and device
        # nodes can never be a usable model or config.
        members = {
            member.name: member
            for member in tar.getmembers()
            if member.isfile()
        }
        onnx_file = next(
            (name for name in members if Path(name).suffix == ".onnx"),
            None,
        )
        config_file = next(
            (name for name in members if Path(name).suffix == ".json"),
            None,
        )
        if onnx_file is None or config_file is None:
            raise FileNotFoundError("Required files not found in archive")
        voice_info = VOICE_INFO_REGEX.match(Path(onnx_file).stem)
        if voice_info is None:
            # Same exception type as before, but the message now names the
            # actual problem instead of claiming that files are missing.
            raise FileNotFoundError(
                "Could not determine voice language, name and quality "
                "from model file name: {}".format(onnx_file)
            )
        info = voice_info.groupdict()
        voice_key = "-".join([
            normalizeLanguage(info["language"]),
            info["name"].replace("-", "_"),
            info["quality"].replace("-", "_"),
        ])
        voice_folder = Path(voices_dir).joinpath(voice_key)
        files_to_extract = [onnx_file, config_file]
        if "MODEL_CARD" in members:
            files_to_extract.append("MODEL_CARD")
        # The archive is user supplied: refuse member names (absolute paths,
        # ".." components) that would resolve outside of the voice folder.
        # This is checked before anything is written to disk.
        dest_root = os.path.realpath(os.fspath(voice_folder))
        for name in files_to_extract:
            target = os.path.realpath(os.path.join(dest_root, name))
            try:
                is_inside = os.path.commonpath([dest_root, target]) == dest_root
            except ValueError:
                # Raised e.g. when the two paths are on different drives.
                is_inside = False
            if not is_inside:
                raise ValueError(
                    "Archive member escapes the voice folder: {}".format(name)
                )
        voice_folder.mkdir(parents=True, exist_ok=True)
        for name in files_to_extract:
            tar.extract(
                members[name],
                path=os.fspath(voice_folder),
                set_attrs=False,
            )
    return voice_key


class OnlinePiperVoicesPanel(SizedPanel):
Expand Down Expand Up @@ -251,11 +303,8 @@ def __init__(self, parent):
preview_box.SetSizerType("horizontal")
wx.StaticText(preview_box, -1, _("Speaker"))
self.speaker_choice = wx.Choice(preview_box, -1, choices=[])
# Translators: label of a button to listen to a sample
self._preview_label = _("&Play")
# Translators: label of a button to stop listening to a sample
self._preview_stop_label = _("&Stop")
preview_btn = wx.Button(preview_box, -1, self._preview_label)
# Translators: label of a button
preview_btn = wx.Button(preview_box, -1, _("&Preview"))
# Translators: label of a button to download the voice
download_btn = wx.Button(self.buttons_panel, -1, _("&Download voice"))
# Translators: label of a button to refresh the voices list
Expand Down Expand Up @@ -312,7 +361,6 @@ def on_language_selection_change(self, event):
self.speaker_choice.Enable(False)

def on_voice_selected(self, event):
stop_playback()
self.speaker_choice.SetItems([])
selected_voice = self.voices_list.get_selected()
if selected_voice is None:
Expand All @@ -326,39 +374,24 @@ def on_voice_selected(self, event):
self.speaker_choice.Enable(False)

def on_speaker_selection_changed(self, event):
stop_playback()
pass

def on_preview(self, event):
selected_voice = self.voices_list.get_selected()
if selected_voice is None:
return

preview_btn = event.GetEventObject()
if preview_btn.GetLabel() == self._preview_stop_label:
stop_playback()
preview_btn.SetLabel(self._preview_label)
return

def _callback(future):
try:
decoded_file = future.result()
with tempfile.TemporaryDirectory() as tempdir:
mp3file = os.path.join(tempdir, "speaker_0.mp3")
miniaudio.wav_write_file(mp3file, decoded_file)
nvwave.playWaveFile(mp3file, asynchronous=True)
except:
log.exception("Failed to retrieve/play voice preview", exc_info=True)
finally:
preview_btn.SetLabel(self._preview_label)

speaker_idx = 0
if selected_voice.num_speakers > 1:
speaker_idx = self.speaker_choice.GetSelection()
mp3url = selected_voice.get_preview_url(speaker_idx=speaker_idx)
preview_btn.SetLabel(self._preview_stop_label)
voice_download.THREAD_POOL_EXECUTOR.submit(
self.get_preview_audio, mp3url
).add_done_callback(_callback)
AsyncSnakDialog(
# Translators: message in a dialog
message=_("Playing preview..."),
executor=aio.THREADED_EXECUTOR,
func=functools.partial(play_remote_mp3, mp3url),
done_callback=lambda future: True,
parent=self.GetTopLevelParent()
)

def on_download(self, event):

Expand Down Expand Up @@ -388,13 +421,6 @@ def set_voices(self, voices):
self.language_choice.SetItems([lang.description for lang in self.languages])
self.__already_populated.set()

@staticmethod
def get_preview_audio(mp3_url):
    """Fetch a voice preview and decode it to mono audio at 22050 Hz.

    Any preview that is currently playing is stopped first. The HTTP
    response is checked with ``raise_for_status()`` before decoding.

    Args:
        mp3_url: URL of the preview audio to download.

    Returns:
        The decoded audio object produced by ``miniaudio.decode``.
    """
    stop_playback()
    response = voice_download.request.get(mp3_url)
    response.raise_for_status()
    decoded_audio = miniaudio.decode(
        response.body,
        nchannels=1,
        sample_rate=22050,
    )
    return decoded_audio


class PiperVoiceManagerDialog(SimpleDialog):

Expand Down Expand Up @@ -448,6 +474,16 @@ def _invalidate_pages_voice_cache(self):
panel = self.notebookCtrl.GetPage(i)
panel.invalidate_cache()

def stop_playback():
    """Stop the wave-file player held by ``nvwave``, if there is one."""
    if nvwave.fileWavePlayer is None:
        # Nothing to stop.
        return
    nvwave.fileWavePlayer.stop()

def play_remote_mp3(mp3_url):
    """Download an audio preview, convert it to WAV and play it.

    The audio is decoded to mono 22050 Hz, written to a WAV file inside a
    temporary directory and handed to ``winsound.PlaySound``. The temporary
    directory is removed once the ``with`` block is left.

    Args:
        mp3_url: URL of the preview audio to download.
    """
    response = voice_download.request.get(mp3_url)
    response.raise_for_status()
    pcm_audio = miniaudio.decode(
        response.body,
        nchannels=1,
        sample_rate=22050,
    )
    with tempfile.TemporaryDirectory() as scratch_dir:
        wav_path = os.path.join(scratch_dir, "speaker_0.wav")
        miniaudio.wav_write_file(wav_path, pcm_audio)
        # No SND_ASYNC flag is passed, so the temporary file is still on
        # disk for as long as PlaySound needs it.
        play_flags = winsound.SND_FILENAME | winsound.SND_PURGE
        winsound.PlaySound(wav_path, play_flags)

7 changes: 4 additions & 3 deletions addon/installTasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# This file is covered by the GNU General Public License.


import wx
import platform

import gui


Expand All @@ -13,7 +14,7 @@


def onInstall():
if wx.GetCpuArchitectureName() != 'x64':
if platform.machine() != 'AMD64':
gui.messageBox(
# Translators: content of a message box
_("32-bit system detected. Piper voices work on 64-bit systems only. Installation aborted!"),
Expand All @@ -22,4 +23,4 @@ def onInstall():
wx.ICON_ERROR
)
raise RuntimeError("Unsupported OS architecture")

54 changes: 32 additions & 22 deletions addon/synthDrivers/piper_neural_voices/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,19 @@
# Copyright (c) 2023 Musharraf Omer
# This file is covered by the GNU General Public License.

import queue
import sys
import threading
import webbrowser
from collections import OrderedDict
from contextlib import suppress
from functools import partial
from itertools import zip_longest

import config
import globalVars
import languageHandler
import nvwave
import synthDriverHandler
from autoSettingsUtils.driverSetting import DriverSetting, NumericDriverSetting
from logHandler import log
from speech.sayAll import SayAllHandler
from speech.commands import (
BreakCommand,
CharacterModeCommand,
IndexCommand,
LangChangeCommand,
RateCommand,
Expand All @@ -41,10 +34,7 @@
from . import grpc_client
from ._config import PiperConfig
from .tts_system import (
PIPER_VOICE_SAMPLES_URL,
AudioTask,
PiperTextToSpeechSystem,
SilenceTask,
SpeakerNotFoundError,
SpeechOptions,
)
Expand All @@ -55,7 +45,6 @@
addonHandler.initTranslation()


# This should run from the check method
grpc_client.initialize()


Expand Down Expand Up @@ -85,12 +74,15 @@ async def __call__(self):


class IndexReachedTask:
def __init__(self, callback, index):
__slots__ = ["callback", "index_list"]

def __init__(self, callback, index_list):
self.callback = callback
self.index = index
self.index_list = index_list

async def __call__(self):
await aio.run_in_executor(self.callback, self.index)
for index in self.index_list:
await aio.run_in_executor(self.callback, index)


class SpeechTask:
Expand Down Expand Up @@ -150,6 +142,7 @@ async def process_speech(speech_seq, is_canceled):
return aio.ASYNCIO_EVENT_LOOP.create_task(speech_task)



class SynthDriver(synthDriverHandler.SynthDriver):

supportedSettings = (
Expand Down Expand Up @@ -236,29 +229,36 @@ def terminate(self):
self._players.clear()

def speak(self, speechSequence):
return self.speak_navigation(speechSequence)
return self._prepare_and_run_speech_task(speechSequence)

def speak_navigation(self, speechSequence):
def _prepare_and_run_speech_task(self, speechSequence):
self.cancel()
self._silence_event.clear()
speech_seq = []
text_list = []
index_command_list = []
default_lang = self.tts.language
for item in self.combine_adjacent_strings(speechSequence):
item_type = type(item)
if item_type is str:
if item_type is IndexCommand:
index_command_list.append(item.index)
continue
elif item_type is str:
text_list.append(item)
continue
if any(text_list):
speech_seq.append(
SpeechTask(
self.tts.create_speech_task(item),
self.tts.create_speech_provider("".join(text_list)),
self._player,
self._silence_event.is_set,
)
)
elif item_type is IndexCommand:
speech_seq.append(IndexReachedTask(self._on_index_reached, item.index))
elif item_type is BreakCommand:
text_list.clear()
if item_type is BreakCommand:
speech_seq.append(
BreakTask(
self.tts.create_break_task(item.time),
self.tts.create_break_provider(item.time),
self._player,
self._silence_event.is_set,
)
Expand All @@ -274,6 +274,16 @@ def speak_navigation(self, speechSequence):
self.tts.volume = item.newValue
elif item_type is PitchCommand:
self.tts.pitch = item.newValue
if any(text_list):
speech_seq.append(
SpeechTask(
self.tts.create_speech_provider("".join(text_list)),
self._player,
self._silence_event.is_set,
)
)
if any(index_command_list):
speech_seq.append(IndexReachedTask(self._on_index_reached, index_command_list))
speech_seq.append(
DoneSpeakingTask(
self._player, self._on_index_reached, self._silence_event.is_set
Expand Down
Loading

0 comments on commit 1ae1fba

Please sign in to comment.