* Fixed the issue where sentences were broken up by index commands

* Fixed some bugs with voice preview * Installing voices from local files no longer requires a specific file name format * Several housekeeping tasks.
mush42 · Oct 22, 2023 · 1ae1fba · 1ae1fba
1 parent c113b3a
commit 1ae1fba
Show file tree

Hide file tree

Showing 7 changed files with 145 additions and 129 deletions.
diff --git a/2.0-beta.json b/2.0-beta.json
@@ -3,7 +3,7 @@
 	"displayName": "Piper Neural Voices",
 	"URL": "",
 	"description": "Neural voices for NVDA based on Piper",
-	"sha256": "6b202b1f1c89aec714d72721005f0deeef0dec6f53eeaf18a66406ef51a9da81",
+	"sha256": "63ff8c129618085cf7b77dc503fd879d4b4523cd2f58a7eada9b2bd27e43af9a",
 	"homepage": null,
 	"addonVersionName": "2.0-beta",
 	"addonVersionNumber": {

diff --git a/addon/globalPlugins/piper_voices_globalPlugin/__init__.py b/addon/globalPlugins/piper_voices_globalPlugin/__init__.py
@@ -22,7 +22,8 @@
 _ADDON_ROOT = os.path.abspath(os.path.join(_DIR, os.pardir, os.pardir))
 _TTS_MODULE_DIR = os.path.join(_ADDON_ROOT, "synthDrivers")
 sys.path.insert(0, _TTS_MODULE_DIR)
-from piper_neural_voices import helpers as helpers
+from piper_neural_voices import helpers
+from piper_neural_voices import aio
 from piper_neural_voices.tts_system import (
     PiperTextToSpeechSystem,
     PIPER_VOICES_DIR,

diff --git a/addon/globalPlugins/piper_voices_globalPlugin/voice_manager.py b/addon/globalPlugins/piper_voices_globalPlugin/voice_manager.py
@@ -9,26 +9,39 @@
 import functools
 import operator
 import os
+import re
 import shutil
+import tarfile
 import tempfile
 import threading
+import winsound
 
 import wx
 from wx.adv import CommandLinkButton
 import gui
-import nvwave
 import synthDriverHandler
+from languageHandler import normalizeLanguage
 from logHandler import log
 
 from . import PiperTextToSpeechSystem, PIPER_VOICES_DIR
 from . import voice_download
+from . import aio
 from . import helpers
 from .components import AsyncSnakDialog, ColumnDefn, ImmutableObjectListView, SimpleDialog, make_sized_static_box
 from .sized_controls import SizedPanel
 
 
 with helpers.import_bundled_library():
     import miniaudio
+    from pathlib import Path
+
+
+# Parses a voice file stem of the form <language><sep><name><sep><quality>,
+# where <sep> is "-" or "_" and <quality> is one of high, medium, low,
+# x-low or x_low. Matching is case-insensitive (re.I).
+# NOTE(review): the name group accepts letters only (no digits, "_" or "-");
+# confirm that this covers every voice name that should be installable.
+VOICE_INFO_REGEX = re.compile(
+    r"(?P<language>[a-z]+(_|-)?([a-z]+)?)(-|_)"
+    r"(?P<name>[a-z]+)(-|_)"
+    r"(?P<quality>(high|medium|low|x-low|x_low))",
+    re.I
+)
 
 
 class InstalledPiperVoicesPanel(SizedPanel):
@@ -197,7 +210,7 @@ def _get_process_tar_archive(self, dialog, res):
         if not filepath:
             return
         try:
-            voice_key = PiperTextToSpeechSystem.install_voice(
+            voice_key = self.install_voice_from_tar_archive(
                 filepath, PIPER_VOICES_DIR
             )
         except:
@@ -219,6 +232,45 @@ def _get_process_tar_archive(self, dialog, res):
                 _("Voice installed successfully"),
                 style=wx.ICON_INFORMATION,
             )
+            self.update_voices_list(set_focus=True, invalidate_synth_voices_cache=True)
+
+    @staticmethod
+    def install_voice_from_tar_archive(tar_path, voices_dir):
+        """Install a voice from a local tar archive and return its voice key.
+
+        The archive must contain a ``.onnx`` model and a ``.json`` config file.
+        The voice key, ``<language>-<name>-<quality>``, is parsed from the model
+        file name with ``VOICE_INFO_REGEX``; the model, the config and, when
+        present, ``MODEL_CARD`` are extracted into ``voices_dir/<voice key>``.
+
+        :param tar_path: path of the tar archive to install from.
+        :param voices_dir: directory under which the voice folder is created.
+        :return: the voice key, which is also the name of the created folder.
+        :raises FileNotFoundError: if the model or the config file is missing,
+            or if the model file name does not match ``VOICE_INFO_REGEX``.
+        :raises ValueError: if an archive member would be written outside the
+            voice folder.
+        """
+        # The context manager closes the archive on every path, including the
+        # error paths below, so the file handle is never leaked.
+        with tarfile.open(tar_path) as tar:
+            members = {member.name: member for member in tar.getmembers()}
+            onnx_file = next(
+                (name for name in members if Path(name).suffix == ".onnx"), None
+            )
+            config_file = next(
+                (name for name in members if Path(name).suffix == ".json"), None
+            )
+            if onnx_file is None or config_file is None:
+                raise FileNotFoundError("Required files not found in archive")
+            voice_info = VOICE_INFO_REGEX.match(Path(onnx_file).stem)
+            if voice_info is None:
+                raise FileNotFoundError("Required files not found in archive")
+            info = voice_info.groupdict()
+            voice_key = "-".join([
+                normalizeLanguage(info["language"]),
+                info["name"].replace("-", "_"),
+                info["quality"].replace("-", "_"),
+            ])
+            files_to_extract = [onnx_file, config_file]
+            if "MODEL_CARD" in members:
+                files_to_extract.append("MODEL_CARD")
+            voice_folder_name = os.fspath(Path(voices_dir).joinpath(voice_key))
+            # Member names come from the archive and are untrusted: refuse
+            # absolute or "../" names before anything is written to disk.
+            safe_root = os.path.realpath(voice_folder_name)
+            for name in files_to_extract:
+                target = os.path.realpath(os.path.join(safe_root, name))
+                if not target.startswith(safe_root + os.sep):
+                    raise ValueError("Unsafe path in archive: %s" % name)
+            os.makedirs(voice_folder_name, exist_ok=True)
+            for name in files_to_extract:
+                tar.extract(
+                    members[name],
+                    path=voice_folder_name,
+                    set_attrs=False,
+                )
+        return voice_key
 
 
 class OnlinePiperVoicesPanel(SizedPanel):
@@ -251,11 +303,8 @@ def __init__(self, parent):
         preview_box.SetSizerType("horizontal")
         wx.StaticText(preview_box, -1, _("Speaker"))
         self.speaker_choice = wx.Choice(preview_box, -1, choices=[])
-        # Translators: label of a button to listen to a sample
-        self._preview_label = _("&Play")
-        # Translators: label of a button to stop listening to a sample
-        self._preview_stop_label = _("&Stop")
-        preview_btn = wx.Button(preview_box, -1, self._preview_label)
+        # Translators: label of a button
+        preview_btn = wx.Button(preview_box, -1, _("&Preview"))
         # Translators: label of a button to download the voice
         download_btn = wx.Button(self.buttons_panel, -1, _("&Download voice"))
         # Translators: label of a button to refresh the voices list
@@ -312,7 +361,6 @@ def on_language_selection_change(self, event):
         self.speaker_choice.Enable(False)
 
     def on_voice_selected(self, event):
-        stop_playback()
         self.speaker_choice.SetItems([])
         selected_voice = self.voices_list.get_selected()
         if selected_voice is None:
@@ -326,39 +374,24 @@ def on_voice_selected(self, event):
             self.speaker_choice.Enable(False)
 
     def on_speaker_selection_changed(self, event):
-        stop_playback()
+        pass
 
     def on_preview(self, event):
         selected_voice = self.voices_list.get_selected()
         if selected_voice is None:
             return
-
-        preview_btn = event.GetEventObject()
-        if preview_btn.GetLabel() == self._preview_stop_label:
-            stop_playback()
-            preview_btn.SetLabel(self._preview_label)
-            return
-
-        def _callback(future):
-            try:
-                decoded_file = future.result()
-                with tempfile.TemporaryDirectory() as tempdir:
-                    mp3file = os.path.join(tempdir, "speaker_0.mp3")
-                    miniaudio.wav_write_file(mp3file, decoded_file)
-                    nvwave.playWaveFile(mp3file, asynchronous=True)
-            except:
-                log.exception("Failed to retrieve/play voice preview", exc_info=True)
-            finally:
-                preview_btn.SetLabel(self._preview_label)
-
         speaker_idx = 0
         if selected_voice.num_speakers > 1:
             speaker_idx = self.speaker_choice.GetSelection()
         mp3url = selected_voice.get_preview_url(speaker_idx=speaker_idx)
-        preview_btn.SetLabel(self._preview_stop_label)
-        voice_download.THREAD_POOL_EXECUTOR.submit(
-            self.get_preview_audio, mp3url
-        ).add_done_callback(_callback)
+        AsyncSnakDialog(
+            # Translators: message in a dialog
+            message=_("Playing preview..."),
+            executor=aio.THREADED_EXECUTOR,
+            func=functools.partial(play_remote_mp3, mp3url),
+            done_callback=lambda future: True,
+            parent=self.GetTopLevelParent()
+        )
 
     def on_download(self, event):
 
@@ -388,13 +421,6 @@ def set_voices(self, voices):
         self.language_choice.SetItems([lang.description for lang in self.languages])
         self.__already_populated.set()
 
-    @staticmethod
-    def get_preview_audio(mp3_url):
-        stop_playback()
-        resp = voice_download.request.get(mp3_url)
-        resp.raise_for_status()
-        return miniaudio.decode(resp.body, nchannels=1, sample_rate=22050)
-
 
 class PiperVoiceManagerDialog(SimpleDialog):
 
@@ -448,6 +474,16 @@ def _invalidate_pages_voice_cache(self):
             panel = self.notebookCtrl.GetPage(i)
             panel.invalidate_cache()
 
-def stop_playback():
-    if nvwave.fileWavePlayer is not None:
-        nvwave.fileWavePlayer.stop()
+
+def play_remote_mp3(mp3_url):
+    """Fetch the MP3 at ``mp3_url``, decode it and play it through winsound.
+
+    The audio is decoded to mono at 22050 Hz, written as a WAV file into a
+    temporary directory and handed to ``winsound.PlaySound``. The temporary
+    directory is removed when the ``with`` block exits.
+
+    An exception raised by ``raise_for_status`` propagates to the caller.
+    """
+    response = voice_download.request.get(mp3_url)
+    response.raise_for_status()
+    pcm_audio = miniaudio.decode(response.body, nchannels=1, sample_rate=22050)
+    # SND_ASYNC is not passed, so PlaySound is expected to return only after
+    # playback ends, i.e. before the temporary file is deleted.
+    play_flags = winsound.SND_FILENAME | winsound.SND_PURGE
+    with tempfile.TemporaryDirectory() as scratch_dir:
+        wav_path = os.path.join(scratch_dir, "speaker_0.wav")
+        miniaudio.wav_write_file(wav_path, pcm_audio)
+        winsound.PlaySound(wav_path, play_flags)
+
diff --git a/addon/installTasks.py b/addon/installTasks.py
@@ -4,7 +4,8 @@
 # This file is covered by the GNU General Public License.
 
 
-import wx
+import platform
+
 import gui
 
 
@@ -13,7 +14,7 @@
 
 
 def onInstall():
-    if wx.GetCpuArchitectureName() != 'x64':
+    if platform.machine() != 'AMD64':
         gui.messageBox(
             # Translators: content of a message box
             _("32-bit system detected. Piper voices work on 64-bit systems only. Installation aborted!"),
@@ -22,4 +23,4 @@ def onInstall():
             wx.ICON_ERROR
         )
         raise RuntimeError("Unsupported OS architecture")
-    
+
diff --git a/addon/synthDrivers/piper_neural_voices/__init__.py b/addon/synthDrivers/piper_neural_voices/__init__.py
@@ -3,26 +3,19 @@
 # Copyright (c) 2023 Musharraf Omer
 # This file is covered by the GNU General Public License.
 
-import queue
-import sys
 import threading
-import webbrowser
 from collections import OrderedDict
 from contextlib import suppress
 from functools import partial
-from itertools import zip_longest
 
 import config
-import globalVars
 import languageHandler
 import nvwave
 import synthDriverHandler
 from autoSettingsUtils.driverSetting import DriverSetting, NumericDriverSetting
 from logHandler import log
-from speech.sayAll import SayAllHandler
 from speech.commands import (
     BreakCommand,
-    CharacterModeCommand,
     IndexCommand,
     LangChangeCommand,
     RateCommand,
@@ -41,10 +34,7 @@
 from . import grpc_client
 from ._config import PiperConfig
 from .tts_system import (
-    PIPER_VOICE_SAMPLES_URL,
-    AudioTask,
     PiperTextToSpeechSystem,
-    SilenceTask,
     SpeakerNotFoundError,
     SpeechOptions,
 )
@@ -55,7 +45,6 @@
 addonHandler.initTranslation()
 
 
-# This should run from the check method
 grpc_client.initialize()
 
 
@@ -85,12 +74,15 @@ async def __call__(self):
 
 
 class IndexReachedTask:
-    def __init__(self, callback, index):
+    __slots__ = ["callback", "index_list"]
+
+    def __init__(self, callback, index_list):
         self.callback = callback
-        self.index = index
+        self.index_list = index_list
 
     async def __call__(self):
-        await aio.run_in_executor(self.callback, self.index)
+        for index in self.index_list:
+            await aio.run_in_executor(self.callback, index)
 
 
 class SpeechTask:
@@ -150,6 +142,7 @@ async def process_speech(speech_seq, is_canceled):
     return aio.ASYNCIO_EVENT_LOOP.create_task(speech_task)
 
 
+
 class SynthDriver(synthDriverHandler.SynthDriver):
 
     supportedSettings = (
@@ -236,29 +229,36 @@ def terminate(self):
         self._players.clear()
 
     def speak(self, speechSequence):
-        return self.speak_navigation(speechSequence)
+        return self._prepare_and_run_speech_task(speechSequence)
 
-    def speak_navigation(self, speechSequence):
+    def _prepare_and_run_speech_task(self, speechSequence):
         self.cancel()
         self._silence_event.clear()
         speech_seq = []
+        text_list = []
+        index_command_list = []
         default_lang = self.tts.language
         for item in self.combine_adjacent_strings(speechSequence):
             item_type = type(item)
-            if item_type is str:
+            if item_type is IndexCommand:
+                index_command_list.append(item.index)
+                continue
+            elif item_type is str:
+                text_list.append(item)
+                continue
+            if any(text_list):
                 speech_seq.append(
                     SpeechTask(
-                        self.tts.create_speech_task(item),
+                        self.tts.create_speech_provider("".join(text_list)),
                         self._player,
                         self._silence_event.is_set,
                     )
                 )
-            elif item_type is IndexCommand:
-                speech_seq.append(IndexReachedTask(self._on_index_reached, item.index))
-            elif item_type is BreakCommand:
+                text_list.clear()
+            if item_type is BreakCommand:
                 speech_seq.append(
                     BreakTask(
-                        self.tts.create_break_task(item.time),
+                        self.tts.create_break_provider(item.time),
                         self._player,
                         self._silence_event.is_set,
                     )
@@ -274,6 +274,16 @@ def speak_navigation(self, speechSequence):
                 self.tts.volume = item.newValue
             elif item_type is PitchCommand:
                 self.tts.pitch = item.newValue
+        if any(text_list):
+            speech_seq.append(
+                SpeechTask(
+                    self.tts.create_speech_provider("".join(text_list)),
+                    self._player,
+                    self._silence_event.is_set,
+                )
+            )
+        if any(index_command_list):
+            speech_seq.append(IndexReachedTask(self._on_index_reached, index_command_list))
         speech_seq.append(
             DoneSpeakingTask(
                 self._player, self._on_index_reached, self._silence_event.is_set