From 5b274a9b97a0156b2626917a36df4efca6f17b42 Mon Sep 17 00:00:00 2001
From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com>
Date: Thu, 19 Sep 2024 18:30:48 -0700
Subject: [PATCH] Refactor to extend changes in OvosDinkumListener (#203)

# Description
Update to use OVOSSTTFactory directly
Mark neon_speech.stt module as deprecated
Update dependencies to latest stable versions
Includes patch to handle config updates when FileWatcher fails to report
changes

# Issues
Closes #168
Relates to #158

# Other Notes
Validated k8s alpha deployment
Tested local Docker instance
Tested against Mark2 latest beta

---------

Co-authored-by: Daniel McKnight <daniel@neon.ai>
---
 neon_speech/service.py             | 39 ++++++++++++++++++----------
 neon_speech/stt.py                 | 12 ++++++++-
 neon_speech/utils.py               |  1 +
 requirements/docker.txt            |  2 +-
 requirements/requirements.txt      |  8 +++---
 requirements/test_requirements.txt |  3 +--
 tests/api_method_tests.py          |  6 ++---
 tests/unit_tests.py                | 41 +++++++++++++++++-------------
 8 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/neon_speech/service.py b/neon_speech/service.py
index 47953e5..3144fdb 100644
--- a/neon_speech/service.py
+++ b/neon_speech/service.py
@@ -27,7 +27,7 @@
 # SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
-from typing import Dict
+from typing import Dict, List, Tuple
 
 import ovos_dinkum_listener.plugins
 
@@ -48,9 +48,7 @@
 from ovos_dinkum_listener.service import OVOSDinkumVoiceService
 from ovos_dinkum_listener.voice_loop.voice_loop import ListeningMode
 
-from neon_speech.stt import STTFactory
-
-ovos_dinkum_listener.plugins.OVOSSTTFactory = STTFactory
+from ovos_plugin_manager.stt import OVOSSTTFactory as STTFactory
 
 _SERVICE_READY = Event()
 
@@ -131,8 +129,7 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error,
         self.lock = Lock()
         self._stop_service = Event()
         if self.config.get('listener', {}).get('enable_stt_api', True):
-            self.api_stt = STTFactory.create(config=self.config,
-                                             results_event=None)
+            self.api_stt = STTFactory.create(config=self.config)
         else:
             LOG.info("Skipping api_stt init")
             self.api_stt = None
@@ -223,6 +220,16 @@ def register_event_handlers(self):
         self.bus.on("neon.enable_wake_word", self.handle_enable_wake_word)
         self.bus.on("neon.disable_wake_word", self.handle_disable_wake_word)
 
+        # TODO: Patching config reload behavior
+        self.bus.on("configuration.patch", self._patch_handle_config_reload)
+
+    def _patch_handle_config_reload(self, _: Message):
+        # Workaround for `configuration.patch` events: the FileWatcher has been
+        # observed failing to report changes, so reload explicitly here.
+        # Configuration reload is idempotent, so the extra call has minimal impact
+        self.config.reload()
+        self.reload_configuration()
+
     def _handle_get_languages_stt(self, message):
         if self.config.get('listener', {}).get('enable_voice_loop', True):
             return OVOSDinkumVoiceService._handle_get_languages_stt(self,
@@ -414,9 +421,11 @@ def handle_get_stt(self, message: Message):
                 message.context['timing']['client_to_core'] = \
                     received_time - sent_time
             message.context['timing']['response_sent'] = time()
+            transcribed_str = [t[0] for t in transcriptions]
             self.bus.emit(message.reply(ident,
                                         data={"parser_data": parser_data,
-                                              "transcripts": transcriptions}))
+                                              "transcripts": transcribed_str,
+                                              "transcripts_with_conf": transcriptions}))
         except Exception as e:
             LOG.error(e)
             message.context['timing']['response_sent'] = time()
@@ -467,8 +476,9 @@ def build_context(msg: Message):
             message.context.setdefault('timing', dict())
             message.context['timing'] = {**timing, **message.context['timing']}
             context = build_context(message)
+            transribed_str = [t[0] for t in transcriptions]
             data = {
-                "utterances": transcriptions,
+                "utterances": transribed_str,
                 "lang": message.data.get("lang", "en-us")
             }
             # Send a new message to the skills module with proper routing ctx
@@ -478,7 +488,8 @@ def build_context(msg: Message):
             # Reply to original message with transcription/audio parser data
             self.bus.emit(message.reply(ident,
                                         data={"parser_data": parser_data,
-                                              "transcripts": transcriptions,
+                                              "transcripts": transribed_str,
+                                              "transcripts_with_conf": transcriptions,
                                               "skills_recv": handled}))
         except Exception as e:
             LOG.error(e)
@@ -528,7 +539,7 @@ def _write_encoded_file(audio_data: str) -> str:
         return wav_file_path
 
     def _get_stt_from_file(self, wav_file: str,
-                           lang: str = None) -> (AudioData, dict, list):
+                           lang: str = None) -> (AudioData, dict, List[Tuple[str, float]]):
         """
         Performs STT and audio processing on the specified wav_file
         :param wav_file: wav audio file to process
@@ -562,18 +573,18 @@ def _get_stt_from_file(self, wav_file: str,
                             self.api_stt.stream_data(data)
                         except EOFError:
                             break
-                    transcriptions = self.api_stt.stream_stop()
+                    transcriptions = self.api_stt.transcribe(None, None)
                     self.lock.release()
                 else:
                     LOG.error(f"Timed out acquiring lock, not processing: {wav_file}")
                     transcriptions = []
             else:
-                transcriptions = self.api_stt.execute(audio_data, lang)
+                transcriptions = self.api_stt.transcribe(audio_data, lang)
             if isinstance(transcriptions, str):
-                LOG.warning("Transcriptions is a str, no alternatives provided")
+                LOG.error("Transcriptions is a str, no alternatives provided")
                 transcriptions = [transcriptions]
 
-            transcriptions = [clean_quotes(t) for t in transcriptions]
+            transcriptions = [(clean_quotes(t[0]), t[1]) for t in transcriptions]
 
         get_stt = float(_stopwatch.time)
         with _stopwatch:
diff --git a/neon_speech/stt.py b/neon_speech/stt.py
index 7842361..1f4b927 100644
--- a/neon_speech/stt.py
+++ b/neon_speech/stt.py
@@ -30,15 +30,21 @@
 from inspect import signature
 from threading import Event
 
-from neon_utils import LOG
+from ovos_utils.log import LOG, log_deprecation
 from ovos_plugin_manager.stt import OVOSSTTFactory, get_stt_config
 from ovos_plugin_manager.templates.stt import StreamingSTT
 
 from ovos_config.config import Configuration
 
+log_deprecation("This module is deprecated. Import from `ovos_plugin_manager`",
+                "5.0.0")
+
 
 class WrappedSTT(StreamingSTT, ABC):
     def __new__(cls, base_engine, *args, **kwargs):
+        log_deprecation("This class is deprecated. Use "
+                        "`ovos_plugin_manager.templates.stt.StreamingSTT",
+                        "5.0.0")
         results_event = kwargs.get("results_event") or Event()
         # build STT
         for k in list(kwargs.keys()):
@@ -66,6 +72,10 @@ def stream_stop(self):
 
 
 class STTFactory(OVOSSTTFactory):
+    log_deprecation("This class is deprecated. Use "
+                    "`ovos_plugin_manager.stt.OVOSSTTFactory",
+                    "5.0.0")
+
     @staticmethod
     def create(config=None, results_event: Event = None):
         get_stt_config(config)
diff --git a/neon_speech/utils.py b/neon_speech/utils.py
index 6821a42..7369308 100644
--- a/neon_speech/utils.py
+++ b/neon_speech/utils.py
@@ -104,6 +104,7 @@ def init_stt_plugin(plugin: str):
         LOG.warning(f"Could not find plugin: {plugin}")
 
 
+@deprecated("Platform detection has been deprecated", "5.0.0")
 def use_neon_speech(func):
     """
     Wrapper to ensure call originates from neon_speech for stack checks.
diff --git a/requirements/docker.txt b/requirements/docker.txt
index bb0aae8..4db7f2a 100644
--- a/requirements/docker.txt
+++ b/requirements/docker.txt
@@ -1,5 +1,5 @@
 ovos-stt-plugin-vosk~=0.1
-neon-stt-plugin-nemo~=0.0.2
+neon-stt-plugin-nemo~=0.0.2,>=0.0.5a5
 onnxruntime!=1.16.0  # TODO: Patching https://github.com/microsoft/onnxruntime/issues/17631
 
 # Load alternative WW plugins so they are available
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 4ad807b..cf7822e 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,12 +1,12 @@
-ovos-dinkum-listener~=0.0
+ovos-dinkum-listener~=0.2
 ovos-bus-client~=0.0,>=0.0.3
 ovos-utils~=0.0,>=0.0.30
-ovos-plugin-manager~=0.0,>=0.0.23
+ovos-plugin-manager~=0.0,>=0.0.26a39
 click~=8.0
 click-default-group~=1.2
-neon-utils[network,audio]~=1.9
+neon-utils[network,audio]~=1.9,>=1.11.1a3
 ovos-config~=0.0,>=0.0.7
 
 ovos-vad-plugin-webrtcvad~=0.0.1
 ovos-ww-plugin-vosk~=0.1
-ovos-microphone-plugin-alsa~=0.0.0
\ No newline at end of file
+ovos-microphone-plugin-alsa~=0.1
\ No newline at end of file
diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt
index 94bb8f2..70bb5a3 100644
--- a/requirements/test_requirements.txt
+++ b/requirements/test_requirements.txt
@@ -1,5 +1,4 @@
-neon-stt-plugin-deepspeech_stream_local~=2.0
-neon-stt-plugin-nemo~=0.0,>=0.0.2
+neon-stt-plugin-nemo~=0.0,>=0.0.5a4
 ovos-stt-plugin-vosk~=0.1
 ovos-stt-plugin-server~=0.0.3
 pytest
diff --git a/tests/api_method_tests.py b/tests/api_method_tests.py
index 2354b52..4383cf2 100644
--- a/tests/api_method_tests.py
+++ b/tests/api_method_tests.py
@@ -65,9 +65,9 @@ def setUpClass(cls) -> None:
         use_neon_speech(init_config_dir)()
 
         test_config = dict(Configuration())
-        test_config["stt"]["module"] = "deepspeech_stream_local"
+        test_config["stt"]["module"] = "neon-stt-plugin-nemo"
         test_config["listener"]["VAD"]["module"] = "dummy"
-        assert test_config["stt"]["module"] == "deepspeech_stream_local"
+        assert test_config["stt"]["module"] == "neon-stt-plugin-nemo"
 
         ready_event = Event()
 
@@ -77,7 +77,7 @@ def _ready():
         cls.speech_service = NeonSpeechClient(speech_config=test_config,
                                               daemonic=False, bus=cls.bus,
                                               ready_hook=_ready)
-        assert cls.speech_service.config["stt"]["module"] == "deepspeech_stream_local"
+        assert cls.speech_service.config["stt"]["module"] == "neon-stt-plugin-nemo"
         cls.speech_service.start()
 
         if not ready_event.wait(120):
diff --git a/tests/unit_tests.py b/tests/unit_tests.py
index d6b8b32..fc02d2d 100644
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@@ -31,10 +31,12 @@
 import shutil
 import sys
 import unittest
+import yaml
 
 from os.path import dirname, join
 from threading import Thread, Event
-from unittest.mock import Mock, patch
+from unittest import skip
+from unittest.mock import patch
 from click.testing import CliRunner
 
 from ovos_bus_client import Message
@@ -44,7 +46,8 @@
 
 CONFIG_PATH = os.path.join(dirname(__file__), "config")
 os.environ["XDG_CONFIG_HOME"] = CONFIG_PATH
-
+os.environ["OVOS_CONFIG_BASE_FOLDER"] = "neon"
+os.environ["OVOS_CONFIG_FILENAME"] = "neon.yaml"
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
 
@@ -80,12 +83,13 @@ def test_install_stt_plugin(self):
             "ovos-stt-plugin-vosk"))
         import ovos_stt_plugin_vosk
 
+    @skip("Configuration patching is deprecated")
     def test_patch_config(self):
         from neon_speech.utils import use_neon_speech
         from neon_utils.configuration_utils import init_config_dir
         test_config_dir = os.path.join(os.path.dirname(__file__), "config")
         os.makedirs(test_config_dir, exist_ok=True)
-        os.environ["XDG_CONFIG_HOME"] = test_config_dir
+
         use_neon_speech(init_config_dir)()
 
         with open(join(test_config_dir, "OpenVoiceOS", 'ovos.conf')) as f:
@@ -117,7 +121,7 @@ def test_get_stt_from_file(self):
         AUDIO_FILE_PATH = os.path.join(os.path.dirname(
             os.path.realpath(__file__)), "audio_files")
         TEST_CONFIG = use_neon_speech(Configuration)()
-        TEST_CONFIG["stt"]["module"] = "deepspeech_stream_local"
+        TEST_CONFIG["stt"]["module"] = "neon-stt-plugin-nemo"
         bus = FakeBus()
         bus.connected_event = Event()
         bus.connected_event.set()
@@ -129,7 +133,8 @@ def test_get_stt_from_file(self):
         self.assertIsInstance(audio, AudioData)
         self.assertIsInstance(context, dict)
         self.assertIsInstance(transcripts, list)
-        self.assertIn("stop", transcripts)
+        tr_str = [t[0] for t in transcripts]
+        self.assertIn("stop", tr_str)
 
         def threaded_get_stt():
             audio, context, transcripts = \
@@ -137,7 +142,8 @@ def threaded_get_stt():
             self.assertIsInstance(audio, AudioData)
             self.assertIsInstance(context, dict)
             self.assertIsInstance(transcripts, list)
-            self.assertIn("stop", transcripts)
+            tr_str = [t[0] for t in transcripts]
+            self.assertIn("stop", tr_str)
 
         threads = list()
         for i in range(0, 12):
@@ -156,7 +162,7 @@ def test_ovos_plugin_compat(self):
         ovos_vosk_streaming = STTFactory().create(
             {'module': 'ovos-stt-plugin-vosk-streaming',
              'lang': 'en-us'})
-        self.assertIsInstance(ovos_vosk_streaming.results_event, Event)
+        # self.assertIsInstance(ovos_vosk_streaming.results_event, Event)
         test_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "audio_files", "stop.wav")
         from neon_utils.file_utils import get_audio_file_stream
@@ -225,24 +231,23 @@ def on_ready(cls):
 
     @classmethod
     def setUpClass(cls):
-        from ovos_config.config import update_mycroft_config
-        from neon_utils.configuration_utils import init_config_dir
-        init_config_dir()
+        os.makedirs(join(CONFIG_PATH, "neon"), exist_ok=True)
+        test_config = join(CONFIG_PATH, "neon", "neon.yaml")
+        with open(test_config, 'w+') as f:
+            yaml.dump({"hotwords": cls.hotwords_config,
+                       "stt": {"module": "neon-stt-plugin-nemo"},
+                       "VAD": {"module": "dummy"}}, f)
 
-        update_mycroft_config({"hotwords": cls.hotwords_config,
-                               "stt": {"module": "neon-stt-plugin-nemo"},
-                               "VAD": {"module": "dummy"}})
         import importlib
         import ovos_config.config
         importlib.reload(ovos_config.config)
-        # from ovos_config.config import Configuration
-        # assert Configuration.xdg_configs[0]['hotwords'] == hotwords_config
+        from ovos_config.config import Configuration
+        assert Configuration.xdg_configs[0]['hotwords'] == cls.hotwords_config
 
-        from neon_speech.utils import use_neon_speech
-        use_neon_speech(init_config_dir)()
         from neon_speech.service import NeonSpeechClient
         cls.service = NeonSpeechClient(bus=cls.bus, ready_hook=cls.on_ready)
-        # assert Configuration() == service.loop.config_core
+
+        assert cls.service.reload_configuration in Configuration._callbacks
 
         def _mocked_run():
             stopping_event = Event()