refactor/stt - deprecate OPM -> ovos-stt-server only (#68)

OpenVoiceOS · Jun 9, 2023 · a0fdbe2 · a0fdbe2
1 parent 828ea23
commit a0fdbe2
Show file tree

Hide file tree

Showing 4 changed files with 74 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -29,6 +29,8 @@ pip install ovos-local-backend
 - [ovos-backend-manager](https://github.com/OpenVoiceOS/ovos-backend-manager) - graphical interface to manage all things backend
 - [ovos-stt-plugin-selene](https://github.com/OpenVoiceOS/ovos-stt-plugin-selene) - stt plugin for selene/local backend (DEPRECATED)
 
+You can use this backend as an STT server proxy via [ovos-stt-plugin-server](https://github.com/OpenVoiceOS/ovos-stt-plugin-server), e.g. `https://your_backend.org/stt`
+
 
 ## Configuration
 
@@ -47,13 +49,10 @@ configure backend by editing/creating ```~/.config/mycroft/ovos_backend.conf```
     "timezone": {"...": "..."}
   },
 
-  "stt": {
-    "module": "ovos-stt-plugin-server",
-    "ovos-stt-plugin-server": {"url": "https://stt.openvoiceos.org/stt"}
-  },
+  "stt_servers": ["https://stt.openvoiceos.org/stt"],
 
   "server": {
-    "admin_key": "",
+    "admin_key": "leave empty to DISABLE admin api",
     "port": 6712,
     "database": "sqlite:////home/user/.local/share/ovos_backend.db",
     "skip_auth": false,

diff --git a/ovos_local_backend/backend/stt.py b/ovos_local_backend/backend/stt.py
@@ -14,45 +14,74 @@
 from tempfile import NamedTemporaryFile
 
 import flask
-from speech_recognition import Recognizer, AudioFile
+import requests
+from ovos_config import Configuration
+from speech_recognition import Recognizer, AudioFile, AudioData
+
 from ovos_local_backend.backend import API_VERSION
 from ovos_local_backend.backend.decorators import noindex, requires_auth, requires_opt_in
-from ovos_config import Configuration
 from ovos_local_backend.database import add_stt_recording
-from ovos_plugin_manager.stt import OVOSSTTFactory
 
-recognizer = Recognizer()
-engine = OVOSSTTFactory.create(Configuration()["stt"])
 
+def transcribe(audio: AudioData, lang: str, timeout: float = 30) -> str:
+    """Transcribe audio by forwarding it to the configured STT servers.
+
+    The urls listed under the "stt_servers" config key (falling back to
+    https://stt.openvoiceos.org/stt when none are configured) are tried in
+    order; the text of the first successful response is returned.
+
+    Args:
+        audio: recording to transcribe, posted to the server as WAV data.
+        lang: language code, sent as the "lang" query parameter.
+        timeout: seconds to wait on each server before moving to the next
+            one; without it a stalled server would block the request forever.
+
+    Returns:
+        The response body of the first server that answered successfully,
+        or "" if every server failed.
+    """
+    urls = Configuration().get("stt_servers") or ["https://stt.openvoiceos.org/stt"]
+
+    for url in urls:
+        try:
+            response = requests.post(url, data=audio.get_wav_data(),
+                                     headers={"Content-Type": "audio/wav"},
+                                     params={"lang": lang},
+                                     timeout=timeout)
+        except Exception:
+            # best effort - a broken server must not prevent trying the next
+            # one; unlike a bare except this still lets KeyboardInterrupt and
+            # SystemExit propagate
+            continue
+        if response.ok:  # any status code below 400
+            return response.text
+    return ""
+
+
+def bytes2audiodata(data: bytes) -> AudioData:
+    """Decode the bytes of an audio file into an AudioData object.
+
+    The bytes are written to a temporary file which is then read back in
+    full through speech_recognition's AudioFile / Recognizer.record.
+
+    Args:
+        data: raw contents of an audio file, as received in a request body.
+
+    Returns:
+        The AudioData recorded from the entire file.
+    """
+    recognizer = Recognizer()
+    with NamedTemporaryFile() as fp:
+        fp.write(data)
+        # AudioFile reopens the file by name, so the buffered bytes must be
+        # flushed to disk first or it may see an empty/truncated file
+        fp.flush()
+        with AudioFile(fp.name) as source:
+            audio = recognizer.record(source)  # read the entire audio file
+    return audio
 
-@requires_opt_in
-def save_stt_recording(uuid, audio, utterance):
-    audio_bytes = audio.get_wav_data()
-    add_stt_recording(uuid, audio_bytes, utterance)
+
+@requires_opt_in  # this decorator ensures the uuid opted-in
+def save_stt_recording(uuid: str, audio: AudioData, utterance: str):
+    """Store a recording and its transcription, if the config allows it.
+
+    Nothing is saved unless the "listener" config section enables
+    "record_utterances" (or "save_utterances", kept for backwards compat).
+
+    Args:
+        uuid: identifier of the device the recording belongs to.
+        audio: the recording; it is stored as WAV bytes.
+        utterance: transcription associated with the recording.
+    """
+    listener_cfg = Configuration()["listener"]
+    if not (listener_cfg.get("record_utterances") or
+            listener_cfg.get("save_utterances")):  # backwards compat
+        return
+    add_stt_recording(uuid, audio.get_wav_data(), utterance)
 
 
 def get_stt_routes(app):
+    # makes personal backend a valid entry in ovos-stt-plugin-server
+    # DOES NOT save data
+    @app.route("/stt", methods=['POST'])
+    @noindex
+    def stt_public_server():
+        audio_bytes = flask.request.data
+        lang = str(flask.request.args.get("lang", "en-us"))
+        audio = bytes2audiodata(audio_bytes)
+        utterance = transcribe(audio, lang)
+        return json.dumps([utterance])
+
+    # DEPRECATED - compat for old selene plugin
+    # if opt-in saves recordings
     @app.route("/" + API_VERSION + "/stt", methods=['POST'])
     @noindex
     @requires_auth
     def stt():
         flac_audio = flask.request.data
         lang = str(flask.request.args.get("lang", "en-us"))
-        with NamedTemporaryFile() as fp:
-            fp.write(flac_audio)
-            with AudioFile(fp.name) as source:
-                audio = recognizer.record(source)  # read the entire audio file
-            try:
-                utterance = engine.execute(audio, language=lang)
-            except:
-                utterance = ""
-
-        allowed = Configuration()["listener"].get("record_utterances") or \
-                  Configuration()["listener"].get("save_utterances")  # backwards compat
-        if allowed:
-            auth = flask.request.headers.get('Authorization', '').replace("Bearer ", "")
-            uuid = auth.split(":")[-1]  # this split is only valid here, not selene
-            save_stt_recording(uuid, audio, utterance)
+        audio = bytes2audiodata(flac_audio)
+        utterance = transcribe(audio, lang)
+
+        auth = flask.request.headers.get('Authorization', '').replace("Bearer ", "")
+        uuid = auth.split(":")[-1]  # this split is only valid here, not selene
+        save_stt_recording(uuid, audio, utterance)
 
         return json.dumps([utterance])
 

diff --git a/ovos_local_backend/database.py b/ovos_local_backend/database.py
@@ -224,18 +224,20 @@ def deserialize(data):
 
         lang = data.get("lang") or _cfg.get("lang") or "en-us"
 
-        voice_id = None
-        tts_module = data.get("default_tts")
-        tts_config = data.get("default_tts_cfg") or {}
-        if tts_module:
-            voice_id = get_voice_id(tts_module, lang, tts_config)
-
-        ww_id = None
-        ww_name = data.get("default_ww")
-        ww_config = data.get("default_ww_cfg") or {}
-        ww_module = ww_config.get("module")
-        if ww_module:
-            ww_id = get_ww_id(ww_module, ww_name, ww_config)
+        voice_id = data.get("voice_id")
+        if not voice_id:
+            tts_module = data.get("default_tts")
+            tts_config = data.get("default_tts_cfg") or {}
+            if tts_module:
+                voice_id = get_voice_id(tts_module, lang, tts_config)
+
+        ww_id = data.get("ww_id")
+        if not ww_id:
+            ww_name = data.get("default_ww")
+            ww_config = data.get("default_ww_cfg") or {}
+            ww_module = ww_config.get("module")
+            if ww_module:
+                ww_id = get_ww_id(ww_module, ww_name, ww_config)
 
         loc = data.get("location") or _loc
 
@@ -271,22 +273,6 @@ def serialize(self):
                 _mail_cfg.get("recipient") or \
                 _mail_cfg.get("smtp", {}).get("username")
 
-        default_tts = None
-        default_tts_cfg = {}
-        if self.voice_id:
-            voice: VoiceDefinition = get_voice_definition(self.voice_id)
-            if voice:
-                default_tts_cfg = voice.tts_config
-                default_tts = voice.plugin
-
-        default_ww = None
-        default_ww_cfg = {}
-        if self.ww_id:
-            ww: WakeWordDefinition = get_wakeword_definition(self.ww_id)
-            if ww:
-                default_ww_cfg = ww.ww_config
-                default_ww = ww.name
-
         return {
             "uuid": self.uuid,
             "token": self.token,
@@ -300,10 +286,8 @@ def serialize(self):
             "system_unit": self.system_unit,
             "lang": self.lang or _cfg.get("lang") or "en-us",
             "location": self.location_json,
-            "default_tts": default_tts,
-            "default_tts_cfg": default_tts_cfg,
-            "default_ww": default_ww,
-            "default_ww_cfg": default_ww_cfg
+            "voice_id": self.voice_id,
+            "ww_id": self.ww_id
         }
 
 

diff --git a/ovos_local_backend/ovos_backend.conf b/ovos_local_backend/ovos_backend.conf
@@ -29,10 +29,7 @@
     }
   },
 
-  "stt": {
-    "module": "ovos-stt-plugin-server",
-    "ovos-stt-plugin-server": {"url": "https://stt.openvoiceos.org/stt"}
-  },
+  "stt_servers": ["https://stt.openvoiceos.org/stt"],
 
   "server": {
     "admin_key": "",