Skip to content
This repository has been archived by the owner on Nov 18, 2024. It is now read-only.

Commit

Permalink
refactor/stt - deprecate OPM -> ovos-stt-server only (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl authored Jun 9, 2023
1 parent 828ea23 commit a0fdbe2
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 65 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pip install ovos-local-backend
- [ovos-backend-manager](https://github.com/OpenVoiceOS/ovos-backend-manager) - graphical interface to manage all things backend
- [ovos-stt-plugin-selene](https://github.com/OpenVoiceOS/ovos-stt-plugin-selene) - stt plugin for selene/local backend (DEPRECATED)

You can use this backend as an STT server proxy via [ovos-stt-plugin-server](https://github.com/OpenVoiceOS/ovos-stt-plugin-server), e.g. `https://your_backend.org/stt`


## Configuration

Expand All @@ -47,13 +49,10 @@ configure backend by editing/creating ```~/.config/mycroft/ovos_backend.conf```
"timezone": {"...": "..."}
},

"stt": {
"module": "ovos-stt-plugin-server",
"ovos-stt-plugin-server": {"url": "https://stt.openvoiceos.org/stt"}
},
"stt_servers": ["https://stt.openvoiceos.org/stt"],

"server": {
"admin_key": "",
"admin_key": "leave empty to DISABLE admin api",
"port": 6712,
"database": "sqlite:////home/user/.local/share/ovos_backend.db",
"skip_auth": false,
Expand Down
77 changes: 53 additions & 24 deletions ovos_local_backend/backend/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,45 +14,74 @@
from tempfile import NamedTemporaryFile

import flask
from speech_recognition import Recognizer, AudioFile
import requests
from ovos_config import Configuration
from speech_recognition import Recognizer, AudioFile, AudioData

from ovos_local_backend.backend import API_VERSION
from ovos_local_backend.backend.decorators import noindex, requires_auth, requires_opt_in
from ovos_config import Configuration
from ovos_local_backend.database import add_stt_recording
from ovos_plugin_manager.stt import OVOSSTTFactory

recognizer = Recognizer()
engine = OVOSSTTFactory.create(Configuration()["stt"])

def transcribe(audio: AudioData, lang: str, timeout: float = 30.0) -> str:
    """Transcribe ``audio`` by trying each configured STT server in order.

    The server list is read from the ``stt_servers`` configuration key; when
    that key is missing or empty the public OpenVoiceOS server is used.

    Args:
        audio: recording to transcribe; it is sent as WAV data.
        lang: language code forwarded to the server as the ``lang`` query
            parameter.
        timeout: seconds to wait on each server before moving to the next one.

    Returns:
        The response text of the first server that answers with a successful
        status, or ``""`` when every server fails.
    """
    urls = Configuration().get("stt_servers") or ["https://stt.openvoiceos.org/stt"]
    # the payload is identical for every server, encode it only once
    wav_data = audio.get_wav_data()

    for url in urls:
        try:
            response = requests.post(url, data=wav_data,
                                     headers={"Content-Type": "audio/wav"},
                                     params={"lang": lang},
                                     timeout=timeout)
        except requests.RequestException:
            # unreachable, timed out or misconfigured server: try the next one
            continue
        if response.ok:
            return response.text
    return ""


def bytes2audiodata(data: bytes) -> "AudioData":
    """Decode raw audio file bytes into a speech_recognition ``AudioData``.

    Args:
        data: contents of an audio file, as received in the request body.

    Returns:
        The whole recording as read by ``Recognizer.record``.
    """
    recognizer = Recognizer()
    with NamedTemporaryFile() as fp:
        fp.write(data)
        # AudioFile reopens the file by name, so the buffered bytes must reach
        # the disk first or the reader may see an empty/truncated file
        fp.flush()
        with AudioFile(fp.name) as source:
            audio = recognizer.record(source)  # read the entire audio file
    return audio

@requires_opt_in
def save_stt_recording(uuid, audio, utterance):
    """Store the WAV bytes of ``audio`` together with ``utterance`` for ``uuid``."""
    add_stt_recording(uuid, audio.get_wav_data(), utterance)

@requires_opt_in  # this decorator ensures the uuid opted-in
def save_stt_recording(uuid: str, audio: AudioData, utterance: str):
    """Store an STT recording when the listener configuration allows it.

    Nothing is saved unless ``record_utterances`` (or the older
    ``save_utterances`` key) is set in the ``listener`` configuration section.

    Args:
        uuid: identifier the recording is stored under.
        audio: recording to store; it is saved as WAV data.
        utterance: transcription stored alongside the audio.
    """
    # "save_utterances" is only consulted for backwards compat
    if not (Configuration()["listener"].get("record_utterances")
            or Configuration()["listener"].get("save_utterances")):
        return
    add_stt_recording(uuid, audio.get_wav_data(), utterance)


def get_stt_routes(app):
# makes personal backend a valid entry in ovos-stt-plugin-server
# DOES NOT save data
@app.route("/stt", methods=['POST'])
@noindex
def stt_public_server():
audio_bytes = flask.request.data
lang = str(flask.request.args.get("lang", "en-us"))
audio = bytes2audiodata(audio_bytes)
utterance = transcribe(audio, lang)
return json.dumps([utterance])

# DEPRECATED - compat for old selene plugin
# if opt-in saves recordings
@app.route("/" + API_VERSION + "/stt", methods=['POST'])
@noindex
@requires_auth
def stt():
flac_audio = flask.request.data
lang = str(flask.request.args.get("lang", "en-us"))
with NamedTemporaryFile() as fp:
fp.write(flac_audio)
with AudioFile(fp.name) as source:
audio = recognizer.record(source) # read the entire audio file
try:
utterance = engine.execute(audio, language=lang)
except:
utterance = ""

allowed = Configuration()["listener"].get("record_utterances") or \
Configuration()["listener"].get("save_utterances") # backwards compat
if allowed:
auth = flask.request.headers.get('Authorization', '').replace("Bearer ", "")
uuid = auth.split(":")[-1] # this split is only valid here, not selene
save_stt_recording(uuid, audio, utterance)
audio = bytes2audiodata(flac_audio)
utterance = transcribe(audio, lang)

auth = flask.request.headers.get('Authorization', '').replace("Bearer ", "")
uuid = auth.split(":")[-1] # this split is only valid here, not selene
save_stt_recording(uuid, audio, utterance)

return json.dumps([utterance])

Expand Down
48 changes: 16 additions & 32 deletions ovos_local_backend/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,18 +224,20 @@ def deserialize(data):

lang = data.get("lang") or _cfg.get("lang") or "en-us"

voice_id = None
tts_module = data.get("default_tts")
tts_config = data.get("default_tts_cfg") or {}
if tts_module:
voice_id = get_voice_id(tts_module, lang, tts_config)

ww_id = None
ww_name = data.get("default_ww")
ww_config = data.get("default_ww_cfg") or {}
ww_module = ww_config.get("module")
if ww_module:
ww_id = get_ww_id(ww_module, ww_name, ww_config)
voice_id = data.get("voice_id")
if not voice_id:
tts_module = data.get("default_tts")
tts_config = data.get("default_tts_cfg") or {}
if tts_module:
voice_id = get_voice_id(tts_module, lang, tts_config)

ww_id = data.get("ww_id")
if not ww_id:
ww_name = data.get("default_ww")
ww_config = data.get("default_ww_cfg") or {}
ww_module = ww_config.get("module")
if ww_module:
ww_id = get_ww_id(ww_module, ww_name, ww_config)

loc = data.get("location") or _loc

Expand Down Expand Up @@ -271,22 +273,6 @@ def serialize(self):
_mail_cfg.get("recipient") or \
_mail_cfg.get("smtp", {}).get("username")

default_tts = None
default_tts_cfg = {}
if self.voice_id:
voice: VoiceDefinition = get_voice_definition(self.voice_id)
if voice:
default_tts_cfg = voice.tts_config
default_tts = voice.plugin

default_ww = None
default_ww_cfg = {}
if self.ww_id:
ww: WakeWordDefinition = get_wakeword_definition(self.ww_id)
if ww:
default_ww_cfg = ww.ww_config
default_ww = ww.name

return {
"uuid": self.uuid,
"token": self.token,
Expand All @@ -300,10 +286,8 @@ def serialize(self):
"system_unit": self.system_unit,
"lang": self.lang or _cfg.get("lang") or "en-us",
"location": self.location_json,
"default_tts": default_tts,
"default_tts_cfg": default_tts_cfg,
"default_ww": default_ww,
"default_ww_cfg": default_ww_cfg
"voice_id": self.voice_id,
"ww_id": self.ww_id
}


Expand Down
5 changes: 1 addition & 4 deletions ovos_local_backend/ovos_backend.conf
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@
}
},

"stt": {
"module": "ovos-stt-plugin-server",
"ovos-stt-plugin-server": {"url": "https://stt.openvoiceos.org/stt"}
},
"stt_servers": ["https://stt.openvoiceos.org/stt"],

"server": {
"admin_key": "",
Expand Down

0 comments on commit a0fdbe2

Please sign in to comment.