Commit

Add stt example
chrismbirmingham committed Mar 31, 2021
1 parent 4ac63e9 commit efce11e
Showing 3 changed files with 114 additions and 0 deletions.
20 changes: 20 additions & 0 deletions requirements.txt
@@ -0,0 +1,20 @@
deepspeech==0.9.3
PyAudio==0.2.11

gdown
numpy>=1.16.0
torch>=1.5
librosa>=0.5.1
Unidecode>=0.4.20
matplotlib
Pillow
flask
scipy
tqdm
soundfile
phonemizer
bokeh==1.4.0
inflect==5.3.0
sounddevice==0.4.1

rasa==2.4.2
2 changes: 2 additions & 0 deletions setup_stt.sh
@@ -0,0 +1,2 @@
wget -P models "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm"
wget -P models "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer"
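Note: setup_stt.sh fetches the acoustic model and scorer into a models/ directory (wget -P models creates it if needed). A likely run sequence, assuming the repository root as the working directory, is: pip install -r requirements.txt, then bash setup_stt.sh, then python stt_example.py.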
92 changes: 92 additions & 0 deletions stt_example.py
@@ -0,0 +1,92 @@

import deepspeech
import numpy as np
import os
import pyaudio
import time

# CUDA support for DeepSpeech is controlled at the pip package level:
# pip install deepspeech-gpu

# DeepSpeech parameters
BEAM_WIDTH = 700
LM_ALPHA = 0.75
LM_BETA = 1.85

MODEL_FILE_PATH = os.path.join('models', 'deepspeech-0.9.3-models.pbmm')
SCORER_PATH = os.path.join('models', 'deepspeech-0.9.3-models.scorer')


class Transcriber:
    def __init__(self, model):
        self.model = model
        self.model.enableExternalScorer(SCORER_PATH)
        self.model.setScorerAlphaBeta(LM_ALPHA, LM_BETA)
        self.model.setBeamWidth(BEAM_WIDTH)
        # self.model.enableDecoderWithLM(LM_FILE_PATH, TRIE_FILE_PATH, LM_ALPHA, LM_BETA)

        # Create a streaming session
        self.ds_stream = self.model.createStream()

        # State for feeding DeepSpeech audio from the PyAudio callback
        self.text_so_far = ''
        self.t_start = time.time()
        self.t_wait = .5  # seconds of unchanged text before an utterance is finalized
        self.final_text = None

    def process_audio(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: feed raw audio into DeepSpeech and decode."""
        data16 = np.frombuffer(in_data, dtype=np.int16)
        self.ds_stream.feedAudioContent(data16)
        text = self.ds_stream.intermediateDecode()
        try:
            if text != self.text_so_far:
                # Transcript changed: report interim text and reset the quiet timer
                if text not in ["i ", "he ", "the "]:
                    print('Interim text = {};'.format(text))
                self.text_so_far = text
                self.t_start = time.time()
            elif text != '' and (time.time() - self.t_start > self.t_wait):
                # Transcript has been stable for t_wait seconds: finalize the utterance
                if text not in ["i ", "he ", "the "]:
                    print("Finishing stream")
                    text = self.ds_stream.finishStream()
                    print('Final text = {}.\n'.format(text))
                    self.final_text = text
                self.ds_stream = self.model.createStream()
        except Exception as e:
            print(f"Text: '{text}'; So far: '{self.text_so_far}'")
            print(self.t_start)
            raise e
        return (in_data, pyaudio.paContinue)

    def listen(self):
        print("setting up to listen")
        # Feed audio to DeepSpeech in a callback to PyAudio
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=16000,
            input=True,
            frames_per_buffer=1024,
            stream_callback=self.process_audio
        )

        print('Please start speaking, when done press Ctrl-C ...')
        self.stream.start_stream()
        print("listening now")
        return


if __name__ == '__main__':
    # Make DeepSpeech model
    model = deepspeech.Model(MODEL_FILE_PATH)
    stt = Transcriber(model)
    stt.listen()
    try:
        while stt.stream.is_active():
            time.sleep(0.05)
    except KeyboardInterrupt:
        # Clean up PyAudio resources
        stt.stream.stop_stream()
        stt.stream.close()
        stt.audio.terminate()
        print('Finished recording.')

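For reference, a minimal consumer sketch (hypothetical, not part of this commit): it assumes stt_example.py is importable from the working directory and polls final_text so completed utterances can be handed to downstream code instead of only printed.

import time

import deepspeech

from stt_example import MODEL_FILE_PATH, Transcriber


def handle_utterance(text):
    # Placeholder for downstream handling, e.g. forwarding text to a dialogue system
    print(f"Heard: {text}")


if __name__ == '__main__':
    model = deepspeech.Model(MODEL_FILE_PATH)
    stt = Transcriber(model)
    stt.listen()
    try:
        while stt.stream.is_active():
            if stt.final_text:
                handle_utterance(stt.final_text)
                stt.final_text = None  # clear so the next utterance is picked up
            time.sleep(0.05)
    except KeyboardInterrupt:
        stt.stream.stop_stream()
        stt.stream.close()
        stt.audio.terminate()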