diff --git a/GUIs/qt/audio_visualizer.py b/GUIs/qt/audio_visualizer.py
index cbe742f..5d8e55f 100644
--- a/GUIs/qt/audio_visualizer.py
+++ b/GUIs/qt/audio_visualizer.py
@@ -121,6 +121,7 @@ def update(self):
         update the mesh and shift the noise each time
         """
         wf_data = self.stream.read(self.CHUNK)
+        print(type(wf_data))  # debug: confirm the type of the raw audio buffer
 
         verts, faces, colors = self.mesh(offset=self.offset, wf_data=wf_data)
         self.mesh1.setMeshData(vertexes=verts, faces=faces, faceColors=colors)
diff --git a/learning_diary/may.md b/learning_diary/may.md
index d5bd28c..aba5026 100644
--- a/learning_diary/may.md
+++ b/learning_diary/may.md
@@ -1,4 +1,13 @@
 # MAY 2023
-## Automatic Speech Recognition
+## Automatic Speech Recognition (ASR)
+1. [**Tutorials — NVIDIA NeMo**](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/starthere/tutorials.html)
+2. [**SpeechBrain: A PyTorch Speech Toolkit**](https://speechbrain.github.io/index.html)
+3. [**Uberi/speech_recognition: Speech recognition module for Python, supporting several engines and APIs, online and offline. (github.com)**](https://github.com/Uberi/speech_recognition/tree/master)
+4. [**mozilla/DeepSpeech: DeepSpeech is an open source embedded (offline, on-device) speech-to-text engine which can run in real time on devices ranging from a Raspberry Pi 4 to high power GPU servers. (github.com)**](https://github.com/mozilla/DeepSpeech)
+5. [**Introducing Whisper (openai.com)**](https://openai.com/research/whisper)
+
+### Offline vs. Online
+
+The above links refer to open-source models that usually need a GPU to run properly in real time. On a mid-budget laptop they are too slow for real-time use, especially if ASR is not the only model you want to run on the machine.
 
 
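
To make the offline-vs-online trade-off above concrete, here is a minimal sketch (not taken from the repository) using the `speech_recognition` package from link 3. It assumes `pip install SpeechRecognition pocketsphinx` and a working microphone; PocketSphinx runs entirely on-device on a CPU, while the Google Web Speech backend sends audio over the network.

```python
# Hypothetical sketch: contrast an offline and an online engine with the
# speech_recognition package. Assumes SpeechRecognition and pocketsphinx
# are installed and a microphone is available.
import speech_recognition as sr

recognizer = sr.Recognizer()

with sr.Microphone() as source:
    # Calibrate the energy threshold to the room's background noise.
    recognizer.adjust_for_ambient_noise(source)
    print("Say something...")
    audio = recognizer.listen(source)

# Offline: CMU PocketSphinx runs on-device on a plain CPU (no network, lower accuracy).
try:
    print("Sphinx (offline):", recognizer.recognize_sphinx(audio))
except sr.UnknownValueError:
    print("Sphinx could not understand the audio")

# Online: the Google Web Speech API is more accurate but needs a network round trip.
try:
    print("Google (online):", recognizer.recognize_google(audio))
except sr.UnknownValueError:
    print("Google could not understand the audio")
except sr.RequestError as e:
    print(f"Online request failed: {e}")
```

The Sphinx path is what the diary entry means by "offline": everything stays on the device, trading accuracy for the ability to run without a GPU or an internet connection.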