diff --git a/README.md b/README.md
index 3fea9c4..946a89b 100644
--- a/README.md
+++ b/README.md
@@ -23,18 +23,17 @@ This assistant can run offline on your local machine, and it respects your priva
 
 ![Settings](https://raw.githubusercontent.com/vietanhdev/llama-assistant/refs/heads/main/docs/custom-models.png)
 
-
 ## Supported Models
 
 - 📝 Text-only models:
-  - [Llama 3.2](https://github.com/facebookresearch/llama) - 1B, 3B (4/8-bit quantized)
-  - [Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF) (4-bit quantized)
+  - [Llama 3.2](https://github.com/facebookresearch/llama) - 1B, 3B (4/8-bit quantized).
+  - [Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF) (4-bit quantized).
   - And other models that [LlamaCPP](https://github.com/ggerganov/llama.cpp) supports via custom models. [See the list](https://github.com/ggerganov/llama.cpp).
 - 🖼️ Multimodal models:
-  - [Moondream2](https://huggingface.co/vikhyatk/moondream2)
-  - [MiniCPM-v2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf)
-  - [LLaVA 1.5/1.6](https://llava-vl.github.io/)
+  - [Moondream2](https://huggingface.co/vikhyatk/moondream2).
+  - [MiniCPM-v2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf).
+  - [LLaVA 1.5/1.6](https://llava-vl.github.io/).
 - Besides supported models, you can try other variants via custom models.
 
 ## TODO
 
@@ -45,7 +44,7 @@ This assistant can run offline on your local machine, and it respects your priva
 - [x] 📚 Support 5 other text models.
 - [x] 🖼️ Support 5 other multimodal models.
 - [x] ⚡ Streaming support for response.
-- [ ] 🎙️ Add offline STT support: WhisperCPP (WIP - [Experimental Code](llama_assistant/speech_recognition_whisper_experimental.py)).
+- [x] 🎙️ Add offline STT support: WhisperCPP.
 - [ ] 🧠 Knowledge database: Langchain or LlamaIndex?.
 - [ ] 🔌 Plugin system for extensibility.
 - [ ] 📰 News and weather updates.
@@ -59,11 +58,11 @@ This assistant can run offline on your local machine, and it respects your priva
 
 ## Features
 
-- 🎙️ Voice recognition for hands-free interaction
-- 💬 Natural language processing with Llama 3.2
-- 🖼️ Image analysis capabilities (TODO)
-- ⚡ Global hotkey for quick access (Cmd+Shift+Space on macOS)
-- 🎨 Customizable UI with adjustable transparency
+- 🎙️ Voice recognition for hands-free interaction.
+- 💬 Natural language processing with Llama 3.2.
+- 🖼️ Image analysis capabilities (TODO).
+- ⚡ Global hotkey for quick access (Cmd+Shift+Space on macOS).
+- 🎨 Customizable UI with adjustable transparency.
 
 **Note:** This project is a work in progress, and new features are being added regularly.
 
@@ -89,17 +88,17 @@ pip install pyaudio
 1. Clone the repository:
 
-   ```bash
-   git clone https://github.com/vietanhdev/llama-assistant.git
-   cd llama-assistant
-   ```
+```bash
+git clone https://github.com/vietanhdev/llama-assistant.git
+cd llama-assistant
+```
 
 2. Install the required dependencies:
 
-   ```bash
-   pip install -r requirements.txt
-   pip install pyaudio
-   ```
+```bash
+pip install -r requirements.txt
+pip install pyaudio
+```
diff --git a/llama_assistant/llama_assistant.py b/llama_assistant/llama_assistant.py
index 02259fa..bd2c179 100644
--- a/llama_assistant/llama_assistant.py
+++ b/llama_assistant/llama_assistant.py
@@ -46,7 +46,7 @@ from llama_assistant.custom_plaintext_editor import CustomPlainTextEdit
 from llama_assistant.global_hotkey import GlobalHotkey
 from llama_assistant.setting_dialog import SettingsDialog
-from llama_assistant.speech_recognition import SpeechRecognitionThread
+from llama_assistant.speech_recognition_thread import SpeechRecognitionThread
 from llama_assistant.utils import image_to_base64_data_uri, load_image
 from llama_assistant.model_handler import handler as model_handler
 from llama_assistant.icons import (
diff --git a/llama_assistant/speech_recognition.py b/llama_assistant/speech_recognition.py
deleted file mode 100644
index 2a0c0c0..0000000
--- a/llama_assistant/speech_recognition.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from PyQt6.QtCore import QThread, pyqtSignal
-import speech_recognition as sr
-
-
-class SpeechRecognitionThread(QThread):
-    finished = pyqtSignal(str)
-    error = pyqtSignal(str)
-
-    def __init__(self):
-        super().__init__()
-        self.recognizer = sr.Recognizer()
-        self.microphone = sr.Microphone()
-        self.stop_listening = False
-
-    def run(self):
-        with self.microphone as source:
-            self.recognizer.adjust_for_ambient_noise(source)
-            while not self.stop_listening:
-                try:
-                    audio = self.recognizer.listen(source, timeout=1, phrase_time_limit=10)
-                    text = self.recognizer.recognize_google(audio)
-                    self.finished.emit(text)
-                except sr.WaitTimeoutError:
-                    continue
-                except sr.UnknownValueError:
-                    self.error.emit("Could not understand audio")
-                except sr.RequestError as e:
-                    self.error.emit(f"Could not request results; {e}")
-
-    def stop(self):
-        self.stop_listening = True
diff --git a/llama_assistant/speech_recognition_thread.py b/llama_assistant/speech_recognition_thread.py
new file mode 100644
index 0000000..93cc753
--- /dev/null
+++ b/llama_assistant/speech_recognition_thread.py
@@ -0,0 +1,138 @@
+import pkgutil
+from pathlib import Path
+import datetime
+import os
+import re
+import requests
+
+from PyQt6.QtCore import QThread, pyqtSignal
+import speech_recognition as sr
+
+# patch whisper on file not find error
+# https://github.com/carloscdias/whisper-cpp-python/pull/12
+try:
+    import whisper_cpp_python
+except FileNotFoundError:
+    regex = r"(\"darwin\":\n\s*lib_ext = \")\.so(\")"
+    subst = "\\1.dylib\\2"
+
+    print("fixing and re-importing whisper_cpp_python...")
+    # load whisper_cpp_python and substitute .so with .dylib for darwin
+    package = pkgutil.get_loader("whisper_cpp_python")
+    whisper_path = Path(package.path)
+    whisper_cpp_py = whisper_path.parent.joinpath("whisper_cpp.py")
+    content = whisper_cpp_py.read_text()
+    result = re.sub(regex, subst, content, 0, re.MULTILINE)
+    whisper_cpp_py.write_text(result)
+
+    import whisper_cpp_python
+
+
+class SpeechRecognitionThread(QThread):
+    finished = pyqtSignal(str)
+    error = pyqtSignal(str)
+    WHISPER_THREADS = 4
+    WHISPER_LANGUAGE = "en"
+
+    def __init__(self):
+        super().__init__()
+        self.stop_listening = False
+
+        # Set up model path and download if necessary
+        self.model_dir = Path.home() / "llama-assistant" / "models" / "whisper-cpp"
+        self.model_path = self.model_dir / "ggml-base-fp16.bin"
+        self.download_model_if_needed()
+
+        # Initialize Whisper model
+        self.whisper = whisper_cpp_python.Whisper(
+            model_path=str(self.model_path), n_threads=self.WHISPER_THREADS
+        )
+
+        # Create temporary folder for audio files
+        self.tmp_audio_folder = Path.home() / "llama-assistant" / "tmp_audio"
+        self.tmp_audio_folder.mkdir(parents=True, exist_ok=True)
+
+    def download_model_if_needed(self):
+        if not self.model_path.exists():
+            print("Downloading Whisper model...")
+            self.model_dir.mkdir(parents=True, exist_ok=True)
+            url = "https://huggingface.co/danielus/ggml-whisper-models/resolve/main/ggml-base-fp16.bin"
+            response = requests.get(url)
+            with open(self.model_path, "wb") as f:
+                f.write(response.content)
+            print("Model downloaded successfully.")
+
+    def run(self):
+        recognizer = sr.Recognizer()
+        microphone = sr.Microphone()
+        try:
+            with microphone as source:
+                recognizer.adjust_for_ambient_noise(source)
+                while not self.stop_listening:
+                    try:
+                        recognizer.pause_threshold = 1
+                        audio_data = recognizer.listen(source, timeout=1, phrase_time_limit=5)
+
+                        # Save audio data to temporary file
+                        tmp_filepath = (
+                            self.tmp_audio_folder / f"temp_audio_{datetime.datetime.now()}.wav"
+                        )
+                        with open(tmp_filepath, "wb") as f:
+                            f.write(audio_data.get_wav_data())
+
+                        # Transcribe audio
+                        res = self.whisper.transcribe(
+                            file=tmp_filepath, language=self.WHISPER_LANGUAGE
+                        )
+                        transcription = res["text"]
+
+                        # Clean up transcription
+                        transcription = re.sub(r"\[.*\]", "", transcription)
+                        transcription = re.sub(r"\(.*\)", "", transcription)
+
+                        print(f"Transcription: {transcription}")
+                        os.remove(tmp_filepath)
+
+                        self.finished.emit(transcription)
+                    except sr.WaitTimeoutError:
+                        print("timeout")
+                        continue
+                    except sr.UnknownValueError:
+                        print("Could not understand audio")
+                        self.error.emit("Could not understand audio")
+                    except sr.RequestError as e:
+                        print(f"Could not request results; {e}")
+                        self.error.emit(f"Could not request results; {e}")
+        except KeyboardInterrupt:
+            print("Keyboard interrupt detected. Stopping speech recognition.")
+            self.stop()
+
+    def stop(self):
+        self.stop_listening = True
+
+
+# Demo code
+if __name__ == "__main__":
+    from PyQt6.QtWidgets import QApplication
+    import sys
+
+    app = QApplication(sys.argv)
+
+    def on_finished(text):
+        print(f"Transcription: {text}")
+        thread.stop()
+        app.quit()
+
+    def on_error(error_message):
+        print(f"Error: {error_message}")
+        thread.stop()
+        app.quit()
+
+    thread = SpeechRecognitionThread()
+    thread.finished.connect(on_finished)
+    thread.error.connect(on_error)
+
+    print("Starting speech recognition. Speak into your microphone...")
+    thread.start()
+
+    sys.exit(app.exec())
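The transcription cleanup in `run()` above strips Whisper's inline annotations, which the model emits for non-speech audio as bracketed or parenthesized tags such as `[BLANK_AUDIO]` or `(music)`. A minimal standalone sketch of that step — the helper name and sample strings are hypothetical, not part of the diff:

```python
import re

def clean_transcription(text: str) -> str:
    # Same two substitutions as in SpeechRecognitionThread.run();
    # .strip() is added here only to tidy whitespace for the checks below.
    text = re.sub(r"\[.*\]", "", text)
    text = re.sub(r"\(.*\)", "", text)
    return text.strip()

# Hypothetical samples: annotations are removed, plain speech passes through.
assert clean_transcription("[BLANK_AUDIO] hello there") == "hello there"
assert clean_transcription("turn off the lights (music)") == "turn off the lights"
```

Note that both patterns are greedy, so a string with two bracketed spans also loses the text between them; for short voice commands that trade-off is usually acceptable.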
diff --git a/llama_assistant/speech_recognition_whisper_experimental.py b/llama_assistant/speech_recognition_whisper_experimental.py
deleted file mode 100644
index 1da51c4..0000000
--- a/llama_assistant/speech_recognition_whisper_experimental.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import threading
-import queue
-import pyaudio
-import wave
-import os
-from pathlib import Path
-import datetime
-from whisper_cpp_python import Whisper
-import re
-import requests
-
-
-class SpeechRecognition:
-    def __init__(self):
-        # Audio settings
-        self.RATE = 16000
-        self.CHUNK = self.RATE
-        self.NB_CHANNELS = 1
-        self.RECORD_SECONDS = 1
-
-        # Whisper settings
-        self.WHISPER_LANGUAGE = "en"
-        self.WHISPER_THREADS = 1
-
-        # Initialize queues
-        self.audio_queue = queue.Queue()
-        self.text_queue = queue.Queue()
-
-        # Set up model path and download if necessary
-        self.model_dir = Path.home() / "llama-assistant" / "models" / "whisper-cpp"
-        self.model_path = self.model_dir / "ggml-tiny-fp16.bin"
-        self.download_model_if_needed()
-
-        # Initialize Whisper model
-        self.whisper = Whisper(model_path=str(self.model_path), n_threads=self.WHISPER_THREADS)
-
-        # Initialize PyAudio
-        self.audio = pyaudio.PyAudio()
-        self.stream = self.audio.open(
-            format=pyaudio.paInt16,
-            channels=self.NB_CHANNELS,
-            rate=self.RATE,
-            input=True,
-            frames_per_buffer=self.CHUNK,
-        )
-
-        # Create temporary folder for audio files
-        self.tmp_audio_folder = Path("./tmp_audio")
-        if not self.tmp_audio_folder.exists():
-            self.tmp_audio_folder.mkdir()
-
-        self.stop_listening = False
-
-    def download_model_if_needed(self):
-        if not self.model_path.exists():
-            print("Downloading Whisper model...")
-            self.model_dir.mkdir(parents=True, exist_ok=True)
-            url = "https://huggingface.co/danielus/ggml-whisper-models/resolve/main/ggml-tiny-fp16.bin"
-            response = requests.get(url)
-            with open(self.model_path, "wb") as f:
-                f.write(response.content)
-            print("Model downloaded successfully.")
-
-    def listen(self):
-        while not self.stop_listening:
-            audio_data = self.stream.read(self.CHUNK)
-            self.audio_queue.put(audio_data)
-
-    def transcribe(self):
-        while not self.stop_listening:
-            if not self.audio_queue.empty():
-                audio_data = self.audio_queue.get()
-
-                # Save audio data to temporary file
-                tmp_filepath = f"./tmp_audio/output_{datetime.datetime.now()}.wav"
-                with wave.open(tmp_filepath, "wb") as wf:
-                    wf.setnchannels(self.NB_CHANNELS)
-                    wf.setsampwidth(2)  # 16-bit audio
-                    wf.setframerate(self.RATE)
-                    wf.writeframes(audio_data)
-
-                # Transcribe audio
-                res = self.whisper.transcribe(file=tmp_filepath, language=self.WHISPER_LANGUAGE)
-                transcription = res["text"]
-
-                # Clean up transcription
-                transcription = re.sub(r"\[.*\]", "", transcription)
-                transcription = re.sub(r"\(.*\)", "", transcription)
-
-                # Add transcription to text queue
-                self.text_queue.put(transcription)
-
-                # Cleanup
-                os.remove(tmp_filepath)
-
-    def start(self):
-        self.stop_listening = False
-        threading.Thread(target=self.listen, daemon=True).start()
-        threading.Thread(target=self.transcribe, daemon=True).start()
-
-    def stop(self):
-        self.stop_listening = True
-        self.stream.stop_stream()
-        self.stream.close()
-        self.audio.terminate()
-
-    def get_transcription(self):
-        if not self.text_queue.empty():
-            return self.text_queue.get()
-        return None
-
-
-# Example usage
-if __name__ == "__main__":
-    recognizer = SpeechRecognition()
-    recognizer.start()
-
-    print("Speech recognition started. Press Ctrl+C to stop.")
-    try:
-        while True:
-            transcription = recognizer.get_transcription()
-            if transcription:
-                print(f"Transcription: {transcription}")
-    except KeyboardInterrupt:
-        print("Stopping speech recognition...")
-        recognizer.stop()
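Both `download_model_if_needed` implementations above fetch the GGML checkpoint with a bare `requests.get(url)`, which buffers the whole file in memory before writing it out. For checkpoints in the hundred-megabyte range, a streamed download keeps memory use flat and fails fast on HTTP errors — a sketch under the same URL and path conventions as the diff, not the project's actual code:

```python
from pathlib import Path
import requests

def download_model_streamed(url: str, dest: Path, chunk_size: int = 1 << 20) -> None:
    """Stream a checkpoint to disk in 1 MiB chunks instead of buffering it whole."""
    dest.parent.mkdir(parents=True, exist_ok=True)
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()  # surface 4xx/5xx instead of saving an error page
        with open(dest, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)

# Example with the base-model URL used by SpeechRecognitionThread:
# download_model_streamed(
#     "https://huggingface.co/danielus/ggml-whisper-models/resolve/main/ggml-base-fp16.bin",
#     Path.home() / "llama-assistant" / "models" / "whisper-cpp" / "ggml-base-fp16.bin",
# )
```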
diff --git a/pyproject.toml b/pyproject.toml
index 2b51ad2..6bb0dc4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llama-assistant"
-version = "0.1.26"
+version = "0.1.28"
 authors = [
     {name = "Viet-Anh Nguyen", email = "vietanh.dev@gmail.com"},
 ]