kokoro tts added to dock and popout menu

steveseguin · Feb 23, 2025 · 6a6d45a · 6a6d45a
1 parent 1050a8c
commit 6a6d45a
Show file tree

Hide file tree

Showing 9 changed files with 84,078 additions and 19 deletions.
diff --git a/dock.html b/dock.html
@@ -3858,6 +3858,9 @@ <h3>Status</h3>
 			var GoogleAPIKey = urlParams.get("ttskey") || urlParams.get("googlettskey") || false;
 			var ElevenLabsKey = urlParams.get("elevenlabskey") || false;
 			var SpeechifyAPIKey = urlParams.get("speechifykey") || false;
+			var useKokoroTTS = urlParams.has("kokorotts") || urlParams.has("kokoro") || false;
+			var KokoroTTS = false;
+			var TextSplitterStream = null;
 
 			if (GoogleAPIKey || ElevenLabsKey || SpeechifyAPIKey) {
 				audio = document.createElement("audio");
@@ -3913,6 +3916,18 @@ <h3>Status</h3>
 				voiceName: false
 			};
 
+			var kokoroDownloadInProgress  = null;
+			var kokoroTtsInstance = null;
+			var kokoroSettings = {
+				rate: false,
+				voiceName: false,
+				model: "kokoro-82M-v1.0"
+			};
+
+			// Kokoro settings
+			kokoroSettings.speed = urlParams.has("korospeed") ? parseFloat(urlParams.get("korospeed")) || 1.0 : rate;
+			kokoroSettings.voiceName = urlParams.get("voicekokoro") || "af_aoede";
+
 			// Google Cloud settings
 			googleSettings.rate = urlParams.has("googlerate") ? parseFloat(urlParams.get("googlerate")) || 1 : rate;
 			googleSettings.pitch = urlParams.has("googlepitch") ? parseFloat(urlParams.get("googlepitch")) || 0 : 0;
@@ -3930,7 +3945,7 @@ <h3>Status</h3>
 			elevenLabsSettings.model = urlParams.get("elevenlabsmodel") || "eleven_multilingual_v2";
 
 			// Speechify settings
-			speechifySettings.speed = urlParams.has("speechifyspeed") ? parseFloat(urlParams.get("speechifyspeed")) || 1.0 : 1.0;
+			speechifySettings.speed = urlParams.has("speechifyspeed") ? parseFloat(urlParams.get("speechifyspeed")) || 1.0 : rate;
 			speechifySettings.model = urlParams.get("speechifymodel") || 'simba-english';
 			speechifySettings.voiceName = urlParams.get("voicespeechify") || false;
 
@@ -4146,7 +4161,14 @@ <h3>Status</h3>
 					//text = text.replace(/https?:\/\/[^\s]+/g, 'Link');
 				}
 
-				if (GoogleAPIKey) {
+				 if (useKokoroTTS) {
+					if (!premiumQueueActive) {
+						kokoroTTS(text);
+					} else {
+						premiumQueueTTS.push(text);
+					}
+					return;
+				} else if (GoogleAPIKey) {
 					if (!premiumQueueActive) {
 						googleTTS(text);
 					} else {
@@ -4291,6 +4313,180 @@ <h3>Status</h3>
 					ele.title = "Text-to-speech — 🔊 Start reading incoming messages out-loud with text-to-speech";
 				}
 			}
+
+			async function initKokoro() {
+				if (kokoroDownloadInProgress) return false;
+
+				if (!KokoroTTS) {
+
+					async function openDB() {
+						return new Promise((resolve, reject) => {
+							const request = indexedDB.open('kokoroTTS', 1);
+							request.onerror = () => reject(request.error);
+							request.onsuccess = () => resolve(request.result);
+							request.onupgradeneeded = (event) => {
+								const db = event.target.result;
+								if (!db.objectStoreNames.contains('models')) {
+									db.createObjectStore('models');
+								}
+							};
+						});
+					}
+
+					async function getCachedModel() {
+						const db = await openDB();
+						return new Promise((resolve, reject) => {
+							const transaction = db.transaction('models', 'readonly');
+							const store = transaction.objectStore('models');
+							const request = store.get('kokoro-82M-v1.0');
+							request.onerror = () => reject(request.error);
+							request.onsuccess = () => resolve(request.result);
+						});
+					}
+
+					async function cacheModel(modelData) {
+						const db = await openDB();
+						return new Promise((resolve, reject) => {
+							const transaction = db.transaction('models', 'readwrite');
+							const store = transaction.objectStore('models');
+							const request = store.put(modelData, 'kokoro-82M-v1.0');
+							request.onerror = () => reject(request.error);
+							request.onsuccess = () => resolve();
+						});
+					}
+
+					try {
+						kokoroDownloadInProgress = true;
+						console.log("Loading Kokoro dependencies...");
+						const module = window.location.href.startsWith("chrome-extension://") ? await import('./thirdparty/kokoro-bundle.es.ext.js') : await import('./thirdparty/kokoro-bundle.es.js');
+						KokoroTTS = module.KokoroTTS;
+						TextSplitterStream = module.TextSplitterStream;
+						const detectWebGPU = module.detectWebGPU;
+
+						// Initialize IndexedDB handling
+						const DB_NAME = 'kokoroTTS';
+						const STORE_NAME = 'models';
+						const MODEL_KEY = 'kokoro-82M-v1.0';
+
+						const device = (await detectWebGPU()) ? "webgpu" : "wasm";
+						console.log("Using device:", device);
+
+						// Check cache first
+						console.log("Checking cache for model...");
+						let modelData = await getCachedModel();
+
+						if (!modelData) {
+							console.log("Downloading model...");
+							const modelUrl = 'https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/onnx/model.onnx';
+							const response = await fetch(modelUrl);
+							const total = +response.headers.get('Content-Length');
+							let loaded = 0;
+
+							const reader = response.body.getReader();
+							const chunks = [];
+
+							while (true) {
+								const {done, value} = await reader.read();
+								if (done) break;
+
+								chunks.push(value);
+								loaded += value.length;
+
+								const percentage = (loaded / total) * 100;
+								console.log(`Downloading model: ${percentage.toFixed(1)}%`);
+							}
+
+							const modelBlob = new Blob(chunks);
+							modelData = new Uint8Array(await modelBlob.arrayBuffer());
+
+							console.log("Caching model...");
+							await cacheModel(modelData);
+						} else {
+							console.log("Loading model from cache");
+						}
+
+						console.log("Initializing Kokoro TTS...");
+						const customLoadFn = async () => modelData;
+						kokoroTtsInstance = await KokoroTTS.from_pretrained(
+							"onnx-community/Kokoro-82M-v1.0-ONNX",
+							{
+								dtype: device === "wasm" ? "q8" : "fp32",
+								device,
+								load_fn: customLoadFn
+							}
+						);
+
+						console.log("Kokoro TTS ready!");
+						kokoroDownloadInProgress = false;
+						return true;
+					} catch (error) {
+						console.error('Failed to initialize Kokoro:', error);
+						kokoroDownloadInProgress = false;
+						return false;
+					}
+				}
+				return true;
+			}
+
+			if (useKokoroTTS){
+				try {
+					initKokoro();
+				} catch(e){
+					console.error("Failed to load Kokoro TTS",e);
+				}
+			}
+
+			async function kokoroTTS(text) {
+				try {
+					if (!kokoroTtsInstance) {
+						const initialized = await initKokoro();
+						if (!initialized) {
+							finishedAudio();
+							return;
+						}
+					}
+
+					premiumQueueActive = true;
+					const streamer = new TextSplitterStream();
+					streamer.push(text);
+					streamer.close();
+
+					const audioElement = document.createElement("audio");
+					audioElement.onended = finishedAudio;
+
+					const stream = kokoroTtsInstance.stream(streamer, { 
+						voice: kokoroSettings.voiceName || Object.keys(kokoroTtsInstance.voices)[0],
+						speed: kokoroSettings.speed,
+						streamAudio: false 
+					});
+
+					for await (const { audio } of stream) {
+						if (!audio) {
+							finishedAudio();
+							return;
+						}
+
+						const audioBlob = audio.toBlob();
+						audioElement.src = URL.createObjectURL(audioBlob);
+						if (volume) audioElement.volume = volume;
+
+						try {
+							if (audioContext.state === 'suspended') {
+								await audioContext.resume();
+							}
+							await audioElement.play();
+						} catch (e) {
+							finishedAudio();
+							console.error(e);
+							errorlog("REMEMBER TO CLICK THE PAGE FIRST - audio won't play until you do");
+						}
+					}
+				} catch (e) {
+					console.error("Kokoro TTS error:", e);
+					finishedAudio();
+				}
+			}
+
 			function ElevenLabsTTS(tts) {
 				try {
 					premiumQueueActive = true;

diff --git a/featured.html b/featured.html
@@ -754,6 +754,16 @@
 				return false;
 			};
 
+			function getById(id) {
+				// js helper
+				var el = document.getElementById(id);
+				if (!el) {
+					console.log("couldn't find "+id);
+					el = document.createElement("span"); // create a fake element
+				}
+				return el;
+			}
+
 			async function fetchWithTimeout(URL, timeout = 8000) {
 				// ref: https://dmitripavlutin.com/timeout-fetch-request/
 				try {
@@ -1339,6 +1349,53 @@
 						});
 				}
 			}
+
+			function updateAudioSource(newUrl) {
+				var audioElement = getById("testtone");
+				var sources = audioElement.getElementsByTagName("source");
+				var extension = newUrl.split(".").pop().toLowerCase();
+				var mimeType;
+
+				switch (extension) {
+					case "mp3":
+						mimeType = "audio/mpeg";
+						break;
+					case "wav":
+						mimeType = "audio/wav";
+						break;
+					case "ogg":
+						mimeType = "audio/ogg";
+						break;
+					case "aac":
+					case "m4a":
+						mimeType = "audio/aac";
+						break;
+					case "opus":
+						mimeType = "audio/opus";
+						break;
+					case "flac":
+						mimeType = "audio/flac";
+						break;
+					case "webm":
+						mimeType = "audio/webm";
+						break;
+					default:
+						console.error("Unsupported file type:", extension);
+						return;
+				}
+				if (sources.length === 1) {
+					sources[0].src = newUrl;
+					sources[0].type = mimeType;
+				} else {
+					audioElement.innerHTML = "";
+					var newSource = document.createElement("source");
+					newSource.src = newUrl;
+					newSource.type = mimeType;
+					audioElement.appendChild(newSource);
+				}
+				audioElement.load();
+			}
+
 			if (urlParams.get("custombeep")) {
 				updateAudioSource(urlParams.get("custombeep"));
 			}

diff --git a/manifest.json b/manifest.json
@@ -2,7 +2,7 @@
   "name": "Social Stream Ninja",
   "description": "Powerful tooling to engage live chat on Youtube, Twitch, Zoom, and more.",
   "manifest_version": 3,
-  "version": "3.12.22",
+  "version": "3.12.23",
   "homepage_url": "http://socialstream.ninja/",
   "icons": {
     "128": "icons/icon-128.png"
@@ -11,7 +11,7 @@
     "service_worker": "service_worker.js"
   },
   "content_security_policy": {
-    "extension_pages": "script-src 'self'; object-src 'self'"
+    "extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'"
   },
   "permissions": [
     "webNavigation",
@@ -32,7 +32,7 @@
 	"https://*.webinargeek.com/watch/*",
     "https://chaturbate.com/*/",
     "https://cherry.tv/*",				
-	"https://*.online.church",
+	"https://*.online.church/",
     "https://beamstream.gg/*/chat",
     "https://socialstream.ninja/sources/websocket/*",
     "https://socialstream.ninja/websocket/*",

diff --git a/popup.html b/popup.html
@@ -1684,6 +1684,7 @@ <h4>More TTS options</h4>
 							<h3>Text-to-Speech Service Provider</h3>
 							<select id="ttsProvider" data-optionsetting="ttsProvider" class="textInput">
 								<option selected value="system">System TTS</option>
+								<option value="kokoro">Kokoro (webGPU)</option>
 								<option value="elevenlabs">ElevenLabs</option>
 								<option value="google">Google Cloud</option>
 								<option value="speechify">Speechify</option>
@@ -1726,6 +1727,30 @@ <h4>Built-in System TTS Options</h4>
 								</span>
 							</div>
 						</div>
+						<!-- Kokoro TTS Options -->
+						<div class="tts-provider-options options_group" id="kokoroTTS">
+							<h4>GPU-powered TTS Options</h4>
+							<i>note: this free locally-running TTS needs a powerful computer to run well.</i>
+							<br /><br />
+							<div>
+								🎭 Voice to use: 
+								<select id="kokoroVoiceSelect" data-optionparam1="voicekokoro" title="Select the Kokoro TTS Voice">
+									<option value="af_heart">Heart (American Female)</option><option value="af_alloy">Alloy (American Female)</option><option selected value="af_aoede">Aoede (American Female)</option><option value="af_bella">Bella (American Female)</option><option value="af_jessica">Jessica (American Female)</option><option value="af_kore">Kore (American Female)</option><option value="af_nicole">Nicole (American Female)</option><option value="af_nova">Nova (American Female)</option><option value="af_river">River (American Female)</option><option value="af_sarah">Sarah (American Female)</option><option value="af_sky">Sky (American Female)</option><option value="am_adam">Adam (American Male)</option><option value="am_echo">Echo (American Male)</option><option value="am_eric">Eric (American Male)</option><option value="am_fenrir">Fenrir (American Male)</option><option value="am_liam">Liam (American Male)</option><option value="am_michael">Michael (American Male)</option><option value="am_onyx">Onyx (American Male)</option><option value="am_puck">Puck (American Male)</option><option value="am_santa">Santa (American Male)</option><option value="bf_emma">Emma (British Female)</option><option value="bf_isabella">Isabella (British Female)</option><option value="bm_george">George (British Male)</option><option value="bm_lewis">Lewis (British Male)</option><option value="bf_alice">Alice (British Female)</option><option value="bf_lily">Lily (British Female)</option><option value="bm_daniel">Daniel (British Male)</option><option value="bm_fable">Fable (British Male)</option>
+								</select>
+							</div>
+							<br />
+							<div>
+								<label class="switch">
+									<input type="checkbox" data-param1="kokorospeed" />
+									<span class="slider round"></span>
+								</label>
+								<span data-translate="kokoro-tts-speed">
+									⏩ Speaking rate
+									<input type="number" style="max-width: 6ch;" step="0.1" max="10" min="0.1" value="1.0" 
+										size="5" data-numbersetting="kokorospeed" />
+								</span>
+							</div>
+						</div>
 						<!-- ElevenLabs TTS Options -->
 						<div class="tts-provider-options options_group hidden" id="elevenlabsTTS">
 							<h4>ElevenLabs TTS Options</h4>
@@ -2909,7 +2934,7 @@ <h2 class="single_message_title">
 								<br />
 								<br />
                                 <div>
-                                    <select id="languageSelect2" data-optionparam2="lang" data-del2="lang,voice" title="Select the default text to speech language"> </select>								 
+                                    <select id="languageSelect2" data-optionparam2="lang" data-del2="lang,voice" title="Select the default text to speech language"> </select>
                                 </div>
                                 <div>
                                     <label class="switch">