Skip to content

Commit

Permalink
kokoro tts added to dock and popout menu
Browse files Browse the repository at this point in the history
  • Loading branch information
steveseguin committed Feb 23, 2025
1 parent 1050a8c commit 6a6d45a
Show file tree
Hide file tree
Showing 9 changed files with 84,078 additions and 19 deletions.
200 changes: 198 additions & 2 deletions dock.html
Original file line number Diff line number Diff line change
Expand Up @@ -3858,6 +3858,9 @@ <h3>Status</h3>
// API keys for the cloud TTS providers; each one stays false when its URL parameter is absent.
var GoogleAPIKey = urlParams.get("ttskey") || urlParams.get("googlettskey") || false;
var ElevenLabsKey = urlParams.get("elevenlabskey") || false;
var SpeechifyAPIKey = urlParams.get("speechifykey") || false;
// Kokoro is switched on by the mere presence of ?kokorotts or ?kokoro (no value needed).
var useKokoroTTS = urlParams.has("kokorotts") || urlParams.has("kokoro");
// Placeholders for the Kokoro bundle exports; initKokoro() assigns them after the dynamic import.
var KokoroTTS = false;
var TextSplitterStream = null;

if (GoogleAPIKey || ElevenLabsKey || SpeechifyAPIKey) {
audio = document.createElement("audio");
Expand Down Expand Up @@ -3913,6 +3916,18 @@ <h3>Status</h3>
voiceName: false
};

// Kokoro (local, in-browser TTS) state shared by initKokoro() and kokoroTTS().
var kokoroDownloadInProgress = null; // truthy while initKokoro() is loading the bundle/model
var kokoroTtsInstance = null; // the ready-to-use model, set by initKokoro()
var kokoroSettings = {
    rate: false,
    voiceName: false,
    model: "kokoro-82M-v1.0"
};

// Kokoro settings
// The settings popup emits "kokorospeed"; the original misspelling "korospeed"
// is still honoured so existing links keep working.
kokoroSettings.speed = (urlParams.has("kokorospeed") || urlParams.has("korospeed"))
    ? parseFloat(urlParams.get("kokorospeed") || urlParams.get("korospeed")) || 1.0
    : rate;
kokoroSettings.voiceName = urlParams.get("voicekokoro") || "af_aoede";

// Google Cloud settings
googleSettings.rate = urlParams.has("googlerate") ? parseFloat(urlParams.get("googlerate")) || 1 : rate;
googleSettings.pitch = urlParams.has("googlepitch") ? parseFloat(urlParams.get("googlepitch")) || 0 : 0;
Expand All @@ -3930,7 +3945,7 @@ <h3>Status</h3>
elevenLabsSettings.model = urlParams.get("elevenlabsmodel") || "eleven_multilingual_v2";

// Speechify settings
speechifySettings.speed = urlParams.has("speechifyspeed") ? parseFloat(urlParams.get("speechifyspeed")) || 1.0 : 1.0;
speechifySettings.speed = urlParams.has("speechifyspeed") ? parseFloat(urlParams.get("speechifyspeed")) || 1.0 : rate;
speechifySettings.model = urlParams.get("speechifymodel") || 'simba-english';
speechifySettings.voiceName = urlParams.get("voicespeechify") || false;

Expand Down Expand Up @@ -4146,7 +4161,14 @@ <h3>Status</h3>
//text = text.replace(/https?:\/\/[^\s]+/g, 'Link');
}

if (GoogleAPIKey) {
if (useKokoroTTS) {
if (!premiumQueueActive) {
kokoroTTS(text);
} else {
premiumQueueTTS.push(text);
}
return;
} else if (GoogleAPIKey) {
if (!premiumQueueActive) {
googleTTS(text);
} else {
Expand Down Expand Up @@ -4291,6 +4313,180 @@ <h3>Status</h3>
ele.title = "Text-to-speech — 🔊 Start reading incoming messages out-loud with text-to-speech";
}
}

/**
 * Loads the Kokoro TTS bundle and model weights and creates the shared
 * `kokoroTtsInstance` used by kokoroTTS().
 *
 * The model file is read from the `kokoroTTS` IndexedDB database when a copy
 * is stored there; otherwise it is downloaded and then stored for next time.
 * Cache problems are logged and ignored, they never block initialisation.
 *
 * @returns {Promise<boolean>} true once `kokoroTtsInstance` is ready; false
 *   when another load is already running, or when loading failed (the error
 *   is logged, never thrown).
 */
async function initKokoro() {
    if (kokoroDownloadInProgress) return false;

    // Check the instance rather than the imported class: the import can succeed
    // while the model load fails, and that state must not be reported as ready.
    if (kokoroTtsInstance) return true;

    const DB_NAME = 'kokoroTTS';
    const STORE_NAME = 'models';
    const MODEL_KEY = 'kokoro-82M-v1.0';
    const MODEL_URL = 'https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/onnx/model.onnx';

    // Opens (and on first use creates) the model cache database.
    function openDB() {
        return new Promise((resolve, reject) => {
            const request = indexedDB.open(DB_NAME, 1);
            request.onerror = () => reject(request.error);
            request.onsuccess = () => resolve(request.result);
            request.onupgradeneeded = (event) => {
                const db = event.target.result;
                if (!db.objectStoreNames.contains(STORE_NAME)) {
                    db.createObjectStore(STORE_NAME);
                }
            };
        });
    }

    // Returns the cached model bytes, or undefined when nothing is stored.
    async function getCachedModel() {
        const db = await openDB();
        try {
            return await new Promise((resolve, reject) => {
                const request = db.transaction(STORE_NAME, 'readonly').objectStore(STORE_NAME).get(MODEL_KEY);
                request.onerror = () => reject(request.error);
                request.onsuccess = () => resolve(request.result);
            });
        } finally {
            db.close();
        }
    }

    // Stores the model bytes; resolves only once the write transaction has committed.
    async function cacheModel(modelData) {
        const db = await openDB();
        try {
            await new Promise((resolve, reject) => {
                const transaction = db.transaction(STORE_NAME, 'readwrite');
                transaction.oncomplete = () => resolve();
                transaction.onerror = () => reject(transaction.error);
                transaction.onabort = () => reject(transaction.error);
                transaction.objectStore(STORE_NAME).put(modelData, MODEL_KEY);
            });
        } finally {
            db.close();
        }
    }

    // Downloads the model, logging progress, and returns it as a Uint8Array.
    async function downloadModel() {
        const response = await fetch(MODEL_URL);
        if (!response.ok) {
            // Without this check an HTTP error page would be cached as the model.
            throw new Error(`Model download failed with HTTP ${response.status}`);
        }
        // Content-Length can be absent; 0 means "size unknown".
        const total = Number(response.headers.get('Content-Length')) || 0;
        const reader = response.body.getReader();
        const chunks = [];
        let loaded = 0;

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            chunks.push(value);
            loaded += value.length;

            if (total > 0) {
                console.log(`Downloading model: ${((loaded / total) * 100).toFixed(1)}%`);
            } else {
                console.log(`Downloading model: ${loaded} bytes`);
            }
        }

        const data = new Uint8Array(loaded);
        let offset = 0;
        for (const chunk of chunks) {
            data.set(chunk, offset);
            offset += chunk.length;
        }
        return data;
    }

    kokoroDownloadInProgress = true;
    try {
        console.log("Loading Kokoro dependencies...");
        const module = window.location.href.startsWith("chrome-extension://") ? await import('./thirdparty/kokoro-bundle.es.ext.js') : await import('./thirdparty/kokoro-bundle.es.js');
        KokoroTTS = module.KokoroTTS;
        TextSplitterStream = module.TextSplitterStream;
        const detectWebGPU = module.detectWebGPU;

        const device = (await detectWebGPU()) ? "webgpu" : "wasm";
        console.log("Using device:", device);

        // Check cache first; an unusable cache just means we download instead.
        console.log("Checking cache for model...");
        let modelData = null;
        try {
            modelData = await getCachedModel();
        } catch (cacheError) {
            console.warn("Kokoro model cache could not be read:", cacheError);
        }

        if (!modelData) {
            console.log("Downloading model...");
            modelData = await downloadModel();

            console.log("Caching model...");
            try {
                await cacheModel(modelData);
            } catch (cacheError) {
                // Best effort: the model in memory is still usable for this session.
                console.warn("Kokoro model could not be cached:", cacheError);
            }
        } else {
            console.log("Loading model from cache");
        }

        console.log("Initializing Kokoro TTS...");
        const customLoadFn = async () => modelData;
        kokoroTtsInstance = await KokoroTTS.from_pretrained(
            "onnx-community/Kokoro-82M-v1.0-ONNX",
            {
                dtype: device === "wasm" ? "q8" : "fp32",
                device,
                load_fn: customLoadFn
            }
        );

        console.log("Kokoro TTS ready!");
        return true;
    } catch (error) {
        console.error('Failed to initialize Kokoro:', error);
        return false;
    } finally {
        kokoroDownloadInProgress = false;
    }
}

// Start loading the Kokoro bundle and model as soon as the page loads.
if (useKokoroTTS) {
    // initKokoro() is async, so a failure surfaces as a promise rejection,
    // which a synchronous try/catch cannot observe.
    initKokoro().catch((e) => {
        console.error("Failed to load Kokoro TTS", e);
    });
}

/**
 * Speaks `text` with the local Kokoro model.
 *
 * The text is split into chunks by the Kokoro stream; each chunk is played to
 * the end before the next one starts. `finishedAudio()` is called exactly once
 * when the function is done, whether it finished, failed, or could not start.
 *
 * @param {string} text - The message to read out loud.
 * @returns {Promise<void>} Settles after playback has ended or been abandoned.
 */
async function kokoroTTS(text) {
    try {
        if (!kokoroTtsInstance) {
            const initialized = await initKokoro();
            if (!initialized) {
                return; // finishedAudio() runs in the finally block
            }
        }

        premiumQueueActive = true;
        const streamer = new TextSplitterStream();
        streamer.push(text);
        streamer.close();

        const audioElement = document.createElement("audio");

        // Plays one chunk and resolves true once it has ended, or false when
        // it could not be played.
        const playChunk = async (objectUrl) => {
            const ended = new Promise((resolve) => {
                audioElement.onended = () => resolve(true);
                audioElement.onerror = () => resolve(false);
            });
            audioElement.src = objectUrl;
            if (volume) audioElement.volume = volume;

            try {
                if (audioContext.state === 'suspended') {
                    await audioContext.resume();
                }
                // play() resolves when playback STARTS; `ended` is what tells
                // us the chunk is over.
                await audioElement.play();
            } catch (e) {
                console.error(e);
                errorlog("REMEMBER TO CLICK THE PAGE FIRST - audio won't play until you do");
                return false;
            }
            return ended;
        };

        const stream = kokoroTtsInstance.stream(streamer, {
            voice: kokoroSettings.voiceName || Object.keys(kokoroTtsInstance.voices)[0],
            speed: kokoroSettings.speed,
            streamAudio: false
        });

        for await (const { audio } of stream) {
            if (!audio) break;

            const objectUrl = URL.createObjectURL(audio.toBlob());
            let played = false;
            try {
                played = await playChunk(objectUrl);
            } finally {
                // Release the blob; otherwise every spoken message stays in memory.
                URL.revokeObjectURL(objectUrl);
            }
            // If one chunk cannot play, the rest will not either.
            if (!played) break;
        }
    } catch (e) {
        console.error("Kokoro TTS error:", e);
    } finally {
        finishedAudio();
    }
}

function ElevenLabsTTS(tts) {
try {
premiumQueueActive = true;
Expand Down
57 changes: 57 additions & 0 deletions featured.html
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,16 @@
return false;
};

function getById(id) {
    // Element lookup that never returns null: a missing id is logged and a
    // detached <span> is returned in its place.
    var found = document.getElementById(id);
    if (found) {
        return found;
    }
    console.log("couldn't find "+id);
    return document.createElement("span"); // create a fake element
}

async function fetchWithTimeout(URL, timeout = 8000) {
// ref: https://dmitripavlutin.com/timeout-fetch-request/
try {
Expand Down Expand Up @@ -1339,6 +1349,53 @@
});
}
}

/**
 * Points the "testtone" audio element at a new sound file.
 *
 * The MIME type is derived from the file extension, ignoring any query string
 * or fragment. Unsupported extensions are logged and leave the element as is.
 *
 * @param {string} newUrl - URL of the replacement sound.
 */
function updateAudioSource(newUrl) {
    var mimeTypes = {
        mp3: "audio/mpeg",
        wav: "audio/wav",
        ogg: "audio/ogg",
        aac: "audio/aac",
        m4a: "audio/mp4", // .m4a is an MP4 container, not a raw AAC stream
        opus: "audio/opus",
        flac: "audio/flac",
        webm: "audio/webm"
    };

    // Drop "?query" and "#fragment" so "beep.mp3?v=2" is still seen as mp3.
    var path = String(newUrl).split(/[?#]/)[0];
    var extension = path.split(".").pop().toLowerCase();
    var mimeType = Object.prototype.hasOwnProperty.call(mimeTypes, extension) ? mimeTypes[extension] : null;

    if (!mimeType) {
        console.error("Unsupported file type:", extension);
        return;
    }

    var audioElement = getById("testtone");
    var sources = audioElement.getElementsByTagName("source");

    if (sources.length === 1) {
        sources[0].src = newUrl;
        sources[0].type = mimeType;
    } else {
        // Zero or several <source> children: replace them with a single one.
        audioElement.innerHTML = "";
        var newSource = document.createElement("source");
        newSource.src = newUrl;
        newSource.type = mimeType;
        audioElement.appendChild(newSource);
    }

    // getById() returns a placeholder <span> when the element is missing, and
    // a <span> has no load() method.
    if (typeof audioElement.load === "function") {
        audioElement.load();
    }
}

// Use the sound given by ?custombeep=<url>, when present, for the "testtone" element.
{
    const customBeepUrl = urlParams.get("custombeep");
    if (customBeepUrl) {
        updateAudioSource(customBeepUrl);
    }
}
Expand Down
6 changes: 3 additions & 3 deletions manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Social Stream Ninja",
"description": "Powerful tooling to engage live chat on Youtube, Twitch, Zoom, and more.",
"manifest_version": 3,
"version": "3.12.22",
"version": "3.12.23",
"homepage_url": "http://socialstream.ninja/",
"icons": {
"128": "icons/icon-128.png"
Expand All @@ -11,7 +11,7 @@
"service_worker": "service_worker.js"
},
"content_security_policy": {
"extension_pages": "script-src 'self'; object-src 'self'"
"extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'"
},
"permissions": [
"webNavigation",
Expand All @@ -32,7 +32,7 @@
"https://*.webinargeek.com/watch/*",
"https://chaturbate.com/*/",
"https://cherry.tv/*",
"https://*.online.church",
"https://*.online.church/",
"https://beamstream.gg/*/chat",
"https://socialstream.ninja/sources/websocket/*",
"https://socialstream.ninja/websocket/*",
Expand Down
27 changes: 26 additions & 1 deletion popup.html
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,7 @@ <h4>More TTS options</h4>
<h3>Text-to-Speech Service Provider</h3>
<select id="ttsProvider" data-optionsetting="ttsProvider" class="textInput">
<option selected value="system">System TTS</option>
<option value="kokoro">Kokoro (webGPU)</option>
<option value="elevenlabs">ElevenLabs</option>
<option value="google">Google Cloud</option>
<option value="speechify">Speechify</option>
Expand Down Expand Up @@ -1726,6 +1727,30 @@ <h4>Built-in System TTS Options</h4>
</span>
</div>
</div>
<!-- Kokoro TTS Options (starts hidden like the other non-default provider panels; "System TTS" is the default selection) -->
<div class="tts-provider-options options_group hidden" id="kokoroTTS">
    <h4>GPU-powered TTS Options</h4>
    <i>note: this free locally-running TTS needs a powerful computer to run well.</i>
    <br /><br />
    <div>
        🎭 Voice to use:
        <select id="kokoroVoiceSelect" data-optionparam1="voicekokoro" title="Select the Kokoro TTS Voice">
            <option value="af_heart">Heart (American Female)</option>
            <option value="af_alloy">Alloy (American Female)</option>
            <option selected value="af_aoede">Aoede (American Female)</option>
            <option value="af_bella">Bella (American Female)</option>
            <option value="af_jessica">Jessica (American Female)</option>
            <option value="af_kore">Kore (American Female)</option>
            <option value="af_nicole">Nicole (American Female)</option>
            <option value="af_nova">Nova (American Female)</option>
            <option value="af_river">River (American Female)</option>
            <option value="af_sarah">Sarah (American Female)</option>
            <option value="af_sky">Sky (American Female)</option>
            <option value="am_adam">Adam (American Male)</option>
            <option value="am_echo">Echo (American Male)</option>
            <option value="am_eric">Eric (American Male)</option>
            <option value="am_fenrir">Fenrir (American Male)</option>
            <option value="am_liam">Liam (American Male)</option>
            <option value="am_michael">Michael (American Male)</option>
            <option value="am_onyx">Onyx (American Male)</option>
            <option value="am_puck">Puck (American Male)</option>
            <option value="am_santa">Santa (American Male)</option>
            <option value="bf_emma">Emma (British Female)</option>
            <option value="bf_isabella">Isabella (British Female)</option>
            <option value="bm_george">George (British Male)</option>
            <option value="bm_lewis">Lewis (British Male)</option>
            <option value="bf_alice">Alice (British Female)</option>
            <option value="bf_lily">Lily (British Female)</option>
            <option value="bm_daniel">Daniel (British Male)</option>
            <option value="bm_fable">Fable (British Male)</option>
        </select>
    </div>
    <br />
    <div>
        <label class="switch">
            <input type="checkbox" data-param1="kokorospeed" />
            <span class="slider round"></span>
        </label>
        <span data-translate="kokoro-tts-speed">
            ⏩ Speaking rate
            <input type="number" style="max-width: 6ch;" step="0.1" max="10" min="0.1" value="1.0"
                size="5" data-numbersetting="kokorospeed" />
        </span>
    </div>
</div>
<!-- ElevenLabs TTS Options -->
<div class="tts-provider-options options_group hidden" id="elevenlabsTTS">
<h4>ElevenLabs TTS Options</h4>
Expand Down Expand Up @@ -2909,7 +2934,7 @@ <h2 class="single_message_title">
<br />
<br />
<div>
<select id="languageSelect2" data-optionparam2="lang" data-del2="lang,voice" title="Select the default text to speech language"> </select>
<select id="languageSelect2" data-optionparam2="lang" data-del2="lang,voice" title="Select the default text to speech language"> </select>
</div>
<div>
<label class="switch">
Expand Down
Loading

0 comments on commit 6a6d45a

Please sign in to comment.