diff --git a/1000h-portal/package.json b/1000h-portal/package.json index d54e5ba8e..b1f05ab4c 100644 --- a/1000h-portal/package.json +++ b/1000h-portal/package.json @@ -18,7 +18,7 @@ }, "devDependencies": { "autoprefixer": "^10.4.20", - "postcss": "^8.4.47", + "postcss": "^8.4.48", "sass": "^1.80.6", "tailwindcss": "^3.4.14" } diff --git a/enjoy/package.json b/enjoy/package.json index 9675b728f..7cc198b69 100644 --- a/enjoy/package.json +++ b/enjoy/package.json @@ -85,7 +85,7 @@ "typescript": "^5.6.3", "vite": "^5.4.10", "vite-plugin-static-copy": "^2.1.0", - "zx": "^8.2.0" + "zx": "^8.2.1" }, "dependencies": { "@andrkrn/ffprobe-static": "^5.2.0", @@ -141,7 +141,7 @@ "dayjs": "^1.11.13", "decamelize": "^6.0.0", "decamelize-keys": "^2.0.1", - "echogarden": "^1.8.7", + "echogarden": "^2.0.0", "electron-context-menu": "^4.0.4", "electron-log": "^5.2.2", "electron-settings": "^4.0.4", @@ -159,7 +159,7 @@ "langchain": "^0.3.5", "lodash": "^4.17.21", "lru-cache": "^11.0.2", - "lucide-react": "^0.455.0", + "lucide-react": "^0.456.0", "mark.js": "^8.11.1", "microsoft-cognitiveservices-speech-sdk": "^1.41.0", "mime-types": "^2.1.35", @@ -167,7 +167,7 @@ "next-themes": "^0.4.3", "openai": "^4.71.1", "pitchfinder": "^2.3.2", - "postcss": "^8.4.47", + "postcss": "^8.4.48", "proxy-agent": "^6.4.0", "react": "^18.3.1", "react-activity-calendar": "^2.7.1", @@ -175,7 +175,7 @@ "react-audio-voice-recorder": "^2.2.0", "react-dom": "^18.3.1", "react-frame-component": "^5.2.7", - "react-hook-form": "^7.53.1", + "react-hook-form": "^7.53.2", "react-hotkeys-hook": "^4.6.1", "react-i18next": "^15.1.1", "react-markdown": "^9.0.1", diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json index 619ef545d..48b1ffc97 100644 --- a/enjoy/src/i18n/en.json +++ b/enjoy/src/i18n/en.json @@ -907,5 +907,17 @@ "failedToLoadLink": "Failed to load link", "refreshSpeech": "Refresh speech", "locateParagraph": "Locate paragraph", - "close": "Close" + "close": "Close", + "config": "Config", + "temperature": "Temperature", + "encoderProvider": "Encoder Provider", + "decoderProvider": "Decoder Provider", + "enableGPU": "Enable GPU", + "openPackagesDir": "Open models dir", + "whisperModelDescription": "The model will be downloaded on first use.", + "whisperEngineDescription": "OpenAI Whisper, with inference run via the ONNX runtime.", + "whisperCppEngineDescription": "C++ port of the Whisper architecture.", + "ttsService": "Text-to-Speech Service", + "openaiTtsServiceDescription": "Use the OpenAI TTS service with your own API key.", + "enjoyTtsServiceDescription": "Use the TTS service provided by Enjoy. OpenAI and Azure are supported."
} diff --git a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json index d029de96e..e623f06ee 100644 --- a/enjoy/src/i18n/zh-CN.json +++ b/enjoy/src/i18n/zh-CN.json @@ -907,5 +907,17 @@ "failedToLoadLink": "加载链接失败", "refreshSpeech": "刷新语音", "locateParagraph": "定位段落", - "close": "关闭" + "close": "关闭", + "config": "配置", + "temperature": "温度", + "encoderProvider": "编码器", + "decoderProvider": "解码器", + "enableGPU": "启用 GPU", + "openPackagesDir": "打开模型目录", + "whisperModelDescription": "模型首次使用时会下载。", + "whisperEngineDescription": "OpenAI Whisper 使用 ONNX 运行时进行推理。", + "whisperCppEngineDescription": "Whisper 的 C++ 实现。", + "ttsService": "文字转语音服务", + "openaiTtsServiceDescription": "使用您自己的 API key 来使用 OpenAI TTS 服务。", + "enjoyTtsServiceDescription": "使用 Enjoy 提供的 TTS 服务,支持 OpenAI 或 Azure。" } diff --git a/enjoy/src/main/db/models/document.ts b/enjoy/src/main/db/models/document.ts index 0e1e0c694..0033c6ee6 100644 --- a/enjoy/src/main/db/models/document.ts +++ b/enjoy/src/main/db/models/document.ts @@ -259,7 +259,9 @@ export class Document extends Model { } logger.debug("detected file type", filePath, mimeType, extension); - if (!DocumentFormats.includes(extension)) { + if (extension === "zip" && filePath.endsWith(".epub")) { + extension = "epub"; + } else if (!DocumentFormats.includes(extension)) { logger.error("unsupported file type", filePath, extension); throw new Error( t("models.document.fileNotSupported", { file: filePath }) diff --git a/enjoy/src/main/echogarden.ts b/enjoy/src/main/echogarden.ts index 182724831..4317ec1ad 100644 --- a/enjoy/src/main/echogarden.ts +++ b/enjoy/src/main/echogarden.ts @@ -15,6 +15,8 @@ import { type Timeline, type TimelineEntry, } from "echogarden/dist/utilities/Timeline.d.js"; +import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js"; +import { ensureAndGetPackagesDir } from "echogarden/dist/utilities/PackageManager.js"; import path from "path"; import log from "@main/logger"; import url from "url"; @@ -25,7 +27,6 @@ import { enjoyUrlToPath, pathToEnjoyUrl } from "./utils"; import { UserSetting } from "./db/models"; import { UserSettingKeyEnum } from "@/types/enums"; import { WHISPER_MODELS } from "@/constants"; -import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js"; Echogarden.setGlobalOption( "ffmpegPath", @@ -59,7 +60,31 @@ class EchogardenWrapper { public wordTimelineToSegmentSentenceTimeline: typeof wordTimelineToSegmentSentenceTimeline; constructor() { - this.recognize = Echogarden.recognize; + this.recognize = (sampleFile: string, options: RecognitionOptions) => { + return new Promise((resolve, reject) => { + const handler = (reason: any) => { + // Remove the handler after it's triggered + process.removeListener("unhandledRejection", handler); + reject(reason); + }; + + // Add temporary unhandledRejection listener + process.on("unhandledRejection", handler); + + // Call the original recognize function + Echogarden.recognize(sampleFile, options) + .then((result) => { + // Remove the handler if successful + process.removeListener("unhandledRejection", handler); + resolve(result); + }) + .catch((reason) => { + // Remove the handler on an ordinary rejection too, so it does not leak + process.removeListener("unhandledRejection", handler); + reject(reason); + }); + }); + }; this.align = Echogarden.align; this.alignSegments = Echogarden.alignSegments; this.denoise = Echogarden.denoise; @@ -78,23 +99,15 @@ class EchogardenWrapper { engine: "whisper", whisper: { model: "tiny.en", - language: "en", - } as WhisperOptions, + }, } ) { const sampleFile = path.join(__dirname, "samples", "jfk.wav"); - try { - const whisperModel = await UserSetting.get(UserSettingKeyEnum.WHISPER); - if 
(WHISPER_MODELS.includes(whisperModel)) { - options.whisper.model = whisperModel; - } - } catch (e) { - logger.error(e); - } try { + logger.info("check:", options); const result = await this.recognize(sampleFile, options); - logger.info(result); + logger.info(result?.transcript); fs.writeJsonSync( path.join(settings.cachePath(), "echogarden-check.json"), result, @@ -225,6 +238,10 @@ class EchogardenWrapper { ipcMain.handle("echogarden-check", async (_event, options: any) => { return this.check(options); }); + + ipcMain.handle("echogarden-get-packages-dir", async (_event) => { + return ensureAndGetPackagesDir(); + }); } } diff --git a/enjoy/src/preload.ts b/enjoy/src/preload.ts index aa5111241..ceeab03b2 100644 --- a/enjoy/src/preload.ts +++ b/enjoy/src/preload.ts @@ -476,6 +476,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", { }, }, echogarden: { + getPackagesDir: () => { + return ipcRenderer.invoke("echogarden-get-packages-dir"); + }, recognize: (input: string, options: RecognitionOptions) => { return ipcRenderer.invoke("echogarden-recognize", input, options); }, @@ -505,8 +508,8 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", { transcode: (input: string) => { return ipcRenderer.invoke("echogarden-transcode", input); }, - check: () => { - return ipcRenderer.invoke("echogarden-check"); + check: (options: RecognitionOptions) => { + return ipcRenderer.invoke("echogarden-check", options); }, }, ffmpeg: { diff --git a/enjoy/src/renderer/components/chats/chat-agent-form.tsx b/enjoy/src/renderer/components/chats/chat-agent-form.tsx index 403b1350f..a7839c111 100644 --- a/enjoy/src/renderer/components/chats/chat-agent-form.tsx +++ b/enjoy/src/renderer/components/chats/chat-agent-form.tsx @@ -48,7 +48,7 @@ export const ChatAgentForm = (props: { const { EnjoyApp, learningLanguage, webApi } = useContext( AppSettingsProviderContext ); - const { currentTtsEngine } = useContext(AISettingsProviderContext); + const { ttsConfig } = useContext(AISettingsProviderContext); const [selectedTemplate, setSelectedTemplate] = useState("custom"); const [templates, setTemplates] = useState< { @@ -104,10 +104,10 @@ export const ChatAgentForm = (props: { const { type, name, description, config } = data; if (type === ChatAgentTypeEnum.TTS) { config.tts = { - engine: config.tts?.engine || currentTtsEngine.name, - model: config.tts?.model || currentTtsEngine.model, - language: config.tts?.language || learningLanguage, - voice: config.tts?.voice || currentTtsEngine.voice, + engine: config.tts?.engine || ttsConfig.engine, + model: config.tts?.model || ttsConfig.model, + language: config.tts?.language || ttsConfig.language, + voice: config.tts?.voice || ttsConfig.voice, }; } diff --git a/enjoy/src/renderer/components/chats/chat-list.tsx b/enjoy/src/renderer/components/chats/chat-list.tsx index 6fe3d69ef..13c4f58b6 100644 --- a/enjoy/src/renderer/components/chats/chat-list.tsx +++ b/enjoy/src/renderer/components/chats/chat-list.tsx @@ -30,7 +30,7 @@ export const ChatList = (props: { setCurrentChat: (chat: ChatType) => void; }) => { const { chats, chatAgent, currentChat, setCurrentChat } = props; - const { sttEngine, currentGptEngine, currentTtsEngine } = useContext( + const { sttEngine, currentGptEngine, ttsConfig } = useContext( AISettingsProviderContext ); const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext); @@ -78,10 +78,10 @@ export const ChatList = (props: { agent.type === ChatAgentTypeEnum.TTS ? 
{ tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, ...agent.config.tts, }, } @@ -92,10 +92,10 @@ model: currentGptEngine.models.default, }, tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, }, }; return { diff --git a/enjoy/src/renderer/components/chats/chat-settings.tsx b/enjoy/src/renderer/components/chats/chat-settings.tsx index 4b7d651fb..ca9165e71 100644 --- a/enjoy/src/renderer/components/chats/chat-settings.tsx +++ b/enjoy/src/renderer/components/chats/chat-settings.tsx @@ -68,10 +68,8 @@ const ChatMemberSetting = (props: { onFinish?: () => void; }) => { const { chat, agentMembers, onFinish } = props; - const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext); - const { currentGptEngine, currentTtsEngine } = useContext( - AISettingsProviderContext - ); + const { EnjoyApp } = useContext(AppSettingsProviderContext); + const { currentGptEngine, ttsConfig } = useContext(AISettingsProviderContext); const [memberTab, setMemberTab] = useState(agentMembers[0]?.userId); const [query, setQuery] = useState(""); const [chatAgents, setChatAgents] = useState([]); @@ -90,10 +88,10 @@ const ChatMemberSetting = (props: { model: currentGptEngine.models.default, }, tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, }, }, }) diff --git a/enjoy/src/renderer/components/documents/document-config-form.tsx b/enjoy/src/renderer/components/documents/document-config-form.tsx index 5707dd55d..6837601ec 100644 --- a/enjoy/src/renderer/components/documents/document-config-form.tsx +++ b/enjoy/src/renderer/components/documents/document-config-form.tsx @@ -12,7 +12,8 @@ import { import { t } from "i18next"; import { TTSForm } from "@renderer/components"; import { LoaderIcon } from "lucide-react"; -import { useState } from "react"; +import { useContext, useState } from "react"; +import { AISettingsProviderContext } from "@renderer/context"; const documentConfigSchema = z.object({ config: z.object({ @@ -33,6 +34,7 @@ export const DocumentConfigForm = (props: { }) => { const { config, onSubmit } = props; const [submitting, setSubmitting] = useState(false); + const { ttsConfig } = useContext(AISettingsProviderContext); const form = useForm<z.infer<typeof documentConfigSchema>>({ resolver: zodResolver(documentConfigSchema), @@ -42,12 +44,7 @@ export const DocumentConfigForm = (props: { config: { autoTranslate: true, autoNextSpeech: true, - tts: { - engine: "openai", - model: "openai/tts-1", - language: "en-US", - voice: "alloy", - }, + tts: ttsConfig, }, }, }); diff --git a/enjoy/src/renderer/components/medias/media-left-panel/media-transcription-generate-button.tsx b/enjoy/src/renderer/components/medias/media-left-panel/media-transcription-generate-button.tsx index d3fc13378..e2ebec4a1 100644 --- a/enjoy/src/renderer/components/medias/media-left-panel/media-transcription-generate-button.tsx +++ b/enjoy/src/renderer/components/medias/media-left-panel/media-transcription-generate-button.tsx @@ -79,7 +79,6 @@ export const 
MediaTranscriptionGenerateButton = (props: { generateTranscription({ originalText: data.text, language: data.language, - model: data.model, service: data.service as SttEngineOptionEnum | "upload", isolate: data.isolate, }) diff --git a/enjoy/src/renderer/components/medias/media-loading-modal.tsx b/enjoy/src/renderer/components/medias/media-loading-modal.tsx index 96c7d3dd3..a8a9cda6c 100644 --- a/enjoy/src/renderer/components/medias/media-loading-modal.tsx +++ b/enjoy/src/renderer/components/medias/media-loading-modal.tsx @@ -70,7 +70,6 @@ const LoadingContent = () => { generateTranscription({ originalText: data.text, language: data.language, - model: data.model, service: data.service as SttEngineOptionEnum | "upload", isolate: data.isolate, }); diff --git a/enjoy/src/renderer/components/preferences/echogarden-stt-settings.tsx b/enjoy/src/renderer/components/preferences/echogarden-stt-settings.tsx new file mode 100644 index 000000000..fe28ca4aa --- /dev/null +++ b/enjoy/src/renderer/components/preferences/echogarden-stt-settings.tsx @@ -0,0 +1,336 @@ +import { t } from "i18next"; +import { + Button, + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + Input, + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, + Switch, +} from "@renderer/components/ui"; +import { AppSettingsProviderContext } from "@renderer/context"; +import { useContext, useEffect, useState } from "react"; +import { z } from "zod"; +import { useForm } from "react-hook-form"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { WHISPER_MODELS } from "@/constants"; + +const echogardenSttConfigSchema = z.object({ + engine: z.enum(["whisper", "whisper.cpp"]), + whisper: z.object({ + model: z.string(), + temperature: z.number(), + prompt: z.string(), + encoderProvider: z.enum(["cpu", "dml", "cuda"]), + decoderProvider: z.enum(["cpu", "dml", "cuda"]), + }), + whisperCpp: z.object({ + model: z.string(), + temperature: z.number(), + prompt: z.string(), + enableGPU: z.boolean(), + }), +}); + +export const EchogardenSttSettings = (props: { + echogardenSttConfig: EchogardenSttConfigType; + onSave: (data: z.infer<typeof echogardenSttConfigSchema>) => void; +}) => { + const { echogardenSttConfig, onSave } = props; + const { EnjoyApp } = useContext(AppSettingsProviderContext); + const [platformInfo, setPlatformInfo] = useState<{ + platform: string; + arch: string; + version: string; + }>(); + const [packagesDir, setPackagesDir] = useState<string>(); + + const form = useForm<z.infer<typeof echogardenSttConfigSchema>>({ + resolver: zodResolver(echogardenSttConfigSchema), + values: { + engine: echogardenSttConfig?.engine, + whisper: { + model: "tiny", + temperature: 0.1, + prompt: "", + encoderProvider: "cpu", + decoderProvider: "cpu", + ...echogardenSttConfig?.whisper, + }, + whisperCpp: { + model: "tiny", + temperature: 0.1, + prompt: "", + enableGPU: false, + ...echogardenSttConfig?.whisperCpp, + }, + }, + }); + + const onSubmit = async (data: z.infer<typeof echogardenSttConfigSchema>) => { + onSave({ + engine: data.engine || "whisper", + whisper: { + ...data.whisper, + model: data.whisper.model || "tiny", + }, + whisperCpp: { + ...data.whisperCpp, + model: data.whisperCpp.model || "tiny", + }, + }); + }; + + const handleOpenPackagesDir = () => { + if (!packagesDir) return; + EnjoyApp.shell.openPath(packagesDir); + }; + + useEffect(() => { + EnjoyApp.app.getPlatformInfo().then(setPlatformInfo); + EnjoyApp.echogarden.getPackagesDir().then(setPackagesDir); + }, []); + + return ( +
+ +
+ ( + + {t("engine")} + + + + + {form.watch("engine") === "whisper" + ? t("whisperEngineDescription") + : t("whisperCppEngineDescription")} + + + )} + /> + ( + + {t("model")} + + + + + {t("whisperModelDescription")} + {packagesDir && ( + + )} + + + )} + /> + + {form.watch("engine") === "whisper" && ( + <> + ( + + {t("temperature")} + + + + + )} + /> + + ( + + {t("prompt")} + + + + + )} + /> + + ( + + {t("encoderProvider")} + + + + + )} + /> + + ( + + {t("decoderProvider")} + + + + + )} + /> + + )} + + {form.watch("engine") === "whisper.cpp" && ( + <> + ( + + {t("temperature")} + + + + + )} + /> + + ( + + {t("prompt")} + + + + + )} + /> + + ( + +
+ {t("enableGPU")} + + + +
+
+ )} + /> + + )} +
+
+ +
+
+ + ); +}; diff --git a/enjoy/src/renderer/components/preferences/index.ts b/enjoy/src/renderer/components/preferences/index.ts index f591f0ac4..b40f23466 100644 --- a/enjoy/src/renderer/components/preferences/index.ts +++ b/enjoy/src/renderer/components/preferences/index.ts @@ -15,6 +15,7 @@ export * from "./openai-settings"; export * from "./library-settings"; export * from "./disk-usage"; export * from "./stt-settings"; +export * from "./tts-settings"; export * from "./user-settings"; export * from "./email-settings"; @@ -34,3 +35,5 @@ export * from "./recorder-settings"; export * from "./vocabulary-settings"; export * from "./dict-settings"; + +export * from "./echogarden-stt-settings"; diff --git a/enjoy/src/renderer/components/preferences/library-settings.tsx b/enjoy/src/renderer/components/preferences/library-settings.tsx index 1dce2e066..7c697b067 100644 --- a/enjoy/src/renderer/components/preferences/library-settings.tsx +++ b/enjoy/src/renderer/components/preferences/library-settings.tsx @@ -21,6 +21,10 @@ export const LibrarySettings = () => { } }; + const openLibraryDir = () => { + EnjoyApp.shell.openPath(libraryPath); + }; + return (
@@ -30,6 +34,9 @@ export const LibrarySettings = () => {
+ + {!editing && ( + + )} )}
diff --git a/enjoy/src/renderer/components/preferences/tts-settings.tsx b/enjoy/src/renderer/components/preferences/tts-settings.tsx new file mode 100644 index 000000000..8dc4d186c --- /dev/null +++ b/enjoy/src/renderer/components/preferences/tts-settings.tsx @@ -0,0 +1,81 @@ +import { t } from "i18next"; +import { Button, toast, Form } from "@renderer/components/ui"; +import { AISettingsProviderContext } from "@renderer/context"; +import { useContext, useState } from "react"; +import { z } from "zod"; +import { useForm } from "react-hook-form"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { TTSForm } from "@renderer/components"; + +const ttsConfigSchema = z.object({ + config: z.object({ + tts: z.object({ + engine: z.string().min(1), + model: z.string().min(1), + language: z.string().min(1), + voice: z.string().min(1), + }), + }), +}); + +export const TtsSettings = () => { + const [editing, setEditing] = useState(false); + const { ttsConfig, setTtsConfig } = useContext(AISettingsProviderContext); + const form = useForm<z.infer<typeof ttsConfigSchema>>({ + resolver: zodResolver(ttsConfigSchema), + values: { + config: { + tts: ttsConfig, + }, + }, + }); + + const onSubmit = (data: z.infer<typeof ttsConfigSchema>) => { + setTtsConfig(data.config.tts as TtsConfigType) + .then(() => toast.success(t("saved"))) + .finally(() => setEditing(false)); + }; + + return ( +
+ +
+
+
+ {t("ttsService")} +
+
+ {form.watch("config.tts.engine") === "openai" + ? t("openaiTtsServiceDescription") + : t("enjoyTtsServiceDescription")} +
+
+ +
+
+
+ + +
+
+
+ + ); }; diff --git a/enjoy/src/renderer/components/transcriptions/transcription-create-form.tsx b/enjoy/src/renderer/components/transcriptions/transcription-create-form.tsx index 8ee52f778..2c37e1d00 100644 --- a/enjoy/src/renderer/components/transcriptions/transcription-create-form.tsx +++ b/enjoy/src/renderer/components/transcriptions/transcription-create-form.tsx @@ -39,7 +39,6 @@ import { SttEngineOptionEnum } from "@/types/enums"; const transcriptionSchema = z.object({ language: z.string(), service: z.union([z.nativeEnum(SttEngineOptionEnum), z.literal("upload")]), - model: z.string().optional(), text: z.string().optional(), isolate: z.boolean().optional(), }); @@ -61,14 +60,15 @@ export const TranscriptionCreateForm = (props: { originalText, } = props; const { learningLanguage } = useContext(AppSettingsProviderContext); - const { sttEngine, whisperModel } = useContext(AISettingsProviderContext); + const { sttEngine, echogardenSttConfig } = useContext( + AISettingsProviderContext + ); const form = useForm<z.infer<typeof transcriptionSchema>>({ resolver: zodResolver(transcriptionSchema), values: { language: learningLanguage, service: originalText ? "upload" : sttEngine, - model: sttEngine === SttEngineOptionEnum.LOCAL ? whisperModel : "", text: originalText, isolate: false, }, }); @@ -184,8 +184,22 @@ export const TranscriptionCreateForm = (props: { - {form.watch("service") === SttEngineOptionEnum.LOCAL && - t("localSpeechToTextDescription")} + {form.watch("service") === SttEngineOptionEnum.LOCAL && ( + <> +
{t("localSpeechToTextDescription")}
+
+ * {t("model")}: {echogardenSttConfig.engine} /{" "} + { + echogardenSttConfig[ + echogardenSttConfig.engine.replace(".cpp", "Cpp") as + | "whisper" + | "whisperCpp" + ]?.model + } +
+ + )} + {form.watch("service") === SttEngineOptionEnum.ENJOY_AZURE && t("enjoyAzureSpeechToTextDescription")} {form.watch("service") === @@ -200,34 +214,6 @@ export const TranscriptionCreateForm = (props: { )} /> - {form.watch("service") === SttEngineOptionEnum.LOCAL && ( - ( - - {t("model")} - - - )} - /> - )} - Promise; - whisperModel?: string; - setWhisperModel?: (name: string) => void; openai?: LlmProviderType; setOpenai?: (config: LlmProviderType) => void; setGptEngine?: (engine: GptEngineSettingType) => void; currentGptEngine?: GptEngineSettingType; - currentTtsEngine?: TtsEngineSettingType; gptProviders?: typeof GPT_PROVIDERS; ttsProviders?: typeof TTS_PROVIDERS; + ttsConfig?: TtsConfigType; + setTtsConfig?: (config: TtsConfigType) => Promise; + echogardenSttConfig?: EchogardenSttConfigType; + setEchogardenSttConfig?: (config: EchogardenSttConfigType) => Promise; }; const initialState: AISettingsProviderState = {}; @@ -31,6 +32,18 @@ export const AISettingsProvider = ({ }: { children: React.ReactNode; }) => { + const { EnjoyApp, libraryPath, user, apiUrl, webApi, learningLanguage } = + useContext(AppSettingsProviderContext); + const [gptProviders, setGptProviders] = useState(GPT_PROVIDERS); + const [ttsProviders, setTtsProviders] = useState(TTS_PROVIDERS); + const db = useContext(DbProviderContext); + + const [sttEngine, setSttEngine] = useState( + SttEngineOptionEnum.ENJOY_AZURE + ); + const [ttsConfig, setTtsConfig] = useState(null); + const [echogardenSttConfig, setEchogardenSttConfig] = + useState(null); const [gptEngine, setGptEngine] = useState({ name: "enjoyai", models: { @@ -38,15 +51,6 @@ export const AISettingsProvider = ({ }, }); const [openai, setOpenai] = useState(null); - const [whisperModel, setWhisperModel] = useState(null); - const [sttEngine, setSttEngine] = useState( - SttEngineOptionEnum.ENJOY_AZURE - ); - const { EnjoyApp, libraryPath, user, apiUrl, webApi, learningLanguage } = - useContext(AppSettingsProviderContext); - const [gptProviders, setGptProviders] = useState(GPT_PROVIDERS); - const [ttsProviders, setTtsProviders] = useState(TTS_PROVIDERS); - const db = useContext(DbProviderContext); const refreshGptProviders = async () => { let providers = GPT_PROVIDERS; @@ -87,42 +91,82 @@ export const AISettingsProvider = ({ setTtsProviders({ ...providers }); }; - const refreshWhisperModel = async () => { - const whisperModel = await EnjoyApp.userSettings.get( - UserSettingKeyEnum.WHISPER - ); - if (WHISPER_MODELS.includes(whisperModel)) { - setWhisperModel(whisperModel); - } else { + const refreshTtsConfig = async () => { + let config = await EnjoyApp.userSettings.get(UserSettingKeyEnum.TTS_CONFIG); + if (!config) { + config = { + engine: "enjoyai", + model: "openai/tts-1", + voice: "alloy", + language: learningLanguage, + }; + EnjoyApp.userSettings.set(UserSettingKeyEnum.TTS_CONFIG, config); + } + setTtsConfig(config); + }; + + const handleSetTtsConfig = async (config: TtsConfigType) => { + return EnjoyApp.userSettings + .set(UserSettingKeyEnum.TTS_CONFIG, config) + .then(() => { + setTtsConfig(config); + }); + }; + + const refreshEchogardenSttConfig = async () => { + let config = await EnjoyApp.userSettings.get(UserSettingKeyEnum.ECHOGARDEN); + + if (!config) { let model = "tiny"; - if (whisperModel.match(/tiny/)) { - model = "tiny"; - } else if (whisperModel.match(/base/)) { - model = "base"; - } else if (whisperModel.match(/small/)) { - model = "small"; - } else if (whisperModel.match(/medium/)) { - model = "medium"; - } else if (whisperModel.match(/large/)) { 
- model = "large-v3-turbo"; - } + const whisperModel = await EnjoyApp.userSettings.get( + UserSettingKeyEnum.WHISPER + ); + if (WHISPER_MODELS.includes(whisperModel)) { + model = whisperModel; + } else if (whisperModel) { + if (whisperModel.match(/tiny/)) { + model = "tiny"; + } else if (whisperModel.match(/base/)) { + model = "base"; + } else if (whisperModel.match(/small/)) { + model = "small"; + } else if (whisperModel.match(/medium/)) { + model = "medium"; + } else if (whisperModel.match(/large/)) { + model = "large-v3-turbo"; + } - if ( - learningLanguage.match(/en/) && - model.match(/tiny|base|small|medium/) - ) { - model = `${model}.en`; + if ( + learningLanguage.match(/en/) && + model.match(/tiny|base|small|medium/) + ) { + model = `${model}.en`; + } } - setWhisperModel(model); + config = { + engine: "whisper", + whisper: { + model, + temperature: 0.2, + prompt: "", + encoderProvider: "cpu", + decoderProvider: "cpu", + }, + }; + EnjoyApp.userSettings.set(UserSettingKeyEnum.ECHOGARDEN, config); } + setEchogardenSttConfig(config); }; - const handleSetWhisperModel = async (name: string) => { - if (WHISPER_MODELS.includes(name)) { - setWhisperModel(name); - EnjoyApp.userSettings.set(UserSettingKeyEnum.WHISPER, name); - } + const handleSetEchogardenSttConfig = async ( + config: EchogardenSttConfigType + ) => { + return EnjoyApp.userSettings + .set(UserSettingKeyEnum.ECHOGARDEN, config) + .then(() => { + setEchogardenSttConfig(config); + }); }; useEffect(() => { @@ -190,7 +234,8 @@ export const AISettingsProvider = ({ }); } - refreshWhisperModel(); + refreshEchogardenSttConfig(); + refreshTtsConfig(); }; const handleSetOpenai = async (config: LlmProviderType) => { @@ -218,26 +263,15 @@ export const AISettingsProvider = ({ key: user?.accessToken, baseUrl: `${apiUrl}/api/ai`, }), - currentTtsEngine: - gptEngine.name === "openai" - ? { - name: "openai", - model: "tts-1", - voice: "alloy", - language: learningLanguage, - } - : { - name: "enjoyai", - model: "openai/tts-1", - voice: "alloy", - language: learningLanguage, - }, openai, setOpenai: (config: LlmProviderType) => handleSetOpenai(config), - whisperModel, - setWhisperModel: handleSetWhisperModel, + echogardenSttConfig, + setEchogardenSttConfig: (config: EchogardenSttConfigType) => + handleSetEchogardenSttConfig(config), sttEngine, setSttEngine: (name: SttEngineOptionEnum) => handleSetSttEngine(name), + ttsConfig, + setTtsConfig: (config: TtsConfigType) => handleSetTtsConfig(config), gptProviders, ttsProviders, }} diff --git a/enjoy/src/renderer/context/copilot-provider.tsx b/enjoy/src/renderer/context/copilot-provider.tsx index 35efc2117..e851c1de7 100644 --- a/enjoy/src/renderer/context/copilot-provider.tsx +++ b/enjoy/src/renderer/context/copilot-provider.tsx @@ -42,8 +42,8 @@ export const CopilotProvider = ({ const [active, setActive] = useState(false); const [currentChat, setCurrentChat] = useState(null); const [occupiedChat, setOccupiedChat] = useState(null); - const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext); - const { sttEngine, currentGptEngine, currentTtsEngine } = useContext( + const { EnjoyApp } = useContext(AppSettingsProviderContext); + const { sttEngine, currentGptEngine, ttsConfig } = useContext( AISettingsProviderContext ); const { currentHotkeys } = useContext(HotKeysSettingsProviderContext); @@ -100,10 +100,10 @@ export const CopilotProvider = ({ agent.type === ChatAgentTypeEnum.TTS ? 
{ tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, ...agent.config.tts, }, } @@ -114,10 +114,10 @@ export const CopilotProvider = ({ model: currentGptEngine.models.default, }, tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, }, }; return { diff --git a/enjoy/src/renderer/hooks/use-chat-session.tsx b/enjoy/src/renderer/hooks/use-chat-session.tsx index 45aec5746..10c3ecb44 100644 --- a/enjoy/src/renderer/hooks/use-chat-session.tsx +++ b/enjoy/src/renderer/hooks/use-chat-session.tsx @@ -29,9 +29,7 @@ export const useChatSession = (chatId: string) => { const { EnjoyApp, user, apiUrl, learningLanguage } = useContext( AppSettingsProviderContext ); - const { currentGptEngine, currentTtsEngine } = useContext( - AISettingsProviderContext - ); + const { currentGptEngine, ttsConfig } = useContext(AISettingsProviderContext); const { openai } = useContext(AISettingsProviderContext); const { addDblistener, removeDbListener } = useContext(DbProviderContext); const [chatMessages, dispatchChatMessages] = useReducer( @@ -386,10 +384,10 @@ export const useChatSession = (chatId: string) => { agent.type === ChatAgentTypeEnum.TTS ? { tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, ...agent.config.tts, }, } @@ -402,10 +400,10 @@ export const useChatSession = (chatId: string) => { model: currentGptEngine.models.default, }, tts: { - engine: currentTtsEngine.name, - model: currentTtsEngine.model, - voice: currentTtsEngine.voice, - language: learningLanguage, + engine: ttsConfig.engine, + model: ttsConfig.model, + voice: ttsConfig.voice, + language: ttsConfig.language, }, }; diff --git a/enjoy/src/renderer/hooks/use-speech.tsx b/enjoy/src/renderer/hooks/use-speech.tsx index ab3f4c066..179f8d3a8 100644 --- a/enjoy/src/renderer/hooks/use-speech.tsx +++ b/enjoy/src/renderer/hooks/use-speech.tsx @@ -11,11 +11,11 @@ export const useSpeech = () => { const { EnjoyApp, webApi, user, apiUrl, learningLanguage } = useContext( AppSettingsProviderContext ); - const { openai, currentGptEngine } = useContext(AISettingsProviderContext); + const { openai, ttsConfig } = useContext(AISettingsProviderContext); const tts = async (params: Partial) => { const { configuration } = params; - const { engine, model = "tts-1", voice } = configuration || {}; + const { engine, model, voice } = configuration || ttsConfig; let buffer; if (model.match(/^(openai|tts-)/)) { @@ -47,9 +47,9 @@ export const useSpeech = () => { const openaiTTS = async (params: Partial) => { const { configuration } = params; const { - engine = currentGptEngine.name, - model = "tts-1", - voice = "alloy", + engine = ttsConfig.engine, + model = ttsConfig.model, + voice = ttsConfig.voice, baseUrl, } = configuration || {}; @@ -85,8 +85,8 @@ export const useSpeech = () => { const azureTTS = async ( params: Partial ): Promise => { - const { configuration, text } = params; - const { model, voice } = configuration || {}; + const { configuration = ttsConfig, text } = params; + const { model, 
voice } = configuration; if (model !== "azure/speech") return; diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx index 4a5417455..8363e09de 100644 --- a/enjoy/src/renderer/hooks/use-transcribe.tsx +++ b/enjoy/src/renderer/hooks/use-transcribe.tsx @@ -26,7 +26,7 @@ const punctuationsPattern = /\w[.,!?](\s|$)/g; export const useTranscribe = () => { const { EnjoyApp, user, webApi } = useContext(AppSettingsProviderContext); - const { openai, whisperModel } = useContext(AISettingsProviderContext); + const { openai, echogardenSttConfig } = useContext(AISettingsProviderContext); const { punctuateText } = useAiCommand(); const [output, setOutput] = useState(""); @@ -47,7 +47,6 @@ export const useTranscribe = () => { params?: { targetId?: string; targetType?: string; - model?: string; originalText?: string; language: string; service: SttEngineOptionEnum | "upload"; @@ -65,7 +64,6 @@ export const useTranscribe = () => { }> => { const url = await transcode(mediaSrc); const { - model, targetId, targetType, originalText, @@ -81,7 +79,9 @@ export const useTranscribe = () => { if (service === "upload" && originalText) { result = await alignText(originalText); } else if (service === SttEngineOptionEnum.LOCAL) { - result = await transcribeByLocal(url, { language, model }); + result = await transcribeByLocal(url, { + language, + }); } else if (service === SttEngineOptionEnum.ENJOY_CLOUDFLARE) { result = await transcribeByCloudflareAi(blob); } else if (service === SttEngineOptionEnum.OPENAI) { @@ -223,27 +223,28 @@ export const useTranscribe = () => { const transcribeByLocal = async ( url: string, - options: { language: string; model?: string } + options: { language: string } ): Promise<{ engine: string; model: string; transcript: string; segmentTimeline: TimelineEntry[]; }> => { - let { language, model = whisperModel } = options || {}; + let { language } = options || {}; const languageCode = language.split("-")[0]; - if (model.match(/en/) && languageCode !== "en") { - model = model.replace(".en", ""); - } + let model: string; let res: RecognitionResult; try { + model = + echogardenSttConfig[ + echogardenSttConfig.engine.replace(".cpp", "Cpp") as + | "whisper" + | "whisperCpp" + ].model; res = await EnjoyApp.echogarden.recognize(url, { - engine: "whisper", language: languageCode, - whisper: { - model, - }, + ...echogardenSttConfig, }); } catch (err) { throw new Error(t("whisperTranscribeFailed", { error: err.message })); diff --git a/enjoy/src/renderer/hooks/use-transcriptions.tsx b/enjoy/src/renderer/hooks/use-transcriptions.tsx index f3dd06999..31560e887 100644 --- a/enjoy/src/renderer/hooks/use-transcriptions.tsx +++ b/enjoy/src/renderer/hooks/use-transcriptions.tsx @@ -12,7 +12,9 @@ import { SttEngineOptionEnum } from "@/types/enums"; import { t } from "i18next"; export const useTranscriptions = (media: AudioType | VideoType) => { - const { sttEngine } = useContext(AISettingsProviderContext); + const { sttEngine, echogardenSttConfig } = useContext( + AISettingsProviderContext + ); const { EnjoyApp, learningLanguage, webApi } = useContext( AppSettingsProviderContext ); @@ -113,7 +115,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => { const generateTranscription = async (params?: { originalText?: string; language?: string; - model?: string; service?: SttEngineOptionEnum | "upload"; isolate?: boolean; }) => { @@ -121,7 +122,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => { originalText, language = 
learningLanguage, service = sttEngine, - model: whisperModel, isolate = false, } = params || {}; setService(service); @@ -144,7 +144,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => { { targetId: media.id, targetType: media.mediaType, - model: whisperModel, originalText, language, service, diff --git a/enjoy/src/types/enjoy-app.d.ts b/enjoy/src/types/enjoy-app.d.ts index 3bec7f66f..de2fc9e93 100644 --- a/enjoy/src/types/enjoy-app.d.ts +++ b/enjoy/src/types/enjoy-app.d.ts @@ -276,6 +276,7 @@ type EnjoyAppType = { delete: (id: string) => Promise; }; echogarden: { + getPackagesDir: () => Promise; recognize: ( input: string, options: RecognitionOptions diff --git a/enjoy/src/types/enums.ts b/enjoy/src/types/enums.ts index da1df0f5b..87a6147d4 100644 --- a/enjoy/src/types/enums.ts +++ b/enjoy/src/types/enums.ts @@ -8,9 +8,11 @@ export enum UserSettingKeyEnum { HOTKEYS = "hotkeys", GPT_ENGINE = "gpt_engine", STT_ENGINE = "stt_engine", + TTS_CONFIG = "tts_config", VOCABULARY = "vocabulary", DICTS = "dicts", RECORDER = "recorder", + ECHOGARDEN = "echogarden", } export enum SttEngineOptionEnum { diff --git a/enjoy/src/types/index.d.ts b/enjoy/src/types/index.d.ts index d2fdf22b7..2382376ec 100644 --- a/enjoy/src/types/index.d.ts +++ b/enjoy/src/types/index.d.ts @@ -260,3 +260,20 @@ type TranscribeResultType = { tokenId?: number; url: string; }; + +type EchogardenSttConfigType = { + engine: "whisper" | "whisper.cpp"; + whisper: { + model: string; + temperature?: number; + prompt?: string; + encoderProvider?: "cpu" | "dml" | "cuda"; + decoderProvider?: "cpu" | "dml" | "cuda"; + }; + whisperCpp?: { + model: string; + temperature?: number; + prompt?: string; + enableGPU?: boolean; + }; +}; diff --git a/enjoy/vite.main.config.ts b/enjoy/vite.main.config.ts index 97bed2e35..293d4fe5e 100644 --- a/enjoy/vite.main.config.ts +++ b/enjoy/vite.main.config.ts @@ -28,6 +28,7 @@ export default defineConfig((env) => { "echogarden/dist/api/API.js", "echogarden/dist/audio/AudioUtilities.js", "echogarden/dist/utilities/Timeline.js", + "echogarden/dist/utilities/PackageManager.js", ], output: { strict: false, diff --git a/yarn.lock b/yarn.lock index 86721ef09..3762ed105 100644 --- a/yarn.lock +++ b/yarn.lock @@ -33,7 +33,7 @@ __metadata: autoprefixer: "npm:^10.4.20" nuxt: "npm:^3.14.159" nuxt-og-image: "npm:^3.0.8" - postcss: "npm:^8.4.47" + postcss: "npm:^8.4.48" sass: "npm:^1.80.6" tailwindcss: "npm:^3.4.14" vue: "npm:^3.5.12" @@ -354,7 +354,7 @@ __metadata: languageName: node linkType: hard -"@aws-sdk/client-polly@npm:^3.670.0": +"@aws-sdk/client-polly@npm:^3.687.0": version: 3.687.0 resolution: "@aws-sdk/client-polly@npm:3.687.0" dependencies: @@ -613,7 +613,7 @@ __metadata: languageName: node linkType: hard -"@aws-sdk/client-transcribe-streaming@npm:^3.672.0": +"@aws-sdk/client-transcribe-streaming@npm:^3.687.0": version: 3.687.0 resolution: "@aws-sdk/client-transcribe-streaming@npm:3.687.0" dependencies: @@ -1681,10 +1681,18 @@ __metadata: languageName: node linkType: hard -"@echogarden/espeak-ng-emscripten@npm:^0.2.0": - version: 0.2.0 - resolution: "@echogarden/espeak-ng-emscripten@npm:0.2.0" - checksum: 10c0/ec2669328e6f6629c5d416d5f15af5ff7de93fc01b44297022a982c0661c9df9cdcf3754f81c4ef77d6ca31fd84674c19a4fe94cc4f9bdc097315111cc157415 +"@echogarden/audio-io@npm:^0.2.3": + version: 0.2.3 + resolution: "@echogarden/audio-io@npm:0.2.3" + checksum: 10c0/b8b0de7f370d0115ece37272f5b012094c77de661c17407a667db1e65bbf6876bf832260234af17018fcaf92ef2480e8948e9481bdc2e8902d279b72669d99c6 + 
conditions: (os=win32 | os=darwin | os=linux) + languageName: node + linkType: hard + +"@echogarden/espeak-ng-emscripten@npm:^0.3.0": + version: 0.3.0 + resolution: "@echogarden/espeak-ng-emscripten@npm:0.3.0" + checksum: 10c0/7163023b91394eda5ded0fd2e819a14944edd4888beee4c1b87f1095ec536802749636c8f69144391d2ebf171ed23c217a2aa44ac48959f71ee8391ccdc47880 languageName: node linkType: hard @@ -1702,59 +1710,59 @@ __metadata: languageName: node linkType: hard -"@echogarden/fvad-wasm@npm:^0.1.2": - version: 0.1.2 - resolution: "@echogarden/fvad-wasm@npm:0.1.2" - checksum: 10c0/e5ebd0c8bddd19f26cb4862f86fffa842f941c969ffb7da726c936fc338e2cb317ef99039fe7c5472ac370af909618ad08139b8b95dce8b07993de432b86d56a +"@echogarden/fvad-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/fvad-wasm@npm:0.2.0" + checksum: 10c0/d5480abf5c555ffa7fbe3f75a18b3c5f9b0d5d64b0d31f92097269637e827f3253556b39e1d52bbabeae604c43c8f1479d1bfd798d7fee112cdb3a250124a093 languageName: node linkType: hard -"@echogarden/kissfft-wasm@npm:^0.1.1": - version: 0.1.1 - resolution: "@echogarden/kissfft-wasm@npm:0.1.1" - checksum: 10c0/3c034ebb6f64f1551ec70314ca373aec5c4907cc36f6efa9aa62d759840acbf471c50f697b71f910adcf5fab09cf3d918405a76e78d7ba94ffae9baf885c780e +"@echogarden/kissfft-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/kissfft-wasm@npm:0.2.0" + checksum: 10c0/f02a8c6101e50f7c01d50181295e8638bbb11d23e601b4dfda482cbac90f617a1116ea0177263429e3647b686052d0ab95ca56fb2a77e75ac76cccee9d4996a9 languageName: node linkType: hard -"@echogarden/pffft-wasm@npm:^0.3.0": - version: 0.3.0 - resolution: "@echogarden/pffft-wasm@npm:0.3.0" - checksum: 10c0/7425828ed23e1a1dbd77b940b0b141738337df21c0c71f8b9dbb21706b32832f70e61b0a4a441ce7bf3053433a52d48a1b905978ef01122fae0415bf31a83e46 +"@echogarden/pffft-wasm@npm:^0.4.2": + version: 0.4.2 + resolution: "@echogarden/pffft-wasm@npm:0.4.2" + checksum: 10c0/4e9ffc24195f5fa44f5623124f8815125af12c0c8934e7388b69f424418aab73e7e3acdf2c98894a60eed5509dccdb6844622f1232927ee6d51b2b7444beae4c languageName: node linkType: hard -"@echogarden/rnnoise-wasm@npm:^0.1.1": - version: 0.1.1 - resolution: "@echogarden/rnnoise-wasm@npm:0.1.1" - checksum: 10c0/806e51f68836d10319b40bd82dd24b02acc340c6b60b4bcd9609d8f480cd233a76c14a40a135cabb45c530c0187afb61a1ad6b8709c50de4559ec2136d45097f +"@echogarden/rnnoise-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/rnnoise-wasm@npm:0.2.0" + checksum: 10c0/b38a3e66f377de5429f3be57140b3fcea55727ef516ed25e53a310261156e90cda44c5c53d83f3061edff063ac7954bab3f62b9dd94aed86bd67e59bedfbd3bc languageName: node linkType: hard -"@echogarden/rubberband-wasm@npm:^0.1.1": - version: 0.1.1 - resolution: "@echogarden/rubberband-wasm@npm:0.1.1" - checksum: 10c0/4d254c511f65bfb425aaedebb59de174938bbbde5b8c64e758bc6b633e0a9764cbd6091e077e38d4e5bf8b462785b7ed769786e44c285543d0bd87f9b7ab6376 +"@echogarden/rubberband-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/rubberband-wasm@npm:0.2.0" + checksum: 10c0/e37c947a241efdd489f42502184700cce2d01b7f3ceb74461c88364a7eb0407ac745cda459d8afe577f76dc77629047a3529237b561be4ca9dd246a8482f5474 languageName: node linkType: hard -"@echogarden/sonic-wasm@npm:^0.1.1": - version: 0.1.1 - resolution: "@echogarden/sonic-wasm@npm:0.1.1" - checksum: 10c0/74872334ee730e03d21191d2e38aba4516a4ebe49380f4d2baf0da62d7d23a89d08839d2096de8b0bac548199c285d895466e51e83d24b841c4f8f08a52a6594 +"@echogarden/sonic-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/sonic-wasm@npm:0.2.0" + checksum: 
10c0/936a042f0e262062f87c97afbfd7b8ce573416843198bc8be31cf45ea4827bb4bb1e47d91a5e0c0dc752afa8726395e85230c52f9c12b9f94f404e51a7792cf0 languageName: node linkType: hard -"@echogarden/speex-resampler-wasm@npm:^0.1.1": - version: 0.1.1 - resolution: "@echogarden/speex-resampler-wasm@npm:0.1.1" - checksum: 10c0/f210506b865a5ed42dba6bdeaa5360ee591424a4007bf1c2f0a845ae78cec4fe0068ccf78c895f0f5cbf63778b3cccde2d21dc01bf51e83dd1e7a7ca963c26d9 +"@echogarden/speex-resampler-wasm@npm:^0.2.1": + version: 0.2.1 + resolution: "@echogarden/speex-resampler-wasm@npm:0.2.1" + checksum: 10c0/ec7a1c49c7d4d2e056cb850271d4e368a25fcd0c89b2b8c1f206e6d401b51ead4ca1d67fec9272554948f7c58ac35c9f048eb3f52605a4ad28f93ee278dddf95 languageName: node linkType: hard -"@echogarden/svoxpico-wasm@npm:^0.1.0": - version: 0.1.0 - resolution: "@echogarden/svoxpico-wasm@npm:0.1.0" - checksum: 10c0/1a787402601146ab175359831feff3dde22ec17771303a75103d157b04a2a323172681306e2bf4edb7b2b8626db52683d28202779332e48be6df178b95772d32 +"@echogarden/svoxpico-wasm@npm:^0.2.0": + version: 0.2.0 + resolution: "@echogarden/svoxpico-wasm@npm:0.2.0" + checksum: 10c0/142454b6d5c065e3c68beffa9122d382e8a0be0fb2ca983f1abb8249ba2b5f479813665a54e4069b98a3341e230e952b1a84cb8dd478f331716699866a55edda languageName: node linkType: hard @@ -9284,22 +9292,6 @@ __metadata: languageName: node linkType: hard -"buffer-indexof@npm:~0.0.0": - version: 0.0.2 - resolution: "buffer-indexof@npm:0.0.2" - checksum: 10c0/495124d3eacdfae2da6f815881e1c837459e48144ec46c246199ca3efc7052e767822784fd5399ca65a4043ba364b3a38b6b06dc6df44aab1aedea17544fc9fc - languageName: node - linkType: hard - -"buffer-split@npm:^1.0.0": - version: 1.0.0 - resolution: "buffer-split@npm:1.0.0" - dependencies: - buffer-indexof: "npm:~0.0.0" - checksum: 10c0/59280260d41c3871d227cd605343fc48a3ae3b3a7b799f3fec9a94e7275e040bdffcd84c3ae4a8e68de7f8232dee495f8d1c410b1466fc4be265d2d3410c8f75 - languageName: node - linkType: hard - "buffer-to-arraybuffer@npm:0.0.6": version: 0.0.6 resolution: "buffer-to-arraybuffer@npm:0.0.6" @@ -11600,27 +11592,27 @@ __metadata: languageName: node linkType: hard -"echogarden@npm:^1.8.7": - version: 1.8.7 - resolution: "echogarden@npm:1.8.7" +"echogarden@npm:^2.0.0": + version: 2.0.0 + resolution: "echogarden@npm:2.0.0" dependencies: - "@aws-sdk/client-polly": "npm:^3.670.0" - "@aws-sdk/client-transcribe-streaming": "npm:^3.672.0" - "@echogarden/espeak-ng-emscripten": "npm:^0.2.0" + "@aws-sdk/client-polly": "npm:^3.687.0" + "@aws-sdk/client-transcribe-streaming": "npm:^3.687.0" + "@echogarden/audio-io": "npm:^0.2.3" + "@echogarden/espeak-ng-emscripten": "npm:^0.3.0" "@echogarden/fasttext-wasm": "npm:^0.1.0" "@echogarden/flite-wasi": "npm:^0.1.1" - "@echogarden/fvad-wasm": "npm:^0.1.2" - "@echogarden/kissfft-wasm": "npm:^0.1.1" - "@echogarden/pffft-wasm": "npm:^0.3.0" - "@echogarden/rnnoise-wasm": "npm:^0.1.1" - "@echogarden/rubberband-wasm": "npm:^0.1.1" - "@echogarden/sonic-wasm": "npm:^0.1.1" - "@echogarden/speex-resampler-wasm": "npm:^0.1.1" - "@echogarden/svoxpico-wasm": "npm:^0.1.0" + "@echogarden/fvad-wasm": "npm:^0.2.0" + "@echogarden/kissfft-wasm": "npm:^0.2.0" + "@echogarden/pffft-wasm": "npm:^0.4.2" + "@echogarden/rnnoise-wasm": "npm:^0.2.0" + "@echogarden/rubberband-wasm": "npm:^0.2.0" + "@echogarden/sonic-wasm": "npm:^0.2.0" + "@echogarden/speex-resampler-wasm": "npm:^0.2.1" + "@echogarden/svoxpico-wasm": "npm:^0.2.0" "@echogarden/transformers-nodejs-lite": "npm:^2.17.1-lite.3" "@mozilla/readability": "npm:^0.5.0" alawmulaw: "npm:^6.0.0" - buffer-split: 
"npm:^1.0.0" chalk: "npm:^5.3.0" cldr-segmentation: "npm:^2.2.1" command-exists: "npm:^1.2.9" @@ -11638,8 +11630,8 @@ __metadata: microsoft-cognitiveservices-speech-sdk: "npm:^1.41.0" moving-median: "npm:^1.0.0" msgpack-lite: "npm:^0.1.26" - onnxruntime-node: "npm:^1.19.2" - openai: "npm:^4.67.3" + onnxruntime-node: "npm:^1.20.0" + openai: "npm:^4.71.1" sam-js: "npm:^0.3.1" strip-ansi: "npm:^7.1.0" tar: "npm:^7.4.3" @@ -12069,7 +12061,7 @@ __metadata: dayjs: "npm:^1.11.13" decamelize: "npm:^6.0.0" decamelize-keys: "npm:^2.0.1" - echogarden: "npm:^1.8.7" + echogarden: "npm:^2.0.0" electron: "npm:^33.2.0" electron-context-menu: "npm:^4.0.4" electron-devtools-installer: "npm:^3.2.0" @@ -12094,7 +12086,7 @@ __metadata: langchain: "npm:^0.3.5" lodash: "npm:^4.17.21" lru-cache: "npm:^11.0.2" - lucide-react: "npm:^0.455.0" + lucide-react: "npm:^0.456.0" mark.js: "npm:^8.11.1" microsoft-cognitiveservices-speech-sdk: "npm:^1.41.0" mime-types: "npm:^2.1.35" @@ -12103,7 +12095,7 @@ __metadata: octokit: "npm:^4.0.2" openai: "npm:^4.71.1" pitchfinder: "npm:^2.3.2" - postcss: "npm:^8.4.47" + postcss: "npm:^8.4.48" progress: "npm:^2.0.3" prop-types: "npm:^15.8.1" proxy-agent: "npm:^6.4.0" @@ -12113,7 +12105,7 @@ __metadata: react-audio-voice-recorder: "npm:^2.2.0" react-dom: "npm:^18.3.1" react-frame-component: "npm:^5.2.7" - react-hook-form: "npm:^7.53.1" + react-hook-form: "npm:^7.53.2" react-hotkeys-hook: "npm:^4.6.1" react-i18next: "npm:^15.1.1" react-markdown: "npm:^9.0.1" @@ -12146,7 +12138,7 @@ __metadata: wavesurfer.js: "npm:^7.8.8" zod: "npm:^3.23.8" zod-to-json-schema: "npm:^3.23.5" - zx: "npm:^8.2.0" + zx: "npm:^8.2.1" languageName: unknown linkType: soft @@ -16209,12 +16201,12 @@ __metadata: languageName: node linkType: hard -"lucide-react@npm:^0.455.0": - version: 0.455.0 - resolution: "lucide-react@npm:0.455.0" +"lucide-react@npm:^0.456.0": + version: 0.456.0 + resolution: "lucide-react@npm:0.456.0" peerDependencies: react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc - checksum: 10c0/879f51dc0143c36452022ee852cdc9c8967d644f4102ba5311d55a6fe63a0c9a5963069e56b4d034579e9e502e032736009c592e3706067cd0cf9ca2556e07f9 + checksum: 10c0/a56f6922381ff529f1df93c3fca47766682a8e427ec266fd1f4bcf063f65f93460f9e7982abf79d79e5f7bb50b8013d0493a26f78677de9b1c06b6b5a6343d56 languageName: node linkType: hard @@ -18510,7 +18502,7 @@ __metadata: languageName: node linkType: hard -"onnxruntime-node@npm:^1.19.2": +"onnxruntime-node@npm:^1.20.0": version: 1.20.0 resolution: "onnxruntime-node@npm:1.20.0" dependencies: @@ -18544,7 +18536,7 @@ __metadata: languageName: node linkType: hard -"openai@npm:^4.67.3, openai@npm:^4.71.0, openai@npm:^4.71.1": +"openai@npm:^4.71.0, openai@npm:^4.71.1": version: 4.71.1 resolution: "openai@npm:4.71.1" dependencies: @@ -19172,7 +19164,7 @@ __metadata: languageName: node linkType: hard -"picocolors@npm:^1.0.0, picocolors@npm:^1.0.1, picocolors@npm:^1.1.0": +"picocolors@npm:^1.0.0, picocolors@npm:^1.0.1, picocolors@npm:^1.1.0, picocolors@npm:^1.1.1": version: 1.1.1 resolution: "picocolors@npm:1.1.1" checksum: 10c0/e2e3e8170ab9d7c7421969adaa7e1b31434f789afb9b3f115f6b96d91945041ac3ceb02e9ec6fe6510ff036bcc0bf91e69a1772edc0b707e12b19c0f2d6bcf58 @@ -19712,6 +19704,17 @@ __metadata: languageName: node linkType: hard +"postcss@npm:^8.4.48": + version: 8.4.48 + resolution: "postcss@npm:8.4.48" + dependencies: + nanoid: "npm:^3.3.7" + picocolors: "npm:^1.1.1" + source-map-js: "npm:^1.2.1" + checksum: 
10c0/d586361fda12fc7ab5650ce9b5763fc61d6ea2cecac9da98fceea6a3f27e42ed34db830582411bc06743492d9bb414c52b0c81da65440682d244d692da2f928a + languageName: node + linkType: hard + "postject@npm:^1.0.0-alpha.6": version: 1.0.0-alpha.6 resolution: "postject@npm:1.0.0-alpha.6" @@ -20094,7 +20097,7 @@ __metadata: languageName: node linkType: hard -"react-hook-form@npm:^7.53.1": +"react-hook-form@npm:^7.53.2": version: 7.53.2 resolution: "react-hook-form@npm:7.53.2" peerDependencies: @@ -24434,9 +24437,9 @@ __metadata: languageName: node linkType: hard -"zx@npm:^8.2.0": - version: 8.2.0 - resolution: "zx@npm:8.2.0" +"zx@npm:^8.2.1": + version: 8.2.1 + resolution: "zx@npm:8.2.1" dependencies: "@types/fs-extra": "npm:>=11" "@types/node": "npm:>=20" @@ -24447,6 +24450,6 @@ __metadata: optional: true bin: zx: build/cli.js - checksum: 10c0/67baf00280343259f04b2bf58b2dc7c90abc7b42f3b4ca2794ea59bf988c53707c08c8427dbf22e88606c321a08254bec0b27400840029f4086ba3c43b8056a8 + checksum: 10c0/39aac596a031eb149d91c54359ab74969e5135ce3de401dabcae06b16516fdf28aa97c01b5c2509e46a5bcf9d3d941f349e5ac2df861f67aa6755df84e629bc5 languageName: node linkType: hard