Skip to content

Commit

Permalink
Improve settings (#1174)
Browse files Browse the repository at this point in the history
* more settings for local whisper

* clean code

* allow opening the echogarden packages dir

* upgrade deps

* handle unhandled rejections raised by recognize

* fix

* add tts settings

* update ui

* allow opening the library path
  • Loading branch information
an-lee authored Nov 11, 2024
1 parent 6c8da30 commit 521ee76
Show file tree
Hide file tree
Showing 31 changed files with 846 additions and 321 deletions.
2 changes: 1 addition & 1 deletion 1000h-portal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
"devDependencies": {
"autoprefixer": "^10.4.20",
"postcss": "^8.4.47",
"postcss": "^8.4.48",
"sass": "^1.80.6",
"tailwindcss": "^3.4.14"
}
Expand Down
10 changes: 5 additions & 5 deletions enjoy/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"typescript": "^5.6.3",
"vite": "^5.4.10",
"vite-plugin-static-copy": "^2.1.0",
"zx": "^8.2.0"
"zx": "^8.2.1"
},
"dependencies": {
"@andrkrn/ffprobe-static": "^5.2.0",
Expand Down Expand Up @@ -141,7 +141,7 @@
"dayjs": "^1.11.13",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"echogarden": "^1.8.7",
"echogarden": "^2.0.0",
"electron-context-menu": "^4.0.4",
"electron-log": "^5.2.2",
"electron-settings": "^4.0.4",
Expand All @@ -159,23 +159,23 @@
"langchain": "^0.3.5",
"lodash": "^4.17.21",
"lru-cache": "^11.0.2",
"lucide-react": "^0.455.0",
"lucide-react": "^0.456.0",
"mark.js": "^8.11.1",
"microsoft-cognitiveservices-speech-sdk": "^1.41.0",
"mime-types": "^2.1.35",
"mustache": "^4.2.0",
"next-themes": "^0.4.3",
"openai": "^4.71.1",
"pitchfinder": "^2.3.2",
"postcss": "^8.4.47",
"postcss": "^8.4.48",
"proxy-agent": "^6.4.0",
"react": "^18.3.1",
"react-activity-calendar": "^2.7.1",
"react-audio-visualize": "^1.2.0",
"react-audio-voice-recorder": "^2.2.0",
"react-dom": "^18.3.1",
"react-frame-component": "^5.2.7",
"react-hook-form": "^7.53.1",
"react-hook-form": "^7.53.2",
"react-hotkeys-hook": "^4.6.1",
"react-i18next": "^15.1.1",
"react-markdown": "^9.0.1",
Expand Down
14 changes: 13 additions & 1 deletion enjoy/src/i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -907,5 +907,17 @@
"failedToLoadLink": "Failed to load link",
"refreshSpeech": "Refresh speech",
"locateParagraph": "Locate paragraph",
"close": "Close"
"close": "Close",
"config": "Config",
"temperature": "Temperature",
"encoderProvider": "Encoder Provider",
"decoderProvider": "Decoder Provider",
"enableGPU": "Enable GPU",
"openPackagesDir": "Open models dir",
"whisperModelDescription": "Model will be downloaded when first used.",
"whisperEngineDescription": "OpenAI Whisper with inference done via the ONNX runtime.",
"whisperCppEngineDescription": "C++ port of the Whisper architecture.",
"ttsService": "Text to Speech Service",
"openaiTtsServiceDescription": "Use OpenAI TTS service from your own key.",
"enjoyTtsServiceDescription": "Use TTS service provided by Enjoy. OpenAI or Azure is supported."
}
14 changes: 13 additions & 1 deletion enjoy/src/i18n/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -907,5 +907,17 @@
"failedToLoadLink": "加载链接失败",
"refreshSpeech": "刷新语音",
"locateParagraph": "定位段落",
"close": "关闭"
"close": "关闭",
"config": "配置",
"temperature": "温度",
"encoderProvider": "编码器",
"decoderProvider": "解码器",
"enableGPU": "启用 GPU",
"openPackagesDir": "打开模型目录",
"whisperModelDescription": "模型首次使用时会下载。",
"whisperEngineDescription": "OpenAI Whisper 使用 ONNX 运行时进行推理。",
"whisperCppEngineDescription": "Whisper 的 C++ 实现。",
"ttsService": "文字转语音服务",
"openaiTtsServiceDescription": "使用您自己的 API key 来使用 OpenAI TTS 服务。",
"enjoyTtsServiceDescription": "使用 Enjoy 提供的 TTS 服务,支持 OpenAI 或 Azure。"
}
4 changes: 3 additions & 1 deletion enjoy/src/main/db/models/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,9 @@ export class Document extends Model<Document> {
}

logger.debug("detected file type", filePath, mimeType, extension);
if (!DocumentFormats.includes(extension)) {
if (extension === "zip" && filePath.endsWith(".epub")) {
extension = "epub";
} else if (!DocumentFormats.includes(extension)) {
logger.error("unsupported file type", filePath, extension);
throw new Error(
t("models.document.fileNotSupported", { file: filePath })
Expand Down
43 changes: 30 additions & 13 deletions enjoy/src/main/echogarden.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import {
type Timeline,
type TimelineEntry,
} from "echogarden/dist/utilities/Timeline.d.js";
import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";
import { ensureAndGetPackagesDir } from "echogarden/dist/utilities/PackageManager.js";
import path from "path";
import log from "@main/logger";
import url from "url";
Expand All @@ -25,7 +27,6 @@ import { enjoyUrlToPath, pathToEnjoyUrl } from "./utils";
import { UserSetting } from "./db/models";
import { UserSettingKeyEnum } from "@/types/enums";
import { WHISPER_MODELS } from "@/constants";
import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";

Echogarden.setGlobalOption(
"ffmpegPath",
Expand Down Expand Up @@ -59,7 +60,27 @@ class EchogardenWrapper {
public wordTimelineToSegmentSentenceTimeline: typeof wordTimelineToSegmentSentenceTimeline;

constructor() {
this.recognize = Echogarden.recognize;
/**
 * Wraps `Echogarden.recognize` so that a rejection surfacing only through the
 * process-level "unhandledRejection" event while recognition is in flight is
 * turned into a rejection of the returned promise.
 *
 * The temporary listener is removed on every exit path (success, ordinary
 * rejection, synchronous throw, or the listener firing), so failed calls do
 * not leave listeners behind on `process`.
 *
 * NOTE(review): the listener is process-wide, so an unrelated unhandled
 * rejection raised while recognition is running will also reject this call.
 *
 * @param sampleFile - Input passed through to `Echogarden.recognize`.
 * @param options - Recognition options passed through unchanged.
 * @returns A promise settling with the result of `Echogarden.recognize`.
 */
this.recognize = (sampleFile: string, options: RecognitionOptions) => {
  return new Promise((resolve, reject) => {
    const detach = () => {
      process.removeListener("unhandledRejection", onUnhandledRejection);
    };

    const onUnhandledRejection = (reason: unknown) => {
      // One-shot: stop listening as soon as a rejection has been captured.
      detach();
      reject(reason);
    };

    // Add temporary unhandledRejection listener
    process.on("unhandledRejection", onUnhandledRejection);

    try {
      // Call the original recognize function
      Echogarden.recognize(sampleFile, options).then(
        (result) => {
          detach();
          resolve(result);
        },
        (error) => {
          // Previously the listener stayed attached on this path.
          detach();
          reject(error);
        }
      );
    } catch (error) {
      // A synchronous throw must not leave the listener attached either.
      detach();
      reject(error);
    }
  });
};
this.align = Echogarden.align;
this.alignSegments = Echogarden.alignSegments;
this.denoise = Echogarden.denoise;
Expand All @@ -78,23 +99,15 @@ class EchogardenWrapper {
engine: "whisper",
whisper: {
model: "tiny.en",
language: "en",
} as WhisperOptions,
},
}
) {
const sampleFile = path.join(__dirname, "samples", "jfk.wav");
try {
const whisperModel = await UserSetting.get(UserSettingKeyEnum.WHISPER);
if (WHISPER_MODELS.includes(whisperModel)) {
options.whisper.model = whisperModel;
}
} catch (e) {
logger.error(e);
}

try {
logger.info("check:", options);
const result = await this.recognize(sampleFile, options);
logger.info(result);
logger.info(result?.transcript);
fs.writeJsonSync(
path.join(settings.cachePath(), "echogarden-check.json"),
result,
Expand Down Expand Up @@ -225,6 +238,10 @@ class EchogardenWrapper {
ipcMain.handle("echogarden-check", async (_event, options: any) => {
return this.check(options);
});

// IPC endpoint: replies with whatever `ensureAndGetPackagesDir()` returns.
ipcMain.handle("echogarden-get-packages-dir", async () =>
  ensureAndGetPackagesDir()
);
}
}

Expand Down
7 changes: 5 additions & 2 deletions enjoy/src/preload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
},
},
echogarden: {
getPackagesDir: () => {
return ipcRenderer.invoke("echogarden-get-packages-dir");
},
recognize: (input: string, options: RecognitionOptions) => {
return ipcRenderer.invoke("echogarden-recognize", input, options);
},
Expand Down Expand Up @@ -505,8 +508,8 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
transcode: (input: string) => {
return ipcRenderer.invoke("echogarden-transcode", input);
},
check: () => {
return ipcRenderer.invoke("echogarden-check");
check: (options: RecognitionOptions) => {
return ipcRenderer.invoke("echogarden-check", options);
},
},
ffmpeg: {
Expand Down
10 changes: 5 additions & 5 deletions enjoy/src/renderer/components/chats/chat-agent-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export const ChatAgentForm = (props: {
const { EnjoyApp, learningLanguage, webApi } = useContext(
AppSettingsProviderContext
);
const { currentTtsEngine } = useContext(AISettingsProviderContext);
const { ttsConfig } = useContext(AISettingsProviderContext);
const [selectedTemplate, setSelectedTemplate] = useState<string>("custom");
const [templates, setTemplates] = useState<
{
Expand Down Expand Up @@ -104,10 +104,10 @@ export const ChatAgentForm = (props: {
const { type, name, description, config } = data;
if (type === ChatAgentTypeEnum.TTS) {
config.tts = {
engine: config.tts?.engine || currentTtsEngine.name,
model: config.tts?.model || currentTtsEngine.model,
language: config.tts?.language || learningLanguage,
voice: config.tts?.voice || currentTtsEngine.voice,
engine: config.tts?.engine || ttsConfig.engine,
model: config.tts?.model || ttsConfig.model,
language: config.tts?.language || ttsConfig.language,
voice: config.tts?.voice || ttsConfig.voice,
};
}

Expand Down
18 changes: 9 additions & 9 deletions enjoy/src/renderer/components/chats/chat-list.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export const ChatList = (props: {
setCurrentChat: (chat: ChatType) => void;
}) => {
const { chats, chatAgent, currentChat, setCurrentChat } = props;
const { sttEngine, currentGptEngine, currentTtsEngine } = useContext(
const { sttEngine, currentGptEngine, ttsConfig } = useContext(
AISettingsProviderContext
);
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
Expand Down Expand Up @@ -78,10 +78,10 @@ export const ChatList = (props: {
agent.type === ChatAgentTypeEnum.TTS
? {
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
...agent.config.tts,
},
}
Expand All @@ -92,10 +92,10 @@ export const ChatList = (props: {
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
};
return {
Expand Down
14 changes: 6 additions & 8 deletions enjoy/src/renderer/components/chats/chat-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,8 @@ const ChatMemberSetting = (props: {
onFinish?: () => void;
}) => {
const { chat, agentMembers, onFinish } = props;
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
const { currentGptEngine, currentTtsEngine } = useContext(
AISettingsProviderContext
);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { currentGptEngine, ttsConfig } = useContext(AISettingsProviderContext);
const [memberTab, setMemberTab] = useState(agentMembers[0]?.userId);
const [query, setQuery] = useState("");
const [chatAgents, setChatAgents] = useState<ChatAgentType[]>([]);
Expand All @@ -90,10 +88,10 @@ const ChatMemberSetting = (props: {
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
},
})
Expand Down
11 changes: 4 additions & 7 deletions enjoy/src/renderer/components/documents/document-config-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ import {
import { t } from "i18next";
import { TTSForm } from "@renderer/components";
import { LoaderIcon } from "lucide-react";
import { useState } from "react";
import { useContext, useState } from "react";
import { AISettingsProviderContext } from "@renderer/context";

const documentConfigSchema = z.object({
config: z.object({
Expand All @@ -33,6 +34,7 @@ export const DocumentConfigForm = (props: {
}) => {
const { config, onSubmit } = props;
const [submitting, setSubmitting] = useState<boolean>(false);
const { ttsConfig } = useContext(AISettingsProviderContext);

const form = useForm<z.infer<typeof documentConfigSchema>>({
resolver: zodResolver(documentConfigSchema),
Expand All @@ -42,12 +44,7 @@ export const DocumentConfigForm = (props: {
config: {
autoTranslate: true,
autoNextSpeech: true,
tts: {
engine: "openai",
model: "openai/tts-1",
language: "en-US",
voice: "alloy",
},
tts: ttsConfig,
},
},
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ export const MediaTranscriptionGenerateButton = (props: {
generateTranscription({
originalText: data.text,
language: data.language,
model: data.model,
service: data.service as SttEngineOptionEnum | "upload",
isolate: data.isolate,
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ const LoadingContent = () => {
generateTranscription({
originalText: data.text,
language: data.language,
model: data.model,
service: data.service as SttEngineOptionEnum | "upload",
isolate: data.isolate,
});
Expand Down
Loading

0 comments on commit 521ee76

Please sign in to comment.