Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Lobehub TTS service for custom endpoint like ollama #107

Merged
merged 16 commits into from
Feb 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,19 @@ graph TD;

## Prerequisite

* Setup Anki with AnkiConnect locally
* ffplayer (installed along with ffmpeg)
- Set up Anki with AnkiConnect locally
- ffplay (installed along with ffmpeg)

## Installation



### Install gakuon

```bash
npm install -g gakuon
```

### Install ffmpeg (OSX/Linux)
### Install ffmpeg (OSX/Linux)

```
brew install ffmpeg
```
Expand Down Expand Up @@ -231,14 +231,20 @@ Generate helpful learning content.
example.description = "A natural example sentence using the word"
example.required = true
example.audio = true
example.locale = "ja-JP"
# Set ttsVoice when using edge-tts
# You can find the list of available voices with tools like https://github.com/andresayac/edge-tts
example.ttsVoice = "ja-JP-NanamiNeural"

explanation_jp.description = "Simple explanation in Japanese"
explanation_jp.required = true
explanation_jp.audio = true
explanation_jp.locale = "ja-JP"

explanation_en.description = "Detailed explanation in English"
explanation_en.required = true
explanation_en.audio = true
explanation_en.locale = "en-US"

usage_notes.description = "Additional usage notes"
usage_notes.required = false
Expand Down
1,168 changes: 1,120 additions & 48 deletions bun.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"scripts": {
"test:integration": "jest --config jest.integration.config.js",
"start": "bun run src/index.ts",
"dev": "bun run --hot src/index.ts serve",
"build": "bun build src/index.ts --target=node --outfile dist/gakuon",
"prepublishOnly": "rm -rf dist && bun run build && bun run build:client",
"fmt": "bunx biome format --write ./src",
Expand Down Expand Up @@ -43,6 +44,7 @@
},
"dependencies": {
"@iarna/toml": "^2.2.5",
"@lobehub/tts": "^1.28.0",
"@tailwindcss/vite": "^4.0.0",
"commander": "^13.1.0",
"cors": "^2.8.5",
Expand All @@ -52,6 +54,7 @@
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-router-dom": "^7.1.3",
"ws": "^8.18.0",
"zod": "^3.24.1"
},
"files": [
Expand All @@ -61,4 +64,4 @@
"trustedDependencies": [
"@biomejs/biome"
]
}
}
3 changes: 3 additions & 0 deletions src/commands/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ example.audio = true
explanation_jp.description = "Simple explanation in Japanese"
explanation_jp.required = true
explanation_jp.audio = true
explanation_jp.locale = "ja-JP"

explanation_en.description = "Detailed explanation in English"
explanation_en.required = true
explanation_en.audio = true
explanation_en.locale = "en-US"

Requirements:
1. Output valid TOML without any markdown formatting or code blocks
Expand Down Expand Up @@ -102,6 +104,7 @@ export async function init(options: InitOptions = {}) {
config.global.openai.baseUrl,
config.global.openai.chatModel,
config.global.openai.ttsModel,
config.global.ttsMethod,
debug,
);

Expand Down
2 changes: 2 additions & 0 deletions src/commands/learn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ export async function learn(options: LearnOptions = {}) {
config.global.openai.baseUrl,
config.global.openai.chatModel,
config.global.openai.ttsModel,
config.global.ttsMethod,
options.debug,
);
const contentManager = new ContentManager(
ankiService,
openaiService,
config.global.ttsVoice,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember to update other ContentManager usage

options.debug,
);
const audioPlayer = new AudioPlayer(ankiService, options.debug);
Expand Down
10 changes: 8 additions & 2 deletions src/commands/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,15 @@ export async function serve(options: ServeOptions = {}) {
config.global.openai.baseUrl,
config.global.openai.chatModel,
config.global.openai.ttsModel,
config.global.ttsMethod,
debug,
);
const contentManager = new ContentManager(
ankiService,
openaiService,
config.global.ttsVoice,
debug,
);
const contentManager = new ContentManager(ankiService, openaiService, debug);

// Create and start server
const app = createServer({
Expand Down Expand Up @@ -57,6 +63,6 @@ export async function serve(options: ServeOptions = {}) {
// Start listening
server = app.listen(port, () => {
console.log(`Gakuon server running at http://localhost:${port}`);
console.log(`Using anki connect server at ${config.global.ankiHost}`)
console.log(`Using anki connect server at ${config.global.ankiHost}`);
});
}
1 change: 1 addition & 0 deletions src/commands/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export async function test(options: TestOptions = {}) {
config.global.openai.baseUrl,
config.global.openai.chatModel,
config.global.openai.ttsModel,
config.global.ttsMethod,
debug,
);

Expand Down
6 changes: 4 additions & 2 deletions src/config/loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ import {
NewCardGatherOrder,
QueueOrder,
ReviewSortOrder,
TtsMethod,
} from "./types";

export const DEFAULT_CONFIG: GakuonConfig = {
global: {
ankiHost: "http://localhost:8765",
openaiApiKey: "${OPENAI_API_KEY}",
ttsVoice: "alloy",
ttsMethod: TtsMethod.OPENAI,
openai: {
baseUrl: "https://api.openai.com/v1",
chatModel: "gpt-4o",
Expand Down Expand Up @@ -110,14 +112,15 @@ function processRawConfig(rawConfig: unknown): GakuonConfig {
...(processed.global?.openai || {}),
};

const configObj = {
const configObj: GakuonConfig = {
...processed,
decks: processed.decks || DEFAULT_CONFIG.decks,
global: {
ankiHost: processed.global?.ankiHost || DEFAULT_CONFIG.global.ankiHost,
openaiApiKey:
processed.global?.openaiApiKey || DEFAULT_CONFIG.global.openaiApiKey,
ttsVoice: processed.global?.ttsVoice || DEFAULT_CONFIG.global.ttsVoice,
ttsMethod: processed.global?.ttsMethod || DEFAULT_CONFIG.global.ttsMethod,
defaultDeck: processed.global?.defaultDeck,
openai: openaiConfig,
cardOrder: {
Expand Down Expand Up @@ -170,7 +173,6 @@ export function loadConfig(customPath?: string): GakuonConfig {

// Fall back to file-based config
const configPath = customPath || join(homedir(), ".gakuon", "config.toml");

if (!existsSync(configPath)) {
saveConfig(DEFAULT_CONFIG);
}
Expand Down
42 changes: 37 additions & 5 deletions src/config/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ export class PromptError extends Error {
}
}

/**
 * Error subclass named "AudioGenerationError" that carries a list of
 * detail messages in addition to the main error message.
 *
 * @param message - Human-readable summary passed to the base `Error`.
 * @param details - Extra context; `details.messages` holds the individual messages.
 */
export class AudioGenerationError extends Error {
  public details: { messages: string[] };

  constructor(message: string, details: { messages: string[] }) {
    super(message);
    // Attach the payload first, then override the default "Error" name.
    this.details = details;
    this.name = "AudioGenerationError";
  }
}

export type OpenAIConfig = z.infer<typeof OpenAIConfigSchema>;

export type GakuonConfig = z.infer<typeof GakuonConfigSchema>;
Expand Down Expand Up @@ -82,6 +92,11 @@ export enum QueueOrder {
MIXED = "mixed",
}

/**
 * Text-to-speech methods accepted by the `ttsMethod` setting of the
 * global config (validated there via `z.nativeEnum(TtsMethod)`).
 */
export enum TtsMethod {
// Written as "openai" in the config.
OPENAI = "openai",
// Written as "edge-tts" in the config.
EDGE_TTS = "edge-tts",
}

export const OpenAIConfigSchema = z.object({
baseUrl: z.string(),
chatModel: z.string(),
Expand All @@ -98,7 +113,8 @@ export const CardOrderSchema = z.object({
export const GlobalConfigSchema = z.object({
ankiHost: z.string(),
openaiApiKey: z.string(),
ttsVoice: z.string(),
ttsMethod: z.nativeEnum(TtsMethod),
ttsVoice: z.string().optional(),
defaultDeck: z.string().optional(),
openai: OpenAIConfigSchema,
cardOrder: CardOrderSchema,
Expand All @@ -109,16 +125,32 @@ export const DeckConfigSchema = z.object({
pattern: z.string(),
fields: z.record(z.string()),
prompt: z.string(),
ttsVoice: z.string().optional(),
responseFields: z.record(
z.object({
description: z.string(),
required: z.boolean(),
audio: z.boolean().optional(),
locale: z.string().optional(),
ttsVoice: z.string().optional(),
}),
),
});

export const GakuonConfigSchema = z.object({
global: GlobalConfigSchema,
decks: z.array(DeckConfigSchema),
});
/**
 * Schema for the whole configuration: one `global` section plus a list of
 * deck definitions.
 *
 * Cross-field rule: when `global.ttsMethod` is `edge-tts`, every entry of
 * every deck's `responseFields` must have a non-empty `ttsVoice`.
 */
export const GakuonConfigSchema = z
  .object({
    global: GlobalConfigSchema,
    decks: z.array(DeckConfigSchema),
  })
  .refine(
    (config) => {
      // The rule only applies to the edge-tts method.
      if (config.global.ttsMethod !== TtsMethod.EDGE_TTS) {
        return true;
      }
      // Fail on the first response field that lacks a voice.
      for (const deck of config.decks) {
        for (const field of Object.values(deck.responseFields)) {
          if (!field.ttsVoice) {
            return false;
          }
        }
      }
      return true;
    },
    {
      message: "responseFields.ttsVoice is required when ttsMethod is edge-tts",
      path: ["decks", "responseFields", "ttsVoice"],
    },
  );
4 changes: 3 additions & 1 deletion src/services/anki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,9 @@ export class AnkiService {
try {
await this.request("sync", {});
} catch (e: unknown) {
if ((e as { message?: string })?.message?.includes("auth not configured")) {
if (
(e as { message?: string })?.message?.includes("auth not configured")
) {
// Skip syncing when auth is not configured
return;
}
Expand Down
32 changes: 30 additions & 2 deletions src/services/content-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ import type { Card, DeckConfig, DynamicContent } from "../config/types";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { randomBytes } from "node:crypto";

import { TtsMethod } from "../config/types";
export class ContentManager {
private tmpDir = tmpdir();

constructor(
private ankiService: AnkiService,
private openaiService: OpenAIService,
private ttsVoice: string,
private debug = false,
) {
this.debugLog("Using tmpDir", this.tmpDir);
Expand Down Expand Up @@ -65,6 +66,32 @@ export class ContentManager {
return { content, audioFiles, isNewContent: false, metadata };
}

/**
 * Picks the voice to use when generating audio for one response field.
 *
 * - `openai`: first truthy value among the field's `ttsVoice`, the deck's
 *   `ttsVoice`, and the voice passed to this manager's constructor.
 * - `edge-tts`: only the field's own `ttsVoice` is considered.
 *
 * @param deckConfig - Deck configuration supplying the deck-level voice.
 * @param fieldConfig - Response-field configuration supplying the field-level voice.
 * @returns The chosen voice, or `undefined` when none is set or the method
 *   is neither of the two handled here.
 */
private getTtsVoice(
  deckConfig: DeckConfig,
  fieldConfig: DeckConfig["responseFields"][string],
) {
  switch (this.openaiService.ttsMethod) {
    case TtsMethod.OPENAI: {
      // Most specific setting wins: field, then deck, then global.
      const voice =
        fieldConfig.ttsVoice || deckConfig.ttsVoice || this.ttsVoice;
      this.debugLog(
        `Getting tts voice for ${TtsMethod.OPENAI}, using: ${voice}`,
      );
      return voice;
    }
    case TtsMethod.EDGE_TTS: {
      // No deck/global fallback here: the voice must be set on the response field itself.
      this.debugLog(
        `Getting tts voice for ${TtsMethod.EDGE_TTS}, using: ${fieldConfig.ttsVoice}`,
      );
      return fieldConfig.ttsVoice;
    }
    default:
      return undefined;
  }
}

private async generateAndStoreContent(card: Card, deckConfig: DeckConfig) {
// Generate content
const content = await this.openaiService.generateContent(card, deckConfig);
Expand All @@ -81,7 +108,8 @@ export class ContentManager {
const audioPromise = this.openaiService.generateAudio(
content[field],
tempPath,
"alloy",
this.getTtsVoice(deckConfig, fieldConfig),
fieldConfig.locale,
);
audioPromises.push(audioPromise);

Expand Down
Loading