Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

添加OpenAI TTS功能 #38

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"name": "JA Learner Development",
"image": "mcr.microsoft.com/dotnet/sdk:6.0",
"features": {
"ghcr.io/devcontainers/features/node:1": {
"version": "lts"
},
"ghcr.io/devcontainers/features/git:1": {}
},
"postCreateCommand": "dotnet restore && npm install -g @vue/cli",
"customizations": {
"vscode": {
"extensions": [
"ms-dotnettools.csharp",
"ms-dotnettools.vscode-dotnet-runtime",
"Vue.volar",
"dbaeumer.vscode-eslint",
"ms-vscode.powershell"
]
}
},
"forwardPorts": [
5000,
5001,
8080
]
}
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -398,4 +398,6 @@ FodyWeavers.xsd
*.sln.iml

config.txt
extra_prompts/
extra_prompts/
dist_vue
.vscode
17 changes: 15 additions & 2 deletions GUI/MainForm.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 29 additions & 2 deletions GUI/MainForm.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Microsoft.Web.WebView2.Core;
using System.Diagnostics;
using Microsoft.VisualBasic;
using NAudio.Wave;

namespace ja_learner
{
Expand All @@ -12,6 +13,7 @@ public partial class MainForm : Form

TextAnalyzer textAnalyzer = new TextAnalyzer();
private string sentence = "";
private Stream audioStream = null;
private bool immersiveMode = false;
public bool ImmersiveMode
{
Expand All @@ -24,7 +26,7 @@ public bool ImmersiveMode
tabControl.Hide();
panel1.Hide();
FormBorderStyle = FormBorderStyle.None;
MinimumSize = new Size(0,0);
MinimumSize = new Size(0, 0);
}
else
{
Expand All @@ -43,6 +45,7 @@ public string Sentence
set
{
sentence = value;
audioStream = null;
dictForm.UpdateTranslationPanelText(sentence);
UpdateMecabResult(RunMecab());
if (checkBoxAutoTranslate.Checked)
Expand Down Expand Up @@ -88,7 +91,7 @@ private async void MainForm_Load(object sender, EventArgs e)
dictForm.Show();
dictForm.Hide();
UpdateExtraPromptCombobox();

// 初始化 MainForm
if (Program.APP_SETTING.HttpProxy != string.Empty)
{
Expand Down Expand Up @@ -437,5 +440,29 @@ private void checkBoxUseProxy_CheckedChanged(object sender, EventArgs e)
UserConfig.UseProxy = checkBoxUseProxy.Checked;
GptCaller.SetProxy(UserConfig.UseProxy);
}

/// <summary>
/// Click handler for the read-aloud button: obtains text-to-speech audio for the
/// current sentence (reusing the cached stream when one exists) and plays it.
/// </summary>
/// <remarks>
/// The handler works on local snapshots of <c>sentence</c> and <c>audioStream</c>
/// because the <c>Sentence</c> setter resets <c>audioStream</c> to null and may run
/// while this method is suspended at an <c>await</c>.
/// </remarks>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private async void buttonRead_Click(object sender, EventArgs e)
{
    // Snapshot shared state; both fields can change during the awaits below.
    string requestedSentence = sentence;
    Stream playbackStream = audioStream;

    // Prevent a second click from starting another reader on the same stream.
    buttonRead.Enabled = false;
    try
    {
        if (playbackStream == null)
        {
            buttonRead.Text = "朗读(请求中)";
            Stream responseStream = await GptCaller.CreateTextToSpeechStream(requestedSentence);

            if (responseStream.CanSeek)
            {
                playbackStream = responseStream;
            }
            else
            {
                // Replaying the cached audio requires rewinding, so buffer a
                // non-seekable response in memory.
                // NOTE(review): whether the TTS client returns a seekable stream
                // is not visible here - confirm against the library.
                var buffered = new MemoryStream();
                using (responseStream)
                {
                    await responseStream.CopyToAsync(buffered);
                }
                playbackStream = buffered;
            }

            // Cache only if the sentence did not change while the request was in
            // flight; otherwise the audio would be replayed for the wrong text.
            if (sentence == requestedSentence)
            {
                audioStream = playbackStream;
            }
        }

        // Rewind before every playback so a cached stream starts from the beginning.
        playbackStream.Position = 0;

        buttonRead.Text = "朗读(播放中)";
        using (var mp3Reader = new Mp3FileReader(playbackStream))
        using (var waveOut = new WaveOutEvent())
        {
            waveOut.Init(mp3Reader);
            waveOut.Play();
            // Poll until playback ends so the reader and output device stay alive.
            while (waveOut.PlaybackState == PlaybackState.Playing)
            {
                await Task.Delay(100);
            }
        }
    }
    finally
    {
        // Release audio that is not (or no longer) the cached stream, e.g. when
        // the sentence changed during the request or during playback.
        if (playbackStream != null && !ReferenceEquals(playbackStream, audioStream))
        {
            playbackStream.Dispose();
        }

        // Always restore the button, even when the request or decoding failed.
        buttonRead.Text = "朗读";
        buttonRead.Enabled = true;
    }
}
}
}
50 changes: 37 additions & 13 deletions GUI/TranslationPanel.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions GUI/TranslationPanel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
using System.Threading.Tasks;
using System.Windows.Forms;
using static System.Net.Mime.MediaTypeNames;
using Markdig;
using System.Web;

namespace ja_learner.GUI
{
Expand All @@ -27,13 +29,14 @@ public void UpdateText(string text)

private void buttonInterpret_Click(object sender, EventArgs e)
{
textBoxResult.Text = "";
var markdownContent = "";
buttonInterpret.Enabled = false;
var chat = GptCaller.CreateInterpretConversation(textBoxSentence.Text);

GptCaller.StreamResponse(chat, res =>
{
textBoxResult.Text += res.Replace("\n", "\r\n");
textBoxResult.ScrollToCaret();
markdownContent += res;
webBrowserResult.Document.InvokeScript("setMarkdown", new object[] { Markdown.ToHtml(markdownContent) });
});

buttonInterpret.Enabled = true;
Expand Down
17 changes: 16 additions & 1 deletion GptCaller.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@ namespace ja_learner
internal class GptCaller
{
private static OpenAIAPI api;

private static OpenAIAPI ttsApi;
private static IHttpClientFactory defaultFactory;
private static IHttpClientFactory proxyFactory;

/// <summary>
/// Creates the chat and text-to-speech API clients from the application settings
/// and prepares the default and proxy HTTP client factories.
/// </summary>
/// <remarks>
/// The TTS client reuses the chat client unless a TTS-specific key or URL is
/// configured. When only one of the two is configured, the missing one falls back
/// to the corresponding main setting instead of being sent as an empty value.
/// </remarks>
public static void Initialize()
{
    var gpt = Program.APP_SETTING.GPT;
    api = new(gpt.ApiKey) { ApiUrlFormat = gpt.ApiUrl };

    bool hasTtsKey = !string.IsNullOrEmpty(gpt.TtsApiKey);
    bool hasTtsUrl = !string.IsNullOrEmpty(gpt.TtsApiUrl);
    if (hasTtsKey || hasTtsUrl)
    {
        // Fall back per field so a partially configured TTS section still works.
        ttsApi = new(hasTtsKey ? gpt.TtsApiKey : gpt.ApiKey)
        {
            ApiUrlFormat = hasTtsUrl ? gpt.TtsApiUrl : gpt.ApiUrl
        };
    }
    else
    {
        ttsApi = api;
    }

    defaultFactory = api.HttpClientFactory;
    proxyFactory = new MyHttpClientFactory(Program.APP_SETTING.HttpProxy);
}
Expand All @@ -23,16 +28,19 @@ public static void SetProxy(bool useProxy)
if (useProxy)
{
api.HttpClientFactory = proxyFactory;
ttsApi.HttpClientFactory = proxyFactory;
}
else
{
api.HttpClientFactory = defaultFactory;
ttsApi.HttpClientFactory = defaultFactory;
}
}

public static Conversation CreateTranslateConversation(string text)
{
Conversation conversation = api.Chat.CreateConversation();
conversation.Model = Program.APP_SETTING.GPT.Model;
conversation.AppendSystemMessage(Program.APP_SETTING.GPT.TranslatePrompt);
if (UserConfig.useExtraPrompt)
{
Expand All @@ -45,6 +53,7 @@ public static Conversation CreateTranslateConversation(string text)
public static Conversation CreateInterpretConversation(string text)
{
Conversation conversation = api.Chat.CreateConversation();
conversation.Model = Program.APP_SETTING.GPT.Model;
conversation.AppendSystemMessage(Program.APP_SETTING.GPT.ExplainPrompt);
if (UserConfig.useExtraPrompt)
{
Expand All @@ -54,6 +63,12 @@ public static Conversation CreateInterpretConversation(string text)
return conversation;
}

/// <summary>
/// Requests speech audio for <paramref name="text"/> from the TTS client, using
/// the voice and speed taken from the application settings.
/// </summary>
/// <param name="text">The text to synthesize.</param>
/// <returns>A task that yields the audio stream returned by the TTS client.</returns>
public static Task<Stream> CreateTextToSpeechStream(string text)
{
    var speechEndpoint = ttsApi.TextToSpeech;
    var gptOptions = Program.APP_SETTING.GPT;
    return speechEndpoint.GetSpeechAsStreamAsync(text, gptOptions.Voice, gptOptions.VoiceSpeed);
}

private static void AddExtraSystemPrompt(Conversation conversation)
{
if (UserConfig.ExtraPrompt.Length > 0)
Expand Down
5 changes: 5 additions & 0 deletions Model/AppSetting.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ public class GPTOptions
{
public string ApiKey { get; set; } = string.Empty;
public string ApiUrl { get; set; } = string.Empty;
public string TtsApiKey { get; set; } = string.Empty;
public string TtsApiUrl { get; set; } = string.Empty;
public string Voice { get; set; } = string.Empty;
public double VoiceSpeed { get; set; } = 1.0;
public string Model { get; set; } = string.Empty;
public string ExtraPromptDir { get; set; } = string.Empty;
public string TranslatePrompt { get; set; } = string.Empty;
public string ExplainPrompt { get; set; } = string.Empty;
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@
"GPT": {
"ApiKey": "sk-xxx",
"ApiUrl": "https://api.openai.com/{0}/{1}", // 实际调用为https://api.openai.com/v1/chat/completions
"Model": "gpt-4o-mini", // 设置模型
"TtsApiKey": "", // 可以单独设置OpenAI AI文字转语音key,适用于不同key价格更便宜的场景,不设置的话仍用上面的配置
"TtsApiUrl": "", // 单独的TTS API URL
"Voice": "shimmer", // 发声角色,可选值为alloy,ash,coral,echo,fable,onyx,nova,sage,shimmer
"VoiceSpeed": "1.0", // OpenAI TTS语速
"ExtraPromptDir": "extra_prompts", // 额外的Prompt,比如指定某些角色名字怎么翻译
"TranslatePrompt": "...", // 翻译Prompt
"ExplainPrompt": "..." // 分析Prompt
Expand Down
7 changes: 6 additions & 1 deletion appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
"Port": 8080,
"HttpProxy": "",
"GPT": {
"ApiKey": "sk-xxx",
"ApiKey": "sk-xxx",
"ApiUrl": "https://api.openai.com/{0}/{1}",
"Model": "gpt-4o-mini",
"TtsApiKey": "",
"TtsApiUrl": "",
"Voice": "shimmer",
"VoiceSpeed": "1.0",
"ExtraPromptDir": "extra_prompts",
"TranslatePrompt": "Translate the text to Simplified Chinese. Don't output anything other than translation results.",
"ExplainPrompt": "List and explain the vocabulary (except prepositions) and grammar of the given Japanese text in Simplified Chinese. Your output consists of three parts: translation, vocabulary, grammar. Don't use English and romaji."
Expand Down
7 changes: 5 additions & 2 deletions ja-learner.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@
<_SuppressWinFormsTrimError>true</_SuppressWinFormsTrimError>
<ImplicitUsings>enable</ImplicitUsings>
<GenerateDocumentationFile>False</GenerateDocumentationFile>
<MeCabUseDefaultDictionary>True</MeCabUseDefaultDictionary>
<MeCabUseDefaultDictionary>True</MeCabUseDefaultDictionary>
<BuiltInComInteropSupport>true</BuiltInComInteropSupport>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="MeCab.DotNet" Version="1.2.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Microsoft.Web.WebView2" Version="1.0.1774.30" />
<PackageReference Include="OpenAI" Version="1.7.2" />
<PackageReference Include="NAudio" Version="2.2.1" />
<PackageReference Include="OpenAI" Version="1.11.0" />
<PackageReference Include="Markdig" Version="0.30.4" />
</ItemGroup>

<ItemGroup>
Expand Down