From 2b606c07b460a30b5e140fc8c088b128a6362143 Mon Sep 17 00:00:00 2001
From: Redmomn <109732988+Redmomn@users.noreply.github.com>
Date: Mon, 4 Mar 2024 20:07:17 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E5=A2=9E=E5=8A=A0=E8=AF=AD=E9=9F=B3?=
 =?UTF-8?q?=E6=98=AF=E5=90=A6=E8=BD=AC=E6=8D=A2=E4=B8=BAmp3=E6=A0=BC?=
 =?UTF-8?q?=E5=BC=8F=E7=9A=84=E9=85=8D=E7=BD=AE=E9=A1=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                              |  8 +++++---
 nonebot_plugin_vits_tts/config.py      |  2 ++
 nonebot_plugin_vits_tts/matcher.py     | 22 +++++++++++++++++-----
 nonebot_plugin_vits_tts/utils/audio.py |  9 +++++++++
 setup.py                               |  2 +-
 5 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index d74083c..33062a6 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@
 
 | 配置项                         | 类型    | 默认值       | 说明                                                                    |
 |-----------------------------|-------|-----------|-----------------------------------------------------------------------|
-| VITS__DEVICE                | int   | 0         | 使用指定的cuda设备进行tts合成，如果没有指定的显卡会自动使用cpu                                  |
+| VITS__DEVICE                | int   | 0         | 使用指定的cuda设备进行tts合成，如果指定的设备不存在会自动使用cpu进行推理                             |
 | VITS__VMODEL_PATH           | str   | models    | 插件会读取此文件夹下的所有模型                                                       |
 | VITS__AT_BOT                | bool  | false     | 使用语音合成是否需要@bot                                                        |
 | VITS__COOLDOWN              | int   | 0         | 在每个群里生成语音的冷却时间，防止设备负载过大                                               |
@@ -56,6 +56,7 @@
 | VITS__DEFAULT_LENGTH_SCALE  | float | 1         | 整体语速                                                                  |
 | VITS__DEFAULT_NOISE_SCALE   | float | 0.667     | 感情变化程度                                                                |
 | VITS__DEFAULT_NOISE_SCALE_W | float | 0.6       | 音素发音长度                                                                |
+| VITS__AUDIO_CONVERT_TO_MP3  | bool  | false     | 是否将语音转换为mp3格式，默认为wav格式，如果设置为true，需要在设备上安装ffmpeg                       |
 
 假如你的项目配置是这样，则`VITS__VMODEL_PATH`应为`models`，`VITS__VMODEL_FILE_NAME`为`model.pth`，`VITS__CONFIG_FILE_NAME`
 为`config.json`
@@ -105,13 +106,14 @@ awsomebot
 
 ## 模型分享
 
-由我自己训练的模型，目前有以下几个galgame的模型[ATRI](https://huggingface.co/Redmonm/ATRI)，
+由我自己训练的模型，目前有以下几个galgame的模型  
+[ATRI](https://huggingface.co/Redmonm/ATRI)，
 [RiddleJoker](https://huggingface.co/Redmonm/RiddleJoker)，
 [千恋万花](https://huggingface.co/Redmonm/SenrenBanka)，
 [魔女的夜宴](https://huggingface.co/Redmonm/SanobaWitch)，
 [星光咖啡馆与死神之蝶](https://huggingface.co/Redmonm/CafeStella)
 
-模型不定期更新，可前往[我的huggingface主页](https://huggingface.co/Redmonm)查看
+模型不定期更新，可前往我的[huggingface主页](https://huggingface.co/Redmonm)查看
 
 ## 💡 感谢
 
diff --git a/nonebot_plugin_vits_tts/config.py b/nonebot_plugin_vits_tts/config.py
index 8e6ef49..a062614 100644
--- a/nonebot_plugin_vits_tts/config.py
+++ b/nonebot_plugin_vits_tts/config.py
@@ -24,6 +24,8 @@ class _ScopedConfig(BaseModel):
     default_length_scale: float = 1
     default_noise_scale: float = .667
     default_noise_scale_w: float = .6
+    wav_to_mp3: bool = False  # NOTE(review): not read by the visible hunks and absent from the README table; possibly a leftover - confirm
+    audio_convert_to_mp3: bool = False  # when True, matcher.py passes the generated audio through wav_to_mp3 before sending
 
     @field_validator("vmodel_path")
     @classmethod
diff --git a/nonebot_plugin_vits_tts/matcher.py b/nonebot_plugin_vits_tts/matcher.py
index 20c02e3..11d676a 100644
--- a/nonebot_plugin_vits_tts/matcher.py
+++ b/nonebot_plugin_vits_tts/matcher.py
@@ -4,6 +4,7 @@
 from nonebot.rule import Rule
 from nonebot.typing import T_State
 
+from .config import config
 from .rule import is_tts_msg
 from .utils.audio import wav_to_mp3
 from .utils.model import get_model_from_speaker, speakers
@@ -21,12 +22,23 @@ async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
 
     model = get_model_from_speaker(speaker)
     logger.info(f"使用模型{model.model_name}.{speaker}生成语音：{text}")
+    try:
+        audio = await generate_voice(model_path=str(model.model),
+                                     config_path=str(model.config),
+                                     language=lang, text=text, spk=speaker)
+    except Exception as e:
+        logger.exception("语音合成出错")  # keep the traceback; the reply only carries str(e)
+        await vits_req.finish(message=f"语音合成出错：{e}")
+        return
+
+    if config.audio_convert_to_mp3:  # conversion is opt-in via plugin config
+        try:
+            audio = await wav_to_mp3(audio)
+        except Exception as e:
+            logger.exception("语音转换出错")  # keep the traceback; the reply only carries str(e)
+            await vits_req.finish(message=f"语音转换出错：{e}")
+            return
 
-    audio = await wav_to_mp3(await generate_voice(model_path=str(model.model),
-                                                  config_path=str(model.config),
-                                                  language=lang,
-                                                  text=text,
-                                                  spk=speaker))
     await vits_req.finish(message=MessageSegment.record(file=audio))
 
 
diff --git a/nonebot_plugin_vits_tts/utils/audio.py b/nonebot_plugin_vits_tts/utils/audio.py
index a7bc179..8bea6e7 100644
--- a/nonebot_plugin_vits_tts/utils/audio.py
+++ b/nonebot_plugin_vits_tts/utils/audio.py
@@ -12,6 +12,15 @@
 
 
 async def wav_to_mp3(wav_data: bytes) -> bytes:
+    """Convert wav bytes to mp3 via wav_to_mp3_1, falling back to wav_to_mp3_2."""
+    try:
+        return await wav_to_mp3_1(wav_data)
+    except Exception:
+        # Not a bare except: task cancellation / KeyboardInterrupt must propagate.
+        return await wav_to_mp3_2(wav_data)
+
+
+async def wav_to_mp3_1(wav_data: bytes) -> bytes:
     """
     wav格式转换为mp3
     :param wav_data: wav的二进制数据
diff --git a/setup.py b/setup.py
index f100379..f08785c 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='nonebot_plugin_vits_tts',
-    version='0.1.4',
+    version='0.1.5',
     packages=find_packages(),
     install_requires=[
         'torch',