fix: 增加语音是否转换为mp3格式的配置项

Redmomn · Mar 4, 2024 · 2b606c0 · 2b606c0
1 parent baa0f34
commit 2b606c0
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@
 
 | 配置项                         | 类型    | 默认值       | 说明                                                                    |
 |-----------------------------|-------|-----------|-----------------------------------------------------------------------|
-| VITS__DEVICE                | int   | 0         | 使用指定的cuda设备进行tts合成，如果没有指定的显卡会自动使用cpu                                  |
+| VITS__DEVICE                | int   | 0         | 使用指定的cuda设备进行tts合成，如果指定的设备不存在会自动使用cpu进行推理                             |
 | VITS__VMODEL_PATH           | str   | models    | 插件会读取此文件夹下的所有模型                                                       |
 | VITS__AT_BOT                | bool  | false     | 使用语音合成是否需要@bot                                                        |
 | VITS__COOLDOWN              | int   | 0         | 在每个群里生成语音的冷却时间，防止设备负载过大                                               |
@@ -56,6 +56,7 @@
 | VITS__DEFAULT_LENGTH_SCALE  | float | 1         | 整体语速                                                                  |
 | VITS__DEFAULT_NOISE_SCALE   | float | 0.667     | 感情变化程度                                                                |
 | VITS__DEFAULT_NOISE_SCALE_W | float | 0.6       | 音素发音长度                                                                |
+| VITS__AUDIO_CONVERT_TO_MP3  | bool  | false     | 是否将语音转换为mp3格式，默认为wav格式，如果设置为true，需要在设备上安装ffmpeg                       |
 
 假如你的项目配置是这样，则`VITS__VMODEL_PATH`应为`models`，`VITS__VMODEL_FILE_NAME`为`model.pth`，`VITS__CONFIG_FILE_NAME`
 为`config.json`
@@ -105,13 +106,14 @@ awsomebot
 
 ## 模型分享
 
-由我自己训练的模型，目前有以下几个galgame的模型[ATRI](https://huggingface.co/Redmonm/ATRI)，
+以下是由我自己训练的模型，目前涵盖这几部galgame：  
+[ATRI](https://huggingface.co/Redmonm/ATRI)，
 [RiddleJoker](https://huggingface.co/Redmonm/RiddleJoker)，
 [千恋万花](https://huggingface.co/Redmonm/SenrenBanka)，
 [魔女的夜宴](https://huggingface.co/Redmonm/SanobaWitch)，
 [星光咖啡馆与死神之蝶](https://huggingface.co/Redmonm/CafeStella)
 
-模型不定期更新，可前往[我的huggingface主页](https://huggingface.co/Redmonm)查看
+模型不定期更新，可前往我的[huggingface主页](https://huggingface.co/Redmonm)查看
 
 ## 💡 感谢
 

diff --git a/nonebot_plugin_vits_tts/config.py b/nonebot_plugin_vits_tts/config.py
@@ -24,6 +24,8 @@ class _ScopedConfig(BaseModel):
     default_length_scale: float = 1
     default_noise_scale: float = .667
     default_noise_scale_w: float = .6
+    wav_to_mp3: bool = False
+    audio_convert_to_mp3: bool = False
 
     @field_validator("vmodel_path")
     @classmethod

diff --git a/nonebot_plugin_vits_tts/matcher.py b/nonebot_plugin_vits_tts/matcher.py
@@ -4,6 +4,7 @@
 from nonebot.rule import Rule
 from nonebot.typing import T_State
 
+from .config import config
 from .rule import is_tts_msg
 from .utils.audio import wav_to_mp3
 from .utils.model import get_model_from_speaker, speakers
@@ -21,12 +22,23 @@ async def _(bot: Bot, event: GroupMessageEvent, state: T_State):
 
     model = get_model_from_speaker(speaker)
     logger.info(f"使用模型{model.model_name}.{speaker}生成语音：{text}")
+    try:
+        audio = await generate_voice(model_path=str(model.model),
+                                     config_path=str(model.config),
+                                     language=lang,
+                                     text=text,
+                                     spk=speaker)
+    except Exception as e:
+        await vits_req.finish(message=f"语音合成出错：{e}")
+        return
+
+    if config.audio_convert_to_mp3:
+        try:
+            audio = await wav_to_mp3(audio)
+        except Exception as e:
+            await vits_req.finish(message=f"语音转换出错：{e}")
+            return
 
-    audio = await wav_to_mp3(await generate_voice(model_path=str(model.model),
-                                                  config_path=str(model.config),
-                                                  language=lang,
-                                                  text=text,
-                                                  spk=speaker))
     await vits_req.finish(message=MessageSegment.record(file=audio))
 
 

diff --git a/nonebot_plugin_vits_tts/utils/audio.py b/nonebot_plugin_vits_tts/utils/audio.py
@@ -12,6 +12,15 @@
 
 
 async def wav_to_mp3(wav_data: bytes) -> bytes:
+    """Convert WAV bytes to MP3 via wav_to_mp3_1, falling back to wav_to_mp3_2 if it raises."""
+    try:
+        return await wav_to_mp3_1(wav_data)
+    except Exception:
+        # Not a bare except: task cancellation and interpreter exit must propagate, not trigger the fallback.
+        return await wav_to_mp3_2(wav_data)
+
+
+async def wav_to_mp3_1(wav_data: bytes) -> bytes:
     """
     wav格式转换为mp3
     :param wav_data: wav的二进制数据

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='nonebot_plugin_vits_tts',
-    version='0.1.4',
+    version='0.1.5',
     packages=find_packages(),
     install_requires=[
         'torch',