diff --git a/README.md b/README.md index d74083c..33062a6 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ | 配置项 | 类型 | 默认值 | 说明 | |-----------------------------|-------|-----------|-----------------------------------------------------------------------| -| VITS__DEVICE | int | 0 | 使用指定的cuda设备进行tts合成,如果没有指定的显卡会自动使用cpu | +| VITS__DEVICE | int | 0 | 使用指定的cuda设备进行tts合成,如果指定的设备不存在会自动使用cpu进行推理 | | VITS__VMODEL_PATH | str | models | 插件会读取此文件夹下的所有模型 | | VITS__AT_BOT | bool | false | 使用语音合成是否需要@bot | | VITS__COOLDOWN | int | 0 | 在每个群里生成语音的冷却时间,防止设备负载过大 | @@ -56,6 +56,7 @@ | VITS__DEFAULT_LENGTH_SCALE | float | 1 | 整体语速 | | VITS__DEFAULT_NOISE_SCALE | float | 0.667 | 感情变化程度 | | VITS__DEFAULT_NOISE_SCALE_W | float | 0.6 | 音素发音长度 | +| VITS__AUDIO_CONVERT_TO_MP3 | bool | false | 是否将语音转换为mp3格式,默认为wav格式,如果设置为true,需要在设备上安装ffmpeg | 假如你的项目配置是这样,则`VITS__VMODEL_PATH`应为`models`,`VITS__VMODEL_FILE_NAME`为`model.pth`,`VITS__CONFIG_FILE_NAME` 为`config.json` @@ -105,13 +106,14 @@ awsomebot ## 模型分享 -由我自己训练的模型,目前有以下几个galgame的模型[ATRI](https://huggingface.co/Redmonm/ATRI), +由我自己训练的模型,目前有以下几个galgame的模型 +[ATRI](https://huggingface.co/Redmonm/ATRI), [RiddleJoker](https://huggingface.co/Redmonm/RiddleJoker), [千恋万花](https://huggingface.co/Redmonm/SenrenBanka), [魔女的夜宴](https://huggingface.co/Redmonm/SanobaWitch), [星光咖啡馆与死神之蝶](https://huggingface.co/Redmonm/CafeStella) -模型不定期更新,可前往[我的huggingface主页](https://huggingface.co/Redmonm)查看 +模型不定期更新,可前往我的[huggingface主页](https://huggingface.co/Redmonm)查看 ## 💡 感谢 diff --git a/nonebot_plugin_vits_tts/config.py b/nonebot_plugin_vits_tts/config.py index 8e6ef49..a062614 100644 --- a/nonebot_plugin_vits_tts/config.py +++ b/nonebot_plugin_vits_tts/config.py @@ -24,6 +24,8 @@ class _ScopedConfig(BaseModel): default_length_scale: float = 1 default_noise_scale: float = .667 default_noise_scale_w: float = .6 + wav_to_mp3: bool = False + audio_convert_to_mp3: bool = False @field_validator("vmodel_path") @classmethod diff --git a/nonebot_plugin_vits_tts/matcher.py b/nonebot_plugin_vits_tts/matcher.py index 20c02e3..11d676a 100644 --- a/nonebot_plugin_vits_tts/matcher.py +++ b/nonebot_plugin_vits_tts/matcher.py @@ -4,6 +4,7 @@ from nonebot.rule import Rule from nonebot.typing import T_State +from .config import config from .rule import is_tts_msg from .utils.audio import wav_to_mp3 from .utils.model import get_model_from_speaker, speakers @@ -21,12 +22,23 @@ async def _(bot: Bot, event: GroupMessageEvent, state: T_State): model = get_model_from_speaker(speaker) logger.info(f"使用模型{model.model_name}.{speaker}生成语音:{text}") + try: + audio = await generate_voice(model_path=str(model.model), + config_path=str(model.config), + language=lang, + text=text, + spk=speaker) + except Exception as e: + await vits_req.finish(message=f"语音合成出错:{e}") + return + + if config.audio_convert_to_mp3: + try: + audio = await wav_to_mp3(audio) + except Exception as e: + await vits_req.finish(message=f"语音转换出错:{e}") + return - audio = await wav_to_mp3(await generate_voice(model_path=str(model.model), - config_path=str(model.config), - language=lang, - text=text, - spk=speaker)) await vits_req.finish(message=MessageSegment.record(file=audio)) diff --git a/nonebot_plugin_vits_tts/utils/audio.py b/nonebot_plugin_vits_tts/utils/audio.py index a7bc179..8bea6e7 100644 --- a/nonebot_plugin_vits_tts/utils/audio.py +++ b/nonebot_plugin_vits_tts/utils/audio.py @@ -12,6 +12,15 @@ async def wav_to_mp3(wav_data: bytes) -> bytes: + try: + audio = await wav_to_mp3_1(wav_data) + return audio + except: + audio = await wav_to_mp3_2(wav_data) + return audio + + +async def wav_to_mp3_1(wav_data: bytes) -> bytes: """ wav格式转换为mp3 :param wav_data: wav的二进制数据 diff --git a/setup.py b/setup.py index f100379..f08785c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='nonebot_plugin_vits_tts', - version='0.1.4', + version='0.1.5', packages=find_packages(), install_requires=[ 'torch',