diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 465f97036..e89a354dc 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -12,7 +12,6 @@ env: IMAGE_NAME: ${{ secrets.DOCKERHUB_USERNAME }}/voicevox_engine PYTHON_VERSION: '3.8.10' VOICEVOX_CORE_VERSION: '0.10.preview.3' - VOICEVOX_CORE_SOURCE_VERSION: '0.10.preview.3' VOICEVOX_ENGINE_VERSION: |- # releaseのときはタグが、それ以外はlatestがバージョン名に ${{ github.event.release.tag_name != '' && github.event.release.tag_name || 'latest' }} @@ -109,7 +108,6 @@ jobs: PYTHON_VERSION=${{ env.PYTHON_VERSION }} VOICEVOX_ENGINE_VERSION=${{ env.VOICEVOX_ENGINE_VERSION }} VOICEVOX_CORE_VERSION=${{ env.VOICEVOX_CORE_VERSION }} - VOICEVOX_CORE_SOURCE_VERSION=${{ env.VOICEVOX_CORE_SOURCE_VERSION }} VOICEVOX_CORE_LIBRARY_NAME=${{ matrix.voicevox_core_library_name }} ONNXRUNTIME_URL=${{ matrix.onnxruntime_url }} target: ${{ matrix.target }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e12391681..3c590f514 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,6 @@ env: PYTHON_VERSION: '3.8.10' VOICEVOX_RESOURCE_VERSION: '0.10.preview.2' VOICEVOX_CORE_VERSION: '0.10.preview.3' - VOICEVOX_CORE_SOURCE_VERSION: '0.10.preview.3' VOICEVOX_ENGINE_VERSION: |- # releaseのときはタグが、それ以外はlatestがバージョン名に ${{ github.event.release.tag_name != '' && github.event.release.tag_name || 'latest' }} @@ -144,44 +143,6 @@ jobs: ditto -x -k --sequesterRsrc --rsrc download/core.zip download/ rm download/core.zip - # Install VOICEVOX Core Python package - - name: Prepare VOICEVOX Core source cache - uses: actions/cache@v2 - id: voicevox-core-source-cache - with: - key: ${{ matrix.os }}-voicevox-core-source-${{ env.VOICEVOX_CORE_SOURCE_VERSION }} - path: download/voicevox_core_source - - - name: Checkout VOICEVOX Core source - if: steps.voicevox-core-source-cache.outputs.cache-hit != 'true' - uses: actions/checkout@v2 - with: - repository: VOICEVOX/voicevox_core - ref: ${{ env.VOICEVOX_CORE_SOURCE_VERSION }} - path: download/voicevox_core_source - - - name: Install VOICEVOX Core Python package - shell: bash - run: | - set -eux - - mkdir -p download/voicevox_core_source/core/lib - - # Copy VOICEVOX Core dylib to core/lib - cp download/core/${{ matrix.voicevox_core_library_name }} download/voicevox_core_source/core/lib/libcore.dylib - - cd download/voicevox_core_source - cp core/src/core.h core/lib/ - cd - - - # Copy ONNX Runtime dylib - cp download/onnxruntime/lib/libonnxruntime.*.dylib download/voicevox_core_source/core/lib/ - - # Install VOICEVOX Core Python package with libcore.dylib & libonnxruntime.*.dylib - cd download/voicevox_core_source - NUMPY_INCLUDE=`python -c "import numpy; print(numpy.get_include())"` - CPATH="$NUMPY_INCLUDE:${CPATH:-}" pip install . - - name: Download PyOpenJTalk dictionary shell: bash run: | @@ -228,7 +189,8 @@ jobs: --include-data-file=../user.dic=./ \ --include-data-file=../download/core/*.bin=./ \ --include-data-file=../download/core/metas.json=./ \ - --include-data-file=../download/onnxruntime/lib/libonnxruntime.*.dylib=./ \ + --include-data-file=../download/core/${{ matrix.voicevox_core_library_name }}=./ \ + --include-data-file=../download/onnxruntime/lib/libonnxruntime.dylib=./ \ --include-data-file=${{ env.pythonLocation }}/lib/python*/site-packages/scipy/.dylibs/*.dylib=./scipy/.dylibs/ \ --include-data-file=${{ env.pythonLocation }}/lib/python*/site-packages/_soundfile_data/*=./_soundfile_data/ \ --include-data-dir=../speaker_info=./speaker_info \ @@ -352,7 +314,6 @@ jobs: PYTHON_VERSION=${{ env.PYTHON_VERSION }} VOICEVOX_ENGINE_VERSION=${{ env.VOICEVOX_ENGINE_VERSION }} VOICEVOX_CORE_VERSION=${{ env.VOICEVOX_CORE_VERSION }} - VOICEVOX_CORE_SOURCE_VERSION=${{ env.VOICEVOX_CORE_SOURCE_VERSION }} VOICEVOX_CORE_LIBRARY_NAME=${{ matrix.voicevox_core_library_name }} ONNXRUNTIME_URL=${{ matrix.onnxruntime_url }} target: ${{ matrix.target }} @@ -650,49 +611,6 @@ jobs: unzip download/core.zip -d download/ rm download/core.zip - # Install VOICEVOX Core Python package - - name: Prepare VOICEVOX Core source cache - uses: actions/cache@v2 - id: voicevox-core-source-cache - with: - key: ${{ matrix.os }}-voicevox-core-source-${{ env.VOICEVOX_CORE_SOURCE_VERSION }} - path: download/voicevox_core_source - - - name: Checkout VOICEVOX Core source - if: steps.voicevox-core-source-cache.outputs.cache-hit != 'true' - uses: actions/checkout@v2 - with: - repository: VOICEVOX/voicevox_core - ref: ${{ env.VOICEVOX_CORE_SOURCE_VERSION }} - path: download/voicevox_core_source - - - name: Install VOICEVOX Core Python package - shell: bash - run: | - set -eux - - # Generate VOICEVOX Core LIB from DLL - cp download/core/${{ matrix.voicevox_core_dll_name }} download/voicevox_core_source/example/python/core.dll - - cd download/voicevox_core_source/example/python - ./makelib.bat core - cd - - - # Copy VOICEVOX Core DLL & LIB to core/lib - cd download/voicevox_core_source - mkdir -p core/lib - mv example/python/core.dll core/lib/ - mv example/python/core.lib core/lib/ - cp core/src/core.h core/lib/ - cd - - - # Copy ONNX Runtime DLLs - cp download/onnxruntime/lib/*.dll download/voicevox_core_source/core/lib/ - - # Install VOICEVOX Core Python package with core.dll & onnxruntime*.dll - cd download/voicevox_core_source - pip install . - - name: Generate licenses.json shell: bash run: python generate_licenses.py > licenses.json @@ -720,7 +638,7 @@ jobs: # Replace version sed -i "s/__version__ = \"latest\"/__version__ = \"${{ env.VOICEVOX_ENGINE_VERSION }}\"/" voicevox_engine/__init__.py - + python -m nuitka \ --standalone \ --assume-yes-for-downloads \ @@ -737,6 +655,8 @@ jobs: --include-data-file="presets.yaml=./" \ --include-data-file=download/core/*.bin=./ \ --include-data-file="download/core/metas.json=./" \ + --include-data-file="download/onnxruntime/lib/onnxruntime.dll=./" \ + --include-data-file="download/core/${{ matrix.voicevox_core_dll_name }}=./" \ --include-data-dir="speaker_info=./speaker_info" \ --msvc=14.2 \ --follow-imports \ @@ -758,13 +678,6 @@ jobs: run: | set -eux - # Workaround: Move core.dll to run.dist/core/lib/ - # Nuitka copies core.dll to run.dist/core.dll - # but core Python module will load core.dll from run.dist/core/lib/core.dll. - mkdir -p run.dist/core/lib - mv run.dist/core.dll run.dist/core/lib/ - mv run.dist/onnxruntime.dll run.dist/core/lib/ - # Build artifact directory mkdir -p artifact ln -sf "$(pwd)/run.dist"/* artifact/ @@ -773,7 +686,7 @@ jobs: if [ -f "download/onnxruntime/lib/onnxruntime_providers_cuda.dll" ]; then # ONNX Runtime providers (Nuitka does not copy dynamic loaded libraries) - ln -sf "$(pwd)/download/onnxruntime/lib"/onnxruntime_*.dll artifact/core/lib/ + ln -sf "$(pwd)/download/onnxruntime/lib"/onnxruntime_*.dll artifact/ # CUDA ln -sf "$(pwd)/download/cuda/bin"/cublas64_*.dll artifact/ diff --git a/Dockerfile b/Dockerfile index c9a0a45ad..3afc4ec6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,15 +35,8 @@ RUN < FastAPI: +def generate_app( + synthesis_engines: Dict[str, SynthesisEngineBase], latest_core_version: str +) -> FastAPI: root_dir = Path(__file__).parent - default_sampling_rate = engine.default_sampling_rate + default_sampling_rate = synthesis_engines[latest_core_version].default_sampling_rate app = FastAPI( title="VOICEVOX ENGINE", @@ -72,11 +78,11 @@ def generate_app(engine: SynthesisEngineBase) -> FastAPI: # TODO: キャッシュを管理するモジュール側API・HTTP側APIを用意する synthesis_morphing_parameter = lru_cache(maxsize=4)(_synthesis_morphing_parameter) - @app.on_event("startup") - async def start_catch_disconnection(): - if args.enable_cancellable_synthesis: - loop = asyncio.get_event_loop() - _ = loop.create_task(cancellable_engine.catch_disconnection()) + # @app.on_event("startup") + # async def start_catch_disconnection(): + # if args.enable_cancellable_synthesis: + # loop = asyncio.get_event_loop() + # _ = loop.create_task(cancellable_engine.catch_disconnection()) def enable_interrogative_query_param() -> Query: return Query( @@ -84,6 +90,13 @@ def enable_interrogative_query_param() -> Query: description="疑問系のテキストが与えられたら自動調整する機能を有効にする。現在は長音を付け足すことで擬似的に実装される", ) + def get_engine(core_version: Optional[str]) -> SynthesisEngineBase: + if core_version is None: + return synthesis_engines[latest_core_version] + if core_version in synthesis_engines: + return synthesis_engines[core_version] + raise HTTPException(status_code=422, detail="不明なバージョンです") + @app.post( "/audio_query", response_model=AudioQuery, @@ -94,10 +107,12 @@ def audio_query( text: str, speaker: int, enable_interrogative: bool = enable_interrogative_query_param(), # noqa B008, + core_version: Optional[str] = None, ): """ クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。 """ + engine = get_engine(core_version) accent_phrases = engine.create_accent_phrases( text, speaker_id=speaker, @@ -126,10 +141,12 @@ def audio_query_from_preset( text: str, preset_id: int, enable_interrogative: bool = enable_interrogative_query_param(), # noqa B008, + core_version: Optional[str] = None, ): """ クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。 """ + engine = get_engine(core_version) presets, err_detail = preset_loader.load_presets() if err_detail: raise HTTPException(status_code=422, detail=err_detail) @@ -175,6 +192,7 @@ def accent_phrases( speaker: int, is_kana: bool = False, enable_interrogative: bool = enable_interrogative_query_param(), # noqa B008, + core_version: Optional[str] = None, ): """ テキストからアクセント句を得ます。 @@ -184,6 +202,7 @@ def accent_phrases( * カナの手前に`_`を入れるとそのカナは無声化される * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。 """ + engine = get_engine(core_version) if is_kana: try: accent_phrases = parse_kana(text, enable_interrogative) @@ -210,7 +229,12 @@ def accent_phrases( tags=["クエリ編集"], summary="アクセント句から音高・音素長を得る", ) - def mora_data(accent_phrases: List[AccentPhrase], speaker: int): + def mora_data( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) return engine.replace_mora_data(accent_phrases, speaker_id=speaker) @app.post( @@ -219,7 +243,12 @@ def mora_data(accent_phrases: List[AccentPhrase], speaker: int): tags=["クエリ編集"], summary="アクセント句から音素長を得る", ) - def mora_length(accent_phrases: List[AccentPhrase], speaker: int): + def mora_length( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) return engine.replace_phoneme_length( accent_phrases=accent_phrases, speaker_id=speaker ) @@ -230,7 +259,12 @@ def mora_length(accent_phrases: List[AccentPhrase], speaker: int): tags=["クエリ編集"], summary="アクセント句から音高を得る", ) - def mora_pitch(accent_phrases: List[AccentPhrase], speaker: int): + def mora_pitch( + accent_phrases: List[AccentPhrase], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) return engine.replace_mora_pitch( accent_phrases=accent_phrases, speaker_id=speaker ) @@ -248,7 +282,8 @@ def mora_pitch(accent_phrases: List[AccentPhrase], speaker: int): tags=["音声合成"], summary="音声合成する", ) - def synthesis(query: AudioQuery, speaker: int): + def synthesis(query: AudioQuery, speaker: int, core_version: Optional[str] = None): + engine = get_engine(core_version) wave = engine.synthesis(query=query, speaker_id=speaker) with NamedTemporaryFile(delete=False) as f: @@ -298,7 +333,12 @@ def cancellable_synthesis(query: AudioQuery, speaker: int, request: Request): tags=["音声合成"], summary="複数まとめて音声合成する", ) - def multi_synthesis(queries: List[AudioQuery], speaker: int): + def multi_synthesis( + queries: List[AudioQuery], + speaker: int, + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) sampling_rate = queries[0].outputSamplingRate with NamedTemporaryFile(delete=False) as f: @@ -344,11 +384,13 @@ def _synthesis_morphing( base_speaker: int, target_speaker: int, morph_rate: float = Query(..., ge=0.0, le=1.0), # noqa: B008 + core_version: Optional[str] = None, ): """ 指定された2人の話者で音声を合成、指定した割合でモーフィングした音声を得ます。 モーフィングの割合は`morph_rate`で指定でき、0.0でベースの話者、1.0でターゲットの話者に近づきます。 """ + engine = get_engine(core_version) # 生成したパラメータはキャッシュされる morph_param = synthesis_morphing_parameter( @@ -425,15 +467,25 @@ def get_presets(): def version() -> str: return __version__ + @app.get("/core_versions", response_model=List[str], tags=["その他"]) + def core_versions() -> List[str]: + return Response( + content=json.dumps(list(synthesis_engines.keys())), + media_type="application/json", + ) + @app.get("/speakers", response_model=List[Speaker], tags=["その他"]) - def speakers(): + def speakers( + core_version: Optional[str] = None, + ): + engine = get_engine(core_version) return Response( content=engine.speakers, media_type="application/json", ) @app.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) - def speaker_info(speaker_uuid: str): + def speaker_info(speaker_uuid: str, core_version: Optional[str] = None): """ 指定されたspeaker_uuidに関する情報をjson形式で返します。 画像や音声はbase64エンコードされたものが返されます。 @@ -442,7 +494,7 @@ def speaker_info(speaker_uuid: str): ------- ret_data: SpeakerInfo """ - speakers = json.loads(engine.speakers) + speakers = json.loads(get_engine(core_version).speakers) for i in range(len(speakers)): if speakers[i]["speaker_uuid"] == speaker_uuid: speaker = speakers[i] @@ -493,7 +545,9 @@ def speaker_info(speaker_uuid: str): parser.add_argument("--port", type=int, default=50021) parser.add_argument("--use_gpu", action="store_true") parser.add_argument("--voicevox_dir", type=Path, default=None) - parser.add_argument("--voicelib_dir", type=Path, default=None) + parser.add_argument("--voicelib_dir", type=Path, default=None, action="append") + parser.add_argument("--runtime_dir", type=Path, default=None, action="append") + parser.add_argument("--enable_mock", action="store_true") parser.add_argument("--enable_cancellable_synthesis", action="store_true") parser.add_argument("--init_processes", type=int, default=2) @@ -508,27 +562,24 @@ def speaker_info(speaker_uuid: str): cpu_num_threads: Optional[int] = args.cpu_num_threads - # voicelib_dir が Noneのとき、音声ライブラリの Python モジュールと同じディレクトリにあるとする - voicelib_dir: Optional[Path] = args.voicelib_dir - if voicelib_dir is None: - if args.voicevox_dir is not None: - voicelib_dir = args.voicevox_dir - else: - voicelib_dir = Path(__file__).parent # core.__file__だとnuitkaビルド後にエラー + synthesis_engines = make_synthesis_engines( + use_gpu=args.use_gpu, + voicelib_dirs=args.voicelib_dir, + voicevox_dir=args.voicevox_dir, + runtime_dirs=args.runtime_dir, + enable_mock=args.enable_mock, + ) + assert len(synthesis_engines) != 0, "音声合成エンジンがありません。" + latest_core_version = str(max([LooseVersion(ver) for ver in synthesis_engines])) cancellable_engine = None + # make_synthesis_engine周りの仕様が変わったので一旦cancellable機能を停止する if args.enable_cancellable_synthesis: - cancellable_engine = CancellableEngine(args, voicelib_dir, cpu_num_threads) + # cancellable_engine = CancellableEngine(args, voicelib_dir, cpu_num_threads) + raise RuntimeError("現在のバージョンではcancellable機能を使用することはできません。") uvicorn.run( - generate_app( - make_synthesis_engine( - use_gpu=args.use_gpu, - voicelib_dir=voicelib_dir, - voicevox_dir=args.voicevox_dir, - cpu_num_threads=cpu_num_threads, - ) - ), + generate_app(synthesis_engines, latest_core_version), host=args.host, port=args.port, ) diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py index cb060f076..de4798e24 100644 --- a/voicevox_engine/cancellable_engine.py +++ b/voicevox_engine/cancellable_engine.py @@ -13,7 +13,10 @@ from fastapi import HTTPException, Request from .model import AudioQuery, Speaker -from .synthesis_engine import make_synthesis_engine +from .synthesis_engine import make_synthesis_engines + +# FIXME: coreのctypes実装への対応 +raise RuntimeError("現在のバージョンではcancellable機能を使用することはできません。") class CancellableEngine: @@ -189,10 +192,11 @@ def start_synthesis_subprocess( メインプロセスと通信するためのPipe """ - engine = make_synthesis_engine( + engine = make_synthesis_engines( use_gpu=args.use_gpu, voicevox_dir=args.voicevox_dir, voicelib_dir=voicelib_dir, + runtime_dirs=args.runtime_dir, cpu_num_threads=cpu_num_threads, ) while True: diff --git a/voicevox_engine/synthesis_engine/__init__.py b/voicevox_engine/synthesis_engine/__init__.py index 05d2d184e..3e7f6a1ef 100644 --- a/voicevox_engine/synthesis_engine/__init__.py +++ b/voicevox_engine/synthesis_engine/__init__.py @@ -1,9 +1,12 @@ -from .make_synthesis_engine import make_synthesis_engine +from .core_wrapper import CoreWrapper, load_runtime_lib +from .make_synthesis_engines import make_synthesis_engines from .synthesis_engine import SynthesisEngine from .synthesis_engine_base import SynthesisEngineBase __all__ = [ - "make_synthesis_engine", + "CoreWrapper", + "load_runtime_lib", + "make_synthesis_engines", "SynthesisEngine", "SynthesisEngineBase", ] diff --git a/voicevox_engine/synthesis_engine/core_wrapper.py b/voicevox_engine/synthesis_engine/core_wrapper.py new file mode 100644 index 000000000..0b9fcf950 --- /dev/null +++ b/voicevox_engine/synthesis_engine/core_wrapper.py @@ -0,0 +1,254 @@ +import os +import sys +from ctypes import CDLL, POINTER, c_bool, c_char_p, c_float, c_int, c_long +from ctypes.util import find_library +from pathlib import Path +from typing import List, Optional + +import numpy as np + + +def load_runtime_lib(runtime_dirs: List[Path]): + if sys.platform == "win32": + lib_file_names = ["torch_cpu.dll", "torch_cuda.dll", "onnxruntime.dll"] + lib_names = ["torch_cpu", "torch_cuda", "onnxruntime"] + elif sys.platform == "linux": + lib_file_names = ["libtorch.so", "libonnxruntime.so"] + lib_names = ["torch", "onnxruntime"] + elif sys.platform == "darwin": + lib_file_names = ["libonnxruntime.dylib"] + lib_names = ["onnxruntime"] + else: + raise RuntimeError("不明なOSです") + for lib_path in runtime_dirs: + for file_name in lib_file_names: + try: + CDLL(str((lib_path / file_name).resolve(strict=True))) + except OSError: + pass + for lib_name in lib_names: + try: + CDLL(find_library(lib_name)) + except (OSError, TypeError): + pass + + +def check_core_type(core_dir: Path) -> Optional[str]: + if sys.platform == "win32": + if (core_dir / "core.dll").is_file() or (core_dir / "core_cpu.dll").is_file(): + return "libtorch" + elif (core_dir / "core_gpu_x64_nvidia.dll").is_file() or ( + core_dir / "core_cpu_x64.dll" + ).is_file(): + return "onnxruntime" + elif sys.platform == "linux": + if (core_dir / "libcore.so").is_file() or ( + core_dir / "libcore_cpu.so" + ).is_file(): + return "libtorch" + elif (core_dir / "libcore_gpu_x64_nvidia.so").is_file() or ( + core_dir / "libcore_cpu_x64.so" + ).is_file(): + return "onnxruntime" + elif sys.platform == "darwin": + if (core_dir / "libcore_cpu_x64.dylib").is_file(): + return "onnxruntime" + return None + + +def load_core(core_dir: Path, use_gpu: bool) -> CDLL: + model_type = check_core_type(core_dir) + if model_type is None: + raise RuntimeError("コアが見つかりません") + if sys.platform == "win32": + if model_type == "libtorch": + if use_gpu: + try: + return CDLL(str((core_dir / "core.dll").resolve(strict=True))) + except OSError: + pass + try: + return CDLL(str((core_dir / "core_cpu.dll").resolve(strict=True))) + except OSError: + return CDLL(str((core_dir / "core.dll").resolve(strict=True))) + elif model_type == "onnxruntime": + try: + return CDLL( + str((core_dir / "core_gpu_x64_nvidia.dll").resolve(strict=True)) + ) + except OSError: + return CDLL(str((core_dir / "core_cpu_x64.dll").resolve(strict=True))) + elif sys.platform == "linux": + if model_type == "libtorch": + if use_gpu: + try: + return CDLL(str((core_dir / "libcore.so").resolve(strict=True))) + except OSError: + pass + try: + return CDLL(str((core_dir / "libcore_cpu.so").resolve(strict=True))) + except OSError: + return CDLL(str((core_dir / "libcore.so").resolve(strict=True))) + elif model_type == "onnxruntime": + try: + return CDLL( + str((core_dir / "libcore_gpu_x64_nvidia.so").resolve(strict=True)) + ) + except OSError: + return CDLL(str((core_dir / "libcore_cpu_x64.so").resolve(strict=True))) + elif sys.platform == "darwin": + if model_type == "onnxruntime": + try: + return CDLL( + str((core_dir / "libcore_cpu_x64.dylib").resolve(strict=True)) + ) + except OSError: + pass + raise RuntimeError("コアの読み込みに失敗しました") + + +class CoreWrapper: + def __init__(self, use_gpu: bool, core_dir: Path, cpu_num_threads: int = 0) -> None: + model_type = check_core_type(core_dir) + self.core = load_core(core_dir, use_gpu) + assert model_type is not None + + self.core.initialize.restype = c_bool + self.core.metas.restype = c_char_p + self.core.yukarin_s_forward.restype = c_bool + self.core.yukarin_sa_forward.restype = c_bool + self.core.decode_forward.restype = c_bool + self.core.last_error_message.restype = c_char_p + + self.exist_suppoted_devices = False + self.exist_finalize = False + exist_cpu_num_threads = False + if model_type == "onnxruntime": + self.core.supported_devices.restype = c_char_p + self.core.finalize.restype = None + self.exist_suppoted_devices = True + self.exist_finalize = True + exist_cpu_num_threads = True + + self.core.yukarin_s_forward.argtypes = ( + c_int, + POINTER(c_long), + POINTER(c_long), + POINTER(c_float), + ) + self.core.yukarin_sa_forward.argtypes = ( + c_int, + POINTER(c_long), + POINTER(c_long), + POINTER(c_long), + POINTER(c_long), + POINTER(c_long), + POINTER(c_long), + POINTER(c_long), + POINTER(c_float), + ) + self.core.decode_forward.argtypes = ( + c_int, + c_int, + POINTER(c_float), + POINTER(c_float), + POINTER(c_long), + POINTER(c_float), + ) + + cwd = os.getcwd() + os.chdir(core_dir) + try: + if exist_cpu_num_threads: + if not self.core.initialize(".", use_gpu, cpu_num_threads): + raise Exception(self.core.last_error_message().decode("utf-8")) + else: + if not self.core.initialize(".", use_gpu): + raise Exception(self.core.last_error_message().decode("utf-8")) + finally: + os.chdir(cwd) + + def metas(self) -> str: + return self.core.metas().decode("utf-8") + + def yukarin_s_forward( + self, + length: int, + phoneme_list: np.ndarray, + speaker_id: np.ndarray, + ) -> np.ndarray: + output = np.zeros((length,), dtype=np.float32) + success = self.core.yukarin_s_forward( + c_int(length), + phoneme_list.ctypes.data_as(POINTER(c_long)), + speaker_id.ctypes.data_as(POINTER(c_long)), + output.ctypes.data_as(POINTER(c_float)), + ) + if not success: + raise Exception(self.core.last_error_message().decode("utf-8")) + return output + + def yukarin_sa_forward( + self, + length: int, + vowel_phoneme_list: np.ndarray, + consonant_phoneme_list: np.ndarray, + start_accent_list: np.ndarray, + end_accent_list: np.ndarray, + start_accent_phrase_list: np.ndarray, + end_accent_phrase_list: np.ndarray, + speaker_id: np.ndarray, + ) -> np.ndarray: + output = np.empty( + ( + len(speaker_id), + length, + ), + dtype=np.float32, + ) + success = self.core.yukarin_sa_forward( + c_int(length), + vowel_phoneme_list.ctypes.data_as(POINTER(c_long)), + consonant_phoneme_list.ctypes.data_as(POINTER(c_long)), + start_accent_list.ctypes.data_as(POINTER(c_long)), + end_accent_list.ctypes.data_as(POINTER(c_long)), + start_accent_phrase_list.ctypes.data_as(POINTER(c_long)), + end_accent_phrase_list.ctypes.data_as(POINTER(c_long)), + speaker_id.ctypes.data_as(POINTER(c_long)), + output.ctypes.data_as(POINTER(c_float)), + ) + if not success: + raise Exception(self.core.last_error_message().decode("utf-8")) + return output + + def decode_forward( + self, + length: int, + phoneme_size: int, + f0: np.ndarray, + phoneme: np.ndarray, + speaker_id: np.ndarray, + ) -> np.ndarray: + output = np.empty((length * 256,), dtype=np.float32) + success = self.core.decode_forward( + c_int(length), + c_int(phoneme_size), + f0.ctypes.data_as(POINTER(c_float)), + phoneme.ctypes.data_as(POINTER(c_float)), + speaker_id.ctypes.data_as(POINTER(c_long)), + output.ctypes.data_as(POINTER(c_float)), + ) + if not success: + raise Exception(self.core.last_error_message().decode("utf-8")) + return output + + def supported_devices(self) -> str: + if self.exist_suppoted_devices: + return self.core.supported_devices().decode("utf-8") + raise NameError + + def finalize(self) -> None: + if self.exist_finalize: + self.core.finalize() + return + raise NameError diff --git a/voicevox_engine/synthesis_engine/make_synthesis_engine.py b/voicevox_engine/synthesis_engine/make_synthesis_engine.py deleted file mode 100644 index 2f154bbe5..000000000 --- a/voicevox_engine/synthesis_engine/make_synthesis_engine.py +++ /dev/null @@ -1,74 +0,0 @@ -import sys -from pathlib import Path -from typing import Optional - -from .synthesis_engine import SynthesisEngine, SynthesisEngineBase - - -def make_synthesis_engine( - use_gpu: bool, - voicelib_dir: Path, - voicevox_dir: Optional[Path] = None, - cpu_num_threads: Optional[int] = None, -) -> SynthesisEngineBase: - """ - 音声ライブラリをロードして、音声合成エンジンを生成 - - Parameters - ---------- - use_gpu: bool - 音声ライブラリに GPU を使わせるか否か - voicelib_dir: Path - 音声ライブラリ自体があるディレクトリ - voicevox_dir: Path, optional, default=None - 音声ライブラリの Python モジュールがあるディレクトリ - None のとき、Python 標準のモジュール検索パスのどれかにあるとする - cpu_num_threads: int, optional, default=None - 音声ライブラリが、推論に用いるCPUスレッド数を設定する - Noneのとき、ライブラリ側の挙動により論理コア数の半分か、物理コア数が指定される - """ - - # Python モジュール検索パスへ追加 - if voicevox_dir is not None: - print("Notice: --voicevox_dir is " + voicevox_dir.as_posix(), file=sys.stderr) - if voicevox_dir.exists(): - sys.path.insert(0, str(voicevox_dir)) - - has_voicevox_core = True - try: - import core - except ImportError: - import traceback - - from ..dev import core - - has_voicevox_core = False - - # 音声ライブラリの Python モジュールをロードできなかった - traceback.print_exc() - print( - "Notice: mock-library will be used. Try re-run with valid --voicevox_dir", - file=sys.stderr, - ) - - if cpu_num_threads == 0: - print( - "Warning: cpu_num_threads is set to 0. " - + "( The library leaves the decision to the synthesis runtime )", - file=sys.stderr, - ) - - core.initialize(voicelib_dir.as_posix() + "/", use_gpu, cpu_num_threads or 0) - - if has_voicevox_core: - return SynthesisEngine( - yukarin_s_forwarder=core.yukarin_s_forward, - yukarin_sa_forwarder=core.yukarin_sa_forward, - decode_forwarder=core.decode_forward, - speakers=core.metas(), - ) - - from ..dev.synthesis_engine import MockSynthesisEngine - - # モックで置き換える - return MockSynthesisEngine(speakers=core.metas()) diff --git a/voicevox_engine/synthesis_engine/make_synthesis_engines.py b/voicevox_engine/synthesis_engine/make_synthesis_engines.py new file mode 100644 index 000000000..5d140ad23 --- /dev/null +++ b/voicevox_engine/synthesis_engine/make_synthesis_engines.py @@ -0,0 +1,104 @@ +import json +import sys +import traceback +from pathlib import Path +from typing import Dict, List, Optional + +from .core_wrapper import CoreWrapper, load_runtime_lib +from .synthesis_engine import SynthesisEngine, SynthesisEngineBase + + +def make_synthesis_engines( + use_gpu: bool, + voicelib_dirs: Optional[List[Path]] = None, + voicevox_dir: Optional[Path] = None, + runtime_dirs: Optional[List[Path]] = None, + cpu_num_threads: int = 0, + enable_mock: bool = True, +) -> Dict[str, SynthesisEngineBase]: + """ + 音声ライブラリをロードして、音声合成エンジンを生成 + + Parameters + ---------- + use_gpu: bool + 音声ライブラリに GPU を使わせるか否か + voicelib_dirs: List[Path], optional, defauld=None + 音声ライブラリ自体があるディレクトリのリスト + voicevox_dir: Path, optional, default=None + コンパイル済みのvoicevox、またはvoicevox_engineがあるディレクトリ + runtime_dirs: List[Path], optional, default=None + コアで使用するライブラリのあるディレクトリのリスト + None のとき、voicevox_dir、カレントディレクトリになる + cpu_num_threads: int, optional, default=None + 音声ライブラリが、推論に用いるCPUスレッド数を設定する + Noneのとき、ライブラリ側の挙動により論理コア数の半分か、物理コア数が指定される + enable_mock: bool, optional, default=True + コア読み込みに失敗したときにエラーを送出するかどうか + Falseだと代わりにmockが使用される + """ + if cpu_num_threads == 0: + print( + "Warning: cpu_num_threads is set to 0. " + + "( The library leaves the decision to the synthesis runtime )", + file=sys.stderr, + ) + # nuitkaビルドをした際はグローバルに__compiled__が含まれる + # https://nuitka.net/doc/user-manual.html#detecting-nuitka-at-run-time + if "__compiled__" in globals(): + root_dir = Path(sys.argv[0]).parent + else: + root_dir = Path(__file__).parents[2] + + if voicevox_dir is not None: + if voicelib_dirs is not None: + voicelib_dirs.append(voicevox_dir) + else: + voicelib_dirs = [voicevox_dir] + if runtime_dirs is not None: + runtime_dirs.append(voicevox_dir) + else: + runtime_dirs = [voicevox_dir] + else: + if voicelib_dirs is None: + voicelib_dirs = [root_dir] + if runtime_dirs is None: + runtime_dirs = [root_dir] + + voicelib_dirs = [p.expanduser() for p in voicelib_dirs] + runtime_dirs = [p.expanduser() for p in runtime_dirs] + + load_runtime_lib(runtime_dirs) + synthesis_engines = {} + for core_dir in voicelib_dirs: + try: + core = CoreWrapper(use_gpu, core_dir, cpu_num_threads) + metas = json.loads(core.metas()) + core_version = metas[0]["version"] + if core_version in synthesis_engines: + print( + "Warning: Core loading is skipped because of version duplication.", + file=sys.stderr, + ) + continue + synthesis_engines[core_version] = SynthesisEngine( + yukarin_s_forwarder=core.yukarin_s_forward, + yukarin_sa_forwarder=core.yukarin_sa_forward, + decode_forwarder=core.decode_forward, + speakers=core.metas(), + ) + except Exception: + if not enable_mock: + raise + traceback.print_exc() + print( + "Notice: mock-library will be used. Try re-run with valid --voicevox_dir", + file=sys.stderr, + ) + from ..dev.core import metas as mock_metas + from ..dev.synthesis_engine import MockSynthesisEngine + + if "0.0.0" not in synthesis_engines: + synthesis_engines["0.0.0"] = MockSynthesisEngine(speakers=mock_metas()) + + return synthesis_engines