Skip to content

Commit

Permalink
Update: PyInstaller ビルド後の本体に組み込む辞書を ZStandard で圧縮し、インストールサイズを 90MB 程度…
Browse files Browse the repository at this point in the history
…削減する
  • Loading branch information
tsukumijima committed Nov 8, 2024
1 parent a5b2d3e commit c683a04
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 20 deletions.
144 changes: 128 additions & 16 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ typos = "typos"
test = "pytest"
update-snapshots = "pytest --snapshot-update"
update-licenses = "bash tools/create_venv_and_generate_licenses.bash"
compress-dictionaries = "poetry run python tools/compress_dictionaries.py"
build = "poetry run task update-licenses && pyinstaller --noconfirm run.spec"

[tool.pysen]
Expand Down Expand Up @@ -81,6 +82,7 @@ pydantic = "^2.7.3"
starlette = "^0.38.4"
jaconv = "^0.3.4"
httpx = "^0.27.0"
zstandard = "^0.23.0"
# aivmlib は AIVMX ファイルのメタデータ読み取りに必要
aivmlib = { git = "https://x-access-token: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@github.com/Aivis-Project/aivmlib.git", rev = "9731dc6f20c2282e09fa870790043a20b2662c16" }
# AivisSpeech-Engine にはカスタマイズされた Style-Bert-VITS2 が必要
Expand Down
Binary file added resources/dictionaries/01_default.csv.zst
Binary file not shown.
Binary file added resources/dictionaries/02_tdmelodic.csv.zst
Binary file not shown.
Binary file added resources/dictionaries/03_tdmelodic.csv.zst
Binary file not shown.
2 changes: 1 addition & 1 deletion resources/engine_manifest_assets/dependency_licenses.json

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion run.spec
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import re
import sys

datas = [
('resources', 'resources'),
('resources/dictionaries/*.csv.zst', 'resources/dictionaries'),
('resources/engine_manifest_assets', 'resources/engine_manifest_assets'),
('resources/setting_ui_template.html', 'resources'),
('engine_manifest.json', '.'),
('presets.yaml', '.'),
]
Expand Down
50 changes: 50 additions & 0 deletions tools/compress_dictionaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

import pathlib

import zstandard


def CompressDictionaries(
    dictionaries_path: "pathlib.Path | str | None" = None,
    *,
    compression_level: int = 5,
) -> None:
    """
    Compress every csv file under a dictionary directory with ZStandard.

    Each ``<name>.csv`` found (recursively) under the directory is compressed
    and saved next to it as ``<name>.csv.zst``. The original csv files are
    left untouched.

    Args:
        dictionaries_path: Directory to scan. When omitted, defaults to
            ``../resources/dictionaries/`` relative to this script.
        compression_level: ZStandard compression level (1-22). Higher values
            compress better but take longer; 5 is a good balance between
            compression ratio and decompression speed.

    Returns:
        None. If the directory does not exist or contains no csv files, an
        error message is printed and nothing is written.
    """

    # Resolve the target directory (default: ../resources/dictionaries/)
    if dictionaries_path is None:
        dictionaries_path = (
            pathlib.Path(__file__).parent.parent / "resources" / "dictionaries"
        )
    else:
        dictionaries_path = pathlib.Path(dictionaries_path)
    if not dictionaries_path.exists():
        print(f"Error: {dictionaries_path} does not exist")
        return

    # Enumerate csv files; sorted so the processing order (and log output)
    # does not depend on filesystem enumeration order
    csv_files = sorted(dictionaries_path.glob("**/*.csv"))
    if not csv_files:
        print("Error: No csv files found")
        return

    # Initialize the ZStandard compressor
    compressor = zstandard.ZstdCompressor(level=compression_level)

    for csv_file in csv_files:
        # "foo.csv" -> "foo.csv.zst"
        output_path = csv_file.with_suffix(".csv.zst")
        # Compress into a temporary file first so that a failure midway never
        # leaves a truncated .csv.zst behind for the engine to load
        temp_path = output_path.with_name(output_path.name + ".tmp")

        try:
            with open(csv_file, "rb") as input_file, open(
                temp_path, "wb"
            ) as output_file:
                compressor.copy_stream(input_file, output_file)
            # Replace any previous output only after compression succeeded
            temp_path.replace(output_path)
        except BaseException:
            # Clean up the partial file, then let the error propagate
            temp_path.unlink(missing_ok=True)
            raise

        print(f"Compressed: {csv_file.name} -> {output_path.name}")

# Run the compression only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    CompressDictionaries()
11 changes: 9 additions & 2 deletions voicevox_engine/user_dict/user_dict_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from uuid import UUID, uuid4

import pyopenjtalk
import zstandard
from pydantic import TypeAdapter

from ..logging import logger
Expand Down Expand Up @@ -129,12 +130,18 @@ def update_dict(self) -> None:
default_dict_files = [default_dict_dir_path / "01_default.csv"]
logger.info("Using only default dictionary for pytest.")
else:
default_dict_files = sorted(default_dict_dir_path.glob("*.csv"))
default_dict_files = sorted(default_dict_dir_path.glob("*.csv.zst"))
if len(default_dict_files) == 0:
logger.warning("Cannot find default dictionary.")
return

# ZStandard デコーダーの初期化
decompressor = zstandard.ZstdDecompressor()

for file_path in default_dict_files:
default_dict_content = file_path.read_text(encoding="utf-8")
with file_path.open("rb") as f:
with decompressor.stream_reader(f) as reader:
default_dict_content = reader.read().decode("utf-8")
if not default_dict_content.endswith("\n"):
default_dict_content += "\n"
csv_text += default_dict_content
Expand Down

0 comments on commit c683a04

Please sign in to comment.