-
Notifications
You must be signed in to change notification settings - Fork 205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Guided #723
Guided #723
Changes from all commits
b874227
385c9c1
5bf9bf2
fc8a7c6
6a1eee8
4fc1484
5d31351
ab3614f
021a679
cb49bf4
4432980
29ac4a9
5b78b4d
c86d0d7
b5c2d86
3d3a7c0
5f37b16
4ec5ef5
432ac57
eb9c638
8d452c4
3bdd8d3
a69a06a
b2b6a07
30db29c
2e5be32
78f7162
ac0a314
7f15c54
d034b44
6c4fe0a
b19783d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -326,6 +326,44 @@ def accent_phrases( | |
else: | ||
return engine.create_accent_phrases(text, speaker_id=speaker) | ||
|
||
@app.post(
    "/guide",
    response_model=AudioQuery,
    tags=["クエリ編集"],
    summary="Create Accent Phrase from External Audio",
)
def guide(
    query: AudioQuery,
    speaker: int,
    ref_path: str,
    normalize: bool,
    core_version: Optional[str] = None,
):
    # Experimental endpoint: reads a reference audio file and passes it, along
    # with the given query, to the engine's `guide` method, returning whatever
    # that method returns.
    #
    # Parameters:
    #   query        - audio query forwarded to the engine unchanged.
    #   speaker      - forwarded to the engine as `speaker_id`.
    #   ref_path     - path of the reference audio file to open.
    #   normalize    - forwarded to the engine unchanged.
    #   core_version - selects the engine via `get_engine`; None is passed
    #                  through as-is.
    #
    # Raises:
    #   HTTPException(404) when the server was started without --enable_guided.
    #   HTTPException(422) when the reference file cannot be opened or decoded.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI
    # publishes a handler's docstring as the endpoint description.
    if not args.enable_guided:
        raise HTTPException(
            status_code=404,
            detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
        )

    # NOTE(review): `ref_path` comes straight from the request and is opened
    # on the server's filesystem without any restriction. Confirm that this
    # endpoint is only reachable by trusted local clients, or constrain the
    # path to an allowed directory.
    try:
        with open(ref_path, "rb") as file:
            # Reading with dtype="float32" also normalizes the samples
            # into the range [-1.0, 1.0].
            wav, sr = soundfile.read(file, dtype="float32")
    except Exception as err:
        # Chain the original error so the real cause (missing file,
        # unsupported format, ...) stays visible in server logs/tracebacks.
        raise HTTPException(
            status_code=422,
            detail="Invalid wav file",
        ) from err

    engine = get_engine(core_version)
    return engine.guide(
        query=query,
        speaker_id=speaker,
        ref_wav=wav,
        sr=sr,
        normalize=normalize,
        model_path=args.guide_model,
    )
|
||
@app.post( | ||
"/mora_data", | ||
response_model=List[AccentPhrase], | ||
|
@@ -475,18 +513,14 @@ def multi_synthesis( | |
sampling_rate = queries[0].outputSamplingRate | ||
|
||
with NamedTemporaryFile(delete=False) as f: | ||
|
||
with zipfile.ZipFile(f, mode="a") as zip_file: | ||
|
||
for i in range(len(queries)): | ||
|
||
if queries[i].outputSamplingRate != sampling_rate: | ||
raise HTTPException( | ||
status_code=422, detail="サンプリングレートが異なるクエリがあります" | ||
) | ||
|
||
with TemporaryFile() as wav_file: | ||
|
||
wave = engine.synthesis(query=queries[i], speaker_id=speaker) | ||
soundfile.write( | ||
file=wav_file, | ||
|
@@ -1221,6 +1255,15 @@ def custom_openapi(): | |
action="store_true", | ||
help="指定すると音声合成を途中でキャンセルできるようになります。", | ||
) | ||
parser.add_argument( | ||
"--enable_guided", action="store_true", help="入力音声を解析して音声合成クエリで返す機能を有効化します。" | ||
) | ||
parser.add_argument( | ||
"--guide_model", | ||
type=Path, | ||
default="cv_jp.bin", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can decide later where to place the file and how to set the parameters! ファイル名をどうするかや、引数をどうするかは後で決めさせていただこうと思います! |
||
help="guided機能に入力音声の発音の長さを解析するため必要なモデルファイルです。", | ||
) | ||
parser.add_argument( | ||
"--init_processes", | ||
type=int, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ datas = [ | |
('presets.yaml', '.'), | ||
('default_setting.yml', '.'), | ||
('ui_template', 'ui_template'), | ||
('model', 'model'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 不要そう? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this code is unnecessary. |
||
('cv_jp.bin', '.') | ||
] | ||
datas += collect_data_files('pyopenjtalk') | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a suggestion that could improve the usability of the library! What do you think about bundling this binary file within the snfa library or adding a feature to automatically download the model file if it's missing?
Bundling is fairly common; for example, the soundfile library includes a DLL. Auto-downloading is also a common feature; for example, pyopenjtalk automatically downloads a missing dictionary
https://github.com/r9y9/pyopenjtalk/blob/22852ba6e36faaf2589b458e731c701e24f9dc9d/pyopenjtalk/__init__.py#L77-L79.
ライブラリの使い勝手が上がりそうな提案があります!
このバイナリファイルをsnfaライブラリの中に同梱したり、あるいはモデルファイルがなかったら自動でダウンロードする機能をつけるのはどうでしょうか?
同梱するのは結構普通のことで、例えばsoundfileなどもdllが同梱されていたと思います。
自動ダウンロードもよくある機能で、例えばpyopenjtalkは辞書がない場合に自動的にダウンロードしています。
https://github.com/r9y9/pyopenjtalk/blob/22852ba6e36faaf2589b458e731c701e24f9dc9d/pyopenjtalk/__init__.py#L77-L79