-
Notifications
You must be signed in to change notification settings - Fork 205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Guided Synthesis #252
Merged
Merged
Guided Synthesis #252
Changes from 14 commits
Commits
Show all changes
32 commits
Select commit
Hold shift + click to select a range
a73892b
forced alignment, f0 extraction and entry point
Patchethium 28cf7c2
Merge branch 'master' into guided_synthesis
Patchethium a060398
kind of finished
Patchethium f7a3713
change julius4seg, doesn't seem to help
Patchethium 6b0651f
run pysen format
Patchethium f1a663a
add speaker id to api
Patchethium 668df80
run pysen format
Patchethium ad4bdbd
add accent_phrase api, finish
Patchethium ea95405
add request parameter
Patchethium 6dff2ec
improve error handling
Patchethium 34eec39
run pysen format
Patchethium a0cba4d
add parameters
Patchethium 90e41e2
run pysen format
Patchethium e889207
a little boundary check
Patchethium c98c8be
add normalization for different WAV format
Patchethium 1c6d96e
run format
Patchethium 2d74993
run format
Patchethium ca356df
Merge branch 'master' into guided_synthesis
Patchethium f088176
move synthesis and accent phrase to synthesis engine
Patchethium cf18c3c
add test for mock
Patchethium 98d387c
change url for apis
Patchethium 48b629f
simplify
Patchethium 061483c
error type
Patchethium fc45886
Merge branch 'master' into guided_synthesis
Patchethium 0e26bbb
do something
Patchethium 365ed92
do something
Patchethium 29427d9
run format
Patchethium ddc6537
Merge branch 'master' into guided_synthesis
Patchethium ca6df3b
resolve conflict
Patchethium 730917f
add usage to README
Patchethium 3522370
Merge branch 'master' into guided_synthesis
Patchethium 9b75c6c
add comments and experimental flag for guided api
Patchethium File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
import base64 | ||
import json | ||
import multiprocessing | ||
import traceback | ||
import zipfile | ||
from functools import lru_cache | ||
from pathlib import Path | ||
|
@@ -11,11 +12,12 @@ | |
|
||
import soundfile | ||
import uvicorn | ||
from fastapi import FastAPI, HTTPException, Request, Response | ||
from fastapi import FastAPI, File, Form, HTTPException, Request, Response, UploadFile | ||
from fastapi.middleware.cors import CORSMiddleware | ||
from fastapi.params import Query | ||
from starlette.responses import FileResponse | ||
|
||
import voicevox_engine.guided as guided | ||
from voicevox_engine.cancellable_engine import CancellableEngine | ||
from voicevox_engine.kana_parser import create_kana, parse_kana | ||
from voicevox_engine.model import ( | ||
|
@@ -206,6 +208,63 @@ def accent_phrases( | |
enable_interrogative=enable_interrogative, | ||
) | ||
|
||
@app.post( | ||
"/guided/accent_phrase", | ||
response_model=AudioQuery, | ||
tags=["クエリ作成"], | ||
summary="Create Audio Query Guided by External Audio", | ||
) | ||
def guided_accent_phrase( | ||
kana: str = Form(...), | ||
speaker_id: int = Form(...), | ||
normalize: int = Form(...), | ||
audio_file: UploadFile = File(...), | ||
): | ||
try: | ||
accent_phrases = guided.accent_phrase( | ||
engine=engine, | ||
audio_file=audio_file.file, | ||
kana=kana, | ||
speaker_id=speaker_id, | ||
normalize=normalize, | ||
) | ||
return AudioQuery( | ||
accent_phrases=accent_phrases, | ||
speedScale=1, | ||
pitchScale=0, | ||
intonationScale=1, | ||
volumeScale=1, | ||
prePhonemeLength=0.1, | ||
postPhonemeLength=0.1, | ||
outputSamplingRate=default_sampling_rate, | ||
outputStereo=False, | ||
kana=create_kana(accent_phrases), | ||
) | ||
except ParseKanaError: | ||
print(traceback.format_exc()) | ||
raise HTTPException( | ||
status_code=500, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think using 422 instead of 500 for the status code is better. |
||
detail="Failed to Parse Kana", | ||
) | ||
except StopIteration: | ||
print(traceback.format_exc()) | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Failed in Forced Alignment. Please try again with another Audio Resource", | ||
) | ||
except Exception as e: | ||
print(traceback.format_exc()) | ||
if str(e) == "Decode Failed": | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Failed in Forced Alignment. Please try again with another Audio Resource", | ||
) | ||
else: | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Internal Server Error.", | ||
) | ||
|
||
@app.post( | ||
"/mora_data", | ||
response_model=List[AccentPhrase], | ||
|
@@ -324,7 +383,7 @@ def multi_synthesis(queries: List[AudioQuery], speaker: int): | |
format="WAV", | ||
) | ||
wav_file.seek(0) | ||
zip_file.writestr(f"{str(i+1).zfill(3)}.wav", wav_file.read()) | ||
zip_file.writestr(f"{str(i + 1).zfill(3)}.wav", wav_file.read()) | ||
|
||
return FileResponse(f.name, media_type="application/zip") | ||
|
||
|
@@ -376,6 +435,72 @@ def _synthesis_morphing( | |
|
||
return FileResponse(f.name, media_type="audio/wav") | ||
|
||
@app.post( | ||
"/guided/synthesis", | ||
responses={ | ||
200: { | ||
"content": { | ||
"audio/wav": {"schema": {"type": "string", "format": "binary"}} | ||
}, | ||
} | ||
}, | ||
tags=["音声合成"], | ||
summary="Audio synthesis guided by external audio and phonemes in kana, both uploaded in one form", | ||
) | ||
def guided_synthesis( | ||
Hiroshiba marked this conversation as resolved.
Show resolved
Hide resolved
|
||
kana: str = Form(...), | ||
speaker_id: int = Form(...), | ||
normalize: int = Form(...), | ||
audio_file: UploadFile = File(...), | ||
stereo: int = Form(...), | ||
sample_rate: int = Form(...), | ||
volumeScale: float = Form(...), | ||
pitchScale: float = Form(...), | ||
speedScale: float = Form(...), | ||
): | ||
try: | ||
wave = guided.synthesis( | ||
engine=engine, | ||
audio_file=audio_file.file, | ||
kana=kana, | ||
speaker_id=speaker_id, | ||
normalize=normalize, | ||
stereo=stereo, | ||
sample_rate=sample_rate, | ||
volumeScale=volumeScale, | ||
pitchScale=pitchScale, | ||
speedScale=speedScale, | ||
) | ||
|
||
with NamedTemporaryFile(delete=False) as f: | ||
soundfile.write(file=f, data=wave, samplerate=sample_rate, format="WAV") | ||
|
||
return FileResponse(f.name, media_type="audio/wav") | ||
except ParseKanaError: | ||
print(traceback.format_exc()) | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Failed to Parse Kana", | ||
) | ||
except StopIteration: | ||
print(traceback.format_exc()) | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Failed in Forced Alignment. Please try again with another Audio Resource", | ||
) | ||
except Exception as e: | ||
print(traceback.format_exc()) | ||
if str(e) == "Decode Failed": | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Failed in Forced Alignment. Please try again with another Audio Resource", | ||
) | ||
else: | ||
raise HTTPException( | ||
status_code=500, | ||
detail="Internal Server Error.", | ||
) | ||
|
||
@app.post( | ||
"/connect_waves", | ||
response_class=FileResponse, | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ここは `List[AccentPhrase]` が正しそうです。
voicevox_engine/run.py
Line 161 in bdf712f