Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mutabilityとasyncnessを仕上げる #553

Merged
merged 21 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5d17802
[wip] Add `gpu_num_sessions` options to `load_voice_model`
qryxip Jul 24, 2023
6eab358
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Jul 29, 2023
ce2f8c0
`decode-with-gpu`を剥がす
qryxip Jul 29, 2023
47a77ee
`gpu_num_sessions`オプションを剥がす
qryxip Jul 29, 2023
ee2dc76
`predict`も別々に`Mutex`に包む
qryxip Jul 29, 2023
cf92b69
`LoadedModels`のdocを書く
qryxip Jul 29, 2023
b893fed
`cargo xtask update-c-header`
qryxip Jul 29, 2023
780d880
`git restore -s main -- crates/voicevox_core_c_api/tests/`
qryxip Jul 29, 2023
1e8886d
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 6, 2023
73344b8
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 11, 2023
c460e48
`synthesizer_get_metas_json` → `synthesizer_create_metas_json`
qryxip Aug 11, 2023
f98ea9f
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 12, 2023
06fef88
#575 で追加されたテストを修正
qryxip Aug 12, 2023
4891a9c
warningを解消
qryxip Aug 12, 2023
e971c2b
`create_metas_json`の返り値を`*mut c_char`に
qryxip Aug 12, 2023
d4eaf55
Merge branch 'main' into rework-mutability-and-asyncness
qryxip Aug 16, 2023
24f5e0f
Rework `ensure_not_contains`
qryxip Aug 16, 2023
e66168e
`LoadedModels`からのsession取得を`get`という単一のメソッドに
qryxip Aug 16, 2023
1101624
`Error::LoadModel`に色々統合する
qryxip Aug 17, 2023
36700d8
欠番を使わない
qryxip Aug 17, 2023
60cad17
`cargo xtask update-c-header`
qryxip Aug 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ async_zip.workspace = true
cfg-if = "1.0.0"
derive-getters.workspace = true
derive-new = "0.5.9"
derive_more = "0.99.17"
easy-ext.workspace = true
fs-err.workspace = true
futures = "0.3.26"
Expand Down
4 changes: 0 additions & 4 deletions crates/voicevox_core/src/engine/synthesis_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ impl SynthesisEngine {
&self.inference_core
}

pub fn inference_core_mut(&mut self) -> &mut InferenceCore {
&mut self.inference_core
}

pub async fn create_accent_phrases(
&self,
text: &str,
Expand Down
75 changes: 44 additions & 31 deletions crates/voicevox_core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,15 @@ pub enum Error {
#[error("{}", base_error_message(VOICEVOX_RESULT_GPU_SUPPORT_ERROR))]
GpuSupport,

#[error("{} ({}): {source}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR), path.display())]
LoadModel {
path: PathBuf,
#[source]
source: anyhow::Error,
},
#[error("{} ({})", base_error_message(VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR), path.display())]
AlreadyLoadedModel { path: PathBuf },
#[error(transparent)]
LoadModel(#[from] LoadModelError),

#[error(
"{} ({model_id:?})",
base_error_message(VOICEVOX_RESULT_UNLOADED_MODEL_ERROR)
)]
UnloadedModel { model_id: VoiceModelId },

#[error(
"{}({path}):{source}",
base_error_message(VOICEVOX_RESULT_OPEN_FILE_ERROR)
)]
OpenFile {
path: PathBuf,
#[source]
source: anyhow::Error,
},

#[error(
"{}({path}):{source}",
base_error_message(VOICEVOX_RESULT_VVM_MODEL_READ_ERROR)
)]
VvmRead {
path: PathBuf,
#[source]
source: anyhow::Error,
},

#[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))]
LoadMetas(#[source] anyhow::Error),

#[error(
"{},{0}",
base_error_message(VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR)
Expand Down Expand Up @@ -111,6 +82,48 @@ pub enum Error {
InvalidWord(InvalidWordError),
}

/// Crate-internal `Result` alias whose error type is fixed to [`LoadModelError`].
pub(crate) type LoadModelResult<T> = std::result::Result<T, LoadModelError>;

/// An error raised while loading a voice model.
///
/// The `Display` message is assembled from `path`, then `context`, and, only
/// when `source` is `Some`, the source error appended after a `": "` separator.
#[derive(Error, Debug)]
#[error(
"`{path}`の読み込みに失敗しました: {context}{}",
source.as_ref().map(|e| format!(": {e}")).unwrap_or_default())
]
pub struct LoadModelError {
// Path interpolated into the message as the item that failed to load.
pub(crate) path: PathBuf,
// Kind of failure; rendered into the message through its `Display` impl.
pub(crate) context: LoadModelErrorKind,
// Optional underlying cause, registered as the error source via `#[source]`.
#[source]
pub(crate) source: Option<anyhow::Error>,
}

impl LoadModelError {
    /// Returns a shared reference to the [`LoadModelErrorKind`] stored in this error.
    pub fn context(&self) -> &LoadModelErrorKind {
        // Destructure instead of using a field access; only `context` is needed.
        let Self { context, .. } = self;
        context
    }
}

#[derive(derive_more::Display, Debug)]
pub enum LoadModelErrorKind {
//#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR)")]
Hiroshiba marked this conversation as resolved.
Show resolved Hide resolved
#[display(fmt = "ZIPファイルとして開くことができませんでした")]
OpenZipFile,
//#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR)")]
#[display(fmt = "`{filename}`を読み取れませんでした")]
ReadZipEntry { filename: String },
//#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR)")]
#[display(fmt = "モデル`{id}`は既に読み込まれています")]
ModelAlreadyLoaded { id: VoiceModelId },
//#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR)")]
#[display(fmt = "スタイル`{id}`は既に読み込まれています")]
StyleAlreadyLoaded { id: StyleId },
#[display(
fmt = "{}",
"base_error_message(VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR)"
Hiroshiba marked this conversation as resolved.
Show resolved Hide resolved
)]
InvalidModelData,
}

fn base_error_message(result_code: VoicevoxResultCode) -> &'static str {
let c_message: &'static str = crate::result_code::error_result_to_message(result_code);
&c_message[..(c_message.len() - 1)]
Expand Down
97 changes: 38 additions & 59 deletions crates/voicevox_core/src/inference_core.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use self::status::*;
use super::*;
use onnxruntime::{
ndarray,
session::{AnyArray, NdArray},
};
use onnxruntime::{ndarray, session::NdArray};

const PHONEME_LENGTH_MINIMAL: f32 = 0.01;

Expand All @@ -18,7 +15,7 @@ impl InferenceCore {
load_all_models: bool,
) -> Result<Self> {
if !use_gpu || Self::can_support_gpu_feature()? {
let mut status = Status::new(use_gpu, cpu_num_threads);
let status = Status::new(use_gpu, cpu_num_threads);

if load_all_models {
for model in &VoiceModel::get_all_models().await? {
Expand All @@ -43,14 +40,14 @@ impl InferenceCore {
}
}

pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
pub async fn load_model(&self, model: &VoiceModel) -> Result<()> {
self.status.load_model(model).await
}

pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
self.status.unload_model(voice_model_id)
}
pub fn metas(&self) -> &VoiceModelMeta {
pub fn metas(&self) -> VoiceModelMeta {
self.status.metas()
}

Expand All @@ -71,21 +68,15 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;

let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut phoneme_vector_array, &mut speaker_id_array];
let phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

let mut output = self
.status
.predict_duration_session_run(model_id, input_tensors)?;
.predict_duration_session_run(&model_id, phoneme_vector_array, speaker_id_array)
.await?;

for output_item in output.iter_mut() {
if *output_item < PHONEME_LENGTH_MINIMAL {
Expand All @@ -112,37 +103,31 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;

let mut length_array = NdArray::new(ndarray::arr0(length as i64));
let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let mut consonant_phoneme_vector_array =
NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let mut start_accent_phrase_vector_array =
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

let length_array = NdArray::new(ndarray::arr0(length as i64));
let vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let consonant_phoneme_vector_array = NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let start_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(start_accent_phrase_vector));
let mut end_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> = vec![
&mut length_array,
&mut vowel_phoneme_vector_array,
&mut consonant_phoneme_vector_array,
&mut start_accent_vector_array,
&mut end_accent_vector_array,
&mut start_accent_phrase_vector_array,
&mut end_accent_phrase_vector_array,
&mut speaker_id_array,
];
let end_accent_phrase_vector_array = NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

self.status
.predict_intonation_session_run(model_id, input_tensors)
.predict_intonation_session_run(
&model_id,
length_array,
vowel_phoneme_vector_array,
consonant_phoneme_vector_array,
start_accent_vector_array,
end_accent_vector_array,
start_accent_phrase_vector_array,
end_accent_phrase_vector_array,
speaker_id_array,
)
.await
}

pub async fn decode(
Expand All @@ -157,11 +142,7 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

// 音が途切れてしまうのを避けるworkaround処理が入っている
// TODO: 改善したらここのpadding処理を取り除く
Expand All @@ -179,23 +160,21 @@ impl InferenceCore {
padding_size,
);

let mut f0_array = NdArray::new(
let f0_array = NdArray::new(
ndarray::arr1(&f0_with_padding)
.into_shape([length_with_padding, 1])
.unwrap(),
);
let mut phoneme_array = NdArray::new(
let phoneme_array = NdArray::new(
ndarray::arr1(&phoneme_with_padding)
.into_shape([length_with_padding, phoneme_size])
.unwrap(),
);
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

self.status
.decode_session_run(model_id, input_tensors)
.decode_session_run(&model_id, f0_array, phoneme_array, speaker_id_array)
.await
.map(|output| Self::trim_padding_from_output(output, padding_size))
}

Expand Down
32 changes: 15 additions & 17 deletions crates/voicevox_core/src/result_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,10 @@ pub enum VoicevoxResultCode {
VOICEVOX_RESULT_OK = 0,
/// open_jtalk辞書ファイルが読み込まれていない
VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR = 1,
/// modelの読み込みに失敗した
VOICEVOX_RESULT_LOAD_MODEL_ERROR = 2,
/// サポートされているデバイス情報取得に失敗した
VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR = 3,
/// GPUモードがサポートされていない
VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4,
/// メタ情報読み込みに失敗した
VOICEVOX_RESULT_LOAD_METAS_ERROR = 5,
/// 無効なstyle_idが指定された
VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR = 6,
/// 無効なmodel_idが指定された
Expand All @@ -35,12 +31,16 @@ pub enum VoicevoxResultCode {
VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR = 14,
/// 無効なAccentPhrase
VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 15,
/// ファイルオープンエラー
VOICEVOX_RESULT_OPEN_FILE_ERROR = 16,
/// Modelを読み込めなかった
VOICEVOX_RESULT_VVM_MODEL_READ_ERROR = 17,
/// すでに読み込まれているModelを読み込もうとした
VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR = 18,
/// ZIPファイルを開くことに失敗した
VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR = 16,
/// ZIP内のファイルが読めなかった
VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17,
/// すでに読み込まれている音声モデルを読み込もうとした
VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR = 18,
/// すでに読み込まれているスタイルを読み込もうとした
VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR = 26,
/// 無効なモデルデータ
VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR = 27,
/// Modelが読み込まれていない
VOICEVOX_RESULT_UNLOADED_MODEL_ERROR = 19,
/// ユーザー辞書を読み込めなかった
Expand All @@ -64,8 +64,6 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
"OpenJTalkの辞書が読み込まれていません\0"
}
VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",

VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",
VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => {
Expand All @@ -85,11 +83,11 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
}
VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR => "無効なaudio_queryです\0",
VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR => "無効なaccent_phraseです\0",
VOICEVOX_RESULT_OPEN_FILE_ERROR => "ファイルオープンに失敗しました\0",
VOICEVOX_RESULT_VVM_MODEL_READ_ERROR => "Modelを読み込めませんでした\0",
VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR => {
"すでに読み込まれているModelを読み込もうとしました\0"
}
VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR => "ZIPファイルのオープンに失敗しました\0",
VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR => "ZIP内のファイルを読むことができませんでした\0",
VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR => "同じIDのモデルを読むことはできません\0",
VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR => "同じIDのスタイルを読むことはできません\0",
VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR => "モデルデータを読むことができませんでした\0",
VOICEVOX_RESULT_UNLOADED_MODEL_ERROR => "Modelが読み込まれていません\0",
VOICEVOX_RESULT_LOAD_USER_DICT_ERROR => "ユーザー辞書を読み込めませんでした\0",
VOICEVOX_RESULT_SAVE_USER_DICT_ERROR => "ユーザー辞書を書き込めませんでした\0",
Expand Down
Loading