diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/kana_parser.py index 711b6a7de..8e0ff845a 100644 --- a/voicevox_engine/kana_parser.py +++ b/voicevox_engine/kana_parser.py @@ -3,16 +3,16 @@ from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode from .mora_list import openjtalk_text2mora -LOOP_LIMIT = 300 -UNVOICE_SYMBOL = "_" -ACCENT_SYMBOL = "'" -NOPAUSE_DELIMITER = "/" -PAUSE_DELIMITER = "、" -WIDE_INTERROGATION_MARK = "?" - -text2mora_with_unvoice = {} +_LOOP_LIMIT = 300 +_UNVOICE_SYMBOL = "_" +_ACCENT_SYMBOL = "'" +_NOPAUSE_DELIMITER = "/" +_PAUSE_DELIMITER = "、" +_WIDE_INTERROGATION_MARK = "?" + +_text2mora_with_unvoice = {} for text, (consonant, vowel) in openjtalk_text2mora.items(): - text2mora_with_unvoice[text] = Mora( + _text2mora_with_unvoice[text] = Mora( text=text, consonant=consonant if len(consonant) > 0 else None, consonant_length=0 if len(consonant) > 0 else None, @@ -22,7 +22,7 @@ is_interrogative=False, ) if vowel in ["a", "i", "u", "e", "o"]: - text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora( + _text2mora_with_unvoice[_UNVOICE_SYMBOL + text] = Mora( text=text, consonant=consonant if len(consonant) > 0 else None, consonant_length=0 if len(consonant) > 0 else None, @@ -48,7 +48,7 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase: outer_loop = 0 while base_index < len(phrase): outer_loop += 1 - if phrase[base_index] == ACCENT_SYMBOL: + if phrase[base_index] == _ACCENT_SYMBOL: if len(moras) == 0: raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase) if accent_index is not None: @@ -57,21 +57,21 @@ def _text_to_accent_phrase(phrase: str) -> AccentPhrase: base_index += 1 continue for watch_index in range(base_index, len(phrase)): - if phrase[watch_index] == ACCENT_SYMBOL: + if phrase[watch_index] == _ACCENT_SYMBOL: break # 普通の文字の場合 stack += phrase[watch_index] - if stack in text2mora_with_unvoice: + if stack in _text2mora_with_unvoice: matched_text = stack # push mora if matched_text is None: raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack) else: - moras.append(text2mora_with_unvoice[matched_text].copy(deep=True)) + moras.append(_text2mora_with_unvoice[matched_text].copy(deep=True)) base_index += len(matched_text) stack = "" matched_text = None - if outer_loop > LOOP_LIMIT: + if outer_loop > _LOOP_LIMIT: raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP) if accent_index is None: raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase) @@ -90,7 +90,7 @@ def parse_kana(text: str) -> List[AccentPhrase]: raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=1) for i in range(len(text) + 1): - if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]: + if i == len(text) or text[i] in [_PAUSE_DELIMITER, _NOPAUSE_DELIMITER]: phrase = text[phrase_base:i] if len(phrase) == 0: raise ParseKanaError( @@ -99,16 +99,16 @@ def parse_kana(text: str) -> List[AccentPhrase]: ) phrase_base = i + 1 - is_interrogative = WIDE_INTERROGATION_MARK in phrase + is_interrogative = _WIDE_INTERROGATION_MARK in phrase if is_interrogative: - if WIDE_INTERROGATION_MARK in phrase[:-1]: + if _WIDE_INTERROGATION_MARK in phrase[:-1]: raise ParseKanaError( ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase ) - phrase = phrase.replace(WIDE_INTERROGATION_MARK, "") + phrase = phrase.replace(_WIDE_INTERROGATION_MARK, "") accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase) - if i < len(text) and text[i] == PAUSE_DELIMITER: + if i < len(text) and text[i] == _PAUSE_DELIMITER: accent_phrase.pause_mora = Mora( text="、", consonant=None, @@ -129,18 +129,18 @@ def create_kana(accent_phrases: List[AccentPhrase]) -> str: for i, phrase in enumerate(accent_phrases): for j, mora in enumerate(phrase.moras): if mora.vowel in ["A", "I", "U", "E", "O"]: - text += UNVOICE_SYMBOL + text += _UNVOICE_SYMBOL text += mora.text if j + 1 == phrase.accent: - text += ACCENT_SYMBOL + text += _ACCENT_SYMBOL if phrase.is_interrogative: - text += WIDE_INTERROGATION_MARK + text += _WIDE_INTERROGATION_MARK if i < len(accent_phrases) - 1: if phrase.pause_mora is None: - text += NOPAUSE_DELIMITER + text += _NOPAUSE_DELIMITER else: - text += PAUSE_DELIMITER + text += _PAUSE_DELIMITER return text