変換ではなく検証を行うように変更 (change to validate instead of converting)
takana-v committed Feb 4, 2025
1 parent fcc835d commit a9a4b19
Showing 2 changed files with 44 additions and 34 deletions.
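The commit swaps sanitizing AfterValidators for validators that raise ValueError, which pydantic reports as a ValidationError when the model is constructed. A minimal, self-contained sketch of that pattern, for orientation only (the Demo model and reject_newlines name are illustrative, not from the repository):

from typing import Annotated

from pydantic import AfterValidator, BaseModel, ValidationError


def reject_newlines(text: str) -> str:
    # Mirrors the new check_* validators below: raise instead of stripping.
    if "\n" in text or "\r" in text:
        raise ValueError("改行が含まれています。")
    return text


class Demo(BaseModel):  # illustrative stand-in for UserDictWord
    surface: Annotated[str, AfterValidator(reject_newlines)]


try:
    Demo(surface="te\nst")
except ValidationError as err:
    print(err)  # pydantic wraps the ValueError raised by the validator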
34 changes: 18 additions & 16 deletions test/unit/user_dict/test_user_dict_model.py
@@ -70,17 +70,18 @@ def test_convert_to_zenkaku() -> None:
 
 
 def test_remove_newlines_and_null() -> None:
-    """UserDictWord は surface 内の改行や null 文字を削除する。"""
+    """UserDictWord は surface 内の改行や null 文字をエラーとする。"""
     # Inputs
-    test_value = generate_model()
-    test_value["surface"] = "te\n\r\x00st"
-    # Expects
-    true_surface = "test"
-    # Outputs
-    surface = UserDictWord(**test_value).surface
+    test_value_newlines = generate_model()
+    test_value_newlines["surface"] = "te\r\nst"
+    test_value_null = generate_model()
+    test_value_null["surface"] = "te\x00st"
 
     # Test
-    assert surface == true_surface
+    with pytest.raises(ValidationError):
+        UserDictWord(**test_value_newlines)
+    with pytest.raises(ValidationError):
+        UserDictWord(**test_value_null)
 
 
 def test_count_mora() -> None:
@@ -141,17 +142,18 @@ def test_invalid_pronunciation_not_katakana() -> None:
 
 
 def test_invalid_pronunciation_newlines_and_null() -> None:
-    """UserDictWord は pronunciation 内の改行や null 文字を削除する。"""
+    """UserDictWord は pronunciation 内の改行や null 文字をエラーとする。"""
     # Inputs
-    test_value = generate_model()
-    test_value["pronunciation"] = "ボイ\n\r\x00ボ"
-    # Expects
-    true_pronunciation = "ボイボ"
-    # Outputs
-    pronunciation = UserDictWord(**test_value).pronunciation
+    test_value_newlines = generate_model()
+    test_value_newlines["pronunciation"] = "ボイ\r\nボ"
+    test_value_null = generate_model()
+    test_value_null["pronunciation"] = "ボイ\x00ボ"
 
     # Test
-    assert pronunciation == true_pronunciation
+    with pytest.raises(ValidationError):
+        UserDictWord(**test_value_newlines)
+    with pytest.raises(ValidationError):
+        UserDictWord(**test_value_null)
 
 
 def test_invalid_pronunciation_invalid_sutegana() -> None:
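If the project's unit tests run under pytest (the cases above rely on pytest.raises), the updated cases could be exercised selectively with something like the following; the -k expression is only an assumed convenience, not taken from the repository:

python -m pytest test/unit/user_dict/test_user_dict_model.py -k "newlines_and_null"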
44 changes: 26 additions & 18 deletions voicevox_engine/user_dict/model.py
@@ -27,12 +27,20 @@ class WordTypes(str, Enum):
 USER_DICT_MAX_PRIORITY = 10
 
 
-def remove_newlines_and_null(text: str) -> str:
-    return text.replace("\n", "").replace("\r", "").replace("\x00", "")
+def check_newlines_and_null(text: str) -> str:
+    if "\n" in text or "\r" in text:
+        raise ValueError("改行が含まれています。")
+    if "\x00" in text:
+        raise ValueError("Null文字が含まれています。")
+    return text
 
 
-def remove_comma_and_double_quote(text: str) -> str:
-    return text.replace(",", "").replace('"', "")
+def check_comma_and_double_quote(text: str) -> str:
+    if "," in text:
+        raise ValueError("カンマが含まれています。")
+    if '"' in text:
+        raise ValueError("ダブルクォートが含まれています。")
+    return text
 
 
 def convert_to_zenkaku(surface: str) -> str:
Expand Down Expand Up @@ -66,10 +74,10 @@ def check_is_katakana(pronunciation: str) -> str:
return pronunciation


SanitizedStr = Annotated[
CsvSafeStr = Annotated[
str,
AfterValidator(remove_newlines_and_null),
AfterValidator(remove_comma_and_double_quote),
AfterValidator(check_newlines_and_null),
AfterValidator(check_comma_and_double_quote),
]


@@ -83,26 +91,26 @@ class UserDictWord(BaseModel):
     surface: Annotated[
         str,
         AfterValidator(convert_to_zenkaku),
-        AfterValidator(remove_newlines_and_null),
+        AfterValidator(check_newlines_and_null),
     ] = Field(description="表層形")
     priority: int = Field(
         description="優先度", ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY
     )
     context_id: int = Field(description="文脈ID", default=1348)
-    part_of_speech: SanitizedStr = Field(description="品詞")
-    part_of_speech_detail_1: SanitizedStr = Field(description="品詞細分類1")
-    part_of_speech_detail_2: SanitizedStr = Field(description="品詞細分類2")
-    part_of_speech_detail_3: SanitizedStr = Field(description="品詞細分類3")
-    inflectional_type: SanitizedStr = Field(description="活用型")
-    inflectional_form: SanitizedStr = Field(description="活用形")
-    stem: SanitizedStr = Field(description="原形")
-    yomi: SanitizedStr = Field(description="読み")
-    pronunciation: Annotated[SanitizedStr, AfterValidator(check_is_katakana)] = Field(
+    part_of_speech: CsvSafeStr = Field(description="品詞")
+    part_of_speech_detail_1: CsvSafeStr = Field(description="品詞細分類1")
+    part_of_speech_detail_2: CsvSafeStr = Field(description="品詞細分類2")
+    part_of_speech_detail_3: CsvSafeStr = Field(description="品詞細分類3")
+    inflectional_type: CsvSafeStr = Field(description="活用型")
+    inflectional_form: CsvSafeStr = Field(description="活用形")
+    stem: CsvSafeStr = Field(description="原形")
+    yomi: CsvSafeStr = Field(description="読み")
+    pronunciation: Annotated[CsvSafeStr, AfterValidator(check_is_katakana)] = Field(
         description="発音"
     )
     accent_type: int = Field(description="アクセント型")
     mora_count: int | SkipJsonSchema[None] = Field(default=None, description="モーラ数")
-    accent_associative_rule: SanitizedStr = Field(description="アクセント結合規則")
+    accent_associative_rule: CsvSafeStr = Field(description="アクセント結合規則")
 
     @model_validator(mode="after")
     def check_mora_count_and_accent_type(self) -> Self:
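For reference, a minimal sketch of how the new validators behave when imported directly from voicevox_engine/user_dict/model.py; the error messages are the ones added above, and calling the functions standalone like this is illustrative only:

from voicevox_engine.user_dict.model import (
    check_comma_and_double_quote,
    check_newlines_and_null,
)

check_newlines_and_null("test")            # passes through unchanged
try:
    check_newlines_and_null("te\nst")      # previously the newline was silently stripped
except ValueError as err:
    print(err)                             # 改行が含まれています。
try:
    check_comma_and_double_quote('te"st')
except ValueError as err:
    print(err)                             # ダブルクォートが含まれています。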
