From ad2b8e187f1f7901c517f5be2b7affa1ddcc4e3b Mon Sep 17 00:00:00 2001 From: Naoaki Iwakiri Date: Sat, 19 Oct 2024 16:21:45 +0900 Subject: [PATCH 1/2] Fix Issue#252 --- CHANGELOG.md | 4 + src/dictionary/composite_key.rs | 2 +- src/dictionary/mod.rs | 242 ++++++++++-------- tests/data/dictionaries/maruichi.dat | 1 + .../numeric_transitions.rs | 4 +- tests/tests.rs | 23 ++ 6 files changed, 173 insertions(+), 103 deletions(-) create mode 100644 tests/data/dictionaries/maruichi.dat diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c777bc..376b5b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- 辞書の見出しに数字が含まれている時に#4を含むエントリ以外では変換候補にならない問題を修正 + ## [3.1.1] - 2024-03-05 ### Fixed diff --git a/src/dictionary/composite_key.rs b/src/dictionary/composite_key.rs index e047070..ad6ddf6 100644 --- a/src/dictionary/composite_key.rs +++ b/src/dictionary/composite_key.rs @@ -2,7 +2,7 @@ use crate::KanaFormChanger; /// 辞書を引くための情報 /// 厳密な送り仮名マッチのため、送り仮名を複数文字含みうる。 -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct CompositeKey { to_composite: String, // When Some(), should never be empty string. diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index 82f090c..37028cf 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -196,41 +196,43 @@ fn get_all_complete_inner( } /// -/// Usually, replace numerics to # and search the dict for numeric composition. -/// If numeric-re-lookup, don't replace numerics for the "#4" type entries. +/// First search the exact match, and then replace numerics to # and search the dict for numeric composition. +/// If numeric-re-lookup, skip the latter don't replace numerics for the "#4" type entries. 
/// fn get_all_candidates_inner( dictionaries: &[Arc<CskkDictionary>], composite_key: &CompositeKey, is_numeric_re_lookup: bool, ) -> Vec<Candidate> { - let mut composite_key = composite_key.to_owned(); let mut matched_numbers = vec![]; + let exact_match_candidates = get_candidates_in_order(dictionaries, &composite_key); + let exact_match_candidates= dedup_candidates(exact_match_candidates); + let mut all_candidates: Vec<Candidate> = exact_match_candidates.into_iter() + .map(|dictionary_candidate| { + Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate) + }) + .collect(); + + if !is_numeric_re_lookup { - (composite_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key); + let replaced_key; + (replaced_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key); + if replaced_key != *composite_key { + let numeric_replace_match_candidates= get_candidates_in_order(dictionaries, &replaced_key); + let numeric_replace_match_candidates = dedup_candidates(numeric_replace_match_candidates); + let mut numeric_replace_match_candidates: Vec<Candidate> = numeric_replace_match_candidates + .into_iter() + .map(|dictionary_candidate| { + Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate) + }) + .flat_map(|candidate| replace_numeric_match(&candidate, &matched_numbers, dictionaries)) + .collect(); + all_candidates.append(&mut numeric_replace_match_candidates); + } } - let candidates = get_candidates_in_order(dictionaries, &composite_key); - let deduped_candidates = dedup_candidates(candidates); - let deduped_candidates: Vec<Candidate> = if !is_numeric_re_lookup { - deduped_candidates - .into_iter() - .map(|dictionary_candidate| { - Candidate::from_dictionary_candidate(&composite_key, 
&dictionary_candidate) - }) - .collect() - }; - - deduped_candidates + all_candidates } /// @@ -363,6 +365,7 @@ pub(crate) fn numeric_entry_count(entry: &str) -> usize { NUM_ENTRY_REGEX.find_iter(entry).count() } +// もし候補に#0等の数値マッチが入るならば元の数字でおきかえる。 fn replace_numeric_match( candidate: &Candidate, matched_numbers: &[String], @@ -391,93 +394,95 @@ pub(crate) fn replace_numeric_string( } let mut current_output_texts = vec![kouho_text.to_string()]; for (n, entry_match) in NUMERIC_ENTRY_REGEX.find_iter(kouho_text).enumerate() { - match entry_match.as_str() { - "#0" => { - let mut replaced_output_texts = vec![]; - for output_text in &current_output_texts { - replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1)); + if n < numbers.len() { + match entry_match.as_str() { + "#0" => { + let mut replaced_output_texts = vec![]; + for output_text in &current_output_texts { + replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1)); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#1" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#1", - &numeric_to_zenkaku(&numbers[n]), - 1, - )); + "#1" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#1", + &numeric_to_zenkaku(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#2" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#2", - &numeric_to_kanji_each(&numbers[n]), - 1, - )); + "#2" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#2", + &numeric_to_kanji_each(&numbers[n]), + 1, + )); + } + current_output_texts = 
replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#3" => { - let mut replaced_output_texts = vec![]; - for output_text in &current_output_texts { - replaced_output_texts.push(output_text.replacen( - "#3", - &numeric_to_simple_kanji_as_number(&numbers[n]), - 1, - )); + "#3" => { + let mut replaced_output_texts = vec![]; + for output_text in &current_output_texts { + replaced_output_texts.push(output_text.replacen( + "#3", + &numeric_to_simple_kanji_as_number(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#4" => { - let mut replaced_output_texts = vec![]; - let numeric_lookup_results = get_all_candidates_inner( - dictionaries, - &CompositeKey::new(&numbers[n], None), - true, - ); - for kouho_text in &current_output_texts { - for numeric_lookup in &numeric_lookup_results { + "#4" => { + let mut replaced_output_texts = vec![]; + let numeric_lookup_results = get_all_candidates_inner( + dictionaries, + &CompositeKey::new(&numbers[n], None), + true, + ); + for kouho_text in &current_output_texts { + for numeric_lookup in &numeric_lookup_results { + replaced_output_texts.push(kouho_text.replacen( + "#4", + &numeric_lookup.kouho_text, + 1, + )); + } + } + current_output_texts = replaced_output_texts; + } + "#5" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { replaced_output_texts.push(kouho_text.replacen( - "#4", - &numeric_lookup.kouho_text, + "#5", + &numeric_to_daiji_as_number(&numbers[n], false), + 1, + )); + replaced_output_texts.push(kouho_text.replacen( + "#5", + &numeric_to_daiji_as_number(&numbers[n], true), 1, )); } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#5" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#5", - &numeric_to_daiji_as_number(&numbers[n], false), - 1, - )); - 
replaced_output_texts.push(kouho_text.replacen( - "#5", - &numeric_to_daiji_as_number(&numbers[n], true), - 1, - )); - } - current_output_texts = replaced_output_texts; - } - "#8" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#8", - &numeric_to_thousand_separator(&numbers[n]), - 1, - )); + "#8" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#8", + &numeric_to_thousand_separator(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; + _ => {} } - _ => {} } } current_output_texts @@ -555,4 +560,41 @@ mod test { assert_eq!(numeric_string_count("1にち1かい"), 2); assert_eq!(numeric_string_count("1じつせんしゅう"), 1); } + + #[test] + fn get_all_candidates_basic() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("あい", None); + let result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "愛"); + } + + #[test] + fn get_all_candidates_numeric_match() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/number_jisyo.dat", "utf-8", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("5/1", None); + let result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "#0月#0日"); + assert_eq!(result[0].midashi, "#/#"); + assert_eq!(result[0].output, "5月1日"); + } + + #[test] + fn get_all_candidates_numeric_exact_match() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("まる1", None); + let 
result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "①"); // 0xE291A0 (U+02460) + assert_eq!(result[1].kouho_text, "❶"); + assert_eq!(result[2].kouho_text, "⓵"); // 0xE293B5 (U+024F5) + } } diff --git a/tests/data/dictionaries/maruichi.dat b/tests/data/dictionaries/maruichi.dat new file mode 100644 index 0000000..e6fa689 --- /dev/null +++ b/tests/data/dictionaries/maruichi.dat @@ -0,0 +1 @@ +まる1 /①/❶/⓵/ \ No newline at end of file diff --git a/tests/libskk_compatibility/numeric_transitions.rs b/tests/libskk_compatibility/numeric_transitions.rs index a5dcdcd..5eac37a 100644 --- a/tests/libskk_compatibility/numeric_transitions.rs +++ b/tests/libskk_compatibility/numeric_transitions.rs @@ -11,10 +11,10 @@ use std::sync::Arc; #[test] fn numeric_transitions() { init_test_logger(); - let static_dict = + let user_dict = CskkDictionary::new_user_dict("tests/data/dictionaries/number_jisyo.dat", "UTF-8", false) .unwrap(); - let mut context = test_context_with_dictionaries(vec![Arc::new(static_dict)]); + let mut context = test_context_with_dictionaries(vec![Arc::new(user_dict)]); transition_check( &mut context, CompositionMode::Direct, diff --git a/tests/tests.rs b/tests/tests.rs index ab1d611..d17b25c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1468,3 +1468,26 @@ fn abort() { }), ); } + +// Issue #252 +#[test] +fn maruichi() { + let static_dict = + CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) + .unwrap(); + let mut context = + test_context_with_dictionaries(vec![Arc::new(static_dict)]); + transition_test( + &mut context, + CompositionMode::Direct, + InputMode::Hiragana, + "M a r u 1 space Return", + // CompositionMode::Direct, + CompositionMode::Direct, + InputMode::Hiragana, + CskkStateInfo::Direct(DirectData { + confirmed: "①".to_string(), + unconverted: None, + }), + ) +} From eee02a52d0fc75866119aae4764515cd756fc4e5 Mon Sep 17 00:00:00 2001 From: Naoaki Iwakiri Date: Sat, 19 Oct 2024 
16:27:22 +0900 Subject: [PATCH 2/2] Add description more. Lint fix --- CHANGELOG.md | 2 +- src/dictionary/mod.rs | 53 ++++++++++++++++++++++++++----------------- tests/tests.rs | 3 +-- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 376b5b0..7a5d274 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ All notable changes to this project will be documented in this file. ### Fixed -- 辞書の見出しに数字が含まれている時に#4を含むエントリ以外では変換候補にならない問題を修正 +- 辞書の見出しに数字が含まれている時に#4を含むエントリ以外では変換候補にならない問題を修正。数値自体を見出しに含む候補と数値置換候補があった場合、常に完全一致の候補の方が優先される。 (辞書のエントリ上、双方にマッチする時に各々の候補の別見出しの候補に対しての優先順位を保持する事が不可能なため。) ## [3.1.1] - 2024-03-05 diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index 37028cf..6f02939 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -207,27 +207,32 @@ fn get_all_candidates_inner( let mut matched_numbers = vec![]; let exact_match_candidates = get_candidates_in_order(dictionaries, &composite_key); - let exact_match_candidates= dedup_candidates(exact_match_candidates); - let mut all_candidates: Vec<Candidate> = exact_match_candidates.into_iter() + let exact_match_candidates = dedup_candidates(exact_match_candidates); + let mut all_candidates: Vec<Candidate> = exact_match_candidates + .into_iter() .map(|dictionary_candidate| { Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate) }) .collect(); - if !is_numeric_re_lookup { let replaced_key; (replaced_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key); if replaced_key != *composite_key { - let numeric_replace_match_candidates= get_candidates_in_order(dictionaries, &replaced_key); - let numeric_replace_match_candidates = dedup_candidates(numeric_replace_match_candidates); - let mut numeric_replace_match_candidates: Vec<Candidate> = numeric_replace_match_candidates - .into_iter() - .map(|dictionary_candidate| { - Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate) - }) - .flat_map(|candidate| 
replace_numeric_match(&candidate, &matched_numbers, dictionaries)) - .collect(); + let numeric_replace_match_candidates = + get_candidates_in_order(dictionaries, &replaced_key); + let numeric_replace_match_candidates = + dedup_candidates(numeric_replace_match_candidates); + let mut numeric_replace_match_candidates: Vec<Candidate> = + numeric_replace_match_candidates + .into_iter() + .map(|dictionary_candidate| { + Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate) + }) + .flat_map(|candidate| { + replace_numeric_match(&candidate, &matched_numbers, dictionaries) + }) + .collect(); all_candidates.append(&mut numeric_replace_match_candidates); } } @@ -563,22 +568,27 @@ mod test { #[test] fn get_all_candidates_basic() { - let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false) - .unwrap(); + let test_dictionary = + CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false) + .unwrap(); let dictionaries = vec![Arc::new(test_dictionary)]; let key = CompositeKey::new("あい", None); - let result = get_all_candidates(&dictionaries,&key); + let result = get_all_candidates(&dictionaries, &key); assert_eq!(result[0].kouho_text, "愛"); } #[test] fn get_all_candidates_numeric_match() { - let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/number_jisyo.dat", "utf-8", false) - .unwrap(); + let test_dictionary = CskkDictionary::new_static_dict( + "tests/data/dictionaries/number_jisyo.dat", + "utf-8", + false, + ) + .unwrap(); let dictionaries = vec![Arc::new(test_dictionary)]; let key = CompositeKey::new("5/1", None); - let result = get_all_candidates(&dictionaries,&key); + let result = get_all_candidates(&dictionaries, &key); assert_eq!(result[0].kouho_text, "#0月#0日"); assert_eq!(result[0].midashi, "#/#"); @@ -587,11 +597,12 @@ mod test { #[test] fn get_all_candidates_numeric_exact_match() { - let test_dictionary = 
CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) - .unwrap(); + let test_dictionary = + CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) + .unwrap(); let dictionaries = vec![Arc::new(test_dictionary)]; let key = CompositeKey::new("まる1", None); - let result = get_all_candidates(&dictionaries,&key); + let result = get_all_candidates(&dictionaries, &key); assert_eq!(result[0].kouho_text, "①"); // 0xE291A0 (U+02460) assert_eq!(result[1].kouho_text, "❶"); diff --git a/tests/tests.rs b/tests/tests.rs index d17b25c..550e96e 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1475,8 +1475,7 @@ fn maruichi() { let static_dict = CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) .unwrap(); - let mut context = - test_context_with_dictionaries(vec![Arc::new(static_dict)]); + let mut context = test_context_with_dictionaries(vec![Arc::new(static_dict)]); transition_test( &mut context, CompositionMode::Direct,