diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c777bc..376b5b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- 辞書の見出しに数字が含まれている時に#4を含むエントリ以外では変換候補にならない問題を修正 + ## [3.1.1] - 2024-03-05 ### Fixed diff --git a/src/dictionary/composite_key.rs b/src/dictionary/composite_key.rs index e047070..ad6ddf6 100644 --- a/src/dictionary/composite_key.rs +++ b/src/dictionary/composite_key.rs @@ -2,7 +2,7 @@ use crate::KanaFormChanger; /// 辞書を引くための情報 /// 厳密な送り仮名マッチのため、送り仮名を複数文字含みうる。 -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct CompositeKey { to_composite: String, // When Some(), should never be empty string. diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index 82f090c..37028cf 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -196,41 +196,43 @@ fn get_all_complete_inner( } /// -/// Usually, replace numerics to # and search the dict for numeric composition. -/// If numeric-re-lookup, don't replace numerics for the "#4" type entries. +/// First search for an exact match, and then replace numerics with # and search the dict for numeric composition. +/// If numeric-re-lookup, skip the latter, i.e. don't replace numerics for the "#4" type entries.
/// fn get_all_candidates_inner( dictionaries: &[Arc<CskkDictionary>], composite_key: &CompositeKey, is_numeric_re_lookup: bool, ) -> Vec<Candidate> { - let mut composite_key = composite_key.to_owned(); let mut matched_numbers = vec![]; + let exact_match_candidates = get_candidates_in_order(dictionaries, &composite_key); + let exact_match_candidates= dedup_candidates(exact_match_candidates); + let mut all_candidates: Vec<Candidate> = exact_match_candidates.into_iter() + .map(|dictionary_candidate| { + Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate) + }) + .collect(); + + if !is_numeric_re_lookup { - (composite_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key); + let replaced_key; + (replaced_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key); + if replaced_key != *composite_key { + let numeric_replace_match_candidates= get_candidates_in_order(dictionaries, &replaced_key); + let numeric_replace_match_candidates = dedup_candidates(numeric_replace_match_candidates); + let mut numeric_replace_match_candidates: Vec<Candidate> = numeric_replace_match_candidates + .into_iter() + .map(|dictionary_candidate| { + Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate) + }) + .flat_map(|candidate| replace_numeric_match(&candidate, &matched_numbers, dictionaries)) + .collect(); + all_candidates.append(&mut numeric_replace_match_candidates); + } } - let candidates = get_candidates_in_order(dictionaries, &composite_key); - let deduped_candidates = dedup_candidates(candidates); - let deduped_candidates: Vec<Candidate> = if !is_numeric_re_lookup { - deduped_candidates - .into_iter() - .map(|dictionary_candidate| { - Candidate::from_dictionary_candidate(&composite_key,
&dictionary_candidate) - }) - .collect() - }; - - deduped_candidates + all_candidates } /// @@ -363,6 +365,7 @@ pub(crate) fn numeric_entry_count(entry: &str) -> usize { NUM_ENTRY_REGEX.find_iter(entry).count() } +// もし候補に#0等の数値マッチが入るならば元の数字でおきかえる。 fn replace_numeric_match( candidate: &Candidate, matched_numbers: &[String], @@ -391,93 +394,95 @@ pub(crate) fn replace_numeric_string( } let mut current_output_texts = vec![kouho_text.to_string()]; for (n, entry_match) in NUMERIC_ENTRY_REGEX.find_iter(kouho_text).enumerate() { - match entry_match.as_str() { - "#0" => { - let mut replaced_output_texts = vec![]; - for output_text in &current_output_texts { - replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1)); + if n < numbers.len() { + match entry_match.as_str() { + "#0" => { + let mut replaced_output_texts = vec![]; + for output_text in &current_output_texts { + replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1)); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#1" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#1", - &numeric_to_zenkaku(&numbers[n]), - 1, - )); + "#1" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#1", + &numeric_to_zenkaku(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#2" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#2", - &numeric_to_kanji_each(&numbers[n]), - 1, - )); + "#2" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#2", + &numeric_to_kanji_each(&numbers[n]), + 1, + )); + } + current_output_texts =
replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#3" => { - let mut replaced_output_texts = vec![]; - for output_text in &current_output_texts { - replaced_output_texts.push(output_text.replacen( - "#3", - &numeric_to_simple_kanji_as_number(&numbers[n]), - 1, - )); + "#3" => { + let mut replaced_output_texts = vec![]; + for output_text in &current_output_texts { + replaced_output_texts.push(output_text.replacen( + "#3", + &numeric_to_simple_kanji_as_number(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#4" => { - let mut replaced_output_texts = vec![]; - let numeric_lookup_results = get_all_candidates_inner( - dictionaries, - &CompositeKey::new(&numbers[n], None), - true, - ); - for kouho_text in &current_output_texts { - for numeric_lookup in &numeric_lookup_results { + "#4" => { + let mut replaced_output_texts = vec![]; + let numeric_lookup_results = get_all_candidates_inner( + dictionaries, + &CompositeKey::new(&numbers[n], None), + true, + ); + for kouho_text in &current_output_texts { + for numeric_lookup in &numeric_lookup_results { + replaced_output_texts.push(kouho_text.replacen( + "#4", + &numeric_lookup.kouho_text, + 1, + )); + } + } + current_output_texts = replaced_output_texts; + } + "#5" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { replaced_output_texts.push(kouho_text.replacen( - "#4", - &numeric_lookup.kouho_text, + "#5", + &numeric_to_daiji_as_number(&numbers[n], false), + 1, + )); + replaced_output_texts.push(kouho_text.replacen( + "#5", + &numeric_to_daiji_as_number(&numbers[n], true), 1, )); } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; - } - "#5" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#5", - &numeric_to_daiji_as_number(&numbers[n], false), - 1, - )); -
replaced_output_texts.push(kouho_text.replacen( - "#5", - &numeric_to_daiji_as_number(&numbers[n], true), - 1, - )); - } - current_output_texts = replaced_output_texts; - } - "#8" => { - let mut replaced_output_texts = vec![]; - for kouho_text in &current_output_texts { - replaced_output_texts.push(kouho_text.replacen( - "#8", - &numeric_to_thousand_separator(&numbers[n]), - 1, - )); + "#8" => { + let mut replaced_output_texts = vec![]; + for kouho_text in &current_output_texts { + replaced_output_texts.push(kouho_text.replacen( + "#8", + &numeric_to_thousand_separator(&numbers[n]), + 1, + )); + } + current_output_texts = replaced_output_texts; } - current_output_texts = replaced_output_texts; + _ => {} } - _ => {} } } current_output_texts @@ -555,4 +560,41 @@ mod test { assert_eq!(numeric_string_count("1にち1かい"), 2); assert_eq!(numeric_string_count("1じつせんしゅう"), 1); } + + #[test] + fn get_all_candidates_basic() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("あい", None); + let result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "愛"); + } + + #[test] + fn get_all_candidates_numeric_match() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/number_jisyo.dat", "utf-8", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("5/1", None); + let result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "#0月#0日"); + assert_eq!(result[0].midashi, "#/#"); + assert_eq!(result[0].output, "5月1日"); + } + + #[test] + fn get_all_candidates_numeric_exact_match() { + let test_dictionary = CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) + .unwrap(); + let dictionaries = vec![Arc::new(test_dictionary)]; + let key = CompositeKey::new("まる1", None); + let
result = get_all_candidates(&dictionaries,&key); + + assert_eq!(result[0].kouho_text, "①"); // 0xE291A0 (U+02460) + assert_eq!(result[1].kouho_text, "❶"); + assert_eq!(result[2].kouho_text, "⓵"); // 0xE293B5 (U+024F5) + } } diff --git a/tests/data/dictionaries/maruichi.dat b/tests/data/dictionaries/maruichi.dat new file mode 100644 index 0000000..e6fa689 --- /dev/null +++ b/tests/data/dictionaries/maruichi.dat @@ -0,0 +1 @@ +まる1 /①/❶/⓵/ \ No newline at end of file diff --git a/tests/libskk_compatibility/numeric_transitions.rs b/tests/libskk_compatibility/numeric_transitions.rs index a5dcdcd..5eac37a 100644 --- a/tests/libskk_compatibility/numeric_transitions.rs +++ b/tests/libskk_compatibility/numeric_transitions.rs @@ -11,10 +11,10 @@ use std::sync::Arc; #[test] fn numeric_transitions() { init_test_logger(); - let static_dict = + let user_dict = CskkDictionary::new_user_dict("tests/data/dictionaries/number_jisyo.dat", "UTF-8", false) .unwrap(); - let mut context = test_context_with_dictionaries(vec![Arc::new(static_dict)]); + let mut context = test_context_with_dictionaries(vec![Arc::new(user_dict)]); transition_check( &mut context, CompositionMode::Direct, diff --git a/tests/tests.rs b/tests/tests.rs index ab1d611..d17b25c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1468,3 +1468,26 @@ fn abort() { }), ); } + +// Issue #252 +#[test] +fn maruichi() { + let static_dict = + CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false) + .unwrap(); + let mut context = + test_context_with_dictionaries(vec![Arc::new(static_dict)]); + transition_test( + &mut context, + CompositionMode::Direct, + InputMode::Hiragana, + "M a r u 1 space Return", + // CompositionMode::Direct, + CompositionMode::Direct, + InputMode::Hiragana, + CskkStateInfo::Direct(DirectData { + confirmed: "①".to_string(), + unconverted: None, + }), + ) +}