Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Issue#252 #253

Merged
merged 2 commits into from
Oct 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Fixed

- 辞書の見出しに数字が含まれている時に#4を含むエントリ以外では変換候補にならない問題を修正。数値自体を見出しに含む候補と数値置換候補があった場合、常に完全一致の候補の方が優先される。 (辞書のエントリ上、双方にマッチする時に各々の候補の別見出しの候補に対しての優先順位を保持する事が不可能なため。)

## [3.1.1] - 2024-03-05

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion src/dictionary/composite_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::KanaFormChanger;

/// 辞書を引くための情報
/// 厳密な送り仮名マッチのため、送り仮名を複数文字含みうる。
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct CompositeKey {
to_composite: String,
// When Some(), should never be empty string.
Expand Down
253 changes: 153 additions & 100 deletions src/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,41 +196,48 @@ fn get_all_complete_inner(
}

///
/// Usually, replace numerics to # and search the dict for numeric composition.
/// If numeric-re-lookup, don't replace numerics for the "#4" type entries.
/// First search for an exact match, then replace numerics with # and search the dictionaries for numeric composition.
/// On a numeric re-lookup (used for the "#4" type entries), skip the latter: numerics are not replaced.
///
fn get_all_candidates_inner(
dictionaries: &[Arc<CskkDictionary>],
composite_key: &CompositeKey,
is_numeric_re_lookup: bool,
) -> Vec<Candidate> {
let mut composite_key = composite_key.to_owned();
let mut matched_numbers = vec![];

// Look the key up exactly as given first; these candidates form the head of the result.
let exact_match_candidates = get_candidates_in_order(dictionaries, &composite_key);
let exact_match_candidates = dedup_candidates(exact_match_candidates);
let mut all_candidates: Vec<Candidate> = exact_match_candidates
.into_iter()
.map(|dictionary_candidate| {
Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate)
})
.collect();

// A numeric re-lookup searches for the number itself, so the numeric-replaced lookup is skipped.
if !is_numeric_re_lookup {
(composite_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key);
let replaced_key;
(replaced_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key);
// Run the second lookup only when the replacement actually produced a different key.
if replaced_key != *composite_key {
let numeric_replace_match_candidates =
get_candidates_in_order(dictionaries, &replaced_key);
let numeric_replace_match_candidates =
dedup_candidates(numeric_replace_match_candidates);
let mut numeric_replace_match_candidates: Vec<Candidate> =
numeric_replace_match_candidates
.into_iter()
.map(|dictionary_candidate| {
Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate)
})
// Each candidate is passed through replace_numeric_match together with the
// numbers captured from the key; one candidate may yield several results.
.flat_map(|candidate| {
replace_numeric_match(&candidate, &matched_numbers, dictionaries)
})
.collect();
// Numeric-replaced candidates are appended after the exact matches.
all_candidates.append(&mut numeric_replace_match_candidates);
}
}

let candidates = get_candidates_in_order(dictionaries, &composite_key);
let deduped_candidates = dedup_candidates(candidates);
let deduped_candidates: Vec<Candidate> = if !is_numeric_re_lookup {
deduped_candidates
.into_iter()
.map(|dictionary_candidate| {
Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate)
})
.flat_map(|candidate| replace_numeric_match(&candidate, &matched_numbers, dictionaries))
.collect()
} else {
deduped_candidates
.into_iter()
.map(|dictionary_candidate| {
Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate)
})
.collect()
};

deduped_candidates
all_candidates
}

///
Expand Down Expand Up @@ -363,6 +370,7 @@ pub(crate) fn numeric_entry_count(entry: &str) -> usize {
NUM_ENTRY_REGEX.find_iter(entry).count()
}

// もし候補に#0等の数値マッチが入るならば元の数字でおきかえる。
fn replace_numeric_match(
candidate: &Candidate,
matched_numbers: &[String],
Expand Down Expand Up @@ -391,93 +399,95 @@ pub(crate) fn replace_numeric_string(
}
let mut current_output_texts = vec![kouho_text.to_string()];
for (n, entry_match) in NUMERIC_ENTRY_REGEX.find_iter(kouho_text).enumerate() {
match entry_match.as_str() {
"#0" => {
let mut replaced_output_texts = vec![];
for output_text in &current_output_texts {
replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1));
if n < numbers.len() {
match entry_match.as_str() {
"#0" => {
let mut replaced_output_texts = vec![];
for output_text in &current_output_texts {
replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
}
"#1" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#1",
&numeric_to_zenkaku(&numbers[n]),
1,
));
"#1" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#1",
&numeric_to_zenkaku(&numbers[n]),
1,
));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
}
"#2" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#2",
&numeric_to_kanji_each(&numbers[n]),
1,
));
"#2" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#2",
&numeric_to_kanji_each(&numbers[n]),
1,
));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
}
"#3" => {
let mut replaced_output_texts = vec![];
for output_text in &current_output_texts {
replaced_output_texts.push(output_text.replacen(
"#3",
&numeric_to_simple_kanji_as_number(&numbers[n]),
1,
));
"#3" => {
let mut replaced_output_texts = vec![];
for output_text in &current_output_texts {
replaced_output_texts.push(output_text.replacen(
"#3",
&numeric_to_simple_kanji_as_number(&numbers[n]),
1,
));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
}
"#4" => {
let mut replaced_output_texts = vec![];
let numeric_lookup_results = get_all_candidates_inner(
dictionaries,
&CompositeKey::new(&numbers[n], None),
true,
);
for kouho_text in &current_output_texts {
for numeric_lookup in &numeric_lookup_results {
"#4" => {
let mut replaced_output_texts = vec![];
let numeric_lookup_results = get_all_candidates_inner(
dictionaries,
&CompositeKey::new(&numbers[n], None),
true,
);
for kouho_text in &current_output_texts {
for numeric_lookup in &numeric_lookup_results {
replaced_output_texts.push(kouho_text.replacen(
"#4",
&numeric_lookup.kouho_text,
1,
));
}
}
current_output_texts = replaced_output_texts;
}
"#5" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#4",
&numeric_lookup.kouho_text,
"#5",
&numeric_to_daiji_as_number(&numbers[n], false),
1,
));
replaced_output_texts.push(kouho_text.replacen(
"#5",
&numeric_to_daiji_as_number(&numbers[n], true),
1,
));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
}
"#5" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#5",
&numeric_to_daiji_as_number(&numbers[n], false),
1,
));
replaced_output_texts.push(kouho_text.replacen(
"#5",
&numeric_to_daiji_as_number(&numbers[n], true),
1,
));
}
current_output_texts = replaced_output_texts;
}
"#8" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#8",
&numeric_to_thousand_separator(&numbers[n]),
1,
));
"#8" => {
let mut replaced_output_texts = vec![];
for kouho_text in &current_output_texts {
replaced_output_texts.push(kouho_text.replacen(
"#8",
&numeric_to_thousand_separator(&numbers[n]),
1,
));
}
current_output_texts = replaced_output_texts;
}
current_output_texts = replaced_output_texts;
_ => {}
}
_ => {}
}
}
current_output_texts
Expand Down Expand Up @@ -555,4 +565,47 @@ mod test {
assert_eq!(numeric_string_count("1にち1かい"), 2);
assert_eq!(numeric_string_count("1じつせんしゅう"), 1);
}

/// A key without any digits must yield the candidate asserted below at the head of the list.
#[test]
fn get_all_candidates_basic() {
    let dictionaries = vec![Arc::new(
        CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false)
            .unwrap(),
    )];
    let lookup_key = CompositeKey::new("あい", None);

    let candidates = get_all_candidates(&dictionaries, &lookup_key);

    assert_eq!(candidates[0].kouho_text, "愛");
}

/// A key made of digits around "/" must match the "#/#" entry, with the typed
/// numbers substituted into the output.
#[test]
fn get_all_candidates_numeric_match() {
    let number_dictionary = CskkDictionary::new_static_dict(
        "tests/data/dictionaries/number_jisyo.dat",
        "utf-8",
        false,
    )
    .unwrap();
    let dictionaries = vec![Arc::new(number_dictionary)];
    let lookup_key = CompositeKey::new("5/1", None);

    let candidates = get_all_candidates(&dictionaries, &lookup_key);
    let first = &candidates[0];

    // The raw candidate text and midashi keep the placeholders; only the output is expanded.
    assert_eq!(first.kouho_text, "#0月#0日");
    assert_eq!(first.midashi, "#/#");
    assert_eq!(first.output, "5月1日");
}

/// An entry whose midashi itself contains a digit ("まる1") must be found by the
/// exact-match lookup, with its candidates in the order asserted below.
#[test]
fn get_all_candidates_numeric_exact_match() {
    let maruichi_dictionary =
        CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false)
            .unwrap();
    let dictionaries = vec![Arc::new(maruichi_dictionary)];
    let lookup_key = CompositeKey::new("まる1", None);

    let candidates = get_all_candidates(&dictionaries, &lookup_key);

    // "①" is U+2460 (UTF-8 0xE291A0) and "⓵" is U+24F5 (UTF-8 0xE293B5).
    let expected_in_order = ["①", "❶", "⓵"];
    for (position, expected) in expected_in_order.iter().enumerate() {
        assert_eq!(candidates[position].kouho_text, *expected);
    }
}
}
1 change: 1 addition & 0 deletions tests/data/dictionaries/maruichi.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
まる1 /①/❶/⓵/
4 changes: 2 additions & 2 deletions tests/libskk_compatibility/numeric_transitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ use std::sync::Arc;
#[test]
fn numeric_transitions() {
init_test_logger();
let static_dict =
let user_dict =
CskkDictionary::new_user_dict("tests/data/dictionaries/number_jisyo.dat", "UTF-8", false)
.unwrap();
let mut context = test_context_with_dictionaries(vec![Arc::new(static_dict)]);
let mut context = test_context_with_dictionaries(vec![Arc::new(user_dict)]);
transition_check(
&mut context,
CompositionMode::Direct,
Expand Down
22 changes: 22 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1468,3 +1468,25 @@ fn abort() {
}),
);
}

// Regression test for issue #252: a midashi that itself contains a digit ("まる1")
// must be convertible.
#[test]
fn maruichi() {
    let dictionary =
        CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false)
            .unwrap();
    let mut context = test_context_with_dictionaries(vec![Arc::new(dictionary)]);
    // After the key sequence, "①" must be confirmed with nothing left unconverted.
    let expected_state = CskkStateInfo::Direct(DirectData {
        confirmed: "①".to_string(),
        unconverted: None,
    });
    transition_test(
        &mut context,
        CompositionMode::Direct,
        InputMode::Hiragana,
        "M a r u 1 space Return",
        CompositionMode::Direct,
        InputMode::Hiragana,
        expected_state,
    )
}
Loading