diff --git a/README.md b/README.md index 34ff865..7f8dde6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ # zhconv-rs 中文简繁及地區詞轉換 zhconv-rs converts Chinese text among traditional/simplified scripts or regional variants (e.g. `zh-TW <-> zh-CN <-> zh-HK <-> zh-Hans <-> zh-Hant`), built on the top of rulesets from MediaWiki/Wikipedia and OpenCC. -The implementation is powered by an [Aho-Corasick](https://github.com/daac-tools/daachorse) automaton, ensuring linear time complexity with respect to the length of input text and conversion rules (`O(n+m)`), processing dozens of MiBs text per second. +The implementation is powered by the [Aho-Corasick](https://github.com/daac-tools/daachorse) algorithm, ensuring linear time complexity with respect to the length of input text and conversion rules (`O(n+m)`), processing dozens of MiBs text per second. 🔗 **Web App: https://zhconv.pages.dev** (powered by WASM) diff --git a/build.rs b/build.rs index b93fbd8..e347ef7 100644 --- a/build.rs +++ b/build.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; use std::collections::HashSet; -use std::convert::TryInto; use std::env; use std::fs::{self, File}; use std::io; @@ -358,7 +357,6 @@ mod opencc { use daachorse::{ CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder, MatchKind, }; - use itertools::Itertools; // use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; use lazy_static::lazy_static; use std::collections::HashMap; @@ -429,7 +427,7 @@ mod opencc { s: &str, ) { for line in s.lines().map(|l| l.trim()).filter(|l| !l.is_empty()) { - if let Some((f, ts)) = line.split_once(char::is_whitespace){ + if let Some((f, ts)) = line.split_once(char::is_whitespace) { if f.is_empty() || ts.is_empty() { continue; } @@ -528,5 +526,5 @@ fn read_and_validate_file(path: &str, sha256sum: &[u8; 32]) -> String { fn sha256(text: &str) -> [u8; 32] { let mut hasher = Sha256::new(); hasher.update(text.as_bytes()); - hasher.finalize().try_into().unwrap() + hasher.finalize().into() } diff --git a/src/converter.rs b/src/converter.rs index 678d422..28ccb2b 100644 --- a/src/converter.rs +++ b/src/converter.rs @@ -11,7 +11,7 @@ use crate::{ pagerules::PageRules, rule::{Conv, ConvAction, ConvRule}, tables::expand_table, - utils::{regex, unwrap_or_return}, + utils::regex, variant::Variant, }; @@ -98,10 +98,13 @@ impl ZhConverter { /// Same as `convert`, except that it takes a `&mut String` as dest instead of returning a `String`. pub fn convert_to(&self, text: &str, output: &mut String) { - let automaton = unwrap_or_return!(self.automaton.as_ref().or_else(|| { - output.push_str(text); - None - })); + let automaton = match self.automaton.as_ref() { + Some(automaton) => automaton, + None => { + output.push_str(text); + return; + } + }; // Ref: https://github.dev/rust-lang/regex/blob/5197f21287344d2994f9cf06758a3ea30f5a26c3/src/re_trait.rs#L192 let mut last = 0; @@ -186,10 +189,13 @@ impl ZhConverter { shadowing_target_words: &[String], shadowed_source_words: &HashSet, ) { - let automaton = unwrap_or_return!(self.automaton.as_ref().or_else(|| { - output.push_str(text); - None - })); + let automaton = match self.automaton.as_ref() { + Some(automaton) => automaton, + None => { + output.push_str(text); + return; + } + }; // let mut cnt = HashMap::::new(); let mut last = 0; diff --git a/src/utils.rs b/src/utils.rs index b768aba..2438fca 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -36,19 +36,19 @@ macro_rules! regex { } pub(crate) use regex; -// https://stackoverflow.com/a/51345372/5488616 -macro_rules! unwrap_or_return { - ( $e:expr ) => { - match $e { - Some(x) => x, - None => return, - } - }; - ( $e:expr, $r:expr ) => { - match $e { - Some(x) => x, - None => return $r, - } - }; -} -pub(crate) use unwrap_or_return; +// // https://stackoverflow.com/a/51345372/5488616 +// macro_rules! unwrap_or_return { +// ( $e:expr ) => { +// match $e { +// Some(x) => x, +// None => return, +// } +// }; +// ( $e:expr, $r:expr ) => { +// match $e { +// Some(x) => x, +// None => return $r, +// } +// }; +// } +// pub(crate) use unwrap_or_return;