Skip to content

Commit

Permalink
docs: improve doc comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Gowee committed Jan 12, 2025
1 parent d4904cf commit c607785
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 50 deletions.
12 changes: 7 additions & 5 deletions pyo3/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ use ::zhconv::{
ZhConverterBuilder,
};

/// Convert a text to a target Chinese variant. Converters are constructed from built-in rulesets
/// on demand and cached automatically. If `wikitext` is `True`, inline conversion rules such as
/// `-{foo...bar}-` are activated, while converters must be rebuilt for every invocation if there
/// are global rules. Check the project's README for more info.
/// Convert text to a target Chinese variant using pre-built converters, which are loaded
/// automatically on demand.
///
/// If `wikitext` is `True`, the MediaWiki conversion syntax such as `-{foo...bar}-` is supported,
/// though performance may degrade. The implementation is not fully compliant with the MediaWiki
/// LanguageConverter system. Check the project's README for more info.
///
/// Supported target variants: zh, zh-Hant, zh-Hans, zh-TW, zh-HK, zh-MO, zh-CN, zh-SG, zh-MY.
#[pyfunction]
Expand All @@ -37,7 +39,7 @@ fn zhconv(py: Python<'_>, text: &str, target: &str, wikitext: Option<bool>) -> P

/// converter(text) -> result
///
/// Convert a text with the previously built converter. It is a callable object that works like a
/// Convert text with the previously built converter. It is a callable object that behaves like a
/// plain function, returned by `make_converter`.
#[pyclass]
struct ZhConverter(Converter);
Expand Down
29 changes: 17 additions & 12 deletions src/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl ZhConverter {
.build()
}

/// Convert a text.
/// Convert text.
#[inline(always)]
pub fn convert(&self, text: &str) -> String {
let mut output = String::with_capacity(text.len());
Expand Down Expand Up @@ -124,7 +124,7 @@ impl ZhConverter {
output.push_str(&text[last..]);
}

/// Convert a text, a long with a secondary converter.
/// Convert text, along with a secondary converter.
///
/// Conversion rules in the secondary converter shadow the existing ones in the original
/// converter.
Expand Down Expand Up @@ -176,7 +176,7 @@ impl ZhConverter {
}
}

/// Convert a text, a long with a secondary conversion table (typically temporary).
/// Convert text, along with a secondary conversion table (typically temporary).
///
/// The worst-case time complexity of the implementation is `O(n*m)` where `n` and `m` are the
/// length of the text and the maximum length of source words in conversion rulesets.
Expand Down Expand Up @@ -612,9 +612,11 @@ impl<'t> ZhConverterBuilder<'t> {
self.conv_actions(page_rules.as_conv_actions())
}

/// Add a set of rules.
/// Add [`ConvAction`]s, which are typically parsed from rules in the MediaWiki syntax.
///
/// These rules take higher precedence over those specified via `table`.
/// For general usage, check [`conv_pairs`](#method.conv_pairs) which takes
/// `from -> to` pairs.
fn conv_actions<'i>(mut self, conv_actions: impl IntoIterator<Item = &'i ConvAction>) -> Self {
for conv_action in conv_actions {
let pairs = conv_action.as_conv().get_conv_pairs(self.target);
Expand All @@ -629,10 +631,10 @@ impl<'t> ZhConverterBuilder<'t> {
self
}

/// Add [`Conv`]s.
/// Add [`Conv`]s, which are typically parsed from rules in MediaWiki syntax.
///
/// For general cases, check [`add_conv_pair`](#method.add_conv_pair) which takes a plain
/// `from -> to` pair.
/// For general usage, check [`conv_pairs`](#method.conv_pairs) which takes
/// `from -> to` pairs.
pub fn convs(mut self, convs: impl IntoIterator<Item = impl AsRef<Conv>>) -> Self {
for conv in convs.into_iter() {
self.adds.extend(
Expand All @@ -644,7 +646,7 @@ impl<'t> ZhConverterBuilder<'t> {
self
}

/// Mark a conv as removed.
/// Mark [`Conv`]s as removed.
pub fn unconvs(mut self, convs: impl IntoIterator<Item = impl AsRef<Conv>>) -> Self {
for conv in convs.into_iter() {
self.removes.extend(
Expand All @@ -658,7 +660,8 @@ impl<'t> ZhConverterBuilder<'t> {

/// Add `from -> to` conversion pairs.
///
/// It takes the precedence over those specified via `table`. It shares the same precedence level with those specified via `cgroup`.
/// It takes precedence over those specified via `table`, and shares the same precedence
/// level with those specified via `convs` or `conv_lines`.
pub fn conv_pairs(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
Expand All @@ -676,7 +679,8 @@ impl<'t> ZhConverterBuilder<'t> {

/// Mark conversion pairs as removed.
///
/// Any rule with the same `from`, whether specified via `add_conv_pair`, `conv_lines` or `table`, is removed.
/// Any rule with the same `from`, whether specified via `conv_pairs`, `conv_lines` or `table`,
/// is removed.
pub fn unconv_pairs(
mut self,
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
Expand All @@ -694,14 +698,15 @@ impl<'t> ZhConverterBuilder<'t> {

/// Mark a single conversion pair as removed.
///
/// Any rule with the same `from`, whether specified via `add_conv_pair`, `conv_lines` or `table`, is removed.
/// Any rule with the same `from`, whether specified via `conv_pairs`, `conv_lines` or `table`,
/// is removed.
pub fn unconv_pair(mut self, from: impl AsRef<str>, to: impl AsRef<str>) -> Self {
    // Copy both sides into owned strings before recording them; the borrowed
    // inputs are not kept past this call.
    let source = from.as_ref().to_owned();
    let replacement = to.as_ref().to_owned();
    // Register the pair in `removes`, then hand the builder back for chaining.
    self.removes.insert(source, replacement);
    self
}

/// Add a text of conv lines.
/// Add rules in the MediaWiki conversion syntax line by line.
///
/// e.g.
/// ```text
Expand Down
24 changes: 9 additions & 15 deletions src/converters.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Converters lazily built from built-in [`tables`](crate::tables).
//! Converters built from built-in [`tables`](crate::tables).
//!
//! These converters are lazily loaded from serialized automata built at build-time, and cached for
//! later use.
Expand All @@ -20,39 +20,33 @@ use crate::{tables::*, Variant, ZhConverter, ZhConverterBuilder};
/// Placeholder converter (`zh`/原文). Nothing will be converted with this.
pub static ZH_BLANK_CONVERTER: LazyLock<ZhConverter> =
LazyLock::new(|| ZhConverterBuilder::new().target(Variant::Zh).build());
/// Converter to `zh-Hant` (繁體中文), lazily built from [`ZH_HANT_TABLE`](crate::ZH_HANT_TABLE).
/// Converter to `zh-Hant` (繁體中文), lazily built from [`ZH_HANT_TABLE`].
pub static ZH_TO_HANT_CONVERTER: LazyLock<ZhConverter> =
LazyLock::new(|| deserialize_converter(Variant::ZhHant, ZH_HANT_DAAC, [ZH_HANT_TABLE]));
/// Converter to `zh-Hans` (简体中文), lazily built from [`ZH_HANS_TABLE`](crate::ZH_HANS_TABLE).
/// Converter to `zh-Hans` (简体中文), lazily built from [`ZH_HANS_TABLE`].
pub static ZH_TO_HANS_CONVERTER: LazyLock<ZhConverter> =
LazyLock::new(|| deserialize_converter(Variant::ZhHans, ZH_HANS_DAAC, [ZH_HANS_TABLE]));
/// Converter to `zh-Hant-TW` (臺灣正體), lazily built from [`ZH_HANT_TABLE`](crate::ZH_HANT_TABLE)
/// and [`ZH_TW_TABLE`](crate::ZH_TW_TABLE).
/// Converter to `zh-Hant-TW` (臺灣正體), lazily built from [`ZH_HANT_TABLE`] and [`ZH_TW_TABLE`].
pub static ZH_TO_TW_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhTW, ZH_HANT_TW_DAAC, [ZH_HANT_TABLE, ZH_TW_TABLE])
});
/// Converter to `zh-Hant-HK` (香港繁體), lazily built from [`ZH_HANT_TABLE`](crate::ZH_HANT_TABLE)
/// and [`ZH_HK_TABLE`](crate::ZH_HK_TABLE).
/// Converter to `zh-Hant-HK` (香港繁體), lazily built from [`ZH_HANT_TABLE`] and [`ZH_HK_TABLE`].
pub static ZH_TO_HK_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhHK, ZH_HANT_HK_DAAC, [ZH_HANT_TABLE, ZH_HK_TABLE])
});
/// Converter to `zh-Hant-MO` (澳門繁體), lazily built from [`ZH_HANT_TABLE`](crate::ZH_HANT_TABLE)
/// and [`ZH_MO_TABLE`](crate::ZH_MO_TABLE).
/// Converter to `zh-Hant-MO` (澳門繁體), lazily built from [`ZH_HANT_TABLE`] and [`ZH_MO_TABLE`].
pub static ZH_TO_MO_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhMO, ZH_HANT_MO_DAAC, [ZH_HANT_TABLE, ZH_MO_TABLE])
});
/// Converter to `zh-Hans-CN` (大陆简体), lazily built from [`ZH_HANS_TABLE`](crate::ZH_HANS_TABLE)
/// and [`ZH_CN_TABLE`](crate::ZH_CN_TABLE).
/// Converter to `zh-Hans-CN` (大陆简体), lazily built from [`ZH_HANS_TABLE`] and [`ZH_CN_TABLE`].
pub static ZH_TO_CN_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhCN, ZH_HANS_CN_DAAC, [ZH_HANS_TABLE, ZH_CN_TABLE])
});
/// Converter to `zh-Hans-SG` (新加坡简体), lazily built from [`ZH_HANS_TABLE`](crate::ZH_HANS_TABLE)
/// and [`ZH_SG_TABLE`](crate::ZH_SG_TABLE).
/// Converter to `zh-Hans-SG` (新加坡简体), lazily built from [`ZH_HANS_TABLE`] and [`ZH_SG_TABLE`].
pub static ZH_TO_SG_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhSG, ZH_HANS_SG_DAAC, [ZH_HANS_TABLE, ZH_SG_TABLE])
});
/// Converter to `zh-Hans-MY` (大马简体), lazily built from [`ZH_HANS_TABLE`](crate::ZH_HANS_TABLE)
/// and [`ZH_MY_TABLE`](crate::ZH_MY_TABLE).
/// Converter to `zh-Hans-MY` (大马简体), lazily built from [`ZH_HANS_TABLE`] and [`ZH_MY_TABLE`].
pub static ZH_TO_MY_CONVERTER: LazyLock<ZhConverter> = LazyLock::new(|| {
deserialize_converter(Variant::ZhMY, ZH_HANS_MY_DAAC, [ZH_HANS_TABLE, ZH_MY_TABLE])
});
Expand Down
22 changes: 14 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,18 @@
//! assert_eq!(zhconv("阿拉伯联合酋长国", Variant::ZhTW), "阿拉伯聯合大公國");
//! ```
//!
//! With MediaWiki conversion rules:
//! With MediaWiki conversion syntax:
//! ```
//! use zhconv::{zhconv_mw, Variant};
//! assert_eq!(zhconv_mw("天-{干}-物燥 小心火烛", "zh-Hant".parse::<Variant>().unwrap()), "天干物燥 小心火燭");
//! assert_eq!(zhconv_mw("-{zh-tw:鼠麴草;zh-cn:香茅}-是菊科草本植物。", Variant::ZhCN), "香茅是菊科草本植物。");
//! assert_eq!(zhconv_mw("菊科草本植物包括-{zh-tw:鼠麴草;zh-cn:香茅;}-等。", Variant::ZhTW), "菊科草本植物包括鼠麴草等。");
//! assert_eq!(zhconv_mw("-{H|zh:馬;zh-cn:鹿;}-馬克思主義", Variant::ZhCN), "鹿克思主义"); // global rule
//! assert_eq!(zhconv_mw("&二極體\n-{-|zh-hans:二极管; zh-hant:二極體}-\n", Variant::ZhCN), "&二极体\n\n")
//! ```
//! Set global rules inline (note that such rules always apply globally regardless of their
//! location, unlike in MediaWiki where they affect only the text that follows):
//! ```
//! use zhconv::{zhconv_mw, Variant};
//! assert_eq!(zhconv_mw("-{H|zh:馬;zh-cn:鹿;}-馬克思主義", Variant::ZhCN), "鹿克思主义"); // add
//! assert_eq!(zhconv_mw("&二極體\n-{-|zh-hans:二极管; zh-hant:二極體}-\n", Variant::ZhCN), "&二极体\n\n"); // remove
//! ```
//!
//! To load or add additional conversion rules such as CGroups or `(FROM, TO)` pairs,
Expand Down Expand Up @@ -79,7 +83,7 @@ pub use self::variant::Variant;

/// Helper function for general conversion using built-in converters.
///
/// For fine-grained control and custom conversion rules, these is [`ZhConverter`].
/// For fine-grained control and custom conversion rules, there is [`ZhConverter`].
#[inline(always)]
pub fn zhconv(text: &str, target: Variant) -> String {
get_builtin_converter(target).convert(text)
Expand All @@ -88,27 +92,29 @@ pub fn zhconv(text: &str, target: Variant) -> String {
/// Helper function for general conversion, activating conversion rules in MediaWiki syntax.
///
/// For general cases, [`zhconv`] should work well. Both of them share the same
/// built-in conversions tables.
/// built-in conversion tables.
///
/// # Note
/// The implementation first scans the input text to extract possible global rules like
/// `-{H|FOO BAR}-`.
/// If there are no global rules, the overall time complexity is `O(n + n)`.
/// Otherwise, the overall time complexity may degrade to `O(n + n * m)` in the worst case, where
/// `n` is the input text length and `m` is the maximum length of source words in conversion
/// rulesets.
///
/// If support for global rules is not needed, it is better to use
/// `get_builtin_converter(target).convert_as_wikitext_basic(text)` instead, which incurs no extra
/// overhead.
///
///
// /// Different from the implementation of MediaWiki, this crate use a automaton which makes it
// /// infeasible to mutate global rules during converting. So the function always searches the text
// /// for global rules such as `-{H|FOO BAR}-` in the first pass. If such rules exists, it build a
// /// new converter from the scratch with built-in conversion tables, which **takes extra time**.
// /// Otherwise, it just picks a built-in converter. Then it converts the text with the chosen
// /// converter during when non-global rules are parsed and applied.
///
/// For fine-grained control and custom conversion rules, check [`ZhConverter`].
/// For fine-grained control and custom conversion rules, check [`ZhConverterBuilder`].
///
/// Although it is designed to replicate the behavior of the MediaWiki implementation, it is not
/// fully compliant.
pub fn zhconv_mw(text: &str, target: Variant) -> String {
    // Pick the built-in converter for the requested variant first, then run the
    // extended wikitext conversion on the input with it.
    let converter = get_builtin_converter(target);
    converter.convert_as_wikitext_extended(text)
}
Expand Down
6 changes: 3 additions & 3 deletions src/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl FromStr for ConvRule {
}
}

impl<'r> Display for ConvRuleWithVariant<'r> {
impl Display for ConvRuleWithVariant<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let rule = self.rule;
match &rule.output {
Expand All @@ -178,7 +178,7 @@ impl<'r> Display for ConvRuleWithVariant<'r> {
rule.conv
.as_ref()
.and_then(|c| c.get_text_by_target(self.variant))
.unwrap_or("") // mediawiki would show: 在手动语言转换规则中检测到错误
.unwrap_or("") // MediaWiki would show: 在手动语言转换规则中检测到错误
),
Some(Output::VariantName(variant)) => write!(f, "{}", variant.get_name()),
Some(Output::Description) => {
Expand Down Expand Up @@ -393,7 +393,7 @@ impl AsRef<Conv> for ConvAction {

static REGEX_RULE: Lazy<Regex> = Lazy::new(|| Regex::new(r"-\{.+?\}-").unwrap());

/// Extract a set rules from a text.
/// Extract rules in wikitext.
pub fn extract_rules<'s>(
text: &'s str,
) -> Map<Matches<'static, 's>, impl FnMut(Match<'s>) -> Result<ConvRule, RuleError>> {
Expand Down
8 changes: 6 additions & 2 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ pub(crate) const ZH_HANS_MY_TABLES: [Table; 2] = [ZH_HANS_TABLE, ZH_MY_TABLE];
#[doc(hidden)]
pub(crate) const ZH_HANS_MY_DAAC: &[u8] = ZH_HANS_SG_DAAC;

/// Expand a compressed built-in conversion table.
/// Expand a built-in conversion table.
///
/// Tables are compressed for space efficiency. This behavior is independent of the `compress`
/// feature, which only controls the compression of serialized
/// [automata](https://docs.rs/daachorse).
pub fn expand_table((froms, tos): Table<'_>) -> impl Iterator<Item = (String, String)> + '_ {
std::iter::zip(froms.trim().split('|'), tos.trim().split('|')).scan(
String::from(""),
Expand Down Expand Up @@ -109,7 +113,7 @@ pub fn pair_expand<'s>(
}
}
expanding -= 1;
Some(b.expect("compressed rulesets should be well-formed"))
Some(b.expect("shrunken rulesets should be well-formed"))
})
}

Expand Down
2 changes: 1 addition & 1 deletion src/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub fn get_opencc_commit() -> Option<String> {
option_env!("OPENCC_COMMIT_HASH").map(|s| s.into())
}

/// Convert a text to a target Chinese variant.
/// Convert text to a target Chinese variant.
///
/// Supported target variants: zh, zh-Hant, zh-Hans, zh-TW, zh-HK, zh-MO, zh-CN, zh-SG, zh-MY.
/// If `wikitext` is `true`, inline conversion rules such as `-{foo...bar}-` are parsed.
Expand Down
7 changes: 3 additions & 4 deletions web/src/components/OptionsControl.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ function OptionsControl(
<Tooltip
title={
<>
Parse MediaWiki conversion rules in the text
<br />/ 解析文本中的 MediaWiki 轉換規則
Parse and apply inline rules in the MediaWiki LanguageConverter syntax
<br />/ 解析並應用文本中的 MediaWiki 語言轉換規則
</>
}
>
Expand All @@ -102,8 +102,7 @@ function OptionsControl(
flexDirection="column"
alignItems="center"
>
<span>Inline Rules</span>
<span>文內規則</span>
<span>Wikitext</span>
</Box>
}
/>
Expand Down

0 comments on commit c607785

Please sign in to comment.