Skip to content

Commit

Permalink
Add parsing for DerivedNormalizationProps.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
raskad committed Jan 7, 2024
1 parent e19fcf8 commit ab5333c
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
64 changes: 64 additions & 0 deletions ucd-parse/src/derived_normalization_properties.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use std::path::Path;

use crate::{
common::{
parse_codepoint_association, CodepointIter, Codepoints, UcdFile,
UcdFileByCodepoint,
},
error::Error,
};

/// One parsed entry (row) of the `DerivedNormalizationProps.txt` file.
///
/// Each entry pairs a codepoint, or a range of codepoints, with the name of
/// a property that applies to it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DerivedNormalizationProperty {
    /// The single codepoint or the codepoint range covered by this entry.
    pub codepoints: Codepoints,
    /// The name of the property associated with `codepoints`.
    pub property: String,
}

impl UcdFile for DerivedNormalizationProperty {
    /// Returns the path of the data file these rows are parsed from.
    fn relative_file_path() -> &'static Path {
        const FILE_NAME: &str = "DerivedNormalizationProps.txt";
        Path::new(FILE_NAME)
    }
}

impl UcdFileByCodepoint for DerivedNormalizationProperty {
    /// Returns an iterator built from this row's `codepoints` field.
    fn codepoints(&self) -> CodepointIter {
        let covered = self.codepoints;
        covered.into_iter()
    }
}

impl std::str::FromStr for DerivedNormalizationProperty {
    type Err = Error;

    /// Parses one line of `DerivedNormalizationProps.txt` into a row.
    ///
    /// The line is handed to `parse_codepoint_association`; the codepoints
    /// it yields are stored as-is and the associated text is copied into an
    /// owned `String` for the `property` field.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `parse_codepoint_association`.
    ///
    // NOTE(review): some rows in the upstream file carry an extra value
    // field after the property name; whether that value is kept or dropped
    // depends on `parse_codepoint_association`, which is not visible here.
    fn from_str(line: &str) -> Result<Self, Self::Err> {
        let (range, name) = parse_codepoint_association(line)?;
        let property = name.to_string();
        Ok(Self { codepoints: range, property })
    }
}

#[cfg(test)]
mod tests {
    use super::DerivedNormalizationProperty;

    /// A row naming exactly one codepoint parses to that codepoint and
    /// its property name.
    #[test]
    fn parse_single() {
        let input =
            "00A0          ; Changes_When_NFKC_Casefolded # Zs       NO-BREAK SPACE\n";
        let parsed = input.parse::<DerivedNormalizationProperty>().unwrap();
        assert_eq!(parsed.codepoints, 0xA0);
        assert_eq!(parsed.property, "Changes_When_NFKC_Casefolded");
    }

    /// A row naming a `start..end` range parses to that range and its
    /// property name.
    #[test]
    fn parse_range() {
        let input = "0041..005A    ; Changes_When_NFKC_Casefolded # L&  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z\n";
        let parsed = input.parse::<DerivedNormalizationProperty>().unwrap();
        assert_eq!(parsed.codepoints, (0x41, 0x5A));
        assert_eq!(parsed.property, "Changes_When_NFKC_Casefolded");
    }
}
2 changes: 2 additions & 0 deletions ucd-parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub use crate::{
bidi_mirroring_glyph::BidiMirroring,
case_folding::{CaseFold, CaseStatus},
core_properties::CoreProperty,
derived_normalization_properties::DerivedNormalizationProperty,
east_asian_width::EastAsianWidth,
emoji_properties::EmojiProperty,
grapheme_cluster_break::{GraphemeClusterBreak, GraphemeClusterBreakTest},
Expand Down Expand Up @@ -65,6 +66,7 @@ mod arabic_shaping;
mod bidi_mirroring_glyph;
mod case_folding;
mod core_properties;
mod derived_normalization_properties;
mod east_asian_width;
mod emoji_properties;
mod grapheme_cluster_break;
Expand Down

0 comments on commit ab5333c

Please sign in to comment.