Skip to content

Commit

Permalink
Merge #257
Browse files Browse the repository at this point in the history
257: ucd/ident: Split into modules and add unit tests r=behnam a=behnam

Resolves GH-254.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/open-i18n/rust-unic/257)
<!-- Reviewable:end -->


Co-authored-by: Behnam Esfahbod <[email protected]>
  • Loading branch information
bors[bot] and behnam committed Mar 3, 2019
2 parents 46afb5f + bc61e44 commit 2fa4706
Show file tree
Hide file tree
Showing 4 changed files with 683 additions and 90 deletions.
224 changes: 224 additions & 0 deletions unic/ucd/ident/src/id.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Copyright 2017-2019 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Declares the boolean `IdStart` property type and its `is_id_start` lookup
// function through the `char_property!` macro (defined outside this file).
char_property! {
/// A character that can start an identifier.
pub struct IdStart(bool) {
// Abbreviated, long and human-readable names of the property.
abbr => "IDS";
long => "ID_Start";
human => "ID Start";

// Data table backing the property (presumably resolved relative to this
// source file; confirm against the macro definition).
data_table_path => "../tables/id_start.rsv";
}

/// Is this an identifier starting character?
pub fn is_id_start(char) -> bool;
}

// Declares the boolean `IdContinue` property type and its `is_id_continue`
// lookup function through the `char_property!` macro (defined outside this file).
char_property! {
/// A character that can continue an identifier.
pub struct IdContinue(bool) {
// Abbreviated, long and human-readable names of the property.
abbr => "IDC";
long => "ID_Continue";
human => "ID Continue";

// Data table backing the property (presumably resolved relative to this
// source file; confirm against the macro definition).
data_table_path => "../tables/id_continue.rsv";
}

/// Is this an identifier continuing character?
pub fn is_id_continue(char) -> bool;
}

#[cfg(test)]
mod tests {
    use super::{is_id_continue, is_id_start};

    /// Sample code points expected to be both `ID_Start` and `ID_Continue`.
    const START_AND_CONTINUE: &[char] = &[
        // ASCII letters
        '\u{004a}', '\u{004b}', '\u{004c}', '\u{004d}', '\u{004e}',
        '\u{006a}', '\u{006b}', '\u{006c}', '\u{006d}', '\u{006e}',
        '\u{007a}',
        // Latin-1 Supplement letters
        '\u{00c0}', '\u{00c1}', '\u{00c2}', '\u{00c3}', '\u{00c4}',
        // Other planes
        '\u{10000}', '\u{10001}',
        '\u{20000}',
    ];

    /// Sample code points expected to be `ID_Continue` but not `ID_Start`
    /// (the first and last ASCII digits).
    const CONTINUE_ONLY: &[char] = &['\u{0030}', '\u{0039}'];

    /// Sample code points expected to be neither `ID_Start` nor `ID_Continue`.
    const NEITHER: &[char] = &[
        // ASCII control, space and punctuation
        '\u{0000}', '\u{0020}', '\u{0021}',
        '\u{0027}', '\u{0028}', '\u{0029}', '\u{002a}',
        '\u{003a}', '\u{003b}', '\u{003c}', '\u{003d}',
        '\u{007b}', '\u{007c}', '\u{007d}', '\u{007e}',
        // Other BMP
        '\u{061b}', '\u{061c}', '\u{061d}',
        '\u{200d}', '\u{200e}', '\u{200f}', '\u{2010}',
        '\u{2029}', '\u{202a}', '\u{202e}', '\u{202f}',
        // Other planes
        // NOTE(review): U+30000 is assigned (CJK Extension G) as of Unicode 13.0,
        // so this expectation presumably only holds for the older data tables;
        // revisit when the tables are regenerated.
        '\u{30000}', '\u{40000}', '\u{50000}', '\u{60000}',
        '\u{70000}', '\u{80000}', '\u{90000}', '\u{a0000}',
        '\u{b0000}', '\u{c0000}', '\u{d0000}', '\u{e0000}',
        '\u{efffe}', '\u{effff}',
        // Planes 15 and 16: private-use code points and the plane-final noncharacters
        '\u{f0000}', '\u{f0001}', '\u{ffffe}', '\u{fffff}',
        '\u{100000}', '\u{100001}', '\u{10fffe}', '\u{10ffff}',
    ];

    /// Asserts that `property` yields `expected` for every code point in `samples`.
    ///
    /// On failure the message names the property function and the offending
    /// code point, so a mismatch can be located without counting lines.
    fn assert_property(name: &str, property: fn(char) -> bool, samples: &[char], expected: bool) {
        for &ch in samples {
            assert_eq!(property(ch), expected, "{}(U+{:04X})", name, ch as u32);
        }
    }

    #[test]
    fn test_is_id_start() {
        assert_property("is_id_start", is_id_start, START_AND_CONTINUE, true);
        // Digits may continue an identifier but must not start one.
        assert_property("is_id_start", is_id_start, CONTINUE_ONLY, false);
        assert_property("is_id_start", is_id_start, NEITHER, false);
    }

    #[test]
    fn test_is_id_continue() {
        assert_property("is_id_continue", is_id_continue, START_AND_CONTINUE, true);
        assert_property("is_id_continue", is_id_continue, CONTINUE_ONLY, true);
        assert_property("is_id_continue", is_id_continue, NEITHER, false);
    }
}
101 changes: 11 additions & 90 deletions unic/ucd/ident/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017 The UNIC Project Developers.
// Copyright 2017-2019 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
Expand All @@ -25,11 +25,13 @@
//!
//! # Features
//!
//! - `xid` (default): the `XID_Start` and `XID_Continue` properties
//! - `id` (optional): the `ID_Start` and `ID_Continue` properties;
//! _in most cases, you should prefer using the `XID` properties
//! because they are consistent under NFKC normalization_
//! - `pattern` (optional): the `Pattern_Syntax` and `Pattern_White_Space` properties
//! - `xid` (default): the `XID_Start` and `XID_Continue` properties.
//!
//! - `id` (optional): the `ID_Start` and `ID_Continue` properties.
//! NOTE: in most cases, you should prefer using the `XID` properties
//! because they are consistent under NFKC normalization.
//!
//! - `pattern` (optional): the `Pattern_Syntax` and `Pattern_White_Space` properties.
//!
//! [UAX31 Unicode Identifier and Pattern Syntax]: <https://www.unicode.org/reports/tr31/>
Expand All @@ -42,98 +44,17 @@ mod pkg_info;
pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};

// Inline `xid` module, compiled only when the `xid` feature is enabled.
// It declares the `XID_Start` / `XID_Continue` boolean properties and their
// lookup functions through the `char_property!` macro.
#[cfg(feature = "xid")]
mod xid {
char_property! {
/// A character that can start an identifier, stable under NFKC.
pub struct XidStart(bool) {
abbr => "XIDS";
long => "XID_Start";
human => "XID Start";

data_table_path => "../tables/xid_start.rsv";
}

/// Is this an NFKC-safe identifier starting character?
pub fn is_xid_start(char) -> bool;
}
char_property! {
/// A character that can continue an identifier, stable under NFKC.
pub struct XidContinue(bool) {
abbr => "XIDC";
long => "XID_Continue";
human => "XID Continue";

data_table_path => "../tables/xid_continue.rsv";
}

/// Is this an NFKC-safe identifier continuing character?
pub fn is_xid_continue(char) -> bool;
}
}
mod xid;
#[cfg(feature = "xid")]
pub use crate::xid::{is_xid_continue, is_xid_start, XidContinue, XidStart};

// Inline `id` module, compiled only when the `id` feature is enabled.
// It declares the `ID_Start` / `ID_Continue` boolean properties and their
// lookup functions through the `char_property!` macro.
#[cfg(feature = "id")]
mod id {
char_property! {
/// A character that can start an identifier.
pub struct IdStart(bool) {
abbr => "IDS";
long => "ID_Start";
human => "ID Start";

data_table_path => "../tables/id_start.rsv";
}

/// Is this an identifier starting character?
pub fn is_id_start(char) -> bool;
}
char_property! {
/// A character that can continue an identifier.
pub struct IdContinue(bool) {
abbr => "IDC";
long => "ID_Continue";
human => "ID Continue";

data_table_path => "../tables/id_continue.rsv";
}

/// Is this an identifier continuing character?
pub fn is_id_continue(char) -> bool;
}
}
mod id;
#[cfg(feature = "id")]
pub use crate::id::{is_id_continue, is_id_start, IdContinue, IdStart};

// Inline `pattern` module, compiled only when the `pattern` feature is enabled.
// It declares the `Pattern_Syntax` / `Pattern_White_Space` boolean properties
// and their lookup functions through the `char_property!` macro.
#[cfg(feature = "pattern")]
mod pattern {
char_property! {
/// A character that should be treated as a syntax character in patterns.
pub struct PatternSyntax(bool) {
abbr => "Pat_Syn";
long => "Pattern_Syntax";
human => "Pattern Syntax";

data_table_path => "../tables/pattern_syntax.rsv";
}

/// Is this a character that should be treated as syntax in patterns?
pub fn is_pattern_syntax(char) -> bool;
}
char_property! {
/// A character that should be treated as whitespace in patterns.
pub struct PatternWhitespace(bool) {
abbr => "Pat_WS";
long => "Pattern_White_Space";
human => "Pattern Whitespace";

data_table_path => "../tables/pattern_white_space.rsv";
}

/// Is this a character that should be treated as whitespace in patterns?
pub fn is_pattern_whitespace(char) -> bool;
}
}
mod pattern;
#[cfg(feature = "pattern")]
pub use crate::pattern::{
is_pattern_syntax,
Expand Down
Loading

0 comments on commit 2fa4706

Please sign in to comment.