diff --git a/unic/ucd/ident/src/id.rs b/unic/ucd/ident/src/id.rs new file mode 100644 index 00000000..c329ff66 --- /dev/null +++ b/unic/ucd/ident/src/id.rs @@ -0,0 +1,224 @@ +// Copyright 2017-2019 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +char_property! { + /// A character that can start an identifier. + pub struct IdStart(bool) { + abbr => "IDS"; + long => "ID_Start"; + human => "ID Start"; + + data_table_path => "../tables/id_start.rsv"; + } + + /// Is this an identifier starting character? + pub fn is_id_start(char) -> bool; +} + +char_property! { + /// A character that can continue an identifier. + pub struct IdContinue(bool) { + abbr => "IDC"; + long => "ID_Continue"; + human => "ID Continue"; + + data_table_path => "../tables/id_continue.rsv"; + } + + /// Is this an identifier continuing character? 
+ pub fn is_id_continue(char) -> bool; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_is_id_start() { + use super::is_id_start; + + // ASCII + assert_eq!(is_id_start('\u{0000}'), false); + assert_eq!(is_id_start('\u{0020}'), false); + assert_eq!(is_id_start('\u{0021}'), false); + + assert_eq!(is_id_start('\u{0027}'), false); + assert_eq!(is_id_start('\u{0028}'), false); + assert_eq!(is_id_start('\u{0029}'), false); + assert_eq!(is_id_start('\u{002a}'), false); + + assert_eq!(is_id_start('\u{0030}'), false); + assert_eq!(is_id_start('\u{0039}'), false); + assert_eq!(is_id_start('\u{003a}'), false); + assert_eq!(is_id_start('\u{003b}'), false); + assert_eq!(is_id_start('\u{003c}'), false); + assert_eq!(is_id_start('\u{003d}'), false); + + assert_eq!(is_id_start('\u{004a}'), true); + assert_eq!(is_id_start('\u{004b}'), true); + assert_eq!(is_id_start('\u{004c}'), true); + assert_eq!(is_id_start('\u{004d}'), true); + assert_eq!(is_id_start('\u{004e}'), true); + + assert_eq!(is_id_start('\u{006a}'), true); + assert_eq!(is_id_start('\u{006b}'), true); + assert_eq!(is_id_start('\u{006c}'), true); + assert_eq!(is_id_start('\u{006d}'), true); + assert_eq!(is_id_start('\u{006e}'), true); + + assert_eq!(is_id_start('\u{007a}'), true); + assert_eq!(is_id_start('\u{007b}'), false); + assert_eq!(is_id_start('\u{007c}'), false); + assert_eq!(is_id_start('\u{007d}'), false); + assert_eq!(is_id_start('\u{007e}'), false); + + assert_eq!(is_id_start('\u{00c0}'), true); + assert_eq!(is_id_start('\u{00c1}'), true); + assert_eq!(is_id_start('\u{00c2}'), true); + assert_eq!(is_id_start('\u{00c3}'), true); + assert_eq!(is_id_start('\u{00c4}'), true); + + // Other BMP + assert_eq!(is_id_start('\u{061b}'), false); + assert_eq!(is_id_start('\u{061c}'), false); + assert_eq!(is_id_start('\u{061d}'), false); + + assert_eq!(is_id_start('\u{200d}'), false); + assert_eq!(is_id_start('\u{200e}'), false); + assert_eq!(is_id_start('\u{200f}'), false); + assert_eq!(is_id_start('\u{2010}'), 
false); + + assert_eq!(is_id_start('\u{2029}'), false); + assert_eq!(is_id_start('\u{202a}'), false); + assert_eq!(is_id_start('\u{202e}'), false); + assert_eq!(is_id_start('\u{202f}'), false); + + // Other Planes + assert_eq!(is_id_start('\u{10000}'), true); + assert_eq!(is_id_start('\u{10001}'), true); + + assert_eq!(is_id_start('\u{20000}'), true); + assert_eq!(is_id_start('\u{30000}'), false); + assert_eq!(is_id_start('\u{40000}'), false); + assert_eq!(is_id_start('\u{50000}'), false); + assert_eq!(is_id_start('\u{60000}'), false); + assert_eq!(is_id_start('\u{70000}'), false); + assert_eq!(is_id_start('\u{80000}'), false); + assert_eq!(is_id_start('\u{90000}'), false); + assert_eq!(is_id_start('\u{a0000}'), false); + assert_eq!(is_id_start('\u{b0000}'), false); + assert_eq!(is_id_start('\u{c0000}'), false); + assert_eq!(is_id_start('\u{d0000}'), false); + assert_eq!(is_id_start('\u{e0000}'), false); + + assert_eq!(is_id_start('\u{efffe}'), false); + assert_eq!(is_id_start('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_id_start('\u{f0000}'), false); + assert_eq!(is_id_start('\u{f0001}'), false); + assert_eq!(is_id_start('\u{ffffe}'), false); + assert_eq!(is_id_start('\u{fffff}'), false); + assert_eq!(is_id_start('\u{100000}'), false); + assert_eq!(is_id_start('\u{100001}'), false); + assert_eq!(is_id_start('\u{10fffe}'), false); + assert_eq!(is_id_start('\u{10ffff}'), false); + } + + #[test] + fn test_is_id_continue() { + use super::is_id_continue; + + // ASCII + assert_eq!(is_id_continue('\u{0000}'), false); + assert_eq!(is_id_continue('\u{0020}'), false); + assert_eq!(is_id_continue('\u{0021}'), false); + + assert_eq!(is_id_continue('\u{0027}'), false); + assert_eq!(is_id_continue('\u{0028}'), false); + assert_eq!(is_id_continue('\u{0029}'), false); + assert_eq!(is_id_continue('\u{002a}'), false); + + assert_eq!(is_id_continue('\u{0030}'), true); + assert_eq!(is_id_continue('\u{0039}'), true); + assert_eq!(is_id_continue('\u{003a}'), false); + 
assert_eq!(is_id_continue('\u{003b}'), false); + assert_eq!(is_id_continue('\u{003c}'), false); + assert_eq!(is_id_continue('\u{003d}'), false); + + assert_eq!(is_id_continue('\u{004a}'), true); + assert_eq!(is_id_continue('\u{004b}'), true); + assert_eq!(is_id_continue('\u{004c}'), true); + assert_eq!(is_id_continue('\u{004d}'), true); + assert_eq!(is_id_continue('\u{004e}'), true); + + assert_eq!(is_id_continue('\u{006a}'), true); + assert_eq!(is_id_continue('\u{006b}'), true); + assert_eq!(is_id_continue('\u{006c}'), true); + assert_eq!(is_id_continue('\u{006d}'), true); + assert_eq!(is_id_continue('\u{006e}'), true); + + assert_eq!(is_id_continue('\u{007a}'), true); + assert_eq!(is_id_continue('\u{007b}'), false); + assert_eq!(is_id_continue('\u{007c}'), false); + assert_eq!(is_id_continue('\u{007d}'), false); + assert_eq!(is_id_continue('\u{007e}'), false); + + assert_eq!(is_id_continue('\u{00c0}'), true); + assert_eq!(is_id_continue('\u{00c1}'), true); + assert_eq!(is_id_continue('\u{00c2}'), true); + assert_eq!(is_id_continue('\u{00c3}'), true); + assert_eq!(is_id_continue('\u{00c4}'), true); + + // Other BMP + assert_eq!(is_id_continue('\u{061b}'), false); + assert_eq!(is_id_continue('\u{061c}'), false); + assert_eq!(is_id_continue('\u{061d}'), false); + + assert_eq!(is_id_continue('\u{200d}'), false); + assert_eq!(is_id_continue('\u{200e}'), false); + assert_eq!(is_id_continue('\u{200f}'), false); + assert_eq!(is_id_continue('\u{2010}'), false); + + assert_eq!(is_id_continue('\u{2029}'), false); + assert_eq!(is_id_continue('\u{202a}'), false); + assert_eq!(is_id_continue('\u{202e}'), false); + assert_eq!(is_id_continue('\u{202f}'), false); + + // Other Planes + assert_eq!(is_id_continue('\u{10000}'), true); + assert_eq!(is_id_continue('\u{10001}'), true); + + assert_eq!(is_id_continue('\u{20000}'), true); + assert_eq!(is_id_continue('\u{30000}'), false); + assert_eq!(is_id_continue('\u{40000}'), false); + assert_eq!(is_id_continue('\u{50000}'), false); + 
assert_eq!(is_id_continue('\u{60000}'), false); + assert_eq!(is_id_continue('\u{70000}'), false); + assert_eq!(is_id_continue('\u{80000}'), false); + assert_eq!(is_id_continue('\u{90000}'), false); + assert_eq!(is_id_continue('\u{a0000}'), false); + assert_eq!(is_id_continue('\u{b0000}'), false); + assert_eq!(is_id_continue('\u{c0000}'), false); + assert_eq!(is_id_continue('\u{d0000}'), false); + assert_eq!(is_id_continue('\u{e0000}'), false); + + assert_eq!(is_id_continue('\u{efffe}'), false); + assert_eq!(is_id_continue('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_id_continue('\u{f0000}'), false); + assert_eq!(is_id_continue('\u{f0001}'), false); + assert_eq!(is_id_continue('\u{ffffe}'), false); + assert_eq!(is_id_continue('\u{fffff}'), false); + assert_eq!(is_id_continue('\u{100000}'), false); + assert_eq!(is_id_continue('\u{100001}'), false); + assert_eq!(is_id_continue('\u{10fffe}'), false); + assert_eq!(is_id_continue('\u{10ffff}'), false); + } +} diff --git a/unic/ucd/ident/src/lib.rs b/unic/ucd/ident/src/lib.rs index a98ea21b..54d6aa20 100644 --- a/unic/ucd/ident/src/lib.rs +++ b/unic/ucd/ident/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2017 The UNIC Project Developers. +// Copyright 2017-2019 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // @@ -25,11 +25,13 @@ //! //! # Features //! -//! - `xid` (default): the `XID_Start` and `XID_Continue` properties -//! - `id` (optional): the `ID_Start` and `ID_Continue` properties; -//! _in most cases, you should prefer using the `XID` properties -//! because they are consistent under NFKC normalization_ -//! - `pattern` (optional): the `Pattern_Syntax` and `Pattern_White_Space` properties +//! - `xid` (default): the `XID_Start` and `XID_Continue` properties. +//! +//! - `id` (optional): the `ID_Start` and `ID_Continue` properties. +//! NOTE: in most cases, you should prefer using the `XID` properties +//! 
because they are consistent under NFKC normalization. +//! +//! - `pattern` (optional): the `Pattern_Syntax` and `Pattern_White_Space` properties. //! //! [UAX31 Unicode Identifier and Pattern Syntax]: @@ -42,98 +44,17 @@ mod pkg_info; pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION}; #[cfg(feature = "xid")] -mod xid { - char_property! { - /// A character that can start an identifier, stable under NFKC. - pub struct XidStart(bool) { - abbr => "XIDS"; - long => "XID_Start"; - human => "XID Start"; - - data_table_path => "../tables/xid_start.rsv"; - } - - /// Is this a NFKC-safe identifier starting character? - pub fn is_xid_start(char) -> bool; - } - char_property! { - /// A character that can continue an identifier, stable under NFKC. - pub struct XidContinue(bool) { - abbr => "XIDC"; - long => "XID_Continue"; - human => "XID Continue"; - - data_table_path => "../tables/xid_continue.rsv"; - } - - /// Is this a NFKC-safe identifier continuing character? - pub fn is_xid_continue(char) -> bool; - } -} +mod xid; #[cfg(feature = "xid")] pub use crate::xid::{is_xid_continue, is_xid_start, XidContinue, XidStart}; #[cfg(feature = "id")] -mod id { - char_property! { - /// A character that can start an identifier. - pub struct IdStart(bool) { - abbr => "IDS"; - long => "ID_Start"; - human => "ID Start"; - - data_table_path => "../tables/id_start.rsv"; - } - - /// Is this a identifier starting character? - pub fn is_id_start(char) -> bool; - } - char_property! { - /// A character that can continue an identifier. - pub struct IdContinue(bool) { - abbr => "IDC"; - long => "ID_Continue"; - human => "ID Continue"; - - data_table_path => "../tables/id_continue.rsv"; - } - - /// Is this a identifier continuing character? - pub fn is_id_continue(char) -> bool; - } -} +mod id; #[cfg(feature = "id")] pub use crate::id::{is_id_continue, is_id_start, IdContinue, IdStart}; #[cfg(feature = "pattern")] -mod pattern { - char_property! 
{ - /// A character that should be treated as a syntax character in patterns. - pub struct PatternSyntax(bool) { - abbr => "Pat_Syn"; - long => "Pattern_Syntax"; - human => "Pattern Syntax"; - - data_table_path => "../tables/pattern_syntax.rsv"; - } - - /// Is this a character that should be treated as syntax in patterns? - pub fn is_pattern_syntax(char) -> bool; - } - char_property! { - /// A character that should be treated as a whitespace in patterns. - pub struct PatternWhitespace(bool) { - abbr => "Pat_WS"; - long => "Pattern_White_Space"; - human => "Pattern Whitespace"; - - data_table_path => "../tables/pattern_white_space.rsv"; - } - - /// Is this a character that should be treated as whitespace in patterns? - pub fn is_pattern_whitespace(char) -> bool; - } -} +mod pattern; #[cfg(feature = "pattern")] pub use crate::pattern::{ is_pattern_syntax, diff --git a/unic/ucd/ident/src/pattern.rs b/unic/ucd/ident/src/pattern.rs new file mode 100644 index 00000000..5a6af026 --- /dev/null +++ b/unic/ucd/ident/src/pattern.rs @@ -0,0 +1,224 @@ +// Copyright 2017-2019 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +char_property! { + /// A character that should be treated as a syntax character in patterns. + pub struct PatternSyntax(bool) { + abbr => "Pat_Syn"; + long => "Pattern_Syntax"; + human => "Pattern Syntax"; + + data_table_path => "../tables/pattern_syntax.rsv"; + } + + /// Is this a character that should be treated as syntax in patterns? + pub fn is_pattern_syntax(char) -> bool; +} + +char_property! { + /// A character that should be treated as a whitespace in patterns. 
+ pub struct PatternWhitespace(bool) { + abbr => "Pat_WS"; + long => "Pattern_White_Space"; + human => "Pattern Whitespace"; + + data_table_path => "../tables/pattern_white_space.rsv"; + } + + /// Is this a character that should be treated as whitespace in patterns? + pub fn is_pattern_whitespace(char) -> bool; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_is_pattern_syntax() { + use super::is_pattern_syntax; + + // ASCII + assert_eq!(is_pattern_syntax('\u{0000}'), false); + assert_eq!(is_pattern_syntax('\u{0020}'), false); + assert_eq!(is_pattern_syntax('\u{0021}'), true); + + assert_eq!(is_pattern_syntax('\u{0027}'), true); + assert_eq!(is_pattern_syntax('\u{0028}'), true); + assert_eq!(is_pattern_syntax('\u{0029}'), true); + assert_eq!(is_pattern_syntax('\u{002a}'), true); + + assert_eq!(is_pattern_syntax('\u{0030}'), false); + assert_eq!(is_pattern_syntax('\u{0039}'), false); + assert_eq!(is_pattern_syntax('\u{003a}'), true); + assert_eq!(is_pattern_syntax('\u{003b}'), true); + assert_eq!(is_pattern_syntax('\u{003c}'), true); + assert_eq!(is_pattern_syntax('\u{003d}'), true); + + assert_eq!(is_pattern_syntax('\u{004a}'), false); + assert_eq!(is_pattern_syntax('\u{004b}'), false); + assert_eq!(is_pattern_syntax('\u{004c}'), false); + assert_eq!(is_pattern_syntax('\u{004d}'), false); + assert_eq!(is_pattern_syntax('\u{004e}'), false); + + assert_eq!(is_pattern_syntax('\u{006a}'), false); + assert_eq!(is_pattern_syntax('\u{006b}'), false); + assert_eq!(is_pattern_syntax('\u{006c}'), false); + assert_eq!(is_pattern_syntax('\u{006d}'), false); + assert_eq!(is_pattern_syntax('\u{006e}'), false); + + assert_eq!(is_pattern_syntax('\u{007a}'), false); + assert_eq!(is_pattern_syntax('\u{007b}'), true); + assert_eq!(is_pattern_syntax('\u{007c}'), true); + assert_eq!(is_pattern_syntax('\u{007d}'), true); + assert_eq!(is_pattern_syntax('\u{007e}'), true); + + assert_eq!(is_pattern_syntax('\u{00c0}'), false); + assert_eq!(is_pattern_syntax('\u{00c1}'), false); + 
assert_eq!(is_pattern_syntax('\u{00c2}'), false); + assert_eq!(is_pattern_syntax('\u{00c3}'), false); + assert_eq!(is_pattern_syntax('\u{00c4}'), false); + + // Other BMP + assert_eq!(is_pattern_syntax('\u{061b}'), false); + assert_eq!(is_pattern_syntax('\u{061c}'), false); + assert_eq!(is_pattern_syntax('\u{061d}'), false); + + assert_eq!(is_pattern_syntax('\u{200d}'), false); + assert_eq!(is_pattern_syntax('\u{200e}'), false); + assert_eq!(is_pattern_syntax('\u{200f}'), false); + assert_eq!(is_pattern_syntax('\u{2010}'), true); + + assert_eq!(is_pattern_syntax('\u{2029}'), false); + assert_eq!(is_pattern_syntax('\u{202a}'), false); + assert_eq!(is_pattern_syntax('\u{202e}'), false); + assert_eq!(is_pattern_syntax('\u{202f}'), false); + + // Other Planes + assert_eq!(is_pattern_syntax('\u{10000}'), false); + assert_eq!(is_pattern_syntax('\u{10001}'), false); + + assert_eq!(is_pattern_syntax('\u{20000}'), false); + assert_eq!(is_pattern_syntax('\u{30000}'), false); + assert_eq!(is_pattern_syntax('\u{40000}'), false); + assert_eq!(is_pattern_syntax('\u{50000}'), false); + assert_eq!(is_pattern_syntax('\u{60000}'), false); + assert_eq!(is_pattern_syntax('\u{70000}'), false); + assert_eq!(is_pattern_syntax('\u{80000}'), false); + assert_eq!(is_pattern_syntax('\u{90000}'), false); + assert_eq!(is_pattern_syntax('\u{a0000}'), false); + assert_eq!(is_pattern_syntax('\u{b0000}'), false); + assert_eq!(is_pattern_syntax('\u{c0000}'), false); + assert_eq!(is_pattern_syntax('\u{d0000}'), false); + assert_eq!(is_pattern_syntax('\u{e0000}'), false); + + assert_eq!(is_pattern_syntax('\u{efffe}'), false); + assert_eq!(is_pattern_syntax('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_pattern_syntax('\u{f0000}'), false); + assert_eq!(is_pattern_syntax('\u{f0001}'), false); + assert_eq!(is_pattern_syntax('\u{ffffe}'), false); + assert_eq!(is_pattern_syntax('\u{fffff}'), false); + assert_eq!(is_pattern_syntax('\u{100000}'), false); + 
assert_eq!(is_pattern_syntax('\u{100001}'), false); + assert_eq!(is_pattern_syntax('\u{10fffe}'), false); + assert_eq!(is_pattern_syntax('\u{10ffff}'), false); + } + + #[test] + fn test_is_pattern_whitespace() { + use super::is_pattern_whitespace; + + // ASCII + assert_eq!(is_pattern_whitespace('\u{0000}'), false); + assert_eq!(is_pattern_whitespace('\u{0020}'), true); + assert_eq!(is_pattern_whitespace('\u{0021}'), false); + + assert_eq!(is_pattern_whitespace('\u{0027}'), false); + assert_eq!(is_pattern_whitespace('\u{0028}'), false); + assert_eq!(is_pattern_whitespace('\u{0029}'), false); + assert_eq!(is_pattern_whitespace('\u{002a}'), false); + + assert_eq!(is_pattern_whitespace('\u{0030}'), false); + assert_eq!(is_pattern_whitespace('\u{0039}'), false); + assert_eq!(is_pattern_whitespace('\u{003a}'), false); + assert_eq!(is_pattern_whitespace('\u{003b}'), false); + assert_eq!(is_pattern_whitespace('\u{003c}'), false); + assert_eq!(is_pattern_whitespace('\u{003d}'), false); + + assert_eq!(is_pattern_whitespace('\u{004a}'), false); + assert_eq!(is_pattern_whitespace('\u{004b}'), false); + assert_eq!(is_pattern_whitespace('\u{004c}'), false); + assert_eq!(is_pattern_whitespace('\u{004d}'), false); + assert_eq!(is_pattern_whitespace('\u{004e}'), false); + + assert_eq!(is_pattern_whitespace('\u{006a}'), false); + assert_eq!(is_pattern_whitespace('\u{006b}'), false); + assert_eq!(is_pattern_whitespace('\u{006c}'), false); + assert_eq!(is_pattern_whitespace('\u{006d}'), false); + assert_eq!(is_pattern_whitespace('\u{006e}'), false); + + assert_eq!(is_pattern_whitespace('\u{007a}'), false); + assert_eq!(is_pattern_whitespace('\u{007b}'), false); + assert_eq!(is_pattern_whitespace('\u{007c}'), false); + assert_eq!(is_pattern_whitespace('\u{007d}'), false); + assert_eq!(is_pattern_whitespace('\u{007e}'), false); + + assert_eq!(is_pattern_whitespace('\u{00c0}'), false); + assert_eq!(is_pattern_whitespace('\u{00c1}'), false); + assert_eq!(is_pattern_whitespace('\u{00c2}'), 
false); + assert_eq!(is_pattern_whitespace('\u{00c3}'), false); + assert_eq!(is_pattern_whitespace('\u{00c4}'), false); + + // Other BMP + assert_eq!(is_pattern_whitespace('\u{061b}'), false); + assert_eq!(is_pattern_whitespace('\u{061c}'), false); + assert_eq!(is_pattern_whitespace('\u{061d}'), false); + + assert_eq!(is_pattern_whitespace('\u{200d}'), false); + assert_eq!(is_pattern_whitespace('\u{200e}'), true); + assert_eq!(is_pattern_whitespace('\u{200f}'), true); + assert_eq!(is_pattern_whitespace('\u{2010}'), false); + + assert_eq!(is_pattern_whitespace('\u{2029}'), true); + assert_eq!(is_pattern_whitespace('\u{202a}'), false); + assert_eq!(is_pattern_whitespace('\u{202e}'), false); + assert_eq!(is_pattern_whitespace('\u{202f}'), false); + + // Other Planes + assert_eq!(is_pattern_whitespace('\u{10000}'), false); + assert_eq!(is_pattern_whitespace('\u{10001}'), false); + + assert_eq!(is_pattern_whitespace('\u{20000}'), false); + assert_eq!(is_pattern_whitespace('\u{30000}'), false); + assert_eq!(is_pattern_whitespace('\u{40000}'), false); + assert_eq!(is_pattern_whitespace('\u{50000}'), false); + assert_eq!(is_pattern_whitespace('\u{60000}'), false); + assert_eq!(is_pattern_whitespace('\u{70000}'), false); + assert_eq!(is_pattern_whitespace('\u{80000}'), false); + assert_eq!(is_pattern_whitespace('\u{90000}'), false); + assert_eq!(is_pattern_whitespace('\u{a0000}'), false); + assert_eq!(is_pattern_whitespace('\u{b0000}'), false); + assert_eq!(is_pattern_whitespace('\u{c0000}'), false); + assert_eq!(is_pattern_whitespace('\u{d0000}'), false); + assert_eq!(is_pattern_whitespace('\u{e0000}'), false); + + assert_eq!(is_pattern_whitespace('\u{efffe}'), false); + assert_eq!(is_pattern_whitespace('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_pattern_whitespace('\u{f0000}'), false); + assert_eq!(is_pattern_whitespace('\u{f0001}'), false); + assert_eq!(is_pattern_whitespace('\u{ffffe}'), false); + assert_eq!(is_pattern_whitespace('\u{fffff}'), false); 
+ assert_eq!(is_pattern_whitespace('\u{100000}'), false); + assert_eq!(is_pattern_whitespace('\u{100001}'), false); + assert_eq!(is_pattern_whitespace('\u{10fffe}'), false); + assert_eq!(is_pattern_whitespace('\u{10ffff}'), false); + } +} diff --git a/unic/ucd/ident/src/xid.rs b/unic/ucd/ident/src/xid.rs new file mode 100644 index 00000000..a17b5d86 --- /dev/null +++ b/unic/ucd/ident/src/xid.rs @@ -0,0 +1,224 @@ +// Copyright 2017-2019 The UNIC Project Developers. +// +// See the COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +char_property! { + /// A character that can start an identifier, stable under NFKC. + pub struct XidStart(bool) { + abbr => "XIDS"; + long => "XID_Start"; + human => "XID Start"; + + data_table_path => "../tables/xid_start.rsv"; + } + + /// Is this an NFKC-safe identifier starting character? + pub fn is_xid_start(char) -> bool; +} + +char_property! { + /// A character that can continue an identifier, stable under NFKC. + pub struct XidContinue(bool) { + abbr => "XIDC"; + long => "XID_Continue"; + human => "XID Continue"; + + data_table_path => "../tables/xid_continue.rsv"; + } + + /// Is this an NFKC-safe identifier continuing character? 
+ pub fn is_xid_continue(char) -> bool; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_is_xid_start() { + use super::is_xid_start; + + // ASCII + assert_eq!(is_xid_start('\u{0000}'), false); + assert_eq!(is_xid_start('\u{0020}'), false); + assert_eq!(is_xid_start('\u{0021}'), false); + + assert_eq!(is_xid_start('\u{0027}'), false); + assert_eq!(is_xid_start('\u{0028}'), false); + assert_eq!(is_xid_start('\u{0029}'), false); + assert_eq!(is_xid_start('\u{002a}'), false); + + assert_eq!(is_xid_start('\u{0030}'), false); + assert_eq!(is_xid_start('\u{0039}'), false); + assert_eq!(is_xid_start('\u{003a}'), false); + assert_eq!(is_xid_start('\u{003b}'), false); + assert_eq!(is_xid_start('\u{003c}'), false); + assert_eq!(is_xid_start('\u{003d}'), false); + + assert_eq!(is_xid_start('\u{004a}'), true); + assert_eq!(is_xid_start('\u{004b}'), true); + assert_eq!(is_xid_start('\u{004c}'), true); + assert_eq!(is_xid_start('\u{004d}'), true); + assert_eq!(is_xid_start('\u{004e}'), true); + + assert_eq!(is_xid_start('\u{006a}'), true); + assert_eq!(is_xid_start('\u{006b}'), true); + assert_eq!(is_xid_start('\u{006c}'), true); + assert_eq!(is_xid_start('\u{006d}'), true); + assert_eq!(is_xid_start('\u{006e}'), true); + + assert_eq!(is_xid_start('\u{007a}'), true); + assert_eq!(is_xid_start('\u{007b}'), false); + assert_eq!(is_xid_start('\u{007c}'), false); + assert_eq!(is_xid_start('\u{007d}'), false); + assert_eq!(is_xid_start('\u{007e}'), false); + + assert_eq!(is_xid_start('\u{00c0}'), true); + assert_eq!(is_xid_start('\u{00c1}'), true); + assert_eq!(is_xid_start('\u{00c2}'), true); + assert_eq!(is_xid_start('\u{00c3}'), true); + assert_eq!(is_xid_start('\u{00c4}'), true); + + // Other BMP + assert_eq!(is_xid_start('\u{061b}'), false); + assert_eq!(is_xid_start('\u{061c}'), false); + assert_eq!(is_xid_start('\u{061d}'), false); + + assert_eq!(is_xid_start('\u{200d}'), false); + assert_eq!(is_xid_start('\u{200e}'), false); + assert_eq!(is_xid_start('\u{200f}'), false); + 
assert_eq!(is_xid_start('\u{2010}'), false); + + assert_eq!(is_xid_start('\u{2029}'), false); + assert_eq!(is_xid_start('\u{202a}'), false); + assert_eq!(is_xid_start('\u{202e}'), false); + assert_eq!(is_xid_start('\u{202f}'), false); + + // Other Planes + assert_eq!(is_xid_start('\u{10000}'), true); + assert_eq!(is_xid_start('\u{10001}'), true); + + assert_eq!(is_xid_start('\u{20000}'), true); + assert_eq!(is_xid_start('\u{30000}'), false); + assert_eq!(is_xid_start('\u{40000}'), false); + assert_eq!(is_xid_start('\u{50000}'), false); + assert_eq!(is_xid_start('\u{60000}'), false); + assert_eq!(is_xid_start('\u{70000}'), false); + assert_eq!(is_xid_start('\u{80000}'), false); + assert_eq!(is_xid_start('\u{90000}'), false); + assert_eq!(is_xid_start('\u{a0000}'), false); + assert_eq!(is_xid_start('\u{b0000}'), false); + assert_eq!(is_xid_start('\u{c0000}'), false); + assert_eq!(is_xid_start('\u{d0000}'), false); + assert_eq!(is_xid_start('\u{e0000}'), false); + + assert_eq!(is_xid_start('\u{efffe}'), false); + assert_eq!(is_xid_start('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_xid_start('\u{f0000}'), false); + assert_eq!(is_xid_start('\u{f0001}'), false); + assert_eq!(is_xid_start('\u{ffffe}'), false); + assert_eq!(is_xid_start('\u{fffff}'), false); + assert_eq!(is_xid_start('\u{100000}'), false); + assert_eq!(is_xid_start('\u{100001}'), false); + assert_eq!(is_xid_start('\u{10fffe}'), false); + assert_eq!(is_xid_start('\u{10ffff}'), false); + } + + #[test] + fn test_is_xid_continue() { + use super::is_xid_continue; + + // ASCII + assert_eq!(is_xid_continue('\u{0000}'), false); + assert_eq!(is_xid_continue('\u{0020}'), false); + assert_eq!(is_xid_continue('\u{0021}'), false); + + assert_eq!(is_xid_continue('\u{0027}'), false); + assert_eq!(is_xid_continue('\u{0028}'), false); + assert_eq!(is_xid_continue('\u{0029}'), false); + assert_eq!(is_xid_continue('\u{002a}'), false); + + assert_eq!(is_xid_continue('\u{0030}'), true); + 
assert_eq!(is_xid_continue('\u{0039}'), true); + assert_eq!(is_xid_continue('\u{003a}'), false); + assert_eq!(is_xid_continue('\u{003b}'), false); + assert_eq!(is_xid_continue('\u{003c}'), false); + assert_eq!(is_xid_continue('\u{003d}'), false); + + assert_eq!(is_xid_continue('\u{004a}'), true); + assert_eq!(is_xid_continue('\u{004b}'), true); + assert_eq!(is_xid_continue('\u{004c}'), true); + assert_eq!(is_xid_continue('\u{004d}'), true); + assert_eq!(is_xid_continue('\u{004e}'), true); + + assert_eq!(is_xid_continue('\u{006a}'), true); + assert_eq!(is_xid_continue('\u{006b}'), true); + assert_eq!(is_xid_continue('\u{006c}'), true); + assert_eq!(is_xid_continue('\u{006d}'), true); + assert_eq!(is_xid_continue('\u{006e}'), true); + + assert_eq!(is_xid_continue('\u{007a}'), true); + assert_eq!(is_xid_continue('\u{007b}'), false); + assert_eq!(is_xid_continue('\u{007c}'), false); + assert_eq!(is_xid_continue('\u{007d}'), false); + assert_eq!(is_xid_continue('\u{007e}'), false); + + assert_eq!(is_xid_continue('\u{00c0}'), true); + assert_eq!(is_xid_continue('\u{00c1}'), true); + assert_eq!(is_xid_continue('\u{00c2}'), true); + assert_eq!(is_xid_continue('\u{00c3}'), true); + assert_eq!(is_xid_continue('\u{00c4}'), true); + + // Other BMP + assert_eq!(is_xid_continue('\u{061b}'), false); + assert_eq!(is_xid_continue('\u{061c}'), false); + assert_eq!(is_xid_continue('\u{061d}'), false); + + assert_eq!(is_xid_continue('\u{200d}'), false); + assert_eq!(is_xid_continue('\u{200e}'), false); + assert_eq!(is_xid_continue('\u{200f}'), false); + assert_eq!(is_xid_continue('\u{2010}'), false); + + assert_eq!(is_xid_continue('\u{2029}'), false); + assert_eq!(is_xid_continue('\u{202a}'), false); + assert_eq!(is_xid_continue('\u{202e}'), false); + assert_eq!(is_xid_continue('\u{202f}'), false); + + // Other Planes + assert_eq!(is_xid_continue('\u{10000}'), true); + assert_eq!(is_xid_continue('\u{10001}'), true); + + assert_eq!(is_xid_continue('\u{20000}'), true); + 
assert_eq!(is_xid_continue('\u{30000}'), false); + assert_eq!(is_xid_continue('\u{40000}'), false); + assert_eq!(is_xid_continue('\u{50000}'), false); + assert_eq!(is_xid_continue('\u{60000}'), false); + assert_eq!(is_xid_continue('\u{70000}'), false); + assert_eq!(is_xid_continue('\u{80000}'), false); + assert_eq!(is_xid_continue('\u{90000}'), false); + assert_eq!(is_xid_continue('\u{a0000}'), false); + assert_eq!(is_xid_continue('\u{b0000}'), false); + assert_eq!(is_xid_continue('\u{c0000}'), false); + assert_eq!(is_xid_continue('\u{d0000}'), false); + assert_eq!(is_xid_continue('\u{e0000}'), false); + + assert_eq!(is_xid_continue('\u{efffe}'), false); + assert_eq!(is_xid_continue('\u{effff}'), false); + + // Private-Use Area + assert_eq!(is_xid_continue('\u{f0000}'), false); + assert_eq!(is_xid_continue('\u{f0001}'), false); + assert_eq!(is_xid_continue('\u{ffffe}'), false); + assert_eq!(is_xid_continue('\u{fffff}'), false); + assert_eq!(is_xid_continue('\u{100000}'), false); + assert_eq!(is_xid_continue('\u{100001}'), false); + assert_eq!(is_xid_continue('\u{10fffe}'), false); + assert_eq!(is_xid_continue('\u{10ffff}'), false); + } +}