diff --git a/README.md b/README.md index b89b9ba..eee89fd 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Oniguruma-To-ES +# Oniguruma-To-ES (鬼車➜ES) [![npm version][npm-version-src]][npm-version-href] [![npm downloads][npm-downloads-src]][npm-downloads-href] diff --git a/src/unicode.js b/src/unicode.js index 3b266c4..410adc5 100644 --- a/src/unicode.js +++ b/src/unicode.js @@ -31,6 +31,11 @@ function getIgnoreCaseMatchChars(char) { return [...set]; } +// The following set includes: +// - All ES2024 general categories and their aliases (all are supported by Oniguruma). See +// +// - All ES2024 binary properties and their aliases (all are supported by Oniguruma). See +// // Unicode properties must be mapped to property names supported by JS, and must also apply JS's // stricter rules for casing, whitespace, and underscores in Unicode property names. In order to // remain lightweight, this library assumes properties not in this list are Unicode script names @@ -39,104 +44,99 @@ function getIgnoreCaseMatchChars(char) { // an `In_` prefix). See also: // - Properties supported in Oniguruma: // - Properties supported in JS by spec version: -const JsUnicodeProperties = new Set([ - // ES2024 general categories and their aliases; all are supported by Oniguruma - // See - 'C', 'Other', - 'Cc', 'Control', 'cntrl', - 'Cf', 'Format', - 'Cn', 'Unassigned', - 'Co', 'Private_Use', - 'Cs', 'Surrogate', - 'L', 'Letter', - 'LC', 'Cased_Letter', - 'Ll', 'Lowercase_Letter', - 'Lm', 'Modifier_Letter', - 'Lo', 'Other_Letter', - 'Lt', 'Titlecase_Letter', - 'Lu', 'Uppercase_Letter', - 'M', 'Mark', 'Combining_Mark', - 'Mc', 'Spacing_Mark', - 'Me', 'Enclosing_Mark', - 'Mn', 'Nonspacing_Mark', - 'N', 'Number', - 'Nd', 'Decimal_Number', 'digit', - 'Nl', 'Letter_Number', - 'No', 'Other_Number', - 'P', 'Punctuation', 'punct', - 'Pc', 'Connector_Punctuation', - 'Pd', 'Dash_Punctuation', - 'Pe', 'Close_Punctuation', - 'Pf', 'Final_Punctuation', - 'Pi', 'Initial_Punctuation', - 'Po', 'Other_Punctuation', - 'Ps', 'Open_Punctuation', - 'S', 'Symbol', - 'Sc', 'Currency_Symbol', - 'Sk', 'Modifier_Symbol', - 'Sm', 'Math_Symbol', - 'So', 'Other_Symbol', - 'Z', 'Separator', - 'Zl', 'Line_Separator', - 'Zp', 'Paragraph_Separator', - 'Zs', 'Space_Separator', - - // ES2024 binary properties and their aliases; all are supported by Oniguruma - // See - 'ASCII', - 'ASCII_Hex_Digit', 'AHex', - 'Alphabetic', 'Alpha', - 'Any', - 'Assigned', - 'Bidi_Control', 'Bidi_C', - 'Bidi_Mirrored', 'Bidi_M', - 'Case_Ignorable', 'CI', - 'Cased', - 'Changes_When_Casefolded', 'CWCF', - 'Changes_When_Casemapped', 'CWCM', - 'Changes_When_Lowercased', 'CWL', - 'Changes_When_NFKC_Casefolded', 'CWKCF', - 'Changes_When_Titlecased', 'CWT', - 'Changes_When_Uppercased', 'CWU', - 'Dash', - 'Default_Ignorable_Code_Point', 'DI', - 'Deprecated', 'Dep', - 'Diacritic', 'Dia', - 'Emoji', - 'Emoji_Component', 'EComp', - 'Emoji_Modifier', 'EMod', - 'Emoji_Modifier_Base', 'EBase', - 'Emoji_Presentation', 'EPres', - 'Extended_Pictographic', 'ExtPict', - 'Extender', 'Ext', - 'Grapheme_Base', 'Gr_Base', - 'Grapheme_Extend', 'Gr_Ext', - 'Hex_Digit', 'Hex', - 'IDS_Binary_Operator', 'IDSB', - 'IDS_Trinary_Operator', 'IDST', - 'ID_Continue', 'IDC', - 'ID_Start', 'IDS', - 'Ideographic', 'Ideo', - 'Join_Control', 'Join_C', - 'Logical_Order_Exception', 'LOE', - 'Lowercase', 'Lower', - 'Math', - 'Noncharacter_Code_Point', 'NChar', - 'Pattern_Syntax', 'Pat_Syn', - 'Pattern_White_Space', 'Pat_WS', - 'Quotation_Mark', 'QMark', - 'Radical', - 'Regional_Indicator', 'RI', - 'Sentence_Terminal', 'STerm', - 'Soft_Dotted', 'SD', - 'Terminal_Punctuation', 'Term', - 'Unified_Ideograph', 'UIdeo', - 'Uppercase', 'Upper', - 'Variation_Selector', 'VS', - 'White_Space', 'space', - 'XID_Continue', 'XIDC', - 'XID_Start', 'XIDS', -]); +const JsUnicodeProperties = new Set( +`C Other +Cc Control cntrl +Cf Format +Cn Unassigned +Co Private_Use +Cs Surrogate +L Letter +LC Cased_Letter +Ll Lowercase_Letter +Lm Modifier_Letter +Lo Other_Letter +Lt Titlecase_Letter +Lu Uppercase_Letter +M Mark Combining_Mark +Mc Spacing_Mark +Me Enclosing_Mark +Mn Nonspacing_Mark +N Number +Nd Decimal_Number digit +Nl Letter_Number +No Other_Number +P Punctuation punct +Pc Connector_Punctuation +Pd Dash_Punctuation +Pe Close_Punctuation +Pf Final_Punctuation +Pi Initial_Punctuation +Po Other_Punctuation +Ps Open_Punctuation +S Symbol +Sc Currency_Symbol +Sk Modifier_Symbol +Sm Math_Symbol +So Other_Symbol +Z Separator +Zl Line_Separator +Zp Paragraph_Separator +Zs Space_Separator +ASCII +ASCII_Hex_Digit AHex +Alphabetic Alpha +Any +Assigned +Bidi_Control Bidi_C +Bidi_Mirrored Bidi_M +Case_Ignorable CI +Cased +Changes_When_Casefolded CWCF +Changes_When_Casemapped CWCM +Changes_When_Lowercased CWL +Changes_When_NFKC_Casefolded CWKCF +Changes_When_Titlecased CWT +Changes_When_Uppercased CWU +Dash +Default_Ignorable_Code_Point DI +Deprecated Dep +Diacritic Dia +Emoji +Emoji_Component EComp +Emoji_Modifier EMod +Emoji_Modifier_Base EBase +Emoji_Presentation EPres +Extended_Pictographic ExtPict +Extender Ext +Grapheme_Base Gr_Base +Grapheme_Extend Gr_Ext +Hex_Digit Hex +IDS_Binary_Operator IDSB +IDS_Trinary_Operator IDST +ID_Continue IDC +ID_Start IDS +Ideographic Ideo +Join_Control Join_C +Logical_Order_Exception LOE +Lowercase Lower +Math +Noncharacter_Code_Point NChar +Pattern_Syntax Pat_Syn +Pattern_White_Space Pat_WS +Quotation_Mark QMark +Radical +Regional_Indicator RI +Sentence_Terminal STerm +Soft_Dotted SD +Terminal_Punctuation Term +Unified_Ideograph UIdeo +Uppercase Upper +Variation_Selector VS +White_Space space +XID_Continue XIDC +XID_Start XIDS`.split(/\s/) +); const JsUnicodePropertiesMap = new Map(); for (const p of JsUnicodeProperties) {