Skip to content

Commit

Permalink
Change format of JsUnicodeProperties for lighter weight
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Dec 21, 2024
1 parent dbc23b4 commit 0067f62
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 99 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Oniguruma-To-ES
# Oniguruma-To-ES (鬼車➜ES)

[![npm version][npm-version-src]][npm-version-href]
[![npm downloads][npm-downloads-src]][npm-downloads-href]
Expand Down
196 changes: 98 additions & 98 deletions src/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ function getIgnoreCaseMatchChars(char) {
return [...set];
}

// The following set includes:
// - All ES2024 general categories and their aliases (all are supported by Oniguruma). See
// <github.com/mathiasbynens/unicode-match-property-value-ecmascript/blob/main/data/mappings.js>
// - All ES2024 binary properties and their aliases (all are supported by Oniguruma). See
// <tc39.es/ecma262/multipage/text-processing.html#table-binary-unicode-properties>
// Unicode properties must be mapped to property names supported by JS, and must also apply JS's
// stricter rules for casing, whitespace, and underscores in Unicode property names. In order to
// remain lightweight, this library assumes properties not in this list are Unicode script names
Expand All @@ -39,104 +44,99 @@ function getIgnoreCaseMatchChars(char) {
// an `In_` prefix). See also:
// - Properties supported in Oniguruma: <github.com/kkos/oniguruma/blob/master/doc/UNICODE_PROPERTIES>
// - Properties supported in JS by spec version: <github.com/eslint-community/regexpp/blob/main/src/unicode/properties.ts>
// Unicode property names recognized for JS. Every inner array holds the spellings of a single
// property (a name plus any aliases); rows are flattened in order before being stored, so the
// set's contents and iteration order are the same as for one flat list of names.
const JsUnicodeProperties = new Set([
  // --- General categories (ES2024), with aliases. Oniguruma supports every one of them.
  // Data: <github.com/mathiasbynens/unicode-match-property-value-ecmascript/blob/main/data/mappings.js>
  ['C', 'Other'],
  ['Cc', 'Control', 'cntrl'],
  ['Cf', 'Format'],
  ['Cn', 'Unassigned'],
  ['Co', 'Private_Use'],
  ['Cs', 'Surrogate'],
  ['L', 'Letter'],
  ['LC', 'Cased_Letter'],
  ['Ll', 'Lowercase_Letter'],
  ['Lm', 'Modifier_Letter'],
  ['Lo', 'Other_Letter'],
  ['Lt', 'Titlecase_Letter'],
  ['Lu', 'Uppercase_Letter'],
  ['M', 'Mark', 'Combining_Mark'],
  ['Mc', 'Spacing_Mark'],
  ['Me', 'Enclosing_Mark'],
  ['Mn', 'Nonspacing_Mark'],
  ['N', 'Number'],
  ['Nd', 'Decimal_Number', 'digit'],
  ['Nl', 'Letter_Number'],
  ['No', 'Other_Number'],
  ['P', 'Punctuation', 'punct'],
  ['Pc', 'Connector_Punctuation'],
  ['Pd', 'Dash_Punctuation'],
  ['Pe', 'Close_Punctuation'],
  ['Pf', 'Final_Punctuation'],
  ['Pi', 'Initial_Punctuation'],
  ['Po', 'Other_Punctuation'],
  ['Ps', 'Open_Punctuation'],
  ['S', 'Symbol'],
  ['Sc', 'Currency_Symbol'],
  ['Sk', 'Modifier_Symbol'],
  ['Sm', 'Math_Symbol'],
  ['So', 'Other_Symbol'],
  ['Z', 'Separator'],
  ['Zl', 'Line_Separator'],
  ['Zp', 'Paragraph_Separator'],
  ['Zs', 'Space_Separator'],

  // --- Binary properties (ES2024), with aliases. Oniguruma supports every one of them.
  // Data: <tc39.es/ecma262/multipage/text-processing.html#table-binary-unicode-properties>
  ['ASCII'],
  ['ASCII_Hex_Digit', 'AHex'],
  ['Alphabetic', 'Alpha'],
  ['Any'],
  ['Assigned'],
  ['Bidi_Control', 'Bidi_C'],
  ['Bidi_Mirrored', 'Bidi_M'],
  ['Case_Ignorable', 'CI'],
  ['Cased'],
  ['Changes_When_Casefolded', 'CWCF'],
  ['Changes_When_Casemapped', 'CWCM'],
  ['Changes_When_Lowercased', 'CWL'],
  ['Changes_When_NFKC_Casefolded', 'CWKCF'],
  ['Changes_When_Titlecased', 'CWT'],
  ['Changes_When_Uppercased', 'CWU'],
  ['Dash'],
  ['Default_Ignorable_Code_Point', 'DI'],
  ['Deprecated', 'Dep'],
  ['Diacritic', 'Dia'],
  ['Emoji'],
  ['Emoji_Component', 'EComp'],
  ['Emoji_Modifier', 'EMod'],
  ['Emoji_Modifier_Base', 'EBase'],
  ['Emoji_Presentation', 'EPres'],
  ['Extended_Pictographic', 'ExtPict'],
  ['Extender', 'Ext'],
  ['Grapheme_Base', 'Gr_Base'],
  ['Grapheme_Extend', 'Gr_Ext'],
  ['Hex_Digit', 'Hex'],
  ['IDS_Binary_Operator', 'IDSB'],
  ['IDS_Trinary_Operator', 'IDST'],
  ['ID_Continue', 'IDC'],
  ['ID_Start', 'IDS'],
  ['Ideographic', 'Ideo'],
  ['Join_Control', 'Join_C'],
  ['Logical_Order_Exception', 'LOE'],
  ['Lowercase', 'Lower'],
  ['Math'],
  ['Noncharacter_Code_Point', 'NChar'],
  ['Pattern_Syntax', 'Pat_Syn'],
  ['Pattern_White_Space', 'Pat_WS'],
  ['Quotation_Mark', 'QMark'],
  ['Radical'],
  ['Regional_Indicator', 'RI'],
  ['Sentence_Terminal', 'STerm'],
  ['Soft_Dotted', 'SD'],
  ['Terminal_Punctuation', 'Term'],
  ['Unified_Ideograph', 'UIdeo'],
  ['Uppercase', 'Upper'],
  ['Variation_Selector', 'VS'],
  ['White_Space', 'space'],
  ['XID_Continue', 'XIDC'],
  ['XID_Start', 'XIDS'],
].flat());
// The names are kept as a single whitespace-delimited string (one property per line: a name
// followed by any aliases) instead of an array of quoted literals, to keep the source lightweight.
// Lines up to `Zs Space_Separator` are general categories; lines from `ASCII` onward are binary
// properties. The string is trimmed and split on runs of whitespace so that incidental formatting
// (indentation, trailing or doubled spaces, a newline after the opening backtick) can never add
// an empty-string entry to the set.
const JsUnicodeProperties = new Set(
`C Other
Cc Control cntrl
Cf Format
Cn Unassigned
Co Private_Use
Cs Surrogate
L Letter
LC Cased_Letter
Ll Lowercase_Letter
Lm Modifier_Letter
Lo Other_Letter
Lt Titlecase_Letter
Lu Uppercase_Letter
M Mark Combining_Mark
Mc Spacing_Mark
Me Enclosing_Mark
Mn Nonspacing_Mark
N Number
Nd Decimal_Number digit
Nl Letter_Number
No Other_Number
P Punctuation punct
Pc Connector_Punctuation
Pd Dash_Punctuation
Pe Close_Punctuation
Pf Final_Punctuation
Pi Initial_Punctuation
Po Other_Punctuation
Ps Open_Punctuation
S Symbol
Sc Currency_Symbol
Sk Modifier_Symbol
Sm Math_Symbol
So Other_Symbol
Z Separator
Zl Line_Separator
Zp Paragraph_Separator
Zs Space_Separator
ASCII
ASCII_Hex_Digit AHex
Alphabetic Alpha
Any
Assigned
Bidi_Control Bidi_C
Bidi_Mirrored Bidi_M
Case_Ignorable CI
Cased
Changes_When_Casefolded CWCF
Changes_When_Casemapped CWCM
Changes_When_Lowercased CWL
Changes_When_NFKC_Casefolded CWKCF
Changes_When_Titlecased CWT
Changes_When_Uppercased CWU
Dash
Default_Ignorable_Code_Point DI
Deprecated Dep
Diacritic Dia
Emoji
Emoji_Component EComp
Emoji_Modifier EMod
Emoji_Modifier_Base EBase
Emoji_Presentation EPres
Extended_Pictographic ExtPict
Extender Ext
Grapheme_Base Gr_Base
Grapheme_Extend Gr_Ext
Hex_Digit Hex
IDS_Binary_Operator IDSB
IDS_Trinary_Operator IDST
ID_Continue IDC
ID_Start IDS
Ideographic Ideo
Join_Control Join_C
Logical_Order_Exception LOE
Lowercase Lower
Math
Noncharacter_Code_Point NChar
Pattern_Syntax Pat_Syn
Pattern_White_Space Pat_WS
Quotation_Mark QMark
Radical
Regional_Indicator RI
Sentence_Terminal STerm
Soft_Dotted SD
Terminal_Punctuation Term
Unified_Ideograph UIdeo
Uppercase Upper
Variation_Selector VS
White_Space space
XID_Continue XIDC
XID_Start XIDS`.trim().split(/\s+/)
);

const JsUnicodePropertiesMap = new Map();
for (const p of JsUnicodeProperties) {
Expand Down

0 comments on commit 0067f62

Please sign in to comment.