From 74f2ed5c36aeb69f8a9b5e745e8ed74d2aa4b6da Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Fri, 15 Nov 2024 18:57:26 +0100 Subject: [PATCH] Case fold Latin small long s --- src/subclass-strategies.js | 4 ++-- src/unicode.js | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/subclass-strategies.js b/src/subclass-strategies.js index 8bd1f97..5fedcd7 100644 --- a/src/subclass-strategies.js +++ b/src/subclass-strategies.js @@ -51,13 +51,13 @@ function applySubclassStrategies(ast, accuracy) { // ## Strategy `not_search_start`: Support leading `(?!\G)` and similar if (isNegatedSearchStart(firstElIn)) { - // Remove the negative lookaround + // Remove the `\G` and its container lookaround firstElIn.parent.elements.shift(); return 'not_search_start'; } const negGIndex = singleAltIn.elements.findIndex(el => isNegatedSearchStart(el)); if (negGIndex > -1 && singleAltIn.elements.every(el => el.type === AstTypes.Assertion)) { - // Remove the negative lookaround + // Remove the `\G` and its container lookaround singleAltIn.elements.splice(negGIndex, 1); return 'not_search_start'; } diff --git a/src/unicode.js b/src/unicode.js index a2f20b9..306dd35 100644 --- a/src/unicode.js +++ b/src/unicode.js @@ -15,17 +15,19 @@ function getIgnoreCaseMatchChars(char) { // Everything else is based on `lower` const upper = lower.toUpperCase(); const title = LowerToTitleCaseMap.get(lower); - const special = LowerToAlternativeUpperCaseMap.get(lower); + const altLower = LowerToAlternativeLowerCaseMap.get(lower); + const altUpper = LowerToAlternativeUpperCaseMap.get(lower); // Exclude ucase if multiple chars; count code point length. Excludes ucase versions of German // es-zed 'ß', ligatures like 'ﬀ', and chars with no precomposed ucase like 'ŉ'.
See // <https://unicode.org/Public/UNIDATA/SpecialCasing.txt> if ([...upper].length === 1) { set.add(upper); } + altUpper && set.add(altUpper); + title && set.add(title); // Lcase of 'İ' is multiple chars, but it's excluded by `CharsWithoutIgnoreCaseExpansion` set.add(lower); - title && set.add(title); - special && set.add(special); + altLower && set.add(altLower); return [...set]; } @@ -177,6 +179,11 @@ const JsUnicodePropertiesPostEs2018 = new Set(( // ES2024: None, but added `JsUnicodePropertiesOfStrings` ).split(' ')); +const LowerToAlternativeLowerCaseMap = new Map([ + ['s', cp(0x17F)], // s, ſ + [cp(0x17F), 's'], // ſ, s +]); + const LowerToAlternativeUpperCaseMap = new Map([ [cp(0xDF), cp(0x1E9E)], // ß, ẞ [cp(0x6B), cp(0x212A)], // k, K (Kelvin)