Skip to content

Commit

Permalink
Case fold Latin small long s
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 15, 2024
1 parent c0f04fc commit 74f2ed5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/subclass-strategies.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ function applySubclassStrategies(ast, accuracy) {

// ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
if (isNegatedSearchStart(firstElIn)) {
// Remove the negative lookaround
// Remove the `\G` and its container lookaround
firstElIn.parent.elements.shift();
return 'not_search_start';
}
const negGIndex = singleAltIn.elements.findIndex(el => isNegatedSearchStart(el));
if (negGIndex > -1 && singleAltIn.elements.every(el => el.type === AstTypes.Assertion)) {
// Remove the negative lookaround
// Remove the `\G` and its container lookaround
singleAltIn.elements.splice(negGIndex, 1);
return 'not_search_start';
}
Expand Down
13 changes: 10 additions & 3 deletions src/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@ function getIgnoreCaseMatchChars(char) {
// Everything else is based on `lower`
const upper = lower.toUpperCase();
const title = LowerToTitleCaseMap.get(lower);
const special = LowerToAlternativeUpperCaseMap.get(lower);
const altLower = LowerToAlternativeLowerCaseMap.get(lower);
const altUpper = LowerToAlternativeUpperCaseMap.get(lower);
// Exclude ucase if multiple chars; count code point length. Excludes ucase versions of German
// es-zed 'ß', ligatures like 'ﬀ', and chars with no precomposed ucase like 'ŉ'. See
// <https://unicode.org/Public/UNIDATA/SpecialCasing.txt>
if ([...upper].length === 1) {
set.add(upper);
}
altUpper && set.add(altUpper);
title && set.add(title);
// Lcase of 'İ' is multiple chars, but it's excluded by `CharsWithoutIgnoreCaseExpansion`
set.add(lower);
title && set.add(title);
special && set.add(special);
altLower && set.add(altLower);
return [...set];
}

Expand Down Expand Up @@ -177,6 +179,11 @@ const JsUnicodePropertiesPostEs2018 = new Set((
// ES2024: None, but added `JsUnicodePropertiesOfStrings`
).split(' '));

// Pairs of lowercase chars that are alternatives of each other under case-insensitive matching.
// The pair is registered in both directions so a lookup by either char yields its counterpart.
// `cp` is defined elsewhere in this file; presumably it turns a code point number into a string.
const LowerToAlternativeLowerCaseMap = new Map()
  .set('s', cp(0x17F)) // s -> ſ (U+017F, Latin small letter long s)
  .set(cp(0x17F), 's'); // ſ -> s

const LowerToAlternativeUpperCaseMap = new Map([
[cp(0xDF), cp(0x1E9E)], // ß, ẞ
[cp(0x6B), cp(0x212A)], // k, K (Kelvin)
Expand Down

0 comments on commit 74f2ed5

Please sign in to comment.