From 01dc8379420e648f912add68c22a432e0d3e5bde Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Tue, 1 Oct 2024 19:27:57 +0200 Subject: [PATCH] More follow ups for #24 --- CHANGELOG.md | 2 +- spec/interpolate-number.spec.js | 19 ++++++++-- src/flag-x.js | 6 ++-- src/regex.js | 8 ++--- src/utils.js | 62 ++++++++++++++++----------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4049e9f..3ebbc41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ### 🐞 Fixes -- Convert numbers interpolated in enclosed `\u{…}` to hexadecimal. Ex: `` regex`\u{${160}}` `` returns `/\u{A0}/`. (#24, @graphemecluster) +- Convert numbers interpolated in enclosed `\u{…}` to hexadecimal. In other words, although `` regex`\u{${'160'}}` `` (string interpolated) returns `/\u{160}/`, `` regex`\u{${160}}` `` (number interpolated) now returns `/\u{A0}/`. (#24, @graphemecluster) ## Released changes diff --git a/spec/interpolate-number.spec.js b/spec/interpolate-number.spec.js index 7896a70..2061d1e 100644 --- a/spec/interpolate-number.spec.js +++ b/spec/interpolate-number.spec.js @@ -7,20 +7,33 @@ describe('interpolation: numbers', () => { it('should be quantified as an atomic unit', () => { expect('123123').toMatch(regex`^${123}+$`); + expect('1233').not.toMatch(regex`^${123}+$`); }); it('should allow in interval quantifier', () => { expect('aaa').toMatch(regex`^a{${3}}$`); expect('aaa').toMatch(regex`^a{2,${3}}$`); expect('aaa').toMatch(regex`^a{${3},}$`); + expect('1234567890').toMatch(regex`^.{${10}}$`); + expect('1234567890').toMatch(regex`^.{1${0}}$`); + expect('1234567890').toMatch(regex`^.{${1}0}$`); + expect('1234567890').toMatch(regex`^.{${1}${0}}$`); }); it('should convert to hexadecimal in enclosed \\u', () => { - expect('a').toMatch(regex`^\u{${97}}$`); // 0x61 + expect('a').toMatch(regex`^\u{${97}}$`); // 97 = 0x61 + expect('\u{A}').toMatch(regex`\u{0${10}}`); + expect('\u{10A}').toMatch(regex`\u{10${10}}`); + 
expect('\u{A0}').toMatch(regex`\u{${10}0}`); + expect('\u{A0}').toMatch(regex`\u{${10}${0}}`); }); - it('should throw in enclosed \\p', () => { - expect(() => regex`\p{${12}}`).toThrow(); // 0xC + it('should not convert to hexadecimal in enclosed \\p or \\P', () => { + // Decimal 12 is 0xC, and `.toString(16)` would return 'c' + expect(() => regex`\p{${12}}`).toThrow(); + expect(() => regex`\P{${12}}`).toThrow(); + expect(() => regex`\p{C${12}}`).toThrow(); + expect(() => regex`\P{C${12}}`).toThrow(); }); it('should not let a preceding unescaped \\ change the interpolation', () => { diff --git a/src/flag-x.js b/src/flag-x.js index f80f786..49c8daa 100644 --- a/src/flag-x.js +++ b/src/flag-x.js @@ -108,8 +108,8 @@ export function flagXPreprocessor(value, runningContext, options) { if ( charClassWs.test(m) && ( charClassContext === CharClassContext.DEFAULT || - charClassContext === CharClassContext.RANGE || - charClassContext === CharClassContext.Q_TOKEN + charClassContext === CharClassContext.ENCLOSED_Q || + charClassContext === CharClassContext.RANGE ) ) { ignoringCharClassWs = true; @@ -120,7 +120,7 @@ export function flagXPreprocessor(value, runningContext, options) { throw new Error(`Invalid incomplete token in character class: "${m}"`); } else if ( escapedCharClassWs.test(m) && - (charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.Q_TOKEN) + (charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.ENCLOSED_Q) ) { transformed += update(m[1], {prefix: false}); } else if (charClassContext === CharClassContext.DEFAULT) { diff --git a/src/regex.js b/src/regex.js index f5a0bee..7e0a1dd 100644 --- a/src/regex.js +++ b/src/regex.js @@ -1,5 +1,5 @@ import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities'; -import {CharClassContext, RegexContext, adjustNumberedBackrefs, capturingDelim, charClassEnclosedTokenContexts, containsCharClassUnion, countCaptures, emulationGroupMarker, escapeV, 
flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, regexEnclosedTokenContexts, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js'; +import {CharClassContext, RegexContext, adjustNumberedBackrefs, capturingDelim, containsCharClassUnion, countCaptures, emulationGroupMarker, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, escapeV, flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js'; import {Pattern, pattern} from './pattern.js'; import {flagNPreprocessor} from './flag-n.js'; import {flagXPreprocessor, cleanPlugin} from './flag-x.js'; @@ -265,7 +265,7 @@ function interpolate(value, flags, regexContext, charClassContext, wrapEscapedSt } if ( typeof value === 'number' && - (regexContext === RegexContext.U_TOKEN || charClassContext === CharClassContext.U_TOKEN) + (regexContext === RegexContext.ENCLOSED_U || charClassContext === CharClassContext.ENCLOSED_U) ) { return value.toString(16); } @@ -289,8 +289,8 @@ function interpolate(value, flags, regexContext, charClassContext, wrapEscapedSt if ( regexContext === RegexContext.INTERVAL_QUANTIFIER || regexContext === RegexContext.GROUP_NAME || - regexEnclosedTokenContexts.has(regexContext) || - charClassEnclosedTokenContexts.has(charClassContext) + enclosedTokenRegexContexts.has(regexContext) || + enclosedTokenCharClassContexts.has(charClassContext) ) { return isPattern ? 
String(value) : escapedValue; } else if (regexContext === RegexContext.CHAR_CLASS) { diff --git a/src/utils.js b/src/utils.js index b4da86b..e17f92e 100644 --- a/src/utils.js +++ b/src/utils.js @@ -2,33 +2,33 @@ import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities'; import {Pattern, pattern} from './pattern.js'; export const RegexContext = { - DEFAULT: 'R_DEFAULT', - CHAR_CLASS: 'R_CHAR_CLASS', - GROUP_NAME: 'R_GROUP_NAME', - P_TOKEN: 'R_P_TOKEN', - U_TOKEN: 'R_U_TOKEN', - INTERVAL_QUANTIFIER: 'R_INTERVAL_QUANTIFIER', - INVALID_INCOMPLETE_TOKEN: 'R_INVALID_INCOMPLETE_TOKEN', + DEFAULT: 'DEFAULT', + CHAR_CLASS: 'CHAR_CLASS', + ENCLOSED_P: 'ENCLOSED_P', + ENCLOSED_U: 'ENCLOSED_U', + GROUP_NAME: 'GROUP_NAME', + INTERVAL_QUANTIFIER: 'INTERVAL_QUANTIFIER', + INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN', }; export const CharClassContext = { - DEFAULT: 'CC_DEFAULT', - RANGE: 'CC_RANGE', - P_TOKEN: 'CC_P_TOKEN', - Q_TOKEN: 'CC_Q_TOKEN', - U_TOKEN: 'CC_U_TOKEN', - INVALID_INCOMPLETE_TOKEN: 'CC_INVALID_INCOMPLETE_TOKEN', + DEFAULT: 'DEFAULT', + ENCLOSED_P: 'ENCLOSED_P', + ENCLOSED_Q: 'ENCLOSED_Q', + ENCLOSED_U: 'ENCLOSED_U', + INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN', + RANGE: 'RANGE', }; -export const regexEnclosedTokenContexts = new Set([ - RegexContext.P_TOKEN, - RegexContext.U_TOKEN, +export const enclosedTokenRegexContexts = new Set([ + RegexContext.ENCLOSED_P, + RegexContext.ENCLOSED_U, ]); -export const charClassEnclosedTokenContexts = new Set([ - CharClassContext.P_TOKEN, - CharClassContext.Q_TOKEN, - CharClassContext.U_TOKEN, +export const enclosedTokenCharClassContexts = new Set([ + CharClassContext.ENCLOSED_P, + CharClassContext.ENCLOSED_Q, + CharClassContext.ENCLOSED_U, ]); export const patternModsSupported = (() => { @@ -135,13 +135,13 @@ export function getBreakoutChar(expression, regexContext, charClassContext) { return getUnbalancedChar(escapesRemoved, '(', ')'); } else if ( regexContext === RegexContext.CHAR_CLASS && 
- !charClassEnclosedTokenContexts.has(charClassContext) + !enclosedTokenCharClassContexts.has(charClassContext) ) { return getUnbalancedChar(escapesRemoved, '[', ']'); } else if ( regexContext === RegexContext.INTERVAL_QUANTIFIER || - regexEnclosedTokenContexts.has(regexContext) || - charClassEnclosedTokenContexts.has(charClassContext) + enclosedTokenRegexContexts.has(regexContext) || + enclosedTokenCharClassContexts.has(charClassContext) ) { if (escapesRemoved.includes('}')) { return '}'; @@ -157,7 +157,7 @@ export function getBreakoutChar(expression, regexContext, charClassContext) { const contextToken = new RegExp(String.raw` (?<groupN>\(\?<(?![=!])|\\[gk]<) | (?<enclosedPU>\\[pPu]\{) -| (?<qT>\\q\{) +| (?<enclosedQ>\\q\{) | (?<intervalQ>\{) | (?<incompleteT>\\(?: $ | c(?![A-Za-z]) @@ -193,7 +193,7 @@ export function getEndContextForIncompleteExpression(incompleteExpression, { contextToken.lastIndex = lastPos; let match; while (match = contextToken.exec(incompleteExpression)) { - const {0: m, groups: {groupN, enclosedPU, qT, intervalQ, incompleteT}} = match; + const {0: m, groups: {groupN, enclosedPU, enclosedQ, intervalQ, incompleteT}} = match; if (m === '[') { charClassDepth++; regexContext = RegexContext.CHAR_CLASS; @@ -212,11 +212,11 @@ export function getEndContextForIncompleteExpression(incompleteExpression, { } else if (m === '-') { charClassContext = CharClassContext.RANGE; } else if (enclosedPU) { - charClassContext = m[1] === 'u' ? CharClassContext.U_TOKEN : CharClassContext.P_TOKEN; - } else if (qT) { - charClassContext = CharClassContext.Q_TOKEN; + charClassContext = m[1] === 'u' ? 
CharClassContext.ENCLOSED_U : CharClassContext.ENCLOSED_P; + } else if (enclosedQ) { + charClassContext = CharClassContext.ENCLOSED_Q; } else if ( - (m === '}' && charClassEnclosedTokenContexts.has(charClassContext)) || + (m === '}' && enclosedTokenCharClassContexts.has(charClassContext)) || // Don't continue in these contexts since we've advanced another token charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN || charClassContext === CharClassContext.RANGE @@ -229,12 +229,12 @@ export function getEndContextForIncompleteExpression(incompleteExpression, { } else if (groupN) { regexContext = RegexContext.GROUP_NAME; } else if (enclosedPU) { - regexContext = m[1] === 'u' ? RegexContext.U_TOKEN : RegexContext.P_TOKEN; + regexContext = m[1] === 'u' ? RegexContext.ENCLOSED_U : RegexContext.ENCLOSED_P; } else if (intervalQ) { regexContext = RegexContext.INTERVAL_QUANTIFIER; } else if ( (m === '>' && regexContext === RegexContext.GROUP_NAME) || - (m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || regexEnclosedTokenContexts.has(regexContext))) || + (m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || enclosedTokenRegexContexts.has(regexContext))) || // Don't continue in this context since we've advanced another token regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN ) {