Skip to content

Commit

Permalink
More follow ups for #24
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Oct 1, 2024
1 parent 71d1662 commit 01dc837
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 42 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

### 🐞 Fixes

- Convert numbers interpolated in enclosed `\u{…}` to hexadecimal. Ex: `` regex`\u{${160}}` `` returns `/\u{A0}/`. (#24, @graphemecluster)
- Convert numbers interpolated in enclosed `\u{…}` to hexadecimal. In other words, although `` regex`\u{${'160'}}` `` (string interpolated) returns `/\u{160}/`, `` regex`\u{${160}}` `` (number interpolated) now returns `/\u{A0}/`. (#24, @graphemecluster)

## Released changes

Expand Down
19 changes: 16 additions & 3 deletions spec/interpolate-number.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,33 @@ describe('interpolation: numbers', () => {

it('should be quantified as an atomic unit', () => {
  // The trailing `+` has to repeat the whole interpolated number (`123`), not
  // just its last digit, so '1233' must be rejected.
  const repeatedNumber = regex`^${123}+$`;
  expect('123123').toMatch(repeatedNumber);
  expect('1233').not.toMatch(repeatedNumber);
});

it('should allow in interval quantifier', () => {
  // An interpolated number supplies a complete bound of the `{…}` quantifier.
  const wholeBoundPatterns = [
    regex`^a{${3}}$`,
    regex`^a{2,${3}}$`,
    regex`^a{${3},}$`,
  ];
  for (const re of wholeBoundPatterns) {
    expect('aaa').toMatch(re);
  }

  // Interpolated numbers combine with adjacent digits (literal or also
  // interpolated) to form the single count `10`.
  const tenCountPatterns = [
    regex`^.{${10}}$`,
    regex`^.{1${0}}$`,
    regex`^.{${1}0}$`,
    regex`^.{${1}${0}}$`,
  ];
  for (const re of tenCountPatterns) {
    expect('1234567890').toMatch(re);
  }
});

it('should convert to hexadecimal in enclosed \\u', () => {
expect('a').toMatch(regex`^\u{${97}}$`); // 0x61
expect('a').toMatch(regex`^\u{${97}}$`); // 97 = 0x61
expect('\u{A}').toMatch(regex`\u{0${10}}`);
expect('\u{10A}').toMatch(regex`\u{10${10}}`);
expect('\u{A0}').toMatch(regex`\u{${10}0}`);
expect('\u{A0}').toMatch(regex`\u{${10}${0}}`);
});

it('should throw in enclosed \\p', () => {
expect(() => regex`\p{${12}}`).toThrow(); // 0xC
it('should not convert to hexadecimal in enclosed \\p or \\P', () => {
// Decimal 12 is 0xC, and `.toString(16)` would return 'c'
expect(() => regex`\p{${12}}`).toThrow();
expect(() => regex`\P{${12}}`).toThrow();
expect(() => regex`\p{C${12}}`).toThrow();
expect(() => regex`\P{C${12}}`).toThrow();
});

it('should not let a preceding unescaped \\ change the interpolation', () => {
Expand Down
6 changes: 3 additions & 3 deletions src/flag-x.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ export function flagXPreprocessor(value, runningContext, options) {
if (
charClassWs.test(m) &&
( charClassContext === CharClassContext.DEFAULT ||
charClassContext === CharClassContext.RANGE ||
charClassContext === CharClassContext.Q_TOKEN
charClassContext === CharClassContext.ENCLOSED_Q ||
charClassContext === CharClassContext.RANGE
)
) {
ignoringCharClassWs = true;
Expand All @@ -120,7 +120,7 @@ export function flagXPreprocessor(value, runningContext, options) {
throw new Error(`Invalid incomplete token in character class: "${m}"`);
} else if (
escapedCharClassWs.test(m) &&
(charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.Q_TOKEN)
(charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.ENCLOSED_Q)
) {
transformed += update(m[1], {prefix: false});
} else if (charClassContext === CharClassContext.DEFAULT) {
Expand Down
8 changes: 4 additions & 4 deletions src/regex.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';
import {CharClassContext, RegexContext, adjustNumberedBackrefs, capturingDelim, charClassEnclosedTokenContexts, containsCharClassUnion, countCaptures, emulationGroupMarker, escapeV, flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, regexEnclosedTokenContexts, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {CharClassContext, RegexContext, adjustNumberedBackrefs, capturingDelim, containsCharClassUnion, countCaptures, emulationGroupMarker, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, escapeV, flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {Pattern, pattern} from './pattern.js';
import {flagNPreprocessor} from './flag-n.js';
import {flagXPreprocessor, cleanPlugin} from './flag-x.js';
Expand Down Expand Up @@ -265,7 +265,7 @@ function interpolate(value, flags, regexContext, charClassContext, wrapEscapedSt
}
if (
typeof value === 'number' &&
(regexContext === RegexContext.U_TOKEN || charClassContext === CharClassContext.U_TOKEN)
(regexContext === RegexContext.ENCLOSED_U || charClassContext === CharClassContext.ENCLOSED_U)
) {
return value.toString(16);
}
Expand All @@ -289,8 +289,8 @@ function interpolate(value, flags, regexContext, charClassContext, wrapEscapedSt
if (
regexContext === RegexContext.INTERVAL_QUANTIFIER ||
regexContext === RegexContext.GROUP_NAME ||
regexEnclosedTokenContexts.has(regexContext) ||
charClassEnclosedTokenContexts.has(charClassContext)
enclosedTokenRegexContexts.has(regexContext) ||
enclosedTokenCharClassContexts.has(charClassContext)
) {
return isPattern ? String(value) : escapedValue;
} else if (regexContext === RegexContext.CHAR_CLASS) {
Expand Down
62 changes: 31 additions & 31 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,33 @@ import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities';
import {Pattern, pattern} from './pattern.js';

export const RegexContext = {
DEFAULT: 'R_DEFAULT',
CHAR_CLASS: 'R_CHAR_CLASS',
GROUP_NAME: 'R_GROUP_NAME',
P_TOKEN: 'R_P_TOKEN',
U_TOKEN: 'R_U_TOKEN',
INTERVAL_QUANTIFIER: 'R_INTERVAL_QUANTIFIER',
INVALID_INCOMPLETE_TOKEN: 'R_INVALID_INCOMPLETE_TOKEN',
DEFAULT: 'DEFAULT',
CHAR_CLASS: 'CHAR_CLASS',
ENCLOSED_P: 'ENCLOSED_P',
ENCLOSED_U: 'ENCLOSED_U',
GROUP_NAME: 'GROUP_NAME',
INTERVAL_QUANTIFIER: 'INTERVAL_QUANTIFIER',
INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
};

export const CharClassContext = {
DEFAULT: 'CC_DEFAULT',
RANGE: 'CC_RANGE',
P_TOKEN: 'CC_P_TOKEN',
Q_TOKEN: 'CC_Q_TOKEN',
U_TOKEN: 'CC_U_TOKEN',
INVALID_INCOMPLETE_TOKEN: 'CC_INVALID_INCOMPLETE_TOKEN',
DEFAULT: 'DEFAULT',
ENCLOSED_P: 'ENCLOSED_P',
ENCLOSED_Q: 'ENCLOSED_Q',
ENCLOSED_U: 'ENCLOSED_U',
INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
RANGE: 'RANGE',
};

export const regexEnclosedTokenContexts = new Set([
RegexContext.P_TOKEN,
RegexContext.U_TOKEN,
export const enclosedTokenRegexContexts = new Set([
RegexContext.ENCLOSED_P,
RegexContext.ENCLOSED_U,
]);

export const charClassEnclosedTokenContexts = new Set([
CharClassContext.P_TOKEN,
CharClassContext.Q_TOKEN,
CharClassContext.U_TOKEN,
export const enclosedTokenCharClassContexts = new Set([
CharClassContext.ENCLOSED_P,
CharClassContext.ENCLOSED_Q,
CharClassContext.ENCLOSED_U,
]);

export const patternModsSupported = (() => {
Expand Down Expand Up @@ -135,13 +135,13 @@ export function getBreakoutChar(expression, regexContext, charClassContext) {
return getUnbalancedChar(escapesRemoved, '(', ')');
} else if (
regexContext === RegexContext.CHAR_CLASS &&
!charClassEnclosedTokenContexts.has(charClassContext)
!enclosedTokenCharClassContexts.has(charClassContext)
) {
return getUnbalancedChar(escapesRemoved, '[', ']');
} else if (
regexContext === RegexContext.INTERVAL_QUANTIFIER ||
regexEnclosedTokenContexts.has(regexContext) ||
charClassEnclosedTokenContexts.has(charClassContext)
enclosedTokenRegexContexts.has(regexContext) ||
enclosedTokenCharClassContexts.has(charClassContext)
) {
if (escapesRemoved.includes('}')) {
return '}';
Expand All @@ -157,7 +157,7 @@ export function getBreakoutChar(expression, regexContext, charClassContext) {
const contextToken = new RegExp(String.raw`
(?<groupN>\(\?<(?![=!])|\\[gk]<)
| (?<enclosedPU>\\[pPu]\{)
| (?<qT>\\q\{)
| (?<enclosedQ>\\q\{)
| (?<intervalQ>\{)
| (?<incompleteT>\\(?: $
| c(?![A-Za-z])
Expand Down Expand Up @@ -193,7 +193,7 @@ export function getEndContextForIncompleteExpression(incompleteExpression, {
contextToken.lastIndex = lastPos;
let match;
while (match = contextToken.exec(incompleteExpression)) {
const {0: m, groups: {groupN, enclosedPU, qT, intervalQ, incompleteT}} = match;
const {0: m, groups: {groupN, enclosedPU, enclosedQ, intervalQ, incompleteT}} = match;
if (m === '[') {
charClassDepth++;
regexContext = RegexContext.CHAR_CLASS;
Expand All @@ -212,11 +212,11 @@ export function getEndContextForIncompleteExpression(incompleteExpression, {
} else if (m === '-') {
charClassContext = CharClassContext.RANGE;
} else if (enclosedPU) {
charClassContext = m[1] === 'u' ? CharClassContext.U_TOKEN : CharClassContext.P_TOKEN;
} else if (qT) {
charClassContext = CharClassContext.Q_TOKEN;
charClassContext = m[1] === 'u' ? CharClassContext.ENCLOSED_U : CharClassContext.ENCLOSED_P;
} else if (enclosedQ) {
charClassContext = CharClassContext.ENCLOSED_Q;
} else if (
(m === '}' && charClassEnclosedTokenContexts.has(charClassContext)) ||
(m === '}' && enclosedTokenCharClassContexts.has(charClassContext)) ||
// Don't continue in these contexts since we've advanced another token
charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN ||
charClassContext === CharClassContext.RANGE
Expand All @@ -229,12 +229,12 @@ export function getEndContextForIncompleteExpression(incompleteExpression, {
} else if (groupN) {
regexContext = RegexContext.GROUP_NAME;
} else if (enclosedPU) {
regexContext = m[1] === 'u' ? RegexContext.U_TOKEN : RegexContext.P_TOKEN;
regexContext = m[1] === 'u' ? RegexContext.ENCLOSED_U : RegexContext.ENCLOSED_P;
} else if (intervalQ) {
regexContext = RegexContext.INTERVAL_QUANTIFIER;
} else if (
(m === '>' && regexContext === RegexContext.GROUP_NAME) ||
(m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || regexEnclosedTokenContexts.has(regexContext))) ||
(m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || enclosedTokenRegexContexts.has(regexContext))) ||
// Don't continue in this context since we've advanced another token
regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN
) {
Expand Down

0 comments on commit 01dc837

Please sign in to comment.