Skip to content

Commit

Permalink
Move exports; tweak subclass
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 17, 2024
1 parent e2ba7d4 commit 3a97b9c
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 39 deletions.
2 changes: 1 addition & 1 deletion spec/interpolate-regexp.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ describe('interpolation: regexes', () => {

it('should treat pattern modifiers as noncapturing when adjusting backreferences', () => {
if (!envSupportsFlagGroups) {
pending('requires support for flag groups (Node 23)');
pending('requires support for flag groups (Node.js 23)');
}
expect('abb').toMatch(regex`^(?i:a)${/(b)\1/}$`);
});
Expand Down
2 changes: 1 addition & 1 deletion spec/regex-tag.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ describe('regex', () => {

it('should adjust indices with flag d for emulation groups', () => {
if (!envSupportsFlagD) {
pending('requires support for flag d (Node 16)');
pending('requires support for flag d (Node.js 16)');
}
expect(regex({flags: 'd', subclass: true})`(?>.)`.exec('a').indices).toHaveSize(1);
// ## Documenting behavior when subclass is not used
Expand Down
2 changes: 1 addition & 1 deletion spec/subroutines.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ describe('subroutines', () => {

it('should refer to the first group with name when duplicate capture names exist', () => {
if (!envSupportsDuplicateNames) {
pending('requires support for duplicate names (Node 23)');
pending('requires support for duplicate names (Node.js 23)');
}
expect('aa ba bb'.match(regex('g')`(?<n>a)|(?<n>b)\g<n>`)).toEqual(['a', 'a', 'ba']);
expect('aa ba bb'.match(regex('g')`(?<n>a)\g<n>|(?<n>b)`)).toEqual(['aa', 'b', 'b', 'b']);
Expand Down
9 changes: 7 additions & 2 deletions src/atomic.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Apply transformations for atomic groups: `(?>…)`.
@param {import('./regex.js').PluginData} [data]
@returns {string}
*/
export function atomic(expression, data) {
function atomic(expression, data) {
if (!/\(\?>/.test(expression)) {
return expression;
}
Expand Down Expand Up @@ -116,7 +116,7 @@ Possessive quantifiers in Oniguruma and Onigmo are only: `?+`, `*+`, `++`.
@param {string} expression
@returns {string}
*/
export function possessive(expression) {
function possessive(expression) {
if (!(new RegExp(`${baseQuantifier}\\+`).test(expression))) {
return expression;
}
Expand Down Expand Up @@ -180,3 +180,8 @@ export function possessive(expression) {
}
return expression;
}

export {
atomic,
possessive,
};
6 changes: 5 additions & 1 deletion src/backcompat.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Assumes flag u and doesn't worry about syntax errors that are caught by it.
@param {string} expression
@returns {string}
*/
export function backcompatPlugin(expression) {
function backcompatPlugin(expression) {
const unescapedLiteralHyphenMsg = 'Invalid unescaped "-" in character class';
let inCharClass = false;
let result = '';
Expand Down Expand Up @@ -53,3 +53,7 @@ export function backcompatPlugin(expression) {
}
return result;
}

export {
backcompatPlugin,
};
6 changes: 5 additions & 1 deletion src/flag-n.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Preprocessors are applied to the outer regex and interpolated patterns, but not
regexes or strings.
@type {import('./utils.js').Preprocessor}
*/
export function flagNPreprocessor(value, runningContext) {
function flagNPreprocessor(value, runningContext) {
value = String(value);
let expression = '';
let transformed = '';
Expand All @@ -39,3 +39,7 @@ export function flagNPreprocessor(value, runningContext) {
runningContext,
};
}

export {
flagNPreprocessor,
};
9 changes: 7 additions & 2 deletions src/flag-x.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Preprocessors are applied to the outer regex and interpolated patterns, but not
regexes or strings.
@type {import('./utils.js').Preprocessor}
*/
export function flagXPreprocessor(value, runningContext, options) {
function flagXPreprocessor(value, runningContext, options) {
value = String(value);
let ignoringWs = false;
let ignoringCharClassWs = false;
Expand Down Expand Up @@ -153,7 +153,7 @@ Remove `(?:)` token separators (most likely added by flag x) in cases where it's
@param {string} expression
@returns {string}
*/
export function clean(expression) {
function clean(expression) {
const sep = String.raw`\(\?:\)`;
// No need for repeated separators
expression = replaceUnescaped(expression, `(?:${sep}){2,}`, '(?:)', Context.DEFAULT);
Expand Down Expand Up @@ -182,3 +182,8 @@ export function clean(expression) {
);
return expression;
}

export {
clean,
flagXPreprocessor,
};
9 changes: 7 additions & 2 deletions src/pattern.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export class Pattern {
class Pattern {
#value;
/** @param {string} value */
constructor(value) {
Expand Down Expand Up @@ -27,7 +27,7 @@ Can be called as a function or template tag:
@param {...string} substitutions
@returns {Pattern}
*/
export function pattern(first, ...substitutions) {
function pattern(first, ...substitutions) {
if (Array.isArray(first?.raw)) {
return new Pattern(
// Intersperse raw template strings and substitutions
Expand All @@ -38,3 +38,8 @@ export function pattern(first, ...substitutions) {
}
throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`);
}

export {
Pattern,
pattern,
};
21 changes: 15 additions & 6 deletions src/subclass.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,25 @@ import {Context, replaceUnescaped} from 'regex-utilities';
const emulationGroupMarker = '$E$';

/**
@class
@augments RegExp
@param {string | RegExpSubclass} expression
@param {string} [flags]
@param {{useEmulationGroups: boolean;}} [options]
Works the same as JavaScript's native `RegExp` constructor in all contexts, but automatically
adjusts matches and subpattern indices (with flag `d`) to account for injected emulation groups.
*/
class RegExpSubclass extends RegExp {
// Avoid #private to allow for subclassing
/**
Avoid `#private` to allow for subclassing.
@private
@type {Array<boolean> | undefined}
*/
_captureMap;
/**
@param {string | RegExpSubclass} expression
@param {string} [flags]
@param {{useEmulationGroups: boolean;}} [options]
*/
constructor(expression, flags, options) {
if (expression instanceof RegExp && options) {
throw new Error('Cannot provide options when copying regexp');
}
let captureMap;
if (options?.useEmulationGroups) {
({expression, captureMap} = unmarkEmulationGroups(expression));
Expand Down
6 changes: 5 additions & 1 deletion src/subroutines.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped
@param {import('./regex.js').PluginData} [data]
@returns {string}
*/
export function subroutines(expression, data) {
function subroutines(expression, data) {
// NOTE: subroutines and definition groups fully support numbered backreferences and unnamed
// captures (from interpolated regexes or from turning implicit flag n off), and all of the
// complex forward and backward backreference adjustments that can result
Expand Down Expand Up @@ -334,3 +334,7 @@ function lastOf(arr) {
// <https://caniuse.com/mdn-javascript_builtins_array_at>
return arr[arr.length - 1];
}

export {
subroutines,
};
66 changes: 45 additions & 21 deletions src/utils.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {Pattern, pattern} from './pattern.js';
import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities';

export const RegexContext = {
const RegexContext = {
DEFAULT: 'DEFAULT',
CHAR_CLASS: 'CHAR_CLASS',
ENCLOSED_P: 'ENCLOSED_P',
Expand All @@ -11,7 +11,7 @@ export const RegexContext = {
INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
};

export const CharClassContext = {
const CharClassContext = {
DEFAULT: 'DEFAULT',
ENCLOSED_P: 'ENCLOSED_P',
ENCLOSED_Q: 'ENCLOSED_Q',
Expand All @@ -20,18 +20,18 @@ export const CharClassContext = {
RANGE: 'RANGE',
};

export const enclosedTokenRegexContexts = new Set([
// The subset of `RegexContext` values whose names start with `ENCLOSED_`; kept as a `Set` so
// callers can do a constant-time membership check.
// NOTE(review): presumably these mark being inside the braces of an enclosed token such as
// `\p{…}` or `\u{…}` — confirm against where the contexts are assigned.
const enclosedTokenRegexContexts = new Set([
RegexContext.ENCLOSED_P,
RegexContext.ENCLOSED_U,
]);

export const enclosedTokenCharClassContexts = new Set([
// The subset of `CharClassContext` values whose names start with `ENCLOSED_`; kept as a `Set` so
// callers can do a constant-time membership check.
// NOTE(review): presumably these mark being inside the braces of an enclosed token within a
// character class, such as `\p{…}`, `\q{…}`, or `\u{…}` — confirm against where the contexts
// are assigned.
const enclosedTokenCharClassContexts = new Set([
CharClassContext.ENCLOSED_P,
CharClassContext.ENCLOSED_Q,
CharClassContext.ENCLOSED_U,
]);

export const envSupportsFlagGroups = (() => {
const envSupportsFlagGroups = (() => {
try {
new RegExp('(?i:)');
} catch {
Expand All @@ -40,7 +40,7 @@ export const envSupportsFlagGroups = (() => {
return true;
})();

export const envSupportsFlagV = (() => {
const envSupportsFlagV = (() => {
try {
new RegExp('', 'v');
} catch {
Expand All @@ -49,17 +49,17 @@ export const envSupportsFlagV = (() => {
return true;
})();

export const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
export const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
export const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`;
export const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
// Characters that are treated as "double punctuators". The string is interpolated directly into
// a `[…]` character class when building a regex, so the order and contents matter.
// NOTE(review): this appears to match the set of characters that are reserved when doubled inside
// character classes under flag v — confirm against the spec before changing.
const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
// Pattern source (a string, not a `RegExp`) matching the opening of a named capturing group:
// `(?<name>`. The `(?![=!])` lookahead excludes lookbehind openers `(?<=` and `(?<!`. The group
// name is exposed via the named capture `captureName`.
const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
// Pattern source matching the opening of any capturing group. The first alternative is a `(` that
// is not followed by `?` and is not the second paren of `(?(DEFINE)` (the lookbehind checks that
// the just-consumed `(` was preceded by `(?`). The second alternative is a named group opener.
const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`;
// Pattern source matching the opening of a group that does not capture: `(?` followed by one of
// `:`, `=`, `!`, `>`, an ASCII letter, or `-`; or by `<=` / `<!`; or by `(DEFINE)`.
const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;

/**
@param {string} expression
@param {number} precedingCaptures
@returns {string}
*/
export function adjustNumberedBackrefs(expression, precedingCaptures) {
function adjustNumberedBackrefs(expression, precedingCaptures) {
return replaceUnescaped(
expression,
String.raw`\\(?<num>[1-9]\d*)`,
Expand Down Expand Up @@ -93,7 +93,7 @@ const charClassUnionToken = new RegExp(String.raw`
`.replace(/\s+/g, ''), 'gsu');

// Assumes flag v and doesn't worry about syntax errors that are caught by it
export function containsCharClassUnion(charClassPattern) {
function containsCharClassUnion(charClassPattern) {
// Return `true` if it contains:
// - `\p` (lowercase only) and the name is a property of strings (case sensitive).
// - `\q`.
Expand Down Expand Up @@ -130,7 +130,7 @@ export function containsCharClassUnion(charClassPattern) {
@param {string} expression
@returns {number}
*/
export function countCaptures(expression) {
function countCaptures(expression) {
let num = 0;
forEachUnescaped(expression, capturingDelim, () => num++, Context.DEFAULT);
return num;
Expand All @@ -142,7 +142,7 @@ Escape special characters for the given context, assuming flag v.
@param {'DEFAULT' | 'CHAR_CLASS'} context `Context` option from lib `regex-utilities`
@returns {string} Escaped string
*/
export function escapeV(str, context) {
function escapeV(str, context) {
if (context === Context.CHAR_CLASS) {
// Escape all double punctuators (including ^, which is special on its own in the first
// position) in case they're bordered by the same character in or outside of the escaped string
Expand All @@ -152,7 +152,7 @@ export function escapeV(str, context) {
}

// Look for characters that would change the meaning of subsequent tokens outside an interpolated value
export function getBreakoutChar(expression, regexContext, charClassContext) {
function getBreakoutChar(expression, regexContext, charClassContext) {
const escapesRemoved = expression.replace(/\\./gsu, '');
// Trailing unescaped `\`; checking `.includes('\\')` would also work
if (escapesRemoved.endsWith('\\')) {
Expand Down Expand Up @@ -212,7 +212,7 @@ seen. Assumes flag v and doesn't worry about syntax errors that are caught by it
@param {Partial<RunningContext>} [runningContext]
@returns {RunningContext}
*/
export function getEndContextForIncompleteExpression(incompleteExpression, {
function getEndContextForIncompleteExpression(incompleteExpression, {
regexContext = RegexContext.DEFAULT,
charClassContext = CharClassContext.DEFAULT,
charClassDepth = 0,
Expand Down Expand Up @@ -315,7 +315,7 @@ processes substitutions that are instanceof `Pattern`.
@param {Required<RegexTagOptions>} options
@returns {{template: RawTemplate; substitutions: ReadonlyArray<InterpolatedValue>;}}
*/
export function preprocess(template, substitutions, preprocessor, options) {
function preprocess(template, substitutions, preprocessor, options) {
let /** @type {RawTemplate} */ newTemplate = {raw: []};
let newSubstitutions = [];
let runningContext;
Expand All @@ -342,7 +342,7 @@ export function preprocess(template, substitutions, preprocessor, options) {

// Sandbox `^` if relevant, done so it can't change the meaning of the surrounding character class
// if we happen to be at the first position. See `sandboxLoneDoublePunctuatorChar` for more details
export function sandboxLoneCharClassCaret(str) {
function sandboxLoneCharClassCaret(str) {
  // Only a caret in the very first position of the string is affected
  const leadingCaret = /^\^/;
  // An escaped caret followed by a second, unescaped caret
  const sandboxedCaret = '\\^^';
  // A function replacer returns the text verbatim, so no `$` replacement patterns are involved
  return str.replace(leadingCaret, () => sandboxedCaret);
}

Expand All @@ -355,7 +355,7 @@ export function sandboxLoneCharClassCaret(str) {
// - Can't add a second unescaped symbol if a lone symbol is the entire string because it might be
// followed by the same unescaped symbol outside an interpolation, and since it won't be wrapped,
// the second symbol wouldn't be sandboxed from the one following it.
export function sandboxLoneDoublePunctuatorChar(str) {
function sandboxLoneDoublePunctuatorChar(str) {
return str.replace(new RegExp(`^([${doublePunctuatorChars}])(?!\\1)`), (m, _, pos) => {
return `\\${m}${pos + 1 === str.length ? '' : m}`;
});
Expand All @@ -367,7 +367,7 @@ Converts `\0` tokens to `\x00` in the given context.
@param {'DEFAULT' | 'CHAR_CLASS'} [context] `Context` option from lib `regex-utilities`
@returns {string}
*/
export function sandboxUnsafeNulls(str, context) {
function sandboxUnsafeNulls(str, context) {
// regex`[\0${0}]` and regex`[${pattern`\0`}0]` can't be guarded against via nested `[…]`
// sandboxing in character classes if the interpolated value doesn't contain union (since it
// might be placed on a range boundary). So escape `\0` in character classes as `\x00`
Expand All @@ -381,6 +381,30 @@ export function sandboxUnsafeNulls(str, context) {
@param {string} newValue
@returns {string}
*/
export function spliceStr(str, pos, oldValue, newValue) {
function spliceStr(str, pos, oldValue, newValue) {
  // Everything before the splice position is kept unchanged
  const head = str.slice(0, pos);
  // Only the length of `oldValue` is used: that many characters are dropped starting at `pos`
  const tail = str.slice(pos + oldValue.length);
  return head + newValue + tail;
}

export {
adjustNumberedBackrefs,
capturingDelim,
CharClassContext,
containsCharClassUnion,
countCaptures,
doublePunctuatorChars,
enclosedTokenCharClassContexts,
enclosedTokenRegexContexts,
envSupportsFlagGroups,
envSupportsFlagV,
escapeV,
getBreakoutChar,
getEndContextForIncompleteExpression,
namedCapturingDelim,
noncapturingDelim,
preprocess,
RegexContext,
sandboxLoneCharClassCaret,
sandboxLoneDoublePunctuatorChar,
sandboxUnsafeNulls,
spliceStr,
};

0 comments on commit 3a97b9c

Please sign in to comment.