From 3a97b9c944e244939c67ad645f304707f99cdbb7 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Sun, 17 Nov 2024 18:26:00 +0100 Subject: [PATCH] Move exports; tweak subclass --- spec/interpolate-regexp.spec.js | 2 +- spec/regex-tag.spec.js | 2 +- spec/subroutines.spec.js | 2 +- src/atomic.js | 9 ++++- src/backcompat.js | 6 ++- src/flag-n.js | 6 ++- src/flag-x.js | 9 ++++- src/pattern.js | 9 ++++- src/subclass.js | 21 ++++++++--- src/subroutines.js | 6 ++- src/utils.js | 66 ++++++++++++++++++++++----------- 11 files changed, 99 insertions(+), 39 deletions(-) diff --git a/spec/interpolate-regexp.spec.js b/spec/interpolate-regexp.spec.js index d9cd549..1e39d81 100644 --- a/spec/interpolate-regexp.spec.js +++ b/spec/interpolate-regexp.spec.js @@ -64,7 +64,7 @@ describe('interpolation: regexes', () => { it('should treat pattern modifiers as noncapturing when adjusting backreferences', () => { if (!envSupportsFlagGroups) { - pending('requires support for flag groups (Node 23)'); + pending('requires support for flag groups (Node.js 23)'); } expect('abb').toMatch(regex`^(?i:a)${/(b)\1/}$`); }); diff --git a/spec/regex-tag.spec.js b/spec/regex-tag.spec.js index f58acac..0f34dbb 100644 --- a/spec/regex-tag.spec.js +++ b/spec/regex-tag.spec.js @@ -158,7 +158,7 @@ describe('regex', () => { it('should adjust indices with flag d for emulation groups', () => { if (!envSupportsFlagD) { - pending('requires support for flag d (Node 16)'); + pending('requires support for flag d (Node.js 16)'); } expect(regex({flags: 'd', subclass: true})`(?>.)`.exec('a').indices).toHaveSize(1); // ## Documenting behavior when subclass is not used diff --git a/spec/subroutines.spec.js b/spec/subroutines.spec.js index 88140ad..fd65aec 100644 --- a/spec/subroutines.spec.js +++ b/spec/subroutines.spec.js @@ -130,7 +130,7 @@ describe('subroutines', () => { it('should refer to the first group with name when duplicate capture names exist', () => { if (!envSupportsDuplicateNames) { - pending('requires support for duplicate names (Node 23)'); + pending('requires support for duplicate names (Node.js 23)'); } expect('aa ba bb'.match(regex('g')`(?a)|(?b)\g`)).toEqual(['a', 'a', 'ba']); expect('aa ba bb'.match(regex('g')`(?a)\g|(?b)`)).toEqual(['aa', 'b', 'b', 'b']); diff --git a/src/atomic.js b/src/atomic.js index 863427a..51bca06 100644 --- a/src/atomic.js +++ b/src/atomic.js @@ -10,7 +10,7 @@ Apply transformations for atomic groups: `(?>…)`. @param {import('./regex.js').PluginData} [data] @returns {string} */ -export function atomic(expression, data) { +function atomic(expression, data) { if (!/\(\?>/.test(expression)) { return expression; } @@ -116,7 +116,7 @@ Possessive quantifiers in Oniguruma and Onigmo are only: `?+`, `*+`, `++`. @param {string} expression @returns {string} */ -export function possessive(expression) { +function possessive(expression) { if (!(new RegExp(`${baseQuantifier}\\+`).test(expression))) { return expression; } @@ -180,3 +180,8 @@ export function possessive(expression) { } return expression; } + +export { + atomic, + possessive, +}; diff --git a/src/backcompat.js b/src/backcompat.js index 29b4f22..4a2207c 100644 --- a/src/backcompat.js +++ b/src/backcompat.js @@ -17,7 +17,7 @@ Assumes flag u and doesn't worry about syntax errors that are caught by it. @param {string} expression @returns {string} */ -export function backcompatPlugin(expression) { +function backcompatPlugin(expression) { const unescapedLiteralHyphenMsg = 'Invalid unescaped "-" in character class'; let inCharClass = false; let result = ''; @@ -53,3 +53,7 @@ export function backcompatPlugin(expression) { } return result; } + +export { + backcompatPlugin, +}; diff --git a/src/flag-n.js b/src/flag-n.js index b357e53..60d460c 100644 --- a/src/flag-n.js +++ b/src/flag-n.js @@ -14,7 +14,7 @@ Preprocessors are applied to the outer regex and interpolated patterns, but not regexes or strings. @type {import('./utils.js').Preprocessor} */ -export function flagNPreprocessor(value, runningContext) { +function flagNPreprocessor(value, runningContext) { value = String(value); let expression = ''; let transformed = ''; @@ -39,3 +39,7 @@ export function flagNPreprocessor(value, runningContext) { runningContext, }; } + +export { + flagNPreprocessor, +}; diff --git a/src/flag-x.js b/src/flag-x.js index 5a6bd8f..703ae8a 100644 --- a/src/flag-x.js +++ b/src/flag-x.js @@ -29,7 +29,7 @@ Preprocessors are applied to the outer regex and interpolated patterns, but not regexes or strings. @type {import('./utils.js').Preprocessor} */ -export function flagXPreprocessor(value, runningContext, options) { +function flagXPreprocessor(value, runningContext, options) { value = String(value); let ignoringWs = false; let ignoringCharClassWs = false; @@ -153,7 +153,7 @@ Remove `(?:)` token separators (most likely added by flag x) in cases where it's @param {string} expression @returns {string} */ -export function clean(expression) { +function clean(expression) { const sep = String.raw`\(\?:\)`; // No need for repeated separators expression = replaceUnescaped(expression, `(?:${sep}){2,}`, '(?:)', Context.DEFAULT); @@ -182,3 +182,8 @@ export function clean(expression) { ); return expression; } + +export { + clean, + flagXPreprocessor, +}; diff --git a/src/pattern.js b/src/pattern.js index 386b4e3..19cb677 100644 --- a/src/pattern.js +++ b/src/pattern.js @@ -1,4 +1,4 @@ -export class Pattern { +class Pattern { #value; /** @param {string} value */ constructor(value) { @@ -27,7 +27,7 @@ Can be called as a function or template tag: @param {...string} substitutions @returns {Pattern} */ -export function pattern(first, ...substitutions) { +function pattern(first, ...substitutions) { if (Array.isArray(first?.raw)) { return new Pattern( // Intersperse raw template strings and substitutions @@ -38,3 +38,8 @@ export function pattern(first, ...substitutions) { } throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`); } + +export { + Pattern, + pattern, +}; diff --git a/src/subclass.js b/src/subclass.js index 9205d9e..c2b7d91 100644 --- a/src/subclass.js +++ b/src/subclass.js @@ -6,16 +6,25 @@ import {Context, replaceUnescaped} from 'regex-utilities'; const emulationGroupMarker = '$E$'; /** -@class -@augments RegExp -@param {string | RegExpSubclass} expression -@param {string} [flags] -@param {{useEmulationGroups: boolean;}} [options] +Works the same as JavaScript's native `RegExp` constructor in all contexts, but automatically +adjusts matches and subpattern indices (with flag `d`) to account for injected emulation groups. */ class RegExpSubclass extends RegExp { - // Avoid #private to allow for subclassing + /** + Avoid `#private` to allow for subclassing. + @private + @type {Array | undefined} + */ _captureMap; + /** + @param {string | RegExpSubclass} expression + @param {string} [flags] + @param {{useEmulationGroups: boolean;}} [options] + */ constructor(expression, flags, options) { + if (expression instanceof RegExp && options) { + throw new Error('Cannot provide options when copying regexp'); + } let captureMap; if (options?.useEmulationGroups) { ({expression, captureMap} = unmarkEmulationGroups(expression)); diff --git a/src/subroutines.js b/src/subroutines.js index db8e55a..c219907 100644 --- a/src/subroutines.js +++ b/src/subroutines.js @@ -7,7 +7,7 @@ import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped @param {import('./regex.js').PluginData} [data] @returns {string} */ -export function subroutines(expression, data) { +function subroutines(expression, data) { // NOTE: subroutines and definition groups fully support numbered backreferences and unnamed // captures (from interpolated regexes or from turning implicit flag n off), and all of the // complex forward and backward backreference adjustments that can result @@ -334,3 +334,7 @@ function lastOf(arr) { // return arr[arr.length - 1]; } + +export { + subroutines, +}; diff --git a/src/utils.js b/src/utils.js index a38befc..171e7ea 100644 --- a/src/utils.js +++ b/src/utils.js @@ -1,7 +1,7 @@ import {Pattern, pattern} from './pattern.js'; import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities'; -export const RegexContext = { +const RegexContext = { DEFAULT: 'DEFAULT', CHAR_CLASS: 'CHAR_CLASS', ENCLOSED_P: 'ENCLOSED_P', @@ -11,7 +11,7 @@ export const RegexContext = { INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN', }; -export const CharClassContext = { +const CharClassContext = { DEFAULT: 'DEFAULT', ENCLOSED_P: 'ENCLOSED_P', ENCLOSED_Q: 'ENCLOSED_Q', @@ -20,18 +20,18 @@ export const CharClassContext = { RANGE: 'RANGE', }; -export const enclosedTokenRegexContexts = new Set([ +const enclosedTokenRegexContexts = new Set([ RegexContext.ENCLOSED_P, RegexContext.ENCLOSED_U, ]); -export const enclosedTokenCharClassContexts = new Set([ +const enclosedTokenCharClassContexts = new Set([ CharClassContext.ENCLOSED_P, CharClassContext.ENCLOSED_Q, CharClassContext.ENCLOSED_U, ]); -export const envSupportsFlagGroups = (() => { +const envSupportsFlagGroups = (() => { try { new RegExp('(?i:)'); } catch { @@ -40,7 +40,7 @@ export const envSupportsFlagGroups = (() => { return true; })(); -export const envSupportsFlagV = (() => { +const envSupportsFlagV = (() => { try { new RegExp('', 'v'); } catch { @@ -49,17 +49,17 @@ export const envSupportsFlagV = (() => { return true; })(); -export const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~'; -export const namedCapturingDelim = String.raw`\(\?<(?![=!])(?[^>]+)>`; -export const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`; -export const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`; +const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~'; +const namedCapturingDelim = String.raw`\(\?<(?![=!])(?[^>]+)>`; +const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`; +const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`; /** @param {string} expression @param {number} precedingCaptures @returns {string} */ -export function adjustNumberedBackrefs(expression, precedingCaptures) { +function adjustNumberedBackrefs(expression, precedingCaptures) { return replaceUnescaped( expression, String.raw`\\(?[1-9]\d*)`, @@ -93,7 +93,7 @@ const charClassUnionToken = new RegExp(String.raw` `.replace(/\s+/g, ''), 'gsu'); // Assumes flag v and doesn't worry about syntax errors that are caught by it -export function containsCharClassUnion(charClassPattern) { +function containsCharClassUnion(charClassPattern) { // Return `true` if it contains: // - `\p` (lowercase only) and the name is a property of strings (case sensitive). // - `\q`. @@ -130,7 +130,7 @@ export function containsCharClassUnion(charClassPattern) { @param {string} expression @returns {number} */ -export function countCaptures(expression) { +function countCaptures(expression) { let num = 0; forEachUnescaped(expression, capturingDelim, () => num++, Context.DEFAULT); return num; @@ -142,7 +142,7 @@ Escape special characters for the given context, assuming flag v. @param {'DEFAULT' | 'CHAR_CLASS'} context `Context` option from lib `regex-utilities` @returns {string} Escaped string */ -export function escapeV(str, context) { +function escapeV(str, context) { if (context === Context.CHAR_CLASS) { // Escape all double punctuators (including ^, which is special on its own in the first // position) in case they're bordered by the same character in or outside of the escaped string @@ -152,7 +152,7 @@ export function escapeV(str, context) { } // Look for characters that would change the meaning of subsequent tokens outside an interpolated value -export function getBreakoutChar(expression, regexContext, charClassContext) { +function getBreakoutChar(expression, regexContext, charClassContext) { const escapesRemoved = expression.replace(/\\./gsu, ''); // Trailing unescaped `\`; checking `.includes('\\')` would also work if (escapesRemoved.endsWith('\\')) { @@ -212,7 +212,7 @@ seen. Assumes flag v and doesn't worry about syntax errors that are caught by it @param {Partial} [runningContext] @returns {RunningContext} */ -export function getEndContextForIncompleteExpression(incompleteExpression, { +function getEndContextForIncompleteExpression(incompleteExpression, { regexContext = RegexContext.DEFAULT, charClassContext = CharClassContext.DEFAULT, charClassDepth = 0, @@ -315,7 +315,7 @@ processes substitutions that are instanceof `Pattern`. @param {Required} options @returns {{template: RawTemplate; substitutions: ReadonlyArray;}} */ -export function preprocess(template, substitutions, preprocessor, options) { +function preprocess(template, substitutions, preprocessor, options) { let /** @type {RawTemplate} */ newTemplate = {raw: []}; let newSubstitutions = []; let runningContext; @@ -342,7 +342,7 @@ export function preprocess(template, substitutions, preprocessor, options) { // Sandbox `^` if relevant, done so it can't change the meaning of the surrounding character class // if we happen to be at the first position. See `sandboxLoneDoublePunctuatorChar` for more details -export function sandboxLoneCharClassCaret(str) { +function sandboxLoneCharClassCaret(str) { return str.replace(/^\^/, '\\^^'); } @@ -355,7 +355,7 @@ export function sandboxLoneCharClassCaret(str) { // - Can't add a second unescaped symbol if a lone symbol is the entire string because it might be // followed by the same unescaped symbol outside an interpolation, and since it won't be wrapped, // the second symbol wouldn't be sandboxed from the one following it. -export function sandboxLoneDoublePunctuatorChar(str) { +function sandboxLoneDoublePunctuatorChar(str) { return str.replace(new RegExp(`^([${doublePunctuatorChars}])(?!\\1)`), (m, _, pos) => { return `\\${m}${pos + 1 === str.length ? '' : m}`; }); @@ -367,7 +367,7 @@ Converts `\0` tokens to `\x00` in the given context. @param {'DEFAULT' | 'CHAR_CLASS'} [context] `Context` option from lib `regex-utilities` @returns {string} */ -export function sandboxUnsafeNulls(str, context) { +function sandboxUnsafeNulls(str, context) { // regex`[\0${0}]` and regex`[${pattern`\0`}0]` can't be guarded against via nested `[…]` // sandboxing in character classes if the interpolated value doesn't contain union (since it // might be placed on a range boundary). So escape `\0` in character classes as `\x00` @@ -381,6 +381,30 @@ export function sandboxUnsafeNulls(str, context) { @param {string} newValue @returns {string} */ -export function spliceStr(str, pos, oldValue, newValue) { +function spliceStr(str, pos, oldValue, newValue) { return str.slice(0, pos) + newValue + str.slice(pos + oldValue.length); } + +export { + adjustNumberedBackrefs, + capturingDelim, + CharClassContext, + containsCharClassUnion, + countCaptures, + doublePunctuatorChars, + enclosedTokenCharClassContexts, + enclosedTokenRegexContexts, + envSupportsFlagGroups, + envSupportsFlagV, + escapeV, + getBreakoutChar, + getEndContextForIncompleteExpression, + namedCapturingDelim, + noncapturingDelim, + preprocess, + RegexContext, + sandboxLoneCharClassCaret, + sandboxLoneDoublePunctuatorChar, + sandboxUnsafeNulls, + spliceStr, +};