Skip to content

Commit

Permalink
Merge branch 'main' into ts-resolution
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan authored Nov 15, 2024
2 parents 627e72f + 1a04c6a commit 8e2a3f5
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 89 deletions.
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
"default": "./dist/commonjs/regex.js"
}
},
"./atomic": {
"types": "./dist/module/atomic.d.ts",
"import": "./src/atomic.js"
"./internals": {
"types": "./dist/module/internals.d.ts",
"import": "./src/internals.js"
}
},
"browser": "./dist/regex.min.js",
Expand All @@ -28,7 +28,7 @@
"bundle:global": "esbuild src/regex.js --global-name=Regex --bundle --minify --sourcemap --outfile=dist/regex.min.js",
"bundle:esm": "esbuild src/regex.js --format=esm --bundle --sourcemap --outfile=dist/module/regex.js",
"bundle:cjs": "esbuild src/regex.js --format=cjs --bundle --sourcemap --outfile=dist/commonjs/regex.js",
"types": "tsc src/regex.js --rootDir src --declaration --allowJs --emitDeclarationOnly --outDir types",
"types": "tsc src/regex.js src/internals.js --rootDir src --declaration --allowJs --emitDeclarationOnly --outDir types",
"prebuild": "rm -rf dist/* types/*",
"build": "npm run bundle:global && npm run bundle:esm && npm run bundle:cjs && npm run types",
"postbuild": "node postbuild.js",
Expand Down
10 changes: 10 additions & 0 deletions spec/regex-tag.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ describe('regex', () => {
expect([...'ab'.matchAll(regex({flags: 'g', subclass: false})`(?>(?<a>.))(?<b>.)`)][0][2]).not.toBe('b');
expect('ab'.split(regex({subclass: false})`(?>(?<a>.))(?<b>.)`)).not.toEqual(['', 'a', 'b', '']);
});

it('should adjust indices with flag d for emulation groups', () => {
if (!flagDSupported) {
pending();
}
expect(regex({flags: 'd', subclass: true})`(?>.)`.exec('a').indices).toHaveSize(1);

// Documenting behavior when the option is not used
expect(regex({flags: 'd', subclass: false})`(?>.)`.exec('a').indices).toHaveSize(2);
});
});
});
});
3 changes: 2 additions & 1 deletion src/atomic.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {emulationGroupMarker, noncapturingDelim, spliceStr} from './utils.js';
import {emulationGroupMarker} from './subclass.js';
import {noncapturingDelim, spliceStr} from './utils.js';
import {Context, replaceUnescaped} from 'regex-utilities';

const atomicPluginToken = new RegExp(String.raw`(?<noncapturingStart>${noncapturingDelim})|(?<capturingStart>\((?:\?<[^>]+>)?)|\\?.`, 'gsu');
Expand Down
3 changes: 2 additions & 1 deletion src/flag-x.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {CharClassContext, doublePunctuatorChars, emulationGroupMarker, getEndContextForIncompleteExpression, noncapturingDelim, RegexContext, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {emulationGroupMarker} from './subclass.js';
import {CharClassContext, doublePunctuatorChars, getEndContextForIncompleteExpression, noncapturingDelim, RegexContext, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {Context, replaceUnescaped} from 'regex-utilities';

const ws = /^\s$/;
Expand Down
2 changes: 2 additions & 0 deletions src/internals.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export {atomic, possessive} from './atomic.js';
export {RegExpSubclass} from './subclass.js';
86 changes: 8 additions & 78 deletions src/regex.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@ import {backcompatPlugin} from './backcompat.js';
import {flagNPreprocessor} from './flag-n.js';
import {clean, flagXPreprocessor} from './flag-x.js';
import {Pattern, pattern} from './pattern.js';
import {RegExpSubclass} from './subclass.js';
import {subroutines} from './subroutines.js';
import {adjustNumberedBackrefs, capturingDelim, CharClassContext, containsCharClassUnion, countCaptures, emulationGroupMarker, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, escapeV, flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, RegexContext, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {adjustNumberedBackrefs, CharClassContext, containsCharClassUnion, countCaptures, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, escapeV, flagVSupported, getBreakoutChar, getEndContextForIncompleteExpression, patternModsSupported, preprocess, RegexContext, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';

/**
@typedef {string | RegExp | Pattern | number} InterpolatedValue
@typedef {{flags: string; useEmulationGroups: boolean;}} PluginData
@typedef {{
flags?: string;
useEmulationGroups?: boolean;
}} PluginData
@typedef {TemplateStringsArray | {raw: Array<string>}} RawTemplate
@typedef {{
flags?: string;
Expand All @@ -35,7 +39,7 @@ import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';
(template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T;
(flags?: string): RegexTag<T>;
(options: RegexTagOptions & {subclass?: false}): RegexTag<T>;
(options: RegexTagOptions & {subclass: true}): RegexTag<WrappedRegExp>;
(options: RegexTagOptions & {subclass: true}): RegexTag<RegExpSubclass>;
}}
*/
/**
Expand Down Expand Up @@ -102,13 +106,9 @@ const regexFromTemplate = (options, template, ...substitutions) => {
});

expression = handlePlugins(expression, opts);
let captureMap;
if (opts.subclass) {
({expression, captureMap} = unmarkEmulationGroups(expression));
}
try {
return opts.subclass ?
new WrappedRegExp(expression, opts.flags, {captureMap}) :
new RegExpSubclass(expression, opts.flags, {useEmulationGroups: true}) :
new RegExp(expression, opts.flags);
} catch (err) {
// Improve DX by always including the generated source in the error message. Some browsers
Expand Down Expand Up @@ -214,47 +214,6 @@ function handlePlugins(expression, options) {
return expression;
}

class WrappedRegExp extends RegExp {
#captureMap;
/**
@param {string | WrappedRegExp} expression
@param {string} [flags]
@param {{captureMap: Array<boolean>;}} [data]
*/
constructor(expression, flags, data) {
super(expression, flags);
if (data) {
this.#captureMap = data.captureMap;
// The third argument `data` isn't provided when regexes are copied as part of the internal
// handling of string methods `matchAll` and `split`
} else if (expression instanceof WrappedRegExp) {
// Can read private properties of the existing object since it was created by this class
this.#captureMap = expression.#captureMap;
}
}
/**
Called internally by all String/RegExp methods that use regexes.
@override
@param {string} str
@returns {RegExpExecArray | null}
*/
exec(str) {
const match = RegExp.prototype.exec.call(this, str);
if (!match || !this.#captureMap) {
return match;
}
const copy = [...match];
// Empty all but the first value of the array while preserving its other properties
match.length = 1;
for (let i = 1; i < copy.length; i++) {
if (this.#captureMap[i]) {
match.push(copy[i]);
}
}
return match;
}
}

/**
@param {InterpolatedValue} value
@param {string} flags
Expand Down Expand Up @@ -386,35 +345,6 @@ function transformForLocalFlags(re, outerFlags) {
return {value};
}

/**
Strips the emulation group markers from the pattern and reports, for every capturing group, whether
its submatch should be kept in results. Emulation groups (anonymous captures added to emulate
extended syntax) are recorded as `false`; every other capture, and the overall match at index 0,
is recorded as `true`.
@param {string} expression
@returns {{expression: string; captureMap: Array<boolean>;}}
*/
function unmarkEmulationGroups(expression) {
  // The marker contains `$`, which must be escaped before embedding it in a regex pattern
  const escapedMarker = emulationGroupMarker.replace(/\$/g, '\\$');
  // Index 0 stands for the overall match
  const captureMap = [true];
  const unmarked = replaceUnescaped(
    expression,
    `(?:${capturingDelim})(?<mark>${escapedMarker})?`,
    ({0: m, groups: {mark}}) => {
      const isEmulationGroup = Boolean(mark);
      captureMap.push(!isEmulationGroup);
      // Drop the trailing marker but keep the group opener itself
      return isEmulationGroup ? m.slice(0, -emulationGroupMarker.length) : m;
    },
    Context.DEFAULT
  );
  return {
    captureMap,
    expression: unmarked,
  };
}

export {
pattern,
regex,
Expand Down
94 changes: 94 additions & 0 deletions src/subclass.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import {Context, replaceUnescaped} from 'regex-utilities';

// This marker was chosen because it's impossible to match (so it's extremely unlikely to be used in
// a user-provided regex); it's not at risk of being optimized away, transformed, or flagged as an
// error by a plugin; and it ends with an unquantifiable token.
// It is expected directly after the opening of a capturing group; `unmarkEmulationGroups` strips it
// and records the group as one whose submatch should be hidden from results
const emulationGroupMarker = '$E$';

/**
@class
@param {string | RegExpSubclass} expression
@param {string} [flags]
@param {{useEmulationGroups: boolean;}} [options]
*/
class RegExpSubclass extends RegExp {
// Avoid #private to allow for subclassing
_captureMap;
constructor(expression, flags, options) {
let captureMap;
if (options?.useEmulationGroups) {
({expression, captureMap} = unmarkEmulationGroups(expression));
}
super(expression, flags);
if (captureMap) {
this._captureMap = captureMap;
// The third argument `options` isn't provided when regexes are copied as part of the internal
// handling of string methods `matchAll` and `split`
} else if (expression instanceof RegExpSubclass) {
this._captureMap = expression._captureMap;
}
}
/**
Called internally by all String/RegExp methods that use regexes.
@override
@param {string} str
@returns {RegExpExecArray | null}
*/
exec(str) {
const match = RegExp.prototype.exec.call(this, str);
if (!match || !this._captureMap) {
return match;
}
const matchCopy = [...match];
// Empty all but the first value of the array while preserving its other properties
match.length = 1;
let indicesCopy;
if (this.hasIndices) {
indicesCopy = [...match.indices];
match.indices.length = 1;
}
for (let i = 1; i < matchCopy.length; i++) {
if (this._captureMap[i]) {
match.push(matchCopy[i]);
if (this.hasIndices) {
match.indices.push(indicesCopy[i]);
}
}
}
return match;
}
}

/**
Strips the emulation group markers from the pattern and reports, for every capturing group, whether
its submatch should be kept in results. Emulation groups (anonymous captures added to emulate
extended syntax) are recorded as `false`; every other capture, and the overall match at index 0,
is recorded as `true`.
@param {string} expression
@returns {{expression: string; captureMap: Array<boolean>;}}
*/
function unmarkEmulationGroups(expression) {
  // The marker contains `$`, which must be escaped before embedding it in a regex pattern
  const escapedMarker = emulationGroupMarker.replace(/\$/g, '\\$');
  // Index 0 stands for the overall match
  const captureMap = [true];
  const unmarked = replaceUnescaped(
    expression,
    // Opening of an unnamed or named capturing group, optionally followed by the marker
    String.raw`\((?:(?!\?)|\?<(?![=!])[^>]+>)(?<mark>${escapedMarker})?`,
    ({0: m, groups: {mark}}) => {
      const isEmulationGroup = Boolean(mark);
      captureMap.push(!isEmulationGroup);
      // Drop the trailing marker but keep the group opener itself
      return isEmulationGroup ? m.slice(0, -emulationGroupMarker.length) : m;
    },
    Context.DEFAULT
  );
  return {
    captureMap,
    expression: unmarked,
  };
}

export {
emulationGroupMarker,
RegExpSubclass,
};
3 changes: 2 additions & 1 deletion src/subroutines.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {capturingDelim, countCaptures, emulationGroupMarker, namedCapturingDelim, spliceStr} from './utils.js';
import {emulationGroupMarker} from './subclass.js';
import {capturingDelim, countCaptures, namedCapturingDelim, spliceStr} from './utils.js';
import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';

/**
Expand Down
4 changes: 0 additions & 4 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ export const flagVSupported = (() => {
return true;
})();

// This marker was chosen because it's impossible to match (so it's extremely unlikely to be used in
// a user-provided regex); it's not at risk of being optimized away, transformed, or flagged as an
// error by a plugin; and it ends with an unquantifiable token
export const emulationGroupMarker = '$E$';
export const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
export const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
export const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`;
Expand Down

0 comments on commit 8e2a3f5

Please sign in to comment.