Skip to content

Commit

Permalink
Preserve only first instance of duplicate name across mutually exclus…
Browse files Browse the repository at this point in the history
…ive alternation paths with target ES2025 (fixes #14)
  • Loading branch information
slevithan committed Dec 31, 2024
1 parent 4589e46 commit aa5e33b
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 122 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,8 @@ JavaScript version used for generated regexes. Using `auto` detects the best val
- `ES2024`: Uses JS flag `v`.
- No emulation restrictions.
- Generated regexes require Node.js 20 or any 2023-era browser ([compat table](https://caniuse.com/mdn-javascript_builtins_regexp_unicodesets)).
- `ES2025`: Uses JS flag `v` and allows use of flag groups and duplicate group names.
- Benefits: Faster transpilation, simpler generated source, and duplicate group names are preserved across separate alternation paths.
- `ES2025`: Uses JS flag `v` and allows use of flag groups.
- Benefits: Faster transpilation, simpler generated source.
- Generated regexes might use features that require Node.js 23 or a 2024-era browser (except Safari, which lacks support for flag groups).
</details>

Expand Down
12 changes: 1 addition & 11 deletions demo/demo.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ const state = {
comparison: getValue('comparison'),
};

// Feature probe, evaluated once at load: `true` if the `RegExp` constructor accepts the same
// group name used in two separate alternatives, `false` if constructing it throws
const envSupportsDuplicateNames = (() => {
  let isSupported = true;
  try {
    new RegExp('(?<n>)|(?<n>)');
  } catch {
    isSupported = false;
  }
  return isSupported;
})();
const envSupportsFlagGroups = (() => {
try {
new RegExp('(?i:)');
Expand All @@ -57,9 +49,7 @@ const envSupportsFlagV = (() => {
return true;
})();
// Logic from `src/options.js`
const autoTarget = (envSupportsDuplicateNames && envSupportsFlagGroups) ?
'ES2025' :
(envSupportsFlagV ? 'ES2024' : 'ES2018');
const autoTarget = envSupportsFlagGroups ? 'ES2025' : (envSupportsFlagV ? 'ES2024' : 'ES2018');

ui.autoTargetOption.innerHTML += ` [${autoTarget}]`;
autoGrow();
Expand Down
4 changes: 1 addition & 3 deletions spec/helpers/features.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import {envSupportsDuplicateNames, envSupportsFlagGroups} from '../../src/utils.js';
import {envSupportsFlagGroups} from '../../src/utils.js';

const maxTestTargetForDuplicateNames = envSupportsDuplicateNames ? null : 'ES2024';
const maxTestTargetForFlagGroups = envSupportsFlagGroups ? null : 'ES2024';
const minTestTargetForFlagGroups = envSupportsFlagGroups ? 'ES2025' : Infinity;
const minTestTargetForFlagV = 'ES2024';

export {
maxTestTargetForDuplicateNames,
maxTestTargetForFlagGroups,
minTestTargetForFlagGroups,
minTestTargetForFlagV,
Expand Down
47 changes: 10 additions & 37 deletions spec/match-backreference.spec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {toDetails} from '../dist/index.mjs';
import {cp, r} from '../src/utils.js';
import {maxTestTargetForDuplicateNames, maxTestTargetForFlagGroups, minTestTargetForFlagGroups} from './helpers/features.js';
import {maxTestTargetForFlagGroups, minTestTargetForFlagGroups} from './helpers/features.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand Down Expand Up @@ -309,10 +309,7 @@ describe('Backreference', () => {

it('should reference the group to the left when there are duplicate names to the right', () => {
expect('aab').toExactlyMatch(r`(?<n>a)\k<n>(?<n>b)`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect('aa').toExactlyMatch(r`(?<n>a)\k<n>|(?<n>b)`);
});

it('should multiplex for duplicate names to the left', () => {
Expand Down Expand Up @@ -367,27 +364,15 @@ describe('Backreference', () => {
expect('').not.toFindMatch(r`(?<a>\g<b>(?<b>\k<a>))`);
expect('').not.toFindMatch(r`(?<a>\g<b>(?<b>\k<b>))`);
expect('').not.toFindMatch(r`(?<a>(?<a>\k<a>))`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba', 'bb']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect('aa').toExactlyMatch(r`(?<n>a)\k<n>|(?<n>b\k<n>)`);
expect(['a', 'b', 'ba', 'bb']).not.toFindMatch(r`(?<n>a)\k<n>|(?<n>b\k<n>)`);
});

it('should preclude not-yet-closed groups when multiplexing', () => {
expect('aa').toExactlyMatch(r`(?<a>a)(?<a>\k<a>)`);
expect('aba').toExactlyMatch(r`(?<n>a)(?<n>b\k<n>)`);
expect(['aa', 'bcb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['aa', 'bcb']).toExactlyMatch(r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`);
expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch(r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`);
});

it('should not match references to groups not in the alternation path', () => {
Expand All @@ -397,22 +382,10 @@ describe('Backreference', () => {
it('should preclude groups not in the alternation path when multiplexing', () => {
// This enforces Oniguruma logic where backrefs to nonparticipating groups fail to match
// rather than JS logic where they match the empty string
expect(['aa', 'bb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['aa', 'bcb', 'bcc']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTestTarget: maxTestTargetForDuplicateNames,
});
expect(['aa', 'bb']).toExactlyMatch(r`(?<n>a)\k<n>|(?<n>b)\k<n>`);
expect(['a', 'b', 'ba']).not.toFindMatch(r`(?<n>a)\k<n>|(?<n>b)\k<n>`);
expect(['aa', 'bcb', 'bcc']).toExactlyMatch(r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`);
expect(['a', 'bc', 'bca']).not.toFindMatch(r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`);
});

it('should throw for forward references to defined groups', () => {
Expand Down
55 changes: 23 additions & 32 deletions spec/match-capturing-group.spec.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {toDetails} from '../dist/index.mjs';
import {toDetails, toRegExp} from '../dist/index.mjs';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand All @@ -14,36 +14,7 @@ describe('CapturingGroup', () => {
// });

describe('named', () => {
// Checks the generated pattern source for target ES2025, where a duplicate name is expected to
// survive once per alternation path (e.g. `(?<a>)|(?<a>)` is left unchanged) and any further use
// of the name within the same path is emitted as an unnamed group
it('should keep only the first of duplicate names per alternation path with target ES2025', () => {
const opts = {
target: 'ES2025',
verbose: true,
};
// Each entry is [input pattern, expected generated pattern]
const tests = [
['(?<a>)(?<a>)', '(?<a>)()'],
['(?<a>)|(?<a>)', '(?<a>)|(?<a>)'],
['((?<a>)|(?<a>))', '(?:(?<a>)|(?<a>))'],
['(?<a>)(((?<a>)))', '(?<a>)(?:(?:()))'],
['(((?<a>)))(?<a>)', '(?:(?:(?<a>)))()'],
['(?<a>)|(((?<a>)))', '(?<a>)|(?:(?:(?<a>)))'],
['(((?<a>)))|(?<a>)', '(?:(?:(?<a>)))|(?<a>)'],
['(?<a>(?<a>))', '(?<a>())'],
['(?<a>(?<a>))|(?<a>)', '(?<a>())|(?<a>)'],
['(?<a>)(?<a>)(|(?<a>))(?<a>)', '(?<a>)()(?:|())()'],
['((?<a>)(?<a>))(((?<a>)|(?<a>)))((?<a>))', '(?:(?<a>)())(?:(?:()|()))(?:())'],
['(?<a>)(?<a>)((?<a>)|(?<a>))', '(?<a>)()(?:()|())'],
['((?<a>)|(?<a>))(?<a>)(?<a>)', '(?:(?<a>)|(?<a>))()()'],
];
// Compare only the generated pattern string from the `toDetails` result
for (const [pattern, output] of tests) {
expect(toDetails(pattern, opts).pattern).toBe(output);
}
});

it('should keep only the first of duplicate names with target < ES2025', () => {
const opts = {
target: 'ES2024',
verbose: true,
};
it('should preserve the name only for the first instance of duplicate names', () => {
const tests = [
['(?<a>)(?<a>)', '(?<a>)()'],
['(?<a>)|(?<a>)', '(?<a>)|()'],
Expand All @@ -60,10 +31,30 @@ describe('CapturingGroup', () => {
['((?<a>)|(?<a>))(?<a>)(?<a>)', '(?:(?<a>)|())()()'],
];
for (const [pattern, output] of tests) {
expect(toDetails(pattern, opts).pattern).toBe(output);
expect(toDetails(pattern, {verbose: true}).pattern).toBe(output);
}
});

it('should store subpattern values from the first instance of duplicate names', () => {
  // Both groups are named `n`; the named result must come from the first one, while the
  // numbered captures still expose both groups
  const regex = toRegExp('(?<n>.)(?<n>.)');
  const result = regex.exec('ab');
  expect(result.groups.n).toBe('a');
  const captures = Array.from(result);
  expect(captures).toEqual(['ab', 'a', 'b']);
});

// Matches Oniguruma behavior; ES2025 (which allows duplicate names across mutually exclusive
// alternation) differs since it would store the matched value from the participating group
it('should store subpattern values from the first instance of duplicate names in separate alternation paths', () => {
  const regex = toRegExp('(?<n>a)(?<n>b)|(?<n>c)(?<n>d)');
  // `named` is the expected value of `groups.n`; `captures` is the full expected match array.
  // Only the very first `n` group feeds `groups.n`, so it's `undefined` when the second
  // alternative is the one that matches
  const cases = [
    {input: 'ab', named: 'a', captures: ['ab', 'a', 'b', undefined, undefined]},
    {input: 'cd', named: undefined, captures: ['cd', undefined, undefined, 'c', 'd']},
  ];
  for (const {input, named, captures} of cases) {
    const result = regex.exec(input);
    expect(result.groups.n).toBe(named);
    expect([...result]).toEqual(captures);
  }
});

// TODO: Add remaining
});
});
8 changes: 4 additions & 4 deletions spec/match-subroutine.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,13 @@ describe('Subroutine', () => {
});

it('should transfer captured values on match results', () => {
expect(toRegExp(r`(?<n>[ab])\g<n>`).exec('ab').groups.n).toBe('b');
expect(toRegExp(r`\g<n>(?<n>[ab])`).exec('ab').groups.n).toBe('b');
expect(toRegExp(r`(?<n>.)\g<n>`).exec('ab').groups.n).toBe('b');
expect(toRegExp(r`\g<n>(?<n>.)`).exec('ab').groups.n).toBe('b');
});

it('should transfer captured values on match results for child captures', () => {
expect(toRegExp(r`(?<n1>(?<n2>[ab]))\g<n1>`).exec('ab').groups.n2).toBe('b');
expect(toRegExp(r`\g<n1>(?<n1>(?<n2>[ab]))`).exec('ab').groups.n2).toBe('b');
expect(toRegExp(r`(?<n1>(?<n2>.))\g<n1>`).exec('ab').groups.n2).toBe('b');
expect(toRegExp(r`\g<n1>(?<n1>(?<n2>.))`).exec('ab').groups.n2).toBe('b');
});

it('should transfer subpattern match indices', () => {
Expand Down
9 changes: 3 additions & 6 deletions src/generate.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ function generate(ast, options) {
lastNode,
maxRecursionDepth: rDepth,
useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode),
useDuplicateNames: minTargetEs2025,
useFlagMods: minTargetEs2025,
useFlagV: minTargetEs2024,
verbose: opts.verbose,
Expand Down Expand Up @@ -248,11 +247,9 @@ function genBackreference({ref}, state) {
function genCapturingGroup({name, number, alternatives, _originNumber}, state, gen) {
if (name) {
if (state.groupNames.has(name)) {
if (!state.useDuplicateNames) {
// Keep the name only in the first alternation path that used it; the transformer already
// stripped all but the first duplicate name per alternation path
name = null;
}
// Keep the name only on the first group that used it, regardless of alternation path; the
// transformer also strips duplicate names, so any later group reusing a name becomes unnamed
name = null;
} else {
state.groupNames.add(name);
}
Expand Down
6 changes: 2 additions & 4 deletions src/options.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {envSupportsDuplicateNames, envSupportsFlagGroups, envSupportsFlagV} from './utils.js';
import {envSupportsFlagGroups, envSupportsFlagV} from './utils.js';

const Accuracy = /** @type {const} */ ({
default: 'default',
Expand Down Expand Up @@ -66,9 +66,7 @@ function getOptions(options) {
},
};
if (opts.target === 'auto') {
opts.target = (envSupportsDuplicateNames && envSupportsFlagGroups) ?
'ES2025' :
(envSupportsFlagV ? 'ES2024' : 'ES2018');
opts.target = envSupportsFlagGroups ? 'ES2025' : (envSupportsFlagV ? 'ES2024' : 'ES2018');
}
return opts;
}
Expand Down
23 changes: 10 additions & 13 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,12 @@ const SecondPassVisitor = {
multiplexCapturesToLeftByRef.get(node.name).push({node, origin});
}

// ## Track data for duplicate names within an alternation path
// Pre-ES2025 doesn't allow duplicate names, but ES2025+ allows duplicate names that are
// unique per mutually exclusive alternation path. So if using a duplicate name for this
// path, remove the name from all but the latest instance (also applies to groups added via
// subroutine expansion)
// ## Track data for duplicate names
// Pre-ES2025 doesn't allow duplicate names, but ES2025 allows duplicate names that are
// unique per mutually exclusive alternation path. However, Oniguruma's handling for named
// subpatterns on match results means we can't use this ES2025 feature even when in an ES2025
// env. So, if using a duplicate name, remove the name from all but the first instance that
// wasn't created by subroutine expansion
if (node.name) {
const groupsWithSameName = getOrCreate(groupsByName, node.name, new Map());
let hasDuplicateNameToRemove = false;
Expand All @@ -448,9 +449,7 @@ const SecondPassVisitor = {
hasDuplicateNameToRemove = true;
} else {
for (const groupInfo of groupsWithSameName.values()) {
if (!groupInfo.hasDuplicateNameToRemove && canParticipateWithNode(groupInfo.node, node, {
ancestorsParticipate: true,
})) {
if (!groupInfo.hasDuplicateNameToRemove) {
// Will change to an unnamed capture in a later pass
hasDuplicateNameToRemove = true;
break;
Expand Down Expand Up @@ -539,9 +538,7 @@ const ThirdPassVisitor = {
return;
}
const reffedNodes = state.reffedNodesByReferencer.get(node);
const participants = reffedNodes.filter(reffed => canParticipateWithNode(reffed, node, {
ancestorsParticipate: false,
}));
const participants = reffedNodes.filter(reffed => canParticipateWithNode(reffed, node));
// For the backref's `ref`, use `number` rather than `name` because group names might have been
// removed by the transformer if they duplicate an earlier group's name, or they might be removed
// later by the generator if they're duplicates within the overall pattern.
Expand Down Expand Up @@ -621,7 +618,7 @@ function areFlagsEqual(a, b) {
return a.dotAll === b.dotAll && a.ignoreCase === b.ignoreCase;
}

function canParticipateWithNode(capture, node, {ancestorsParticipate}) {
function canParticipateWithNode(capture, node) {
// Walks to the left (prev siblings), down (sibling descendants), up (parent), then back down
// (parent's prev sibling descendants) the tree in a loop
let rightmostPoint = node;
Expand All @@ -636,7 +633,7 @@ function canParticipateWithNode(capture, node, {ancestorsParticipate}) {
}
if (rightmostPoint === capture) {
// Capture is ancestor of node
return ancestorsParticipate;
return false;
}
const kidsOfParent = getKids(rightmostPoint.parent);
for (const kid of kidsOfParent) {
Expand Down
10 changes: 0 additions & 10 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,6 @@ import {EsVersion, Target} from './options.js';
const cp = String.fromCodePoint;
const r = String.raw;

// Feature probe, evaluated once at module load: reports whether constructing a `RegExp` that
// reuses one group name across two alternatives succeeds in the current environment
const envSupportsDuplicateNames = (() => {
  try {
    new RegExp('(?<n>)|(?<n>)');
    return true;
  } catch {
    return false;
  }
})();

const envSupportsFlagGroups = (() => {
try {
new RegExp('(?i:)');
Expand Down Expand Up @@ -62,7 +53,6 @@ function throwIfNot(value, msg) {

export {
cp,
envSupportsDuplicateNames,
envSupportsFlagGroups,
envSupportsFlagV,
getNewCurrentFlags,
Expand Down

0 comments on commit aa5e33b

Please sign in to comment.