From aa5e33b4f8dc67d39dc12db01b74d96c64cb4357 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Tue, 31 Dec 2024 17:18:16 +0100 Subject: [PATCH] Preserve only first instance of duplicate name across mutually exclusive alternation paths with target ES2025 (fixes #14) --- README.md | 4 +-- demo/demo.js | 12 +------ spec/helpers/features.js | 4 +-- spec/match-backreference.spec.js | 47 ++++++------------------- spec/match-capturing-group.spec.js | 55 +++++++++++++----------------- spec/match-subroutine.spec.js | 8 ++--- src/generate.js | 9 ++--- src/options.js | 6 ++-- src/transform.js | 23 ++++++------- src/utils.js | 10 ------ 10 files changed, 56 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index fa8bd27..9e0a346 100644 --- a/README.md +++ b/README.md @@ -239,8 +239,8 @@ JavaScript version used for generated regexes. Using `auto` detects the best val - `ES2024`: Uses JS flag `v`. - No emulation restrictions. - Generated regexes require Node.js 20 or any 2023-era browser ([compat table](https://caniuse.com/mdn-javascript_builtins_regexp_unicodesets)). -- `ES2025`: Uses JS flag `v` and allows use of flag groups and duplicate group names. - - Benefits: Faster transpilation, simpler generated source, and duplicate group names are preserved across separate alternation paths. +- `ES2025`: Uses JS flag `v` and allows use of flag groups. + - Benefits: Faster transpilation, simpler generated source. - Generated regexes might use features that require Node.js 23 or a 2024-era browser (except Safari, which lacks support for flag groups). diff --git a/demo/demo.js b/demo/demo.js index 3f79c67..7e11b3d 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -32,14 +32,6 @@ const state = { comparison: getValue('comparison'), }; -const envSupportsDuplicateNames = (() => { - try { - new RegExp('(?)|(?)'); - } catch { - return false; - } - return true; -})(); const envSupportsFlagGroups = (() => { try { new RegExp('(?i:)'); @@ -57,9 +49,7 @@ const envSupportsFlagV = (() => { return true; })(); // Logic from `src/options.js` -const autoTarget = (envSupportsDuplicateNames && envSupportsFlagGroups) ? - 'ES2025' : - (envSupportsFlagV ? 'ES2024' : 'ES2018'); +const autoTarget = envSupportsFlagGroups ? 'ES2025' : (envSupportsFlagV ? 'ES2024' : 'ES2018'); ui.autoTargetOption.innerHTML += ` [${autoTarget}]`; autoGrow(); diff --git a/spec/helpers/features.js b/spec/helpers/features.js index f5b4a08..8c18e8e 100644 --- a/spec/helpers/features.js +++ b/spec/helpers/features.js @@ -1,12 +1,10 @@ -import {envSupportsDuplicateNames, envSupportsFlagGroups} from '../../src/utils.js'; +import {envSupportsFlagGroups} from '../../src/utils.js'; -const maxTestTargetForDuplicateNames = envSupportsDuplicateNames ? null : 'ES2024'; const maxTestTargetForFlagGroups = envSupportsFlagGroups ? null : 'ES2024'; const minTestTargetForFlagGroups = envSupportsFlagGroups ? 'ES2025' : Infinity; const minTestTargetForFlagV = 'ES2024'; export { - maxTestTargetForDuplicateNames, maxTestTargetForFlagGroups, minTestTargetForFlagGroups, minTestTargetForFlagV, diff --git a/spec/match-backreference.spec.js b/spec/match-backreference.spec.js index 54bb7cc..df94a01 100644 --- a/spec/match-backreference.spec.js +++ b/spec/match-backreference.spec.js @@ -1,6 +1,6 @@ import {toDetails} from '../dist/index.mjs'; import {cp, r} from '../src/utils.js'; -import {maxTestTargetForDuplicateNames, maxTestTargetForFlagGroups, minTestTargetForFlagGroups} from './helpers/features.js'; +import {maxTestTargetForFlagGroups, minTestTargetForFlagGroups} from './helpers/features.js'; import {matchers} from './helpers/matchers.js'; beforeEach(() => { @@ -309,10 +309,7 @@ describe('Backreference', () => { it('should reference the group to the left when there are duplicate names to the right', () => { expect('aab').toExactlyMatch(r`(?a)\k(?b)`); - expect('aa').toExactlyMatch({ - pattern: r`(?a)\k|(?b)`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); + expect('aa').toExactlyMatch(r`(?a)\k|(?b)`); }); it('should multiplex for duplicate names to the left', () => { @@ -367,27 +364,15 @@ describe('Backreference', () => { expect('').not.toFindMatch(r`(?\g(?\k))`); expect('').not.toFindMatch(r`(?\g(?\k))`); expect('').not.toFindMatch(r`(?(?\k))`); - expect('aa').toExactlyMatch({ - pattern: r`(?a)\k|(?b\k)`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); - expect(['a', 'b', 'ba', 'bb']).not.toFindMatch({ - pattern: r`(?a)\k|(?b\k)`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); + expect('aa').toExactlyMatch(r`(?a)\k|(?b\k)`); + expect(['a', 'b', 'ba', 'bb']).not.toFindMatch(r`(?a)\k|(?b\k)`); }); it('should preclude not-yet-closed groups when multiplexing', () => { expect('aa').toExactlyMatch(r`(?a)(?\k)`); expect('aba').toExactlyMatch(r`(?a)(?b\k)`); - expect(['aa', 'bcb']).toExactlyMatch({ - pattern: r`(?a)\k|(?b)(?c\k)`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); - expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch({ - pattern: r`(?a)\k|(?b)(?c\k)`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); + expect(['aa', 'bcb']).toExactlyMatch(r`(?a)\k|(?b)(?c\k)`); + expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch(r`(?a)\k|(?b)(?c\k)`); }); it('should not match references to groups not in the alternation path', () => { @@ -397,22 +382,10 @@ describe('Backreference', () => { it('should preclude groups not in the alternation path when multiplexing', () => { // This enforces Oniguruma logic where backrefs to nonparticipating groups fail to match // rather than JS logic where they match the empty string - expect(['aa', 'bb']).toExactlyMatch({ - pattern: r`(?a)\k|(?b)\k`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); - expect(['a', 'b', 'ba']).not.toFindMatch({ - pattern: r`(?a)\k|(?b)\k`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); - expect(['aa', 'bcb', 'bcc']).toExactlyMatch({ - pattern: r`(?a)\k|(?b)(?c)\k`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); - expect(['a', 'bc', 'bca']).not.toFindMatch({ - pattern: r`(?a)\k|(?b)(?c)\k`, - maxTestTarget: maxTestTargetForDuplicateNames, - }); + expect(['aa', 'bb']).toExactlyMatch(r`(?a)\k|(?b)\k`); + expect(['a', 'b', 'ba']).not.toFindMatch(r`(?a)\k|(?b)\k`); + expect(['aa', 'bcb', 'bcc']).toExactlyMatch(r`(?a)\k|(?b)(?c)\k`); + expect(['a', 'bc', 'bca']).not.toFindMatch(r`(?a)\k|(?b)(?c)\k`); }); it('should throw for forward references to defined groups', () => { diff --git a/spec/match-capturing-group.spec.js b/spec/match-capturing-group.spec.js index 9677a01..0b2a532 100644 --- a/spec/match-capturing-group.spec.js +++ b/spec/match-capturing-group.spec.js @@ -1,4 +1,4 @@ -import {toDetails} from '../dist/index.mjs'; +import {toDetails, toRegExp} from '../dist/index.mjs'; import {matchers} from './helpers/matchers.js'; beforeEach(() => { @@ -14,36 +14,7 @@ describe('CapturingGroup', () => { // }); describe('named', () => { - it('should keep only the first of duplicate names per alternation path with target ES2025', () => { - const opts = { - target: 'ES2025', - verbose: true, - }; - const tests = [ - ['(?)(?)', '(?)()'], - ['(?)|(?)', '(?)|(?)'], - ['((?)|(?))', '(?:(?)|(?))'], - ['(?)(((?)))', '(?)(?:(?:()))'], - ['(((?)))(?)', '(?:(?:(?)))()'], - ['(?)|(((?)))', '(?)|(?:(?:(?)))'], - ['(((?)))|(?)', '(?:(?:(?)))|(?)'], - ['(?(?))', '(?())'], - ['(?(?))|(?)', '(?())|(?)'], - ['(?)(?)(|(?))(?)', '(?)()(?:|())()'], - ['((?)(?))(((?)|(?)))((?))', '(?:(?)())(?:(?:()|()))(?:())'], - ['(?)(?)((?)|(?))', '(?)()(?:()|())'], - ['((?)|(?))(?)(?)', '(?:(?)|(?))()()'], - ]; - for (const [pattern, output] of tests) { - expect(toDetails(pattern, opts).pattern).toBe(output); - } - }); - - it('should keep only the first of duplicate names with target < ES2025', () => { - const opts = { - target: 'ES2024', - verbose: true, - }; + it('should preserve the name only for the first instance of duplicate names', () => { const tests = [ ['(?)(?)', '(?)()'], ['(?)|(?)', '(?)|()'], @@ -60,10 +31,30 @@ describe('CapturingGroup', () => { ['((?)|(?))(?)(?)', '(?:(?)|())()()'], ]; for (const [pattern, output] of tests) { - expect(toDetails(pattern, opts).pattern).toBe(output); + expect(toDetails(pattern, {verbose: true}).pattern).toBe(output); } }); + it('should store subpattern values from the first instance of duplicate names', () => { + const match = toRegExp('(?.)(?.)').exec('ab'); + expect(match.groups.n).toBe('a'); + expect([...match]).toEqual(['ab', 'a', 'b']); + }); + + // Matches Oniguruma behavior; ES2025 (which allows duplicate names across mutually exclusive + // alternation) differs since it would store the matched value from the participating group + it('should store subpattern values from the first instance of duplicate names in separate alternation paths', () => { + const re = toRegExp('(?a)(?b)|(?c)(?d)'); + + const match1 = re.exec('ab'); + expect(match1.groups.n).toBe('a'); + expect([...match1]).toEqual(['ab', 'a', 'b', undefined, undefined]); + + const match2 = re.exec('cd'); + expect(match2.groups.n).toBe(undefined); + expect([...match2]).toEqual(['cd', undefined, undefined, 'c', 'd']); + }); + // TODO: Add remaining }); }); diff --git a/spec/match-subroutine.spec.js b/spec/match-subroutine.spec.js index d5e7de4..20e6630 100644 --- a/spec/match-subroutine.spec.js +++ b/spec/match-subroutine.spec.js @@ -139,13 +139,13 @@ describe('Subroutine', () => { }); it('should transfer captured values on match results', () => { - expect(toRegExp(r`(?[ab])\g`).exec('ab').groups.n).toBe('b'); - expect(toRegExp(r`\g(?[ab])`).exec('ab').groups.n).toBe('b'); + expect(toRegExp(r`(?.)\g`).exec('ab').groups.n).toBe('b'); + expect(toRegExp(r`\g(?.)`).exec('ab').groups.n).toBe('b'); }); it('should transfer captured values on match results for child captures', () => { - expect(toRegExp(r`(?(?[ab]))\g`).exec('ab').groups.n2).toBe('b'); - expect(toRegExp(r`\g(?(?[ab]))`).exec('ab').groups.n2).toBe('b'); + expect(toRegExp(r`(?(?.))\g`).exec('ab').groups.n2).toBe('b'); + expect(toRegExp(r`\g(?(?.))`).exec('ab').groups.n2).toBe('b'); }); it('should transfer subpattern match indices', () => { diff --git a/src/generate.js b/src/generate.js index f574e6d..c43ac69 100644 --- a/src/generate.js +++ b/src/generate.js @@ -71,7 +71,6 @@ function generate(ast, options) { lastNode, maxRecursionDepth: rDepth, useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode), - useDuplicateNames: minTargetEs2025, useFlagMods: minTargetEs2025, useFlagV: minTargetEs2024, verbose: opts.verbose, @@ -248,11 +247,9 @@ function genBackreference({ref}, state) { function genCapturingGroup({name, number, alternatives, _originNumber}, state, gen) { if (name) { if (state.groupNames.has(name)) { - if (!state.useDuplicateNames) { - // Keep the name only in the first alternation path that used it; the transformer already - // stripped all but the first duplicate name per alternation path - name = null; - } + // Keep the name only in the first alternation path that used it; the transformer already + // stripped all but the first duplicate name per alternation path + name = null; } else { state.groupNames.add(name); } diff --git a/src/options.js b/src/options.js index 7f84232..77f6de1 100644 --- a/src/options.js +++ b/src/options.js @@ -1,4 +1,4 @@ -import {envSupportsDuplicateNames, envSupportsFlagGroups, envSupportsFlagV} from './utils.js'; +import {envSupportsFlagGroups, envSupportsFlagV} from './utils.js'; const Accuracy = /** @type {const} */ ({ default: 'default', @@ -66,9 +66,7 @@ function getOptions(options) { }, }; if (opts.target === 'auto') { - opts.target = (envSupportsDuplicateNames && envSupportsFlagGroups) ? - 'ES2025' : - (envSupportsFlagV ? 'ES2024' : 'ES2018'); + opts.target = envSupportsFlagGroups ? 'ES2025' : (envSupportsFlagV ? 'ES2024' : 'ES2018'); } return opts; } diff --git a/src/transform.js b/src/transform.js index 390220e..8363ef9 100644 --- a/src/transform.js +++ b/src/transform.js @@ -435,11 +435,12 @@ const SecondPassVisitor = { multiplexCapturesToLeftByRef.get(node.name).push({node, origin}); } - // ## Track data for duplicate names within an alternation path - // Pre-ES2025 doesn't allow duplicate names, but ES2025+ allows duplicate names that are - // unique per mutually exclusive alternation path. So if using a duplicate name for this - // path, remove the name from all but the latest instance (also applies to groups added via - // subroutine expansion) + // ## Track data for duplicate names + // Pre-ES2025 doesn't allow duplicate names, but ES2025 allows duplicate names that are + // unique per mutually exclusive alternation path. However, Oniguruma's handling for named + // subpatterns on match results means we can't use this ES2025 feature even when in an ES2025 + // env. So, if using a duplicate name, remove the name from all but the first instance that + // wasn't created by subroutine expansion if (node.name) { const groupsWithSameName = getOrCreate(groupsByName, node.name, new Map()); let hasDuplicateNameToRemove = false; @@ -448,9 +449,7 @@ const SecondPassVisitor = { hasDuplicateNameToRemove = true; } else { for (const groupInfo of groupsWithSameName.values()) { - if (!groupInfo.hasDuplicateNameToRemove && canParticipateWithNode(groupInfo.node, node, { - ancestorsParticipate: true, - })) { + if (!groupInfo.hasDuplicateNameToRemove) { // Will change to an unnamed capture in a later pass hasDuplicateNameToRemove = true; break; @@ -539,9 +538,7 @@ const ThirdPassVisitor = { return; } const reffedNodes = state.reffedNodesByReferencer.get(node); - const participants = reffedNodes.filter(reffed => canParticipateWithNode(reffed, node, { - ancestorsParticipate: false, - })); + const participants = reffedNodes.filter(reffed => canParticipateWithNode(reffed, node)); // For the backref's `ref`, use `number` rather than `name` because group names might have been // removed if they're duplicates within their alternation path, or they might be removed later // by the generator (depending on target) if they're duplicates within the overall pattern. @@ -621,7 +618,7 @@ function areFlagsEqual(a, b) { return a.dotAll === b.dotAll && a.ignoreCase === b.ignoreCase; } -function canParticipateWithNode(capture, node, {ancestorsParticipate}) { +function canParticipateWithNode(capture, node) { // Walks to the left (prev siblings), down (sibling descendants), up (parent), then back down // (parent's prev sibling descendants) the tree in a loop let rightmostPoint = node; @@ -636,7 +633,7 @@ function canParticipateWithNode(capture, node, {ancestorsParticipate}) { } if (rightmostPoint === capture) { // Capture is ancestor of node - return ancestorsParticipate; + return false; } const kidsOfParent = getKids(rightmostPoint.parent); for (const kid of kidsOfParent) { diff --git a/src/utils.js b/src/utils.js index 182fac3..8f45bf2 100644 --- a/src/utils.js +++ b/src/utils.js @@ -3,15 +3,6 @@ import {EsVersion, Target} from './options.js'; const cp = String.fromCodePoint; const r = String.raw; -const envSupportsDuplicateNames = (() => { - try { - new RegExp('(?)|(?)'); - } catch { - return false; - } - return true; -})(); - const envSupportsFlagGroups = (() => { try { new RegExp('(?i:)'); @@ -62,7 +53,6 @@ function throwIfNot(value, msg) { export { cp, - envSupportsDuplicateNames, envSupportsFlagGroups, envSupportsFlagV, getNewCurrentFlags,