From b4734fdf28a61399aa3e304f1849df9db3b014f2 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Sun, 21 Jul 2024 18:59:15 +0200 Subject: [PATCH] Don't allow nested groups to use duplicate names within DEFINE --- README.md | 2 +- spec/subroutines-spec.js | 11 ++++++++--- src/subroutines.js | 27 ++++++++++++++++++++------- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 3cb3b7f..6d1e01e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![bundle size](https://deno.bundlejs.com/badge?q=regex&treeshake=[*])](https://bundlejs.com/?q=regex&treeshake=[*]) -`regex` is a template tag that extends JavaScript regular expressions with key features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that maintain or exceed native performance. It's also lightweight, supports all ES2024+ regex functionality, and can be used dependency-free as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex). +`regex` is a template tag that extends JavaScript regular expressions with features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that equal or exceed native performance. It's also lightweight, supports all ES2024+ regex features, and can be used as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex) to avoid any dependencies or runtime cost. Highlights include support for free spacing and comments, atomic groups via `(?>…)` that can help you avoid [ReDoS](https://en.wikipedia.org/wiki/ReDoS), subroutines via `\g` that enable powerful composition, and context-aware interpolation of regexes, escaped strings, and partial patterns. diff --git a/spec/subroutines-spec.js b/spec/subroutines-spec.js index a9ec0ae..c5b985e 100644 --- a/spec/subroutines-spec.js +++ b/spec/subroutines-spec.js @@ -219,7 +219,7 @@ describe('subroutines', () => { regex`^.$(?(DEFINE) # comment )` ); - + // Flag x off expect(() => regex({__flagX: false})`^.$(?(DEFINE) )`).toThrow(); expect(() => { regex({__flagX: false})`^.$(?(DEFINE) # comment @@ -240,18 +240,23 @@ describe('subroutines', () => { ); }); - // Just documenting current behavior; this probably shouldn't be relied on + // Just documenting current behavior; this shouldn't be relied on it('should allow unreferenced groups', () => { expect('a').toMatch(regex`^.$(?(DEFINE)(?))`); expect('a').toMatch(regex`^.$(?(DEFINE)(?x))`); expect('a').toMatch(regex`^\g$(?(DEFINE)(?.)(?x))`); }); - it('should not allow duplicate group names', () => { + it('should not allow top-level groups to use duplicate names', () => { expect(() => regex`(?(DEFINE)(?)(?))`).toThrow(); expect(() => regex`(?(DEFINE)(?)(?(?)))`).toThrow(); expect(() => regex`(?)(?(DEFINE)(?))`).toThrow(); }); + + it('should not allow nested groups to use duplicate names', () => { + expect(() => regex`(?(DEFINE)(?(?))(?(?)))`).toThrow(); + expect(() => regex`(?)(?(DEFINE)(?(?)))`).toThrow(); + }); }); }); }); diff --git a/src/subroutines.js b/src/subroutines.js index ed62028..0fccb78 100644 --- a/src/subroutines.js +++ b/src/subroutines.js @@ -161,7 +161,7 @@ function processSubroutines(expression, namedGroups) { } /** -Strip a valid, trailing `(?(DEFINE)…)` +Strip `(?(DEFINE)…)` @param {string} expression @param {NamedCapturingGroupsMap} namedGroups @returns {string} @@ -173,8 +173,8 @@ function processDefinitionGroup(expression, namedGroups) { } const defineGroup = getGroup(expression, defineDelim); if (defineGroup.afterPos < expression.length) { - // Supporting DEFINE at positions other than the end of regexes would significantly complicate - // edge-case backref handling. Note: Flag x in fact permits trailing whitespace and comments + // Supporting DEFINE at positions other than the end would significantly complicate edge-case + // backref handling. Note: Flag x's preprocessing permits trailing whitespace and comments throw new Error('DEFINE group allowed only at the end of a regex'); } else if (defineGroup.afterPos > expression.length) { throw new Error('DEFINE group is unclosed'); @@ -185,16 +185,29 @@ function processDefinitionGroup(expression, namedGroups) { while (match = contentsToken.exec(defineGroup.contents)) { const {captureName, unsupported} = match.groups; if (captureName) { + let group = getGroup(defineGroup.contents, match); + let duplicateName; if (!namedGroups.get(captureName).isUnique) { - throw new Error('Group names within DEFINE group must be unique'); + duplicateName = captureName; + } else { + const nestedNamedGroups = getNamedCapturingGroups(group.contents); + for (const name of nestedNamedGroups.keys()) { + if (!namedGroups.get(name).isUnique) { + duplicateName = name; + break; + } + } + } + if (duplicateName) { + throw new Error(`Group names within DEFINE must be unique; has duplicate "${duplicateName}"`); } - contentsToken.lastIndex = getGroup(defineGroup.contents, match).afterPos; + contentsToken.lastIndex = group.afterPos; continue; } if (unsupported) { // Since a DEFINE group is stripped from its expression, we can't easily check if - // unreferenced syntax within it is valid. Such syntax adds no value, so it's easiest to just - // not allow it + // unreferenced top-level syntax within it is valid. Such syntax serves no purpose, so it's + // easiest to not allow it throw new Error(`DEFINE group includes unsupported syntax at top level`); } }