Skip to content

Commit

Permalink
Don't allow nested groups to use duplicate names within DEFINE
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Jul 21, 2024
1 parent 5653642 commit b4734fd
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![bundle size](https://deno.bundlejs.com/badge?q=regex&treeshake=[*])](https://bundlejs.com/?q=regex&treeshake=[*])
</div>

`regex` is a template tag that extends JavaScript regular expressions with key features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that maintain or exceed native performance. It's also lightweight, supports all ES2024+ regex functionality, and can be used dependency-free as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex).
`regex` is a template tag that extends JavaScript regular expressions with features that make them more powerful and dramatically more readable. It returns native `RegExp` instances that equal or exceed native performance. It's also lightweight, supports all ES2024+ regex features, and can be used as a [Babel plugin](https://github.com/slevithan/babel-plugin-transform-regex) to avoid any dependencies or runtime cost.

Highlights include support for free spacing and comments, atomic groups via `(?>…)` that can help you avoid [ReDoS](https://en.wikipedia.org/wiki/ReDoS), subroutines via `\g<name>` that enable powerful composition, and context-aware interpolation of regexes, escaped strings, and partial patterns.

Expand Down
11 changes: 8 additions & 3 deletions spec/subroutines-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ describe('subroutines', () => {
regex`^.$(?(DEFINE) # comment
)`
);

// Flag x off
expect(() => regex({__flagX: false})`^.$(?(DEFINE) )`).toThrow();
expect(() => {
regex({__flagX: false})`^.$(?(DEFINE) # comment
Expand All @@ -240,18 +240,23 @@ describe('subroutines', () => {
);
});

// Just documenting current behavior; this probably shouldn't be relied on
// Just documenting current behavior; this shouldn't be relied on
it('should allow unreferenced groups', () => {
expect('a').toMatch(regex`^.$(?(DEFINE)(?<a>))`);
expect('a').toMatch(regex`^.$(?(DEFINE)(?<a>x))`);
expect('a').toMatch(regex`^\g<a>$(?(DEFINE)(?<a>.)(?<b>x))`);
});

it('should not allow duplicate group names', () => {
it('should not allow top-level groups to use duplicate names', () => {
expect(() => regex`(?(DEFINE)(?<a>)(?<a>))`).toThrow();
expect(() => regex`(?(DEFINE)(?<a>)(?<b>(?<a>)))`).toThrow();
expect(() => regex`(?<a>)(?(DEFINE)(?<a>))`).toThrow();
});

it('should not allow nested groups to use duplicate names', () => {
expect(() => regex`(?(DEFINE)(?<a>(?<b>))(?<c>(?<b>)))`).toThrow();
expect(() => regex`(?<a>)(?(DEFINE)(?<b>(?<a>)))`).toThrow();
});
});
});
});
27 changes: 20 additions & 7 deletions src/subroutines.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ function processSubroutines(expression, namedGroups) {
}

/**
Strip a valid, trailing `(?(DEFINE)…)`
Strip `(?(DEFINE)…)`
@param {string} expression
@param {NamedCapturingGroupsMap} namedGroups
@returns {string}
Expand All @@ -173,8 +173,8 @@ function processDefinitionGroup(expression, namedGroups) {
}
const defineGroup = getGroup(expression, defineDelim);
if (defineGroup.afterPos < expression.length) {
// Supporting DEFINE at positions other than the end of regexes would significantly complicate
// edge-case backref handling. Note: Flag x in fact permits trailing whitespace and comments
// Supporting DEFINE at positions other than the end would significantly complicate edge-case
// backref handling. Note: Flag x's preprocessing permits trailing whitespace and comments
throw new Error('DEFINE group allowed only at the end of a regex');
} else if (defineGroup.afterPos > expression.length) {
throw new Error('DEFINE group is unclosed');
Expand All @@ -185,16 +185,29 @@ function processDefinitionGroup(expression, namedGroups) {
while (match = contentsToken.exec(defineGroup.contents)) {
const {captureName, unsupported} = match.groups;
if (captureName) {
let group = getGroup(defineGroup.contents, match);
let duplicateName;
if (!namedGroups.get(captureName).isUnique) {
throw new Error('Group names within DEFINE group must be unique');
duplicateName = captureName;
} else {
const nestedNamedGroups = getNamedCapturingGroups(group.contents);
for (const name of nestedNamedGroups.keys()) {
if (!namedGroups.get(name).isUnique) {
duplicateName = name;
break;
}
}
}
if (duplicateName) {
throw new Error(`Group names within DEFINE must be unique; has duplicate "${duplicateName}"`);
}
contentsToken.lastIndex = getGroup(defineGroup.contents, match).afterPos;
contentsToken.lastIndex = group.afterPos;
continue;
}
if (unsupported) {
// Since a DEFINE group is stripped from its expression, we can't easily check if
// unreferenced syntax within it is valid. Such syntax adds no value, so it's easiest to just
// not allow it
// unreferenced top-level syntax within it is valid. Such syntax serves no purpose, so it's
// easiest to not allow it
throw new Error(`DEFINE group includes unsupported syntax at top level`);
}
}
Expand Down

0 comments on commit b4734fd

Please sign in to comment.