Skip to content

Commit

Permalink
Subroutines: Avoid adding capturing groups if there are no backrefs i…
Browse files Browse the repository at this point in the history
…n the regex
  • Loading branch information
slevithan committed Jul 22, 2024
1 parent 576358d commit 728e115
Showing 1 changed file with 38 additions and 36 deletions.
74 changes: 38 additions & 36 deletions src/subroutines.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ function processSubroutines(expression, namedGroups) {
if (!hasUnescaped(expression, '\\\\g<', Context.DEFAULT)) {
return expression;
}
const hasBackrefs = hasUnescaped(expression, '\\\\(?:[1-9]|k<[^>]+>)', Context.DEFAULT);
const backrefIncrements = [0];
const openSubroutinesMap = new Map();
const openSubroutinesStack = [];
Expand Down Expand Up @@ -70,9 +71,10 @@ function processSubroutines(expression, namedGroups) {
numCapturesPassedInsideSubroutines += numCaptures;
// Wrap value in case it has top-level alternation or is followed by a quantifier. The
// wrapper also marks the end of the expanded contents, which we'll track using
// `unclosedGroupCount`. Wrap with '()' instead of '(?:)' so that backrefs line up, in case
// there are backrefs inside the subroutine that refer to their parent capturing group
const subroutineValue = `(${contents})`;
// `unclosedGroupCount`. If there are any backrefs in the expression, wrap with '()'
// instead of '(?:)' so that backrefs line up, in case there are backrefs inside the
// subroutine that refer to their parent capturing group
const subroutineValue = `${hasBackrefs ? '(' : '(?:'}${contents})`;
openSubroutinesMap.set(subroutineName, {
contents,
unclosedGroupCount: countSubgroups(subroutineValue),
Expand Down Expand Up @@ -217,21 +219,6 @@ function processDefinitionGroup(expression, namedGroups) {
return expression.slice(0, defineStart.index);
}

/**
Reads the contents of the group whose opening delimiter was matched by `delimMatch`, and reports
where scanning should resume afterwards.
@param {string} expression
@param {RegExpExecArray} delimMatch Match for the group's opening delimiter within `expression`
@returns {{contents: string; afterPos: number}}
*/
function getGroup(expression, delimMatch) {
const {index, 0: delim} = delimMatch;
// The group's contents begin immediately after the matched delimiter text
const start = index + delim.length;
const contents = getGroupContents(expression, start);
// The extra 1 steps over the single char that follows the contents (presumably the closing ')')
return {contents, afterPos: start + contents.length + 1};
}

/**
@param {string} expression
@param {string} groupName
Expand All @@ -252,6 +239,16 @@ function countCapturesBeforeGroupName(expression, groupName) {
return num;
}

/**
Counts the unescaped `(` characters that `forEachUnescaped` reports for `expression` in the
default context.
@param {string} expression
@returns {number}
*/
function countSubgroups(expression) {
const openParen = String.raw`\(`;
let total = 0;
// The callback runs once per reported match; only the running tally matters
const tally = () => total++;
forEachUnescaped(expression, openParen, tally, Context.DEFAULT);
return total;
}

/**
@param {string} expression
@param {string} groupName
Expand All @@ -273,14 +270,18 @@ function getCaptureNum(expression, groupName) {
}

/**
@param {string} str
@param {number} pos
@param {string} oldValue
@param {string} newValue
@returns {string}
@param {string} expression
@param {RegExpExecArray} delimMatch
@returns {{contents: string; afterPos: number}}
*/
function spliceStr(str, pos, oldValue, newValue) {
return str.slice(0, pos) + newValue + str.slice(pos + oldValue.length);
function getGroup(expression, delimMatch) {
const {index, 0: delim} = delimMatch;
// The group's contents begin immediately after the matched delimiter text
const start = index + delim.length;
const contents = getGroupContents(expression, start);
// The extra 1 steps over the single char that follows the contents (presumably the closing ')')
return {contents, afterPos: start + contents.length + 1};
}

/**
Expand Down Expand Up @@ -311,20 +312,21 @@ function getNamedCapturingGroups(expression, {includeContents} = {}) {
}

/**
Counts the unescaped `(` characters that `forEachUnescaped` reports for `expression` in the
default context.
@param {string} expression
@returns {number}
*/
function countSubgroups(expression) {
const openParen = String.raw`\(`;
let total = 0;
// The callback runs once per reported match; only the running tally matters
const tally = () => total++;
forEachUnescaped(expression, openParen, tally, Context.DEFAULT);
return total;
}

/**
Returns the final element of an array or the final UTF-16 code unit of a string; `undefined` when
the input is empty.
Remove when support for ES2022 string/array method `at` (Node.js 16.6+) is no longer an issue
@param {string | any[]} strOrArr
@returns {any}
*/
function lastOf(strOrArr) {
const {length} = strOrArr;
return strOrArr[length - 1];
}

/**
Builds a new string in which the `oldValue.length` chars starting at `pos` are replaced by
`newValue`. Only the length of `oldValue` is used; its presence at `pos` is not checked.
@param {string} str
@param {number} pos
@param {string} oldValue
@param {string} newValue
@returns {string}
*/
function spliceStr(str, pos, oldValue, newValue) {
const resumeAt = pos + oldValue.length;
const before = str.slice(0, pos);
const after = str.slice(resumeAt);
return before + newValue + after;
}

0 comments on commit 728e115

Please sign in to comment.