Skip to content

Commit

Permalink
Fix edge case subroutine backref bugs
Browse files (browse the repository at this point in the history)
  • Loading branch information
slevithan committed Jul 20, 2024
1 parent: 2b1eab1; commit: 89e6321
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 24 deletions.
8 changes: 5 additions & 3 deletions spec/subroutines-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ describe('subroutines', () => {
});

it('should rewrite named and numbered backreferences as needed', () => {
// Test the *output* to make sure each adjustment is precise and works correctly even in cases
// Test the *output* to ensure each adjustment is precise and works correctly even in cases
// where there are discrete backreferences that each match empty strings
const cases = [
[String.raw`()(?<a>\1)\g<a>`, String.raw`()(?<a>\1)(\1)`],
Expand All @@ -59,10 +59,12 @@ describe('subroutines', () => {
[String.raw`(?<a>)\g<a>()\2\g<a>()\3`, String.raw`(?<a>)()()\3()()\5`],
[String.raw`\1\2\3(?<a>\1\2\3()\1\2\3)\1\2\3\g<a>\1\2\3()\1\2\3\g<a>\1\2\3()\1\2\3`, String.raw`\1\2\3(?<a>\1\2\3()\1\2\3)\1\2\3(\3\4\5()\3\4\5)\1\2\5()\1\2\5(\6\7\8()\6\7\8)\1\2\5()\1\2\5`],
[String.raw`\g<a>(?<a>\1)`, String.raw`(\1)(?<a>\2)`],
[String.raw`(?<a>\k<a>)\g<a>`, String.raw`(?<a>\k<a>)(\k<a>)`],
[String.raw`\g<a>(?<a>\k<a>)`, String.raw`(\k<a>)(?<a>\k<a>)`],
[String.raw`(?<a>\k<a>)\g<a>`, String.raw`(?<a>\k<a>)(\2)`],
[String.raw`\g<a>(?<a>\k<a>)`, String.raw`(\1)(?<a>\k<a>)`],
[String.raw`(?<a>(?<b>)\k<b>)\g<a>`, String.raw`(?<a>(?<b>)\k<b>)(()\4)`],
[String.raw`\g<a>(?<a>(?<b>)\k<b>)`, String.raw`(()\2)(?<a>(?<b>)\k<b>)`],
[String.raw`(?<a>(?<b>\k<a>\k<b>))\g<a>\g<b>`, String.raw`(?<a>(?<b>\k<a>\k<b>))((\3\4))(\k<a>\5)`],
[String.raw`(?<a>(?<b>(?<c>\k<a>\k<b>\k<c>)))\g<a>\g<b>\g<c>`, String.raw`(?<a>(?<b>(?<c>\k<a>\k<b>\k<c>)))(((\4\5\6)))((\k<a>\7\8))(\k<a>\k<b>\9)`],
];
cases.forEach(([input, output]) => {
expect(regex({__flagN: false})({raw: [input]}).source).toBe(output);
Expand Down
43 changes: 22 additions & 21 deletions src/subroutines.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ function processSubroutines(expression, namedGroups) {
return expression;
}
const backrefIncrements = [0];
const numCapturesBeforeFirstReferencedBySubroutine = countCapturesBeforeFirstReferencedBySubroutine(expression);
let numCapturesPassedOutsideSubroutines = 0;
let numCapturesPassedInsideSubroutines = 0;
let openSubroutinesMap = new Map();
Expand Down Expand Up @@ -103,12 +102,13 @@ function processSubroutines(expression, namedGroups) {
} else if (backrefNum) {
// Beware: backref renumbering with subroutines is complicated
const num = +backrefNum;
let increment;
let increment = 0;
if (openSubroutinesMap.size) {
if (num > numCapturesBeforeFirstReferencedBySubroutine) {
const numCapturesBeforeReferencedGroup = countCapturesBeforeGroupName(expression, openSubroutinesStack[0]);
if (num > numCapturesBeforeReferencedGroup) {
increment = numCapturesPassedOutsideSubroutines +
numCapturesPassedInsideSubroutines -
numCapturesBeforeFirstReferencedBySubroutine -
numCapturesBeforeReferencedGroup -
subroutine.numCaptures;
}
} else {
Expand All @@ -121,19 +121,23 @@ function processSubroutines(expression, namedGroups) {
}
} else if (backrefName) {
if (openSubroutinesMap.size) {
// Search for the corresponding group in the contents of the subroutine stack
let found = false;
for (const s of openSubroutinesStack) {
if (hasUnescaped(
openSubroutinesMap.get(s).contents,
String.raw`\(\?<${backrefName}>`,
Context.DEFAULT
)) {
found = true;
break;
let isGroupFromThisSubroutine = false;
if (backrefName === openSubroutinesStack[0]) {
isGroupFromThisSubroutine = true;
// Search for the group in the contents of the subroutine stack
} else {
for (const s of openSubroutinesStack) {
if (hasUnescaped(
openSubroutinesMap.get(s).contents,
String.raw`\(\?<${backrefName}>`,
Context.DEFAULT
)) {
isGroupFromThisSubroutine = true;
break;
}
}
}
if (found) {
if (isGroupFromThisSubroutine) {
// Point to the group, then let normal renumbering work in the next loop iteration
const adjusted = `\\${getCaptureNum(expression, backrefName)}`;
result = spliceStr(result, index, m, adjusted);
Expand Down Expand Up @@ -213,19 +217,16 @@ function getGroup(expression, delimMatch) {

/**
@param {string} expression
@param {string} groupName
@returns {number}
*/
function countCapturesBeforeFirstReferencedBySubroutine(expression) {
const subroutines = new Set();
forEachUnescaped(expression, subroutinePattern, ({groups: {subroutineName}}) => {
subroutines.add(subroutineName);
}, Context.DEFAULT);
function countCapturesBeforeGroupName(expression, groupName) {
let num = 0;
let pos = 0;
let match;
while (match = execUnescaped(expression, capturingStartPattern, pos, Context.DEFAULT)) {
const {0: m, index, groups: {captureName}} = match;
if (subroutines.has(captureName)) {
if (captureName === groupName) {
break;
}
num++;
Expand Down

0 comments on commit 89e6321

Please sign in to comment.