Skip to content

Commit

Permalink
Remove the after_search_start_or_subpattern subclass strategy since s…
Browse files Browse the repository at this point in the history
…plitting the pattern can cause issues with emulation groups
  • Loading branch information
slevithan committed Nov 15, 2024
1 parent 9b95676 commit c0f04fc
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 103 deletions.
10 changes: 0 additions & 10 deletions spec/match-search-start.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,5 @@ describe('Assertion: Search start', () => {
expect(toRegExp(r`(?=;)(?!\G)^`).exec(';;\n;')?.index).toBe(3);
expect(toRegExp(r`(?=;)^(?!\G)`).exec(';;\n;')?.index).toBe(3);
});

// Leading `(?<=\G|…)` and similar
it('should apply after_search_start_or_subpattern', () => {
expect(toRegExp(r`(?<=\G|a)b`).exec('ba')?.index).toBe(0);
expect(toRegExp(r`(?<=\G|a)b`).exec('aba')?.index).toBe(1);
expect(toRegExp(r`(?<=\G|a)b`).exec('aaba')?.index).toBe(2);
expect(toRegExp(r`(?<=\G|a)b`).exec('cbbab')?.index).toBe(4);
expect(toRegExp(r`((?<=xy?|\G|a)b)`).exec('cbbab')?.index).toBe(4);
expect(toRegExp(r`(?<=\G|a)b`).exec('cbba')).toBeNull();
});
});
});
44 changes: 3 additions & 41 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ import {recursion} from 'regex-recursion';
}} Options
@typedef {{
useEmulationGroups: boolean;
strategy?: string;
subpattern?: string;
strategy: string;
}} SubclassOptions
*/

Expand Down Expand Up @@ -65,29 +64,15 @@ function toDetails(pattern, options) {
const atomized = atomic(pattern, {useEmulationGroups: !opts.avoidSubclass});
const useEmulationGroups = atomized !== pattern && !opts.avoidSubclass;
pattern = atomized;
let subpattern;
if (regexAst._strategy) {
// Look for an emulation marker added as part of the strategy. Do this after the pattern has
// been passed through Regex+ plugins, so they can operate on the full pattern
pattern = pattern.replace(/\(\?:\\p{sc=<<}\|(.*?)\|\\p{sc=>>}\)/s, (_, sub) => {
subpattern = sub;
return '';
});
}
const result = {
pattern,
flags: `${opts.hasIndices ? 'd' : ''}${opts.global ? 'g' : ''}${generated.flags}${generated.options.disable.v ? 'u' : 'v'}`,
};
if (useEmulationGroups || regexAst._strategy) {
result.subclass = {
useEmulationGroups,
strategy: regexAst._strategy ?? null,
};
if (regexAst._strategy) {
result.subclass.strategy = regexAst._strategy.name;
if (subpattern) {
result.subclass.subpattern = subpattern;
}
}
}
return result;
}
Expand Down Expand Up @@ -128,24 +113,20 @@ results from `toDetails` to produce the same result as `toRegExp`.
*/
class EmulatedRegExp extends RegExpSubclass {
#strategy;
#subpattern;
constructor(pattern, flags, options) {
const opts = {
useEmulationGroups: false,
strategy: null,
subpattern: null,
...options,
};
super(pattern, flags, {useEmulationGroups: opts.useEmulationGroups});
if (opts.strategy) {
this.#strategy = opts.strategy;
this.#subpattern = opts.subpattern;
// The third argument `options` isn't provided when regexes are copied as part of the internal
// handling of string methods `matchAll` and `split`
} else if (pattern instanceof EmulatedRegExp) {
// Can read private properties of the existing object since it was created by this class
this.#strategy = pattern.#strategy;
this.#subpattern = pattern.#subpattern;
}
}
/**
Expand All @@ -158,11 +139,10 @@ class EmulatedRegExp extends RegExpSubclass {
// Special case handling that requires coupling with pattern changes for the specific strategy
// in the transformer. These changes add emulation support for some common patterns that are
// otherwise unsupportable. Only one subclass strategy is supported per pattern
const exec = super.exec;
const useLastIndex = this.global || this.sticky;
const pos = this.lastIndex;
const exec = super.exec;
const strategy = this.#strategy;
const subpattern = this.#subpattern;

// ## Support leading `(^|\G)` and similar
if (strategy === 'line_or_search_start' && useLastIndex && this.lastIndex) {
Expand All @@ -188,24 +168,6 @@ class EmulatedRegExp extends RegExpSubclass {
return match;
}

// ## Support leading `(?<=\G|…)` and similar
// Note: Leading `(?<=\G)` without other alts is supported without the need for a subclass
if (strategy === 'after_search_start_or_subpattern') {
let match = exec.call(this, str);
if (
!match ||
// Satisfied `\G` in lookbehind
match.index === pos
) {
return match;
}
const assembled = new RegExp(`(?<=${subpattern})${this.source}`, this.flags);
assembled.lastIndex = pos;
match = exec.call(assembled, str);
this.lastIndex = assembled.lastIndex;
return match;
}

return exec.call(this, str);
}
}
Expand Down
55 changes: 6 additions & 49 deletions src/subclass-strategies.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import {AstAssertionKinds, AstTypes, createAlternative, createGroup, createUnicodeProperty, isLookaround} from './parse.js';
import {adoptAndSwapKids} from './transform.js';
import {AstAssertionKinds, AstTypes, isLookaround} from './parse.js';
import {hasOnlyChild} from './utils.js';

// Special case handling that requires coupling with a `RegExp` subclass (see `EmulatedRegExp`).
Expand Down Expand Up @@ -46,65 +45,23 @@ function applySubclassStrategies(ast, accuracy) {
} else {
firstElIn.alternatives.shift();
}
return {name: 'line_or_search_start'};
return 'line_or_search_start';
}
}

// ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
if (isNegatedSearchStart(firstElIn)) {
// Remove the lookaround
// Remove the negative lookaround
firstElIn.parent.elements.shift();
return {name: 'not_search_start'};
return 'not_search_start';
}
const negGIndex = singleAltIn.elements.findIndex(el => isNegatedSearchStart(el));
if (negGIndex > -1 && singleAltIn.elements.every(el => el.type === AstTypes.Assertion)) {
// Remove the lookaround
// Remove the negative lookaround
singleAltIn.elements.splice(negGIndex, 1);
return {name: 'not_search_start'};
return 'not_search_start';
}

// ## Strategy `after_search_start_or_subpattern`: Support leading `(?<=\G|…)` and similar
// Note: Leading `(?<=\G)` without other alts is supported without the need for a subclass
if (
firstElIn.kind === AstAssertionKinds.lookbehind &&
!firstElIn.negate &&
firstElIn.alternatives.length > 1
) {
const siblingAlts = [];
let hasGAlt = false;
firstElIn.alternatives.forEach(alt => {
if (alt.elements.length === 1 && alt.elements[0].kind === AstAssertionKinds.search_start) {
hasGAlt = true;
} else {
siblingAlts.push(alt);
}
});
if (hasGAlt && siblingAlts.length) {
let supported = true;
if (siblingAlts.some(alt => alt.elements.some(el => {
// Check for nodes that are or can include captures
return el.type === AstTypes.CapturingGroup || el.type === AstTypes.Group || el.type === AstTypes.Subroutine || isLookaround(el);
}))) {
if (accuracy === 'loose') {
supported = false;
} else {
throw new Error(r`Uses "\G" in a way that's unsupported`);
}
}
if (supported) {
// [HACK] Replace the lookbehind with an emulation marker since it isn't easy from here to
// acurately extract what will later become the generated subpattern
const emulationGroup = adoptAndSwapKids(createGroup(), [
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('<<', {skipPropertyNameValidation: true})]),
...siblingAlts,
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('>>', {skipPropertyNameValidation: true})]),
]);
emulationGroup.parent = firstElIn.parent;
firstElIn.parent.elements[0] = emulationGroup;
return {name: 'after_search_start_or_subpattern'};
}
}
}
return null;
}

Expand Down
4 changes: 1 addition & 3 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ import emojiRegex from 'emoji-regex-xs';
pattern: Object;
flags: Object;
options: Object;
_strategy?: {
name: string;
};
_strategy?: string;
}} RegexAst
*/
/**
Expand Down

0 comments on commit c0f04fc

Please sign in to comment.