diff --git a/README.md b/README.md
index 2beb166..4922610 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,7 @@ function toRegExp(
pattern: string,
flags?: OnigurumaFlags,
options?: (CompileOptions & {
- allowSubclassBasedEmulation?: boolean;
+ avoidSubclass?: boolean;
})
): RegExp;
```
@@ -478,7 +478,7 @@ Notice that nearly every feature below has at least subtle differences from Java
✔ Scripts
✔ Aliases
✔ POSIX properties
- ✔ Negate with \p{^…}
, \P{^…}
+ ✔ Invert with \p{^…}
, \P{^…}
✔ Insignificant spaces, underscores, and casing in names
✔ \p
, \P
without {
is an identity escape
✔ Error for key prefixes
@@ -640,7 +640,7 @@ Notice that nearly every feature below has at least subtle differences from Java
{,n}
for implicit min 0{,n}
for min 0u
, v
)\k
without <
, '
is an identity escape\k
without <
'
is an identity escape\g
without <
, '
is an identity escape\g
without <
'
is an identity escape
diff --git a/scripts/utils.js b/scripts/utils.js index 17ddcdd..85169a3 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -75,7 +75,7 @@ function getMatchDetails(match) { const transpiledRegExpResult = (pattern, str, pos) => { let result; try { - const options = {allowSubclassBasedEmulation: true}; + const options = {}; if (pos) { options.global = true; } diff --git a/spec/match-assertion.spec.js b/spec/match-assertion.spec.js index 8a0a659..8f608a4 100644 --- a/spec/match-assertion.spec.js +++ b/spec/match-assertion.spec.js @@ -53,6 +53,9 @@ describe('Assertion', () => { }); describe('search_start', () => { + // TODO: Consider enabling `avoidSubclass` for all of these except when specifically testing + // subclass strategies + it('should match at the start of the search', () => { expect('a').toExactlyMatch(r`\Ga`); expect([ @@ -133,7 +136,7 @@ describe('Assertion', () => { r`(?:(?=\G))?a`, r`(?=\G)a|b`, ].forEach(pattern => { - expect(() => compile(pattern)).toThrow(); + expect(() => compile(pattern, '', {avoidSubclass: true})).toThrow(); }); }); @@ -149,19 +152,20 @@ describe('Assertion', () => { r`(?:(?<=\G))?a`, r`(?<=\G)a|b`, ].forEach(pattern => { - expect(() => compile(pattern)).toThrow(); + expect(() => compile(pattern, '', {avoidSubclass: true})).toThrow(); }); }); it('should throw if leading in a leading positive lookbehind', () => { - // Matches at index 3 within `abc`, but doesn't match within `aabc`. Emulatable by replacing - // `\G` with `^`, slicing the string to `lastIndex`, and doing a non-sticky search + // [Oniguruma] Matches at index 3 within `abc`, but doesn't match within `aabc` + // [TODO] Emulatable by replacing `\G` with `^`, slicing the string to `lastIndex`, and doing + // a non-sticky search expect(() => compile(r`(?<=\Gabc)`)).toThrow(); }); it('should throw if leading in a leading negative lookaround', () => { - expect(() => compile(r`(?!\G)a`)).toThrow(); - expect(() => compile(r`(? compile(r`(?!\G)a`, '', {avoidSubclass: true})).toThrow(); + expect(() => compile(r`(? { }); describe('subclass strategies', () => { - const opts = {allowSubclassBasedEmulation: true}; - // Leading `(^|\G)` and similar it('should apply line_or_search_start', () => { // Matches with `^` since not global - expect(toRegExp(r`(^|\G)a`, '', opts).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(^|\G)a`).exec('b\na')?.index).toBe(2); // Match the first 3 and last 1 expect('aaabaaacaa\na'.match(toRegExp( - r`(^|\G)a`, '', {...opts, global: true} + r`(^|\G)a`, '', {global: true} ))).toEqual(['a', 'a', 'a', 'a']); - expect(toRegExp(r`(?:^|\G)a`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`(\G|^)a`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`(?:(\G|^)a)`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`((\G|^)a)`, '', opts).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?:^|\G)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(\G|^)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?:(\G|^)a)`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`((\G|^)a)`).exec('b\na')?.index).toBe(2); }); // Leading `(?!\G)` and similar it('should apply not_search_start', () => { // Leading - expect(toRegExp(r`(?!\G)a`, '', opts).exec('aba')?.index).toBe(2); - expect(toRegExp(r`(? { - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('ba')?.index).toBe(0); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('aba')?.index).toBe(1); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('aaba')?.index).toBe(2); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('cbbab')?.index).toBe(4); - expect(toRegExp(r`((?<=xy?|\G|a)b)`, '', opts).exec('cbbab')?.index).toBe(4); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('cbba')).toBeNull(); + expect(toRegExp(r`(?<=\G|a)b`).exec('ba')?.index).toBe(0); + expect(toRegExp(r`(?<=\G|a)b`).exec('aba')?.index).toBe(1); + expect(toRegExp(r`(?<=\G|a)b`).exec('aaba')?.index).toBe(2); + expect(toRegExp(r`(?<=\G|a)b`).exec('cbbab')?.index).toBe(4); + expect(toRegExp(r`((?<=xy?|\G|a)b)`).exec('cbbab')?.index).toBe(4); + expect(toRegExp(r`(?<=\G|a)b`).exec('cbba')).toBeNull(); }); }); }); diff --git a/src/compile.js b/src/compile.js index 1816b71..cd13f01 100644 --- a/src/compile.js +++ b/src/compile.js @@ -17,7 +17,7 @@ import {recursion} from 'regex-recursion'; tmGrammar?: boolean; }} CompileOptions @typedef {CompileOptions & { - allowSubclassBasedEmulation?: boolean; + avoidSubclass?: boolean; }} ToRegExpOptions */ @@ -57,7 +57,7 @@ function compileInternal(pattern, flags, options) { }); const regexAst = transform(onigurumaAst, { accuracy: opts.accuracy, - allowSubclassBasedEmulation: opts.allowSubclassBasedEmulation, + avoidSubclass: opts.avoidSubclass, bestEffortTarget: opts.target, }); const generated = generate(regexAst, opts); @@ -92,9 +92,9 @@ function getOptions(options) { return { // Sets the level of emulation rigor/strictness accuracy: 'default', - // Allows advanced emulation strategies that rely on returning a `RegExp` subclass with an - // overridden `exec` method. A subclass is only used if needed for the given pattern - allowSubclassBasedEmulation: false, + // Prevents use of advanced emulation strategies that rely on returning a `RegExp` subclass, + // resulting in certain patterns not being emulatable + avoidSubclass: false, // Include JS flag `g` in the result global: false, // Include JS flag `d` in the result diff --git a/src/transform.js b/src/transform.js index 1598ec3..4aa6e8c 100644 --- a/src/transform.js +++ b/src/transform.js @@ -21,7 +21,7 @@ then down-convert to the desired JS target version. @param {import('./parse.js').OnigurumaAst} ast @param {{ accuracy?: keyof Accuracy; - allowSubclassBasedEmulation?: boolean; + avoidSubclass?: boolean; bestEffortTarget?: keyof Target; }} [options] @returns {RegexAst} @@ -35,12 +35,12 @@ function transform(ast, options) { // representations are hard to change after the fact in the generator to a best-effort // approximation based on the target, so produce the appropriate structure here. accuracy: 'default', - allowSubclassBasedEmulation: false, + avoidSubclass: false, bestEffortTarget: 'ESNext', ...options, }; // AST changes that work together with a `RegExp` subclass to add advanced emulation - const strategy = opts.allowSubclassBasedEmulation ? applySubclassStrategies(ast, opts.accuracy) : null; + const strategy = opts.avoidSubclass ? null : applySubclassStrategies(ast, opts.accuracy); const firstPassState = { accuracy: opts.accuracy, flagDirectivesByAlt: new Map(), @@ -578,6 +578,7 @@ function applySubclassStrategies(ast, accuracy) { return null; } const hasWrapperGroup = + alts[0].elements.length === 1 && (firstEl.type === AstTypes.CapturingGroup || firstEl.type === AstTypes.Group) && firstEl.alternatives.length === 1; // First element within first group if the group doesn't contain top-level alternation, else just