diff --git a/README.md b/README.md index 2beb166..4922610 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ function toRegExp( pattern: string, flags?: OnigurumaFlags, options?: (CompileOptions & { - allowSubclassBasedEmulation?: boolean; + avoidSubclass?: boolean; }) ): RegExp; ``` @@ -478,7 +478,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Scripts
✔ Aliases
✔ POSIX properties
- ✔ Negate with \p{^…}, \P{^…}
+ ✔ Invert with \p{^…}, \P{^…}
✔ Insignificant spaces, underscores, and casing in names
\p, \P without { is an identity escape
✔ Error for key prefixes
@@ -640,7 +640,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✅ ✔ Includes all JS forms
- ✔ Adds form {,n} for implicit min 0
+ ✔ Adds {,n} for min 0
✔ Explicit bounds have upper limit of 100,000 (unlimited in JS)
✔ Error with assertions (same as JS with flag u, v)
@@ -731,7 +731,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Error if named capture used
✔ Allows leading 0s
✔ Refs the most recent of a capture/subroutine set
- ✔ \k without <, ' is an identity escape
+ ✔ \k without < ' is an identity escape
@@ -779,7 +779,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Doesn't alter backref nums
✔ Reuses flags from the reffed group (ignores local flags)
✔ Replaces most recent captured values (for backrefs)
- ✔ \g without <, ' is an identity escape
+ ✔ \g without < ' is an identity escape
✔ Error if named capture used
diff --git a/demo/demo.js b/demo/demo.js index 4b1b487..650471a 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -6,7 +6,7 @@ const state = { }, opts: { accuracy: getValue('option-accuracy'), - allowSubclassBasedEmulation: getValue('option-allowSubclassBasedEmulation'), + avoidSubclass: getValue('option-avoidSubclass'), global: getValue('option-global'), hasIndices: getValue('option-hasIndices'), maxRecursionDepth: getValue('option-maxRecursionDepth'), @@ -35,7 +35,7 @@ function showOutput(el) { // Use `compile` but display output as if `toRegExp` was called. This avoids erroring when the // selected `target` includes features that don't work in the user's browser const compiled = OnigurumaToES.compile(input, flags, opts); - if (opts.allowSubclassBasedEmulation && compiled._internal) { + if (compiled._internal) { infoEl.classList.remove('hidden'); outputEl.classList.add('subclass'); output = getFormattedSubclass(compiled.pattern, compiled.flags, { diff --git a/demo/index.html b/demo/index.html index b176da4..34799af 100644 --- a/demo/index.html +++ b/demo/index.html @@ -72,8 +72,8 @@

Try it

diff --git a/scripts/utils.js b/scripts/utils.js index 17ddcdd..85169a3 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -75,7 +75,7 @@ function getMatchDetails(match) { const transpiledRegExpResult = (pattern, str, pos) => { let result; try { - const options = {allowSubclassBasedEmulation: true}; + const options = {}; if (pos) { options.global = true; } diff --git a/spec/match-assertion.spec.js b/spec/match-assertion.spec.js index 8a0a659..8f608a4 100644 --- a/spec/match-assertion.spec.js +++ b/spec/match-assertion.spec.js @@ -53,6 +53,9 @@ describe('Assertion', () => { }); describe('search_start', () => { + // TODO: Consider enabling `avoidSubclass` for all of these except when specifically testing + // subclass strategies + it('should match at the start of the search', () => { expect('a').toExactlyMatch(r`\Ga`); expect([ @@ -133,7 +136,7 @@ describe('Assertion', () => { r`(?:(?=\G))?a`, r`(?=\G)a|b`, ].forEach(pattern => { - expect(() => compile(pattern)).toThrow(); + expect(() => compile(pattern, '', {avoidSubclass: true})).toThrow(); }); }); @@ -149,19 +152,20 @@ describe('Assertion', () => { r`(?:(?<=\G))?a`, r`(?<=\G)a|b`, ].forEach(pattern => { - expect(() => compile(pattern)).toThrow(); + expect(() => compile(pattern, '', {avoidSubclass: true})).toThrow(); }); }); it('should throw if leading in a leading positive lookbehind', () => { - // Matches at index 3 within `abc`, but doesn't match within `aabc`. Emulatable by replacing - // `\G` with `^`, slicing the string to `lastIndex`, and doing a non-sticky search + // [Oniguruma] Matches at index 3 within `abc`, but doesn't match within `aabc` + // [TODO] Emulatable by replacing `\G` with `^`, slicing the string to `lastIndex`, and doing + // a non-sticky search expect(() => compile(r`(?<=\Gabc)`)).toThrow(); }); it('should throw if leading in a leading negative lookaround', () => { - expect(() => compile(r`(?!\G)a`)).toThrow(); - expect(() => compile(r`(? compile(r`(?!\G)a`, '', {avoidSubclass: true})).toThrow(); + expect(() => compile(r`(? { }); describe('subclass strategies', () => { - const opts = {allowSubclassBasedEmulation: true}; - // Leading `(^|\G)` and similar it('should apply line_or_search_start', () => { // Matches with `^` since not global - expect(toRegExp(r`(^|\G)a`, '', opts).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(^|\G)a`).exec('b\na')?.index).toBe(2); // Match the first 3 and last 1 expect('aaabaaacaa\na'.match(toRegExp( - r`(^|\G)a`, '', {...opts, global: true} + r`(^|\G)a`, '', {global: true} ))).toEqual(['a', 'a', 'a', 'a']); - expect(toRegExp(r`(?:^|\G)a`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`(\G|^)a`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`(?:(\G|^)a)`, '', opts).exec('b\na')?.index).toBe(2); - expect(toRegExp(r`((\G|^)a)`, '', opts).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?:^|\G)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(\G|^)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?:(\G|^)a)`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`((\G|^)a)`).exec('b\na')?.index).toBe(2); }); // Leading `(?!\G)` and similar it('should apply not_search_start', () => { // Leading - expect(toRegExp(r`(?!\G)a`, '', opts).exec('aba')?.index).toBe(2); - expect(toRegExp(r`(? { - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('ba')?.index).toBe(0); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('aba')?.index).toBe(1); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('aaba')?.index).toBe(2); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('cbbab')?.index).toBe(4); - expect(toRegExp(r`((?<=xy?|\G|a)b)`, '', opts).exec('cbbab')?.index).toBe(4); - expect(toRegExp(r`(?<=\G|a)b`, '', opts).exec('cbba')).toBeNull(); + expect(toRegExp(r`(?<=\G|a)b`).exec('ba')?.index).toBe(0); + expect(toRegExp(r`(?<=\G|a)b`).exec('aba')?.index).toBe(1); + expect(toRegExp(r`(?<=\G|a)b`).exec('aaba')?.index).toBe(2); + expect(toRegExp(r`(?<=\G|a)b`).exec('cbbab')?.index).toBe(4); + expect(toRegExp(r`((?<=xy?|\G|a)b)`).exec('cbbab')?.index).toBe(4); + expect(toRegExp(r`(?<=\G|a)b`).exec('cbba')).toBeNull(); }); }); }); diff --git a/src/compile.js b/src/compile.js index 1816b71..cd13f01 100644 --- a/src/compile.js +++ b/src/compile.js @@ -17,7 +17,7 @@ import {recursion} from 'regex-recursion'; tmGrammar?: boolean; }} CompileOptions @typedef {CompileOptions & { - allowSubclassBasedEmulation?: boolean; + avoidSubclass?: boolean; }} ToRegExpOptions */ @@ -57,7 +57,7 @@ function compileInternal(pattern, flags, options) { }); const regexAst = transform(onigurumaAst, { accuracy: opts.accuracy, - allowSubclassBasedEmulation: opts.allowSubclassBasedEmulation, + avoidSubclass: opts.avoidSubclass, bestEffortTarget: opts.target, }); const generated = generate(regexAst, opts); @@ -92,9 +92,9 @@ function getOptions(options) { return { // Sets the level of emulation rigor/strictness accuracy: 'default', - // Allows advanced emulation strategies that rely on returning a `RegExp` subclass with an - // overridden `exec` method. A subclass is only used if needed for the given pattern - allowSubclassBasedEmulation: false, + // Prevents use of advanced emulation strategies that rely on returning a `RegExp` subclass, + // resulting in certain patterns not being emulatable + avoidSubclass: false, // Include JS flag `g` in the result global: false, // Include JS flag `d` in the result diff --git a/src/transform.js b/src/transform.js index 1598ec3..4aa6e8c 100644 --- a/src/transform.js +++ b/src/transform.js @@ -21,7 +21,7 @@ then down-convert to the desired JS target version. @param {import('./parse.js').OnigurumaAst} ast @param {{ accuracy?: keyof Accuracy; - allowSubclassBasedEmulation?: boolean; + avoidSubclass?: boolean; bestEffortTarget?: keyof Target; }} [options] @returns {RegexAst} @@ -35,12 +35,12 @@ function transform(ast, options) { // representations are hard to change after the fact in the generator to a best-effort // approximation based on the target, so produce the appropriate structure here. accuracy: 'default', - allowSubclassBasedEmulation: false, + avoidSubclass: false, bestEffortTarget: 'ESNext', ...options, }; // AST changes that work together with a `RegExp` subclass to add advanced emulation - const strategy = opts.allowSubclassBasedEmulation ? applySubclassStrategies(ast, opts.accuracy) : null; + const strategy = opts.avoidSubclass ? null : applySubclassStrategies(ast, opts.accuracy); const firstPassState = { accuracy: opts.accuracy, flagDirectivesByAlt: new Map(), @@ -578,6 +578,7 @@ function applySubclassStrategies(ast, accuracy) { return null; } const hasWrapperGroup = + alts[0].elements.length === 1 && (firstEl.type === AstTypes.CapturingGroup || firstEl.type === AstTypes.Group) && firstEl.alternatives.length === 1; // First element within first group if the group doesn't contain top-level alternation, else just