Skip to content

Commit

Permalink
Test recursion; allow JS flags dg with toRegExp
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Oct 29, 2024
1 parent 0589dd2 commit 9bc8f3c
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 24 deletions.
2 changes: 1 addition & 1 deletion demo/demo.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function showOutput(el) {
try {
// Use `compile` but display output as if `toRegExp` was called. This avoids erroring when the
// selected `target` includes features that don't work in the user's browser
const re = compile(input, flags, {
const re = OnigurumaToES.compile(input, flags, {
allowBestEffort: optionAllowBestEffortValue,
maxRecursionDepth: optionMaxRecursionDepthValue === '' ? null : +optionMaxRecursionDepthValue,
optimize: optionOptimizeValue,
Expand Down
6 changes: 4 additions & 2 deletions demo/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ <h2>Try it</h2>
</main>

<script src="../dist/index.min.js"></script>
<script src="./demo.js"></script>
<script>
Object.assign(globalThis, OnigurumaToES);
// For testing in the console
Object.assign(globalThis, OnigurumaToES);
const r = String.raw;
const cp = String.fromCodePoint;
function printAst(ast) {
if (ast?.type !== 'Regex') {
throw new Error('Oniguruma or `regex` AST expected');
Expand Down Expand Up @@ -104,6 +107,5 @@ <h2>Try it</h2>
}, '│ '));
}
</script>
<script src="./demo.js"></script>
</body>
</html>
10 changes: 5 additions & 5 deletions dist/index.min.js

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions spec/match-assertion.spec.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {compile} from '../dist/index.mjs';
import {compile, toRegExp} from '../dist/index.mjs';
import {r} from '../src/utils.js';
import {matchers} from './helpers/matchers.js';

Expand Down Expand Up @@ -88,9 +88,7 @@ describe('Assertion', () => {
});

it('should match only at the start of the search when applied repeatedly', () => {
const compiled = compile(r`\G[ab]`);
const re = new RegExp(compiled.pattern, `g${compiled.flags}`);
expect('abbcbb'.match(re)).toEqual(['a', 'b', 'b']);
expect('abbcbb'.match(toRegExp(r`\G[ab]`, 'g'))).toEqual(['a', 'b', 'b']);
});

it('should apply with positive min quantification', () => {
Expand Down
85 changes: 85 additions & 0 deletions spec/match-recursion.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import {compile, toRegExp} from '../dist/index.mjs';
import {r} from '../src/utils.js';
import {matchers} from './helpers/matchers.js';

// Register the custom matchers imported from `./helpers/matchers.js` with Jasmine before every
// spec in this file. NOTE(review): `toExactlyMatch` and `toFindMatch`, used below, are presumably
// defined there; confirm against the helper module.
beforeEach(() => {
jasmine.addMatchers(matchers);
});

// Specs for recursive subexpression calls (`\g<…>` used inside the thing it refers to). Positive
// cases check matching behavior; the `toThrow` cases pin down forms that are expected to be
// rejected when the pattern is compiled.
describe('Recursion', () => {
// Recursion of the whole pattern via `\g<0>`
describe('global', () => {
it('should match an equal number of two different subpatterns', () => {
expect('aaabbb').toExactlyMatch(r`a\g<0>?b`);
expect('test aaaaaabbb').toFindMatch(r`a\g<0>?b`);
expect('aaabbb').toExactlyMatch(r`(?<n>a\g<0>?b)`);
});

it('should match balanced brackets', () => {
const pattern = r`<(?:[^<>]|\g<0>)*>`;
expect([
'<>', '<<>>', '<a<b<c>d>e>', '<<<<<<a>>>bc>>>',
]).toExactlyMatch(pattern);
// JS flag `g` is passed through `toRegExp` so that `String.prototype.match` returns every
// match in the string rather than only the first
expect(
'test > <balanced <<brackets>>> <> <<a>> < <b>'.match(toRegExp(pattern, 'g'))
).toEqual(['<balanced <<brackets>>>', '<>', '<<a>>', '<b>']);
});

it('should throw for multiple overlapping recursions', () => {
expect(() => compile(r`a\g<0>?\g<0>?`)).toThrow();
});

it('should throw for leading 0s', () => {
expect(() => compile(r`a\g<00>?`)).toThrow();
});
});

// Recursion that refers to a group by its absolute number
describe('numbered', () => {
// Current limitation of `regex-recursion`
it('should throw for recursion by number', () => {
expect(() => compile(r`(a\g<1>?)`)).toThrow();
expect(() => compile(r`(a\g<2>(\g<1>?))`)).toThrow();
});
});

// Recursion that refers to a group by a signed, relative number (`\g<-N>`, `\g<+N>`)
describe('relative numbered', () => {
it('should throw for relative 0', () => {
expect(() => compile(r`a\g<-0>?`)).toThrow();
expect(() => compile(r`a\g<+0>?`)).toThrow();
});

// Current limitation of `regex-recursion`
it('should throw for recursion by number', () => {
expect(() => compile(r`(a\g<-1>?)`)).toThrow();
expect(() => compile(r`(a\g<+1>(\g<-2>?))`)).toThrow();
});
});

// Recursion that refers to a group by name
describe('named', () => {
it('should match an equal number of two different subpatterns', () => {
expect('aaabbb').toExactlyMatch(r`\A(?<r>a\g<r>?b)\z`);
expect('aaabb').not.toFindMatch(r`\A(?<r>a\g<r>?b)\z`);
});

it('should throw for multiple direct, overlapping recursions', () => {
expect(() => compile(r`a\g<0>?(?<r>a\g<r>?)`)).toThrow();
expect(() => compile(r`(?<r>a\g<r>?\g<r>?)`)).toThrow();
});

// Current limitation of `regex-recursion`
it('should throw for multiple direct, non-overlapping recursions', () => {
// TODO: `regex-recursion` has a bug so using `toRegExp` instead of `compile`
expect(() => toRegExp(r`(?<r1>a\g<r1>?)(?<r2>a\g<r2>?)`)).toThrow();
});

// "Indirect" here means a group reaches itself only through a call to another group
it('should throw for multiple indirect, overlapping recursions', () => {
expect(() => compile(r`(?<a>\g<b>(?<b>a\g<a>?))`)).toThrow();
});

// Current limitation of `regex-recursion`
it('should throw for multiple indirect, non-overlapping recursions', () => {
expect(() => compile(r`(?<a>\g<b>)(?<b>a\g<a>?)`)).toThrow();
expect(() => compile(r`\g<a>(?<a>\g<b>)(?<b>a\g<a>?)`)).toThrow();
expect(() => compile(r`(?<a>\g<b>)(?<b>\g<c>)(?<c>a\g<a>?)`)).toThrow();
});
});
});
26 changes: 21 additions & 5 deletions spec/match-subroutine.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@ describe('Subroutine', () => {
expect('aa').toExactlyMatch(r`\g<1>(a)`);
expect('aa').toExactlyMatch(r`(\g<2>(a))`);
});

it('should throw if referencing a missing group', () => {
expect(() => compile(r`\g<1>`)).toThrow();
expect(() => compile(r`()\g<2>`)).toThrow();
expect(() => compile(r`(\g<2>)`)).toThrow();
});


it('should throw if referencing a named group by number', () => {
expect(() => compile(r`(?<a>)\g<1>`)).toThrow();
expect(() => compile(r`\g<1>(?<a>)`)).toThrow();
});

it('should allow referencing groups that contain subroutines', () => {
expect('ababa').toExactlyMatch(r`(a)(b\g<1>)\g<2>`);
expect('ababa').toExactlyMatch(r`(a)\g<2>(b\g<1>)`);
Expand All @@ -41,12 +47,21 @@ describe('Subroutine', () => {
expect('aa').toExactlyMatch(r`\g<+1>(a)`);
expect('aa').toExactlyMatch(r`(\g<+1>(a))`);
});

it('should throw if referencing a missing group', () => {
expect(() => compile(r`\g<-1>`)).toThrow();
expect(() => compile(r`\g<+1>`)).toThrow();
expect(() => compile(r`()\g<-2>`)).toThrow();
expect(() => compile(r`()\g<+1>`)).toThrow();
expect(() => compile(r`(\g<-2>)`)).toThrow();
expect(() => compile(r`(\g<+1>)`)).toThrow();
});


it('should throw if referencing a named group by relative number', () => {
expect(() => compile(r`(?<a>)\g<-1>`)).toThrow();
expect(() => compile(r`\g<+1>(?<a>)`)).toThrow();
});

it('should allow referencing groups that contain subroutines', () => {
expect('ababa').toExactlyMatch(r`(a)(b\g<-2>)\g<-1>`);
expect('ababa').toExactlyMatch(r`(a)\g<+1>(b\g<-2>)`);
Expand All @@ -69,6 +84,7 @@ describe('Subroutine', () => {
it('should throw if referencing a missing group', () => {
expect(() => compile(r`\g<a>`)).toThrow();
expect(() => compile(r`(?<a>)\g<b>`)).toThrow();
expect(() => compile(r`(?<a>\g<b>)`)).toThrow();
});

it('should throw if referencing a duplicate group name', () => {
Expand All @@ -79,7 +95,7 @@ describe('Subroutine', () => {
expect(() => compile(r`(?<a>)(?<a>\g<a>?)`)).toThrow();
expect(() => compile(r`(?<a>(?<a>\g<a>?))`)).toThrow();
});

it('should allow referencing groups that contain subroutines', () => {
expect('ababa').toExactlyMatch(r`(?<a>a)(?<b>b\g<a>)\g<b>`);
expect('ababa').toExactlyMatch(r`(?<a>a)\g<b>(?<b>b\g<a>)`);
Expand Down
2 changes: 1 addition & 1 deletion src/compile.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {recursion} from 'regex-recursion';
/**
Transpiles an Oniguruma regex pattern and flags to native JS.
@param {string} pattern Oniguruma regex pattern.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag m is equivalent to JS's flag s.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag `m` is equivalent to JS's flag `s`.
@param {CompileOptions} [options]
@returns {{
pattern: string;
Expand Down
13 changes: 8 additions & 5 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {tokenize} from './tokenize.js';
/**
Generates an Oniguruma AST from an Oniguruma pattern and flags.
@param {string} pattern Oniguruma regex pattern.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag m is equivalent to JS's flag s.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag `m` is equivalent to JS's flag `s`.
@returns {import('./parse.js').OnigurumaAst}
*/
function toOnigurumaAst(pattern, flags) {
Expand All @@ -29,7 +29,7 @@ function toOnigurumaAst(pattern, flags) {
/**
Generates a `regex` AST from an Oniguruma pattern and flags.
@param {string} pattern Oniguruma regex pattern.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag m is equivalent to JS's flag s.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag `m` is equivalent to JS's flag `s`.
@returns {import('./transform.js').RegexAst}
*/
function toRegexAst(pattern, flags) {
Expand All @@ -39,13 +39,16 @@ function toRegexAst(pattern, flags) {
/**
Transpiles an Oniguruma regex pattern and flags to a native JS RegExp.
@param {string} pattern Oniguruma regex pattern.
@param {import('./tokenize.js').OnigurumaFlags} [flags] Oniguruma flags. Flag m is equivalent to JS's flag s.
@param {string} [flags] Any combination of Oniguruma flags `imx` and JS flags `dg`. Flag `m` is
equivalent to JS's flag `s`.
@param {import('./compile.js').CompileOptions} [options]
@returns {RegExp}
*/
function toRegExp(pattern, flags, options) {
function toRegExp(pattern, flags = '', options) {
const allowedJsFlags = flags.replace(/[^dg]+/g, '');
flags = flags.replace(/[dg]+/g, '');
const result = compile(pattern, flags, options);
return new RegExp(result.pattern, result.flags);
return new RegExp(result.pattern, `${allowedJsFlags}${result.flags}`);
}

export {
Expand Down
2 changes: 1 addition & 1 deletion src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ const charClassTokenRe = new RegExp(r`
*/
/**
@param {string} pattern
@param {OnigurumaFlags} [flags] Oniguruma flags. Flag m is equivalent to JS's flag s.
@param {OnigurumaFlags} [flags] Oniguruma flags. Flag `m` is equivalent to JS's flag `s`.
@returns {TokenizerResult}
*/
function tokenize(pattern, flags = '') {
Expand Down

0 comments on commit 9bc8f3c

Please sign in to comment.