Skip to content

Commit

Permalink
Misc
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 5, 2024
1 parent 41682a9 commit e199f30
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 26 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ Sets the JavaScript language version for generated patterns and flags. Later tar

Following are the supported features by target.

> [!NOTE]
> Targets `ES2024` and `ESNext` have the same emulation capabilities. Resulting regexes might have different source and flags, but they match the same strings.
Notice that nearly every feature below has at least subtle differences from JavaScript. Some features and subfeatures listed as unsupported are not emulatable using native JavaScript regexes, but support for others might be added in future versions of Oniguruma-To-ES. Unsupported features throw an error.
Expand Down
4 changes: 4 additions & 0 deletions demo/demo.css
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ td {
padding-right: 3vw;
}

summary {
cursor: pointer;
}

label, .label {
margin-right: 0.4em;
}
Expand Down
15 changes: 9 additions & 6 deletions demo/demo.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ function showOutput(el) {
try {
// Use `compile` but display output as if `toRegExp` was called. This avoids erroring when the
// selected `target` includes features that don't work in the user's browser
const re = OnigurumaToES.compile(input, flags, opts);
if (opts.allowSubclassBasedEmulation && re._internal) {
const compiled = OnigurumaToES.compile(input, flags, opts);
if (opts.allowSubclassBasedEmulation && compiled._internal) {
infoEl.classList.remove('hidden');
outputEl.classList.add('subclass');
output = getFormattedSubclass(re.pattern, re.flags, re._internal);
output = getFormattedSubclass(compiled.pattern, compiled.flags, {
strategy: compiled._internal.strategy,
subpattern: compiled._internal.subpattern,
});
} else {
output = `/${getRegExpLiteralPattern(re.pattern)}/${re.flags}`;
output = `/${getRegExpLiteralPattern(compiled.pattern)}/${compiled.flags}`;
}
} catch (err) {
outputEl.classList.add('error');
Expand All @@ -58,12 +61,12 @@ function escapeHtml(str) {
return str.replace(/&/g, '&amp;').replace(/</g, '&lt;');
}

function getFormattedSubclass(pattern, flags, data) {
function getFormattedSubclass(pattern, flags, {strategy, subpattern}) {
return `new WrappedRegExp('${
pattern.replace(/\\/g, '\\\\').replace(/'/g, "\\'")
}', '${
flags
}', {\n strategy: '${data.strategy}',${data.subpattern ? `\n subpattern: '${data.subpattern}',` : ''}\n})`;
}', {\n strategy: '${strategy}',${subpattern ? `\n subpattern: '${subpattern}',` : ''}\n})`;
}

function getRegExpLiteralPattern(str) {
Expand Down
2 changes: 1 addition & 1 deletion demo/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ <h2>Try it</h2>
</table>
</details>
<pre id="output"></pre>
<div id="info" class="hidden"><p>✅ This regex is emulated through the combination of changes in the pattern and the use of a <code>RegExp</code> subclass.</p></div>
<div id="info" class="hidden"><p>✅ This regex is emulated through the combination of changes in the pattern and the use of a <code>RegExp</code> subclass with custom logic.</p></div>
<p>The output shows the result of calling <code>toRegExp</code>. Oniguruma-To-ES includes functions to generate additional formats: <code>compile</code>, <code>toOnigurumaAst</code>, and <code>toRegexAst</code> (for an AST based on <a href="https://github.com/slevithan/regex"><code>regex</code></a>). You can run all of these from the console on this page, and you can pretty-print AST results by passing them to <code>printAst</code>.</p>
</main>

Expand Down
2 changes: 1 addition & 1 deletion scripts/onig-match.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ async function exec([pattern, str]) {
onigMatch = await onigurumaResult(pattern, str, onigMatch.index + (onigMatch.result.length || 1));
}

console.log('Pattern:', color('yellow', pattern));
console.log('Pattern:', color('yellow', `/${pattern}/`));
console.log('String:', `${value(str)} ${color('gray', `(len ${str.length})`)}`);
if (onigMatch.error) {
err(null, `Oniguruma error: ${onigMatch.error.message}`);
Expand Down
44 changes: 35 additions & 9 deletions spec/match-assertion.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ describe('Assertion', () => {
expect('abbcbb'.match(toRegExp(r`\G[ab]`, '', {global: true}))).toEqual(['a', 'b', 'b']);
});

// Unsupported: not emulatable without RegExp subclass
// Unsupported; not emulatable without a subclass
it('should throw if not used at the start of every top-level alternative', () => {
expect(() => compile(r`a\G`)).toThrow();
expect(() => compile(r`\Ga|b`)).toThrow();
Expand Down Expand Up @@ -123,27 +123,53 @@ describe('Assertion', () => {
expect(() => compile(r`(\Ga)+\G`)).toThrow();
});

it('should allow if leading in a leading positive lookaround', () => {
it('should allow if leading in a leading positive lookahead', () => {
expect('a').toExactlyMatch(r`(?=\G)a`);
expect('a').toExactlyMatch(r`(?=\Ga)a`);
expect('aaba'.match(toRegExp(r`(?=\Ga)a`, '', {global: true}))).toEqual(['a', 'a']);
});

it('should allow if trailing in a leading positive lookbehind', () => {
expect('a').toExactlyMatch(r`(?<=\G)a`);
expect(() => compile(r`(?<=a\G)a`)).toThrow();
expect(() => compile(r`(?<=\G|)a`)).toThrow();
expect(() => compile(r`(?:(?<=\G))?a`)).toThrow();
expect('a').toExactlyMatch(r`(?=\G)a|\Gb`);
expect(() => compile(r`(?=\G)a|b`)).toThrow();
expect(['aa', 'abaa']).not.toFindMatch(r`(?<=a\G)a`);
let re = toRegExp(r`(?<=a\G)a`);
re.lastIndex = 3;
expect(re.exec('abaa')?.index).toBe(3);
});

it('should throw if leading in a leading positive lookbehind', () => {
// Matches at index 3 within `abc`, but doesn't match within `aabc`. Emulatable by replacing
// `\G` with `^`, slicing the string to `lastIndex`, and doing a non-sticky search
expect(() => compile(r`(?<=\Gabc)`)).toThrow();
});

it('should throw if leading in a leading negative lookaround', () => {
expect(() => compile(r`(?!\G)a`)).toThrow();
expect(() => compile(r`(?<!\G)a`)).toThrow();
});

// Just documenting current behavior; supportable
it('should throw for redundant assertions', () => {
// Just documenting current behavior
it('should throw for redundant but otherwise supportable assertions', () => {
expect(() => compile(r`\G\Ga`)).toThrow();
expect(() => compile(r`\Ga|\G\Gb`)).toThrow();
});

// Could support by replacing `\G` with `(?!)`
it('should throw at unmatchable positions', () => {
expect(() => compile(r`a\Gb`)).toThrow();
expect(() => compile(r`(?<=a\Gb)`)).toThrow();
expect(() => compile(r`(?=a\Gb)`)).toThrow();
expect(() => compile(r`(?=ab\G)`)).toThrow();
});

// Unsupported; some or all might be emulatable
it('should throw for other unsupported uses', () => {
expect(() => compile(r`(?<=\G|)a`)).toThrow();
expect(() => compile(r`(?:(?<=\G))?a`)).toThrow();
expect('a').toExactlyMatch(r`(?=\G)a|\Gb`);
expect(() => compile(r`(?=\G)a|b`)).toThrow();
});

describe('subclass strategies', () => {
const opts = {allowSubclassBasedEmulation: true};

Expand Down
2 changes: 0 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ class WrappedRegExp extends RegExp {
// Can read private properties of the existing object since it was created by this class
this.#data = pattern.#data;
}
// TODO: Change to getters since values are for tools and won't be read internally
this._internal = this.#data;
}
/**
Called internally by all String/RegExp methods that use regexes.
Expand Down
15 changes: 8 additions & 7 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -771,14 +771,15 @@ function getLeadingG(els) {
const first = els[0];
// Special case for leading positive lookaround with leading `\G`; else all leading assertions
// are ignored when looking for `\G`
if (
isLookaround(first) &&
!first.negate &&
first.alternatives.length === 1 &&
first.alternatives[0].elements[0]?.kind === AstAssertionKinds.search_start
) {
return first.alternatives[0].elements[0];
if (isLookaround(first) && !first.negate && first.alternatives.length === 1 && first.alternatives[0].elements.length) {
const els = first.alternatives[0].elements;
const index = first.kind === AstAssertionKinds.lookahead ? 0 : els.length - 1;
// `\G` is first node in lookahead or last node in lookbehind
if (els[index].kind === AstAssertionKinds.search_start) {
return els[index];
}
}

const firstToConsider = els.find(el => {
return el.kind === AstAssertionKinds.search_start ?
true :
Expand Down

0 comments on commit e199f30

Please sign in to comment.