Skip to content

Commit

Permalink
Add option asciiWordBoundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 22, 2024
1 parent 0f39c04 commit a5a55b0
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 14 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ type OnigurumaToEsOptions = {
hasIndices?: boolean;
maxRecursionDepth?: number | null;
overrides?: {
allowAllSearchStartAnchors?: boolean;
allowOrphanBackrefs?: boolean;
asciiWordBoundaries?: boolean;
};
target?: 'auto' | 'ES2025' | 'ES2024' | 'ES2018';
verbose?: boolean;
Expand Down Expand Up @@ -209,11 +211,12 @@ Using a high limit has a small impact on performance. Generally, this is only a

### `overrides`

Advanced options that take precedence over standard error checking and flags.
Advanced options that take precedence over standard error checking and flags when enabled.

- `allowOrphanBackrefs`: Useful with TextMate grammars that merge backreferences across `begin` and `end` patterns.
- `allowAllSearchStartAnchors`: Silences errors for unsupported uses of the search-start anchor `\G`.
- Oniguruma-To-ES uses a variety of strategies to accurately emulate many common uses of `\G`. When this option is enabled, any `\G` that doesn't have a known emulation strategy is simply removed and JavaScript's `y` (`sticky`) flag is added. This might lead to some false positives and negatives, but it's useful for non-critical matching (like syntax highlighting) where having some mismatches is better than not working at all.
- `allowOrphanBackrefs`: Useful with TextMate grammars that merge backreferences across `begin` and `end` patterns.
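- `asciiWordBoundaries`: Treats `\b` and `\B` as ASCII-only, skipping the lookaround-based rewrite that is otherwise applied to word boundaries that aren't already ASCII-only.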

### `target`

Expand Down
3 changes: 2 additions & 1 deletion demo/demo.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ const state = {
hasIndices: getValue('option-hasIndices'),
maxRecursionDepth: getValue('option-maxRecursionDepth'),
overrides: {
allowOrphanBackrefs: getValue('option-allowOrphanBackrefs'),
allowAllSearchStartAnchors: getValue('option-allowAllSearchStartAnchors'),
allowOrphanBackrefs: getValue('option-allowOrphanBackrefs'),
asciiWordBoundaries: getValue('option-asciiWordBoundaries'),
},
target: getValue('option-target'),
verbose: getValue('option-verbose'),
Expand Down
17 changes: 12 additions & 5 deletions demo/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -108,22 +108,29 @@ <h2>Try it</h2>
</p>
</div>
<div>
<p>
<label>
<input type="checkbox" id="option-allowAllSearchStartAnchors" onchange="setOverride('allowAllSearchStartAnchors', this.checked)">
<code>allowAllSearchStartAnchors</code>
<span class="tip tip-lg">Silences errors for unsupported uses of <code>\G</code></span>
</label>
</p>
<p>
<label>
<input type="checkbox" id="option-allowOrphanBackrefs" onchange="setOverride('allowOrphanBackrefs', this.checked)">
<code>allowOrphanBackrefs</code>
<span class="tip tip-xl">Useful with TextMate grammars that merge backrefs across <code>begin</code> and <code>end</code> patterns</span>
</label>
</p>
</div>
<div>
<p>
<label>
<input type="checkbox" id="option-allowAllSearchStartAnchors" onchange="setOverride('allowAllSearchStartAnchors', this.checked)">
<code>allowAllSearchStartAnchors</code>
<span class="tip tip-lg">Silences errors for unsupported uses of <code>\G</code></span>
<input type="checkbox" id="option-asciiWordBoundaries" onchange="setOverride('asciiWordBoundaries', this.checked)">
<code>asciiWordBoundaries</code>
<span class="tip tip-lg">ASCII-only <code>\b</code> and <code>\B</code></span>
</label>
</p>
</div>
<div>
<p>
<label>
<input type="checkbox" id="option-verbose" onchange="setOption('verbose', this.checked)">
Expand Down
4 changes: 3 additions & 1 deletion src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ import {recursion} from 'regex-recursion';
hasIndices?: boolean;
maxRecursionDepth?: number | null;
overrides?: {
allowAllSearchStartAnchors?: boolean;
allowOrphanBackrefs?: boolean;
allowAllSearchStartAnchors: boolean;
asciiWordBoundaries?: boolean;
};
target?: keyof Target;
verbose?: boolean;
Expand All @@ -57,6 +58,7 @@ function toDetails(pattern, options) {
const regexAst = transform(onigurumaAst, {
accuracy: opts.accuracy,
allowAllSearchStartAnchors: opts.overrides.allowAllSearchStartAnchors,
asciiWordBoundaries: opts.overrides.asciiWordBoundaries,
avoidSubclass: opts.avoidSubclass,
bestEffortTarget: opts.target,
});
Expand Down
8 changes: 5 additions & 3 deletions src/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,14 @@ function getOptions(options) {
// Disables optimizations that simplify the pattern when it doesn't change the meaning.
verbose: false,
...options,
// Advanced options that take precedence over standard error checking and flags.
// Advanced options that take precedence over standard error checking and flags when enabled.
overrides: {
// Useful with TextMate grammars that merge backreferences across `begin` and `end` patterns.
allowOrphanBackrefs: false,
// Silences errors for unsupported uses of the search-start anchor `\G`.
allowAllSearchStartAnchors: false,
// Useful with TextMate grammars that merge backreferences across `begin` and `end` patterns.
allowOrphanBackrefs: false,
// ASCII-only `\b` and `\B`.
asciiWordBoundaries: false,
...(options?.overrides),
},
};
Expand Down
7 changes: 5 additions & 2 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ AST represents what's needed to precisely reproduce Oniguruma behavior using Reg
@param {{
accuracy?: keyof Accuracy;
allowAllSearchStartAnchors?: boolean;
asciiWordBoundaries?: boolean;
avoidSubclass?: boolean;
bestEffortTarget?: keyof Target;
}} [options]
Expand All @@ -46,6 +47,7 @@ function transform(ast, options) {
// based on `target`/`accuracy`, so produce the appropriate structure here.
accuracy: 'default',
allowAllSearchStartAnchors: false,
asciiWordBoundaries: false,
avoidSubclass: false,
bestEffortTarget: 'ES2025',
...options,
Expand All @@ -55,6 +57,7 @@ function transform(ast, options) {
const firstPassState = {
accuracy: opts.accuracy,
allowAllSearchStartAnchors: opts.allowAllSearchStartAnchors,
asciiWordBoundaries: opts.asciiWordBoundaries,
flagDirectivesByAlt: new Map(),
minTargetEs2024: isMinTarget(opts.bestEffortTarget, 'ES2024'),
// Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
Expand Down Expand Up @@ -127,7 +130,7 @@ const FirstPassVisitor = {
},
},

Assertion({node, ast, remove, replaceWith}, {allowAllSearchStartAnchors, supportedGNodes, wordIsAscii}) {
Assertion({node, ast, remove, replaceWith}, {allowAllSearchStartAnchors, asciiWordBoundaries, supportedGNodes, wordIsAscii}) {
const {kind, negate} = node;
if (kind === AstAssertionKinds.line_end) {
// Onig's only line break char is line feed, unlike JS
Expand All @@ -143,7 +146,7 @@ const FirstPassVisitor = {
remove();
} else if (kind === AstAssertionKinds.string_end_newline) {
replaceWith(parseFragment(r`(?=\n?\z)`));
} else if (kind === AstAssertionKinds.word_boundary && !wordIsAscii) {
} else if (kind === AstAssertionKinds.word_boundary && !wordIsAscii && !asciiWordBoundaries) {
const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
replaceWith(parseFragment(negate ? B : b));
Expand Down

0 comments on commit a5a55b0

Please sign in to comment.