From 37699d9c2868584a6a9a72b3c29a217d59b20df0 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Tue, 17 Dec 2024 19:22:38 +0100 Subject: [PATCH] ^ doesn't match after string-terminating LF (fixes #9) --- README.md | 3 ++- spec/match-assertion.spec.js | 8 +++++++- src/generate.js | 3 ++- src/transform.js | 5 +++-- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6305b97..16e6204 100644 --- a/README.md +++ b/README.md @@ -554,7 +554,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Unescaped - outside of range is literal in some contexts (different than JS rules in any mode)
✔ Error for unescaped [ that doesn't form nested class
- ✔ Allows leading unescaped ]
+ ✔ Leading unescaped ] OK
✔ Fewer chars require escaping than JS
@@ -616,6 +616,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Always "multiline"
✔ Only \n as newline
+ ✔ No match after string-terminating \n
diff --git a/spec/match-assertion.spec.js b/spec/match-assertion.spec.js index 0c61196..56ba10d 100644 --- a/spec/match-assertion.spec.js +++ b/spec/match-assertion.spec.js @@ -1,4 +1,4 @@ -import {toDetails} from '../dist/index.mjs'; +import {toDetails, toRegExp} from '../dist/index.mjs'; import {r} from '../src/utils.js'; import {matchers} from './helpers/matchers.js'; @@ -50,6 +50,12 @@ describe('Assertion', () => { it('should not match at positions other than the start of the string or after a line feed', () => { expect('ba').not.toFindMatch('^a'); }); + + it('should not match after a string-terminating line feed', () => { + expect(''.match(toRegExp('^', {global: true}))).toHaveSize(1); + expect('\n'.match(toRegExp('^', {global: true}))).toHaveSize(1); + expect('\n\n'.match(toRegExp('^', {global: true}))).toHaveSize(2); + }); }); describe('string_end', () => { diff --git a/src/generate.js b/src/generate.js index 5707306..20fdd42 100644 --- a/src/generate.js +++ b/src/generate.js @@ -219,7 +219,8 @@ function genAssertion(node, _, gen) { if (kind === AstAssertionKinds.string_start) { return '^'; } - // If a word boundary came through the transformer unaltered, that means `wordIsAscii` is enabled + // If a word boundary came through the transformer unaltered, that means `wordIsAscii` or + // `asciiWordBoundaries` is enabled if (kind === AstAssertionKinds.word_boundary) { return negate ? r`\B` : r`\b`; } diff --git a/src/transform.js b/src/transform.js index 8735387..72b0e23 100644 --- a/src/transform.js +++ b/src/transform.js @@ -136,8 +136,9 @@ const FirstPassVisitor = { // Onig's only line break char is line feed, unlike JS replaceWith(parseFragment(r`(?=\z|\n)`)); } else if (kind === AstAssertionKinds.line_start) { - // Onig's only line break char is line feed, unlike JS - replaceWith(parseFragment(r`(?<=\A|\n)`)); + // Onig's only line break char is line feed, unlike JS. Onig's `^` doesn't match after a + // string-terminating line feed + replaceWith(parseFragment(r`(?<=\A|\n(?!\z))`)); } else if (kind === AstAssertionKinds.search_start) { if (!supportedGNodes.has(node) && !allowUnhandledGAnchors) { throw new Error(r`Uses "\G" in a way that's unsupported`);