From 611b7da9fa8d2a27b9e4cf8d856ce280b82a37e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=B6khan=20Kurt?= Date: Sun, 18 Jul 2021 08:52:14 +0300 Subject: [PATCH] Fix [^] and [] patterns in Regex (#173) * add failing adaptable regex test * fix adapting regex * add more failing regex * fix failing regex with [] and [^] * make only the parser pass but don't change the output regex --- src/Esprima/Scanner.cs | 65 ++++++++++++++++++------------- test/Esprima.Tests/RegExpTests.cs | 19 +++------ 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/Esprima/Scanner.cs b/src/Esprima/Scanner.cs index 9a6d8403..33022bbc 100644 --- a/src/Esprima/Scanner.cs +++ b/src/Esprima/Scanner.cs @@ -1549,13 +1549,6 @@ public Token ScanTemplate() tmp = Regex.Replace(tmp, "[\uD800-\uDBFF][\uDC00-\uDFFF]", astralSubstitute); } - // .NET doesn't support [^] which is equivalent to `[^.]` - // c.f. https://github.com/sebastienros/esprima-dotnet/issues/146 - if (tmp.Contains("[^]")) - { - tmp = tmp.Replace("[^]", "[^.]"); - } - // First, detect invalid regular expressions. var options = ParseRegexOptions(flags); @@ -1565,7 +1558,16 @@ public Token ScanTemplate() } catch { - ThrowUnexpectedToken(Messages.InvalidRegExp); + tmp = EscapeFailingRegex(tmp); + + try + { + new Regex(tmp, options); + } + catch + { + ThrowUnexpectedToken(Messages.InvalidRegExp); + } } // Return a regular expression object for this pattern-flag pair, or @@ -1574,31 +1576,22 @@ public Token ScanTemplate() try { // Do we need to convert the expression to its .NET equivalent? - if (_adaptRegexp) + if (_adaptRegexp && options.HasFlag(RegexOptions.Multiline)) { - var newPattern = pattern; + // Replace all non-escaped $ occurences by \r?$ + // c.f. http://programmaticallyspeaking.com/regular-expression-multiline-mode-whats-a-newline.html - if (options.HasFlag(RegexOptions.Multiline)) + int index = 0; + var newPattern = pattern; + while ((index = newPattern.IndexOf("$", index, StringComparison.Ordinal)) != -1) { - // Replace all non-escaped $ occurences by \r?$ - // c.f. http://programmaticallyspeaking.com/regular-expression-multiline-mode-whats-a-newline.html - - int index = 0; - while ((index = newPattern.IndexOf("$", index, StringComparison.Ordinal)) != -1) + if (index > 0 && newPattern[index - 1] != '\\') { - if (index > 0 && newPattern[index - 1] != '\\') - { - newPattern = newPattern.Substring(0, index) + @"\r?" + newPattern.Substring(index); - index += 4; - } + newPattern = newPattern.Substring(0, index) + @"\r?" + newPattern.Substring(index); + index += 4; } } - if (newPattern.Contains("[^]")) - { - newPattern = newPattern.Replace("[^]", "[^.]"); - } - pattern = newPattern; } @@ -1610,6 +1603,26 @@ public Token ScanTemplate() } } + public string EscapeFailingRegex(string pattern) + { + // .NET 4.x doesn't support [^] which should match any character including newline + // c.f. https://github.com/sebastienros/esprima-dotnet/issues/146 + if (pattern.Contains("[^]")) + { + pattern = pattern.Replace("[^]", @"[\s\S]"); + } + + + // .NET doesn't support [] which should not match any characters (inverse of [^]) + if (pattern.Contains("[]")) + { + // This is a temporary solution to make the parser pass. It is not a correct replacement as it will match the \0 char. + pattern = pattern.Replace("[]", @"[\0]"); + } + + return pattern; + } + public Token ScanRegExpBody() { var ch = Source[Index]; diff --git a/test/Esprima.Tests/RegExpTests.cs b/test/Esprima.Tests/RegExpTests.cs index a307bc4d..552a3cd4 100644 --- a/test/Esprima.Tests/RegExpTests.cs +++ b/test/Esprima.Tests/RegExpTests.cs @@ -23,27 +23,18 @@ private static Regex CreateRegex(string code) } [Theory] - [InlineData(@"/[^]*? (:[rp] [el] a[\w -]+)[^]*/")] + [InlineData(@"/[^]*?(:[rp][el]a[\w-]+)[^]*/")] [InlineData(@"/[^]/")] [InlineData(@"/[^ ]/")] + [InlineData(@"/[]/")] + [InlineData(@"/[]*/")] + [InlineData(@"/[]a/")] public void ShouldParseRegularExpression(string regexp) { var parser = new JavaScriptParser(@"var O = " + regexp); var program = parser.ParseScript(); Assert.NotNull(program); - } - - [Theory] - [InlineData(@"/[^]*? (:[rp] [el] a[\w -]+)[^]*/")] - [InlineData(@"/[^]/")] - [InlineData(@"/[^ ]/")] - public void ShouldGetNonNullRegexFromScanner(string regexp) - { - var scanner = new Scanner("", new ParserOptions { AdaptRegexp = true }); - var regex = scanner.TestRegExp(regexp, ""); - - Assert.NotNull(regex); } } -} \ No newline at end of file +}