Skip to content

Commit

Permalink
Fix [^] and [] patterns in Regex (#173)
Browse files Browse the repository at this point in the history
* add failing adaptable regex test
* fix adapting regex
* add more failing regex
* fix failing regex with [] and [^]
* make only the parser pass but don't change the output regex
  • Loading branch information
KurtGokhan authored Jul 18, 2021
1 parent a47cf50 commit 611b7da
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 40 deletions.
65 changes: 39 additions & 26 deletions src/Esprima/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1549,13 +1549,6 @@ public Token ScanTemplate()
tmp = Regex.Replace(tmp, "[\uD800-\uDBFF][\uDC00-\uDFFF]", astralSubstitute);
}

// .NET doesn't support [^] which is equivalent to `[^.]`
// c.f. https://github.com/sebastienros/esprima-dotnet/issues/146
if (tmp.Contains("[^]"))
{
tmp = tmp.Replace("[^]", "[^.]");
}

// First, detect invalid regular expressions.
var options = ParseRegexOptions(flags);

Expand All @@ -1565,7 +1558,16 @@ public Token ScanTemplate()
}
catch
{
ThrowUnexpectedToken(Messages.InvalidRegExp);
tmp = EscapeFailingRegex(tmp);

try
{
new Regex(tmp, options);
}
catch
{
ThrowUnexpectedToken(Messages.InvalidRegExp);
}
}

// Return a regular expression object for this pattern-flag pair, or
Expand All @@ -1574,31 +1576,22 @@ public Token ScanTemplate()
try
{
// Do we need to convert the expression to its .NET equivalent?
if (_adaptRegexp)
if (_adaptRegexp && options.HasFlag(RegexOptions.Multiline))
{
var newPattern = pattern;
// Replace all non-escaped $ occurrences by \r?$
// c.f. http://programmaticallyspeaking.com/regular-expression-multiline-mode-whats-a-newline.html

if (options.HasFlag(RegexOptions.Multiline))
int index = 0;
var newPattern = pattern;
while ((index = newPattern.IndexOf("$", index, StringComparison.Ordinal)) != -1)
{
// Replace all non-escaped $ occurrences by \r?$
// c.f. http://programmaticallyspeaking.com/regular-expression-multiline-mode-whats-a-newline.html

int index = 0;
while ((index = newPattern.IndexOf("$", index, StringComparison.Ordinal)) != -1)
if (index > 0 && newPattern[index - 1] != '\\')
{
if (index > 0 && newPattern[index - 1] != '\\')
{
newPattern = newPattern.Substring(0, index) + @"\r?" + newPattern.Substring(index);
index += 4;
}
newPattern = newPattern.Substring(0, index) + @"\r?" + newPattern.Substring(index);
index += 4;
}
}

if (newPattern.Contains("[^]"))
{
newPattern = newPattern.Replace("[^]", "[^.]");
}

pattern = newPattern;
}

Expand All @@ -1610,6 +1603,26 @@ public Token ScanTemplate()
}
}

/// <summary>
/// Rewrites the JavaScript-only character classes <c>[^]</c> and <c>[]</c>, which the
/// .NET regex engine rejects, into equivalent forms that .NET can compile.
/// c.f. https://github.com/sebastienros/esprima-dotnet/issues/146
/// </summary>
/// <param name="pattern">The regular expression source to rewrite.</param>
/// <returns>
/// The pattern with every unescaped, top-level <c>[^]</c> replaced by <c>[\s\S]</c>
/// (any character, including newlines) and every unescaped, top-level <c>[]</c>
/// replaced by <c>[^\s\S]</c> (no character at all). A null or empty pattern is
/// returned unchanged.
/// </returns>
public string EscapeFailingRegex(string pattern)
{
    if (string.IsNullOrEmpty(pattern))
    {
        return pattern;
    }

    // Fully qualified so that no additional using directive is required.
    var rewritten = new System.Text.StringBuilder(pattern.Length + 8);
    var insideClass = false;

    for (var i = 0; i < pattern.Length; i++)
    {
        var ch = pattern[i];

        if (ch == '\\')
        {
            // Copy the escape pair verbatim so that an escaped bracket such as \[
            // is never mistaken for the start of a character class.
            rewritten.Append(ch);
            if (i + 1 < pattern.Length)
            {
                rewritten.Append(pattern[++i]);
            }

            continue;
        }

        if (insideClass)
        {
            // Inside a class a '[' is a literal member; only ']' closes the class.
            if (ch == ']')
            {
                insideClass = false;
            }

            rewritten.Append(ch);
            continue;
        }

        if (ch == '[')
        {
            if (i + 1 < pattern.Length && pattern[i + 1] == ']')
            {
                // [] must never match. A negated "everything" class is empty and,
                // unlike [\0], does not match the NUL character; it can still be quantified.
                rewritten.Append(@"[^\s\S]");
                i += 1;
                continue;
            }

            if (i + 2 < pattern.Length && pattern[i + 1] == '^' && pattern[i + 2] == ']')
            {
                // [^] matches any character, including line terminators.
                rewritten.Append(@"[\s\S]");
                i += 2;
                continue;
            }

            insideClass = true;
        }

        rewritten.Append(ch);
    }

    return rewritten.ToString();
}

public Token ScanRegExpBody()
{
var ch = Source[Index];
Expand Down
19 changes: 5 additions & 14 deletions test/Esprima.Tests/RegExpTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,18 @@ private static Regex CreateRegex(string code)
}

[Theory]
[InlineData(@"/[^]*? (:[rp] [el] a[\w -]+)[^]*/")]
[InlineData(@"/[^]*?(:[rp][el]a[\w-]+)[^]*/")]
[InlineData(@"/[^]/")]
[InlineData(@"/[^ ]/")]
[InlineData(@"/[]/")]
[InlineData(@"/[]*/")]
[InlineData(@"/[]a/")]
public void ShouldParseRegularExpression(string regexp)
{
    // Wrap the regex literal in a variable declaration and parse it as a script.
    var source = @"var O = " + regexp;
    var parsed = new JavaScriptParser(source).ParseScript();

    // Parsing must yield a program instead of failing on the regex literal.
    Assert.NotNull(parsed);
}

[Theory]
[InlineData(@"/[^]*? (:[rp] [el] a[\w -]+)[^]*/")]
[InlineData(@"/[^]/")]
[InlineData(@"/[^ ]/")]
public void ShouldGetNonNullRegexFromScanner(string regexp)
{
    // Build a scanner over empty source with regex adaptation switched on.
    var adaptingOptions = new ParserOptions { AdaptRegexp = true };
    var result = new Scanner("", adaptingOptions).TestRegExp(regexp, "");

    // With no flags supplied, the scanner must still hand back a regex object.
    Assert.NotNull(result);
}
}
}
}

0 comments on commit 611b7da

Please sign in to comment.