Skip to content

Commit

Permalink
Fix capturing group numbering bug and a few more regex-related issues (
Browse files Browse the repository at this point in the history
  • Loading branch information
adams85 authored Aug 7, 2023
1 parent 0ebf7cd commit e9ace1a
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 63 deletions.
6 changes: 0 additions & 6 deletions Jint.Tests.Test262/Test262Harness.settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,8 @@
"language/literals/regexp/named-groups/forward-reference.js",

// RegExp handling problems
"built-ins/RegExp/named-groups/non-unicode-match.js",
"built-ins/RegExp/named-groups/non-unicode-property-names-valid.js",
"built-ins/RegExp/named-groups/unicode-match.js",
"built-ins/RegExp/named-groups/unicode-property-names-valid.js",
"built-ins/RegExp/prototype/exec/S15.10.6.2_A1_T6.js",
"built-ins/String/prototype/split/separator-regexp.js",
"language/literals/regexp/u-case-mapping.js",
"language/literals/regexp/u-surrogate-pairs-atom-escape-decimal.js",

// requires investigation how to process complex function name evaluation for property
"built-ins/Function/prototype/toString/method-computed-property-name.js",
Expand Down
2 changes: 1 addition & 1 deletion Jint/Jint.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Esprima" Version="3.0.0-rc-03" />
<PackageReference Include="Esprima" Version="3.0.0-rc-04" />
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="all" />
<PackageReference Include="Nullable" Version="1.3.1" PrivateAssets="all" />
<PackageReference Include="PolySharp" Version="1.13.1">
Expand Down
3 changes: 3 additions & 0 deletions Jint/Native/RegExp/JsRegExp.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using Esprima;
using Jint.Native.Object;
using Jint.Runtime;
using Jint.Runtime.Descriptors;
Expand Down Expand Up @@ -62,6 +63,8 @@ public string Flags
}
}

public RegExpParseResult ParseResult { get; set; }

public bool DotAll { get; private set; }
public bool Global { get; private set; }
public bool Indices { get; private set; }
Expand Down
12 changes: 7 additions & 5 deletions Jint/Native/RegExp/RegExpConstructor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,15 @@ private ObjectInstance RegExpInitialize(JsRegExp r, JsValue pattern, JsValue fla

try
{
var regExp = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);
var regExpParseResult = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);

if (regExp is null)
if (!regExpParseResult.Success)
{
ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression: '/{p}/{flags}'");
ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
}

r.Value = regExp;
r.Value = regExpParseResult.Regex!;
r.ParseResult = regExpParseResult;
}
catch (Exception ex)
{
Expand All @@ -135,12 +136,13 @@ private JsRegExp RegExpAlloc(JsValue newTarget)
return r;
}

public JsRegExp Construct(Regex regExp, string source, string flags)
public JsRegExp Construct(Regex regExp, string source, string flags, RegExpParseResult regExpParseResult = default)
{
var r = RegExpAlloc(this);
r.Value = regExp;
r.Source = source;
r.Flags = flags;
r.ParseResult = regExpParseResult;

RegExpInitialize(r);

Expand Down
80 changes: 32 additions & 48 deletions Jint/Native/RegExp/RegExpPrototype.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Text.RegularExpressions;
using Jint.Collections;
using Jint.Native.Number;
Expand Down Expand Up @@ -168,16 +167,17 @@ private JsValue Replace(JsValue thisObject, JsValue[] arguments)
{
string Evaluator(Match match)
{
var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
var actualGroupCount = GetActualRegexGroupCount(rei, match);
var replacerArgs = new List<JsValue>(actualGroupCount + 2);
replacerArgs.Add(match.Value);

ObjectInstance? groups = null;
for (var i = 1; i < match.Groups.Count; i++)
for (var i = 1; i < actualGroupCount; i++)
{
var capture = match.Groups[i];
replacerArgs.Add(capture.Success ? capture.Value : Undefined);

var groupName = GetRegexGroupName(rei.Value, i);
var groupName = GetRegexGroupName(rei, i);
if (!string.IsNullOrWhiteSpace(groupName))
{
groups ??= OrdinaryObjectCreate(_engine, null);
Expand Down Expand Up @@ -475,21 +475,13 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
}

var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
var match = R.Value.Match(s, 0);

if (!match.Success) // No match at all return the string in an array
{
a.SetIndexValue(0, s, updateLength: true);
return a;
}

int lastIndex = 0;
uint index = 0;
while (match.Success && index < lim)
for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
{
if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
{
match = match.NextMatch();
continue;
}

Expand All @@ -502,7 +494,8 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
}

lastIndex = match.Index + match.Length;
for (int i = 1; i < match.Groups.Count; i++)
var actualGroupCount = GetActualRegexGroupCount(R, match);
for (int i = 1; i < actualGroupCount; i++)
{
var group = match.Groups[i];
var item = Undefined;
Expand All @@ -518,14 +511,11 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
return a;
}
}

match = match.NextMatch();
if (!match.Success) // Add the last part of the split
{
a.SetIndexValue(index++, s.Substring(lastIndex), updateLength: true);
}
}

// Add the last part of the split
a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);

return a;
}

Expand Down Expand Up @@ -720,7 +710,7 @@ private JsValue Match(JsValue thisObject, JsValue[] arguments)
match = match.NextMatch();
if (!match.Success || match.Index != ++li)
break;
a.SetIndexValue(li, match.Value, updateLength: false);
a.SetIndexValue(li, match.Value, updateLength: false);
}
a.SetLength(li);
return a;
Expand Down Expand Up @@ -898,7 +888,7 @@ private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
return Null;
}

return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
}

// the stateful version
Expand Down Expand Up @@ -949,25 +939,26 @@ private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
R.Set(JsRegExp.PropertyLastIndex, e, true);
}

return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
}

private static JsArray CreateReturnValueArray(
Engine engine,
Regex regex,
JsRegExp rei,
Match match,
string s,
bool fullUnicode,
bool hasIndices)
{
var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) match.Groups.Count);
var engine = rei.Engine;
var actualGroupCount = GetActualRegexGroupCount(rei, match);
var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
array.CreateDataProperty(PropertyIndex, match.Index);
array.CreateDataProperty(PropertyInput, s);

ObjectInstance? groups = null;
List<string>? groupNames = null;
var indices = hasIndices ? new List<JsNumber[]?>(match.Groups.Count) : null;
for (uint i = 0; i < match.Groups.Count; i++)
var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
for (uint i = 0; i < actualGroupCount; i++)
{
var capture = match.Groups[(int) i];
var capturedValue = Undefined;
Expand All @@ -988,7 +979,7 @@ private static JsArray CreateReturnValueArray(
}
}

var groupName = GetRegexGroupName(regex, (int) i);
var groupName = GetRegexGroupName(rei, (int) i);
if (!string.IsNullOrWhiteSpace(groupName))
{
groups ??= OrdinaryObjectCreate(engine, null);
Expand Down Expand Up @@ -1055,35 +1046,28 @@ private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] mat
return engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
}

private static string? GetRegexGroupName(Regex regex, int index)
private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
{
return rei.ParseResult.Success ? rei.ParseResult.ActualRegexGroupCount : match.Groups.Count;
}

private static string? GetRegexGroupName(JsRegExp rei, int index)
{
if (index == 0)
{
return null;
}
var regex = rei.Value;
if (rei.ParseResult.Success)
{
return rei.ParseResult.GetRegexGroupName(index);
}

var groupNameFromNumber = regex.GroupNameFromNumber(index);
if (groupNameFromNumber.Length == 1 && groupNameFromNumber[0] == 48 + index)
{
// regex defaults to index as group name when it's not a named group
return null;

}

// The characters allowed in group names differs between the JS and .NET regex engines.
// For example the group name "$group" is valid in JS but invalid in .NET.
// As a workaround for this issue, the parser make an attempt to encode the problematic group names to
// names which are valid in .NET and probably won't collide with other group names present in the pattern
// (https://github.com/sebastienros/esprima-dotnet/blob/v3.0.0-rc-03/src/Esprima/Scanner.RegExpParser.cs#L942).
// We need to decode such group names.
const string encodedGroupNamePrefix = "__utf8_";
if (groupNameFromNumber.StartsWith(encodedGroupNamePrefix, StringComparison.Ordinal))
{
try
{
var bytes = groupNameFromNumber.AsSpan(encodedGroupNamePrefix.Length).BytesFromHexString();
groupNameFromNumber = Encoding.UTF8.GetString(bytes);
}
catch { /* intentional no-op */ }
}

return groupNameFromNumber;
Expand Down
7 changes: 4 additions & 3 deletions Jint/Runtime/Interpreter/Expressions/JintLiteralExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,13 @@ private JsValue ResolveValue(EvaluationContext context)
if (expression.TokenType == TokenType.RegularExpression)
{
var regExpLiteral = (RegExpLiteral) _expression;
if (regExpLiteral.Value is System.Text.RegularExpressions.Regex regex)
var regExpParseResult = regExpLiteral.ParseResult;
if (regExpParseResult.Success)
{
return context.Engine.Realm.Intrinsics.RegExp.Construct(regex, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags);
return context.Engine.Realm.Intrinsics.RegExp.Construct(regExpParseResult.Regex!, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags, regExpParseResult);
}

ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression: '{regExpLiteral.Regex.Pattern}/{regExpLiteral.Regex.Flags}'");
ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
}

return JsValue.FromObject(context.Engine, expression.Value);
Expand Down

0 comments on commit e9ace1a

Please sign in to comment.