Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize URI decode and encode #1647

Merged
merged 1 commit into from
Oct 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Jint.Tests.CommonScripts/SunSpiderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ private static void RunTest(string source)
{
var engine = new Engine()
.SetValue("log", new Action<object>(Console.WriteLine))
.SetValue("assert", new Action<bool, string>((condition, message) => Assert.True(condition, message)));
.SetValue("assert", new Action<bool, string>((condition, message) => Assert.That(condition, message)));

try
{
Expand Down
6 changes: 6 additions & 0 deletions Jint/Engine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public sealed partial class Engine : IDisposable
private readonly ExecutionContextStack _executionContexts;
private JsValue _completionValue = JsValue.Undefined;
internal EvaluationContext? _activeEvaluationContext;
internal ErrorDispatchInfo? _error;

private readonly EventLoop _eventLoop = new();

Expand Down Expand Up @@ -1554,6 +1555,11 @@ private ObjectInstance Construct(
return result;
}

/// <summary>
/// Stores <paramref name="error"/> in the engine's <c>_error</c> field.
/// </summary>
/// <param name="error">The error information to record.</param>
internal void SignalError(ErrorDispatchInfo error) => _error = error;

public void Dispose()
{
if (_objectWrapperCache is null)
Expand Down
227 changes: 137 additions & 90 deletions Jint/Native/Global/GlobalObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -272,21 +272,10 @@ public static JsValue IsFinite(JsValue thisObject, JsValue[] arguments)
return true;
}

private static readonly HashSet<char> UriReserved = new HashSet<char>
{
';', '/', '?', ':', '@', '&', '=', '+', '$', ','
};

private static readonly HashSet<char> UriUnescaped = new HashSet<char>
{
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', '.', '!',
'~', '*', '\'', '(', ')'
};

private static readonly HashSet<char> UnescapedUriSet = new HashSet<char>(UriReserved.Concat(UriUnescaped).Concat(new[] { '#' }));
private static readonly HashSet<char> ReservedUriSet = new HashSet<char>(UriReserved.Concat(new[] { '#' }));
// Characters of the URI "reserved" set.
private static readonly string UriReserved = ";/?:@&=+$,";

// Punctuation marks that Encode copies through unchanged; ASCII letters and digits
// are not listed here because Encode tests them by range.
private static readonly string UriUnescaped = "-_.!~*'()";

// Reserved characters, unescaped marks and '#'.
private static readonly string UnescapedUriSet = UriReserved + UriUnescaped + '#';

// Reserved characters plus '#'.
private static readonly string ReservedUriSet = UriReserved + '#';

// Upper-case hexadecimal digits, indexed by nibble value when writing %XX escapes.
private const string HexaMap = "0123456789ABCDEF";

Expand Down Expand Up @@ -320,25 +309,26 @@ public JsValue EncodeUriComponent(JsValue thisObject, JsValue[] arguments)
return Encode(uriString, UriUnescaped);
}

private string Encode(string uriString, HashSet<char> unescapedUriSet)
private JsValue Encode(string uriString, string unescapedUriSet)
{
var strLen = uriString.Length;

_stringBuilder.EnsureCapacity(uriString.Length);
_stringBuilder.Clear();
var buffer = new byte[4];

for (var k = 0; k < strLen; k++)
{
var c = uriString[k];
if (unescapedUriSet != null && unescapedUriSet.Contains(c))
if (c is >= 'a' and <= 'z' || c is >= 'A' and <= 'Z' || c is >= '0' and <= '9' || unescapedUriSet.IndexOf(c) != -1)
{
_stringBuilder.Append(c);
}
else
{
if (c >= 0xDC00 && c <= 0xDBFF)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

int v;
Expand All @@ -351,70 +341,58 @@ private string Encode(string uriString, HashSet<char> unescapedUriSet)
k++;
if (k == strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

var kChar = (int)uriString[k];
if (kChar < 0xDC00 || kChar > 0xDFFF)
var kChar = (int) uriString[k];
if (kChar is < 0xDC00 or > 0xDFFF)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

v = (c - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000;
}

byte[] octets = System.Array.Empty<byte>();

if (v >= 0 && v <= 0x007F)
{
// 00000000 0zzzzzzz -> 0zzzzzzz
octets = new[] { (byte)v };
}
else if (v <= 0x07FF)
{
// 00000yyy yyzzzzzz -> 110yyyyy ; 10zzzzzz
octets = new[]
{
(byte)(0xC0 | (v >> 6)),
(byte)(0x80 | (v & 0x3F))
};
}
else if (v <= 0xD7FF)
{
// xxxxyyyy yyzzzzzz -> 1110xxxx; 10yyyyyy; 10zzzzzz
octets = new[]
{
(byte)(0xE0 | (v >> 12)),
(byte)(0x80 | ((v >> 6) & 0x3F)),
(byte)(0x80 | (v & 0x3F))
};
}
else if (v <= 0xDFFF)
{
ExceptionHelper.ThrowUriError(_realm);
}
else if (v <= 0xFFFF)
var length = 1;
switch (v)
{
octets = new[]
{
(byte) (0xE0 | (v >> 12)),
(byte) (0x80 | ((v >> 6) & 0x3F)),
(byte) (0x80 | (v & 0x3F))
};
}
else
{
octets = new[]
{
(byte) (0xF0 | (v >> 18)),
(byte) (0x80 | (v >> 12 & 0x3F)),
(byte) (0x80 | (v >> 6 & 0x3F)),
(byte) (0x80 | (v >> 0 & 0x3F))
};
case >= 0 and <= 0x007F:
// 00000000 0zzzzzzz -> 0zzzzzzz
buffer[0] = (byte) v;
break;
case <= 0x07FF:
// 00000yyy yyzzzzzz -> 110yyyyy ; 10zzzzzz
length = 2;
buffer[0] = (byte) (0xC0 | (v >> 6));
buffer[1] = (byte) (0x80 | (v & 0x3F));
break;
case <= 0xD7FF:
// xxxxyyyy yyzzzzzz -> 1110xxxx; 10yyyyyy; 10zzzzzz
length = 3;
buffer[0] = (byte) (0xE0 | (v >> 12));
buffer[1] = (byte) (0x80 | ((v >> 6) & 0x3F));
buffer[2] = (byte) (0x80 | (v & 0x3F));
break;
case <= 0xDFFF:
goto uriError;
case <= 0xFFFF:
length = 3;
buffer[0] = (byte) (0xE0 | (v >> 12));
buffer[1] = (byte) (0x80 | ((v >> 6) & 0x3F));
buffer[2] = (byte) (0x80 | (v & 0x3F));
break;
default:
length = 4;
buffer[0] = (byte) (0xF0 | (v >> 18));
buffer[1] = (byte) (0x80 | (v >> 12 & 0x3F));
buffer[2] = (byte) (0x80 | (v >> 6 & 0x3F));
buffer[3] = (byte) (0x80 | (v >> 0 & 0x3F));
break;
}

foreach (var octet in octets)
for (var i = 0; i < length; i++)
{
var octet = buffer[i];
var x1 = HexaMap[octet / 16];
var x2 = HexaMap[octet % 16];
_stringBuilder.Append('%').Append(x1).Append(x2);
Expand All @@ -423,6 +401,10 @@ private string Encode(string uriString, HashSet<char> unescapedUriSet)
}

return _stringBuilder.ToString();

uriError:
_engine.SignalError(ExceptionHelper.CreateUriError(_realm, "URI malformed"));
return null!;
}

public JsValue DecodeUri(JsValue thisObject, JsValue[] arguments)
Expand All @@ -439,14 +421,18 @@ public JsValue DecodeUriComponent(JsValue thisObject, JsValue[] arguments)
return Decode(componentString, null);
}

private string Decode(string uriString, HashSet<char>? reservedSet)
private JsValue Decode(string uriString, string? reservedSet)
{
var strLen = uriString.Length;

_stringBuilder.EnsureCapacity(strLen);
_stringBuilder.Clear();

var octets = System.Array.Empty<byte>();
#if SUPPORTS_SPAN_PARSE
Span<byte> octets = stackalloc byte[4];
#else
var octets = new byte[4];
#endif

for (var k = 0; k < strLen; k++)
{
Expand All @@ -460,21 +446,23 @@ private string Decode(string uriString, HashSet<char>? reservedSet)
var start = k;
if (k + 2 >= strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

if (!IsValidHexaChar(uriString[k + 1]) || !IsValidHexaChar(uriString[k + 2]))
var c1 = uriString[k + 1];
var c2 = uriString[k + 2];
if (!IsValidHexaChar(c1) || !IsValidHexaChar(c2))
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

var B = Convert.ToByte(uriString[k + 1].ToString() + uriString[k + 2], 16);
var B = StringToIntBase16(uriString.AsSpan(k + 1, 2));

k += 2;
if ((B & 0x80) == 0)
{
C = (char)B;
if (reservedSet == null || !reservedSet.Contains(C))
if (reservedSet == null || reservedSet.IndexOf(C) == -1)
{
_stringBuilder.Append(C);
}
Expand All @@ -486,56 +474,115 @@ private string Decode(string uriString, HashSet<char>? reservedSet)
else
{
var n = 0;
for (; ((B << n) & 0x80) != 0; n++) ;
for (; ((B << n) & 0x80) != 0; n++);

if (n == 1 || n > 4)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

octets = octets.Length == n
? octets
: new byte[n];

octets[0] = B;

if (k + (3 * (n - 1)) >= strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

for (var j = 1; j < n; j++)
{
k++;
if (uriString[k] != '%')
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

if (!IsValidHexaChar(uriString[k + 1]) || !IsValidHexaChar(uriString[k + 2]))
c1 = uriString[k + 1];
c2 = uriString[k + 2];
if (!IsValidHexaChar(c1) || !IsValidHexaChar(c2))
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

B = Convert.ToByte(uriString[k + 1].ToString() + uriString[k + 2], 16);
B = StringToIntBase16(uriString.AsSpan(k + 1, 2));

// B & 11000000 != 10000000
if ((B & 0xC0) != 0x80)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

k += 2;

octets[j] = B;
}

_stringBuilder.Append(Encoding.UTF8.GetString(octets, 0, octets.Length));
#if SUPPORTS_SPAN_PARSE
_stringBuilder.Append(Encoding.UTF8.GetString(octets.Slice(0, n)));
#else
_stringBuilder.Append(Encoding.UTF8.GetString(octets, 0, n));
#endif
}
}
}

return _stringBuilder.ToString();

uriError:
_engine.SignalError(ExceptionHelper.CreateUriError(_realm, "URI malformed"));
return null!;
}

/// <summary>
/// Parses a hexadecimal number from <paramref name="s"/> and returns it as a byte.
/// </summary>
/// <remarks>
/// An optional leading '+' and an optional "0x"/"0X" prefix are skipped. Parsing stops at the
/// first character that is not a hexadecimal digit. Input that contains no digits, including
/// an empty span, yields 0.
/// </remarks>
/// <param name="s">The characters to parse.</param>
/// <returns>The accumulated value cast to <see cref="byte"/>.</returns>
private static byte StringToIntBase16(ReadOnlySpan<char> s)
{
    var length = s.Length;
    if (length == 0)
    {
        // Nothing to parse; reading s[0] below would throw on an empty span.
        return 0;
    }

    var i = 0;

    if (s[i] == '+')
    {
        i++;
    }

    // The length check comes first so that s[i] is never read past the end (e.g. for "+").
    if (i + 1 < length && s[i] == '0')
    {
        if (s[i + 1] == 'x' || s[i + 1] == 'X')
        {
            i += 2;
        }
    }

    uint result = 0;
    while (i < length && IsDigit(s[i], 16, out var value))
    {
        result = result * 16 + (uint) value;
        i++;
    }

    // Two hex digits (what the decoder passes in) always fit into a byte.
    return (byte) (int) result;
}

/// <summary>
/// Determines whether <paramref name="c"/> is a digit in the given <paramref name="radix"/>.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <param name="radix">The numeric base; a digit is valid when its value is below this.</param>
/// <param name="result">
/// The numeric value of <paramref name="c"/> when it is an ASCII digit or letter
/// ('0'-'9' map to 0-9, letters of either case map to 10-35), even if that value is not
/// below <paramref name="radix"/>; -1 for any other character.
/// </param>
/// <returns><c>true</c> when the character has a value and that value is below the radix.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsDigit(char c, int radix, out int result)
{
    switch (c)
    {
        case >= '0' and <= '9':
            result = c - '0';
            break;
        case >= 'A' and <= 'Z':
            result = c - 'A' + 10;
            break;
        case >= 'a' and <= 'z':
            result = c - 'a' + 10;
            break;
        default:
            // Not an ASCII letter or digit at all.
            result = -1;
            return false;
    }

    return result < radix;
}

/// <summary>
Expand Down
Loading