Skip to content

Commit

Permalink
Optimize UrlEncode in Utils (#3307)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielmarbach authored Jul 10, 2024
1 parent c5e8e47 commit 7bd0df1
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 47 deletions.
2 changes: 2 additions & 0 deletions sdk/src/Core/AWSSDK.Core.NetFramework.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
<NoWarn>$(NoWarn);CS1591</NoWarn>

<SignAssembly>True</SignAssembly>

<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<PropertyGroup Condition=" '$(RuleSetFileForBuild)' == 'false' Or '$(RuleSetFileForBuild)' == '' ">
Expand Down
2 changes: 2 additions & 0 deletions sdk/src/Core/AWSSDK.Core.NetStandard.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
<NoWarn>$(NoWarn);CS1591;CA1822</NoWarn>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<SignAssembly>True</SignAssembly>

<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(TargetFramework)' == 'net8.0'">
<WarningsAsErrors>IL2026,IL2075</WarningsAsErrors>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#if !NET8_0_OR_GREATER
namespace System.Runtime.CompilerServices
{
    /// <summary>
    /// Indicates to the compiler that the .locals init flag should not be set in nested method headers
    /// when emitting to metadata.
    /// </summary>
    /// <remarks>
    /// This internal declaration is only compiled when <c>NET8_0_OR_GREATER</c> is not defined.
    /// </remarks>
    [AttributeUsage(
        AttributeTargets.Module
        | AttributeTargets.Class
        | AttributeTargets.Struct
        | AttributeTargets.Constructor
        | AttributeTargets.Method
        | AttributeTargets.Property
        | AttributeTargets.Event
        | AttributeTargets.Interface,
        Inherited = false)]
    internal sealed class SkipLocalsInitAttribute : Attribute
    {
    }
}
#endif
92 changes: 64 additions & 28 deletions sdk/src/Core/Amazon.Util/AWSSDKUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Threading;
using Amazon.Runtime.Endpoints;
using ThirdParty.RuntimeBackports;
Expand Down Expand Up @@ -79,12 +80,6 @@ public static partial class AWSSDKUtils
// Default value of progress update interval for streaming is 100KB.
public const long DefaultProgressUpdateInterval = 102400;

internal static Dictionary<int, string> RFCEncodingSchemes = new Dictionary<int, string>
{
{ 3986, ValidUrlCharacters },
{ 1738, ValidUrlCharactersRFC1738 }
};

internal const string S3Accelerate = "s3-accelerate";
internal const string S3Control = "s3-control";

Expand Down Expand Up @@ -1028,35 +1023,76 @@ public static string UrlEncode(string data, bool path)
/// Currently recognised RFC versions are 1738 (Dec '94) and 3986 (Jan '05).
/// If the specified RFC is not recognised, 3986 is used by default.
/// </remarks>
[SkipLocalsInit]
public static string UrlEncode(int rfcNumber, string data, bool path)
{
    // A null string becomes an empty span in the span-based code path, so null and empty
    // input both encode to the empty string.
    if (string.IsNullOrEmpty(data))
        return string.Empty;

    const int MaxStackLimit = 256;

    // Every UTF-8 byte is either copied verbatim (1 byte) or percent-encoded as "%XX" (3 bytes),
    // so the encoded output can be up to three times as long as the raw byte sequence.
    const int MaxEncodedBytesPerByte = 3;

    if (!TryGetRFCEncodingSchemes(rfcNumber, out var validUrlCharacters))
        validUrlCharacters = ValidUrlCharacters;

    var unreservedChars = string.Concat(validUrlCharacters, path ? ValidPathCharacters : string.Empty).AsSpan();

    var encoding = Encoding.UTF8;

    // Use the exact byte count (not GetMaxByteCount) so the buffer is sized for the real worst case.
    // checked: fail loudly instead of wrapping to a negative size for absurdly large inputs.
    var dataByteLength = encoding.GetByteCount(data);
    var encodedByteLength = checked(MaxEncodedBytesPerByte * dataByteLength);
    var totalByteLength = checked(encodedByteLength + dataByteLength);

    byte[] sharedDataBuffer = null;
    try
    {
        Span<byte> dataBuffer = totalByteLength <= MaxStackLimit
            ? stackalloc byte[MaxStackLimit]
            : sharedDataBuffer = ArrayPool<byte>.Shared.Rent(totalByteLength);

        // Instead of stack allocating or renting two buffers we use one buffer with two disjoint regions:
        //   [0, encodedByteLength)                                  -> encoded output
        //   [encodedByteLength, encodedByteLength + dataByteLength) -> raw UTF-8 bytes of the input
        // Because the regions never overlap, writing the (up to 3x larger) output can neither clobber
        // input bytes that have not been read yet nor run past the end of the buffer.
        var rawBytes = dataBuffer.Slice(encodedByteLength, dataByteLength);
        var bytesWritten = encoding.GetBytes(data.AsSpan(), rawBytes);

        var index = 0;
        foreach (var symbol in rawBytes.Slice(0, bytesWritten))
        {
            if (unreservedChars.IndexOf((char)symbol) != -1)
            {
                dataBuffer[index++] = symbol;
            }
            else
            {
                dataBuffer[index++] = (byte)'%';

                // Break apart the byte into two four-bit components and
                // then convert each into their hexadecimal equivalent.
                var hiNibble = symbol >> 4;
                var loNibble = symbol & 0xF;
                dataBuffer[index++] = (byte)ToUpperHex(hiNibble);
                dataBuffer[index++] = (byte)ToUpperHex(loNibble);
            }
        }

        return encoding.GetString(dataBuffer.Slice(0, index));
    }
    finally
    {
        // Only rented arrays go back to the pool; the stack-allocated buffer needs no cleanup.
        if (sharedDataBuffer != null) ArrayPool<byte>.Shared.Return(sharedDataBuffer);
    }
}

/// <summary>
/// Looks up the set of characters that are left unencoded for the given RFC number.
/// </summary>
/// <param name="rfcNumber">The RFC number; 3986 and 1738 are recognised.</param>
/// <param name="encodingScheme">
/// Receives the valid URL characters for the RFC, or <c>null</c> when the RFC number is not recognised.
/// </param>
/// <returns><c>true</c> when <paramref name="rfcNumber"/> is recognised; otherwise <c>false</c>.</returns>
internal static bool TryGetRFCEncodingSchemes(int rfcNumber, out string encodingScheme)
{
    switch (rfcNumber)
    {
        case 3986:
            encodingScheme = ValidUrlCharacters;
            return true;

        case 1738:
            encodingScheme = ValidUrlCharactersRFC1738;
            return true;

        default:
            encodingScheme = null;
            return false;
    }
}

private static void ToHexString(Span<byte> source, Span<char> destination, bool lowercase)
Expand Down
70 changes: 70 additions & 0 deletions sdk/src/Core/Amazon.Util/_bcl+netstandard/Extensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
using System;
using System.Globalization;
using System.Text;

namespace Amazon.Util
{
    /// <summary>
    /// Internal extension methods, including span-based <see cref="Encoding"/> shims that are
    /// compiled only when <c>NETSTANDARD</c> or <c>NETFRAMEWORK</c> is defined.
    /// </summary>
    internal static class Extensions
    {
        /// <summary>
        /// Upper-cases <paramref name="str"/> using the invariant culture.
        /// </summary>
        /// <param name="str">The string to convert.</param>
        /// <param name="culture">Must be <see cref="CultureInfo.InvariantCulture"/>.</param>
        /// <returns>The upper-cased string.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="culture"/> is not <see cref="CultureInfo.InvariantCulture"/>.
        /// </exception>
        internal static string ToUpper(this String str, CultureInfo culture)
        {
            if (culture != CultureInfo.InvariantCulture)
                throw new ArgumentException("The extension method ToUpper only works for invariant culture");
            return str.ToUpperInvariant();
        }

#if NETSTANDARD || NETFRAMEWORK
        /// <summary>
        /// Encodes into a span of bytes a set of characters from the specified read-only span.
        /// </summary>
        /// <param name="encoding">The encoding to be used.</param>
        /// <param name="src">The span containing the set of characters to encode.</param>
        /// <param name="dest">The byte span to hold the encoded bytes.</param>
        /// <returns>The count of encoded bytes.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown by the underlying encoding when <paramref name="dest"/> is too small to hold the encoded bytes.
        /// </exception>
        /// <remarks>
        /// The method was introduced as a compatibility shim for .NET Standard and can be replaced for target frameworks that provide those methods out of the box.
        /// </remarks>
        /// <seealso
        ///     href="https://docs.microsoft.com/dotnet/api/system.text.encoding.getbytes?view=netstandard-2.1#system-text-encoding-getbytes(system-readonlyspan((system-char))-system-span((system-byte)))" />
        public static unsafe int GetBytes(this Encoding encoding,
            ReadOnlySpan<char> src,
            Span<byte> dest)
        {
            if (src.Length == 0) return 0;

            fixed (char* charPointer = src)
            {
                if (dest.Length == 0)
                {
                    // Pinning an empty span yields a null pointer, which the pointer-based overload rejects.
                    // Pass the address of a placeholder with a capacity of zero instead, so the encoding itself
                    // decides the outcome (normally an ArgumentException because the output does not fit)
                    // rather than silently reporting that nothing had to be written.
                    byte placeholder = 0;
                    return encoding.GetBytes(charPointer, src.Length, &placeholder, 0);
                }

                fixed (byte* bytePointer = dest)
                {
                    return encoding.GetBytes(
                        charPointer,
                        src.Length,
                        bytePointer,
                        dest.Length);
                }
            }
        }

        /// <summary>
        /// Decodes all the bytes in the specified byte span into a string.
        /// </summary>
        /// <param name="encoding">The encoding to be used.</param>
        /// <param name="bytes">A read-only byte span to decode to a Unicode string.</param>
        /// <returns>
        /// A string that contains the decoded bytes from the provided read-only span;
        /// the empty string when the span is empty.
        /// </returns>
        /// <remarks>
        /// The method was introduced as a compatibility shim for .NET Standard and can be replaced for target frameworks that provide those methods out of the box.
        /// </remarks>
        public static unsafe string GetString(this Encoding encoding, ReadOnlySpan<byte> bytes)
        {
            // Pinning an empty span yields a null pointer, so short-circuit before calling the pointer overload.
            if (bytes.Length == 0) return string.Empty;

            fixed (byte* bytePointer = bytes)
            {
                return encoding.GetString(bytePointer, bytes.Length);
            }
        }
#endif
    }
}
18 changes: 0 additions & 18 deletions sdk/src/Core/Amazon.Util/_netstandard/Extensions.cs

This file was deleted.

23 changes: 22 additions & 1 deletion sdk/test/NetStandard/UnitTests/Core/AWSSDKUtilsTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System.Collections.Generic;
using System.Collections.Generic;
using Amazon.Util;
using System.Text;
using Amazon.Runtime.Internal;
Expand Down Expand Up @@ -63,5 +63,26 @@ public void DetermineService(string url, string expectedService)

Assert.Equal(expectedService, service);
}

/// <summary>
/// Checks the output of <c>AWSSDKUtils.UrlEncode</c> for each input when <c>path</c> is <c>false</c>.
/// </summary>
[Theory]
[InlineData("value, with special chars!", "value%2C%20with%20special%20chars%21")]
[InlineData("value, with special chars and path {/+:}", "value%2C%20with%20special%20chars%20and%20path%20%7B%2F%2B%3A%7D")]
public void UrlEncodeWithoutPath(string input, string expected)
{
    // Act
    string actual = AWSSDKUtils.UrlEncode(input, false);

    // Assert
    Assert.Equal(expected, actual);
}

/// <summary>
/// Checks the output of <c>AWSSDKUtils.UrlEncode</c> for each input when <c>path</c> is <c>true</c>,
/// including an input that starts with a surrogate pair.
/// </summary>
[Theory]
[InlineData("\ud83d\ude02 value, with special chars!", "%F0%9F%98%82%20value%2C%20with%20special%20chars!")]
[InlineData("value, with special chars!", "value%2C%20with%20special%20chars!")]
[InlineData("value, with special chars and path {/+:}", "value%2C%20with%20special%20chars%20and%20path%20%7B/%2B:%7D")]
public void UrlEncodeWithPath(string input, string expected)
{
    // Act
    string actual = AWSSDKUtils.UrlEncode(input, true);

    // Assert
    Assert.Equal(expected, actual);
}
}
}

0 comments on commit 7bd0df1

Please sign in to comment.