Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FieldMap abstraction #162

Merged
merged 1 commit into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"name": ".NET Core Launch (console)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
"program": "${workspaceFolder}/perf/bench/bin/Debug/net8.0/bench.dll",
"args": [],
"cwd": "${workspaceFolder}/perf/bench",
Expand Down
2 changes: 1 addition & 1 deletion .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"type": "process",
"args": [
"build",
"${workspaceFolder}/bench/bench.csproj",
"${workspaceFolder}/perf/bench/bench.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
Expand Down
2 changes: 1 addition & 1 deletion perf/bench/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
var json1 = System.Text.Json.JsonSerializer.Serialize(DataGenerator.CreateLocation(), options);
var json2 = Serde.Json.JsonSerializer.Serialize(DataGenerator.CreateLocation());
var loc1 = System.Text.Json.JsonSerializer.Deserialize<Location>(LocationSample, options);
var loc2 = Serde.Json.JsonSerializer.Deserialize<Location>(LocationSample);
var loc2 = Serde.Json.JsonSerializer.Deserialize<Location, LocationWrap>(LocationSample);

Console.WriteLine("Checking correctness of serialization: " + (loc1 == loc2));
if (loc1 != loc2)
Expand Down
200 changes: 78 additions & 122 deletions perf/bench/SampleTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,138 +58,94 @@ public partial record Location

public partial record LocationWrap : IDeserialize<Location>
{
static Benchmarks.Location Serde.IDeserialize<Benchmarks.Location>.Deserialize(IDeserializer deserializer)
{
var fieldNames = new[]
{
"Id",
"Address1",
"Address2",
"City",
"State",
"PostalCode",
"Name",
"PhoneNumber",
"Country"
};
return deserializer.DeserializeType("Location", fieldNames, SerdeVisitor.Instance);
}
private static readonly FieldMap s_fieldMap = new(nameof(LocationWrap), [
"id",
"address1",
"address2",
"city",
"state",
"postalCode",
"name",
"phoneNumber",
"country"
]);

private sealed class SerdeVisitor : Serde.IDeserializeVisitor<Benchmarks.Location>
static Benchmarks.Location Serde.IDeserialize<Benchmarks.Location>.Deserialize(IDeserializer deserializer)
{
public static readonly SerdeVisitor Instance = new SerdeVisitor();
public string ExpectedTypeName => "Benchmarks.Location";
int _l_id = default !;
string _l_address1 = default !;
string _l_address2 = default !;
string _l_city = default !;
string _l_state = default !;
string _l_postalcode = default !;
string _l_name = default !;
string _l_phonenumber = default !;
string _l_country = default !;
ushort _r_assignedValid = 0b0;

private sealed class FieldNameVisitor : Serde.IDeserialize<byte>, Serde.IDeserializeVisitor<byte>
var typeDeserialize = deserializer.DeserializeType(s_fieldMap);
int index;
while ((index = typeDeserialize.TryReadIndex(s_fieldMap)) != IDeserializeType.EndOfType)
{
public static readonly FieldNameVisitor Instance = new FieldNameVisitor();
public static byte Deserialize(IDeserializer deserializer) => deserializer.DeserializeString(Instance);
public string ExpectedTypeName => "string";

byte Serde.IDeserializeVisitor<byte>.VisitString(string s) => VisitUtf8Span(System.Text.Encoding.UTF8.GetBytes(s));
public byte VisitUtf8Span(System.ReadOnlySpan<byte> s)
switch (index)
{
switch (s[0])
{
case (byte)'i'when s.SequenceEqual("id"u8):
return 1;
case (byte)'a'when s.SequenceEqual("address1"u8):
return 2;
case (byte)'a'when s.SequenceEqual("address2"u8):
return 3;
case (byte)'c'when s.SequenceEqual("city"u8):
return 4;
case (byte)'s'when s.SequenceEqual("state"u8):
return 5;
case (byte)'p'when s.SequenceEqual("postalCode"u8):
return 6;
case (byte)'n'when s.SequenceEqual("name"u8):
return 7;
case (byte)'p'when s.SequenceEqual("phoneNumber"u8):
return 8;
case (byte)'c'when s.SequenceEqual("country"u8):
return 9;
default:
return 0;
}
case 0:
_l_id = typeDeserialize.ReadValue<int, Int32Wrap>();
_r_assignedValid |= ((ushort)1) << 0;
break;
case 1:
_l_address1 = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 1;
break;
case 2:
_l_address2 = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 2;
break;
case 3:
_l_city = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 3;
break;
case 4:
_l_state = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 4;
break;
case 5:
_l_postalcode = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 5;
break;
case 6:
_l_name = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 6;
break;
case 7:
_l_phonenumber = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 7;
break;
case 8:
_l_country = typeDeserialize.ReadValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 8;
break;
}
}

Benchmarks.Location Serde.IDeserializeVisitor<Benchmarks.Location>.VisitDictionary<D>(ref D d)
if (_r_assignedValid != 0b111111111)
{
int _l_id = default !;
string _l_address1 = default !;
string _l_address2 = default !;
string _l_city = default !;
string _l_state = default !;
string _l_postalcode = default !;
string _l_name = default !;
string _l_phonenumber = default !;
string _l_country = default !;
ushort _r_assignedValid = 0b0;
while (d.TryGetNextKey<byte, FieldNameVisitor>(out byte key))
{
switch (key)
{
case 1:
_l_id = d.GetNextValue<int, Int32Wrap>();
_r_assignedValid |= ((ushort)1) << 0;
break;
case 2:
_l_address1 = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 1;
break;
case 3:
_l_address2 = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 2;
break;
case 4:
_l_city = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 3;
break;
case 5:
_l_state = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 4;
break;
case 6:
_l_postalcode = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 5;
break;
case 7:
_l_name = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 6;
break;
case 8:
_l_phonenumber = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 7;
break;
case 9:
_l_country = d.GetNextValue<string, StringWrap>();
_r_assignedValid |= ((ushort)1) << 8;
break;
}
}

if (_r_assignedValid != 0b111111111)
{
throw new Serde.InvalidDeserializeValueException("Not all members were assigned");
}

var newType = new Benchmarks.Location()
{
Id = _l_id,
Address1 = _l_address1,
Address2 = _l_address2,
City = _l_city,
State = _l_state,
PostalCode = _l_postalcode,
Name = _l_name,
PhoneNumber = _l_phonenumber,
Country = _l_country,
};
return newType;
throw new Serde.InvalidDeserializeValueException("Not all members were assigned");
}
}

var newType = new Benchmarks.Location()
{
Id = _l_id,
Address1 = _l_address1,
Address2 = _l_address2,
City = _l_city,
State = _l_state,
PostalCode = _l_postalcode,
Name = _l_name,
PhoneNumber = _l_phonenumber,
Country = _l_country,
};
return newType;
}
}
}
103 changes: 103 additions & 0 deletions src/serde/IDeserialize.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@

using System;
using System.Collections.Immutable;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;

namespace Serde
{
Expand Down Expand Up @@ -69,6 +73,104 @@ bool TryGetNextEntry<K, DK, V, DV>([MaybeNullWhen(false)] out (K, V) next)
int? SizeOpt { get; }
}

/// <summary>
/// Reads the fields of a single type one at a time: callers ask for the index of the
/// next field via <see cref="TryReadIndex" /> and then read that field's value via
/// <see cref="ReadValue{V, D}" />. An instance is returned by
/// <see cref="IDeserializer.DeserializeType" />.
/// </summary>
public interface IDeserializeType
{
    /// <summary>
    /// Sentinel index. Callers loop on <see cref="TryReadIndex" /> until this value is
    /// returned, treating it as "no more fields to read".
    /// </summary>
    const int EndOfType = -1;

    /// <summary>
    /// Sentinel index that <see cref="FieldMap.TryReadIndex" /> yields for a field name
    /// that is not present in the map.
    /// </summary>
    const int IndexNotFound = -2;

    /// <summary>
    /// Produces the index of the next field, resolved against <paramref name="map" />.
    /// </summary>
    /// <param name="map">The field-name-to-index mapping of the type being read.</param>
    /// <returns>
    /// A field index, or one of the sentinel values declared on this interface.
    /// NOTE(review): presumably <see cref="IndexNotFound" /> is returned for unknown
    /// field names -- confirm against the implementations.
    /// </returns>
    int TryReadIndex(FieldMap map);

    /// <summary>
    /// Reads a value of type <typeparamref name="V" />, using <typeparamref name="D" />
    /// as its <see cref="IDeserialize{T}" /> implementation.
    /// </summary>
    /// <typeparam name="V">The type of the value to read.</typeparam>
    /// <typeparam name="D">The deserialization implementation for <typeparamref name="V" />.</typeparam>
    V ReadValue<V, D>()
        where D : IDeserialize<V>;
}

/// <summary>
/// Maps the field names of a type to integer indices. Names are kept as UTF-8 bytes so
/// that a lookup can compare against UTF-8 input directly; this is an optimization that
/// avoids allocating strings for field names while deserializing.
/// </summary>
public sealed class FieldMap
{
    // UTF-8 encoder that does not emit a byte-order mark.
    private static readonly UTF8Encoding s_utf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

    // Entries ordered by the byte-wise comparison of their UTF-8 names so that lookups
    // can binary search. Index is the position the name had in the constructor argument.
    private readonly ImmutableArray<(byte[] Utf8String, int Index)> _fieldNames;

    /// <summary>The type name supplied when the map was created.</summary>
    public string TypeName { get; }

    /// <summary>
    /// Create a new field mapping. The ordering of the field names is important -- the
    /// position of a name in <paramref name="fieldNames" /> is the index reported for it
    /// by <see cref="IDeserializeType.TryReadIndex" />.
    /// </summary>
    /// <param name="typeName">Name of the type the fields belong to.</param>
    /// <param name="fieldNames">The field names, in index order.</param>
    /// <remarks>
    /// NOTE(review): duplicate names are not rejected here; which of the duplicate
    /// positions a lookup reports is not pinned down by this class.
    /// </remarks>
    public FieldMap(string typeName, ReadOnlySpan<string> fieldNames)
    {
        TypeName = typeName;

        var entries = ImmutableArray.CreateBuilder<(byte[] Utf8String, int Index)>(fieldNames.Length);
        int position = 0;
        foreach (string name in fieldNames)
        {
            entries.Add((s_utf8.GetBytes(name), position));
            position++;
        }

        entries.Sort(CompareUtf8);

        // The builder was created with exactly fieldNames.Length capacity and filled
        // completely, so its storage can be moved rather than copied.
        _fieldNames = entries.MoveToImmutable();

        static int CompareUtf8((byte[] Utf8String, int Index) left, (byte[] Utf8String, int Index) right)
        {
            return left.Utf8String.AsSpan().SequenceCompareTo(right.Utf8String);
        }
    }

    /// <summary>
    /// Looks up the index registered for a UTF-8 encoded field name.
    /// </summary>
    /// <param name="utf8FieldName">The field name as UTF-8 bytes.</param>
    /// <returns>
    /// The index of the field, or <see cref="IDeserializeType.IndexNotFound" /> when the
    /// name is not part of this map.
    /// </returns>
    public int TryReadIndex(Utf8Span utf8FieldName)
    {
        int slot = BinarySearch(_fieldNames.AsSpan(), utf8FieldName);
        if (slot >= 0)
        {
            return _fieldNames[slot].Index;
        }

        return IDeserializeType.IndexNotFound;
    }

    // Binary search over the sorted entries. Hand-written instead of calling the
    // BinarySearch in System.MemoryExtensions because ref structs can't yet be
    // substituted for type arguments.
    //
    // Returns the position of the matching entry, or, when there is no match, the
    // bitwise complement of the position of the first entry larger than fieldName
    // (the complement of the span length if no entry is larger).
    private static int BinarySearch(ReadOnlySpan<(byte[] Utf8String, int Index)> sorted, Utf8Span fieldName)
    {
        int low = 0;
        int high = sorted.Length - 1;

        // An empty span gives high == -1, so the loop body never runs.
        while (low <= high)
        {
            int mid = low + ((high - low) >> 1);
            int order = fieldName.SequenceCompareTo(sorted[mid].Utf8String);

            if (order > 0)
            {
                low = mid + 1;
            }
            else if (order < 0)
            {
                high = mid - 1;
            }
            else
            {
                return mid;
            }
        }

        // Not found: low is now the insertion point.
        return ~low;
    }
}

public interface IDeserializer
{
T DeserializeAny<T>(IDeserializeVisitor<T> v);
Expand All @@ -91,5 +193,6 @@ public interface IDeserializer
T DeserializeEnumerable<T>(IDeserializeVisitor<T> v);
T DeserializeDictionary<T>(IDeserializeVisitor<T> v);
T DeserializeNullableRef<T>(IDeserializeVisitor<T> v);
IDeserializeType DeserializeType(FieldMap fieldMap);
}
}
Loading
Loading