From 6eb317fe146ca91b0c657b032cee0ead0058e074 Mon Sep 17 00:00:00 2001 From: Andy Gocke Date: Sun, 24 Mar 2024 13:16:03 -0700 Subject: [PATCH] Add FieldMap abstraction FieldMap is a simple way of mapping field names to indices, which can be used for deserialization. The dispatch path looks similar to the existing visitor pattern, but results in less code both on the generation side and on the compiler side. It's one component in the work-in-progress in minimizing generic interface dispatch. --- .vscode/launch.json | 1 + .vscode/tasks.json | 2 +- perf/bench/Program.cs | 2 +- perf/bench/SampleTypes.cs | 200 +++++++----------- src/serde/IDeserialize.cs | 103 +++++++++ src/serde/json/JsonDeserializer.cs | 44 ++++ test/Serde.Test/CustomImplTests.cs | 57 +++++ ...tomImplTests.RgbWithFieldMap.ISerialize.cs | 22 ++ ...mImplTests.RgbWithFieldMap.ISerialize`1.cs | 22 ++ 9 files changed, 329 insertions(+), 124 deletions(-) create mode 100644 test/Serde.Test/CustomImplTests.cs create mode 100644 test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize.cs create mode 100644 test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize`1.cs diff --git a/.vscode/launch.json b/.vscode/launch.json index fabbf293..2551497b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -19,6 +19,7 @@ "name": ".NET Core Launch (console)", "type": "coreclr", "request": "launch", + "preLaunchTask": "build", "program": "${workspaceFolder}/perf/bench/bin/Debug/net8.0/bench.dll", "args": [], "cwd": "${workspaceFolder}/perf/bench", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 56d4ffe7..263b84fc 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -7,7 +7,7 @@ "type": "process", "args": [ "build", - "${workspaceFolder}/bench/bench.csproj", + "${workspaceFolder}/perf/bench/bench.csproj", "/property:GenerateFullPaths=true", "/consoleloggerparameters:NoSummary" ], diff --git a/perf/bench/Program.cs b/perf/bench/Program.cs index 65317f59..8a54ed61 100644 --- a/perf/bench/Program.cs +++ b/perf/bench/Program.cs @@ -15,7 +15,7 @@ var json1 = System.Text.Json.JsonSerializer.Serialize(DataGenerator.CreateLocation(), options); var json2 = Serde.Json.JsonSerializer.Serialize(DataGenerator.CreateLocation()); var loc1 = System.Text.Json.JsonSerializer.Deserialize(LocationSample, options); -var loc2 = Serde.Json.JsonSerializer.Deserialize(LocationSample); +var loc2 = Serde.Json.JsonSerializer.Deserialize(LocationSample); Console.WriteLine("Checking correctness of serialization: " + (loc1 == loc2)); if (loc1 != loc2) diff --git a/perf/bench/SampleTypes.cs b/perf/bench/SampleTypes.cs index d3b34b21..c2865397 100644 --- a/perf/bench/SampleTypes.cs +++ b/perf/bench/SampleTypes.cs @@ -58,138 +58,94 @@ public partial record Location public partial record LocationWrap : IDeserialize { - static Benchmarks.Location Serde.IDeserialize.Deserialize(IDeserializer deserializer) - { - var fieldNames = new[] - { - "Id", - "Address1", - "Address2", - "City", - "State", - "PostalCode", - "Name", - "PhoneNumber", - "Country" - }; - return deserializer.DeserializeType("Location", fieldNames, SerdeVisitor.Instance); - } + private static readonly FieldMap s_fieldMap = new(nameof(LocationWrap), [ + "id", + "address1", + "address2", + "city", + "state", + "postalCode", + "name", + "phoneNumber", + "country" + ]); - private sealed class SerdeVisitor : Serde.IDeserializeVisitor + static Benchmarks.Location Serde.IDeserialize.Deserialize(IDeserializer deserializer) { - public static readonly SerdeVisitor Instance = new SerdeVisitor(); - public string ExpectedTypeName => "Benchmarks.Location"; + int _l_id = default !; + string _l_address1 = default !; + string _l_address2 = default !; + string _l_city = default !; + string _l_state = default !; + string _l_postalcode = default !; + string _l_name = default !; + string _l_phonenumber = default !; + string _l_country = default !; + ushort _r_assignedValid = 0b0; - private sealed class FieldNameVisitor : Serde.IDeserialize, Serde.IDeserializeVisitor + var typeDeserialize = deserializer.DeserializeType(s_fieldMap); + int index; + while ((index = typeDeserialize.TryReadIndex(s_fieldMap)) != IDeserializeType.EndOfType) { - public static readonly FieldNameVisitor Instance = new FieldNameVisitor(); - public static byte Deserialize(IDeserializer deserializer) => deserializer.DeserializeString(Instance); - public string ExpectedTypeName => "string"; - - byte Serde.IDeserializeVisitor.VisitString(string s) => VisitUtf8Span(System.Text.Encoding.UTF8.GetBytes(s)); - public byte VisitUtf8Span(System.ReadOnlySpan s) + switch (index) { - switch (s[0]) - { - case (byte)'i'when s.SequenceEqual("id"u8): - return 1; - case (byte)'a'when s.SequenceEqual("address1"u8): - return 2; - case (byte)'a'when s.SequenceEqual("address2"u8): - return 3; - case (byte)'c'when s.SequenceEqual("city"u8): - return 4; - case (byte)'s'when s.SequenceEqual("state"u8): - return 5; - case (byte)'p'when s.SequenceEqual("postalCode"u8): - return 6; - case (byte)'n'when s.SequenceEqual("name"u8): - return 7; - case (byte)'p'when s.SequenceEqual("phoneNumber"u8): - return 8; - case (byte)'c'when s.SequenceEqual("country"u8): - return 9; - default: - return 0; - } + case 0: + _l_id = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 0; + break; + case 1: + _l_address1 = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 1; + break; + case 2: + _l_address2 = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 2; + break; + case 3: + _l_city = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 3; + break; + case 4: + _l_state = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 4; + break; + case 5: + _l_postalcode = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 5; + break; + case 6: + _l_name = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 6; + break; + case 7: + _l_phonenumber = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 7; + break; + case 8: + _l_country = typeDeserialize.ReadValue(); + _r_assignedValid |= ((ushort)1) << 8; + break; } } - Benchmarks.Location Serde.IDeserializeVisitor.VisitDictionary(ref D d) + if (_r_assignedValid != 0b111111111) { - int _l_id = default !; - string _l_address1 = default !; - string _l_address2 = default !; - string _l_city = default !; - string _l_state = default !; - string _l_postalcode = default !; - string _l_name = default !; - string _l_phonenumber = default !; - string _l_country = default !; - ushort _r_assignedValid = 0b0; - while (d.TryGetNextKey(out byte key)) - { - switch (key) - { - case 1: - _l_id = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 0; - break; - case 2: - _l_address1 = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 1; - break; - case 3: - _l_address2 = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 2; - break; - case 4: - _l_city = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 3; - break; - case 5: - _l_state = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 4; - break; - case 6: - _l_postalcode = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 5; - break; - case 7: - _l_name = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 6; - break; - case 8: - _l_phonenumber = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 7; - break; - case 9: - _l_country = d.GetNextValue(); - _r_assignedValid |= ((ushort)1) << 8; - break; - } - } - - if (_r_assignedValid != 0b111111111) - { - throw new Serde.InvalidDeserializeValueException("Not all members were assigned"); - } - - var newType = new Benchmarks.Location() - { - Id = _l_id, - Address1 = _l_address1, - Address2 = _l_address2, - City = _l_city, - State = _l_state, - PostalCode = _l_postalcode, - Name = _l_name, - PhoneNumber = _l_phonenumber, - Country = _l_country, - }; - return newType; + throw new Serde.InvalidDeserializeValueException("Not all members were assigned"); } - } + var newType = new Benchmarks.Location() + { + Id = _l_id, + Address1 = _l_address1, + Address2 = _l_address2, + City = _l_city, + State = _l_state, + PostalCode = _l_postalcode, + Name = _l_name, + PhoneNumber = _l_phonenumber, + Country = _l_country, + }; + return newType; + } } } \ No newline at end of file diff --git a/src/serde/IDeserialize.cs b/src/serde/IDeserialize.cs index 930a0ce9..9fa1cef2 100644 --- a/src/serde/IDeserialize.cs +++ b/src/serde/IDeserialize.cs @@ -1,6 +1,10 @@ using System; +using System.Collections.Immutable; using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; namespace Serde { @@ -69,6 +73,104 @@ bool TryGetNextEntry([MaybeNullWhen(false)] out (K, V) next) int? SizeOpt { get; } } + public interface IDeserializeType + { + public const int EndOfType = -1; + public const int IndexNotFound = -2; + + int TryReadIndex(FieldMap map); + + V ReadValue() where D : IDeserialize; + } + + /// + /// A map from field names to int indices. This is an optimization for deserializing types + /// that avoids allocating strings for field names. + /// + public sealed class FieldMap + { + #region Fields and auto-props + + public string TypeName { get; } + private readonly ImmutableArray<(byte[] Utf8String, int Index)> _fieldNames; + + #endregion + + + private static readonly UTF8Encoding s_utf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + /// + /// Create a new field mapping. The ordering of the field names is important -- it + /// corresponds to the index returned by . + /// + public FieldMap( + string typeName, + ReadOnlySpan fieldNames) + { + TypeName = typeName; + + var builder = ImmutableArray.CreateBuilder<(byte[] Utf8String, int Index)>(fieldNames.Length); + for (int index = 0; index < fieldNames.Length; index++) + { + builder.Add((s_utf8.GetBytes(fieldNames[index]), index)); + } + builder.Sort((left, right) => + left.Utf8String.AsSpan().SequenceCompareTo(right.Utf8String.AsSpan())); + + _fieldNames = builder.MoveToImmutable(); + } + + public int TryReadIndex(Utf8Span utf8FieldName) + { + int mapIndex = BinarySearch(_fieldNames.AsSpan(), utf8FieldName); + + return mapIndex < 0 ? IDeserializeType.IndexNotFound : _fieldNames[mapIndex].Index; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int BinarySearch(ReadOnlySpan<(byte[] Utf8String, int Index)> span, Utf8Span fieldName) + { + return BinarySearch(ref MemoryMarshal.GetReference(span), span.Length, fieldName); + } + + // This is a copy of the BinarySearch method from System.MemoryExtensions. + // We can't use that version because ref structs can't yet be substituted for type arguments. + private static int BinarySearch(ref (byte[] Utf8String, int Index) spanStart, int length, Utf8Span fieldName) + { + int lo = 0; + int hi = length - 1; + // If length == 0, hi == -1, and loop will not be entered + while (lo <= hi) + { + // PERF: `lo` or `hi` will never be negative inside the loop, + // so computing median using uints is safe since we know + // `length <= int.MaxValue`, and indices are >= 0 + // and thus cannot overflow an uint. + // Saves one subtraction per loop compared to + // `int i = lo + ((hi - lo) >> 1);` + int i = (int)(((uint)hi + (uint)lo) >> 1); + + int c = fieldName.SequenceCompareTo(Unsafe.Add(ref spanStart, i).Utf8String); + if (c == 0) + { + return i; + } + else if (c > 0) + { + lo = i + 1; + } + else + { + hi = i - 1; + } + } + // If none found, then a negative number that is the bitwise complement + // of the index of the next element that is larger than or, if there is + // no larger element, the bitwise complement of `length`, which + // is `lo` at this point. + return ~lo; + } + } + public interface IDeserializer { T DeserializeAny(IDeserializeVisitor v); @@ -91,5 +193,6 @@ public interface IDeserializer T DeserializeEnumerable(IDeserializeVisitor v); T DeserializeDictionary(IDeserializeVisitor v); T DeserializeNullableRef(IDeserializeVisitor v); + IDeserializeType DeserializeType(FieldMap fieldMap); } } \ No newline at end of file diff --git a/src/serde/json/JsonDeserializer.cs b/src/serde/json/JsonDeserializer.cs index 890c399d..9249b085 100644 --- a/src/serde/json/JsonDeserializer.cs +++ b/src/serde/json/JsonDeserializer.cs @@ -249,6 +249,19 @@ public T DeserializeType(string typeName, ReadOnlySpan fieldNames, ID return DeserializeDictionary(v); } + public IDeserializeType DeserializeType(FieldMap fieldMap) + { + ref var reader = ref GetReader(); + reader.ReadOrThrow(); + + if (reader.TokenType != JsonTokenType.StartObject) + { + throw new InvalidDeserializeValueException("Expected object start"); + } + + return this; + } + public T DeserializeByte(IDeserializeVisitor v) => DeserializeU64(v); @@ -284,6 +297,37 @@ public T DeserializeNullableRef(IDeserializeVisitor v) } } + partial class JsonDeserializer : IDeserializeType + { + V IDeserializeType.ReadValue() + { + return D.Deserialize(this); + } + + int IDeserializeType.TryReadIndex(FieldMap map) + { + ref var reader = ref GetReader(); + reader.ReadOrThrow(); + + if (reader.TokenType == JsonTokenType.EndObject) + { + return IDeserializeType.EndOfType; + } + + Utf8Span span; + if (reader.HasValueSequence || reader.ValueIsEscaped) + { + var s = reader.GetString()!; + span = Encoding.UTF8.GetBytes(s); + } + else + { + span = reader.ValueSpan; + } + return map.TryReadIndex(span); + } + } + internal static class Utf8JsonReaderExtensions { public static void ReadOrThrow(ref this Utf8JsonReader reader) diff --git a/test/Serde.Test/CustomImplTests.cs b/test/Serde.Test/CustomImplTests.cs new file mode 100644 index 00000000..90e190da --- /dev/null +++ b/test/Serde.Test/CustomImplTests.cs @@ -0,0 +1,57 @@ + +using System.IO; +using Serde.Json; +using Xunit; + +namespace Serde.Test; + +public sealed partial class CustomImplTests +{ + [GenerateSerialize] + private sealed partial record RgbWithFieldMap : IDeserialize + { + public int Red, Green, Blue; + + private static readonly FieldMap s_fieldMap = new(nameof(RgbWithFieldMap), [ + "red", + "green", + "blue" + ]); + + static RgbWithFieldMap IDeserialize.Deserialize(IDeserializer deserializer) + { + var fieldMap = s_fieldMap; + var deType = deserializer.DeserializeType(fieldMap); + int red = default; + int green = default; + int blue = default; + int index; + while ((index = deType.TryReadIndex(fieldMap)) != IDeserializeType.EndOfType) + { + switch (index) + { + case 0: + red = deType.ReadValue(); + break; + case 1: + green = deType.ReadValue(); + break; + case 2: + blue = deType.ReadValue(); + break; + } + } + + return new RgbWithFieldMap { Red = red, Green = green, Blue = blue }; + } + } + + [Fact] + public void TestLocation() + { + var rgb = new RgbWithFieldMap { Red = 255, Green = 128, Blue = 0 }; + var json = JsonSerializer.Serialize(rgb); + var deserialized = JsonSerializer.Deserialize(json); + Assert.Equal(rgb.Red, deserialized.Red); + } +} \ No newline at end of file diff --git a/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize.cs b/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize.cs new file mode 100644 index 00000000..ffc09fbc --- /dev/null +++ b/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize.cs @@ -0,0 +1,22 @@ + +#nullable enable +using System; +using Serde; + +namespace Serde.Test +{ + partial class CustomImplTests + { + partial record RgbWithFieldMap : Serde.ISerialize + { + void Serde.ISerialize.Serialize(ISerializer serializer) + { + var type = serializer.SerializeType("RgbWithFieldMap", 3); + type.SerializeField("red"u8, new Int32Wrap(this.Red)); + type.SerializeField("green"u8, new Int32Wrap(this.Green)); + type.SerializeField("blue"u8, new Int32Wrap(this.Blue)); + type.End(); + } + } + } +} \ No newline at end of file diff --git a/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize`1.cs b/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize`1.cs new file mode 100644 index 00000000..5f5e3db0 --- /dev/null +++ b/test/Serde.Test/generated/SerdeGenerator/Serde.SerdeImplRoslynGenerator/Serde.Test.CustomImplTests.RgbWithFieldMap.ISerialize`1.cs @@ -0,0 +1,22 @@ + +#nullable enable +using System; +using Serde; + +namespace Serde.Test +{ + partial class CustomImplTests + { + partial record RgbWithFieldMap : Serde.ISerialize + { + void ISerialize.Serialize(Serde.Test.CustomImplTests.RgbWithFieldMap value, ISerializer serializer) + { + var type = serializer.SerializeType("RgbWithFieldMap", 3); + type.SerializeField("red", value.Red); + type.SerializeField("green", value.Green); + type.SerializeField("blue", value.Blue); + type.End(); + } + } + } +} \ No newline at end of file