Skip to content

Commit

Permalink
SchemaUtils Nested Map Fix (#499)
Browse files Browse the repository at this point in the history
  • Loading branch information
jescalada authored Jan 24, 2025
1 parent b956e27 commit a6630c0
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
23 changes: 17 additions & 6 deletions csharp.test/TestMaps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public static void CanRoundtripOptionalMaps()
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, null, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, null, Array.Empty<string>()};

DoRoundtripTest(true, keys!, values!);
using var schemaNode = CreateMapSchema(true);
DoRoundtripTest(keys!, values!, schemaNode);
}

[Test]
Expand All @@ -46,7 +47,18 @@ public static void CanRoundtripRequiredMaps()
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, Array.Empty<string>()};

DoRoundtripTest(false, keys, values);
using var schemaNode = CreateMapSchema(false);
DoRoundtripTest(keys, values, schemaNode);
}

[Test]
public static void CanRoundtripNonStandardMapAnnotation()
{
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, Array.Empty<string>()};

using var schemaNode = CreateMapSchema(false, true);
DoRoundtripTest(keys, values, schemaNode);
}

/// <summary>
Expand Down Expand Up @@ -179,7 +191,7 @@ public static void NullsInRequiredMapGiveException()
Assert.AreEqual(0, pool.BytesAllocated);
}

private static void DoRoundtripTest(bool optional, string[][] keys, string[][] values)
private static void DoRoundtripTest(string[][] keys, string[][] values, GroupNode schemaNode)
{
var pool = MemoryPool.GetDefaultMemoryPool();
Assert.AreEqual(0, pool.BytesAllocated);
Expand All @@ -190,7 +202,6 @@ private static void DoRoundtripTest(bool optional, string[][] keys, string[][] v
{
using var propertiesBuilder = new WriterPropertiesBuilder();
using var writerProperties = propertiesBuilder.Build();
using var schemaNode = CreateMapSchema(optional);
using var fileWriter = new ParquetFileWriter(outStream, schemaNode, writerProperties);
using var rowGroupWriter = fileWriter.AppendRowGroup();

Expand Down Expand Up @@ -220,15 +231,15 @@ private static void DoRoundtripTest(bool optional, string[][] keys, string[][] v
Assert.AreEqual(0, pool.BytesAllocated);
}

private static GroupNode CreateMapSchema(bool optional)
private static GroupNode CreateMapSchema(bool optional, bool extraLogicalMapType = false)
{
using var stringType = LogicalType.String();
using var mapType = LogicalType.Map();

using var keyNode = new PrimitiveNode("key", Repetition.Required, stringType, PhysicalType.ByteArray);
using var valueNode = new PrimitiveNode("value", Repetition.Optional, stringType, PhysicalType.ByteArray);
using var keyValueNode = new GroupNode(
"key_value", Repetition.Repeated, new Node[] {keyNode, valueNode});
"key_value", Repetition.Repeated, new Node[] {keyNode, valueNode}, extraLogicalMapType ? mapType : null);
var repetition = optional ? Repetition.Optional : Repetition.Required;
using var colNode = new GroupNode(
"col1", repetition, new Node[] {keyValueNode}, mapType);
Expand Down
5 changes: 4 additions & 1 deletion csharp/Schema/SchemaUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ public static bool IsListOrMap(Node[] schemaNodes)
// a value field for map values."
// - "The key field encodes the map's key type. This field must have repetition required and must always be present.
// The value field encodes the map's value type and repetition. This field can be required, optional, or omitted."

// Note: We want to allow maps as children nodes, as Parquet files generated by 3rd-party tools may not enforce the
// official spec.
var rootNode = schemaNodes[0];
var childNode = schemaNodes[1];
using var rootLogicalType = rootNode.LogicalType;
Expand All @@ -31,7 +34,7 @@ public static bool IsListOrMap(Node[] schemaNodes)
rootLogicalType is ListLogicalType or MapLogicalType &&
rootNode.Repetition is Repetition.Optional or Repetition.Required &&
childNode is GroupNode &&
childLogicalType is NoneLogicalType &&
childLogicalType is NoneLogicalType or MapLogicalType &&
childNode.Repetition is Repetition.Repeated;
}
}
Expand Down

0 comments on commit a6630c0

Please sign in to comment.