Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SchemaUtils Nested Map Fix #499

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions csharp.test/TestMaps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public static void CanRoundtripOptionalMaps()
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, null, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, null, Array.Empty<string>()};

DoRoundtripTest(true, keys!, values!);
using var schemaNode = CreateMapSchema(true);
DoRoundtripTest(keys!, values!, schemaNode);
}

[Test]
Expand All @@ -46,7 +47,18 @@ public static void CanRoundtripRequiredMaps()
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, Array.Empty<string>()};

DoRoundtripTest(false, keys, values);
using var schemaNode = CreateMapSchema(false);
DoRoundtripTest(keys, values, schemaNode);
}

[Test]
public static void CanRoundtripNonStandardMapAnnotation()
{
var keys = new[] {new[] {"k1", "k2"}, new[] {"k3", "k4"}, Array.Empty<string>()};
var values = new[] {new[] {"v1", "v2"}, new[] {"v3", "v4"}, Array.Empty<string>()};

using var schemaNode = CreateMapSchema(false, true);
DoRoundtripTest(keys, values, schemaNode);
}

/// <summary>
Expand Down Expand Up @@ -179,7 +191,7 @@ public static void NullsInRequiredMapGiveException()
Assert.AreEqual(0, pool.BytesAllocated);
}

private static void DoRoundtripTest(bool optional, string[][] keys, string[][] values)
private static void DoRoundtripTest(string[][] keys, string[][] values, GroupNode schemaNode)
{
var pool = MemoryPool.GetDefaultMemoryPool();
Assert.AreEqual(0, pool.BytesAllocated);
Expand All @@ -190,7 +202,6 @@ private static void DoRoundtripTest(bool optional, string[][] keys, string[][] v
{
using var propertiesBuilder = new WriterPropertiesBuilder();
using var writerProperties = propertiesBuilder.Build();
using var schemaNode = CreateMapSchema(optional);
using var fileWriter = new ParquetFileWriter(outStream, schemaNode, writerProperties);
using var rowGroupWriter = fileWriter.AppendRowGroup();

Expand Down Expand Up @@ -220,15 +231,15 @@ private static void DoRoundtripTest(bool optional, string[][] keys, string[][] v
Assert.AreEqual(0, pool.BytesAllocated);
}

private static GroupNode CreateMapSchema(bool optional)
private static GroupNode CreateMapSchema(bool optional, bool extraLogicalMapType = false)
{
using var stringType = LogicalType.String();
using var mapType = LogicalType.Map();

using var keyNode = new PrimitiveNode("key", Repetition.Required, stringType, PhysicalType.ByteArray);
using var valueNode = new PrimitiveNode("value", Repetition.Optional, stringType, PhysicalType.ByteArray);
using var keyValueNode = new GroupNode(
"key_value", Repetition.Repeated, new Node[] {keyNode, valueNode});
"key_value", Repetition.Repeated, new Node[] {keyNode, valueNode}, extraLogicalMapType ? mapType : null);
var repetition = optional ? Repetition.Optional : Repetition.Required;
using var colNode = new GroupNode(
"col1", repetition, new Node[] {keyValueNode}, mapType);
Expand Down
5 changes: 4 additions & 1 deletion csharp/Schema/SchemaUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ public static bool IsListOrMap(Node[] schemaNodes)
// a value field for map values."
// - "The key field encodes the map's key type. This field must have repetition required and must always be present.
// The value field encodes the map's value type and repetition. This field can be required, optional, or omitted."

// Note: We want to allow maps as children nodes, as Parquet files generated by 3rd-party tools may not enforce the
// official spec.
var rootNode = schemaNodes[0];
var childNode = schemaNodes[1];
using var rootLogicalType = rootNode.LogicalType;
Expand All @@ -31,7 +34,7 @@ public static bool IsListOrMap(Node[] schemaNodes)
rootLogicalType is ListLogicalType or MapLogicalType &&
rootNode.Repetition is Repetition.Optional or Repetition.Required &&
childNode is GroupNode &&
childLogicalType is NoneLogicalType &&
childLogicalType is NoneLogicalType or MapLogicalType &&
childNode.Repetition is Repetition.Repeated;
}
}
Expand Down
Loading