From f4778f5fc6d8b1e90672fab96cc6247c005cba29 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 09:13:20 +0200 Subject: [PATCH 01/24] MapType : Init type and array --- csharp/src/Apache.Arrow/Arrays/ListArray.cs | 8 +++ csharp/src/Apache.Arrow/Arrays/MapArray.cs | 61 +++++++++++++++++++ csharp/src/Apache.Arrow/Types/ListType.cs | 4 +- csharp/src/Apache.Arrow/Types/MapType.cs | 42 +++++++++++++ .../Apache.Arrow.Tests/Types/MapTypeTests.cs | 50 +++++++++++++++ 5 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 csharp/src/Apache.Arrow/Arrays/MapArray.cs create mode 100644 csharp/src/Apache.Arrow/Types/MapType.cs create mode 100644 csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/ListArray.cs b/csharp/src/Apache.Arrow/Arrays/ListArray.cs index 97673cb48e6a8..0438f6c8c3e09 100644 --- a/csharp/src/Apache.Arrow/Arrays/ListArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/ListArray.cs @@ -139,6 +139,14 @@ private ListArray(ArrayData data, IArrowArray values) : base(data) Values = values; } + // Constructor for child MapArray + internal ListArray(ArrayData data, IArrowArray values, ArrowTypeId typeId) : base(data) + { + data.EnsureBufferCount(2); + data.EnsureDataType(typeId); + Values = values; + } + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs new file mode 100644 index 0000000000000..a3119d4169b5c --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class MapArray : ListArray + { + public IArrowArray Keys { get; } + + public MapArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, IArrowArray keys, IArrowArray values, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this( + new ArrayData( + dataType, length, nullCount, offset, new[] { nullBitmapBuffer, valueOffsetsBuffer }, + new[] { keys.Data, values.Data } + ), keys, values) + { + } + + public MapArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), ArrowArrayFactory.BuildArray(data.Children[1])) + { + } + + private MapArray(ArrayData data, IArrowArray keys, IArrowArray values) : base(data, values, ArrowTypeId.Map) + { + Keys = keys; + } + + public Tuple GetKeyValueArray(int index) + where TKeyArray : Array where TValueArray : Array + { + if (Keys is not Array keys || Values is not Array values) + { + return default; + } + + ReadOnlySpan offsets = ValueOffsets; + return Tuple.Create( + keys.Slice(offsets[index], offsets[index + 1] - offsets[index]) as TKeyArray, + values.Slice(offsets[index], offsets[index + 1] - offsets[index]) as TValueArray + ); + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/ListType.cs b/csharp/src/Apache.Arrow/Types/ListType.cs index a006c2282dd11..a4092292f0a9a 100644 --- a/csharp/src/Apache.Arrow/Types/ListType.cs +++ b/csharp/src/Apache.Arrow/Types/ListType.cs @@ -13,11 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; - namespace Apache.Arrow.Types { - public sealed class ListType : NestedType + public class ListType : NestedType { public override ArrowTypeId TypeId => ArrowTypeId.List; public override string Name => "list"; diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs new file mode 100644 index 0000000000000..b09055113187e --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -0,0 +1,42 @@ +using System.Collections.Generic; + +namespace Apache.Arrow.Types +{ + public sealed class MapType : ListType + { + public override ArrowTypeId TypeId => ArrowTypeId.Map; + public override string Name => "map"; + + public StructType KeyValueType => Fields[0].DataType as StructType; + public Field KeyField => KeyValueType.Fields[0]; + public new Field ValueField => KeyValueType.Fields[1]; + + public IArrowType KeyDataType => Fields[0].DataType; + public new IArrowType ValueDataType => Fields[1].DataType; + + public MapType(IArrowType key, IArrowType value) + : this(key, value, true) + { + } + + public MapType(IArrowType key, IArrowType value, bool nullable) + : this(new Field("key", key, false), new Field("value", value, nullable)) + { + } + + public MapType(Field key, Field value) + : this(new StructType(new List() { key, value })) + { + } + + public MapType(StructType entries) : this(new Field("entries", entries, false)) + { + } + + public MapType(Field keyvalue) : base(keyvalue) + { + } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs new file mode 100644 index 0000000000000..62233477f91c1 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests.Types +{ + public class MapTypeTests + { + [Fact] + public void MapType_Should_HaveCorrectTypeId() + { + var type = new MapType(StringType.Default, Int32Type.Default); + Assert.Equal(ArrowTypeId.Map, type.TypeId); + } + + [Fact] + public void MapType_Should_HaveCorrectStructType() + { + var type = new MapType(BooleanType.Default, Int32Type.Default, true); + + Assert.IsType(type.Fields[0].DataType); + Assert.Equal(2, type.KeyValueType.Fields.Count); + + Assert.Equal("entries", type.Fields[0].Name); + Assert.Equal("key", type.KeyField.Name); + Assert.Equal("value", type.ValueField.Name); + + Assert.False(type.Fields[0].IsNullable); + Assert.False(type.KeyField.IsNullable); + Assert.True(type.ValueField.IsNullable); + + Assert.IsType(type.KeyField.DataType); + Assert.IsType(type.ValueField.DataType); + } + } +} From e159ca92da4c5c659c0cb44feaba5c1a3ea1cc2c Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 09:19:32 +0200 Subject: [PATCH 02/24] ListType : seal type --- csharp/src/Apache.Arrow/Types/ListType.cs | 2 +- csharp/src/Apache.Arrow/Types/MapType.cs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/src/Apache.Arrow/Types/ListType.cs b/csharp/src/Apache.Arrow/Types/ListType.cs index a4092292f0a9a..b467934ec541d 100644 --- a/csharp/src/Apache.Arrow/Types/ListType.cs +++ b/csharp/src/Apache.Arrow/Types/ListType.cs @@ -15,7 +15,7 @@ namespace Apache.Arrow.Types { - public class ListType : NestedType + public sealed class ListType : NestedType { public override ArrowTypeId TypeId => ArrowTypeId.List; public override string Name => "list"; diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs index b09055113187e..082f1c8edd5c2 100644 --- a/csharp/src/Apache.Arrow/Types/MapType.cs +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -2,17 +2,17 @@ namespace Apache.Arrow.Types { - public sealed class MapType : ListType + public sealed class MapType : NestedType { public override ArrowTypeId TypeId => ArrowTypeId.Map; public override string Name => "map"; public StructType KeyValueType => Fields[0].DataType as StructType; public Field KeyField => KeyValueType.Fields[0]; - public new Field ValueField => KeyValueType.Fields[1]; + public Field ValueField => KeyValueType.Fields[1]; public IArrowType KeyDataType => Fields[0].DataType; - public new IArrowType ValueDataType => Fields[1].DataType; + public IArrowType ValueDataType => Fields[1].DataType; public MapType(IArrowType key, IArrowType value) : this(key, value, true) From d11d75b34d50c6871cbd61c6b0087140fc6e8ec3 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 12:12:36 +0200 Subject: [PATCH 03/24] MapType : Add IPC tests --- .../Apache.Arrow/Arrays/ArrowArrayFactory.cs | 3 +- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 119 ++++++++++++++---- .../Ipc/ArrowReaderImplementation.cs | 7 +- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 12 +- .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 7 ++ csharp/src/Apache.Arrow/Types/MapType.cs | 24 ++-- csharp/test/Apache.Arrow.Tests/TableTests.cs | 2 +- csharp/test/Apache.Arrow.Tests/TestData.cs | 23 +++- .../Apache.Arrow.Tests/Types/MapTypeTests.cs | 9 +- 9 files changed, 160 insertions(+), 46 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index 319dcab17e75c..9010d17525c18 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -57,6 +57,8 @@ public static IArrowArray BuildArray(ArrayData data) return new TimestampArray(data); case ArrowTypeId.List: return new ListArray(data); + case ArrowTypeId.Map: + return new MapArray(data); case ArrowTypeId.Struct: return new StructArray(data); case ArrowTypeId.Union: @@ -82,7 +84,6 @@ public static IArrowArray BuildArray(ArrayData data) throw new NotSupportedException("Half-float arrays are not supported by this target framework."); #endif case ArrowTypeId.Interval: - case ArrowTypeId.Map: default: throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}."); } diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index a3119d4169b5c..c28cf3802ac00 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -13,49 +13,122 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; +using Apache.Arrow.Memory; using Apache.Arrow.Types; namespace Apache.Arrow { - public class MapArray : ListArray + public class MapArray : ListArray // MapArray = ListArray(StrucArray("key", "value")) { + // Same as ListArray.Builder, but with KeyBuilder + public new class Builder : IArrowArrayBuilder + { + public IArrowArrayBuilder> KeyBuilder { get; } + public IArrowArrayBuilder> ValueBuilder { get; } + + public int Length => ValueOffsetsBufferBuilder.Length; + + private ArrowBuffer.Builder ValueOffsetsBufferBuilder { get; } + + private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; } + + public int NullCount { get; protected set; } + + public MapType DataType { get; } + + public Builder(MapType type) + { + KeyBuilder = ArrowArrayBuilderFactory.Build(type.KeyField.DataType); + ValueBuilder = ArrowArrayBuilderFactory.Build(type.ValueField.DataType); + ValueOffsetsBufferBuilder = new ArrowBuffer.Builder(); + ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder(); + DataType = type; + } + + /// + /// Start a new variable-length list slot + /// + /// This function should be called before beginning to append elements to the + /// value builder + /// + /// + public Builder Append() + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + ValidityBufferBuilder.Append(true); + + return this; + } + + public Builder AppendNull() + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + ValidityBufferBuilder.Append(false); + NullCount++; + + return this; + } + + public MapArray Build(MemoryAllocator allocator = default) + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + + ArrowBuffer validityBuffer = NullCount > 0 ? ValidityBufferBuilder.Build(allocator) : ArrowBuffer.Empty; + + StructArray structs = new StructArray( + DataType.KeyValueType, KeyBuilder.Length, + new IArrowArray[] { KeyBuilder.Build(allocator), ValueBuilder.Build(allocator) }, + validityBuffer, NullCount + ); + + return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, ArrowBuffer.Empty); + } + + public Builder Reserve(int capacity) + { + ValueOffsetsBufferBuilder.Reserve(capacity + 1); + ValidityBufferBuilder.Reserve(capacity + 1); + return this; + } + + public Builder Resize(int length) + { + ValueOffsetsBufferBuilder.Resize(length + 1); + ValidityBufferBuilder.Resize(length + 1); + return this; + } + + public Builder Clear() + { + ValueOffsetsBufferBuilder.Clear(); + KeyBuilder.Clear(); + ValueBuilder.Clear(); + ValidityBufferBuilder.Clear(); + return this; + } + + } + public IArrowArray Keys { get; } public MapArray(IArrowType dataType, int length, - ArrowBuffer valueOffsetsBuffer, IArrowArray keys, IArrowArray values, + ArrowBuffer valueOffsetsBuffer, IArrowArray structs, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : this( new ArrayData( dataType, length, nullCount, offset, new[] { nullBitmapBuffer, valueOffsetsBuffer }, - new[] { keys.Data, values.Data } - ), keys, values) + new[] { structs.Data } + ), structs) { } public MapArray(ArrayData data) - : this(data, ArrowArrayFactory.BuildArray(data.Children[0]), ArrowArrayFactory.BuildArray(data.Children[1])) - { - } - - private MapArray(ArrayData data, IArrowArray keys, IArrowArray values) : base(data, values, ArrowTypeId.Map) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) { - Keys = keys; } - public Tuple GetKeyValueArray(int index) - where TKeyArray : Array where TValueArray : Array + private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, ArrowTypeId.Map) { - if (Keys is not Array keys || Values is not Array values) - { - return default; - } - - ReadOnlySpan offsets = ValueOffsets; - return Tuple.Create( - keys.Slice(offsets[index], offsets[index + 1] - offsets[index]) as TKeyArray, - values.Slice(offsets[index], offsets[index + 1] - offsets[index]) as TValueArray - ); } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index a1c1430124013..814777fd3dc14 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -231,7 +231,10 @@ private ArrayData LoadPrimitiveField( ByteBuffer bodyData, IBufferCreator bufferCreator) { - + if (field.DataType.TypeId == ArrowTypeId.Map) + { + var i = 1; + } ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); if (!recordBatchEnumerator.MoveNextBuffer()) { @@ -262,7 +265,7 @@ private ArrayData LoadPrimitiveField( recordBatchEnumerator.MoveNextBuffer(); arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; - } + } ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData, bufferCreator); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index ee119ae5d7f20..7bea2f5aab563 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -65,7 +65,8 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private FlatBufferBuilder Builder { get; } @@ -218,9 +219,16 @@ public void Visit(FixedSizeBinaryType type) Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth)); } + public void Visit(MapType type) + { + Result = FieldType.Build( + Flatbuf.Type.Map, + Flatbuf.Map.CreateMap(Builder, type.KeySorted)); + } + public void Visit(IArrowType type) { - throw new NotImplementedException(); + throw new NotImplementedException($"Cannot visit type {type}"); } } diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index a09fff61b4ac8..64a61d106184d 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -193,6 +193,13 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c case Flatbuf.Type.Struct_: Debug.Assert(childFields != null); return new Types.StructType(childFields); + case Flatbuf.Type.Map: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"Map type must have exactly one struct child."); + } + Field child = childFields[0]; + return new Types.MapType(child); default: throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported."); } diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs index 082f1c8edd5c2..b67cfec853458 100644 --- a/csharp/src/Apache.Arrow/Types/MapType.cs +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -2,39 +2,33 @@ namespace Apache.Arrow.Types { - public sealed class MapType : NestedType + public sealed class MapType : NestedType // MapType = ListType(StrucType("key", "value")) { public override ArrowTypeId TypeId => ArrowTypeId.Map; public override string Name => "map"; + public readonly bool KeySorted; public StructType KeyValueType => Fields[0].DataType as StructType; public Field KeyField => KeyValueType.Fields[0]; public Field ValueField => KeyValueType.Fields[1]; - public IArrowType KeyDataType => Fields[0].DataType; - public IArrowType ValueDataType => Fields[1].DataType; - - public MapType(IArrowType key, IArrowType value) - : this(key, value, true) - { - } - - public MapType(IArrowType key, IArrowType value, bool nullable) - : this(new Field("key", key, false), new Field("value", value, nullable)) + public MapType(IArrowType key, IArrowType value, bool nullable = true, bool keySorted = false) + : this(new Field("key", key, false), new Field("value", value, nullable), keySorted) { } - public MapType(Field key, Field value) - : this(new StructType(new List() { key, value })) + public MapType(Field key, Field value, bool keySorted = false) + : this(new StructType(new List() { key, value }), keySorted) { } - public MapType(StructType entries) : this(new Field("entries", entries, false)) + public MapType(StructType entries, bool keySorted = false) : this(new Field("entries", entries, false), keySorted) { } - public MapType(Field keyvalue) : base(keyvalue) + public MapType(Field entries, bool keySorted = false) : base(entries) { + KeySorted = keySorted; } public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index 45a14cc25616e..b4c4b1faed190 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -60,7 +60,7 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, recordBatches); Assert.Equal(20, table1.RowCount); - Assert.Equal(23, table1.ColumnCount); + Assert.Equal(24, table1.ColumnCount); FixedSizeBinaryType type = new FixedSizeBinaryType(17); Field newField1 = new Field(type.Name, type, false); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 02186378ad919..bc99035c38424 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -54,6 +54,7 @@ public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); builder.Field(CreateField(new Decimal128Type(10, 6), i)); builder.Field(CreateField(new Decimal256Type(16, 8), i)); + builder.Field(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); if (createAdvancedTypeArrays) { @@ -126,7 +127,8 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private int Length { get; } public IArrowArray Array { get; private set; } @@ -317,6 +319,25 @@ public void Visit(FixedSizeBinaryType type) Array = new FixedSizeBinaryArray(arrayData); } + public void Visit(MapType type) + { + MapArray.Builder builder = new MapArray.Builder(type).Reserve(Length); + var keyBuilder = builder.KeyBuilder.Reserve(Length + 1) as StringArray.Builder; + var valueBuilder = builder.ValueBuilder.Reserve(Length + 1) as Int32Array.Builder; + + for (var i = 0; i < Length; i++) + { + builder.Append(); + keyBuilder.Append(i.ToString()); + valueBuilder.Append(i); + } + //Add a value to check if Values.Length can exceed MapArray.Length + keyBuilder.Append("0"); + valueBuilder.Append(0); + + Array = builder.Build(); + } + private void GenerateArray(IArrowArrayBuilder builder, Func generator) where TArrayBuilder : IArrowArrayBuilder where TArray : IArrowArray diff --git a/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs index 62233477f91c1..ba306ec104e79 100644 --- a/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs +++ b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs @@ -31,7 +31,6 @@ public void MapType_Should_HaveCorrectTypeId() public void MapType_Should_HaveCorrectStructType() { var type = new MapType(BooleanType.Default, Int32Type.Default, true); - Assert.IsType(type.Fields[0].DataType); Assert.Equal(2, type.KeyValueType.Fields.Count); @@ -42,9 +41,17 @@ public void MapType_Should_HaveCorrectStructType() Assert.False(type.Fields[0].IsNullable); Assert.False(type.KeyField.IsNullable); Assert.True(type.ValueField.IsNullable); + Assert.False(new MapType(BooleanType.Default, Int32Type.Default, false).ValueField.IsNullable); Assert.IsType(type.KeyField.DataType); Assert.IsType(type.ValueField.DataType); } + + [Fact] + public void MapType_Should_SetKeySorted() + { + Assert.False(new MapType(BooleanType.Default, Int32Type.Default).KeySorted); + Assert.True(new MapType(StringType.Default, Int32Type.Default, true, true).KeySorted); + } } } From 3c5b5b9a57c7aa98574852a064aa54cc046a5a5f Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 12:15:55 +0200 Subject: [PATCH 04/24] code : clean --- csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index 814777fd3dc14..d9d595e974084 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -231,10 +231,6 @@ private ArrayData LoadPrimitiveField( ByteBuffer bodyData, IBufferCreator bufferCreator) { - if (field.DataType.TypeId == ArrowTypeId.Map) - { - var i = 1; - } ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); if (!recordBatchEnumerator.MoveNextBuffer()) { @@ -265,7 +261,7 @@ private ArrayData LoadPrimitiveField( recordBatchEnumerator.MoveNextBuffer(); arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; - } + } ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData, bufferCreator); From 10ae2236e04b657467bb92fc9102702b68372aa8 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 12:22:28 +0200 Subject: [PATCH 05/24] code : clean --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index c28cf3802ac00..89ea42c67bd71 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -109,8 +109,6 @@ public Builder Clear() } - public IArrowArray Keys { get; } - public MapArray(IArrowType dataType, int length, ArrowBuffer valueOffsetsBuffer, IArrowArray structs, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) From 220bd972859334e9af2bd7b691c4912254fc4f06 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 13:48:53 +0200 Subject: [PATCH 06/24] MapArray : Add builder test --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 28 ++++++++- .../test/Apache.Arrow.Tests/MapArrayTests.cs | 59 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 csharp/test/Apache.Arrow.Tests/MapArrayTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 89ea42c67bd71..00acab09bef30 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -13,6 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +using System; +using System.Collections.Generic; using Apache.Arrow.Memory; using Apache.Arrow.Types; @@ -81,7 +83,7 @@ public MapArray Build(MemoryAllocator allocator = default) validityBuffer, NullCount ); - return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, ArrowBuffer.Empty); + return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, validityBuffer, NullCount); } public Builder Reserve(int capacity) @@ -109,6 +111,10 @@ public Builder Clear() } + public StructArray KeyValues => base.Values as StructArray; + public IArrowArray Keys => KeyValues.Fields[0]; + public new IArrowArray Values => KeyValues.Fields[1]; + public MapArray(IArrowType dataType, int length, ArrowBuffer valueOffsetsBuffer, IArrowArray structs, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) @@ -128,5 +134,25 @@ public MapArray(ArrayData data) private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, ArrowTypeId.Map) { } + +#if NETCOREAPP3_1_OR_GREATER + public IEnumerable> GetKeyValuePairs(int index, Func getKey, Func getValue) + where TKeyArray : Array where TValueArray : Array + { + ReadOnlySpan offsets = ValueOffsets; + // Get key values + int start = offsets[index]; + int end = offsets[index + 1]; + StructArray array = KeyValues.Slice(start, end) as StructArray; + + TKeyArray keyArray = array.Fields[0] as TKeyArray; + TValueArray valueArray = array.Fields[1] as TValueArray; + + for (int i = start; i < end; i++) + { + yield return KeyValuePair.Create(getKey(keyArray, i), getValue(valueArray, i)); + } + } +#endif } } diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs new file mode 100644 index 0000000000000..0c8793aca6d9d --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Collections.Generic; +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class MapArrayTests + { + [Fact] + public void MapArray_Should_Build() + { + MapType type = new MapType(StringType.Default, Int32Type.Default); + MapArray.Builder builder = new MapArray.Builder(type); + var keyBuilder = builder.KeyBuilder as StringArray.Builder; + var valueBuilder = builder.ValueBuilder as Int32Array.Builder; + + KeyValuePair kv0 = KeyValuePair.Create("test", (int?)1); + KeyValuePair kv1 = KeyValuePair.Create("other", (int?)123); + KeyValuePair kv2 = KeyValuePair.Create("kv", (int?)null); + + builder.Append(); + keyBuilder.Append("test"); + valueBuilder.Append(1); + + builder.AppendNull(); + + builder.Append(); + keyBuilder.Append("other"); + valueBuilder.Append(123); + keyBuilder.Append("kv"); + valueBuilder.AppendNull(); + + MapArray array = builder.Build(); + + Assert.Equal(new KeyValuePair[] { kv0 }, array.GetKeyValuePairs(0, GetKey, GetValue).ToArray()); + Assert.True(array.IsNull(1)); + Assert.Equal(new KeyValuePair[] { kv1, kv2 }, array.GetKeyValuePairs(2, GetKey, GetValue).ToArray()); + } + + private static string GetKey(StringArray array, int index) => array.GetString(index); + private static int? GetValue(Int32Array array, int index) => array.GetValue(index); + } +} From 96b3965b9d6e226a81a0d97b8a14c37c2b54abc5 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 13:55:38 +0200 Subject: [PATCH 07/24] MapArray : test netcore 3 1 constraint --- csharp/test/Apache.Arrow.Tests/MapArrayTests.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs index 0c8793aca6d9d..731ac11c0bbb5 100644 --- a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -22,6 +22,7 @@ namespace Apache.Arrow.Tests { public class MapArrayTests { +#if NETCOREAPP3_1_OR_GREATER [Fact] public void MapArray_Should_Build() { @@ -52,6 +53,7 @@ public void MapArray_Should_Build() Assert.True(array.IsNull(1)); Assert.Equal(new KeyValuePair[] { kv1, kv2 }, array.GetKeyValuePairs(2, GetKey, GetValue).ToArray()); } +#endif private static string GetKey(StringArray array, int index) => array.GetString(index); private static int? GetValue(Int32Array array, int index) => array.GetValue(index); From a6b927adefa143de081e359ad5339a816d18c335 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 14:02:43 +0200 Subject: [PATCH 08/24] MapArray : inner struct not null array --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 00acab09bef30..de07eba317c8e 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -80,7 +80,7 @@ public MapArray Build(MemoryAllocator allocator = default) StructArray structs = new StructArray( DataType.KeyValueType, KeyBuilder.Length, new IArrowArray[] { KeyBuilder.Build(allocator), ValueBuilder.Build(allocator) }, - validityBuffer, NullCount + ArrowBuffer.Empty, 0 ); return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, validityBuffer, NullCount); From 093a24870812aa28da26374f239af946a7fb244e Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 14:34:34 +0200 Subject: [PATCH 09/24] MapArray : GetKeyValuePairs with constructor --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index de07eba317c8e..3dc53bc7e3ae2 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -150,7 +150,7 @@ public IEnumerable> GetKeyValuePairs(getKey(keyArray, i), getValue(valueArray, i)); } } #endif From acc2bc7a9dce95b45a585ee28e06e168e37da7b9 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 14:36:01 +0200 Subject: [PATCH 10/24] MapArray : fix KeyValues slice --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 3dc53bc7e3ae2..7774b8c49d17a 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -143,7 +143,7 @@ public IEnumerable> GetKeyValuePairs Date: Fri, 21 Apr 2023 15:43:40 +0200 Subject: [PATCH 11/24] MapArray : GetTuple --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 18 ++++++++++ .../test/Apache.Arrow.Tests/MapArrayTests.cs | 36 ++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 7774b8c49d17a..47b14649ae181 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -135,6 +135,24 @@ private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, Arro { } + public IEnumerable> GetTuple(int index, Func getKey, Func getValue) + where TKeyArray : Array where TValueArray : Array + { + ReadOnlySpan offsets = ValueOffsets; + // Get key values + int start = offsets[index]; + int end = offsets[index + 1]; + StructArray array = KeyValues.Slice(start, end - start) as StructArray; + + TKeyArray keyArray = array.Fields[0] as TKeyArray; + TValueArray valueArray = array.Fields[1] as TValueArray; + + for (int i = start; i < end; i++) + { + yield return new Tuple(getKey(keyArray, i), getValue(valueArray, i)); + } + } + #if NETCOREAPP3_1_OR_GREATER public IEnumerable> GetKeyValuePairs(int index, Func getKey, Func getValue) where TKeyArray : Array where TValueArray : Array diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs index 731ac11c0bbb5..9f624aebaf408 100644 --- a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -16,15 +16,48 @@ using System.Collections.Generic; using System.Linq; using Apache.Arrow.Types; +using Apache.Arrow.Ipc; using Xunit; +using System; namespace Apache.Arrow.Tests { public class MapArrayTests { + [Fact] + public void MapArray_Should_GetTuple() + { + MapType type = new MapType(StringType.Default, Int64Type.Default); + MapArray.Builder builder = new MapArray.Builder(type); + var keyBuilder = builder.KeyBuilder as StringArray.Builder; + var valueBuilder = builder.ValueBuilder as Int64Array.Builder; + + Tuple kv0 = Tuple.Create("test", (long?)1); + Tuple kv1 = Tuple.Create("other", (long?)123); + Tuple kv2 = Tuple.Create("kv", (long?)null); + + builder.Append(); + keyBuilder.Append("test"); + valueBuilder.Append(1); + + builder.AppendNull(); + + builder.Append(); + keyBuilder.Append("other"); + valueBuilder.Append(123); + keyBuilder.Append("kv"); + valueBuilder.AppendNull(); + + MapArray array = builder.Build(); + + Assert.Equal(new Tuple[] { kv0 }, array.GetTuple(0, GetKey, GetValue).ToArray()); + Assert.True(array.IsNull(1)); + Assert.Equal(new Tuple[] { kv1, kv2 }, array.GetTuple(2, GetKey, GetValue).ToArray()); + } + #if NETCOREAPP3_1_OR_GREATER [Fact] - public void MapArray_Should_Build() + public void MapArray_Should_GetKeyValuePairs() { MapType type = new MapType(StringType.Default, Int32Type.Default); MapArray.Builder builder = new MapArray.Builder(type); @@ -57,5 +90,6 @@ public void MapArray_Should_Build() private static string GetKey(StringArray array, int index) => array.GetString(index); private static int? GetValue(Int32Array array, int index) => array.GetValue(index); + private static long? GetValue(Int64Array array, int index) => array.GetValue(index); } } From 62013c122e6af50242cec6aa37ba161b08389fea Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 15:51:57 +0200 Subject: [PATCH 12/24] code : clean --- csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 64a61d106184d..db188814b7c78 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -198,8 +198,7 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c { throw new InvalidDataException($"Map type must have exactly one struct child."); } - Field child = childFields[0]; - return new Types.MapType(child); + return new Types.MapType(childFields[0]); default: throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported."); } From 571cb5e7749b1243fda9ebebf52f51aa8ecfabc2 Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 16:54:30 +0200 Subject: [PATCH 13/24] MapArray : rename GetTuple to GetTuples --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 +- csharp/test/Apache.Arrow.Tests/MapArrayTests.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 47b14649ae181..d097d27456a31 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -135,7 +135,7 @@ private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, Arro { } - public IEnumerable> GetTuple(int index, Func getKey, Func getValue) + public IEnumerable> GetTuples(int index, Func getKey, Func getValue) where TKeyArray : Array where TValueArray : Array { ReadOnlySpan offsets = ValueOffsets; diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs index 9f624aebaf408..ef936c9741800 100644 --- a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -50,9 +50,9 @@ public void MapArray_Should_GetTuple() MapArray array = builder.Build(); - Assert.Equal(new Tuple[] { kv0 }, array.GetTuple(0, GetKey, GetValue).ToArray()); + Assert.Equal(new Tuple[] { kv0 }, array.GetTuples(0, GetKey, GetValue).ToArray()); Assert.True(array.IsNull(1)); - Assert.Equal(new Tuple[] { kv1, kv2 }, array.GetTuple(2, GetKey, GetValue).ToArray()); + Assert.Equal(new Tuple[] { kv1, kv2 }, array.GetTuples(2, GetKey, GetValue).ToArray()); } #if NETCOREAPP3_1_OR_GREATER From c9b4694c3dd7d90d845aebc20ce42eb560dcd8bb Mon Sep 17 00:00:00 2001 From: Platob Date: Fri, 21 Apr 2023 19:13:07 +0200 Subject: [PATCH 14/24] MapArray : IPC get meta --- csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 3 ++- csharp/test/Apache.Arrow.Tests/MapArrayTests.cs | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index db188814b7c78..d64159636dd41 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -198,7 +198,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c { throw new InvalidDataException($"Map type must have exactly one struct child."); } - return new Types.MapType(childFields[0]); + Flatbuf.Map meta = field.Type().Value; + return new Types.MapType(childFields[0], meta.KeysSorted); default: throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported."); } diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs index ef936c9741800..1e46e70f15052 100644 --- a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -13,12 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +using System; using System.Collections.Generic; using System.Linq; using Apache.Arrow.Types; -using Apache.Arrow.Ipc; using Xunit; -using System; namespace Apache.Arrow.Tests { From fb01fe558ffe76dbe8cebc1652444c8498e21a52 Mon Sep 17 00:00:00 2001 From: Platob Date: Sat, 22 Apr 2023 15:57:53 +0200 Subject: [PATCH 15/24] rm #if NETCOREAPP3_1_OR_GREATER constraint --- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 2 -- csharp/test/Apache.Arrow.Tests/MapArrayTests.cs | 2 -- 2 files changed, 4 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index d097d27456a31..5c6bd479c0768 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -153,7 +153,6 @@ public IEnumerable> GetTuples(int inde } } -#if NETCOREAPP3_1_OR_GREATER public IEnumerable> GetKeyValuePairs(int index, Func getKey, Func getValue) where TKeyArray : Array where TValueArray : Array { @@ -171,6 +170,5 @@ public IEnumerable> GetKeyValuePairs(getKey(keyArray, i), getValue(valueArray, i)); } } -#endif } } diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs index 1e46e70f15052..034f120f3f016 100644 --- a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -54,7 +54,6 @@ public void MapArray_Should_GetTuple() Assert.Equal(new Tuple[] { kv1, kv2 }, array.GetTuples(2, GetKey, GetValue).ToArray()); } -#if NETCOREAPP3_1_OR_GREATER [Fact] public void MapArray_Should_GetKeyValuePairs() { @@ -85,7 +84,6 @@ public void MapArray_Should_GetKeyValuePairs() Assert.True(array.IsNull(1)); Assert.Equal(new KeyValuePair[] { kv1, kv2 }, array.GetKeyValuePairs(2, GetKey, GetValue).ToArray()); } -#endif private static string GetKey(StringArray array, int index) => array.GetString(index); private static int? GetValue(Int32Array array, int index) => array.GetValue(index); From eb0e3d03941ed24be9469067a64a7046a7aaab69 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 25 Sep 2023 07:55:51 -0700 Subject: [PATCH 16/24] Support C API for maps --- csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs | 3 +++ csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs | 6 +++--- csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs | 6 ++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index da1b0f31b8f08..1b40ec49658bb 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -180,6 +180,9 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) }; ; break; case ArrowTypeId.Map: + MapType mapType = (MapType)type; + children = ProcessListChildren(cArray, mapType.Fields[0].DataType); + buffers = ImportListBuffers(cArray); break; case ArrowTypeId.Null: buffers = System.Array.Empty(); diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index c1a12362a942a..f3479903889d1 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -188,6 +188,7 @@ private static string GetFormat(IArrowType datatype) return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; case UnionType u: return FormatUnion(u); + case MapType _: return "+m"; // Dictionary case DictionaryType dictionaryType: return GetFormat(dictionaryType.IndexType); @@ -212,10 +213,9 @@ private static long GetFlags(IArrowType datatype, bool nullable = true) } } - if (datatype.TypeId == ArrowTypeId.Map) + if (datatype is MapType mapType && mapType.KeySorted) { - // TODO: when we implement MapType, make sure to set the KEYS_SORTED flag. - throw new NotSupportedException("Exporting MapTypes is not supported."); + flags |= CArrowSchema.ArrowFlagMapKeysSorted; } return flags; diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index 42c8cdd5ef548..ab390e1748293 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -206,6 +206,12 @@ public ArrowType GetAsType() return new FixedSizeListType(childField, width); } + else if (format == "+m") + { + return new MapType( + ParseChildren("map").Single(), + (_cSchema->flags & CArrowSchema.ArrowFlagMapKeysSorted) != 0); + } // TODO: Map type and large list type From bc703d68c29979b0446562e48c841313b2c790a4 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 25 Sep 2023 20:30:00 -0700 Subject: [PATCH 17/24] Finish implementing Map. Fix concatenation of Structs to work correctly. --- .../Arrays/ArrayDataConcatenator.cs | 28 +++++++---- .../Arrays/ArrayDataTypeComparer.cs | 13 ++++- .../IntegrationCommand.cs | 22 ++++++++ .../Apache.Arrow.IntegrationTest/JsonFile.cs | 6 ++- .../Apache.Arrow.Tests/ArrayTypeComparer.cs | 13 ++++- .../ArrowArrayConcatenatorTests.cs | 50 ++++++++++++++++++- dev/archery/archery/integration/datagen.py | 3 +- docs/source/status.rst | 2 +- 8 files changed, 118 insertions(+), 19 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 806defdc7ce66..929c7b1a489de 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -50,7 +50,8 @@ private class ArrayDataConcatenationVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { public ArrayData Result { get; private set; } private readonly IReadOnlyList _arrayDataList; @@ -92,15 +93,7 @@ public void Visit(FixedWidthType type) public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); - public void Visit(ListType type) - { - CheckData(type, 2); - ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); - ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer(); - ArrayData child = Concatenate(SelectChildren(0), _allocator); - - Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); - } + public void Visit(ListType type) => ConcatenateLists(type); public void Visit(FixedSizeListType type) { @@ -114,6 +107,7 @@ public void Visit(FixedSizeListType type) public void Visit(StructType type) { CheckData(type, 1); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); List children = new List(type.Fields.Count); for (int i = 0; i < type.Fields.Count; i++) @@ -121,7 +115,7 @@ public void Visit(StructType type) children.Add(Concatenate(SelectChildren(i), _allocator)); } - Result = new ArrayData(type, _arrayDataList[0].Length, _arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children); + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children); } public void Visit(UnionType type) @@ -151,6 +145,8 @@ public void Visit(UnionType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children); } + public void Visit(MapType type) => ConcatenateLists(type); + public void Visit(IArrowType type) { throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet."); @@ -175,6 +171,16 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer }); } + private void ConcatenateLists(NestedType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer(); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); + } + private ArrowBuffer ConcatenateValidityBuffer() { if (_totalNullCount == 0) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs index 6b54ec1edb573..1698e0672fb60 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs @@ -28,7 +28,8 @@ internal sealed class ArrayDataTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private readonly IArrowType _expectedType; private bool _dataTypeMatch; @@ -132,6 +133,16 @@ public void Visit(UnionType actualType) } } + public void Visit(MapType actualType) + { + if (_expectedType is MapType expectedType + && expectedType.KeySorted == actualType.KeySorted + && CompareNested(expectedType, actualType)) + { + _dataTypeMatch = true; + } + } + private static bool CompareNested(NestedType expectedType, NestedType actualType) { if (expectedType.Fields.Count != actualType.Fields.Count) diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs index 1e76ee505a516..ac29a23337686 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -189,6 +189,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), + "map" => ToMapArrowType(type, children), "null" => NullType.Default, _ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}") }; @@ -272,6 +273,11 @@ private static IArrowType ToListArrowType(JsonArrowType type, Field[] children) return new ListType(children[0]); } + private static IArrowType ToMapArrowType(JsonArrowType type, Field[] children) + { + return new MapType(children[0], type.KeysSorted); + } + private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children) { return new FixedSizeListType(children[0], type.ListSize); @@ -319,6 +325,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor { private JsonFieldData JsonFieldData { get; set; } @@ -590,6 +597,21 @@ public void Visit(UnionType type) Array = UnionArray.Create(arrayData); } + public void Visit(MapType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetOffsetBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.KeyValueType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer }, new[] { Array.Data }); + Array = new MapArray(arrayData); + } + private ArrayData[] GetChildren(NestedType type) { ArrayData[] children = new ArrayData[type.Fields.Count]; diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 112eeabcb9931..45fd79aedc775 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -74,7 +74,11 @@ public class JsonArrowType // union fields public string Mode { get; set; } public int[] TypeIds { get; set; } - + + // map fields + [JsonIgnore] + public bool KeysSorted => ExtensionData["keysSorted"].GetBoolean(); + [JsonExtensionData] public Dictionary ExtensionData { get; set; } } diff --git a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs index c8bcc3cee0f99..ad3527c47807b 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs @@ -29,7 +29,8 @@ public class ArrayTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private readonly IArrowType _expectedType; @@ -131,6 +132,16 @@ public void Visit(UnionType actualType) CompareNested(expectedType, actualType); } + public void Visit(MapType actualType) + { + Assert.IsAssignableFrom(_expectedType); + var expectedType = (MapType)_expectedType; + + Assert.Equal(expectedType.KeySorted, actualType.KeySorted); + + CompareNested(expectedType, actualType); + } + private static void CompareNested(NestedType expectedType, NestedType actualType) { Assert.Equal(expectedType.Fields.Count, actualType.Fields.Count); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index f5a2c345e2ae6..1f09017ee15c6 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -93,6 +93,10 @@ private static IEnumerable, IArrowArray>> GenerateTestDa new[] { 0, 1 }, UnionMode.Dense ), + new MapType( + new Field.Builder().Name("key").DataType(StringType.Default).Nullable(false).Build(), + new Field.Builder().Name("value").DataType(Int32Type.Default).Nullable(true).Build(), + keySorted: false), }; foreach (IArrowType type in targetTypes) @@ -136,7 +140,8 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private List> _baseData; @@ -406,7 +411,7 @@ public void Visit(StructType type) StringArray resultStringArray = resultStringBuilder.Build(); Int32Array resultInt32Array = resultInt32Builder.Build(); - ExpectedArray = new StructArray(type, 3, new List { resultStringArray, resultInt32Array }, nullBitmapBuffer, 1); + ExpectedArray = new StructArray(type, 9, new List { resultStringArray, resultInt32Array }, nullBitmapBuffer, 3); } public void Visit(UnionType type) @@ -495,6 +500,47 @@ public void Visit(UnionType type) new[] { stringResultBuilder.Build().Data, intResultBuilder.Build().Data })); } + public void Visit(MapType type) + { + MapArray.Builder resultBuilder = new MapArray.Builder(type).Reserve(_baseDataTotalElementCount); + //Todo : Support various types + StringArray.Builder resultKeyBuilder = (StringArray.Builder)resultBuilder.KeyBuilder.Reserve(_baseDataTotalElementCount); + Int32Array.Builder resultValueBuilder = (Int32Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); + ArrowBuffer nullBitmapBuilder = new ArrowBuffer.BitmapBuilder().Append(true).Append(true).Append(false).Build(); + + for (int i = 0; i < _baseData.Count; i++) + { + List dataList = _baseData[i]; + + MapArray.Builder builder = new MapArray.Builder(type).Reserve(dataList.Count); + StringArray.Builder keyBuilder = (StringArray.Builder)builder.KeyBuilder.Reserve(dataList.Count); + Int32Array.Builder valueBuilder = (Int32Array.Builder)builder.ValueBuilder.Reserve(dataList.Count); + + foreach (int? value in dataList) + { + if (value.HasValue) + { + builder.Append(); + resultBuilder.Append(); + + keyBuilder.Append(value.Value.ToString()); + valueBuilder.Append(value.Value); + resultKeyBuilder.Append(value.Value.ToString()); + resultValueBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(IArrowType type) { throw new NotImplementedException(); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 299881c4b613a..fda2d1e1b4057 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1816,8 +1816,7 @@ def _temp_path(): .skip_tester('C#') .skip_tester('JS'), - generate_map_case() - .skip_tester('C#'), + generate_map_case(), generate_non_canonical_map_case() .skip_tester('C#') diff --git a/docs/source/status.rst b/docs/source/status.rst index 6314fd4c8d31f..6297f39f98e7f 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -81,7 +81,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Struct | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Map | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +| Map | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Dense Union | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ From af69e083b88ae8b8756c3afbcc3c6469b2fbb141 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 26 Sep 2023 05:35:55 -0700 Subject: [PATCH 18/24] One fix plus one test --- .../Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 2 +- csharp/src/Apache.Arrow/Types/MapType.cs | 7 +++++++ .../CDataInterfacePythonTests.cs | 15 ++++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 929c7b1a489de..1cc2860a2471d 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -145,7 +145,7 @@ public void Visit(UnionType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children); } - public void Visit(MapType type) => ConcatenateLists(type); + public void Visit(MapType type) => ConcatenateLists(type.UnsortedKey()); public void Visit(IArrowType type) { diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs index b67cfec853458..0fcc98e54c661 100644 --- a/csharp/src/Apache.Arrow/Types/MapType.cs +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -32,5 +32,12 @@ public MapType(Field entries, bool keySorted = false) : base(entries) } public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + + public MapType UnsortedKey() + { + if (!KeySorted) { return this; } + + return new MapType(Fields[0], keySorted: false); + } } } diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index f28b89a9cd17e..b4c764cddbdb1 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -115,6 +115,8 @@ private static Schema GetTestSchema() .Field(f => f.Name("dense_union").DataType(new UnionType(new[] { new Field("i64", Int64Type.Default, false), new Field("f32", FloatType.Default, true), }, new[] { 0, 1 }, UnionMode.Dense))) .Field(f => f.Name("sparse_union").DataType(new UnionType(new[] { new Field("i32", Int32Type.Default, true), new Field("f64", DoubleType.Default, false), }, new[] { 0, 1 }, UnionMode.Sparse))) + .Field(f => f.Name("map").DataType(new MapType(StringType.Default, Int32Type.Default)).Nullable(false)) + // Checking wider characters. .Field(f => f.Name("hello 你好 😄").DataType(BooleanType.Default).Nullable(true)) @@ -178,6 +180,8 @@ private static IEnumerable GetPythonFields() yield return pa.field("dense_union", pa.dense_union(List(pa.field("i64", pa.int64(), false), pa.field("f32", pa.float32(), true)))); yield return pa.field("sparse_union", pa.sparse_union(List(pa.field("i32", pa.int32(), true), pa.field("f64", pa.float64(), false)))); + yield return pa.field("map", pa.map_(pa.@string(), pa.int32()), false); + yield return pa.field("hello 你好 😄", pa.bool_(), true); } } @@ -512,8 +516,12 @@ public unsafe void ImportRecordBatch() ), /* field name */ List("i32", "s"), /* type codes */ List(3, 2)), + pa.MapArray.from_arrays( + List(0, 0, 1, 2, 4, 10), + pa.array(List("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")), + pa.array(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))), }), - new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9" }); + new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10" }); dynamic batch = table.to_batches()[0]; @@ -585,6 +593,11 @@ public unsafe void ImportRecordBatch() UnionArray col9 = (UnionArray)recordBatch.Column("col9"); Assert.Equal(5, col9.Length); Assert.True(col9 is DenseUnionArray); + + MapArray col10 = (MapArray)recordBatch.Column("col10"); + Assert.Equal(5, col10.Length); + Assert.Equal(new int[] { 0, 0, 1, 2, 4, 10}, col10.ValueOffsets.ToArray()); + Assert.Equal(new long?[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, ((Int64Array)col10.Values).ToList().ToArray()); } [SkippableFact] From 396427c9435da96753e99c027fac9bf84720b9e3 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 26 Sep 2023 11:39:50 -0700 Subject: [PATCH 19/24] Add missing copyright header --- csharp/src/Apache.Arrow/Types/MapType.cs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs index 0fcc98e54c661..e3e1980ab5eda 100644 --- a/csharp/src/Apache.Arrow/Types/MapType.cs +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -1,4 +1,19 @@ -using System.Collections.Generic; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Collections.Generic; namespace Apache.Arrow.Types { From 87faaf6f73e9b7aa79ee69b184cd45d16703409f Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 26 Sep 2023 12:22:25 -0700 Subject: [PATCH 20/24] Make "keysSorted" a little more optional. --- csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 45fd79aedc775..aca5251c99722 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -77,7 +77,7 @@ public class JsonArrowType // map fields [JsonIgnore] - public bool KeysSorted => ExtensionData["keysSorted"].GetBoolean(); + public bool KeysSorted => ExtensionData.TryGetValue("keysSorted", out JsonElement value) && value.GetBoolean(); [JsonExtensionData] public Dictionary ExtensionData { get; set; } From cef8911e6d80e244d0a24a82980dcf7d847a45db Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 26 Sep 2023 13:29:45 -0700 Subject: [PATCH 21/24] Actually fix "KeysSorted" problem. --- csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index aca5251c99722..923991df025f3 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -76,8 +76,7 @@ public class JsonArrowType public int[] TypeIds { get; set; } // map fields - [JsonIgnore] - public bool KeysSorted => ExtensionData.TryGetValue("keysSorted", out JsonElement value) && value.GetBoolean(); + public bool KeysSorted { get; set; } [JsonExtensionData] public Dictionary ExtensionData { get; set; } From addd08176fa4518097c9771b22e90a6d306b8af7 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 2 Oct 2023 06:50:23 -0700 Subject: [PATCH 22/24] Address code review feedback --- csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 2 +- csharp/src/Apache.Arrow/Arrays/BinaryArray.cs | 4 ++-- csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs | 4 ++-- csharp/src/Apache.Arrow/Arrays/ListArray.cs | 4 ++-- csharp/src/Apache.Arrow/Arrays/MapArray.cs | 6 +++--- csharp/src/Apache.Arrow/Types/MapType.cs | 2 +- .../test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs | 3 --- csharp/test/Apache.Arrow.Tests/TestData.cs | 2 -- 8 files changed, 11 insertions(+), 16 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 1cc2860a2471d..a3b39923809bb 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -145,7 +145,7 @@ public void Visit(UnionType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children); } - public void Visit(MapType type) => ConcatenateLists(type.UnsortedKey()); + public void Visit(MapType type) => ConcatenateLists(type.UnsortedKey()); /* Can't tell if the output is still sorted */ public void Visit(IArrowType type) { diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs index 4fd8059f6fe70..20fe0342cca40 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -237,7 +237,7 @@ public TBuilder Reserve(int capacity) // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way. ValueOffsets.Reserve(capacity + 1); ValueBuffer.Reserve(capacity); - ValidityBuffer.Reserve(capacity + 1); + ValidityBuffer.Reserve(capacity); return Instance; } @@ -246,7 +246,7 @@ public TBuilder Resize(int length) // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`. ValueOffsets.Resize(length + 1); ValueBuffer.Resize(length); - ValidityBuffer.Resize(length + 1); + ValidityBuffer.Resize(length); return Instance; } diff --git a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs index 7d3d8754747e9..866a674bc9df8 100644 --- a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs @@ -111,14 +111,14 @@ public TArray Build(MemoryAllocator allocator = default) public TBuilder Reserve(int capacity) { ValueBuffer.Reserve(capacity * ByteWidth); - ValidityBuffer.Reserve(capacity + 1); + ValidityBuffer.Reserve(capacity); return Instance; } public TBuilder Resize(int length) { ValueBuffer.Resize(length * ByteWidth); - ValidityBuffer.Resize(length + 1); + ValidityBuffer.Resize(length); return Instance; } diff --git a/csharp/src/Apache.Arrow/Arrays/ListArray.cs b/csharp/src/Apache.Arrow/Arrays/ListArray.cs index 0438f6c8c3e09..4d2ff96a3d005 100644 --- a/csharp/src/Apache.Arrow/Arrays/ListArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/ListArray.cs @@ -91,14 +91,14 @@ public ListArray Build(MemoryAllocator allocator = default) public Builder Reserve(int capacity) { ValueOffsetsBufferBuilder.Reserve(capacity + 1); - ValidityBufferBuilder.Reserve(capacity + 1); + ValidityBufferBuilder.Reserve(capacity); return this; } public Builder Resize(int length) { ValueOffsetsBufferBuilder.Resize(length + 1); - ValidityBufferBuilder.Resize(length + 1); + ValidityBufferBuilder.Resize(length); return this; } diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs index 5c6bd479c0768..a6676b134e34a 100644 --- a/csharp/src/Apache.Arrow/Arrays/MapArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -20,7 +20,7 @@ namespace Apache.Arrow { - public class MapArray : ListArray // MapArray = ListArray(StrucArray("key", "value")) + public class MapArray : ListArray // MapArray = ListArray(StructArray("key", "value")) { // Same as ListArray.Builder, but with KeyBuilder public new class Builder : IArrowArrayBuilder @@ -89,14 +89,14 @@ public MapArray Build(MemoryAllocator allocator = default) public Builder Reserve(int capacity) { ValueOffsetsBufferBuilder.Reserve(capacity + 1); - ValidityBufferBuilder.Reserve(capacity + 1); + ValidityBufferBuilder.Reserve(capacity); return this; } public Builder Resize(int length) { ValueOffsetsBufferBuilder.Resize(length + 1); - ValidityBufferBuilder.Resize(length + 1); + ValidityBufferBuilder.Resize(length); return this; } diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs index 0fcc98e54c661..dc9be6450d8a7 100644 --- a/csharp/src/Apache.Arrow/Types/MapType.cs +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -2,7 +2,7 @@ namespace Apache.Arrow.Types { - public sealed class MapType : NestedType // MapType = ListType(StrucType("key", "value")) + public sealed class MapType : NestedType // MapType = ListType(StructType("key", "value")) { public override ArrowTypeId TypeId => ArrowTypeId.Map; public override string Name => "map"; diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index 1f09017ee15c6..f1dcbb5d37b8f 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -315,7 +315,6 @@ public void Visit(StringType type) public void Visit(ListType type) { ListArray.Builder resultBuilder = new ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); - //Todo : Support various types Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); for (int i = 0; i < _baseDataListCount; i++) @@ -351,7 +350,6 @@ public void Visit(ListType type) public void Visit(FixedSizeListType type) { FixedSizeListArray.Builder resultBuilder = new FixedSizeListArray.Builder(type.ValueDataType, type.ListSize).Reserve(_baseDataTotalElementCount); - //Todo : Support various types Int32Array.Builder resultValueBuilder = (Int32Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); for (int i = 0; i < _baseDataListCount; i++) @@ -503,7 +501,6 @@ public void Visit(UnionType type) public void Visit(MapType type) { MapArray.Builder resultBuilder = new MapArray.Builder(type).Reserve(_baseDataTotalElementCount); - //Todo : Support various types StringArray.Builder resultKeyBuilder = (StringArray.Builder)resultBuilder.KeyBuilder.Reserve(_baseDataTotalElementCount); Int32Array.Builder resultValueBuilder = (Int32Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); ArrowBuffer nullBitmapBuilder = new ArrowBuffer.BitmapBuilder().Append(true).Append(true).Append(false).Build(); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 0bff6e2d414f8..e3a40dbdafd61 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -263,7 +263,6 @@ public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); - //Todo : Support various types var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1); for (var i = 0; i < Length; i++) @@ -281,7 +280,6 @@ public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); - //Todo : Support various types var valueBuilder = (Int32Array.Builder)builder.ValueBuilder; for (var i = 0; i < Length; i++) From 4f7e54816f90f541ecb4addfb00bb810fa76802a Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 2 Oct 2023 06:51:27 -0700 Subject: [PATCH 23/24] Fixed comment --- csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index ab390e1748293..11420ad72963b 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -213,7 +213,7 @@ public ArrowType GetAsType() (_cSchema->flags & CArrowSchema.ArrowFlagMapKeysSorted) != 0); } - // TODO: Map type and large list type + // TODO: Large list type // Decimals if (format.StartsWith("d:")) From 5316971f6029599e87b8674dba69f4f0ef1d182e Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 2 Oct 2023 13:43:15 -0700 Subject: [PATCH 24/24] Restore changes lost to the merge --- .../Apache.Arrow.IntegrationTest/JsonFile.cs | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 85f66890edf47..d06249bef2661 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -124,6 +124,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), + "map" => ToMapArrowType(type, children), "null" => NullType.Default, _ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}") }; @@ -227,6 +228,11 @@ private static IArrowType ToUnionArrowType(JsonArrowType type, Field[] children) }; return new UnionType(children, type.TypeIds, mode); } + + private static IArrowType ToMapArrowType(JsonArrowType type, Field[] children) + { + return new MapType(children[0], type.KeysSorted); + } } public class JsonField @@ -271,6 +277,9 @@ public class JsonArrowType public string Mode { get; set; } public int[] TypeIds { get; set; } + // map fields + public bool KeysSorted { get; set; } + [JsonExtensionData] public Dictionary ExtensionData { get; set; } } @@ -345,6 +354,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor { private JsonFieldData JsonFieldData { get; set; } @@ -616,6 +626,21 @@ public void Visit(UnionType type) Array = UnionArray.Create(arrayData); } + public void Visit(MapType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetOffsetBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.KeyValueType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer }, new[] { Array.Data }); + Array = new MapArray(arrayData); + } + private ArrayData[] GetChildren(NestedType type) { ArrayData[] children = new ArrayData[type.Fields.Count];