diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 806defdc7ce66..a3b39923809bb 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -50,7 +50,8 @@ private class ArrayDataConcatenationVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { public ArrayData Result { get; private set; } private readonly IReadOnlyList _arrayDataList; @@ -92,15 +93,7 @@ public void Visit(FixedWidthType type) public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); - public void Visit(ListType type) - { - CheckData(type, 2); - ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); - ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer(); - ArrayData child = Concatenate(SelectChildren(0), _allocator); - - Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); - } + public void Visit(ListType type) => ConcatenateLists(type); public void Visit(FixedSizeListType type) { @@ -114,6 +107,7 @@ public void Visit(FixedSizeListType type) public void Visit(StructType type) { CheckData(type, 1); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); List children = new List(type.Fields.Count); for (int i = 0; i < type.Fields.Count; i++) @@ -121,7 +115,7 @@ public void Visit(StructType type) children.Add(Concatenate(SelectChildren(i), _allocator)); } - Result = new ArrayData(type, _arrayDataList[0].Length, _arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children); + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children); } public void Visit(UnionType type) @@ -151,6 +145,8 @@ public void Visit(UnionType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children); } + public void Visit(MapType type) => ConcatenateLists(type.UnsortedKey()); /* Can't tell if the output is still sorted */ + public void Visit(IArrowType type) { throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet."); @@ -175,6 +171,16 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer }); } + private void ConcatenateLists(NestedType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer(); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); + } + private ArrowBuffer ConcatenateValidityBuffer() { if (_totalNullCount == 0) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs index 6b54ec1edb573..1698e0672fb60 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs @@ -28,7 +28,8 @@ internal sealed class ArrayDataTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private readonly IArrowType _expectedType; private bool _dataTypeMatch; @@ -132,6 +133,16 @@ public void Visit(UnionType actualType) } } + public void Visit(MapType actualType) + { + if (_expectedType is MapType expectedType + && expectedType.KeySorted == actualType.KeySorted + && CompareNested(expectedType, actualType)) + { + _dataTypeMatch = true; + } + } + private static bool CompareNested(NestedType expectedType, NestedType actualType) { if (expectedType.Fields.Count != actualType.Fields.Count) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index aa407203d1858..d3b7d65185abe 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -59,6 +59,8 @@ public static IArrowArray BuildArray(ArrayData data) return new TimestampArray(data); case ArrowTypeId.List: return new ListArray(data); + case ArrowTypeId.Map: + return new MapArray(data); case ArrowTypeId.Struct: return new StructArray(data); case ArrowTypeId.Union: @@ -86,7 +88,6 @@ public static IArrowArray BuildArray(ArrayData data) case ArrowTypeId.FixedSizeList: return new FixedSizeListArray(data); case ArrowTypeId.Interval: - case ArrowTypeId.Map: default: throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}."); } diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs index 4fd8059f6fe70..20fe0342cca40 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -237,7 +237,7 @@ public TBuilder Reserve(int capacity) // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way. ValueOffsets.Reserve(capacity + 1); ValueBuffer.Reserve(capacity); - ValidityBuffer.Reserve(capacity + 1); + ValidityBuffer.Reserve(capacity); return Instance; } @@ -246,7 +246,7 @@ public TBuilder Resize(int length) // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`. ValueOffsets.Resize(length + 1); ValueBuffer.Resize(length); - ValidityBuffer.Resize(length + 1); + ValidityBuffer.Resize(length); return Instance; } diff --git a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs index 7d3d8754747e9..866a674bc9df8 100644 --- a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs @@ -111,14 +111,14 @@ public TArray Build(MemoryAllocator allocator = default) public TBuilder Reserve(int capacity) { ValueBuffer.Reserve(capacity * ByteWidth); - ValidityBuffer.Reserve(capacity + 1); + ValidityBuffer.Reserve(capacity); return Instance; } public TBuilder Resize(int length) { ValueBuffer.Resize(length * ByteWidth); - ValidityBuffer.Resize(length + 1); + ValidityBuffer.Resize(length); return Instance; } diff --git a/csharp/src/Apache.Arrow/Arrays/ListArray.cs b/csharp/src/Apache.Arrow/Arrays/ListArray.cs index 97673cb48e6a8..4d2ff96a3d005 100644 --- a/csharp/src/Apache.Arrow/Arrays/ListArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/ListArray.cs @@ -91,14 +91,14 @@ public ListArray Build(MemoryAllocator allocator = default) public Builder Reserve(int capacity) { ValueOffsetsBufferBuilder.Reserve(capacity + 1); - ValidityBufferBuilder.Reserve(capacity + 1); + ValidityBufferBuilder.Reserve(capacity); return this; } public Builder Resize(int length) { ValueOffsetsBufferBuilder.Resize(length + 1); - ValidityBufferBuilder.Resize(length + 1); + ValidityBufferBuilder.Resize(length); return this; } @@ -139,6 +139,14 @@ private ListArray(ArrayData data, IArrowArray values) : base(data) Values = values; } + // Constructor for child MapArray + internal ListArray(ArrayData data, IArrowArray values, ArrowTypeId typeId) : base(data) + { + data.EnsureBufferCount(2); + data.EnsureDataType(typeId); + Values = values; + } + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); diff --git a/csharp/src/Apache.Arrow/Arrays/MapArray.cs b/csharp/src/Apache.Arrow/Arrays/MapArray.cs new file mode 100644 index 0000000000000..a6676b134e34a --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/MapArray.cs @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class MapArray : ListArray // MapArray = ListArray(StructArray("key", "value")) + { + // Same as ListArray.Builder, but with KeyBuilder + public new class Builder : IArrowArrayBuilder + { + public IArrowArrayBuilder> KeyBuilder { get; } + public IArrowArrayBuilder> ValueBuilder { get; } + + public int Length => ValueOffsetsBufferBuilder.Length; + + private ArrowBuffer.Builder ValueOffsetsBufferBuilder { get; } + + private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; } + + public int NullCount { get; protected set; } + + public MapType DataType { get; } + + public Builder(MapType type) + { + KeyBuilder = ArrowArrayBuilderFactory.Build(type.KeyField.DataType); + ValueBuilder = ArrowArrayBuilderFactory.Build(type.ValueField.DataType); + ValueOffsetsBufferBuilder = new ArrowBuffer.Builder(); + ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder(); + DataType = type; + } + + /// + /// Start a new variable-length list slot + /// + /// This function should be called before beginning to append elements to the + /// value builder + /// + /// + public Builder Append() + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + ValidityBufferBuilder.Append(true); + + return this; + } + + public Builder AppendNull() + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + ValidityBufferBuilder.Append(false); + NullCount++; + + return this; + } + + public MapArray Build(MemoryAllocator allocator = default) + { + ValueOffsetsBufferBuilder.Append(KeyBuilder.Length); + + ArrowBuffer validityBuffer = NullCount > 0 ? ValidityBufferBuilder.Build(allocator) : ArrowBuffer.Empty; + + StructArray structs = new StructArray( + DataType.KeyValueType, KeyBuilder.Length, + new IArrowArray[] { KeyBuilder.Build(allocator), ValueBuilder.Build(allocator) }, + ArrowBuffer.Empty, 0 + ); + + return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, validityBuffer, NullCount); + } + + public Builder Reserve(int capacity) + { + ValueOffsetsBufferBuilder.Reserve(capacity + 1); + ValidityBufferBuilder.Reserve(capacity); + return this; + } + + public Builder Resize(int length) + { + ValueOffsetsBufferBuilder.Resize(length + 1); + ValidityBufferBuilder.Resize(length); + return this; + } + + public Builder Clear() + { + ValueOffsetsBufferBuilder.Clear(); + KeyBuilder.Clear(); + ValueBuilder.Clear(); + ValidityBufferBuilder.Clear(); + return this; + } + + } + + public StructArray KeyValues => base.Values as StructArray; + public IArrowArray Keys => KeyValues.Fields[0]; + public new IArrowArray Values => KeyValues.Fields[1]; + + public MapArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, IArrowArray structs, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this( + new ArrayData( + dataType, length, nullCount, offset, new[] { nullBitmapBuffer, valueOffsetsBuffer }, + new[] { structs.Data } + ), structs) + { + } + + public MapArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) + { + } + + private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, ArrowTypeId.Map) + { + } + + public IEnumerable> GetTuples(int index, Func getKey, Func getValue) + where TKeyArray : Array where TValueArray : Array + { + ReadOnlySpan offsets = ValueOffsets; + // Get key values + int start = offsets[index]; + int end = offsets[index + 1]; + StructArray array = KeyValues.Slice(start, end - start) as StructArray; + + TKeyArray keyArray = array.Fields[0] as TKeyArray; + TValueArray valueArray = array.Fields[1] as TValueArray; + + for (int i = start; i < end; i++) + { + yield return new Tuple(getKey(keyArray, i), getValue(valueArray, i)); + } + } + + public IEnumerable> GetKeyValuePairs(int index, Func getKey, Func getValue) + where TKeyArray : Array where TValueArray : Array + { + ReadOnlySpan offsets = ValueOffsets; + // Get key values + int start = offsets[index]; + int end = offsets[index + 1]; + StructArray array = KeyValues.Slice(start, end - start) as StructArray; + + TKeyArray keyArray = array.Fields[0] as TKeyArray; + TValueArray valueArray = array.Fields[1] as TValueArray; + + for (int i = start; i < end; i++) + { + yield return new KeyValuePair(getKey(keyArray, i), getValue(valueArray, i)); + } + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index da1b0f31b8f08..1b40ec49658bb 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -180,6 +180,9 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) }; ; break; case ArrowTypeId.Map: + MapType mapType = (MapType)type; + children = ProcessListChildren(cArray, mapType.Fields[0].DataType); + buffers = ImportListBuffers(cArray); break; case ArrowTypeId.Null: buffers = System.Array.Empty(); diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index c1a12362a942a..f3479903889d1 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -188,6 +188,7 @@ private static string GetFormat(IArrowType datatype) return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; case UnionType u: return FormatUnion(u); + case MapType _: return "+m"; // Dictionary case DictionaryType dictionaryType: return GetFormat(dictionaryType.IndexType); @@ -212,10 +213,9 @@ private static long GetFlags(IArrowType datatype, bool nullable = true) } } - if (datatype.TypeId == ArrowTypeId.Map) + if (datatype is MapType mapType && mapType.KeySorted) { - // TODO: when we implement MapType, make sure to set the KEYS_SORTED flag. - throw new NotSupportedException("Exporting MapTypes is not supported."); + flags |= CArrowSchema.ArrowFlagMapKeysSorted; } return flags; diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index f7216df869abd..f2a08f5e4a40b 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -206,8 +206,14 @@ public ArrowType GetAsType() return new FixedSizeListType(childField, width); } + else if (format == "+m") + { + return new MapType( + ParseChildren("map").Single(), + (_cSchema->flags & CArrowSchema.ArrowFlagMapKeysSorted) != 0); + } - // TODO: Map type and large list type + // TODO: Large list type // Decimals if (format.StartsWith("d:")) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index b11467538dd04..425d9326addfe 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -67,6 +67,7 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor { private FlatBufferBuilder Builder { get; } @@ -229,6 +230,13 @@ public void Visit(FixedSizeBinaryType type) Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth)); } + public void Visit(MapType type) + { + Result = FieldType.Build( + Flatbuf.Type.Map, + Flatbuf.Map.CreateMap(Builder, type.KeySorted)); + } + public void Visit(NullType type) { Flatbuf.Null.StartNull(Builder); @@ -239,7 +247,7 @@ public void Visit(NullType type) public void Visit(IArrowType type) { - throw new NotImplementedException(); + throw new NotImplementedException($"Cannot visit type {type}"); } } diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 6249063ba81f4..9847c376cf82e 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -207,6 +207,13 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c Debug.Assert(childFields != null); Flatbuf.Union unionMetadata = field.Type().Value; return new Types.UnionType(childFields, unionMetadata.GetTypeIdsArray(), unionMetadata.Mode.ToArrow()); + case Flatbuf.Type.Map: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"Map type must have exactly one struct child."); + } + Flatbuf.Map meta = field.Type().Value; + return new Types.MapType(childFields[0], meta.KeysSorted); default: throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported."); } diff --git a/csharp/src/Apache.Arrow/Types/ListType.cs b/csharp/src/Apache.Arrow/Types/ListType.cs index a006c2282dd11..b467934ec541d 100644 --- a/csharp/src/Apache.Arrow/Types/ListType.cs +++ b/csharp/src/Apache.Arrow/Types/ListType.cs @@ -13,8 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; - namespace Apache.Arrow.Types { public sealed class ListType : NestedType diff --git a/csharp/src/Apache.Arrow/Types/MapType.cs b/csharp/src/Apache.Arrow/Types/MapType.cs new file mode 100644 index 0000000000000..73112c815bfbf --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/MapType.cs @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Collections.Generic; + +namespace Apache.Arrow.Types +{ + public sealed class MapType : NestedType // MapType = ListType(StructType("key", "value")) + { + public override ArrowTypeId TypeId => ArrowTypeId.Map; + public override string Name => "map"; + public readonly bool KeySorted; + + public StructType KeyValueType => Fields[0].DataType as StructType; + public Field KeyField => KeyValueType.Fields[0]; + public Field ValueField => KeyValueType.Fields[1]; + + public MapType(IArrowType key, IArrowType value, bool nullable = true, bool keySorted = false) + : this(new Field("key", key, false), new Field("value", value, nullable), keySorted) + { + } + + public MapType(Field key, Field value, bool keySorted = false) + : this(new StructType(new List() { key, value }), keySorted) + { + } + + public MapType(StructType entries, bool keySorted = false) : this(new Field("entries", entries, false), keySorted) + { + } + + public MapType(Field entries, bool keySorted = false) : base(entries) + { + KeySorted = keySorted; + } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + + public MapType UnsortedKey() + { + if (!KeySorted) { return this; } + + return new MapType(Fields[0], keySorted: false); + } + } +} diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 85f66890edf47..d06249bef2661 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -124,6 +124,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), + "map" => ToMapArrowType(type, children), "null" => NullType.Default, _ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}") }; @@ -227,6 +228,11 @@ private static IArrowType ToUnionArrowType(JsonArrowType type, Field[] children) }; return new UnionType(children, type.TypeIds, mode); } + + private static IArrowType ToMapArrowType(JsonArrowType type, Field[] children) + { + return new MapType(children[0], type.KeysSorted); + } } public class JsonField @@ -271,6 +277,9 @@ public class JsonArrowType public string Mode { get; set; } public int[] TypeIds { get; set; } + // map fields + public bool KeysSorted { get; set; } + [JsonExtensionData] public Dictionary ExtensionData { get; set; } } @@ -345,6 +354,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor { private JsonFieldData JsonFieldData { get; set; } @@ -616,6 +626,21 @@ public void Visit(UnionType type) Array = UnionArray.Create(arrayData); } + public void Visit(MapType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetOffsetBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.KeyValueType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer }, new[] { Array.Data }); + Array = new MapArray(arrayData); + } + private ArrayData[] GetChildren(NestedType type) { ArrayData[] children = new ArrayData[type.Fields.Count]; diff --git a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs index c8bcc3cee0f99..ad3527c47807b 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs @@ -29,7 +29,8 @@ public class ArrayTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private readonly IArrowType _expectedType; @@ -131,6 +132,16 @@ public void Visit(UnionType actualType) CompareNested(expectedType, actualType); } + public void Visit(MapType actualType) + { + Assert.IsAssignableFrom(_expectedType); + var expectedType = (MapType)_expectedType; + + Assert.Equal(expectedType.KeySorted, actualType.KeySorted); + + CompareNested(expectedType, actualType); + } + private static void CompareNested(NestedType expectedType, NestedType actualType) { Assert.Equal(expectedType.Fields.Count, actualType.Fields.Count); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index f5a2c345e2ae6..f1dcbb5d37b8f 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -93,6 +93,10 @@ private static IEnumerable, IArrowArray>> GenerateTestDa new[] { 0, 1 }, UnionMode.Dense ), + new MapType( + new Field.Builder().Name("key").DataType(StringType.Default).Nullable(false).Build(), + new Field.Builder().Name("value").DataType(Int32Type.Default).Nullable(true).Build(), + keySorted: false), }; foreach (IArrowType type in targetTypes) @@ -136,7 +140,8 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private List> _baseData; @@ -310,7 +315,6 @@ public void Visit(StringType type) public void Visit(ListType type) { ListArray.Builder resultBuilder = new ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); - //Todo : Support various types Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); for (int i = 0; i < _baseDataListCount; i++) @@ -346,7 +350,6 @@ public void Visit(ListType type) public void Visit(FixedSizeListType type) { FixedSizeListArray.Builder resultBuilder = new FixedSizeListArray.Builder(type.ValueDataType, type.ListSize).Reserve(_baseDataTotalElementCount); - //Todo : Support various types Int32Array.Builder resultValueBuilder = (Int32Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); for (int i = 0; i < _baseDataListCount; i++) @@ -406,7 +409,7 @@ public void Visit(StructType type) StringArray resultStringArray = resultStringBuilder.Build(); Int32Array resultInt32Array = resultInt32Builder.Build(); - ExpectedArray = new StructArray(type, 3, new List { resultStringArray, resultInt32Array }, nullBitmapBuffer, 1); + ExpectedArray = new StructArray(type, 9, new List { resultStringArray, resultInt32Array }, nullBitmapBuffer, 3); } public void Visit(UnionType type) @@ -495,6 +498,46 @@ public void Visit(UnionType type) new[] { stringResultBuilder.Build().Data, intResultBuilder.Build().Data })); } + public void Visit(MapType type) + { + MapArray.Builder resultBuilder = new MapArray.Builder(type).Reserve(_baseDataTotalElementCount); + StringArray.Builder resultKeyBuilder = (StringArray.Builder)resultBuilder.KeyBuilder.Reserve(_baseDataTotalElementCount); + Int32Array.Builder resultValueBuilder = (Int32Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); + ArrowBuffer nullBitmapBuilder = new ArrowBuffer.BitmapBuilder().Append(true).Append(true).Append(false).Build(); + + for (int i = 0; i < _baseData.Count; i++) + { + List dataList = _baseData[i]; + + MapArray.Builder builder = new MapArray.Builder(type).Reserve(dataList.Count); + StringArray.Builder keyBuilder = (StringArray.Builder)builder.KeyBuilder.Reserve(dataList.Count); + Int32Array.Builder valueBuilder = (Int32Array.Builder)builder.ValueBuilder.Reserve(dataList.Count); + + foreach (int? value in dataList) + { + if (value.HasValue) + { + builder.Append(); + resultBuilder.Append(); + + keyBuilder.Append(value.Value.ToString()); + valueBuilder.Append(value.Value); + resultKeyBuilder.Append(value.Value.ToString()); + resultValueBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(IArrowType type) { throw new NotImplementedException(); diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index b6b65a582d953..7aee37b8212c3 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -115,6 +115,8 @@ private static Schema GetTestSchema() .Field(f => f.Name("dense_union").DataType(new UnionType(new[] { new Field("i64", Int64Type.Default, false), new Field("f32", FloatType.Default, true), }, new[] { 0, 1 }, UnionMode.Dense))) .Field(f => f.Name("sparse_union").DataType(new UnionType(new[] { new Field("i32", Int32Type.Default, true), new Field("f64", DoubleType.Default, false), }, new[] { 0, 1 }, UnionMode.Sparse))) + .Field(f => f.Name("map").DataType(new MapType(StringType.Default, Int32Type.Default)).Nullable(false)) + // Checking wider characters. .Field(f => f.Name("hello 你好 😄").DataType(BooleanType.Default).Nullable(true)) @@ -178,6 +180,8 @@ private static IEnumerable GetPythonFields() yield return pa.field("dense_union", pa.dense_union(List(pa.field("i64", pa.int64(), false), pa.field("f32", pa.float32(), true)))); yield return pa.field("sparse_union", pa.sparse_union(List(pa.field("i32", pa.int32(), true), pa.field("f64", pa.float64(), false)))); + yield return pa.field("map", pa.map_(pa.@string(), pa.int32()), false); + yield return pa.field("hello 你好 😄", pa.bool_(), true); } } @@ -512,8 +516,12 @@ public unsafe void ImportRecordBatch() ), /* field name */ List("i32", "s"), /* type codes */ List(3, 2)), + pa.MapArray.from_arrays( + List(0, 0, 1, 2, 4, 10), + pa.array(List("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")), + pa.array(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))), }), - new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9" }); + new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10" }); dynamic batch = table.to_batches()[0]; @@ -585,6 +593,11 @@ public unsafe void ImportRecordBatch() UnionArray col9 = (UnionArray)recordBatch.Column("col9"); Assert.Equal(5, col9.Length); Assert.True(col9 is DenseUnionArray); + + MapArray col10 = (MapArray)recordBatch.Column("col10"); + Assert.Equal(5, col10.Length); + Assert.Equal(new int[] { 0, 0, 1, 2, 4, 10}, col10.ValueOffsets.ToArray()); + Assert.Equal(new long?[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, ((Int64Array)col10.Values).ToList().ToArray()); } [SkippableFact] diff --git a/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs new file mode 100644 index 0000000000000..034f120f3f016 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/MapArrayTests.cs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class MapArrayTests + { + [Fact] + public void MapArray_Should_GetTuple() + { + MapType type = new MapType(StringType.Default, Int64Type.Default); + MapArray.Builder builder = new MapArray.Builder(type); + var keyBuilder = builder.KeyBuilder as StringArray.Builder; + var valueBuilder = builder.ValueBuilder as Int64Array.Builder; + + Tuple kv0 = Tuple.Create("test", (long?)1); + Tuple kv1 = Tuple.Create("other", (long?)123); + Tuple kv2 = Tuple.Create("kv", (long?)null); + + builder.Append(); + keyBuilder.Append("test"); + valueBuilder.Append(1); + + builder.AppendNull(); + + builder.Append(); + keyBuilder.Append("other"); + valueBuilder.Append(123); + keyBuilder.Append("kv"); + valueBuilder.AppendNull(); + + MapArray array = builder.Build(); + + Assert.Equal(new Tuple[] { kv0 }, array.GetTuples(0, GetKey, GetValue).ToArray()); + Assert.True(array.IsNull(1)); + Assert.Equal(new Tuple[] { kv1, kv2 }, array.GetTuples(2, GetKey, GetValue).ToArray()); + } + + [Fact] + public void MapArray_Should_GetKeyValuePairs() + { + MapType type = new MapType(StringType.Default, Int32Type.Default); + MapArray.Builder builder = new MapArray.Builder(type); + var keyBuilder = builder.KeyBuilder as StringArray.Builder; + var valueBuilder = builder.ValueBuilder as Int32Array.Builder; + + KeyValuePair kv0 = KeyValuePair.Create("test", (int?)1); + KeyValuePair kv1 = KeyValuePair.Create("other", (int?)123); + KeyValuePair kv2 = KeyValuePair.Create("kv", (int?)null); + + builder.Append(); + keyBuilder.Append("test"); + valueBuilder.Append(1); + + builder.AppendNull(); + + builder.Append(); + keyBuilder.Append("other"); + valueBuilder.Append(123); + keyBuilder.Append("kv"); + valueBuilder.AppendNull(); + + MapArray array = builder.Build(); + + Assert.Equal(new KeyValuePair[] { kv0 }, array.GetKeyValuePairs(0, GetKey, GetValue).ToArray()); + Assert.True(array.IsNull(1)); + Assert.Equal(new KeyValuePair[] { kv1, kv2 }, array.GetKeyValuePairs(2, GetKey, GetValue).ToArray()); + } + + private static string GetKey(StringArray array, int index) => array.GetString(index); + private static int? GetValue(Int32Array array, int index) => array.GetValue(index); + private static long? GetValue(Int64Array array, int index) => array.GetValue(index); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index 8b07a38c1b8c0..234dd63a79cd2 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -60,7 +60,7 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, recordBatches); Assert.Equal(20, table1.RowCount); - Assert.Equal(26, table1.ColumnCount); + Assert.Equal(27, table1.ColumnCount); FixedSizeBinaryType type = new FixedSizeBinaryType(17); Field newField1 = new Field(type.Name, type, false); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 9e2061e3428a9..e3a40dbdafd61 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -54,6 +54,7 @@ public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); builder.Field(CreateField(new Decimal128Type(10, 6), i)); builder.Field(CreateField(new Decimal256Type(16, 8), i)); + builder.Field(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); if (createAdvancedTypeArrays) { @@ -132,6 +133,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor { private int Length { get; } @@ -261,7 +263,6 @@ public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); - //Todo : Support various types var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1); for (var i = 0; i < Length; i++) @@ -279,7 +280,6 @@ public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); - //Todo : Support various types var valueBuilder = (Int32Array.Builder)builder.ValueBuilder; for (var i = 0; i < Length; i++) @@ -410,6 +410,25 @@ public void Visit(FixedSizeBinaryType type) Array = new FixedSizeBinaryArray(arrayData); } + public void Visit(MapType type) + { + MapArray.Builder builder = new MapArray.Builder(type).Reserve(Length); + var keyBuilder = builder.KeyBuilder.Reserve(Length + 1) as StringArray.Builder; + var valueBuilder = builder.ValueBuilder.Reserve(Length + 1) as Int32Array.Builder; + + for (var i = 0; i < Length; i++) + { + builder.Append(); + keyBuilder.Append(i.ToString()); + valueBuilder.Append(i); + } + //Add a value to check if Values.Length can exceed MapArray.Length + keyBuilder.Append("0"); + valueBuilder.Append(0); + + Array = builder.Build(); + } + public void Visit(NullType type) { Array = new NullArray(Length); diff --git a/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs new file mode 100644 index 0000000000000..ba306ec104e79 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/Types/MapTypeTests.cs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests.Types +{ + public class MapTypeTests + { + [Fact] + public void MapType_Should_HaveCorrectTypeId() + { + var type = new MapType(StringType.Default, Int32Type.Default); + Assert.Equal(ArrowTypeId.Map, type.TypeId); + } + + [Fact] + public void MapType_Should_HaveCorrectStructType() + { + var type = new MapType(BooleanType.Default, Int32Type.Default, true); + Assert.IsType(type.Fields[0].DataType); + Assert.Equal(2, type.KeyValueType.Fields.Count); + + Assert.Equal("entries", type.Fields[0].Name); + Assert.Equal("key", type.KeyField.Name); + Assert.Equal("value", type.ValueField.Name); + + Assert.False(type.Fields[0].IsNullable); + Assert.False(type.KeyField.IsNullable); + Assert.True(type.ValueField.IsNullable); + Assert.False(new MapType(BooleanType.Default, Int32Type.Default, false).ValueField.IsNullable); + + Assert.IsType(type.KeyField.DataType); + Assert.IsType(type.ValueField.DataType); + } + + [Fact] + public void MapType_Should_SetKeySorted() + { + Assert.False(new MapType(BooleanType.Default, Int32Type.Default).KeySorted); + Assert.True(new MapType(StringType.Default, Int32Type.Default, true, true).KeySorted); + } + } +} diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 8d0cc6b0b01a8..01672fbe7488a 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1815,8 +1815,7 @@ def _temp_path(): .skip_tester('C#') .skip_tester('JS'), - generate_map_case() - .skip_tester('C#'), + generate_map_case(), generate_non_canonical_map_case() .skip_tester('C#') diff --git a/docs/source/status.rst b/docs/source/status.rst index e2b3852e2229f..6024c1d3172bb 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -81,7 +81,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Struct | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Map | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +| Map | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Dense Union | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+