Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35243: [C#] Implement MapType #37885

Merged
merged 28 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f4778f5
MapType : Init type and array
Platob Apr 21, 2023
e159ca9
ListType : seal type
Platob Apr 21, 2023
d11d75b
MapType : Add IPC tests
Platob Apr 21, 2023
3c5b5b9
code : clean
Platob Apr 21, 2023
10ae223
code : clean
Platob Apr 21, 2023
220bd97
MapArray : Add builder test
Platob Apr 21, 2023
96b3965
MapArray : test netcore 3 1 constraint
Platob Apr 21, 2023
a6b927a
MapArray : inner struct not null array
Platob Apr 21, 2023
093a248
MapArray : GetKeyValuePairs with constructor
Platob Apr 21, 2023
acc2bc7
MapArray : fix KeyValues slice
Platob Apr 21, 2023
e403994
MapArray : GetTuple
Platob Apr 21, 2023
62013c1
code : clean
Platob Apr 21, 2023
571cb5e
MapArray : rename GetTuple to GetTuples
Platob Apr 21, 2023
c9b4694
MapArray : IPC get meta
Platob Apr 21, 2023
fb01fe5
rm #if NETCOREAPP3_1_OR_GREATER constraint
Platob Apr 22, 2023
d06c753
Merge from main
CurtHagenlocher Sep 25, 2023
eb0e3d0
Support C API for maps
CurtHagenlocher Sep 25, 2023
bc703d6
Finish implementing Map.
CurtHagenlocher Sep 26, 2023
af69e08
One fix plus one test
CurtHagenlocher Sep 26, 2023
396427c
Add missing copyright header
CurtHagenlocher Sep 26, 2023
87faaf6
Make "keysSorted" a little more optional.
CurtHagenlocher Sep 26, 2023
cef8911
Actually fix "KeysSorted" problem.
CurtHagenlocher Sep 26, 2023
addd081
Address code review feedback
CurtHagenlocher Oct 2, 2023
d2377d6
Merge branch 'Map' of https://github.com/CurtHagenlocher/arrow into Map
CurtHagenlocher Oct 2, 2023
4f7e548
Fixed comment
CurtHagenlocher Oct 2, 2023
cfaff68
Merge from main
CurtHagenlocher Oct 2, 2023
5316971
Restore changes lost to the merge
CurtHagenlocher Oct 2, 2023
9ae4a0c
Merge branch 'main' of https://github.com/CurtHagenlocher/arrow into Map
CurtHagenlocher Oct 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ private class ArrayDataConcatenationVisitor :
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>
IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<MapType>
{
public ArrayData Result { get; private set; }
private readonly IReadOnlyList<ArrayData> _arrayDataList;
Expand Down Expand Up @@ -92,15 +93,7 @@ public void Visit(FixedWidthType type)

public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type);

public void Visit(ListType type)
{
CheckData(type, 2);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
ArrowBuffer offsetBuffer = ConcatenateOffsetBuffer();
ArrayData child = Concatenate(SelectChildren(0), _allocator);

Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child });
}
public void Visit(ListType type) => ConcatenateLists(type);

public void Visit(FixedSizeListType type)
{
Expand All @@ -114,14 +107,15 @@ public void Visit(FixedSizeListType type)
public void Visit(StructType type)
{
CheckData(type, 1);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
CurtHagenlocher marked this conversation as resolved.
Show resolved Hide resolved
List<ArrayData> children = new List<ArrayData>(type.Fields.Count);

for (int i = 0; i < type.Fields.Count; i++)
{
children.Add(Concatenate(SelectChildren(i), _allocator));
}

Result = new ArrayData(type, _arrayDataList[0].Length, _arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children);
Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer }, children);
}

public void Visit(UnionType type)
Expand Down Expand Up @@ -151,6 +145,8 @@ public void Visit(UnionType type)
Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children);
}

// Maps are concatenated with the same helper as lists (validity buffer, offset buffer, one child).
// NOTE(review): the type is first passed through UnsortedKey(), presumably because keys that are
// sorted within each input are not guaranteed to stay sorted in the combined array — confirm.
public void Visit(MapType type) => ConcatenateLists(type.UnsortedKey());
CurtHagenlocher marked this conversation as resolved.
Show resolved Hide resolved

public void Visit(IArrowType type)
{
throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet.");
Expand All @@ -175,6 +171,16 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type)
Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer });
}

/// <summary>
/// Concatenates list-shaped arrays (used for both list and map types): a validity buffer,
/// an offset buffer and a single child array.
/// </summary>
/// <param name="type">Type assigned to the concatenated <see cref="ArrayData"/>.</param>
private void ConcatenateLists(NestedType type)
{
    CheckData(type, 2);

    // Buffers are produced in layout order: validity first, then offsets.
    ArrowBuffer validity = ConcatenateValidityBuffer();
    ArrowBuffer offsets = ConcatenateOffsetBuffer();
    ArrowBuffer[] buffers = { validity, offsets };

    // The only child (index 0) is concatenated recursively with the same allocator.
    ArrayData[] children = { Concatenate(SelectChildren(0), _allocator) };

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers, children);
}

private ArrowBuffer ConcatenateValidityBuffer()
{
if (_totalNullCount == 0)
Expand Down
13 changes: 12 additions & 1 deletion csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ internal sealed class ArrayDataTypeComparer :
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>
IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<MapType>
{
private readonly IArrowType _expectedType;
private bool _dataTypeMatch;
Expand Down Expand Up @@ -132,6 +133,16 @@ public void Visit(UnionType actualType)
}
}

/// <summary>
/// Flags the data types as matching when the expected type is also a map, both maps agree on
/// <c>KeySorted</c>, and <c>CompareNested</c> accepts the pair.
/// </summary>
/// <param name="actualType">The map type being compared against the expected type.</param>
public void Visit(MapType actualType)
{
    if (!(_expectedType is MapType expectedType))
    {
        return;
    }

    if (expectedType.KeySorted != actualType.KeySorted)
    {
        return;
    }

    if (CompareNested(expectedType, actualType))
    {
        _dataTypeMatch = true;
    }
}

private static bool CompareNested(NestedType expectedType, NestedType actualType)
{
if (expectedType.Fields.Count != actualType.Fields.Count)
Expand Down
3 changes: 2 additions & 1 deletion csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public static IArrowArray BuildArray(ArrayData data)
return new TimestampArray(data);
case ArrowTypeId.List:
return new ListArray(data);
case ArrowTypeId.Map:
return new MapArray(data);
case ArrowTypeId.Struct:
return new StructArray(data);
case ArrowTypeId.Union:
Expand Down Expand Up @@ -86,7 +88,6 @@ public static IArrowArray BuildArray(ArrayData data)
case ArrowTypeId.FixedSizeList:
return new FixedSizeListArray(data);
case ArrowTypeId.Interval:
case ArrowTypeId.Map:
default:
throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
}
Expand Down
8 changes: 8 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ListArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ private ListArray(ArrayData data, IArrowArray values) : base(data)
Values = values;
}

// Constructor for derived array types that reuse the list layout under a different type id
// (MapArray calls it with ArrowTypeId.Map). The expected type id is supplied by the caller
// instead of being fixed by this class.
internal ListArray(ArrayData data, IArrowArray values, ArrowTypeId typeId) : base(data)
{
// Buffer count is checked before the data type, so a failure of the first check is reported first.
data.EnsureBufferCount(2);
data.EnsureDataType(typeId);
Values = values;
}

public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);


Expand Down
174 changes: 174 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/MapArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow
{
public class MapArray : ListArray // MapArray = ListArray(StructArray("key", "value"))
CurtHagenlocher marked this conversation as resolved.
Show resolved Hide resolved
{
// Same as ListArray.Builder, but with KeyBuilder
public new class Builder : IArrowArrayBuilder<MapArray, Builder>
{
/// <summary>Builder for the map keys; the constructor creates it from the key field's data type.</summary>
public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> KeyBuilder { get; }
/// <summary>Builder for the map values; the constructor creates it from the value field's data type.</summary>
public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }

/// <summary>Number of entries currently held by the offsets builder.</summary>
public int Length => ValueOffsetsBufferBuilder.Length;

// Receives the current key-builder length each time a slot is started, and once more on Build.
private ArrowBuffer.Builder<int> ValueOffsetsBufferBuilder { get; }

// Receives true for every Append() and false for every AppendNull().
private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }

/// <summary>Number of slots added through <c>AppendNull</c>.</summary>
public int NullCount { get; protected set; }

/// <summary>The map type this builder was created for.</summary>
public MapType DataType { get; }

/// <summary>
/// Creates a builder for the given map type. The key and value builders are obtained from
/// <c>ArrowArrayBuilderFactory</c> using the data types of the map's key and value fields.
/// </summary>
/// <param name="type">Map type of the arrays this builder produces.</param>
public Builder(MapType type)
{
    var keyType = type.KeyField.DataType;
    var valueType = type.ValueField.DataType;

    KeyBuilder = ArrowArrayBuilderFactory.Build(keyType);
    ValueBuilder = ArrowArrayBuilderFactory.Build(valueType);

    ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
    ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();

    DataType = type;
}

/// <summary>
/// Starts a new, non-null map slot.
///
/// Call this before appending the slot's entries to <see cref="KeyBuilder"/> and
/// <see cref="ValueBuilder"/>: the slot starts at the current length of the key builder.
/// </summary>
/// <returns>This builder, to allow call chaining.</returns>
public Builder Append()
{
    int slotStart = KeyBuilder.Length;

    ValueOffsetsBufferBuilder.Append(slotStart);
    ValidityBufferBuilder.Append(true);

    return this;
}

/// <summary>
/// Adds a null map slot: records the current key-builder length as the slot's offset,
/// marks the slot as invalid and increments <see cref="NullCount"/>.
/// </summary>
/// <returns>This builder, to allow call chaining.</returns>
public Builder AppendNull()
{
    int slotStart = KeyBuilder.Length;

    ValueOffsetsBufferBuilder.Append(slotStart);
    ValidityBufferBuilder.Append(false);
    NullCount += 1;

    return this;
}

/// <summary>
/// Builds a <see cref="MapArray"/> from everything appended so far.
/// </summary>
/// <remarks>
/// Building appends the closing offset to the offsets builder, so calling this method again
/// without clearing the builder adds one more offset (and therefore one more slot).
/// </remarks>
/// <param name="allocator">Allocator passed to the underlying buffer and array builders.</param>
/// <returns>The map array, whose length is the number of offsets minus one.</returns>
public MapArray Build(MemoryAllocator allocator = default)
{
    // Closing offset: the total number of key/value entries appended.
    int entryCount = KeyBuilder.Length;
    ValueOffsetsBufferBuilder.Append(entryCount);

    // A validity bitmap is only materialized when at least one slot is null.
    ArrowBuffer validity = ArrowBuffer.Empty;
    if (NullCount > 0)
    {
        validity = ValidityBufferBuilder.Build(allocator);
    }

    // The entries struct itself has no nulls, hence the empty bitmap and a null count of 0.
    IArrowArray keys = KeyBuilder.Build(allocator);
    IArrowArray values = ValueBuilder.Build(allocator);
    StructArray entries = new StructArray(
        DataType.KeyValueType, entryCount, new IArrowArray[] { keys, values }, ArrowBuffer.Empty, 0);

    int slotCount = Length - 1;
    ArrowBuffer offsets = ValueOffsetsBufferBuilder.Build(allocator);
    return new MapArray(DataType, slotCount, offsets, entries, validity, NullCount);
}

/// <summary>
/// Reserves room for <paramref name="capacity"/> + 1 entries in both the offsets builder
/// and the validity builder.
/// </summary>
/// <param name="capacity">Number of slots to reserve room for.</param>
/// <returns>This builder, to allow call chaining.</returns>
public Builder Reserve(int capacity)
{
    int entries = capacity + 1;

    ValueOffsetsBufferBuilder.Reserve(entries);
    ValidityBufferBuilder.Reserve(entries);

    return this;
}

/// <summary>
/// Resizes both the offsets builder and the validity builder to
/// <paramref name="length"/> + 1 entries.
/// </summary>
/// <param name="length">Requested number of slots.</param>
/// <returns>This builder, to allow call chaining.</returns>
public Builder Resize(int length)
{
    int entries = length + 1;

    ValueOffsetsBufferBuilder.Resize(entries);
    ValidityBufferBuilder.Resize(entries);

    return this;
}

/// <summary>
/// Resets the builder to its empty state: clears the offsets, the key and value builders and
/// the validity bitmap, and resets <see cref="NullCount"/>.
/// </summary>
/// <returns>This builder, to allow call chaining.</returns>
public Builder Clear()
{
    ValueOffsetsBufferBuilder.Clear();
    KeyBuilder.Clear();
    ValueBuilder.Clear();
    ValidityBufferBuilder.Clear();

    // NullCount is tracked separately from the bitmap; without this reset a reused builder
    // would report nulls from the previous array and build a validity buffer for them.
    NullCount = 0;

    return this;
}

}

/// <summary>The list child viewed as a struct array (null if the child is not a <see cref="StructArray"/>).</summary>
public StructArray KeyValues => base.Values as StructArray;
/// <summary>The first field of <see cref="KeyValues"/>.</summary>
public IArrowArray Keys => KeyValues.Fields[0];
/// <summary>The second field of <see cref="KeyValues"/>; hides the inherited <c>Values</c> property.</summary>
public new IArrowArray Values => KeyValues.Fields[1];

/// <summary>
/// Creates a map array from its parts. The buffers are stored in the order
/// (null bitmap, value offsets) and <paramref name="structs"/> becomes the single child.
/// </summary>
/// <param name="dataType">Data type stored in the array data.</param>
/// <param name="length">Length stored in the array data.</param>
/// <param name="valueOffsetsBuffer">Offsets buffer (second buffer of the array data).</param>
/// <param name="structs">Array holding the key/value entries; its data becomes the only child.</param>
/// <param name="nullBitmapBuffer">Null bitmap (first buffer of the array data).</param>
/// <param name="nullCount">Null count stored in the array data.</param>
/// <param name="offset">Offset stored in the array data.</param>
public MapArray(IArrowType dataType, int length,
ArrowBuffer valueOffsetsBuffer, IArrowArray structs,
ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
: this(
new ArrayData(
dataType, length, nullCount, offset, new[] { nullBitmapBuffer, valueOffsetsBuffer },
new[] { structs.Data }
), structs)
{
}

/// <summary>
/// Creates a map array over existing array data; the entries array is built from the
/// first child with <c>ArrowArrayFactory.BuildArray</c>.
/// </summary>
/// <param name="data">Array data whose first child holds the key/value entries.</param>
public MapArray(ArrayData data)
: this(data, ArrowArrayFactory.BuildArray(data.Children[0]))
{
}

// Shared target of the public constructors: forwards to the ListArray constructor that takes
// an explicit type id, passing ArrowTypeId.Map.
private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, ArrowTypeId.Map)
{
}

/// <summary>
/// Lazily enumerates the entries of the map stored in slot <paramref name="index"/> as tuples.
/// </summary>
/// <typeparam name="TKeyArray">Array type the keys child is cast to.</typeparam>
/// <typeparam name="K">Key type produced by <paramref name="getKey"/>.</typeparam>
/// <typeparam name="TValueArray">Array type the values child is cast to.</typeparam>
/// <typeparam name="V">Value type produced by <paramref name="getValue"/>.</typeparam>
/// <param name="index">Slot of this array whose entries are returned.</param>
/// <param name="getKey">Reads the key at the given position of the keys array.</param>
/// <param name="getValue">Reads the value at the given position of the values array.</param>
/// <returns>One tuple per entry of the slot, in storage order.</returns>
/// <exception cref="InvalidCastException">
/// Raised while enumerating if the keys or values child is not of the requested array type.
/// </exception>
public IEnumerable<Tuple<K, V>> GetTuples<TKeyArray, K, TValueArray, V>(int index, Func<TKeyArray, int, K> getKey, Func<TValueArray, int, V> getValue)
    where TKeyArray : Array where TValueArray : Array
{
    ReadOnlySpan<int> offsets = ValueOffsets;
    int start = offsets[index];
    int end = offsets[index + 1];

    // The offsets are positions within the entries child, so the key and value arrays are read
    // directly at start..end. Slicing the struct first would either leave its fields untouched
    // (a wasted rebuild on every call) or shift them, making these absolute positions wrong.
    StructArray entries = KeyValues;

    // Direct casts fail fast on a wrong type argument instead of handing null to the delegates.
    TKeyArray keyArray = (TKeyArray)entries.Fields[0];
    TValueArray valueArray = (TValueArray)entries.Fields[1];

    for (int i = start; i < end; i++)
    {
        yield return new Tuple<K, V>(getKey(keyArray, i), getValue(valueArray, i));
    }
}

/// <summary>
/// Lazily enumerates the entries of the map stored in slot <paramref name="index"/> as
/// key/value pairs.
/// </summary>
/// <typeparam name="TKeyArray">Array type the keys child is cast to.</typeparam>
/// <typeparam name="K">Key type produced by <paramref name="getKey"/>.</typeparam>
/// <typeparam name="TValueArray">Array type the values child is cast to.</typeparam>
/// <typeparam name="V">Value type produced by <paramref name="getValue"/>.</typeparam>
/// <param name="index">Slot of this array whose entries are returned.</param>
/// <param name="getKey">Reads the key at the given position of the keys array.</param>
/// <param name="getValue">Reads the value at the given position of the values array.</param>
/// <returns>One pair per entry of the slot, in storage order.</returns>
/// <exception cref="InvalidCastException">
/// Raised while enumerating if the keys or values child is not of the requested array type.
/// </exception>
public IEnumerable<KeyValuePair<K,V>> GetKeyValuePairs<TKeyArray, K, TValueArray, V>(int index, Func<TKeyArray, int, K> getKey, Func<TValueArray, int, V> getValue)
    where TKeyArray : Array where TValueArray : Array
{
    ReadOnlySpan<int> offsets = ValueOffsets;
    int start = offsets[index];
    int end = offsets[index + 1];

    // The offsets are positions within the entries child, so the key and value arrays are read
    // directly at start..end. Slicing the struct first would either leave its fields untouched
    // (a wasted rebuild on every call) or shift them, making these absolute positions wrong.
    StructArray entries = KeyValues;

    // Direct casts fail fast on a wrong type argument instead of handing null to the delegates.
    TKeyArray keyArray = (TKeyArray)entries.Fields[0];
    TValueArray valueArray = (TValueArray)entries.Fields[1];

    for (int i = start; i < end; i++)
    {
        yield return new KeyValuePair<K,V>(getKey(keyArray, i), getValue(valueArray, i));
    }
}
CurtHagenlocher marked this conversation as resolved.
Show resolved Hide resolved
}
}
3 changes: 3 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type)
}; ;
break;
case ArrowTypeId.Map:
MapType mapType = (MapType)type;
children = ProcessListChildren(cArray, mapType.Fields[0].DataType);
buffers = ImportListBuffers(cArray);
break;
case ArrowTypeId.Null:
buffers = System.Array.Empty<ArrowBuffer>();
Expand Down
6 changes: 3 additions & 3 deletions csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ private static string GetFormat(IArrowType datatype)
return $"+w:{fixedListType.ListSize}";
case StructType _: return "+s";
case UnionType u: return FormatUnion(u);
case MapType _: return "+m";
// Dictionary
case DictionaryType dictionaryType:
return GetFormat(dictionaryType.IndexType);
Expand All @@ -212,10 +213,9 @@ private static long GetFlags(IArrowType datatype, bool nullable = true)
}
}

if (datatype.TypeId == ArrowTypeId.Map)
if (datatype is MapType mapType && mapType.KeySorted)
{
// TODO: when we implement MapType, make sure to set the KEYS_SORTED flag.
throw new NotSupportedException("Exporting MapTypes is not supported.");
flags |= CArrowSchema.ArrowFlagMapKeysSorted;
}

return flags;
Expand Down
6 changes: 6 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ public ArrowType GetAsType()

return new FixedSizeListType(childField, width);
}
else if (format == "+m")
{
return new MapType(
ParseChildren("map").Single(),
(_cSchema->flags & CArrowSchema.ArrowFlagMapKeysSorted) != 0);
}

// TODO: Large list type
CurtHagenlocher marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
10 changes: 9 additions & 1 deletion csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class TypeVisitor :
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<DictionaryType>,
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<MapType>,
IArrowTypeVisitor<NullType>
{
private FlatBufferBuilder Builder { get; }
Expand Down Expand Up @@ -229,6 +230,13 @@ public void Visit(FixedSizeBinaryType type)
Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth));
}

/// <summary>
/// Serializes a map type; the only map-specific metadata written is the <c>KeySorted</c> flag.
/// </summary>
/// <param name="type">The map type to serialize.</param>
public void Visit(MapType type)
{
    var mapOffset = Flatbuf.Map.CreateMap(Builder, type.KeySorted);

    Result = FieldType.Build(Flatbuf.Type.Map, mapOffset);
}

public void Visit(NullType type)
{
Flatbuf.Null.StartNull(Builder);
Expand All @@ -239,7 +247,7 @@ public void Visit(NullType type)

public void Visit(IArrowType type)
{
throw new NotImplementedException();
throw new NotImplementedException($"Cannot visit type {type}");
}
}

Expand Down
7 changes: 7 additions & 0 deletions csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c
Debug.Assert(childFields != null);
Flatbuf.Union unionMetadata = field.Type<Flatbuf.Union>().Value;
return new Types.UnionType(childFields, unionMetadata.GetTypeIdsArray(), unionMetadata.Mode.ToArrow());
case Flatbuf.Type.Map:
if (childFields == null || childFields.Length != 1)
{
throw new InvalidDataException($"Map type must have exactly one struct child.");
}
Flatbuf.Map meta = field.Type<Flatbuf.Map>().Value;
return new Types.MapType(childFields[0], meta.KeysSorted);
default:
throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
}
Expand Down
Loading
Loading