Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35243: [C#] Implement MapType #35263

Closed
wants to merge 15 commits into from
3 changes: 2 additions & 1 deletion csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public static IArrowArray BuildArray(ArrayData data)
return new TimestampArray(data);
case ArrowTypeId.List:
return new ListArray(data);
case ArrowTypeId.Map:
return new MapArray(data);
case ArrowTypeId.Struct:
return new StructArray(data);
case ArrowTypeId.Union:
Expand All @@ -82,7 +84,6 @@ public static IArrowArray BuildArray(ArrayData data)
throw new NotSupportedException("Half-float arrays are not supported by this target framework.");
#endif
case ArrowTypeId.Interval:
case ArrowTypeId.Map:
default:
throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
}
Expand Down
8 changes: 8 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ListArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ private ListArray(ArrayData data, IArrowArray values) : base(data)
Values = values;
}

/// <summary>
/// Constructor used by derived array types (MapArray passes <c>ArrowTypeId.Map</c>)
/// that reuse the list layout but carry a different type id.
/// </summary>
/// <param name="data">Array data handed to the base constructor and validated below.</param>
/// <param name="values">Child array exposed through <c>Values</c>.</param>
/// <param name="typeId">Type id that <paramref name="data"/> is checked against.</param>
internal ListArray(ArrayData data, IArrowArray values, ArrowTypeId typeId) : base(data)
{
// Presumably validates that exactly two buffers (validity + offsets) are present - confirm against ArrayData.
data.EnsureBufferCount(2);
// The expected type id is supplied by the caller instead of being fixed by this class.
data.EnsureDataType(typeId);
Values = values;
}

/// <summary>
/// Hands this array and the supplied visitor to the two-argument <c>Accept</c> overload.
/// </summary>
/// <param name="visitor">Visitor to dispatch to.</param>
public override void Accept(IArrowArrayVisitor visitor)
{
    Accept(this, visitor);
}


Expand Down
174 changes: 174 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/MapArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow
{
/// <summary>
/// Array of maps. Stored as a <see cref="ListArray"/> whose child is a
/// <see cref="StructArray"/> with two fields: keys at index 0 and values at index 1.
/// </summary>
public class MapArray : ListArray // MapArray = ListArray(StructArray("key", "value"))
{
    /// <summary>
    /// Builder for <see cref="MapArray"/>. Mirrors the list builder pattern, but exposes
    /// separate key and value builders; slot offsets are taken from <see cref="KeyBuilder"/>.
    /// </summary>
    public new class Builder : IArrowArrayBuilder<MapArray, Builder>
    {
        /// <summary>Builder receiving the keys of every map entry.</summary>
        public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> KeyBuilder { get; }

        /// <summary>Builder receiving the values of every map entry.</summary>
        public IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }

        /// <summary>Number of offsets recorded so far (one per appended slot).</summary>
        public int Length => ValueOffsetsBufferBuilder.Length;

        private ArrowBuffer.Builder<int> ValueOffsetsBufferBuilder { get; }

        private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }

        /// <summary>Number of slots appended through <see cref="AppendNull"/>.</summary>
        public int NullCount { get; protected set; }

        /// <summary>Map type this builder produces.</summary>
        public MapType DataType { get; }

        /// <summary>
        /// Creates a builder whose key and value builders are obtained from
        /// <c>ArrowArrayBuilderFactory</c> for the key and value field types of <paramref name="type"/>.
        /// </summary>
        /// <param name="type">Map type describing the key and value fields.</param>
        public Builder(MapType type)
        {
            KeyBuilder = ArrowArrayBuilderFactory.Build(type.KeyField.DataType);
            ValueBuilder = ArrowArrayBuilderFactory.Build(type.ValueField.DataType);
            ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<int>();
            ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
            DataType = type;
        }

        /// <summary>
        /// Start a new map slot.
        ///
        /// Call this before appending the slot's entries to <see cref="KeyBuilder"/> and
        /// <see cref="ValueBuilder"/>: the slot begins at the current key count.
        /// </summary>
        /// <returns>This builder.</returns>
        public Builder Append()
        {
            ValueOffsetsBufferBuilder.Append(KeyBuilder.Length);
            ValidityBufferBuilder.Append(true);

            return this;
        }

        /// <summary>
        /// Append a null map slot. The slot still records an offset so later slots line up.
        /// </summary>
        /// <returns>This builder.</returns>
        public Builder AppendNull()
        {
            ValueOffsetsBufferBuilder.Append(KeyBuilder.Length);
            ValidityBufferBuilder.Append(false);
            NullCount++;

            return this;
        }

        /// <summary>
        /// Builds the map array from the appended slots.
        /// Note that this appends the closing offset to the offsets builder, so the
        /// builder's state is modified by the call.
        /// </summary>
        /// <param name="allocator">Allocator forwarded to the underlying buffer and child builders.</param>
        /// <returns>The built <see cref="MapArray"/>.</returns>
        public MapArray Build(MemoryAllocator allocator = default)
        {
            // Closing offset: end of the last slot.
            ValueOffsetsBufferBuilder.Append(KeyBuilder.Length);

            // The validity bitmap is only materialized when at least one slot is null.
            ArrowBuffer validityBuffer = NullCount > 0 ? ValidityBufferBuilder.Build(allocator) : ArrowBuffer.Empty;

            // The entries struct itself is built without a validity bitmap and with null count 0.
            StructArray structs = new StructArray(
                DataType.KeyValueType, KeyBuilder.Length,
                new IArrowArray[] { KeyBuilder.Build(allocator), ValueBuilder.Build(allocator) },
                ArrowBuffer.Empty, 0
            );

            // Length counts offsets, which is one more than the number of slots after the closing offset.
            return new MapArray(DataType, Length - 1, ValueOffsetsBufferBuilder.Build(allocator), structs, validityBuffer, NullCount);
        }

        /// <summary>Reserves room for <paramref name="capacity"/> + 1 entries in the offsets and validity builders.</summary>
        /// <returns>This builder.</returns>
        public Builder Reserve(int capacity)
        {
            ValueOffsetsBufferBuilder.Reserve(capacity + 1);
            ValidityBufferBuilder.Reserve(capacity + 1);
            return this;
        }

        /// <summary>Resizes the offsets and validity builders to <paramref name="length"/> + 1 entries.</summary>
        /// <returns>This builder.</returns>
        public Builder Resize(int length)
        {
            ValueOffsetsBufferBuilder.Resize(length + 1);
            ValidityBufferBuilder.Resize(length + 1);
            return this;
        }

        /// <summary>Clears the offsets, key, value and validity builders.</summary>
        /// <returns>This builder.</returns>
        public Builder Clear()
        {
            ValueOffsetsBufferBuilder.Clear();
            KeyBuilder.Clear();
            ValueBuilder.Clear();
            ValidityBufferBuilder.Clear();
            return this;
        }

    }

    /// <summary>The child entries array (the list's values viewed as a struct array).</summary>
    public StructArray KeyValues => base.Values as StructArray;

    /// <summary>Keys of all entries: field 0 of <see cref="KeyValues"/>.</summary>
    public IArrowArray Keys => KeyValues.Fields[0];

    /// <summary>Values of all entries: field 1 of <see cref="KeyValues"/>. Hides the list's child array.</summary>
    public new IArrowArray Values => KeyValues.Fields[1];

    /// <summary>
    /// Creates a map array from its buffers and the entries struct array.
    /// </summary>
    /// <param name="dataType">Data type of the array.</param>
    /// <param name="length">Number of map slots.</param>
    /// <param name="valueOffsetsBuffer">Offsets buffer (second buffer of the array data).</param>
    /// <param name="structs">Entries struct array used as the single child.</param>
    /// <param name="nullBitmapBuffer">Validity bitmap (first buffer of the array data).</param>
    /// <param name="nullCount">Number of null slots.</param>
    /// <param name="offset">Offset into the array data.</param>
    public MapArray(IArrowType dataType, int length,
        ArrowBuffer valueOffsetsBuffer, IArrowArray structs,
        ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
        : this(
            new ArrayData(
                dataType, length, nullCount, offset, new[] { nullBitmapBuffer, valueOffsetsBuffer },
                new[] { structs.Data }
            ), structs)
    {
    }

    /// <summary>
    /// Creates a map array from array data; the entries array is built from the first child.
    /// </summary>
    /// <param name="data">Array data whose first child holds the entries.</param>
    public MapArray(ArrayData data)
        : this(data, ArrowArrayFactory.BuildArray(data.Children[0]))
    {
    }

    private MapArray(ArrayData data, IArrowArray structs) : base(data, structs, ArrowTypeId.Map)
    {
    }

    /// <summary>
    /// Lazily enumerates the entries of the map at <paramref name="index"/> as tuples.
    /// </summary>
    /// <typeparam name="TKeyArray">Concrete array type of the keys.</typeparam>
    /// <typeparam name="K">Key type produced by <paramref name="getKey"/>.</typeparam>
    /// <typeparam name="TValueArray">Concrete array type of the values.</typeparam>
    /// <typeparam name="V">Value type produced by <paramref name="getValue"/>.</typeparam>
    /// <param name="index">Index of the map slot.</param>
    /// <param name="getKey">Reads the key at a position of the key array.</param>
    /// <param name="getValue">Reads the value at a position of the value array.</param>
    /// <returns>One tuple per entry of the slot, in storage order.</returns>
    /// <exception cref="InvalidCastException">
    /// Thrown on enumeration when the key or value array is not of the requested array type.
    /// </exception>
    public IEnumerable<Tuple<K, V>> GetTuples<TKeyArray, K, TValueArray, V>(int index, Func<TKeyArray, int, K> getKey, Func<TValueArray, int, V> getValue)
        where TKeyArray : Array where TValueArray : Array
    {
        return EnumerateEntries<TKeyArray, K, TValueArray, V, Tuple<K, V>>(
            index, getKey, getValue, (key, value) => new Tuple<K, V>(key, value));
    }

    /// <summary>
    /// Lazily enumerates the entries of the map at <paramref name="index"/> as key/value pairs.
    /// </summary>
    /// <typeparam name="TKeyArray">Concrete array type of the keys.</typeparam>
    /// <typeparam name="K">Key type produced by <paramref name="getKey"/>.</typeparam>
    /// <typeparam name="TValueArray">Concrete array type of the values.</typeparam>
    /// <typeparam name="V">Value type produced by <paramref name="getValue"/>.</typeparam>
    /// <param name="index">Index of the map slot.</param>
    /// <param name="getKey">Reads the key at a position of the key array.</param>
    /// <param name="getValue">Reads the value at a position of the value array.</param>
    /// <returns>One pair per entry of the slot, in storage order.</returns>
    /// <exception cref="InvalidCastException">
    /// Thrown on enumeration when the key or value array is not of the requested array type.
    /// </exception>
    public IEnumerable<KeyValuePair<K,V>> GetKeyValuePairs<TKeyArray, K, TValueArray, V>(int index, Func<TKeyArray, int, K> getKey, Func<TValueArray, int, V> getValue)
        where TKeyArray : Array where TValueArray : Array
    {
        return EnumerateEntries<TKeyArray, K, TValueArray, V, KeyValuePair<K, V>>(
            index, getKey, getValue, (key, value) => new KeyValuePair<K, V>(key, value));
    }

    // Shared iterator behind GetTuples and GetKeyValuePairs.
    private IEnumerable<TEntry> EnumerateEntries<TKeyArray, K, TValueArray, V, TEntry>(
        int index,
        Func<TKeyArray, int, K> getKey,
        Func<TValueArray, int, V> getValue,
        Func<K, V, TEntry> createEntry)
        where TKeyArray : Array where TValueArray : Array
    {
        // The span is only used before the first yield, so it never has to live across one.
        ReadOnlySpan<int> offsets = ValueOffsets;
        int start = offsets[index];
        int end = offsets[index + 1];

        // The offsets are absolute positions in the entries array, so read the unsliced
        // key/value children directly. Slicing the struct first and then indexing with
        // absolute positions is only correct while a struct slice leaves its children unsliced.
        TKeyArray keyArray = CastChild<TKeyArray>(Keys, "key");
        TValueArray valueArray = CastChild<TValueArray>(Values, "value");

        for (int i = start; i < end; i++)
        {
            yield return createEntry(getKey(keyArray, i), getValue(valueArray, i));
        }
    }

    // Casts a child array to the requested type, failing with a descriptive error instead of passing null on.
    private static TArray CastChild<TArray>(IArrowArray child, string role) where TArray : Array
    {
        TArray typed = child as TArray;
        if (typed == null)
        {
            string actual = child == null ? "null" : child.GetType().Name;
            throw new InvalidCastException(
                $"Map {role} array of type {actual} cannot be read as {typeof(TArray).Name}.");
        }

        return typed;
    }
}
}
1 change: 0 additions & 1 deletion csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ private ArrayData LoadPrimitiveField(
ByteBuffer bodyData,
IBufferCreator bufferCreator)
{

ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator);
if (!recordBatchEnumerator.MoveNextBuffer())
{
Expand Down
12 changes: 10 additions & 2 deletions csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class TypeVisitor :
IArrowTypeVisitor<Decimal128Type>,
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<DictionaryType>,
IArrowTypeVisitor<FixedSizeBinaryType>
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<MapType>
{
private FlatBufferBuilder Builder { get; }

Expand Down Expand Up @@ -218,9 +219,16 @@ public void Visit(FixedSizeBinaryType type)
Flatbuf.FixedSizeBinary.CreateFixedSizeBinary(Builder, type.ByteWidth));
}

/// <summary>
/// Stores the flatbuffer field type for a map: a <c>Flatbuf.Map</c> table created
/// with the type's <c>KeySorted</c> flag, tagged as <c>Flatbuf.Type.Map</c>.
/// </summary>
/// <param name="type">Map type being serialized.</param>
public void Visit(MapType type)
{
    var mapTable = Flatbuf.Map.CreateMap(Builder, type.KeySorted);
    Result = FieldType.Build(Flatbuf.Type.Map, mapTable);
}

/// <summary>
/// Fallback overload for types without a dedicated <c>Visit</c>; always throws.
/// </summary>
/// <param name="type">The type that could not be visited; included in the error message.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void Visit(IArrowType type)
{
    // Single throw carrying the offending type; a bare throw ahead of it made this message unreachable.
    throw new NotImplementedException($"Cannot visit type {type}");
}
}

Expand Down
7 changes: 7 additions & 0 deletions csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c
case Flatbuf.Type.Struct_:
Debug.Assert(childFields != null);
return new Types.StructType(childFields);
case Flatbuf.Type.Map:
if (childFields == null || childFields.Length != 1)
{
throw new InvalidDataException($"Map type must have exactly one struct child.");
}
Flatbuf.Map meta = field.Type<Flatbuf.Map>().Value;
return new Types.MapType(childFields[0], meta.KeysSorted);
default:
throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
}
Expand Down
2 changes: 0 additions & 2 deletions csharp/src/Apache.Arrow/Types/ListType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

using System;

namespace Apache.Arrow.Types
{
public sealed class ListType : NestedType
Expand Down
36 changes: 36 additions & 0 deletions csharp/src/Apache.Arrow/Types/MapType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Collections.Generic;

namespace Apache.Arrow.Types
{
/// <summary>
/// Arrow map type. Its single child field holds the entries, a struct whose first
/// field is the key and whose second field is the value.
/// </summary>
public sealed class MapType : NestedType // MapType = ListType(StructType("key", "value"))
{
    public override ArrowTypeId TypeId => ArrowTypeId.Map;
    public override string Name => "map";

    /// <summary>Keys-sorted flag supplied at construction.</summary>
    public readonly bool KeySorted;

    /// <summary>Struct type of the entries (the data type of the single child field).</summary>
    public StructType KeyValueType => Fields[0].DataType as StructType;

    /// <summary>Key field: first field of <see cref="KeyValueType"/>.</summary>
    public Field KeyField => KeyValueType.Fields[0];

    /// <summary>Value field: second field of <see cref="KeyValueType"/>.</summary>
    public Field ValueField => KeyValueType.Fields[1];

    /// <summary>
    /// Creates a map type from key and value data types. The key field is named "key" and
    /// is non-nullable; the value field is named "value".
    /// </summary>
    /// <param name="key">Data type of the keys.</param>
    /// <param name="value">Data type of the values.</param>
    /// <param name="nullable">Whether the value field is nullable.</param>
    /// <param name="keySorted">Keys-sorted flag.</param>
    public MapType(IArrowType key, IArrowType value, bool nullable = true, bool keySorted = false)
        : this(new Field("key", key, false), new Field("value", value, nullable), keySorted)
    {
    }

    /// <summary>Creates a map type from explicit key and value fields.</summary>
    /// <param name="key">Key field.</param>
    /// <param name="value">Value field.</param>
    /// <param name="keySorted">Keys-sorted flag.</param>
    public MapType(Field key, Field value, bool keySorted = false)
        : this(new StructType(new List<Field>() { key, value }), keySorted)
    {
    }

    /// <summary>
    /// Creates a map type from the entries struct type, wrapped in a non-nullable field named "entries".
    /// </summary>
    /// <param name="entries">Struct type holding the key and value fields.</param>
    /// <param name="keySorted">Keys-sorted flag.</param>
    public MapType(StructType entries, bool keySorted = false) : this(new Field("entries", entries, false), keySorted)
    {
    }

    /// <summary>Creates a map type from the entries field.</summary>
    /// <param name="entries">Field whose data type is a struct with exactly two fields (key, value).</param>
    /// <param name="keySorted">Keys-sorted flag.</param>
    /// <exception cref="System.ArgumentException">
    /// The entries field is not a struct with exactly two fields.
    /// </exception>
    public MapType(Field entries, bool keySorted = false) : base(entries)
    {
        // KeyField / ValueField index into the entries struct; reject a malformed child here
        // rather than failing later with a null reference or index error.
        StructType entriesType = entries?.DataType as StructType;
        if (entriesType == null || entriesType.Fields.Count != 2)
        {
            throw new System.ArgumentException(
                "Map entries field must be a struct with exactly two fields (key and value).",
                nameof(entries));
        }

        KeySorted = keySorted;
    }

    public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor);
}
}
92 changes: 92 additions & 0 deletions csharp/test/Apache.Arrow.Tests/MapArrayTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Linq;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
{
/// <summary>
/// Tests for reading map slots back from a built <see cref="MapArray"/>.
/// Both tests build three slots: one entry, a null slot, then two entries (the last with a null value).
/// </summary>
public class MapArrayTests
{
    [Fact]
    public void MapArray_Should_GetTuple()
    {
        var mapType = new MapType(StringType.Default, Int64Type.Default);
        var mapBuilder = new MapArray.Builder(mapType);
        var keys = mapBuilder.KeyBuilder as StringArray.Builder;
        var values = mapBuilder.ValueBuilder as Int64Array.Builder;

        // Slot 0: { "test": 1 }
        mapBuilder.Append();
        keys.Append("test");
        values.Append(1);

        // Slot 1: null
        mapBuilder.AppendNull();

        // Slot 2: { "other": 123, "kv": null }
        mapBuilder.Append();
        keys.Append("other");
        values.Append(123);
        keys.Append("kv");
        values.AppendNull();

        MapArray maps = mapBuilder.Build();

        var expectedFirst = new Tuple<string, long?>[]
        {
            new Tuple<string, long?>("test", 1),
        };
        var expectedThird = new Tuple<string, long?>[]
        {
            new Tuple<string, long?>("other", 123),
            new Tuple<string, long?>("kv", null),
        };

        Tuple<string, long?>[] actualFirst = maps.GetTuples<StringArray, string, Int64Array, long?>(0, GetKey, GetValue).ToArray();
        Assert.Equal(expectedFirst, actualFirst);

        Assert.True(maps.IsNull(1));

        Tuple<string, long?>[] actualThird = maps.GetTuples<StringArray, string, Int64Array, long?>(2, GetKey, GetValue).ToArray();
        Assert.Equal(expectedThird, actualThird);
    }

    [Fact]
    public void MapArray_Should_GetKeyValuePairs()
    {
        var mapType = new MapType(StringType.Default, Int32Type.Default);
        var mapBuilder = new MapArray.Builder(mapType);
        var keys = mapBuilder.KeyBuilder as StringArray.Builder;
        var values = mapBuilder.ValueBuilder as Int32Array.Builder;

        // Slot 0: { "test": 1 }
        mapBuilder.Append();
        keys.Append("test");
        values.Append(1);

        // Slot 1: null
        mapBuilder.AppendNull();

        // Slot 2: { "other": 123, "kv": null }
        mapBuilder.Append();
        keys.Append("other");
        values.Append(123);
        keys.Append("kv");
        values.AppendNull();

        MapArray maps = mapBuilder.Build();

        var expectedFirst = new KeyValuePair<string, int?>[]
        {
            new KeyValuePair<string, int?>("test", 1),
        };
        var expectedThird = new KeyValuePair<string, int?>[]
        {
            new KeyValuePair<string, int?>("other", 123),
            new KeyValuePair<string, int?>("kv", null),
        };

        KeyValuePair<string, int?>[] actualFirst = maps.GetKeyValuePairs<StringArray, string, Int32Array, int?>(0, GetKey, GetValue).ToArray();
        Assert.Equal(expectedFirst, actualFirst);

        Assert.True(maps.IsNull(1));

        KeyValuePair<string, int?>[] actualThird = maps.GetKeyValuePairs<StringArray, string, Int32Array, int?>(2, GetKey, GetValue).ToArray();
        Assert.Equal(expectedThird, actualThird);
    }

    // Accessors passed as method groups to the map readers above.
    private static string GetKey(StringArray array, int index)
    {
        return array.GetString(index);
    }

    private static int? GetValue(Int32Array array, int index)
    {
        return array.GetValue(index);
    }

    private static long? GetValue(Int64Array array, int index)
    {
        return array.GetValue(index);
    }
}
}
Loading