Skip to content

Commit

Permalink
GH-39341: [C#] Support Utf8View, BinaryView and ListView (#39342)
Browse files Browse the repository at this point in the history
### What changes are included in this PR?

This PR adds support for reading, writing, and representing the Utf8View, BinaryView, and ListView array types.

### Are these changes tested?

Yes

### Are there any user-facing changes?

New classes and APIs for Utf8View, BinaryView and ListView.

* Closes: #39341

Authored-by: Curt Hagenlocher <[email protected]>
Signed-off-by: Curt Hagenlocher <[email protected]>
  • Loading branch information
CurtHagenlocher authored Dec 27, 2023
1 parent ae627c0 commit 9e33d12
Show file tree
Hide file tree
Showing 42 changed files with 2,017 additions and 149 deletions.
102 changes: 99 additions & 3 deletions csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

using Apache.Arrow.Memory;
using Apache.Arrow.Scalars;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -46,8 +47,11 @@ private class ArrayDataConcatenationVisitor :
IArrowTypeVisitor<BooleanType>,
IArrowTypeVisitor<FixedWidthType>,
IArrowTypeVisitor<BinaryType>,
IArrowTypeVisitor<BinaryViewType>,
IArrowTypeVisitor<StringType>,
IArrowTypeVisitor<StringViewType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<ListViewType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
IArrowTypeVisitor<UnionType>,
Expand Down Expand Up @@ -84,17 +88,50 @@ public void Visit(FixedWidthType type)
{
CheckData(type, 2);
ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type);
ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(1, type);

Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer });
}

// Binary and string arrays are both handed to ConcatenateVariableBinaryArrayData.
public void Visit(BinaryType type)
{
    ConcatenateVariableBinaryArrayData(type);
}

public void Visit(StringType type)
{
    ConcatenateVariableBinaryArrayData(type);
}

// Binary-view and string-view arrays are both handed to ConcatenateBinaryViewArrayData.
public void Visit(BinaryViewType type)
{
    ConcatenateBinaryViewArrayData(type);
}

public void Visit(StringViewType type)
{
    ConcatenateBinaryViewArrayData(type);
}

// List arrays are handed to ConcatenateLists.
public void Visit(ListType type)
{
    ConcatenateLists(type);
}

/// <summary>
/// Concatenates list-view arrays and stores the combined data in Result.
/// Each source array is required (via CheckData) to carry three buffers; this method
/// reads offsets from buffer 1 and sizes from buffer 2, plus one child array.
/// </summary>
/// <param name="type">The list-view type shared by every array being concatenated.</param>
public void Visit(ListViewType type)
{
    CheckData(type, 3);
    ArrowBuffer validity = ConcatenateValidityBuffer();

    var rebasedOffsets = new ArrowBuffer.Builder<int>(_totalLength);

    // Running total of the child lengths of the sources already processed; every
    // offset of the current source is shifted by this amount so that it points
    // into the concatenated child array.
    int childBase = 0;

    foreach (ArrayData source in _arrayDataList)
    {
        if (source.Length > 0)
        {
            ReadOnlySpan<int> sourceOffsets = source.Buffers[1].Span.CastTo<int>().Slice(0, source.Length);
            for (int i = 0; i < sourceOffsets.Length; i++)
            {
                rebasedOffsets.Append(childBase + sourceOffsets[i]);
            }
        }

        // Empty sources still advance the base by their child length.
        childBase += source.Children[0].Length;
    }

    ArrowBuffer offsets = rebasedOffsets.Build(_allocator);

    // Sizes are copied unchanged as 32-bit values from buffer index 2.
    ArrowBuffer sizes = ConcatenateFixedWidthTypeValueBuffer(2, Int32Type.Default);
    ArrayData values = Concatenate(SelectChildren(0), _allocator);

    Result = new ArrayData(
        type,
        _totalLength,
        _totalNullCount,
        0,
        new ArrowBuffer[] { validity, offsets, sizes },
        new[] { values });
}

public void Visit(FixedSizeListType type)
{
CheckData(type, 1);
Expand Down Expand Up @@ -161,6 +198,15 @@ private void CheckData(IArrowType type, int expectedBufferCount)
}
}

/// <summary>
/// Validates every array queued for concatenation: each one is checked against the
/// type id of <paramref name="type"/> and against the given buffer count through
/// EnsureVariadicBufferCount.
/// </summary>
/// <param name="type">The Arrow type every input array is expected to have.</param>
/// <param name="expectedBufferCount">Buffer count forwarded to EnsureVariadicBufferCount.</param>
private void CheckDataVariadicCount(IArrowType type, int expectedBufferCount)
{
    foreach (ArrayData candidate in _arrayDataList)
    {
        // Type check first, then the buffer-count check, for each array in turn.
        candidate.EnsureDataType(type.TypeId);
        candidate.EnsureVariadicBufferCount(expectedBufferCount);
    }
}

private void ConcatenateVariableBinaryArrayData(IArrowType type)
{
CheckData(type, 3);
Expand All @@ -171,6 +217,26 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type)
Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer });
}

/// <summary>
/// Concatenates binary-view / string-view arrays and stores the combined data in Result.
/// The output buffers are: the concatenated validity bitmap, the concatenated view
/// buffer, and then the data buffers (index 2 and up) of each non-empty source array,
/// in source order.
/// </summary>
/// <param name="type">The view type shared by every array being concatenated.</param>
private void ConcatenateBinaryViewArrayData(IArrowType type)
{
    CheckDataVariadicCount(type, 2);
    ArrowBuffer validityBuffer = ConcatenateValidityBuffer();
    ArrowBuffer viewBuffer = ConcatenateViewBuffer(out int variadicBufferCount);
    ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicBufferCount];
    buffers[0] = validityBuffer;
    buffers[1] = viewBuffer;
    int index = 2;
    foreach (ArrayData arrayData in _arrayDataList)
    {
        // ConcatenateViewBuffer skips zero-length arrays without counting their data
        // buffers. Skip them here too: copying them would write past the end of
        // 'buffers' and shift the buffer indices already stored in the views.
        if (arrayData.Length == 0)
        {
            continue;
        }

        for (int i = 2; i < arrayData.Buffers.Length; i++)
        {
            buffers[index++] = arrayData.Buffers[i];
        }
    }

    Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers);
}

private void ConcatenateLists(NestedType type)
{
CheckData(type, 2);
Expand Down Expand Up @@ -206,7 +272,7 @@ private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex)
return builder.Build(_allocator);
}

private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(int bufferIndex, FixedWidthType type)
{
int typeByteWidth = type.BitWidth / 8;
var builder = new ArrowBuffer.Builder<byte>(_totalLength * typeByteWidth);
Expand All @@ -216,7 +282,7 @@ private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type)
int length = arrayData.Length;
int byteLength = length * typeByteWidth;

builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength));
builder.Append(arrayData.Buffers[bufferIndex].Span.Slice(0, byteLength));
}

return builder.Build(_allocator);
Expand Down Expand Up @@ -265,6 +331,36 @@ private ArrowBuffer ConcatenateOffsetBuffer()
return builder.Build(_allocator);
}

/// <summary>
/// Builds a single view buffer from buffer 1 of every non-empty source array.
/// Views whose length is at most BinaryView.MaxInlineLength are copied unchanged;
/// longer views are passed through AdjustBufferIndex with the number of data
/// buffers contributed by the sources processed before the current one.
/// </summary>
/// <param name="variadicBufferCount">
/// Receives the total number of buffers beyond the first two across all non-empty
/// source arrays. Zero-length arrays are skipped and contribute nothing.
/// </param>
/// <returns>The concatenated view buffer, built with the visitor's allocator.</returns>
private ArrowBuffer ConcatenateViewBuffer(out int variadicBufferCount)
{
    var views = new ArrowBuffer.Builder<BinaryView>(_totalLength);
    variadicBufferCount = 0;

    foreach (ArrayData source in _arrayDataList)
    {
        if (source.Length != 0)
        {
            ReadOnlySpan<BinaryView> sourceViews = source.Buffers[1].Span.CastTo<BinaryView>().Slice(0, source.Length);
            for (int i = 0; i < sourceViews.Length; i++)
            {
                BinaryView view = sourceViews[i];
                if (view.Length <= BinaryView.MaxInlineLength)
                {
                    // Short view: appended as-is.
                    views.Append(view);
                }
                else
                {
                    // Long view: shift its buffer index by the count accumulated so far.
                    views.Append(view.AdjustBufferIndex(variadicBufferCount));
                }
            }

            // Only after all of this source's views are emitted does its own
            // data-buffer count become part of the running total.
            variadicBufferCount += (source.Buffers.Length - 2);
        }
    }

    return views.Build(_allocator);
}

private ArrowBuffer ConcatenateUnionTypeBuffer()
{
var builder = new ArrowBuffer.Builder<byte>(_totalLength);
Expand Down
6 changes: 6 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,12 @@ internal static IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>>
return new DoubleArray.Builder();
case ArrowTypeId.String:
return new StringArray.Builder();
case ArrowTypeId.StringView:
return new StringViewArray.Builder();
case ArrowTypeId.Binary:
return new BinaryArray.Builder();
case ArrowTypeId.BinaryView:
return new BinaryViewArray.Builder();
case ArrowTypeId.Timestamp:
return new TimestampArray.Builder();
case ArrowTypeId.Date64:
Expand All @@ -70,6 +74,8 @@ internal static IArrowArrayBuilder<IArrowArray, IArrowArrayBuilder<IArrowArray>>
return new DurationArray.Builder(dataType as DurationType);
case ArrowTypeId.List:
return new ListArray.Builder(dataType as ListType);
case ArrowTypeId.ListView:
return new ListViewArray.Builder(dataType as ListViewType);
case ArrowTypeId.FixedSizeList:
return new FixedSizeListArray.Builder(dataType as FixedSizeListType);
case ArrowTypeId.Decimal128:
Expand Down
6 changes: 6 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,20 @@ public static IArrowArray BuildArray(ArrayData data)
return new DoubleArray(data);
case ArrowTypeId.String:
return new StringArray(data);
case ArrowTypeId.StringView:
return new StringViewArray(data);
case ArrowTypeId.FixedSizedBinary:
return new FixedSizeBinaryArray(data);
case ArrowTypeId.Binary:
return new BinaryArray(data);
case ArrowTypeId.BinaryView:
return new BinaryViewArray(data);
case ArrowTypeId.Timestamp:
return new TimestampArray(data);
case ArrowTypeId.List:
return new ListArray(data);
case ArrowTypeId.ListView:
return new ListViewArray(data);
case ArrowTypeId.Map:
return new MapArray(data);
case ArrowTypeId.Struct:
Expand Down
Loading

0 comments on commit 9e33d12

Please sign in to comment.