Skip to content

Commit e77a656

Browse files
authored
feat: Implement LIST_VIEW and LARGE_LIST_VIEW support (#710)
1 parent 24b78e3 commit e77a656

File tree

8 files changed

+759
-6
lines changed

8 files changed

+759
-6
lines changed

src/nanoarrow/common/array.c

+58-2
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
123123
case NANOARROW_TYPE_LARGE_STRING:
124124
case NANOARROW_TYPE_BINARY:
125125
case NANOARROW_TYPE_LARGE_BINARY:
126+
case NANOARROW_TYPE_LIST_VIEW:
127+
case NANOARROW_TYPE_LARGE_LIST_VIEW:
126128
array->n_buffers = 3;
127129
break;
128130

@@ -169,6 +171,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
169171
private_data->n_variadic_buffers = 0;
170172
private_data->variadic_buffers = NULL;
171173
private_data->variadic_buffer_sizes = NULL;
174+
private_data->list_view_offset = 0;
172175

173176
array->private_data = private_data;
174177
array->buffers = (const void**)(private_data->buffer_data);
@@ -700,6 +703,8 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
700703
continue;
701704
case NANOARROW_BUFFER_TYPE_TYPE_ID:
702705
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
706+
case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
707+
case NANOARROW_BUFFER_TYPE_SIZE:
703708
array_view->buffer_views[i].size_bytes = element_size_bytes * length;
704709
continue;
705710
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
@@ -856,12 +861,19 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
856861

857862
min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
858863
break;
864+
case NANOARROW_BUFFER_TYPE_SIZE:
865+
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
866+
break;
859867
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
860868
// Probably don't want/need to rely on the producer to have allocated an
861869
// offsets buffer of length 1 for a zero-size array
862870
min_buffer_size_bytes =
863871
(offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
864872
break;
873+
case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
874+
min_buffer_size_bytes =
875+
(offset_plus_length != 0) * element_size_bytes * offset_plus_length;
876+
break;
865877
case NANOARROW_BUFFER_TYPE_DATA:
866878
min_buffer_size_bytes =
867879
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
@@ -898,6 +910,8 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
898910
case NANOARROW_TYPE_LARGE_LIST:
899911
case NANOARROW_TYPE_FIXED_SIZE_LIST:
900912
case NANOARROW_TYPE_MAP:
913+
case NANOARROW_TYPE_LIST_VIEW:
914+
case NANOARROW_TYPE_LARGE_LIST_VIEW:
901915
if (array_view->n_children != 1) {
902916
ArrowErrorSet(error,
903917
"Expected 1 child of %s array but found %" PRId64 " child arrays",
@@ -1177,10 +1191,11 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
11771191

11781192
if (array_view->children[0]->length < last_offset) {
11791193
ArrowErrorSet(error,
1180-
"Expected child of large list array to have length >= %" PRId64
1194+
"Expected child of %s array to have length >= %" PRId64
11811195
" but found array "
11821196
"with length %" PRId64,
1183-
last_offset, array_view->children[0]->length);
1197+
ArrowTypeString(array_view->storage_type), last_offset,
1198+
array_view->children[0]->length);
11841199
return EINVAL;
11851200
}
11861201
}
@@ -1423,6 +1438,47 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
14231438
}
14241439
}
14251440

1441+
if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW ||
1442+
array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) {
1443+
int64_t child_len = array_view->children[0]->length;
1444+
1445+
struct ArrowBufferView offsets, sizes;
1446+
offsets.data.data = array_view->buffer_views[1].data.data;
1447+
sizes.data.data = array_view->buffer_views[2].data.data;
1448+
1449+
for (int64_t i = array_view->offset; i < array_view->length + array_view->offset;
1450+
i++) {
1451+
int64_t offset, size;
1452+
if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW) {
1453+
offset = offsets.data.as_int32[i];
1454+
size = sizes.data.as_int32[i];
1455+
} else {
1456+
offset = offsets.data.as_int64[i];
1457+
size = sizes.data.as_int64[i];
1458+
}
1459+
1460+
if (offset < 0) {
1461+
ArrowErrorSet(error, "Invalid negative offset %" PRId64 " at index %" PRId64,
1462+
offset, i);
1463+
return EINVAL;
1464+
}
1465+
1466+
if (size < 0) {
1467+
ArrowErrorSet(error, "Invalid negative size %" PRId64 " at index %" PRId64, size,
1468+
i);
1469+
return EINVAL;
1470+
}
1471+
1472+
if ((offset + size) > child_len) {
1473+
ArrowErrorSet(error,
1474+
"Offset: %" PRId64 " + size: %" PRId64 " at index: %" PRId64
1475+
" exceeds length of child view: %" PRId64,
1476+
offset, size, i, child_len);
1477+
return EINVAL;
1478+
}
1479+
}
1480+
}
1481+
14261482
// Recurse for children
14271483
for (int64_t i = 0; i < array_view->n_children; i++) {
14281484
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));

0 commit comments

Comments
 (0)