Skip to content

Commit e52ff0d

Browse files
authored
feat: Add ArrowArrayView accessors to inspect buffer properties (#638)
This PR adds accessor functions for the buffer view, buffer type, buffer data type, and element bit width of each buffer in an `ArrowArrayView`. Before string/binary view support was added, these properties were read by directly accessing the `layout` and `buffer_views` members; for string/binary view arrays, however, that approach required special-casing and some duplicated code in the R and Python bindings. This PR also removes the dependence on the `ArrowArrayView::array` member, since this member is optional (i.e., the data backing an `ArrowArrayView` need not be related to an actual `ArrowArray`).
1 parent 5b98b3d commit e52ff0d

File tree

13 files changed

+381
-167
lines changed

13 files changed

+381
-167
lines changed

python/src/nanoarrow/_array.pyx

+19-36
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,13 @@ from nanoarrow_c cimport (
4646
ArrowArrayViewComputeNullCount,
4747
ArrowArrayViewInitFromSchema,
4848
ArrowArrayViewIsNull,
49-
ArrowArrayViewGetStringUnsafe,
5049
ArrowArrayViewGetBytesUnsafe,
50+
ArrowArrayViewGetBufferDataType,
51+
ArrowArrayViewGetBufferElementSizeBits,
52+
ArrowArrayViewGetBufferType,
53+
ArrowArrayViewGetBufferView,
54+
ArrowArrayViewGetNumBuffers,
55+
ArrowArrayViewGetStringUnsafe,
5156
ArrowArrayViewSetArray,
5257
ArrowArrayViewSetArrayMinimal,
5358
ArrowBitCountSet,
@@ -62,7 +67,8 @@ from nanoarrow_c cimport (
6267
ArrowValidationLevel,
6368
NANOARROW_BUFFER_TYPE_DATA,
6469
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
65-
NANOARROW_BUFFER_TYPE_DATA_VIEW,
70+
NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
71+
NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
6672
NANOARROW_BUFFER_TYPE_TYPE_ID,
6773
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
6874
NANOARROW_BUFFER_TYPE_VALIDITY,
@@ -84,7 +90,6 @@ from nanoarrow._device cimport Device, CSharedSyncEvent
8490

8591
from nanoarrow._buffer cimport CBuffer, CBufferView
8692
from nanoarrow._schema cimport CSchema, CLayout
87-
from nanoarrow cimport _types
8893
from nanoarrow._utils cimport (
8994
alloc_c_array,
9095
alloc_c_device_array,
@@ -196,44 +201,20 @@ cdef class CArrayView:
196201

197202
@property
198203
def n_buffers(self):
199-
if _types.is_data_view(self._ptr.storage_type):
200-
return 2 + self._ptr.n_variadic_buffers + 1
201-
202-
return self.layout.n_buffers
204+
return ArrowArrayViewGetNumBuffers(self._ptr)
203205

204206
def _buffer_info(self, int64_t i):
205207
if i < 0 or i >= self.n_buffers:
206208
raise IndexError(f"{i} out of range [0, {self.n_buffers}]")
207209

208-
if (
209-
_types.is_data_view(self._ptr.storage_type)
210-
and i == (2 + self._ptr.n_variadic_buffers)
211-
):
212-
return (
213-
NANOARROW_BUFFER_TYPE_DATA,
214-
_types.INT64,
215-
64,
216-
<uintptr_t>self._ptr.array.buffers[i],
217-
(self._ptr.n_variadic_buffers) * 8
218-
)
219-
elif (
220-
_types.is_data_view(self._ptr.storage_type)
221-
and i >= 2
222-
):
223-
return (
224-
NANOARROW_BUFFER_TYPE_DATA,
225-
_types.STRING if int(self._ptr.storage_type) == _types.STRING_VIEW else _types.BINARY,
226-
0,
227-
<uintptr_t>self._ptr.array.buffers[i],
228-
(<int64_t*>self._ptr.array.buffers[2 + self._ptr.n_variadic_buffers])[i - 2]
229-
)
210+
cdef ArrowBufferView view = ArrowArrayViewGetBufferView(self._ptr, i)
230211

231212
return (
232-
self._ptr.layout.buffer_type[i],
233-
self._ptr.layout.buffer_data_type[i],
234-
self._ptr.layout.element_size_bits[i],
235-
<uintptr_t>self._ptr.buffer_views[i].data.data,
236-
self._ptr.buffer_views[i].size_bytes
213+
ArrowArrayViewGetBufferType(self._ptr, i),
214+
ArrowArrayViewGetBufferDataType(self._ptr, i),
215+
ArrowArrayViewGetBufferElementSizeBits(self._ptr, i),
216+
<uintptr_t>view.data.data,
217+
view.size_bytes
237218
)
238219

239220
def buffer_type(self, int64_t i):
@@ -248,8 +229,10 @@ cdef class CArrayView:
248229
return "data_offset"
249230
elif buffer_type == NANOARROW_BUFFER_TYPE_DATA:
250231
return "data"
251-
elif buffer_type == NANOARROW_BUFFER_TYPE_DATA_VIEW:
252-
return "data_view"
232+
elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
233+
return "variadic_data"
234+
elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
235+
return "variadic_size"
253236
else:
254237
return "none"
255238

python/tests/test_c_array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def test_c_array_from_iterable_bytes():
337337
na.c_array([buf_2d], na.binary())
338338

339339

340-
def test_c_array_from_iterable__view():
340+
def test_c_array_from_iterable_view():
341341
string = na.c_array(
342342
[b"abc", None, b"a string longer than 12 bytes"], na.binary_view()
343343
)

r/src/array.c

+4-32
Original file line numberDiff line numberDiff line change
@@ -370,38 +370,10 @@ static SEXP borrow_buffer(struct ArrowArrayView* array_view, int64_t i, SEXP she
370370
SEXP buffer_class = PROTECT(Rf_allocVector(STRSXP, 2));
371371
SET_STRING_ELT(buffer_class, 1, Rf_mkChar("nanoarrow_buffer"));
372372

373-
struct ArrowBufferView view;
374-
enum ArrowBufferType buffer_type;
375-
enum ArrowType data_type;
376-
int64_t element_size_bits;
377-
if ((array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
378-
array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) &&
379-
i >= NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
380-
view.data.data = array_view->array->buffers[i];
381-
382-
if (i == (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
383-
view.size_bytes = array_view->n_variadic_buffers * sizeof(int64_t);
384-
buffer_type = NANOARROW_BUFFER_TYPE_DATA;
385-
data_type = NANOARROW_TYPE_INT64;
386-
element_size_bits = 64;
387-
} else {
388-
view.size_bytes =
389-
array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
390-
buffer_type = NANOARROW_BUFFER_TYPE_DATA;
391-
392-
if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
393-
data_type = NANOARROW_TYPE_STRING;
394-
} else {
395-
data_type = NANOARROW_TYPE_BINARY;
396-
}
397-
element_size_bits = 0;
398-
}
399-
} else {
400-
view = array_view->buffer_views[i];
401-
buffer_type = array_view->layout.buffer_type[i];
402-
data_type = array_view->layout.buffer_data_type[i];
403-
element_size_bits = array_view->layout.element_size_bits[i];
404-
}
373+
struct ArrowBufferView view = ArrowArrayViewGetBufferView(array_view, i);
374+
enum ArrowBufferType buffer_type = ArrowArrayViewGetBufferType(array_view, i);
375+
enum ArrowType data_type = ArrowArrayViewGetBufferDataType(array_view, i);
376+
int64_t element_size_bits = ArrowArrayViewGetBufferElementSizeBits(array_view, i);
405377

406378
SEXP buffer_xptr =
407379
PROTECT(buffer_borrowed_xptr(view.data.data, view.size_bytes, shelter));

r/src/buffer.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,11 @@ SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr) {
163163
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
164164
buffer_type_string = "union_offset";
165165
break;
166-
case NANOARROW_BUFFER_TYPE_DATA_VIEW:
167-
buffer_type_string = "data_view";
166+
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
167+
buffer_type_string = "variadic_data";
168+
break;
169+
case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
170+
buffer_type_string = "variadic_size";
168171
break;
169172
default:
170173
buffer_type_string = "unknown";

r/tests/testthat/_snaps/array.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
$ offset : int 0
1010
$ buffers :List of 3
1111
..$ :<nanoarrow_buffer validity<bool>[null] ``
12-
..$ :<nanoarrow_buffer data_view<string_view>[26][416 b]>`
13-
..$ :<nanoarrow_buffer data<int64>[null] ``
12+
..$ :<nanoarrow_buffer data<string_view>[26][416 b]>`
13+
..$ :<nanoarrow_buffer variadic_size<int64>[null] ``
1414
$ dictionary: NULL
1515
$ children : list()
1616

@@ -25,9 +25,9 @@
2525
$ offset : int 0
2626
$ buffers :List of 4
2727
..$ :<nanoarrow_buffer validity<bool>[null] ``
28-
..$ :<nanoarrow_buffer data_view<string_view>[1][16 b]>`
29-
..$ :<nanoarrow_buffer data<string>[35 b]> `this string is longer than 12 ...`
30-
..$ :<nanoarrow_buffer data<int64>[1][8 b]> `35`
28+
..$ :<nanoarrow_buffer data<string_view>[1][16 b]>`
29+
..$ :<nanoarrow_buffer variadic_data<string>[35 b]> `this string is longer...`
30+
..$ :<nanoarrow_buffer variadic_size<int64>[1][8 b]> `35`
3131
$ dictionary: NULL
3232
$ children : list()
3333

src/nanoarrow/common/array.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -696,11 +696,12 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
696696
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) /
697697
8;
698698
continue;
699-
case NANOARROW_BUFFER_TYPE_DATA_VIEW:
700699
case NANOARROW_BUFFER_TYPE_TYPE_ID:
701700
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
702701
array_view->buffer_views[i].size_bytes = element_size_bytes * length;
703702
continue;
703+
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
704+
case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
704705
case NANOARROW_BUFFER_TYPE_NONE:
705706
array_view->buffer_views[i].size_bytes = 0;
706707
continue;
@@ -734,6 +735,7 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
734735
array_view->length = array->length;
735736
array_view->null_count = array->null_count;
736737
array_view->variadic_buffer_sizes = NULL;
738+
array_view->variadic_buffers = NULL;
737739
array_view->n_variadic_buffers = 0;
738740

739741
int64_t buffers_required = 0;
@@ -767,6 +769,7 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
767769
const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1);
768770
array_view->n_variadic_buffers = nvariadic_buf;
769771
buffers_required += nvariadic_buf + 1;
772+
array_view->variadic_buffers = array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
770773
array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 1];
771774
}
772775

@@ -863,9 +866,10 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
863866
break;
864867
case NANOARROW_BUFFER_TYPE_TYPE_ID:
865868
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
866-
case NANOARROW_BUFFER_TYPE_DATA_VIEW:
867869
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
868870
break;
871+
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
872+
case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
869873
case NANOARROW_BUFFER_TYPE_NONE:
870874
continue;
871875
}

0 commit comments

Comments
 (0)