@@ -22,10 +22,12 @@ from cpython.pycapsule cimport PyCapsule_GetPointer
22
22
from cpython.unicode cimport PyUnicode_AsUTF8AndSize
23
23
from cpython cimport (
24
24
Py_buffer,
25
- PyObject_GetBuffer,
26
25
PyBuffer_Release,
27
26
PyBUF_ANY_CONTIGUOUS,
28
27
PyBUF_FORMAT,
28
+ PyBytes_FromStringAndSize,
29
+ PyObject_GetBuffer,
30
+ PyUnicode_FromStringAndSize,
29
31
)
30
32
31
33
from nanoarrow_c cimport (
@@ -43,6 +45,9 @@ from nanoarrow_c cimport (
43
45
ArrowArrayView,
44
46
ArrowArrayViewComputeNullCount,
45
47
ArrowArrayViewInitFromSchema,
48
+ ArrowArrayViewIsNull,
49
+ ArrowArrayViewGetStringUnsafe,
50
+ ArrowArrayViewGetBytesUnsafe,
46
51
ArrowArrayViewSetArray,
47
52
ArrowArrayViewSetArrayMinimal,
48
53
ArrowBitCountSet,
@@ -57,6 +62,7 @@ from nanoarrow_c cimport (
57
62
ArrowValidationLevel,
58
63
NANOARROW_BUFFER_TYPE_DATA,
59
64
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
65
+ NANOARROW_BUFFER_TYPE_DATA_VIEW,
60
66
NANOARROW_BUFFER_TYPE_TYPE_ID,
61
67
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
62
68
NANOARROW_BUFFER_TYPE_VALIDITY,
@@ -78,6 +84,7 @@ from nanoarrow._device cimport Device, CSharedSyncEvent
78
84
79
85
from nanoarrow._buffer cimport CBuffer, CBufferView
80
86
from nanoarrow._schema cimport CSchema, CLayout
87
+ from nanoarrow cimport _types
81
88
from nanoarrow._utils cimport (
82
89
alloc_c_array,
83
90
alloc_c_device_array,
@@ -189,13 +196,48 @@ cdef class CArrayView:
189
196
190
197
@property
def n_buffers(self):
    """Number of buffers exposed by this array view.

    When ``_types.is_data_view`` reports a data-view storage type, the
    count is derived from the view itself: two leading buffers, followed
    by ``n_variadic_buffers`` variadic buffers, followed by one trailing
    buffer. Every other storage type reports the count from its layout.
    """
    if not _types.is_data_view(self._ptr.storage_type):
        return self.layout.n_buffers

    # 2 leading buffers + the variadic buffers + 1 trailing buffer.
    return self._ptr.n_variadic_buffers + 3
193
203
194
- def buffer_type (self , int64_t i ):
204
def _buffer_info(self, int64_t i):
    """Describe buffer ``i`` of this view as a 5-tuple.

    The tuple is ``(buffer_type, data_type, element_size_bits, address,
    size_bytes)`` where ``address`` is the buffer pointer as an integer.

    For data-view storage types, buffers at index 2 and above are read
    from the underlying ``ArrowArray`` rather than from the layout:

    * index ``2 + n_variadic_buffers`` is reported as an INT64 data
      buffer of ``n_variadic_buffers * 8`` bytes;
    * every other index ``>= 2`` is reported as a STRING (for
      ``STRING_VIEW`` storage) or BINARY data buffer whose byte size is
      looked up at position ``i - 2`` of that INT64 buffer.

    All remaining cases are answered from the layout and the
    precomputed ``buffer_views``.

    Raises:
        IndexError: if ``i`` is negative or not less than ``n_buffers``.
    """
    if not (0 <= i < self.n_buffers):
        raise IndexError(f"{i} out of range [0, {self.n_buffers}]")

    cdef bint has_views = _types.is_data_view(self._ptr.storage_type)
    # Index of the buffer that follows the last variadic buffer.
    cdef int64_t sizes_index = 2 + self._ptr.n_variadic_buffers
    cdef int64_t* variadic_sizes

    if has_views and i == sizes_index:
        return (
            NANOARROW_BUFFER_TYPE_DATA,
            _types.INT64,
            64,
            <uintptr_t>self._ptr.array.buffers[i],
            self._ptr.n_variadic_buffers * 8,
        )

    if has_views and i >= 2:
        if int(self._ptr.storage_type) == _types.STRING_VIEW:
            variadic_type = _types.STRING
        else:
            variadic_type = _types.BINARY

        # The size of variadic buffer ``i`` is stored at slot ``i - 2``
        # of the INT64 buffer located at ``sizes_index``.
        variadic_sizes = <int64_t*>self._ptr.array.buffers[sizes_index]
        return (
            NANOARROW_BUFFER_TYPE_DATA,
            variadic_type,
            0,
            <uintptr_t>self._ptr.array.buffers[i],
            variadic_sizes[i - 2],
        )

    return (
        self._ptr.layout.buffer_type[i],
        self._ptr.layout.buffer_data_type[i],
        self._ptr.layout.element_size_bits[i],
        <uintptr_t>self._ptr.buffer_views[i].data.data,
        self._ptr.buffer_views[i].size_bytes,
    )
238
+
239
+ def buffer_type (self , int64_t i ):
240
+ buffer_type = self ._buffer_info(i)[0 ]
199
241
if buffer_type == NANOARROW_BUFFER_TYPE_VALIDITY:
200
242
return " validity"
201
243
elif buffer_type == NANOARROW_BUFFER_TYPE_TYPE_ID:
@@ -206,14 +248,17 @@ cdef class CArrayView:
206
248
return " data_offset"
207
249
elif buffer_type == NANOARROW_BUFFER_TYPE_DATA:
208
250
return " data"
251
+ elif buffer_type == NANOARROW_BUFFER_TYPE_DATA_VIEW:
252
+ return " data_view"
209
253
else :
210
254
return " none"
211
255
212
256
def buffer (self , int64_t i ):
213
- if i < 0 or i >= self .n_buffers:
214
- raise IndexError (f" {i} out of range [0, {self.n_buffers}]" )
257
+ _, data_type, element_size_bits, addr, size = self ._buffer_info(i)
215
258
216
- cdef ArrowBufferView* buffer_view = & (self ._ptr.buffer_views[i])
259
+ cdef ArrowBufferView buffer_view
260
+ buffer_view.data.data = < void * > addr
261
+ buffer_view.size_bytes = size
217
262
218
263
# Check the buffer size here because the error later is cryptic.
219
264
# Buffer sizes are set to -1 when they are "unknown", so because of errors
@@ -224,10 +269,10 @@ cdef class CArrayView:
224
269
225
270
return CBufferView(
226
271
self ._array_base,
227
- < uintptr_t > buffer_view.data.data ,
228
- buffer_view.size_bytes ,
229
- self ._ptr.layout.buffer_data_type[i] ,
230
- self ._ptr.layout. element_size_bits[i] ,
272
+ addr ,
273
+ size ,
274
+ data_type ,
275
+ element_size_bits,
231
276
self ._event
232
277
)
233
278
@@ -249,6 +294,24 @@ cdef class CArrayView:
249
294
250
295
return dictionary
251
296
297
def _iter_bytes(self, int64_t offset, int64_t length) -> bytes | None:
    """Generate ``length`` elements, starting at ``offset``, as ``bytes``.

    This is a generator: each step yields ``None`` when
    ``ArrowArrayViewIsNull`` reports the element as null, otherwise a new
    ``bytes`` object copied from the element's buffer view.

    Args:
        offset: index of the first element to yield.
        length: number of elements to yield (a count, not a stop index).

    No bounds checking is performed; the element getter used here is the
    ``Unsafe`` variant, so the caller must ensure
    ``offset + length`` does not exceed the number of elements.
    """
    cdef ArrowBufferView item_view
    cdef int64_t i
    # ``length`` is a count, so the exclusive stop index is offset + length.
    # Using ``length`` itself as the stop silently truncated the output
    # whenever ``offset`` was non-zero.
    cdef int64_t stop = offset + length

    for i in range(offset, stop):
        if ArrowArrayViewIsNull(self._ptr, i):
            yield None
        else:
            item_view = ArrowArrayViewGetBytesUnsafe(self._ptr, i)
            yield PyBytes_FromStringAndSize(item_view.data.as_char, item_view.size_bytes)
305
+
306
def _iter_str(self, int64_t offset, int64_t length) -> str | None:
    """Generate ``length`` elements, starting at ``offset``, as ``str``.

    This is a generator: each step yields ``None`` when
    ``ArrowArrayViewIsNull`` reports the element as null, otherwise a new
    ``str`` built with ``PyUnicode_FromStringAndSize`` from the element's
    string view.

    Args:
        offset: index of the first element to yield.
        length: number of elements to yield (a count, not a stop index).

    No bounds checking is performed; the element getter used here is the
    ``Unsafe`` variant, so the caller must ensure
    ``offset + length`` does not exceed the number of elements.
    """
    cdef ArrowStringView item_view
    cdef int64_t i
    # ``length`` is a count, so the exclusive stop index is offset + length.
    # Using ``length`` itself as the stop silently truncated the output
    # whenever ``offset`` was non-zero.
    cdef int64_t stop = offset + length

    for i in range(offset, stop):
        if ArrowArrayViewIsNull(self._ptr, i):
            yield None
        else:
            item_view = ArrowArrayViewGetStringUnsafe(self._ptr, i)
            yield PyUnicode_FromStringAndSize(item_view.data, item_view.size_bytes)
314
+
252
315
def __repr__(self):
    """Return the text built by ``_repr_utils.array_view_repr`` for this view."""
    text = _repr_utils.array_view_repr(self)
    return text
254
317
0 commit comments