@@ -67,15 +67,14 @@ from nanoarrow_c cimport (
67
67
NANOARROW_OK,
68
68
)
69
69
70
-
71
70
from nanoarrow_device_c cimport (
72
71
ARROW_DEVICE_CPU,
73
72
ArrowDeviceType,
74
73
ArrowDeviceArray,
75
74
ArrowDeviceArrayInit,
76
75
)
77
76
78
- from nanoarrow._device cimport Device
77
+ from nanoarrow._device cimport Device, CSharedSyncEvent
79
78
80
79
from nanoarrow._buffer cimport CBuffer, CBufferView
81
80
from nanoarrow._schema cimport CSchema, CLayout
@@ -107,7 +106,7 @@ cdef class CArrayView:
107
106
def __cinit__ (self , object base , uintptr_t addr ):
108
107
self ._base = base
109
108
self ._ptr = < ArrowArrayView* > addr
110
- self ._device = DEVICE_CPU
109
+ self ._event = CSharedSyncEvent( DEVICE_CPU)
111
110
112
111
def _set_array (self , CArray array , Device device = DEVICE_CPU):
113
112
cdef Error error = Error()
@@ -120,7 +119,8 @@ cdef class CArrayView:
120
119
121
120
error.raise_message_not_ok(" ArrowArrayViewSetArray()" , code)
122
121
self ._array_base = array._base
123
- self ._device = device
122
+ self ._event = CSharedSyncEvent(device, < uintptr_t> array._sync_event)
123
+
124
124
return self
125
125
126
126
@property
@@ -160,7 +160,7 @@ cdef class CArrayView:
160
160
self ._ptr.null_count = 0
161
161
elif validity_bits == NULL :
162
162
self ._ptr.null_count = 0
163
- elif self ._device is DEVICE_CPU:
163
+ elif self ._event.device is DEVICE_CPU:
164
164
self ._ptr.null_count = ArrowArrayViewComputeNullCount(self ._ptr)
165
165
166
166
return self ._ptr.null_count
@@ -178,7 +178,8 @@ cdef class CArrayView:
178
178
< uintptr_t> self ._ptr.children[i]
179
179
)
180
180
181
- child._device = self ._device
181
+ child._event = self ._event
182
+
182
183
return child
183
184
184
185
@property
@@ -227,7 +228,7 @@ cdef class CArrayView:
227
228
buffer_view.size_bytes,
228
229
self ._ptr.layout.buffer_data_type[i],
229
230
self ._ptr.layout.element_size_bits[i],
230
- self ._device
231
+ self ._event
231
232
)
232
233
233
234
@property
@@ -239,11 +240,14 @@ cdef class CArrayView:
239
240
def dictionary (self ):
240
241
if self ._ptr.dictionary == NULL :
241
242
return None
242
- else :
243
- return CArrayView(
244
- self ,
245
- < uintptr_t> self ._ptr.dictionary
246
- )
243
+
244
+ cdef CArrayView dictionary = CArrayView(
245
+ self ,
246
+ < uintptr_t> self ._ptr.dictionary
247
+ )
248
+ dictionary._event = self ._event
249
+
250
+ return dictionary
247
251
248
252
def __repr__ (self ):
249
253
return _repr_utils.array_view_repr(self )
@@ -288,11 +292,13 @@ cdef class CArray:
288
292
self ._ptr = < ArrowArray* > addr
289
293
self ._schema = schema
290
294
self ._device_type = ARROW_DEVICE_CPU
291
- self ._device_id = 0
295
+ self ._device_id = - 1
296
+ self ._sync_event = NULL
292
297
293
- cdef _set_device(self , ArrowDeviceType device_type, int64_t device_id):
298
+ cdef _set_device(self , ArrowDeviceType device_type, int64_t device_id, void * sync_event ):
294
299
self ._device_type = device_type
295
300
self ._device_id = device_id
301
+ self ._sync_event = sync_event
296
302
297
303
@staticmethod
298
304
def _import_from_c_capsule (schema_capsule , array_capsule ) -> CArray:
@@ -350,7 +356,8 @@ cdef class CArray:
350
356
c_array_out.offset = c_array_out.offset + start
351
357
c_array_out.length = stop - start
352
358
cdef CArray out = CArray(base, < uintptr_t> c_array_out, self ._schema)
353
- out._set_device(self ._device_type, self ._device_id)
359
+ out._set_device(self ._device_type, self ._device_id, self ._sync_event)
360
+
354
361
return out
355
362
356
363
def __arrow_c_array__ (self , requested_schema = None ):
@@ -466,7 +473,7 @@ cdef class CArray:
466
473
< uintptr_t> self ._ptr.children[i],
467
474
self ._schema.child(i)
468
475
)
469
- out._set_device(self ._device_type, self ._device_id)
476
+ out._set_device(self ._device_type, self ._device_id, self ._sync_event )
470
477
return out
471
478
472
479
@property
@@ -480,7 +487,7 @@ cdef class CArray:
480
487
cdef CArray out
481
488
if self._ptr.dictionary != NULL:
482
489
out = CArray(self , < uintptr_t> self ._ptr.dictionary, self ._schema.dictionary)
483
- out._set_device(self._device_type , self._device_id )
490
+ out._set_device(self._device_type , self._device_id , self. _sync_event )
484
491
return out
485
492
else:
486
493
return None
@@ -497,22 +504,24 @@ cdef class CArrayBuilder:
497
504
"""
498
505
cdef CArray c_array
499
506
cdef ArrowArray* _ptr
507
+ cdef Device _device
500
508
cdef bint _can_validate
501
509
502
- def __cinit__(self , CArray array ):
510
+ def __cinit__(self , CArray array , Device device = DEVICE_CPU ):
503
511
self .c_array = array
504
512
self ._ptr = array._ptr
505
- self ._can_validate = True
513
+ self ._device = device
514
+ self ._can_validate = device is DEVICE_CPU
506
515
507
516
@staticmethod
508
- def allocate ():
517
+ def allocate (Device device = DEVICE_CPU ):
509
518
""" Create a CArrayBuilder
510
519
511
520
Allocates memory for an ArrowArray and populates it with nanoarrow's
512
521
ArrowArray private_data/release callback implementation. This should
513
522
usually be followed by :meth:`init_from_type` or :meth:`init_from_schema`.
514
523
"""
515
- return CArrayBuilder(CArray.allocate(CSchema.allocate()))
524
+ return CArrayBuilder(CArray.allocate(CSchema.allocate()), device )
516
525
517
526
def is_empty (self ) -> bool:
518
527
"""Check if any items have been appended to this builder"""
@@ -550,6 +559,9 @@ cdef class CArrayBuilder:
550
559
Calling this method is required to produce a valid array prior to calling
551
560
:meth:`append_strings` or `append_bytes`.
552
561
"""
562
+ if self._device != DEVICE_CPU:
563
+ raise ValueError("Can't append to non-CPU array")
564
+
553
565
cdef int code = ArrowArrayStartAppending(self ._ptr)
554
566
Error.raise_error_not_ok("ArrowArrayStartAppending()", code )
555
567
return self
@@ -617,7 +629,11 @@ cdef class CArrayBuilder:
617
629
return self
618
630
619
631
def resolve_null_count(self ) -> CArrayBuilder:
620
- """Ensure the output null count is synchronized with existing buffers"""
632
+ """Ensure the output null count is synchronized with existing buffers
633
+
634
+ Note that this will not attempt to access non-CPU buffers such that
635
+ :attr:`null_count` might still be -1 after calling this method.
636
+ """
621
637
self.c_array._assert_valid()
622
638
623
639
# This doesn't apply to unions. We currently don't have a schema view
@@ -636,6 +652,10 @@ cdef class CArrayBuilder:
636
652
self ._ptr.null_count = 0
637
653
return self
638
654
655
+ # Don't attempt to access a non-cpu buffer
656
+ if self ._device != DEVICE_CPU:
657
+ return self
658
+
639
659
# From _ArrowBytesForBits(), which is not included in nanoarrow_c.pxd
640
660
# because it's an internal inline function.
641
661
cdef int64_t bits = self ._ptr.offset + self ._ptr.length
@@ -669,6 +689,14 @@ cdef class CArrayBuilder:
669
689
if i < 0 or i > 3:
670
690
raise IndexError("i must be >= 0 and <= 3")
671
691
692
+ if buffer._device != self._device:
693
+ raise ValueError(
694
+ f"Builder device ({self._device.device_type}/{self._device.device_id})"
695
+ " and buffer device "
696
+ f"({buffer._device.device_type}/{buffer._device.device_id})"
697
+ " are not identical"
698
+ )
699
+
672
700
self.c_array._assert_valid()
673
701
if not move:
674
702
buffer = CBuffer.from_pybuffer(buffer )
@@ -694,6 +722,26 @@ cdef class CArrayBuilder:
694
722
if child._ptr.release != NULL:
695
723
ArrowArrayRelease(child._ptr )
696
724
725
+ if (
726
+ self._device.device_type_id != c_array.device_type_id
727
+ or self._device.device_id != c_array.device_id
728
+ ):
729
+ raise ValueError (
730
+ f" Builder device ({self._device.device_type}/{self._device.device_id})"
731
+ " and child device "
732
+ f" ({c_array.device_type}/{c_array.device_id}) are not identical"
733
+ )
734
+
735
+ # There is probably a way to avoid a full synchronize for each child
736
+ # (e.g., perhaps the ArrayBuilder could allocate a stream to use such
737
+ # that an event can be allocated on finish_device() and synchronization
738
+ # could be avoided entirely). Including this for now for safety.
739
+ cdef CSharedSyncEvent sync = CSharedSyncEvent(
740
+ self ._device,
741
+ < uintptr_t> c_array._sync_event
742
+ )
743
+ sync.synchronize()
744
+
697
745
if not move:
698
746
c_array_shallow_copy(c_array._base, c_array._ptr, child._ptr)
699
747
else :
@@ -747,6 +795,20 @@ cdef class CArrayBuilder:
747
795
748
796
return out
749
797
798
+ def finish_device (self ):
799
+ """ Finish building this array and export to an ArrowDeviceArray
800
+
801
+ Calls :meth:`finish`, propagating device information into an ArrowDeviceArray.
802
+ """
803
+ cdef CArray array = self .finish()
804
+
805
+ cdef ArrowDeviceArray* device_array_ptr
806
+ holder = alloc_c_device_array(& device_array_ptr)
807
+ cdef int code = ArrowDeviceArrayInit(self ._device._ptr, device_array_ptr, array._ptr, NULL )
808
+ Error.raise_error_not_ok(" ArrowDeviceArrayInit" , code)
809
+
810
+ return CDeviceArray(holder, < uintptr_t> device_array_ptr, array._schema)
811
+
750
812
751
813
cdef class CDeviceArray:
752
814
""" Low-level ArrowDeviceArray wrapper
@@ -792,10 +854,8 @@ cdef class CDeviceArray:
792
854
793
855
@property
794
856
def array(self ) -> CArray:
795
- # TODO: We lose access to the sync_event here , so we probably need to
796
- # synchronize (or propagate it , or somehow prevent data access downstream )
797
857
cdef CArray array = CArray(self , < uintptr_t> & self ._ptr.array, self ._schema)
798
- array._set_device(self._ptr.device_type , self._ptr.device_id )
858
+ array._set_device(self._ptr.device_type , self._ptr.device_id , self._ptr. sync_event )
799
859
return array
800
860
801
861
def view(self ) -> CArrayView:
0 commit comments