19
19
# cython: linetrace=True
20
20
21
21
from libc.stdint cimport uint8_t, int64_t, uintptr_t
22
- from libc.errno cimport EIO
22
+ from libc.errno cimport EIO, EAGAIN
23
23
from libc.stdio cimport snprintf
24
24
from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF
25
25
from cpython cimport Py_buffer, PyBuffer_FillInfo
26
26
27
27
from nanoarrow_c cimport (
28
+ ArrowArrayStream,
29
+ ArrowArrayView,
30
+ ArrowSchema,
28
31
ArrowErrorCode,
29
32
ArrowError,
30
33
NANOARROW_OK,
31
- ArrowArrayStream,
32
34
)
33
35
36
+ from nanoarrow._schema cimport CSchema
37
+ from nanoarrow._array cimport CArrayView
38
+ from nanoarrow._utils cimport Error
39
+
34
40
35
41
cdef extern from " nanoarrow_ipc.h" nogil:
36
42
struct ArrowIpcInputStream:
@@ -48,18 +54,43 @@ cdef extern from "nanoarrow_ipc.h" nogil:
48
54
ArrowArrayStream* out, ArrowIpcInputStream* input_stream,
49
55
ArrowIpcArrayStreamReaderOptions* options)
50
56
57
+ struct ArrowIpcOutputStream:
58
+ ArrowErrorCode (* write)(ArrowIpcOutputStream* stream, const void * buf,
59
+ int64_t buf_size_bytes, int64_t* size_written_out,
60
+ ArrowError* error)
61
+ void (* release)(ArrowIpcOutputStream* stream)
62
+ void * private_data
51
63
52
- cdef class PyInputStreamPrivate:
64
+ struct ArrowIpcWriter:
65
+ void * private_data
66
+
67
+ ArrowErrorCode ArrowIpcWriterInit(ArrowIpcWriter* writer,
68
+ ArrowIpcOutputStream* output_stream)
69
+ void ArrowIpcWriterReset(ArrowIpcWriter* writer)
70
+ ArrowErrorCode ArrowIpcWriterWriteSchema(ArrowIpcWriter* writer,
71
+ const ArrowSchema* in_,
72
+ ArrowError* error)
73
+ ArrowErrorCode ArrowIpcWriterWriteArrayView(ArrowIpcWriter* writer,
74
+ const ArrowArrayView* in_,
75
+ ArrowError* error)
76
+
77
+ ArrowErrorCode ArrowIpcWriterWriteArrayStream(ArrowIpcWriter* writer,
78
+ ArrowArrayStream* in_,
79
+ ArrowError* error)
80
+
81
+ cdef class PyStreamPrivate:
53
82
cdef object _obj
54
83
cdef bint _close_obj
55
84
cdef void * _addr
56
85
cdef Py_ssize_t _size_bytes
86
+ cdef bint _buffer_readonly
57
87
58
- def __cinit__ (self , obj , close_obj = False ):
88
+ def __cinit__ (self , obj , bint buffer_readonly , bint close_obj = False ):
59
89
self ._obj = obj
60
90
self ._close_obj = close_obj
61
91
self ._addr = NULL
62
92
self ._size_bytes = 0
93
+ self ._buffer_readonly = buffer_readonly
63
94
64
95
@property
65
96
def obj (self ):
@@ -78,14 +109,16 @@ cdef class PyInputStreamPrivate:
78
109
return self ._size_bytes
79
110
80
111
# Implement the buffer protocol so that this object can be used as
81
- # the argument to xxx.readinto(). This ensures that no extra copies
82
- # (beyond any buffering done by the upstream file-like object) are held
83
- # since the upstream object has access to the preallocated output buffer.
84
- # In this case, the preallocation is done by the ArrowArrayStream
112
+ # the argument to xxx.readinto() or xxx.write(). This ensures that
113
+ # no extra copies (beyond any buffering done by the upstream file-like object)
114
+ # are held since the upstream object has access to the preallocated output buffer.
115
+ # In the read case, the preallocation is done by the ArrowArrayStream
85
116
# implementation before issuing each read call (two per message, with
86
117
# an extra call for a RecordBatch message to get the actual buffer data).
118
+ # In the write case, this will be a view of whatever information was provided to
119
+ # the write callback.
87
120
def __getbuffer__ (self , Py_buffer* buffer , int flags ):
88
- PyBuffer_FillInfo(buffer , self , self ._addr, self ._size_bytes, 0 , flags)
121
+ PyBuffer_FillInfo(buffer , self , self ._addr, self ._size_bytes, self ._buffer_readonly , flags)
89
122
90
123
def __releasebuffer__ (self , Py_buffer* buffer ):
91
124
pass
@@ -100,8 +133,16 @@ cdef ArrowErrorCode py_input_stream_read(ArrowIpcInputStream* stream, uint8_t* b
100
133
stream_private.set_buffer(< uintptr_t> buf, buf_size_bytes)
101
134
102
135
try :
103
- size_read_out[0 ] = stream_private.obj.readinto(stream_private)
104
- return NANOARROW_OK
136
+ # Non-blocking streams may return None here, or buffered
137
+ # wrappers of them may raise BufferedIOError
138
+ read_result = stream_private.obj.readinto(stream_private)
139
+
140
+ if read_result is None :
141
+ size_read_out[0 ] = 0
142
+ return EAGAIN
143
+ else :
144
+ size_read_out[0 ] = read_result
145
+ return NANOARROW_OK
105
146
except Exception as e:
106
147
cls = type (e).__name__.encode()
107
148
msg = str (e).encode()
@@ -126,6 +167,51 @@ cdef void py_input_stream_release(ArrowIpcInputStream* stream) noexcept nogil:
126
167
stream.release = NULL
127
168
128
169
170
+
171
+ cdef ArrowErrorCode py_output_stream_write(ArrowIpcOutputStream* stream, const void * buf,
172
+ int64_t buf_size_bytes, int64_t* size_written_out,
173
+ ArrowError* error) noexcept nogil:
174
+
175
+ with gil:
176
+ stream_private = < object > stream.private_data
177
+ stream_private.set_buffer(< uintptr_t> buf, buf_size_bytes)
178
+
179
+ try :
180
+ # Non-blocking streams may return None here, or buffered
181
+ # wrappers of them may raise BufferedIOError
182
+ write_result = stream_private.obj.write(stream_private)
183
+
184
+ # Non-blocking streams may return None here
185
+ if write_result is None :
186
+ size_written_out[0 ] = 0
187
+ return EAGAIN
188
+ else :
189
+ size_written_out[0 ] = write_result
190
+ return NANOARROW_OK
191
+ except Exception as e:
192
+ cls = type (e).__name__.encode()
193
+ msg = str (e).encode()
194
+ snprintf(
195
+ error.message,
196
+ sizeof(error.message),
197
+ " %s : %s " ,
198
+ < const char * > cls ,
199
+ < const char * > msg
200
+ )
201
+ return EIO
202
+
203
+ cdef void py_output_stream_release(ArrowIpcOutputStream* stream) noexcept nogil:
204
+ with gil:
205
+ stream_private = < object > stream.private_data
206
+ if stream_private.close_obj:
207
+ stream_private.obj.close()
208
+
209
+ Py_DECREF(stream_private)
210
+
211
+ stream.private_data = NULL
212
+ stream.release = NULL
213
+
214
+
129
215
cdef class CIpcInputStream:
130
216
cdef ArrowIpcInputStream _stream
131
217
@@ -150,7 +236,11 @@ cdef class CIpcInputStream:
150
236
@staticmethod
151
237
def from_readable (obj , close_obj = False ):
152
238
cdef CIpcInputStream stream = CIpcInputStream()
153
- cdef PyInputStreamPrivate private_data = PyInputStreamPrivate(obj, close_obj)
239
+ cdef PyStreamPrivate private_data = PyStreamPrivate(
240
+ obj,
241
+ buffer_readonly = False ,
242
+ close_obj = close_obj
243
+ )
154
244
155
245
stream._stream.private_data = < PyObject* > private_data
156
246
Py_INCREF(private_data)
@@ -166,3 +256,84 @@ def init_array_stream(CIpcInputStream input_stream, uintptr_t out):
166
256
cdef int code = ArrowIpcArrayStreamReaderInit(out_ptr, & input_stream._stream, NULL )
167
257
if code != NANOARROW_OK:
168
258
raise RuntimeError (f" ArrowIpcArrayStreamReaderInit() failed with code [{code}]" )
259
+
260
+
261
+ cdef class CIpcOutputStream:
262
+ cdef ArrowIpcOutputStream _stream
263
+
264
+ def __cinit__ (self ):
265
+ self ._stream.release = NULL
266
+
267
+ def is_valid (self ):
268
+ return self ._stream.release != NULL
269
+
270
+ def __dealloc__ (self ):
271
+ # Duplicating release() to avoid Python API calls in the deallocator
272
+ if self ._stream.release != NULL :
273
+ self ._stream.release(& self ._stream)
274
+
275
+ def release (self ):
276
+ if self ._stream.release != NULL :
277
+ self ._stream.release(& self ._stream)
278
+ return True
279
+ else :
280
+ return False
281
+
282
+ @staticmethod
283
+ def from_writable (obj , close_obj = False ):
284
+ cdef CIpcOutputStream stream = CIpcOutputStream()
285
+ cdef PyStreamPrivate private_data = PyStreamPrivate(
286
+ obj,
287
+ buffer_readonly = True ,
288
+ close_obj = close_obj
289
+ )
290
+
291
+ stream._stream.private_data = < PyObject* > private_data
292
+ Py_INCREF(private_data)
293
+ stream._stream.write = & py_output_stream_write
294
+ stream._stream.release = & py_output_stream_release
295
+ return stream
296
+
297
+
298
+ cdef class CIpcWriter:
299
+ cdef ArrowIpcWriter _writer
300
+
301
+ def __cinit__ (self , CIpcOutputStream stream ):
302
+ self ._writer.private_data = NULL
303
+ if not stream.is_valid():
304
+ raise ValueError (" Can't create writer from released stream" )
305
+
306
+ cdef int code = ArrowIpcWriterInit(& self ._writer, & stream._stream)
307
+ Error.raise_error_not_ok(" ArrowIpcWriterInit()" , code)
308
+
309
+ def is_valid (self ):
310
+ return self ._writer.private_data != NULL
311
+
312
+ def __dealloc__ (self ):
313
+ if self ._writer.private_data != NULL :
314
+ ArrowIpcWriterReset(& self ._writer)
315
+
316
+ def release (self ):
317
+ if self ._writer.private_data != NULL :
318
+ ArrowIpcWriterReset(& self ._writer)
319
+
320
+ def write_schema (self , CSchema schema ):
321
+ cdef Error error = Error()
322
+ cdef int code = ArrowIpcWriterWriteSchema(& self ._writer, schema._ptr, & error.c_error)
323
+ error.raise_message_not_ok(" ArrowIpcWriterWriteSchema()" , code)
324
+
325
+ def write_array_view (self , CArrayView array_view ):
326
+ cdef Error error = Error()
327
+ cdef int code = ArrowIpcWriterWriteArrayView(& self ._writer, array_view._ptr, & error.c_error)
328
+ error.raise_message_not_ok(" ArrowIpcWriterWriteArrayView()" , code)
329
+
330
+ def write_array_stream (self , uintptr_t stream_addr ):
331
+ cdef ArrowArrayStream* array_stream = < ArrowArrayStream* > stream_addr
332
+ cdef Error error = Error()
333
+ cdef int code = ArrowIpcWriterWriteArrayStream(& self ._writer, array_stream, & error.c_error)
334
+ error.raise_message_not_ok(" ArrowIpcWriterWriteArrayStream()" , code)
335
+
336
+ def write_end_of_stream (self ):
337
+ cdef Error error = Error()
338
+ cdef int code = ArrowIpcWriterWriteArrayView(& self ._writer, NULL , & error.c_error)
339
+ error.raise_message_not_ok(" ArrowIpcWriterWriteArrayView()" , code)
0 commit comments