-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy path_ipc_lib.pyx
340 lines (272 loc) · 11.9 KB
/
_ipc_lib.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
# cython: linetrace=True
from libc.stdint cimport uint8_t, int64_t, uintptr_t
from libc.errno cimport EIO, EAGAIN
from libc.stdio cimport snprintf
from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF
from cpython cimport Py_buffer, PyBuffer_FillInfo
from nanoarrow_c cimport (
ArrowArrayStream,
ArrowArrayView,
ArrowSchema,
ArrowErrorCode,
ArrowError,
)
from nanoarrow_macros cimport NANOARROW_OK
from nanoarrow._schema cimport CSchema
from nanoarrow._array cimport CArrayView
from nanoarrow._utils cimport Error
# C declarations from the nanoarrow IPC extension that this module uses.
# Only the members and functions referenced below are declared.
cdef extern from "nanoarrow/nanoarrow_ipc.h" nogil:

    # ---- Reading ---------------------------------------------------------

    # Callback table for a byte source: a read callback, a release callback
    # and an opaque pointer for the implementation's own state.
    struct ArrowIpcInputStream:
        ArrowErrorCode (*read)(
            ArrowIpcInputStream* stream,
            uint8_t* buf,
            int64_t buf_size_bytes,
            int64_t* size_read_out,
            ArrowError* error
        )
        void (*release)(ArrowIpcInputStream* stream)
        void* private_data

    # Options accepted by ArrowIpcArrayStreamReaderInit() (this module
    # currently always passes NULL for them).
    struct ArrowIpcArrayStreamReaderOptions:
        int64_t field_index
        int use_shared_buffers

    ArrowErrorCode ArrowIpcArrayStreamReaderInit(
        ArrowArrayStream* out,
        ArrowIpcInputStream* input_stream,
        ArrowIpcArrayStreamReaderOptions* options
    )

    # ---- Writing ---------------------------------------------------------

    # Callback table for a byte sink; mirrors ArrowIpcInputStream with a
    # write callback in place of the read callback.
    struct ArrowIpcOutputStream:
        ArrowErrorCode (*write)(
            ArrowIpcOutputStream* stream,
            const void* buf,
            int64_t buf_size_bytes,
            int64_t* size_written_out,
            ArrowError* error
        )
        void (*release)(ArrowIpcOutputStream* stream)
        void* private_data

    # Writer handle; only private_data is needed here (it is used as the
    # "is this writer initialized" flag by CIpcWriter).
    struct ArrowIpcWriter:
        void* private_data

    ArrowErrorCode ArrowIpcWriterInit(
        ArrowIpcWriter* writer,
        ArrowIpcOutputStream* output_stream
    )

    void ArrowIpcWriterReset(ArrowIpcWriter* writer)

    ArrowErrorCode ArrowIpcWriterWriteSchema(
        ArrowIpcWriter* writer,
        const ArrowSchema* in_,
        ArrowError* error
    )

    ArrowErrorCode ArrowIpcWriterWriteArrayView(
        ArrowIpcWriter* writer,
        const ArrowArrayView* in_,
        ArrowError* error
    )

    ArrowErrorCode ArrowIpcWriterWriteArrayStream(
        ArrowIpcWriter* writer,
        ArrowArrayStream* in_,
        ArrowError* error
    )
cdef class PyStreamPrivate:
    """State shared between a Python file-like object and the C stream callbacks.

    Holds the wrapped object, whether it should be closed on release, and a
    (pointer, length) pair describing the memory region the next read or
    write should operate on. The region is exposed via the buffer protocol.
    """
    cdef object _obj
    cdef bint _close_obj
    cdef void* _addr
    cdef Py_ssize_t _size_bytes
    cdef bint _buffer_readonly

    def __cinit__(self, obj, bint buffer_readonly, bint close_obj=False):
        # No region is attached until set_buffer() is called.
        self._addr = NULL
        self._size_bytes = 0

        self._obj = obj
        self._buffer_readonly = buffer_readonly
        self._close_obj = close_obj

    @property
    def obj(self):
        """The wrapped Python object."""
        return self._obj

    @property
    def close_obj(self):
        """Whether the wrapped object should be closed when the stream is released."""
        return self._close_obj

    def set_buffer(self, uintptr_t addr, Py_ssize_t size_bytes):
        """Point this object at the region of size_bytes bytes starting at addr."""
        self._size_bytes = size_bytes
        self._addr = <void*>addr

    # Some readinto() implementations ask for len() of their argument.
    def __len__(self):
        return self._size_bytes

    # Buffer protocol support: this object is handed directly to
    # xxx.readinto() or xxx.write(), which lets the file-like object work on
    # the caller-provided memory with no intermediate copy (other than
    # whatever buffering the file-like object does itself).
    #
    # When reading, the memory is preallocated by the ArrowArrayStream
    # implementation ahead of every read call (two calls per message, plus
    # one more for a RecordBatch message to fetch the buffer data itself).
    # When writing, the memory is a view of whatever was passed to the
    # write callback.
    def __getbuffer__(self, Py_buffer* buffer, int flags):
        PyBuffer_FillInfo(
            buffer,
            self,
            self._addr,
            self._size_bytes,
            self._buffer_readonly,
            flags
        )

    def __releasebuffer__(self, Py_buffer* buffer):
        # Nothing to free: the memory is owned by whoever called set_buffer().
        pass
cdef ArrowErrorCode py_input_stream_read(ArrowIpcInputStream* stream, uint8_t* buf,
                                         int64_t buf_size_bytes, int64_t* size_read_out,
                                         ArrowError* error) noexcept nogil:
    # ArrowIpcInputStream.read callback backed by a Python object's readinto().
    #
    # stream.private_data must be the PyStreamPrivate installed by
    # CIpcInputStream.from_readable(). Up to buf_size_bytes bytes are read
    # directly into buf and the number of bytes read is stored in
    # size_read_out.
    #
    # Returns NANOARROW_OK on success, EAGAIN if readinto() returned None
    # (a non-blocking stream with no data available), or EIO if any Python
    # exception was raised, in which case "<ExceptionType>: <message>" is
    # copied into error (when error is not NULL).
    with gil:
        stream_private = <object>stream.private_data
        stream_private.set_buffer(<uintptr_t>buf, buf_size_bytes)

        try:
            # Non-blocking streams may return None here; buffered wrappers
            # of them may instead raise BlockingIOError, which is reported
            # as EIO by the handler below.
            read_result = stream_private.obj.readinto(stream_private)
            if read_result is None:
                size_read_out[0] = 0
                return EAGAIN
            else:
                size_read_out[0] = read_result
                return NANOARROW_OK
        except Exception as e:
            # Never leave the output parameter uninitialized on failure.
            size_read_out[0] = 0

            # The error pointer is optional in nanoarrow's API, so only
            # populate the message when the caller supplied one.
            if error != NULL:
                # errors="replace" so that encoding the message can never
                # raise: an exception escaping this noexcept function would
                # be swallowed and 0 (NANOARROW_OK) returned instead of EIO.
                cls = type(e).__name__.encode("utf-8", "replace")
                msg = str(e).encode("utf-8", "replace")
                snprintf(
                    error.message,
                    sizeof(error.message),
                    "%s: %s",
                    <const char*>cls,
                    <const char*>msg
                )
            return EIO
cdef void py_input_stream_release(ArrowIpcInputStream* stream) noexcept nogil:
    # ArrowIpcInputStream.release callback.
    #
    # Optionally closes the wrapped Python object, drops the reference taken
    # by CIpcInputStream.from_readable() and marks the stream as released.
    with gil:
        stream_private = <object>stream.private_data
        try:
            if stream_private.close_obj:
                stream_private.obj.close()
        finally:
            # This function is noexcept: if close() raises, control leaves
            # the function immediately (the exception is reported as
            # unraisable). Doing the cleanup in a finally block guarantees
            # the reference is still dropped and the stream is still marked
            # released, so nothing leaks and release is never invoked twice.
            Py_DECREF(stream_private)
            stream.private_data = NULL
            stream.release = NULL
cdef ArrowErrorCode py_output_stream_write(ArrowIpcOutputStream* stream, const void* buf,
                                           int64_t buf_size_bytes, int64_t* size_written_out,
                                           ArrowError* error) noexcept nogil:
    # ArrowIpcOutputStream.write callback backed by a Python object's write().
    #
    # stream.private_data must be the PyStreamPrivate installed by
    # CIpcOutputStream.from_writable(). The buf_size_bytes bytes at buf are
    # offered to write() as a read-only buffer and the number of bytes
    # written is stored in size_written_out.
    #
    # Returns NANOARROW_OK on success, EAGAIN if write() returned None
    # (a non-blocking stream that could not accept data), or EIO if any
    # Python exception was raised, in which case "<ExceptionType>: <message>"
    # is copied into error (when error is not NULL).
    with gil:
        stream_private = <object>stream.private_data
        stream_private.set_buffer(<uintptr_t>buf, buf_size_bytes)

        try:
            # Non-blocking streams may return None here; buffered wrappers
            # of them may instead raise BlockingIOError, which is reported
            # as EIO by the handler below.
            write_result = stream_private.obj.write(stream_private)
            if write_result is None:
                size_written_out[0] = 0
                return EAGAIN
            else:
                size_written_out[0] = write_result
                return NANOARROW_OK
        except Exception as e:
            # Never leave the output parameter uninitialized on failure.
            size_written_out[0] = 0

            # The error pointer is optional in nanoarrow's API, so only
            # populate the message when the caller supplied one.
            if error != NULL:
                # errors="replace" so that encoding the message can never
                # raise: an exception escaping this noexcept function would
                # be swallowed and 0 (NANOARROW_OK) returned instead of EIO.
                cls = type(e).__name__.encode("utf-8", "replace")
                msg = str(e).encode("utf-8", "replace")
                snprintf(
                    error.message,
                    sizeof(error.message),
                    "%s: %s",
                    <const char*>cls,
                    <const char*>msg
                )
            return EIO
cdef void py_output_stream_release(ArrowIpcOutputStream* stream) noexcept nogil:
    # ArrowIpcOutputStream.release callback.
    #
    # Optionally closes the wrapped Python object, drops the reference taken
    # by CIpcOutputStream.from_writable() and marks the stream as released.
    with gil:
        stream_private = <object>stream.private_data
        try:
            if stream_private.close_obj:
                stream_private.obj.close()
        finally:
            # This function is noexcept: if close() raises (e.g. a failed
            # flush), control leaves the function immediately and the
            # exception is reported as unraisable. Doing the cleanup in a
            # finally block guarantees the reference is still dropped and
            # the stream is still marked released, so nothing leaks and
            # release is never invoked twice.
            Py_DECREF(stream_private)
            stream.private_data = NULL
            stream.release = NULL
cdef class CIpcInputStream:
    """Python-visible owner of an ArrowIpcInputStream struct.

    The stream is considered valid while its release callback is non-NULL.
    """
    cdef ArrowIpcInputStream _stream

    def __cinit__(self):
        # A NULL release callback marks the stream as not (yet) initialized.
        self._stream.release = NULL

    def is_valid(self):
        """Return True if the underlying stream has not been released."""
        return self._stream.release != NULL

    def __dealloc__(self):
        # Same logic as release(), repeated inline so that the deallocator
        # does not have to go through the Python API.
        if self._stream.release != NULL:
            self._stream.release(&self._stream)

    def release(self):
        """Release the underlying stream.

        Returns True if the stream was released by this call, or False if
        there was nothing to release.
        """
        if self._stream.release == NULL:
            return False

        self._stream.release(&self._stream)
        return True

    @staticmethod
    def from_readable(obj, close_obj=False):
        """Wrap obj so that its readinto() method feeds an ArrowIpcInputStream.

        If close_obj is true, obj.close() is called when the stream is
        released.
        """
        cdef PyStreamPrivate state = PyStreamPrivate(
            obj,
            buffer_readonly=False,
            close_obj=close_obj
        )
        cdef CIpcInputStream wrapper = CIpcInputStream()

        # The C struct keeps its own reference to the state object; it is
        # given back in py_input_stream_release().
        Py_INCREF(state)
        wrapper._stream.private_data = <PyObject*>state
        wrapper._stream.release = &py_input_stream_release
        wrapper._stream.read = &py_input_stream_read
        return wrapper
def init_array_stream(CIpcInputStream input_stream, uintptr_t out):
    """Initialize an ArrowArrayStream that reads IPC data from input_stream.

    Parameters
    ----------
    input_stream : CIpcInputStream
        A valid (not yet released) input stream.
        NOTE(review): nanoarrow documents the reader as taking ownership of
        the input stream on success -- confirm against the C API docs.
    out : int
        Address of the ArrowArrayStream struct to initialize.

    Raises
    ------
    ValueError
        If input_stream is None or already released, or if out is 0.
    RuntimeError
        If ArrowIpcArrayStreamReaderInit() returns a non-OK code.
    """
    cdef ArrowArrayStream* out_ptr = <ArrowArrayStream*>out
    cdef int code

    # Validate up front: the C function would otherwise dereference a NULL
    # pointer or build a reader around a stream whose callbacks are NULL.
    if input_stream is None or input_stream._stream.release == NULL:
        raise ValueError("Can't create array stream from released stream")
    if out_ptr == NULL:
        raise ValueError("Can't initialize array stream at NULL address")

    # There are some options here that could be exposed at some point
    code = ArrowIpcArrayStreamReaderInit(out_ptr, &input_stream._stream, NULL)
    if code != NANOARROW_OK:
        raise RuntimeError(f"ArrowIpcArrayStreamReaderInit() failed with code [{code}]")
cdef class CIpcOutputStream:
    """Python-visible owner of an ArrowIpcOutputStream struct.

    The stream is considered valid while its release callback is non-NULL.
    """
    cdef ArrowIpcOutputStream _stream

    def __cinit__(self):
        # A NULL release callback marks the stream as not (yet) initialized.
        self._stream.release = NULL

    def is_valid(self):
        """Return True if the underlying stream has not been released."""
        return self._stream.release != NULL

    def __dealloc__(self):
        # Same logic as release(), repeated inline so that the deallocator
        # does not have to go through the Python API.
        if self._stream.release != NULL:
            self._stream.release(&self._stream)

    def release(self):
        """Release the underlying stream.

        Returns True if the stream was released by this call, or False if
        there was nothing to release.
        """
        if self._stream.release == NULL:
            return False

        self._stream.release(&self._stream)
        return True

    @staticmethod
    def from_writable(obj, close_obj=False):
        """Wrap obj so that its write() method receives ArrowIpcOutputStream output.

        If close_obj is true, obj.close() is called when the stream is
        released.
        """
        cdef PyStreamPrivate state = PyStreamPrivate(
            obj,
            buffer_readonly=True,
            close_obj=close_obj
        )
        cdef CIpcOutputStream wrapper = CIpcOutputStream()

        # The C struct keeps its own reference to the state object; it is
        # given back in py_output_stream_release().
        Py_INCREF(state)
        wrapper._stream.private_data = <PyObject*>state
        wrapper._stream.release = &py_output_stream_release
        wrapper._stream.write = &py_output_stream_write
        return wrapper
cdef class CIpcWriter:
    """Python-visible owner of an ArrowIpcWriter struct.

    The writer is considered valid while its private_data is non-NULL.
    All write_*() methods raise ValueError when called on a released writer
    instead of passing an uninitialized struct to the C library.
    """
    cdef ArrowIpcWriter _writer

    def __cinit__(self, CIpcOutputStream stream):
        # Mark as uninitialized first so __dealloc__ is safe if we raise below.
        self._writer.private_data = NULL
        if not stream.is_valid():
            raise ValueError("Can't create writer from released stream")

        # NOTE(review): nanoarrow documents the writer as taking ownership
        # of the output stream on success -- confirm against the C API docs.
        cdef int code = ArrowIpcWriterInit(&self._writer, &stream._stream)
        Error.raise_error_not_ok("ArrowIpcWriterInit()", code)

    cdef int _assert_valid(self) except -1:
        # Guard used by every write_*() method: the C functions dereference
        # private_data unconditionally, so a released writer must be
        # rejected here.
        if self._writer.private_data == NULL:
            raise ValueError("Can't write to released writer")
        return 0

    def is_valid(self):
        """Return True if the writer is initialized and not yet released."""
        return self._writer.private_data != NULL

    def __dealloc__(self):
        if self._writer.private_data != NULL:
            ArrowIpcWriterReset(&self._writer)

    def release(self):
        """Release the writer's resources; a no-op if already released."""
        if self._writer.private_data != NULL:
            ArrowIpcWriterReset(&self._writer)
            # Defensive: do not rely on the C library zeroing the struct,
            # otherwise a second release()/__dealloc__ would reset twice.
            self._writer.private_data = NULL

    def write_schema(self, CSchema schema not None):
        """Write schema to the output stream.

        Raises ValueError if the writer has been released, and whatever
        Error.raise_message_not_ok() raises if the C call fails.
        """
        cdef Error error = Error()
        cdef int code
        self._assert_valid()
        code = ArrowIpcWriterWriteSchema(&self._writer, schema._ptr, &error.c_error)
        error.raise_message_not_ok("ArrowIpcWriterWriteSchema()", code)

    def write_array_view(self, CArrayView array_view not None):
        """Write the array described by array_view to the output stream.

        Raises ValueError if the writer has been released, and whatever
        Error.raise_message_not_ok() raises if the C call fails.
        """
        cdef Error error = Error()
        cdef int code
        self._assert_valid()
        code = ArrowIpcWriterWriteArrayView(&self._writer, array_view._ptr, &error.c_error)
        error.raise_message_not_ok("ArrowIpcWriterWriteArrayView()", code)

    def write_array_stream(self, uintptr_t stream_addr):
        """Write the ArrowArrayStream located at address stream_addr.

        Raises ValueError if the writer has been released or stream_addr is
        0, and whatever Error.raise_message_not_ok() raises if the C call
        fails.
        """
        cdef ArrowArrayStream* array_stream = <ArrowArrayStream*>stream_addr
        cdef Error error = Error()
        cdef int code
        self._assert_valid()
        if array_stream == NULL:
            raise ValueError("Can't write array stream at NULL address")
        code = ArrowIpcWriterWriteArrayStream(&self._writer, array_stream, &error.c_error)
        error.raise_message_not_ok("ArrowIpcWriterWriteArrayStream()", code)

    def write_end_of_stream(self):
        """Write the end-of-stream marker.

        Implemented by passing a NULL array view to
        ArrowIpcWriterWriteArrayView(). Raises ValueError if the writer has
        been released.
        """
        cdef Error error = Error()
        cdef int code
        self._assert_valid()
        code = ArrowIpcWriterWriteArrayView(&self._writer, NULL, &error.c_error)
        error.raise_message_not_ok("ArrowIpcWriterWriteArrayView()", code)