Skip to content

Commit 499d865

Browse files
authored
chore(python): Fix type stubs for updated module structure (#606)
This PR updates the type stubs for the Cython modules to reflect the updated module structure. I don't think this mechanism for generating stubs is the best long-term strategy (it requires remembering to regenerate them!). We could instead omit the generated stubs from the source tree and generate them only before packaging; however, that would make them unavailable for local development (which is one of the primary short-term benefits of including them).
1 parent 3488ff1 commit 499d865

11 files changed

+1067
-22
lines changed

.github/workflows/python.yaml

+6-9
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,12 @@ jobs:
6464
- name: Check type stubs
6565
if: success() && matrix.python-version == '3.12'
6666
run: |
67-
pip install mypy "black==22.3.0"
68-
python/generate_type_stubs.sh
69-
70-
if git diff --name-only | grep -e "\\.pxi$"; then
71-
echo "Type stubs were changed. Update them with python/generate_type_stubs.sh."
72-
fi
73-
74-
stubtest nanoarrow._lib
75-
stubtest nanoarrow._ipc_lib
67+
pip install mypy
68+
cd src/nanoarrow
69+
for mod in $(find . -name "*.pyx" | sed -e "s|./||" -e "s|.pyx||"); do
70+
cat $mod
71+
stubtest "nanoarrow.$mod"
72+
done
7673
7774
- name: Run doctests
7875
if: success() && matrix.python-version == '3.12'

python/generate_type_stubs.sh

+13-13
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,7 @@ SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
2323

2424
pushd "${SOURCE_DIR}"
2525

26-
# Generate stubs using mypy
27-
stubgen --module nanoarrow._lib --include-docstrings -o build/tmp
28-
stubgen --module nanoarrow._ipc_lib --include-docstrings -o build/tmp
29-
30-
# Add license to the start of the files
26+
# We'll add the license to the start of the files
3127
LICENSE='
3228
# Licensed to the Apache Software Foundation (ASF) under one
3329
# or more contributor license agreements. See the NOTICE file
@@ -47,13 +43,17 @@ LICENSE='
4743
# under the License.
4844
'
4945

50-
echo "$LICENSE" > src/nanoarrow/_lib.pyi
51-
cat build/tmp/nanoarrow/_lib.pyi >> src/nanoarrow/_lib.pyi
52-
53-
echo "$LICENSE" > src/nanoarrow/_ipc_lib.pyi
54-
cat build/tmp/nanoarrow/_ipc_lib.pyi >> src/nanoarrow/_ipc_lib.pyi
55-
56-
# Reformat stubs
57-
black src/nanoarrow/*.pyi
46+
# Remove old stubs
47+
find src/nanoarrow -name "*.pyi" -delete
48+
49+
# Generate new ones
50+
pushd src/nanoarrow
51+
for mod in $(find . -name "*.pyx" | sed -e "s|./||" -e "s|.pyx||"); do
52+
stubgen --module "nanoarrow.${mod}" --include-docstrings -o ../../build/tmp
53+
echo "$LICENSE" > "${mod}.pyi"
54+
cat "../../build/tmp/nanoarrow/${mod}.pyi" >> "${mod}.pyi"
55+
black "${mod}.pyi"
56+
done
57+
popd
5858

5959
popd

python/src/nanoarrow/_array.pyi

+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import _cython_3_0_11
19+
import nanoarrow._device
20+
from _typeshed import Incomplete
21+
from nanoarrow._device import DeviceType as DeviceType
22+
from typing import ClassVar
23+
24+
DEVICE_CPU: nanoarrow._device.Device
25+
__reduce_cython__: _cython_3_0_11.cython_function_or_method
26+
__setstate_cython__: _cython_3_0_11.cython_function_or_method
27+
__test__: dict
28+
29+
class CArray:
30+
__pyx_vtable__: ClassVar[PyCapsule] = ...
31+
buffers: Incomplete
32+
children: Incomplete
33+
device_id: Incomplete
34+
device_type: Incomplete
35+
device_type_id: Incomplete
36+
dictionary: Incomplete
37+
length: Incomplete
38+
n_buffers: Incomplete
39+
n_children: Incomplete
40+
null_count: Incomplete
41+
offset: Incomplete
42+
schema: Incomplete
43+
@classmethod
44+
def __init__(cls, *args, **kwargs) -> None:
45+
"""Create and return a new object. See help(type) for accurate signature."""
46+
@staticmethod
47+
def allocate(*args, **kwargs):
48+
"""Allocate a released ArrowArray"""
49+
def child(self, *args, **kwargs): ...
50+
def is_valid(self, *args, **kwargs):
51+
"""Check for a non-null and non-released underlying ArrowArray"""
52+
def view(self, *args, **kwargs):
53+
"""Allocate a :class:`CArrayView` to access the buffers of this array"""
54+
def __arrow_c_array__(self, *args, **kwargs):
55+
"""
56+
Get a pair of PyCapsules containing a C ArrowArray representation of the object.
57+
58+
Parameters
59+
----------
60+
requested_schema : PyCapsule | None
61+
A PyCapsule containing a C ArrowSchema representation of a requested
62+
schema. Not supported.
63+
64+
Returns
65+
-------
66+
Tuple[PyCapsule, PyCapsule]
67+
A pair of PyCapsules containing a C ArrowSchema and ArrowArray,
68+
respectively.
69+
"""
70+
def __getitem__(self, index):
71+
"""Return self[key]."""
72+
def __len__(self) -> int:
73+
"""Return len(self)."""
74+
def __reduce__(self): ...
75+
76+
class CArrayBuilder:
77+
@classmethod
78+
def __init__(cls, *args, **kwargs) -> None:
79+
"""Create and return a new object. See help(type) for accurate signature."""
80+
@staticmethod
81+
def allocate(*args, **kwargs):
82+
"""Create a CArrayBuilder
83+
84+
Allocates memory for an ArrowArray and populates it with nanoarrow's
85+
ArrowArray private_data/release callback implementation. This should
86+
usually be followed by :meth:`init_from_type` or :meth:`init_from_schema`.
87+
"""
88+
def append_bytes(self, *args, **kwargs): ...
89+
def append_strings(self, *args, **kwargs): ...
90+
def finish(self, *args, **kwargs):
91+
"""Finish building this array
92+
93+
Performs any steps required to return a valid ArrowArray and optionally
94+
validates the output to ensure that the result is valid (given the information
95+
the array has available to it).
96+
97+
Parameters
98+
----------
99+
validation_level : None, "full", "default", "minimal", or "none", optional
100+
Explicitly define a validation level or use None to perform default
101+
validation if possible. Validation may not be possible if children
102+
were set that were not created by nanoarrow.
103+
"""
104+
def finish_device(self, *args, **kwargs):
105+
"""Finish building this array and export to an ArrowDeviceArray
106+
107+
Calls :meth:`finish`, propagating device information into an ArrowDeviceArray.
108+
"""
109+
def init_from_schema(self, *args, **kwargs): ...
110+
def init_from_type(self, *args, **kwargs): ...
111+
def is_empty(self, *args, **kwargs):
112+
"""Check if any items have been appended to this builder"""
113+
def resolve_null_count(self, *args, **kwargs):
114+
"""Ensure the output null count is synchronized with existing buffers
115+
116+
Note that this will not attempt to access non-CPU buffers such that
117+
:attr:`null_count` might still be -1 after calling this method.
118+
"""
119+
def set_buffer(self, *args, **kwargs):
120+
"""Set an ArrowArray buffer
121+
122+
Sets a buffer of this ArrowArray such that the pointer at array->buffers[i] is
123+
equal to buffer->data and such that the buffer's lifecycle is managed by
124+
the array. If move is True, the input Python object that previously wrapped
125+
the ArrowBuffer will be invalidated, which is usually the desired behaviour
126+
if you built or imported a buffer specifically to build this array. If move
127+
is False (the default), this function will make a shallow copy via another
128+
layer of Python object wrapping.
129+
"""
130+
def set_child(self, *args, **kwargs):
131+
"""Set an ArrowArray child
132+
133+
Set a child of this array by performing a shallow copy or optionally
134+
transferring ownership to this object. The initialized child array
135+
must have been initialized before this call by initializing this
136+
builder with a schema containing the correct number of children.
137+
"""
138+
def set_length(self, *args, **kwargs): ...
139+
def set_null_count(self, *args, **kwargs): ...
140+
def set_offset(self, *args, **kwargs): ...
141+
def start_appending(self, *args, **kwargs):
142+
"""Use append mode for building this ArrowArray
143+
144+
Calling this method is required to produce a valid array prior to calling
145+
:meth:`append_strings` or :meth:`append_bytes`.
146+
"""
147+
def __reduce__(self): ...
148+
149+
class CArrayView:
150+
buffers: Incomplete
151+
children: Incomplete
152+
dictionary: Incomplete
153+
layout: Incomplete
154+
length: Incomplete
155+
n_buffers: Incomplete
156+
n_children: Incomplete
157+
null_count: Incomplete
158+
offset: Incomplete
159+
storage_type: Incomplete
160+
storage_type_id: Incomplete
161+
@classmethod
162+
def __init__(cls, *args, **kwargs) -> None:
163+
"""Create and return a new object. See help(type) for accurate signature."""
164+
def buffer(self, *args, **kwargs): ...
165+
def buffer_type(self, *args, **kwargs): ...
166+
def child(self, *args, **kwargs): ...
167+
@staticmethod
168+
def from_array(*args, **kwargs): ...
169+
@staticmethod
170+
def from_schema(*args, **kwargs): ...
171+
def __len__(self) -> int:
172+
"""Return len(self)."""
173+
def __reduce__(self): ...
174+
175+
class CDeviceArray:
176+
array: Incomplete
177+
device_id: Incomplete
178+
device_type: Incomplete
179+
device_type_id: Incomplete
180+
schema: Incomplete
181+
@classmethod
182+
def __init__(cls, *args, **kwargs) -> None:
183+
"""Create and return a new object. See help(type) for accurate signature."""
184+
def view(self, *args, **kwargs): ...
185+
def __arrow_c_array__(self, *args, **kwargs): ...
186+
def __arrow_c_device_array__(self, *args, **kwargs): ...
187+
def __reduce__(self): ...
+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import _cython_3_0_11
19+
import types
20+
from _typeshed import Incomplete
21+
from typing import ClassVar
22+
23+
__reduce_cython__: _cython_3_0_11.cython_function_or_method
24+
__setstate_cython__: _cython_3_0_11.cython_function_or_method
25+
__test__: dict
26+
27+
class CArrayStream:
28+
@classmethod
29+
def __init__(cls, *args, **kwargs) -> None:
30+
"""Create and return a new object. See help(type) for accurate signature."""
31+
@staticmethod
32+
def allocate(*args, **kwargs):
33+
"""Allocate a released ArrowArrayStream"""
34+
@staticmethod
35+
def from_c_arrays(*args, **kwargs):
36+
"""Create an ArrowArrayStream from an existing set of arrays
37+
38+
Given a previously resolved list of arrays, create an ArrowArrayStream
39+
representation of the sequence of chunks.
40+
41+
Parameters
42+
----------
43+
arrays : List[CArray]
44+
A list of arrays to use as batches.
45+
schema : CSchema
46+
The schema that will be returned. Must be type equal with the schema
47+
of each array (this is checked if validate is ``True``)
48+
move : bool, optional
49+
If True, transfer ownership from each array instead of creating a
50+
shallow copy. This is only safe if the caller knows the origin of the
51+
arrays and knows that they will not be accessed after this stream has been
52+
created.
53+
validate : bool, optional
54+
If True, enforce type equality between the provided schema and the schema
55+
of each array.
56+
"""
57+
def get_next(self, *args, **kwargs):
58+
"""Get the next Array from this stream
59+
60+
Raises StopIteration when there are no more arrays in this stream.
61+
"""
62+
def get_schema(self, *args, **kwargs):
63+
"""Get the schema associated with this stream
64+
65+
Calling this method will always issue a call to the underlying stream's
66+
get_schema callback.
67+
"""
68+
def is_valid(self, *args, **kwargs):
69+
"""Check for a non-null and non-released underlying ArrowArrayStream"""
70+
def release(self, *args, **kwargs):
71+
"""Explicitly call the release callback of this stream"""
72+
def __arrow_c_stream__(self, *args, **kwargs):
73+
"""
74+
Export the stream as an Arrow C stream PyCapsule.
75+
76+
Parameters
77+
----------
78+
requested_schema : PyCapsule | None
79+
A PyCapsule containing a C ArrowSchema representation of a requested
80+
schema. Not supported.
81+
82+
Returns
83+
-------
84+
PyCapsule
85+
"""
86+
def __enter__(self): ...
87+
def __exit__(
88+
self,
89+
type: type[BaseException] | None,
90+
value: BaseException | None,
91+
traceback: types.TracebackType | None,
92+
): ...
93+
def __iter__(self):
94+
"""Implement iter(self)."""
95+
def __next__(self): ...
96+
def __reduce__(self): ...
97+
98+
class CMaterializedArrayStream:
99+
__pyx_vtable__: ClassVar[PyCapsule] = ...
100+
arrays: Incomplete
101+
n_arrays: Incomplete
102+
schema: Incomplete
103+
@classmethod
104+
def __init__(cls, *args, **kwargs) -> None:
105+
"""Create and return a new object. See help(type) for accurate signature."""
106+
def array(self, *args, **kwargs): ...
107+
def child(self, *args, **kwargs): ...
108+
@staticmethod
109+
def from_c_array(*args, **kwargs):
110+
"""Create a materialized array stream from a single array"""
111+
@staticmethod
112+
def from_c_array_stream(*args, **kwargs):
113+
"""Create a materialized array stream from an unmaterialized ArrowArrayStream"""
114+
@staticmethod
115+
def from_c_arrays(*args, **kwargs):
116+
"""Create a materialized array stream from an existing iterable of arrays
117+
118+
This is slightly more efficient than creating a stream and then consuming it
119+
because the implementation can avoid a shallow copy of each array.
120+
"""
121+
def __arrow_c_stream__(self, *args, **kwargs): ...
122+
def __getitem__(self, index):
123+
"""Return self[key]."""
124+
def __iter__(self):
125+
"""Implement iter(self)."""
126+
def __len__(self) -> int:
127+
"""Return len(self)."""
128+
def __reduce__(self): ...

python/src/nanoarrow/_array_stream.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ cdef class CArrayStream:
184184
return array_stream_capsule
185185

186186
def _addr(self) -> int:
187+
"""test to see if this causes a ci fail"""
187188
return <uintptr_t>self._ptr
188189

189190
def is_valid(self) -> bool:

0 commit comments

Comments
 (0)