Skip to content

Commit a416b6b

Browse files
authored
Merge pull request #198 from lincc-frameworks/list-lengths
List lengths property for extension array and accessor
2 parents d09e044 + 63f5524 commit a416b6b

File tree

4 files changed

+51
-3
lines changed

4 files changed

+51
-3
lines changed

src/nested_pandas/series/accessor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:
114114

115115
return pd.DataFrame(flat_series)
116116

117+
@property
def list_lengths(self) -> list[int]:
    """Lengths of the list arrays.

    Delegates to the ``list_lengths`` property of the array backing the
    wrapped series.
    """
    backing_array = self._series.array
    return backing_array.list_lengths
121+
117122
@property
118123
def flat_length(self) -> int:
119124
"""Length of the flat arrays"""

src/nested_pandas/series/ext_array.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,24 @@ def field_names(self) -> list[str]:
761761
"""Names of the nested columns"""
762762
return [field.name for field in self._chunked_array.chunk(0).type]
763763

764+
def _iter_list_lengths(self) -> Generator[int, None, None]:
    """Iterate over the lengths of the list arrays.

    Walks the chunks of the underlying chunked array in order and yields one
    integer per element of each chunk, taken from the value lengths of the
    chunk's first field. Elements whose length is null yield 0.

    Yields
    ------
    int
        Length of the list for each element, 0 where the length is null.
    """
    for chunk in self._chunked_array.iterchunks():
        # Replace null lengths with 0 for the whole chunk and convert it in a
        # single call, instead of inspecting and unboxing one scalar at a time.
        yield from chunk.field(0).value_lengths().fill_null(0).to_pylist()
772+
773+
@property
def list_lengths(self) -> list[int]:
    """Lengths of the list arrays.

    Collects every value produced by ``_iter_list_lengths()`` into a new list.
    """
    return [*self._iter_list_lengths()]
777+
764778
@property
def flat_length(self) -> int:
    """Length of the flat arrays.

    Computed as the sum of all values produced by ``_iter_list_lengths()``,
    so it is 0 when that iterator yields nothing.
    """
    # Single implementation shared with `list_lengths`; the earlier per-chunk
    # `value_lengths().sum()` body is superseded by this one.
    return sum(self._iter_list_lengths())
770782

771783
@property
772784
def num_chunks(self) -> int:

tests/nested_pandas/series/test_accessor.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,19 @@ def test_fields():
286286
assert_array_equal(series.nest.fields, ["a", "b"])
287287

288288

289+
def test_list_lengths():
    """Check `.nest.list_lengths` on a packed series containing a None entry."""
    frames = [
        pd.DataFrame({"a": [1, 2, 3], "b": [1.0, 5.0, 6.0], "c": ["a", "b", "c"]}),
        None,
        pd.DataFrame({"a": [1, 2], "b": [None, 0.0], "c": ["a", "b"]}),
    ]
    series = pack_seq(frames)

    assert series.shape == (3,)
    # The None entry is expected to be reported with length 0.
    assert series.nest.list_lengths == [3, 0, 2]
300+
301+
289302
def test_flat_length():
290303
"""Test that the .nest.flat_length attribute works."""
291304
struct_array = pa.StructArray.from_arrays(

tests/nested_pandas/series/test_ext_array.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,24 @@ def test_field_names():
12261226
assert ext_array.field_names == ["a", "b"]
12271227

12281228

1229+
def test_list_lengths():
    """Check `list_lengths` of an extension array built from several chunks."""
    field_a = pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0, 2.0])])
    field_b = pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0, 6.0])])
    struct_array = pa.StructArray.from_arrays(arrays=[field_a, field_b], names=["a", "b"])

    # Chunks: two populated ones, one with no elements, one holding a single null.
    struct_type = struct_array.type
    chunks = [
        struct_array,
        pa.array([], type=struct_type),
        struct_array,
        pa.array([None], type=struct_type),
    ]
    ext_array = NestedExtensionArray(pa.chunked_array(chunks))

    assert ext_array.list_lengths == [3, 4, 3, 4, 0]
1245+
1246+
12291247
def test_flat_length():
12301248
"""Tests that the flat length of the extension array is correct."""
12311249
struct_array = pa.StructArray.from_arrays(

0 commit comments

Comments
 (0)