Skip to content

Commit 6e2f433

Browse files
authored
Merge pull request #136 from lincc-frameworks/from_lists
add pack_lists class function
2 parents 975fbc8 + b087283 commit 6e2f433

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

src/nested_pandas/nestedframe/core.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,69 @@ def from_flat(cls, df, base_columns, nested_columns=None, index=None, name="nest
214214
nested_columns = [col for col in df.columns if col not in base_columns]
215215
return out_df.add_nested(df[nested_columns], name=name)
216216

217+
@classmethod
def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
    """Creates a NestedFrame with base and nested columns from a dataframe
    with list-valued columns.

    Parameters
    ----------
    df: pd.DataFrame or NestedFrame
        A dataframe with list columns.
    base_columns: list-like, or None
        Any columns that have non-list values in the input df. These will
        simply be kept as identical columns in the result. If None and
        `list_columns` is provided, is defined as all columns not in
        `list_columns`. If both are None, the result has no base columns.
    list_columns: list-like, or None
        The list-value columns that should be packed into a nested column.
        All columns in the list will attempt to be packed into a single
        nested column with the name provided in `name`. If None, is
        defined as all columns not in `base_columns` (or every column of
        `df` when `base_columns` is also None).
    name: str
        The name of the output column the `list_columns` are packed into.

    Returns
    -------
    NestedFrame
        A NestedFrame with the specified nesting structure.

    Raises
    ------
    ValueError
        If no columns end up assigned as list columns.

    Examples
    --------

    >>> nf = NestedFrame({"c":[1,2,3], "d":[2,4,6],
    ...                   "e":[[1,2,3], [4,5,6], [7,8,9]]},
    ...                   index=[0,1,2])

    >>> NestedFrame.from_lists(nf, base_columns=["c","d"])
    """

    # Resolve base and list columns
    if list_columns is None:
        if base_columns is None:
            # with no inputs, assume all columns are list-valued
            list_columns = df.columns
        else:
            # with defined base_columns, assume everything else is list
            list_columns = [col for col in df.columns if col not in base_columns]
    elif base_columns is None:
        # if list_columns are defined, assume everything else is base
        base_columns = [col for col in df.columns if col not in list_columns]

    # len() rather than truthiness: list_columns may be a pandas Index
    # (df.columns), whose truth value is ambiguous.
    if len(list_columns) == 0:
        raise ValueError("No columns were assigned as list columns.")

    # Pack list columns into a nested column
    packed = packer.pack_lists(df[list_columns])
    packed.name = name

    # With no base columns, the packed column alone forms the frame
    if base_columns is None:
        return cls(packed.to_frame())

    # Join the nested column to the base-column frame. When `df` is a plain
    # pd.DataFrame the join yields a plain DataFrame, so wrap it to honor the
    # documented NestedFrame return type.
    joined = df[base_columns].join(packed)
    return joined if isinstance(joined, cls) else cls(joined)
279+
217280
def _split_query(self, expr) -> dict:
218281
"""Splits a pandas query into multiple subqueries for nested and base layers"""
219282
# Ensure query has needed spacing for upcoming split

tests/nested_pandas/nestedframe/test_nestedframe.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,59 @@ def test_recover_from_flat():
320320
assert nf2.equals(nf)
321321

322322

323+
def test_from_lists():
    """Check NestedFrame.from_lists across base/list column combinations"""

    def check_layout(frame, expected_columns, expected_nested):
        # Same two assertions for every scenario: overall column order and
        # which of those columns are nested.
        assert list(frame.columns) == expected_columns
        assert list(frame.nested_columns) == expected_nested

    single_list_data = {"c": [1, 2, 3], "d": [2, 4, 6], "e": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}
    nf = NestedFrame(single_list_data, index=[0, 1, 2])

    # Only base columns given, custom output name
    packed = NestedFrame.from_lists(nf, base_columns=["c", "d"], name="nested_e")
    check_layout(packed, ["c", "d", "nested_e"], ["nested_e"])

    # Both base and list columns given
    packed = NestedFrame.from_lists(nf, base_columns=["c", "d"], list_columns=["e"])
    check_layout(packed, ["c", "d", "nested"], ["nested"])

    # Only list columns given
    packed = NestedFrame.from_lists(nf, list_columns=["e"])
    check_layout(packed, ["c", "d", "nested"], ["nested"])

    # Every column claimed as base leaves nothing to pack
    with pytest.raises(ValueError):
        NestedFrame.from_lists(nf, base_columns=["c", "d", "e"])

    # Multiple list columns (of uneven length)
    multi_list_data = {
        "c": [1, 2, 3],
        "d": [2, 4, 6],
        "e": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
        "f": [[10, 20, 30], [40, 50, 60, 70], [80, 90]],
    }
    nf2 = NestedFrame(multi_list_data, index=[0, 1, 2])

    packed = NestedFrame.from_lists(nf2, list_columns=["e", "f"])
    check_layout(packed, ["c", "d", "nested"], ["nested"])
    assert list(packed.nested.nest.fields) == ["e", "f"]

    # Subsetting: unlisted columns are dropped from the result
    packed = NestedFrame.from_lists(nf, base_columns=["c"], list_columns=["e"])
    check_layout(packed, ["c", "nested"], ["nested"])

    # Explicitly empty base columns
    packed = NestedFrame.from_lists(nf, base_columns=[], list_columns=["e"])
    check_layout(packed, ["nested"], ["nested"])

    # Nothing specified: every column is treated as a list column
    packed = NestedFrame.from_lists(nf[["e"]], base_columns=None, list_columns=None)
    check_layout(packed, ["nested"], ["nested"])
374+
375+
323376
def test_query():
324377
"""Test that NestedFrame.query handles nested queries correctly"""
325378

0 commit comments

Comments
 (0)