-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from lincc-frameworks/init_nestedframe
initialize nestedframe
- Loading branch information
Showing
4 changed files
with
139 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
from .example_module import greetings, meaning | ||
from .nestedframe import NestedFrame | ||
|
||
# Imported only for its side effect: loading the module registers the series accessor | ||
from .series.accessor import NestSeriesAccessor # noqa: F401 | ||
from .series.dtype import NestedDtype | ||
|
||
__all__ = ["greetings", "meaning", "NestedDtype"] | ||
__all__ = ["greetings", "meaning", "NestedDtype", "NestedFrame"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .core import NestedFrame # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# typing.Self and "|" union syntax don't exist in Python 3.9 | ||
from __future__ import annotations | ||
|
||
import pandas as pd | ||
|
||
from nested_pandas.series import packer | ||
from nested_pandas.series.dtype import NestedDtype | ||
|
||
|
||
class NestedFrame(pd.DataFrame):
    """A pandas DataFrame subclass with support for nested structure.

    Columns whose dtype is ``NestedDtype`` are treated as nested columns; the
    sub-columns ("fields") of such a column are read through its ``.nest``
    series accessor.

    See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures
    """

    # Names of custom attributes that pandas should carry over to frames
    # derived from this one (see the subclassing guide linked above).
    # NOTE(review): "added_property" is not defined anywhere in this class.
    _metadata = ["added_property"]

    @property
    def _constructor(self) -> "type[NestedFrame]":
        """Class pandas uses to build results of the same dimensionality."""
        return NestedFrame

    @property
    def _constructor_expanddim(self) -> "type[NestedFrame]":
        """Class pandas uses to build results with one more dimension."""
        return NestedFrame

    @property
    def all_columns(self) -> dict:
        """Return the column labels of the base frame and of every nested frame.

        Returns
        -------
        dict
            ``"base"`` maps to this frame's own column index; each nested
            column name maps to the fields reported by its ``.nest`` accessor.
        """
        columns_by_frame = {"base": self.columns}
        # NOTE: a nested column that is itself named "base" would replace the
        # entry above, exactly as in the original implementation.
        for label in self.nested_columns:
            columns_by_frame[label] = self[label].nest.fields
        return columns_by_frame

    @property
    def nested_columns(self) -> list:
        """Return the names of all columns with a ``NestedDtype``, in column order."""
        # Inspect the dtypes directly instead of materializing every column.
        return [
            label
            for label, dtype in self.dtypes.items()
            if isinstance(dtype, NestedDtype)
        ]

    def _is_known_hierarchical_column(self, colname) -> bool:
        """Determine whether ``colname`` names a field of a nested column.

        A hierarchical name has the form ``"<nested column>.<field>"``.

        Parameters
        ----------
        colname
            Candidate column label. Anything that is not a string, or that
            contains no ``"."``, is never hierarchical.

        Returns
        -------
        bool
            True only if the text before the first ``"."`` is a nested column
            of this frame and the remainder is one of that column's fields.
        """
        if not isinstance(colname, str):
            return False
        # Split on the first dot only: a label with several dots used to raise
        # ValueError when unpacking the result of ``split(".")``.
        nested_name, dot, field = colname.partition(".")
        if not dot or nested_name not in self.nested_columns:
            return False
        return field in self.all_columns[nested_name]

    def add_nested(self, nested, name) -> "NestedFrame":
        """Pack a dataframe into a nested column and attach it to this frame.

        Parameters
        ----------
        nested
            The flat dataframe to pack; handed to ``packer.pack_flat``.
            NOTE(review): presumably its index must line up with this frame's
            index -- confirm against ``packer.pack_flat``.
        name
            Name passed to the packer for the resulting column.

        Returns
        -------
        NestedFrame
            The result of ``assign``: a new frame with the packed column added
            under the packed series' name. ``self`` is left unmodified.
        """
        packed = packer.pack_flat(nested, name=name)
        return self.assign(**{f"{packed.name}": packed})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import pandas as pd | ||
from nested_pandas import NestedFrame | ||
|
||
|
||
def test_nestedframe_construction():
    """A NestedFrame built from plain column data is a NestedFrame instance."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    assert isinstance(frame, NestedFrame)
|
||
|
||
def test_all_columns():
    """all_columns lists the base columns and, once packed, the nested fields."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # Without any nested column only the "base" entry is present.
    columns_before = frame.all_columns
    assert list(columns_before.keys()) == ["base"]
    assert list(columns_before["base"]) == list(frame.columns)

    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = frame.add_nested(flat, "nested")

    # After packing, the nested column contributes its own entry.
    columns_after = frame.all_columns
    assert list(columns_after.keys()) == ["base", "nested"]
    assert list(columns_after["nested"]) == list(flat.columns)
|
||
|
||
def test_nested_columns():
    """nested_columns reports exactly the column created by add_nested."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    packed_frame = frame.add_nested(flat, "nested")

    assert packed_frame.nested_columns == ["nested"]
|
||
|
||
def test_is_known_hierarchical_column():
    """Only "<nested column>.<field>" labels are recognized as hierarchical."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])
    packed_frame = frame.add_nested(flat, "nested")

    is_known = packed_frame._is_known_hierarchical_column

    # A real field of the nested column is recognized ...
    assert is_known("nested.c")
    # ... while a base column used as a field name, or "base" used as a prefix, is not.
    assert not is_known("nested.b")
    assert not is_known("base.a")
|
||
|
||
def test_add_nested():
    """add_nested attaches a nested column that flattens back to its input."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    packed_frame = frame.add_nested(flat, "nested")

    assert "nested" in packed_frame.columns
    # Unpacking the nested column must reproduce the original flat frame.
    round_tripped = packed_frame.nested.nest.to_flat()
    assert round_tripped.equals(flat)