Skip to content

Commit 99ecc3e

Browse files
authored
Custom HTML Rendering for Nested Columns (#103)
* nested html implementation
1 parent bbe1866 commit 99ecc3e

File tree

8 files changed

+83
-7
lines changed

8 files changed

+83
-7
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Nested-Pandas allows data like this:
2727
To instead be represented like this:
2828

2929
<p align="center">
30-
<img src="./docs/intro_images/nestedframe.png" alt="nestedframe" width="400"/>
30+
<img src="./docs/intro_images/nestedframe_example.png" alt="nestedframe" width="300"/>
3131
</p>
3232

3333
Where the nested data is represented as nested dataframes:

docs/gettingstarted/quickstart.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@
9999
"source": [
100100
"The above dataframe is a `NestedFrame`, which extends the capabilities of the Pandas `DataFrame` to support columns with nested information. \n",
101101
"\n",
102-
"We now have the top level dataframe with 3 rows, each of which corresponds to a single object. The table has three columns beyond \"id\". Two columns, \"ra\" and \"dec\", have a single value for the object (in this case the position on the sky). The last column \"lightcurve\" contains a nested table with a series of observation times and observation brightnesses for the object. As we will see below, this nested table allows the user to easily access to the all of the observations for a given object.\n",
102+
"We now have the top-level dataframe with 3 rows, each of which corresponds to a single object. The table has three columns beyond \"id\". Two columns, \"ra\" and \"dec\", have a single value for the object (in this case the position on the sky). The last column \"lightcurve\" contains a nested table with a series of observation times and observation brightnesses for the object. The first row of this nested table is shown along with its dimensions to give a sense of the contents of the nested data. As we will see below, this nested table allows the user to easily access all of the observations for a given object.\n",
103103
"\n",
104104
"## Accessing Nested Data\n",
105105
"\n",

docs/index.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ Nested-Pandas allows data like this:
2323

2424
To instead be represented like this:
2525

26-
.. image:: ./intro_images/nestedframe.png
27-
:width: 400
26+
.. image:: ./intro_images/nestedframe_example.png
27+
:width: 300
2828
:align: center
2929
:alt: pandas dataframes
3030

@@ -36,7 +36,7 @@ Where the nested data is represented as nested dataframes:
3636
object_nf.loc[0]["nested_sources"]
3737
3838
.. image:: ./intro_images/loc_into_nested.png
39-
:width: 225
39+
:width: 300
4040
:align: center
4141
:alt: pandas dataframes
4242

docs/intro_images/nestedframe.png

-109 KB
Binary file not shown.
48.9 KB
Loading

docs/tutorials/nested_spectra.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"outputs": [],
5454
"source": [
5555
"# Query SDSS for the corresponding spectra\n",
56+
"SDSS.clear_cache()\n",
5657
"sp = SDSS.get_spectra(matches=xid)\n",
5758
"sp"
5859
]
@@ -161,7 +162,7 @@
161162
],
162163
"metadata": {
163164
"kernelspec": {
164-
"display_name": "Python 3 (ipykernel)",
165+
"display_name": "lsdb",
165166
"language": "python",
166167
"name": "python3"
167168
},
@@ -175,7 +176,7 @@
175176
"name": "python",
176177
"nbconvert_exporter": "python",
177178
"pygments_lexer": "ipython3",
178-
"version": "3.12.6"
179+
"version": "3.12.8"
179180
}
180181
},
181182
"nbformat": 4,

src/nested_pandas/nestedframe/core.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from nested_pandas.series.dtype import NestedDtype
2020
from nested_pandas.series.packer import pack, pack_lists, pack_sorted_df_into_struct
2121

22+
# NOTE(review): this runs at import time and changes pandas' global
# "display.max_rows" option for the whole process, not only for NestedFrame.
# _repr_html_ in this file reads the option back through pd.get_option to
# decide when a frame is rendered truncated. Confirm that overriding the
# user's pandas configuration on import is intended.
pd.set_option("display.max_rows", 30)
23+
# Row count that _repr_html_ in this file passes as max_rows once a frame has
# more rows than "display.max_rows".
# NOTE(review): like the max_rows override, this is a global pandas option
# changed as an import side effect.
pd.set_option("display.min_rows", 5)
24+
2225
# Used to identify backtick-protected names in the expressions
2326
# used in NestedFrame.eval() and NestedFrame.query().
2427
_backtick_protected_names = re.compile(r"`[^`]+`", re.MULTILINE)
@@ -274,6 +277,52 @@ def nested_columns(self) -> list:
274277
nest_cols.append(column)
275278
return nest_cols
276279

280+
def _repr_html_(self) -> str | None:
    """Build the HTML representation of the frame.

    Frames without nested columns are rendered with ``DataFrame.to_html``.
    When nested columns are present, every nested cell is rendered as a
    one-row HTML preview of its contents (at most five columns) together
    with its dimensions; the nested cells of the first row additionally
    carry the nested column header.

    Truncation follows the pandas ``display.max_rows`` and
    ``display.min_rows`` options: a frame with more than ``max_rows`` rows
    is rendered with ``min_rows`` rows, otherwise all rows are rendered.

    Returns
    -------
    str
        HTML markup. For frames with nested columns a plain-text
        ``"R rows x C columns"`` footer is appended after the table.
    """
    max_rows = pd.get_option("display.max_rows")
    min_rows = pd.get_option("display.min_rows")

    # pandas documents ``None`` as "no limit" for display.max_rows; comparing
    # the row count against None would raise TypeError, so None never truncates.
    truncated = max_rows is not None and self.shape[0] > max_rows
    if truncated:
        # pandas documents a min_rows of None as "follow max_rows"
        visible_rows = max_rows if min_rows is None else min_rows
    else:
        visible_rows = max_rows

    nested_cols = self.nested_columns

    # Without nested columns, just do representation as normal
    if len(nested_cols) == 0:
        return super().to_html(max_rows=visible_rows, show_dimensions=True)

    # Nested Column Formatting
    # cells after the first row show only a preview row
    def repack_row(chunk):
        return chunk.to_html(max_rows=1, max_cols=5, show_dimensions=True, index=False, header=False)

    # first cell shows the nested df header and a preview row
    def repack_first_cell(chunk):
        # Render header separately to keep data aligned
        header = chunk.head(0).to_html(
            max_rows=0, max_cols=5, show_dimensions=False, index=False, header=True
        )
        return header + repack_row(chunk)

    # Row subsets are (row_labels, all_columns) tuples built from positional
    # slices of the index:
    #  * index[:1] / index[1:] never raise, so empty and single-row frames
    #    render instead of failing with IndexError on index[0] / index[1];
    #  * the explicit tuple keeps Styler from reading a bare label as a
    #    column selector (its documented handling of a single key).
    # NOTE(review): selection is still label-based, so duplicated index labels
    # may give the header to more than one row - confirm if that matters.
    first_row = pd.IndexSlice[self.index[:1], :]
    other_rows = pd.IndexSlice[self.index[1:], :]

    # Apply repacking to all nested columns
    styler = self.style.format({col: repack_first_cell for col in nested_cols}, subset=first_row)
    styler = styler.format({col: repack_row for col in nested_cols}, subset=other_rows)

    # Recover some truncation formatting, limited to head truncation
    if truncated:
        html_repr = styler.to_html(max_rows=visible_rows)
    else:
        # when under the max_rows threshold, display all rows (behavior of 0 here)
        html_repr = styler.to_html(max_rows=0)

    # Manually append dimensionality to a styler output
    n_rows, n_cols = styler.data.shape
    html_repr += f"{n_rows} rows x {n_cols} columns"

    return html_repr
277326
def _parse_hierarchical_components(self, delimited_path: str, delimiter: str = ".") -> list[str]:
278327
"""
279328
Given a string that may be a delimited path, parse it into its components,

tests/nested_pandas/nestedframe/test_nestedframe.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,32 @@ def test_nestedseries_construction():
3434
assert (frame[0] == [1, 2, 3]).all()
3535

3636

37+
def test_html_repr():
    """Check that the HTML representation renders for every code path.

    Covers a frame with a nested column, the same frame restricted to its
    non-nested columns, and a generated frame large enough to be truncated.
    Each result must be a string; the nested rendering must also end with
    the dimensions footer that ``_repr_html_`` appends.
    """

    base = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    assert list(base.all_columns.keys()) == ["base"]
    assert list(base.all_columns["base"]) == list(base.columns)

    nested = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )

    base = base.add_nested(nested, "nested")

    # Check nested repr: three base rows, columns "a", "b" and "nested"
    nested_html = base._repr_html_()
    assert isinstance(nested_html, str)
    assert nested_html.endswith("3 rows x 3 columns")

    # Check repr path without nested cols
    flat_html = base[["a", "b"]]._repr_html_()
    assert isinstance(flat_html, str)

    # Check repr truncation for larger nf
    nf = generate_data(100, 2)
    truncated_html = nf._repr_html_()
    assert isinstance(truncated_html, str)
61+
62+
3763
def test_all_columns():
3864
"""Test the all_columns function"""
3965

0 commit comments

Comments
 (0)