From 30457443fcbd1f4241cbd754eb9e0f4d8bb42f95 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 24 Jul 2025 21:26:05 +0000 Subject: [PATCH 1/3] remove expensive len() call --- bigframes/display/anywidget.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 8bbb72c11a..60eee48ac2 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -44,8 +44,10 @@ class TableWidget(WIDGET_BASE): - """ - An interactive, paginated table widget for BigFrames DataFrames. + """An interactive, paginated table widget for BigFrames DataFrames. + + This widget provides a user-friendly way to display and navigate through + large BigQuery DataFrames within a Jupyter environment. """ def __init__(self, dataframe: bigframes.dataframe.DataFrame): @@ -74,15 +76,18 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): # Initialize data fetching attributes. self._batches = dataframe.to_pandas_batches(page_size=initial_page_size) + # Access total_rows through type casting (internal use only) + from bigframes.core.blocks import PandasBatches + + if isinstance(self._batches, PandasBatches): + self.row_count = self._batches.total_rows or 0 + else: + # Fallback for compatibility + self.row_count = 0 + # set traitlets properties that trigger observers self.page_size = initial_page_size - # len(dataframe) is expensive, since it will trigger a - # SELECT COUNT(*) query. It is a must have however. - # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()` - # before we get here so that the count might already be cached. - self.row_count = len(dataframe) - # get the initial page self._set_table_html() From 303c4af1eb48ce34ad8bad8c41bea1bd02b55569 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 24 Jul 2025 23:21:12 +0000 Subject: [PATCH 2/3] add testcase --- notebooks/dataframes/anywidget_mode.ipynb | 10 +- tests/system/small/test_anywidget.py | 116 ++++++++++++++++++++-- 2 files changed, 115 insertions(+), 11 deletions(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index f6380a9fd4..240fca765b 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -75,7 +75,7 @@ { "data": { "text/html": [ - "Query job c5fcfd5e-1617-49c8-afa3-86ca21019de4 is DONE. 0 Bytes processed. Open Job" + "Query job 1ea2b594-2bd7-46de-a3c8-6aeee5884ba2 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -141,7 +141,7 @@ { "data": { "text/html": [ - "Query job ab900a53-5011-4e80-85d5-0ef2958598db is DONE. 171.4 MB processed. Open Job" + "Query job 67e679e9-94da-47f7-8be1-8b4a496fbfbd is DONE. 171.4 MB processed. Open Job" ], "text/plain": [ "" @@ -153,7 +153,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bda63ba739dc4d5f83a5e18eb27b2686", + "model_id": "e74c3920b93644a0b2afdaa3841cad31", "version_major": 2, "version_minor": 1 }, @@ -204,7 +204,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9bffeb73549e48419c3dd895edfe30e8", + "model_id": "b4f7a3f86ef54e07b24ef10061088391", "version_major": 2, "version_minor": 1 }, @@ -290,7 +290,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dfd4fa3a1c6f4b3eb1877cb0e9ba7e94", + "model_id": "44a829aca2f24cfdba4b61afd1a259fe", "version_major": 2, "version_minor": 1 }, diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 8a91176dd9..fbb8851ef9 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pandas as pd import pytest @@ -61,11 +63,12 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame): Helper fixture to create a TableWidget instance with a fixed page size. This reduces duplication across tests that use the same widget configuration. """ - from bigframes import display + + from bigframes.display import TableWidget with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): # Delay context manager cleanup of `max_rows` until after tests finish. - yield display.TableWidget(paginated_bf_df) + yield TableWidget(paginated_bf_df) @pytest.fixture(scope="module") @@ -90,10 +93,10 @@ def small_bf_df( @pytest.fixture def small_widget(small_bf_df): """Helper fixture for tests using a DataFrame smaller than the page size.""" - from bigframes import display + from bigframes.display import TableWidget with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): - yield display.TableWidget(small_bf_df) + yield TableWidget(small_bf_df) @pytest.fixture(scope="module") @@ -135,10 +138,10 @@ def test_widget_initialization_should_calculate_total_row_count( paginated_bf_df: bf.dataframe.DataFrame, ): """A TableWidget should correctly calculate the total row count on creation.""" - from bigframes import display + from bigframes.display import TableWidget with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): - widget = display.TableWidget(paginated_bf_df) + widget = TableWidget(paginated_bf_df) assert widget.row_count == EXPECTED_ROW_COUNT @@ -436,6 +439,107 @@ def test_widget_creation_should_load_css_for_rendering(table_widget): assert ".bigframes-widget .footer" in css_content +def test_widget_row_count_should_be_immutable_after_creation( + paginated_bf_df: bf.dataframe.DataFrame, +): + """ + Given a widget created with a specific configuration when global display + options are changed later, the widget's original row_count should remain + unchanged. + """ + from bigframes.display import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + widget = TableWidget(paginated_bf_df) + initial_row_count = widget.row_count + assert initial_row_count == EXPECTED_ROW_COUNT + + # Change a global option that could influence row count + bf.options.display.max_rows = 10 + + # The widget's row count was fixed at creation and should not change. + assert widget.row_count == initial_row_count + + +def test_widget_should_fallback_to_zero_rows_when_total_rows_is_none( + paginated_bf_df: bf.dataframe.DataFrame, monkeypatch: pytest.MonkeyPatch +): + """ + Given an internal component that fails to provide a total row count, + when the widget is created, the row_count should safely fall back to 0. + """ + from bigframes.core.blocks import PandasBatches + + # Simulate an internal failure where total_rows returns None + monkeypatch.setattr(PandasBatches, "total_rows", property(lambda self: None)) + + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + + assert widget.row_count == 0 + + +def test_widget_should_fallback_to_zero_rows_when_batches_are_invalid_type( + paginated_bf_df: bf.dataframe.DataFrame, monkeypatch: pytest.MonkeyPatch +): + """ + Given an internal component that returns an unexpected data type, + when the widget is created, the row_count should safely fall back to 0. + """ + # Simulate internal method returning an unexpected type (a simple iterator) + def mock_to_pandas_batches(self, **kwargs): + return iter([paginated_bf_df.to_pandas().iloc[:2]]) + + monkeypatch.setattr( + "bigframes.dataframe.DataFrame.to_pandas_batches", mock_to_pandas_batches + ) + + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + + assert widget.row_count == 0 + + +@pytest.mark.parametrize( + "max_results, expected_rows", + [ + (None, EXPECTED_ROW_COUNT), + (3, 3), + (10, EXPECTED_ROW_COUNT), + ], + ids=["no_limit", "limit_is_less_than_total", "limit_is_greater_than_total"], +) +def test_widget_row_count_should_respect_max_results_on_creation( + paginated_bf_df: bf.dataframe.DataFrame, + max_results: typing.Optional[int], + expected_rows: int, +): + """ + Given a max_results value, when a TableWidget is created with custom batches, + its row_count should be correctly capped by that value. + """ + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + from bigframes.core.blocks import PandasBatches + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + + # Override batches with max_results to test the behavior + widget._batches = paginated_bf_df.to_pandas_batches( + page_size=widget.page_size, max_results=max_results + ) + + # Re-apply thelogic to update row_count + if isinstance(widget._batches, PandasBatches): + widget.row_count = widget._batches.total_rows or 0 + + assert widget.row_count == expected_rows + + # TODO(shuowei): Add tests for custom index and multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness. From fc38cf39d68219ccffe7d137c4526f3b9556d6c0 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 24 Jul 2025 23:24:46 +0000 Subject: [PATCH 3/3] fix a typo --- tests/system/small/test_anywidget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index fbb8851ef9..37ff53fa01 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -533,7 +533,7 @@ def test_widget_row_count_should_respect_max_results_on_creation( page_size=widget.page_size, max_results=max_results ) - # Re-apply thelogic to update row_count + # Re-apply the logic to update row_count if isinstance(widget._batches, PandasBatches): widget.row_count = widget._batches.total_rows or 0