class DataFramePlottingLarge:
    """
    Benchmark ``DataFrame.plot`` on larger frames.

    Every shape is timed twice: once with a minute-frequency
    ``DatetimeIndex`` (the case reported as slow in GH#61398 / GH#61532) and
    once with the default integer index, so the two can be compared directly.
    A single-column plot of the same frame is timed as a baseline.
    """

    # First axis: (rows, cols) of the frame. Second axis: whether the frame
    # is given a DatetimeIndex.
    params = [
        [(1000, 10), (1000, 50), (1000, 100), (5000, 20), (10000, 10)],
        [True, False],
    ]
    param_names = ["size", "datetime_index"]

    def setup(self, size, datetime_index):
        """
        Build the frame and the baseline column used by the timed methods.

        Parameters
        ----------
        size : tuple of (int, int)
            Number of rows and columns of the generated frame.
        datetime_index : bool
            If True, index the frame by minute-frequency timestamps starting
            at 2020-01-01; otherwise leave the default integer index.
        """
        rows, cols = size

        # ``index=None`` lets DataFrame fall back to its default integer
        # index, so a single constructor call covers both parametrizations.
        index = None
        if datetime_index:
            index = date_range("2020-01-01", periods=rows, freq="min")

        self.df = DataFrame(
            np.random.randn(rows, cols),
            index=index,
            columns=[f"col_{i}" for i in range(cols)],
        )

        # Selected here rather than in the timed method so the baseline
        # measures plotting only, not the ``iloc`` lookup.
        self.single_column = self.df.iloc[:, 0]

    def teardown(self, size, datetime_index):
        """Close the figures opened by the timed calls so they don't pile up."""
        # Assumes the default matplotlib plotting backend. Imported locally
        # because only this hook needs pyplot.
        import matplotlib.pyplot as plt

        plt.close("all")

    def time_plot_large_dataframe(self, size, datetime_index):
        """Time plotting every column of the frame (GH#61398 / GH#61532)."""
        self.df.plot()

    def time_plot_large_dataframe_single_column(self, size, datetime_index):
        """Baseline: time plotting only the first column of the same frame."""
        self.single_column.plot()