diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index ae9ce2e022df6..223fef687f65f 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -16,7 +16,7 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-import pandas as pd
+import polars as pl
 
 from sklearn import datasets
 from sklearn.decomposition import PCA
@@ -63,11 +63,15 @@
 ax0.legend(prop=dict(size=12))
 
 # For each number of components, find the best classifier results
-results = pd.DataFrame(search.cv_results_)
 components_col = "param_pca__n_components"
-best_clfs = results.groupby(components_col)[
-    [components_col, "mean_test_score", "std_test_score"]
-].apply(lambda g: g.nlargest(1, "mean_test_score"))
+is_max_test_score = pl.col("mean_test_score") == pl.col("mean_test_score").max()
+best_clfs = (
+    pl.LazyFrame(search.cv_results_)
+    .filter(is_max_test_score.over(components_col))
+    .unique(components_col)
+    .sort(components_col)
+    .collect()
+)
 ax1.errorbar(
     best_clfs[components_col],
     best_clfs["mean_test_score"],