ENH adding alias "regression" and "classification" (skrub-data#1180)

jeromedockes · Dec 9, 2024 · cf3c354
1 parent da7dd9c
commit cf3c354
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 14 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -26,6 +26,10 @@ Changes
   printing of progress information when a table report is being generated.
   :pr:`1188` by :user:`Priscilla Baah<priscilla-b>`.
 
+* :func:`tabular_learner` accepts the alias ``"regression"`` for the option
+   ``"regressor"`` and ``"classification"`` for ``"classifier"``.
+   :pr:`1180` by :user:`Mojdeh Rastgoo <mrastgoo>`.
+
 Bug fixes
 ---------
 

diff --git a/skrub/_tabular_learner.py b/skrub/_tabular_learner.py
@@ -31,7 +31,7 @@ def tabular_learner(estimator, *, n_jobs=None):
     ``estimator``.
 
     Instead of an actual estimator, ``estimator`` can also be the special-cased strings
-    ``'regressor'`` or ``'classifier'`` to use a
+    ``'regressor'``, ``'regression'``, ``'classifier'`` or ``'classification'`` to use a
     :obj:`~sklearn.ensemble.HistGradientBoostingRegressor` or a
     :obj:`~sklearn.ensemble.HistGradientBoostingClassifier` with default
     parameters.
@@ -61,14 +61,14 @@ def tabular_learner(estimator, *, n_jobs=None):
 
     Parameters
     ----------
-    estimator : {"regressor", "classifier"} or scikit-learn estimator
+    estimator : {"regressor", "regression", "classifier", "classification"} or scikit-learn estimator
         The estimator to use as the final step in the pipeline. Based on the type of
         estimator, the previous preprocessing steps and their respective parameters are
         chosen. The possible values are:
 
-        - ``'regressor'``: a :obj:`~sklearn.ensemble.HistGradientBoostingRegressor`
+        - ``'regressor'`` or ``'regression'``: a :obj:`~sklearn.ensemble.HistGradientBoostingRegressor`
           is used as the final step;
-        - ``'classifier'``: a :obj:`~sklearn.ensemble.HistGradientBoostingClassifier`
+        - ``'classifier'`` or ``'classification'``: a :obj:`~sklearn.ensemble.HistGradientBoostingClassifier`
           is used as the final step;
         - a scikit-learn estimator: the provided estimator is used as the final step.
 
@@ -106,24 +106,24 @@ def tabular_learner(estimator, *, n_jobs=None):
 
     We can easily get a default pipeline for regression or classification:
 
-    >>> tabular_learner('regressor')                                    # doctest: +SKIP
+    >>> tabular_learner('regression')                                    # doctest: +SKIP
     Pipeline(steps=[('tablevectorizer',
                      TableVectorizer(high_cardinality=MinHashEncoder(),
                                      low_cardinality=ToCategorical())),
                     ('histgradientboostingregressor',
                      HistGradientBoostingRegressor(categorical_features='from_dtype'))])
 
-    When requesting a ``'regressor'``, the last step of the pipeline is set to a
+    When requesting ``'regression'``, the last step of the pipeline is set to a
     :obj:`~sklearn.ensemble.HistGradientBoostingRegressor`.
 
-    >>> tabular_learner('classifier')                                   # doctest: +SKIP
+    >>> tabular_learner('classification')                                   # doctest: +SKIP
     Pipeline(steps=[('tablevectorizer',
                      TableVectorizer(high_cardinality=MinHashEncoder(),
                                      low_cardinality=ToCategorical())),
                     ('histgradientboostingclassifier',
                      HistGradientBoostingClassifier(categorical_features='from_dtype'))])
 
-    When requesting a ``'classifier'``, the last step of the pipeline is set to a
+    When requesting ``'classification'``, the last step of the pipeline is set to a
     :obj:`~sklearn.ensemble.HistGradientBoostingClassifier`.
 
     This pipeline can be applied to rich tabular data:
@@ -227,18 +227,19 @@ def tabular_learner(estimator, *, n_jobs=None):
         cat_feat_kwargs = {"categorical_features": "from_dtype"}
 
     if isinstance(estimator, str):
-        if estimator == "classifier":
+        if estimator in ("classifier", "classification"):
             return tabular_learner(
                 ensemble.HistGradientBoostingClassifier(**cat_feat_kwargs),
                 n_jobs=n_jobs,
             )
-        if estimator == "regressor":
+        if estimator in ("regressor", "regression"):
             return tabular_learner(
                 ensemble.HistGradientBoostingRegressor(**cat_feat_kwargs),
                 n_jobs=n_jobs,
             )
         raise ValueError(
-            "If ``estimator`` is a string it should be 'regressor' or 'classifier'."
+            "If ``estimator`` is a string it should be 'regressor', 'regression',"
+            " 'classifier' or 'classification'."
         )
     if isinstance(estimator, type) and issubclass(estimator, BaseEstimator):
         raise TypeError(

diff --git a/skrub/tests/test_tabular_learner.py b/skrub/tests/test_tabular_learner.py
@@ -15,7 +15,9 @@
 )
 
 
-@pytest.mark.parametrize("learner_kind", ["regressor", "classifier"])
+@pytest.mark.parametrize(
+    "learner_kind", ["regressor", "regression", "classifier", "classification"]
+)
 def test_default_pipeline(learner_kind):
     p = tabular_learner(learner_kind)
     tv, learner = [e for _, e in p.steps]
@@ -26,14 +28,17 @@ def test_default_pipeline(learner_kind):
     else:
         assert isinstance(tv.low_cardinality, ToCategorical)
         assert learner.categorical_features == "from_dtype"
-    if learner_kind == "regressor":
+    if learner_kind in ("regressor", "regression"):
         assert isinstance(learner, ensemble.HistGradientBoostingRegressor)
     else:
         assert isinstance(learner, ensemble.HistGradientBoostingClassifier)
 
 
 def test_bad_learner():
-    with pytest.raises(ValueError, match=".*should be 'regressor' or 'classifier'"):
+    with pytest.raises(
+        ValueError,
+        match=".*should be 'regressor', 'regression', 'classifier' or 'classification'",
+    ):
         tabular_learner("bad")
     with pytest.raises(
         TypeError, match=".*Pass an instance of HistGradientBoostingRegressor"