Skip to content

Commit

Permalink
Renaming
Browse files (browse the repository at this point in the history)
  • Loading branch information
rcap107 committed Nov 25, 2024
1 parent 4b861d2 commit 1f78684
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,10 +5,10 @@
from . import _dataframe as sbd
from ._on_each_column import SingleColumnTransformer

__all__ = ["DropColumnIfNull"]
__all__ = ["DropIfTooManyNulls"]


class DropColumnIfNull(SingleColumnTransformer):
class DropIfTooManyNulls(SingleColumnTransformer):
"""Drop a single column if the fraction of Null or NaN values in the column
is larger than the given threshold.
Expand Down
20 changes: 10 additions & 10 deletions skrub/_table_vectorizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,7 +16,7 @@
from ._clean_categories import CleanCategories
from ._clean_null_strings import CleanNullStrings
from ._datetime_encoder import DatetimeEncoder
from ._drop_column_if_null import DropColumnIfNull
from ._drop_if_too_many_nulls import DropIfTooManyNulls
from ._gap_encoder import GapEncoder
from ._on_each_column import SingleColumnTransformer
from ._select_cols import Drop
Expand Down Expand Up @@ -192,12 +192,12 @@ class TableVectorizer(TransformerMixin, BaseEstimator):
similar functionality to what is offered by scikit-learn's
:class:`~sklearn.compose.ColumnTransformer`.
null_threshold : float or None, default=1.0
Fraction of null above which the column is dropped. If `null_threshold` is
drop_null_fraction : float or None, default=1.0
Fraction of null above which the column is dropped. If `drop_null_fraction` is
set to ``1.0``, the column is dropped if it contains only
nulls or NaNs (this is the default behavior). If `null_threshold` is a
nulls or NaNs (this is the default behavior). If `drop_null_fraction` is a
number in ``[0.0, 1.0)``, the column is dropped if the fraction of nulls
is strictly larger than `null_threshold`. If `null_threshold` is ``None``,
is strictly larger than `drop_null_fraction`. If `drop_null_fraction` is ``None``,
this selection is disabled: no columns are dropped based on the number
of null values they contain.
Expand Down Expand Up @@ -325,7 +325,7 @@ class TableVectorizer(TransformerMixin, BaseEstimator):
We can inspect all the processing steps that were applied to a given column:
>>> vectorizer.all_processing_steps_['B']
[CleanNullStrings(), DropColumnIfNull(), ToDatetime(), DatetimeEncoder(), {'B_day': ToFloat32(), 'B_month': ToFloat32(), ...}]
[CleanNullStrings(), DropIfTooManyNulls(), ToDatetime(), DatetimeEncoder(), {'B_day': ToFloat32(), 'B_month': ToFloat32(), ...}]
Note that as the encoder (``DatetimeEncoder()`` above) produces multiple
columns, the last processing step is not described by a single transformer
Expand Down Expand Up @@ -400,7 +400,7 @@ class TableVectorizer(TransformerMixin, BaseEstimator):
``ToDatetime()``:
>>> vectorizer.all_processing_steps_
{'A': [Drop()], 'B': [OrdinalEncoder()], 'C': [CleanNullStrings(), DropColumnIfNull(), ToFloat32(), PassThrough(), {'C': ToFloat32()}]}
{'A': [Drop()], 'B': [OrdinalEncoder()], 'C': [CleanNullStrings(), DropIfTooManyNulls(), ToFloat32(), PassThrough(), {'C': ToFloat32()}]}
Specifying several ``specific_transformers`` for the same column is not allowed.
Expand All @@ -423,7 +423,7 @@ def __init__(
numeric=NUMERIC_TRANSFORMER,
datetime=DATETIME_TRANSFORMER,
specific_transformers=(),
null_threshold=1.0,
drop_null_fraction=1.0,
n_jobs=None,
):
self.cardinality_threshold = cardinality_threshold
Expand All @@ -437,7 +437,7 @@ def __init__(
self.datetime = _utils.clone_if_default(datetime, DATETIME_TRANSFORMER)
self.specific_transformers = specific_transformers
self.n_jobs = n_jobs
self.null_threshold = null_threshold
self.drop_null_fraction = drop_null_fraction

def fit(self, X, y=None):
"""Fit transformer.
Expand Down Expand Up @@ -551,7 +551,7 @@ def add_step(steps, transformer, cols, allow_reject=False):
self._preprocessors = [CheckInputDataFrame()]

transformer_list = [CleanNullStrings()]
transformer_list.append(DropColumnIfNull(self.null_threshold))
transformer_list.append(DropIfTooManyNulls(self.drop_null_fraction))

transformer_list += [
ToDatetime(),
Expand Down

0 comments on commit 1f78684

Please sign in to comment.