From 3732f700da72642afe54095d4b252b05c5018c7d Mon Sep 17 00:00:00 2001 From: Damien Wojtowicz Date: Sat, 15 Jan 2022 00:56:55 +0100 Subject: [PATCH] Refactored feature renaming (#821) --- docs/releases/unreleased.md | 5 ++ river/compose/__init__.py | 4 +- river/compose/pipeline.py | 16 +++--- river/compose/rename.py | 33 ------------ river/compose/renamer.py | 94 +++++++++++++++++++++++++++++++++++ river/compose/test_product.py | 18 +++++++ river/test_estimators.py | 3 ++ 7 files changed, 129 insertions(+), 44 deletions(-) delete mode 100644 river/compose/rename.py create mode 100644 river/compose/renamer.py diff --git a/docs/releases/unreleased.md b/docs/releases/unreleased.md index 5a635a6b24..819a36ed1f 100644 --- a/docs/releases/unreleased.md +++ b/docs/releases/unreleased.md @@ -4,6 +4,11 @@ - Created this module to store estimator unit testing, rather than having it in the `utils` module. +## compose + +- Split `compose.Renamer` into `compose.Prefixer` and `compose.Suffixer`, which respectively prepend and append a string to the features' names. +- Changed `compose.Renamer` to allow feature renaming following a mapping. + ## evaluate - Refactored `evaluate.progressive_validation` to work with `base.AnomalyDetector`s. 
diff --git a/river/compose/__init__.py b/river/compose/__init__.py index 38604edb1d..af22431387 100644 --- a/river/compose/__init__.py +++ b/river/compose/__init__.py @@ -8,7 +8,7 @@ from .grouper import Grouper from .pipeline import Pipeline from .product import TransformerProduct -from .rename import Renamer +from .renamer import Prefixer, Renamer, Suffixer from .select import Discard, Select, SelectType from .target_transform import TargetTransformRegressor from .union import TransformerUnion @@ -18,9 +18,11 @@ "FuncTransformer", "Grouper", "Pipeline", + "Prefixer", "Renamer", "Select", "SelectType", + "Suffixer", "TargetTransformRegressor", "TransformerProduct", "TransformerUnion", diff --git a/river/compose/pipeline.py b/river/compose/pipeline.py index b79f97112b..10359d66ab 100644 --- a/river/compose/pipeline.py +++ b/river/compose/pipeline.py @@ -282,8 +282,8 @@ class Pipeline(base.Estimator): ... ('A harsh comment', False) ... ] - >>> tfidf = fx.TFIDF() | compose.Renamer(prefix='tfidf_') - >>> counts = fx.BagOfWords() | compose.Renamer(prefix='count_') + >>> tfidf = fx.TFIDF() | compose.Prefixer('tfidf_') + >>> counts = fx.BagOfWords() | compose.Prefixer('count_') >>> mnb = naive_bayes.MultinomialNB() >>> model = (tfidf + counts) | mnb @@ -296,24 +296,20 @@ class Pipeline(base.Estimator): 0. Input -------- A positive comment - 1. Transformer union -------------------- - 1.0 TFIDF | Renamer - ------------------- + 1.0 TFIDF | Prefixer + -------------------- tfidf_comment: 0.47606 (float) tfidf_positive: 0.87942 (float) - - 1.1 BagOfWords | Renamer - ------------------------ + 1.1 BagOfWords | Prefixer + ------------------------- count_comment: 1 (int) count_positive: 1 (int) - count_comment: 1 (int) count_positive: 1 (int) tfidf_comment: 0.50854 (float) tfidf_positive: 0.86104 (float) - 2. 
MultinomialNB ---------------- False: 0.19313 diff --git a/river/compose/rename.py b/river/compose/rename.py deleted file mode 100644 index 8109c8cb63..0000000000 --- a/river/compose/rename.py +++ /dev/null @@ -1,33 +0,0 @@ -from .. import base - -__all__ = ["Renamer"] - - -class Renamer(base.Transformer): - """Renames keys based on given parameters. - - Parameters - ---------- - prefix - suffix - - Examples - -------- - - >>> from river import compose - - >>> x = {'a': 42, 'b': 12} - >>> compose.Renamer(prefix='prefix_', suffix='_suffix').transform_one(x) - {'prefix_a_suffix': 42, 'prefix_b_suffix': 12} - - """ - - def __init__(self, prefix=None, suffix=None): - self.prefix = prefix or "" - self.suffix = suffix or "" - - def _rename(self, s): - return self.prefix + s + self.suffix - - def transform_one(self, x): - return {self._rename(i): xi for i, xi in x.items()} diff --git a/river/compose/renamer.py b/river/compose/renamer.py new file mode 100644 index 0000000000..74a357eb56 --- /dev/null +++ b/river/compose/renamer.py @@ -0,0 +1,94 @@ +from typing import Dict + +from .. import base + +__all__ = ["Renamer", "Prefixer", "Suffixer"] + + +class Renamer(base.Transformer): + """Renames features following substitution rules. + + Parameters + ---------- + mapping + Dictionary describing substitution rules. Keys in `mapping` that are not a feature's name are silently ignored. + + Examples + -------- + + >>> from river import compose + + >>> mapping = {'a': 'v', 'c': 'o'} + >>> x = {'a': 42, 'b': 12} + >>> compose.Renamer(mapping).transform_one(x) + {'b': 12, 'v': 42} + + """ + + def __init__(self, mapping: Dict[str, str]): + self.mapping = mapping + + def transform_one(self, x): + for old_key, new_key in self.mapping.items(): + try: + x[new_key] = x.pop(old_key) + except KeyError: + pass # Ignoring keys that are not a feature's name + + return x + + +class Prefixer(base.Transformer): + """Prepends a prefix to feature names. 
+ + Parameters + ---------- + prefix + + Examples + -------- + + >>> from river import compose + + >>> x = {'a': 42, 'b': 12} + >>> compose.Prefixer('prefix_').transform_one(x) + {'prefix_a': 42, 'prefix_b': 12} + + """ + + def __init__(self, prefix: str): + self.prefix = prefix + + def _rename(self, s: str) -> str: + return f"{self.prefix}{s}" + + def transform_one(self, x): + return {self._rename(i): xi for i, xi in x.items()} + + +class Suffixer(base.Transformer): + """Appends a suffix to feature names. + + Parameters + ---------- + suffix + + Examples + -------- + + >>> from river import compose + + >>> x = {'a': 42, 'b': 12} + >>> compose.Suffixer('_suffix').transform_one(x) + {'a_suffix': 42, 'b_suffix': 12} + + """ + + def __init__(self, suffix: str): + self.suffix = suffix + + def _rename(self, s: str) -> str: + return f"{s}{self.suffix}" + + def transform_one(self, x): + return {self._rename(i): xi for i, xi in x.items()} diff --git a/river/compose/test_product.py b/river/compose/test_product.py index e337d017aa..e7b8c90f70 100644 --- a/river/compose/test_product.py +++ b/river/compose/test_product.py @@ -53,3 +53,21 @@ def test_both_are_pipelines(): "b_2": 1, "a_1": 1, } + + +def test_renaming(): + + renamer = compose.Renamer(dict(a="z", b="y", c="x")) + assert renamer.transform_one(dict(a=1, b=2, d=3)) == dict(z=1, y=2, d=3) + + +def test_prefixing(): + + prefixer = compose.Prefixer("x_") + assert prefixer.transform_one(dict(a=1, b=2, d=3)) == dict(x_a=1, x_b=2, x_d=3) + + +def test_suffixing(): + + suffixer = compose.Suffixer("_x") + assert suffixer.transform_one(dict(a=1, b=2, d=3)) == dict(a_x=1, b_x=2, d_x=3) diff --git a/river/test_estimators.py b/river/test_estimators.py index cd1c7c0d15..38e3254f49 100644 --- a/river/test_estimators.py +++ b/river/test_estimators.py @@ -54,6 +54,9 @@ def iter_estimators_which_can_be_tested(): compose.FuncTransformer, compose.Grouper, compose.Pipeline, + compose.Prefixer, + compose.Renamer, + compose.Suffixer, 
compose.TargetTransformRegressor, ensemble.AdaptiveRandomForestClassifier, ensemble.AdaptiveRandomForestRegressor,