Skip to content

Commit

Permalink
Refactored feature renaming (#821)
Browse files Browse the repository at this point in the history
  • Loading branch information
damienwojtowicz authored Jan 14, 2022
1 parent b64f14f commit 3732f70
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 44 deletions.
5 changes: 5 additions & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

- Created this module to store estimator unit testing, rather than having it in the `utils` module.

## compose

- Split `compose.Renamer` into `compose.Prefixer` and `compose.Suffixer`, which respectively prepend and append a string to each feature's name.
- Changed `compose.Renamer` to allow feature renaming following a mapping.

## evaluate

- Refactored `evaluate.progressive_validation` to work with `base.AnomalyDetector`s.
Expand Down
4 changes: 3 additions & 1 deletion river/compose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .grouper import Grouper
from .pipeline import Pipeline
from .product import TransformerProduct
from .rename import Renamer
from .renamer import Prefixer, Renamer, Suffixer
from .select import Discard, Select, SelectType
from .target_transform import TargetTransformRegressor
from .union import TransformerUnion
Expand All @@ -18,9 +18,11 @@
"FuncTransformer",
"Grouper",
"Pipeline",
"Prefixer",
"Renamer",
"Select",
"SelectType",
"Suffixer",
"TargetTransformRegressor",
"TransformerProduct",
"TransformerUnion",
Expand Down
16 changes: 6 additions & 10 deletions river/compose/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ class Pipeline(base.Estimator):
... ('A harsh comment', False)
... ]
>>> tfidf = fx.TFIDF() | compose.Renamer(prefix='tfidf_')
>>> counts = fx.BagOfWords() | compose.Renamer(prefix='count_')
>>> tfidf = fx.TFIDF() | compose.Prefixer('tfidf_')
>>> counts = fx.BagOfWords() | compose.Prefixer('count_')
>>> mnb = naive_bayes.MultinomialNB()
>>> model = (tfidf + counts) | mnb
Expand All @@ -296,24 +296,20 @@ class Pipeline(base.Estimator):
0. Input
--------
A positive comment
<BLANKLINE>
1. Transformer union
--------------------
1.0 TFIDF | Renamer
-------------------
1.0 TFIDF | Prefixer
--------------------
tfidf_comment: 0.47606 (float)
tfidf_positive: 0.87942 (float)
<BLANKLINE>
1.1 BagOfWords | Renamer
------------------------
1.1 BagOfWords | Prefixer
-------------------------
count_comment: 1 (int)
count_positive: 1 (int)
<BLANKLINE>
count_comment: 1 (int)
count_positive: 1 (int)
tfidf_comment: 0.50854 (float)
tfidf_positive: 0.86104 (float)
<BLANKLINE>
2. MultinomialNB
----------------
False: 0.19313
Expand Down
33 changes: 0 additions & 33 deletions river/compose/rename.py

This file was deleted.

94 changes: 94 additions & 0 deletions river/compose/renamer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import Dict

from .. import base

# Names exported by this module on `from .renamer import *`.
__all__ = ["Renamer", "Prefixer", "Suffixer"]


class Renamer(base.Transformer):
    """Renames features following substitution rules.

    Every rule is matched against the incoming feature names, so the rules are applied
    simultaneously: a mapping such as `{'a': 'b', 'b': 'a'}` swaps the two features instead
    of dropping one of them. The input dictionary is left untouched and a new dictionary is
    returned.

    Parameters
    ----------
    mapping
        Dictionary describing substitution rules. Keys in `mapping` that are not a feature's
        name are silently ignored.

    Examples
    --------

    >>> from river import compose

    >>> mapping = {'a': 'v', 'c': 'o'}
    >>> x = {'a': 42, 'b': 12}
    >>> compose.Renamer(mapping).transform_one(x)
    {'b': 12, 'v': 42}

    The input is not modified:

    >>> x
    {'a': 42, 'b': 12}

    Overlapping rules are resolved against the original names:

    >>> compose.Renamer({'a': 'b', 'b': 'a'}).transform_one({'a': 1, 'b': 2})
    {'b': 1, 'a': 2}

    """

    def __init__(self, mapping: Dict[str, str]):
        self.mapping = mapping

    def transform_one(self, x):
        """Return a copy of `x` in which the mapped feature names are substituted."""
        # Keep only the rules whose source feature is present; the others are ignored.
        applicable = {old: new for old, new in self.mapping.items() if old in x}

        # Features that are not renamed keep their name and their relative order.
        renamed = {name: value for name, value in x.items() if name not in applicable}

        # Values are always read from the original `x`, never from `renamed`, so a rule
        # cannot consume the output of another rule. If several rules target the same
        # new name, the one that comes last in `mapping` wins.
        for old, new in applicable.items():
            renamed[new] = x[old]

        return renamed


class Prefixer(base.Transformer):
    """Prepends a prefix to the name of every feature.

    Parameters
    ----------
    prefix
        String placed in front of each feature name.

    Examples
    --------

    >>> from river import compose

    >>> x = {'a': 42, 'b': 12}
    >>> compose.Prefixer('prefix_').transform_one(x)
    {'prefix_a': 42, 'prefix_b': 12}

    """

    def __init__(self, prefix: str):
        self.prefix = prefix

    def _rename(self, s: str) -> str:
        """Build the prefixed version of a single feature name."""
        return "{}{}".format(self.prefix, s)

    def transform_one(self, x):
        """Return a new dictionary holding the same values under prefixed names."""
        prefixed = {}
        for name, value in x.items():
            prefixed[self._rename(name)] = value
        return prefixed


class Suffixer(base.Transformer):
    """Appends a suffix to the name of every feature.

    Parameters
    ----------
    suffix
        String placed after each feature name.

    Examples
    --------

    >>> from river import compose

    >>> x = {'a': 42, 'b': 12}
    >>> compose.Suffixer('_suffix').transform_one(x)
    {'a_suffix': 42, 'b_suffix': 12}

    """

    def __init__(self, suffix: str):
        self.suffix = suffix

    def _rename(self, s: str) -> str:
        """Build the suffixed version of a single feature name."""
        return "{}{}".format(s, self.suffix)

    def transform_one(self, x):
        """Return a new dictionary holding the same values under suffixed names."""
        suffixed = {}
        for name, value in x.items():
            suffixed[self._rename(name)] = value
        return suffixed
18 changes: 18 additions & 0 deletions river/compose/test_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,21 @@ def test_both_are_pipelines():
"b_2": 1,
"a_1": 1,
}


def test_renaming():
    """Mapped features are renamed; unmapped features and unused rules are left alone."""
    rules = {"a": "z", "b": "y", "c": "x"}
    features = {"a": 1, "b": 2, "d": 3}
    expected = {"z": 1, "y": 2, "d": 3}

    assert compose.Renamer(rules).transform_one(features) == expected


def test_prefixing():
    """Every feature name receives the prefix; values are unchanged."""
    features = {"a": 1, "b": 2, "d": 3}
    expected = {"x_a": 1, "x_b": 2, "x_d": 3}

    assert compose.Prefixer("x_").transform_one(features) == expected


def test_suffixing():
    """Every feature name receives the suffix; values are unchanged."""
    features = {"a": 1, "b": 2, "d": 3}
    expected = {"a_x": 1, "b_x": 2, "d_x": 3}

    assert compose.Suffixer("_x").transform_one(features) == expected
3 changes: 3 additions & 0 deletions river/test_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def iter_estimators_which_can_be_tested():
compose.FuncTransformer,
compose.Grouper,
compose.Pipeline,
compose.Prefixer,
compose.Renamer,
compose.Suffixer,
compose.TargetTransformRegressor,
ensemble.AdaptiveRandomForestClassifier,
ensemble.AdaptiveRandomForestRegressor,
Expand Down

0 comments on commit 3732f70

Please sign in to comment.