From aa7f7a06a339b738a9eff6f4fcdb55f82c8bca4c Mon Sep 17 00:00:00 2001 From: Gaurav Sheni Date: Thu, 19 Aug 2021 17:23:58 -0400 Subject: [PATCH] primitives renamed --- categorical_encoding/primitives/binary_enc.py | 7 +- .../primitives/hashing_enc.py | 7 +- .../primitives/leave_one_out_enc.py | 8 +-- .../primitives/one_hot_enc.py | 8 +-- .../primitives/ordinal_enc.py | 8 +-- categorical_encoding/primitives/target_enc.py | 8 +-- contributing.md | 68 +++++++++++++++++++ dev-requirements.txt | 8 +-- docs/source/changelog.rst | 18 ----- docs/source/release_notes.rst | 33 +++++++++ test-requirements.txt | 6 +- 11 files changed, 132 insertions(+), 47 deletions(-) create mode 100644 contributing.md delete mode 100644 docs/source/changelog.rst create mode 100644 docs/source/release_notes.rst diff --git a/categorical_encoding/primitives/binary_enc.py b/categorical_encoding/primitives/binary_enc.py index 9357334..4e78f50 100644 --- a/categorical_encoding/primitives/binary_enc.py +++ b/categorical_encoding/primitives/binary_enc.py @@ -2,7 +2,8 @@ from featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Numeric +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class BinaryEnc(TransformPrimitive): @@ -25,8 +26,8 @@ class BinaryEnc(TransformPrimitive): [0, 1, 1, 1]] """ name = "binary_enc" - input_types = [Categorical] - return_type = [Numeric] + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = ColumnSchema(semantic_tags={'numeric'}) def __init__(self, fitted_encoder, category): self.mapping, self.mapping_ord = fitted_encoder.get_mapping(category) diff --git a/categorical_encoding/primitives/hashing_enc.py b/categorical_encoding/primitives/hashing_enc.py index fd8a6db..d0375fe 100644 --- a/categorical_encoding/primitives/hashing_enc.py +++ b/categorical_encoding/primitives/hashing_enc.py @@ -3,7 +3,8 @@ from 
featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Numeric +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class HashingEnc(TransformPrimitive): @@ -28,8 +29,8 @@ class HashingEnc(TransformPrimitive): [0, 0, 0, 0]] """ name = "hashing_enc" - input_types = [Categorical] - return_type = [Numeric] + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = [ColumnSchema(logical_type=Integer, semantic_tags={'numeric'})] def __init__(self, fitted_encoder): self.hash_method = fitted_encoder.get_hash_method() diff --git a/categorical_encoding/primitives/leave_one_out_enc.py b/categorical_encoding/primitives/leave_one_out_enc.py index a786329..428e02b 100644 --- a/categorical_encoding/primitives/leave_one_out_enc.py +++ b/categorical_encoding/primitives/leave_one_out_enc.py @@ -1,8 +1,8 @@ from featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Numeric - +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class LeaveOneOutEnc(TransformPrimitive): """Applies a fitted LeaveOneOut Encoder to the values. 
@@ -23,8 +23,8 @@ class LeaveOneOutEnc(TransformPrimitive): [2, 3, 1, 1] """ name = "leave_one_out" - input_types = [Categorical] - return_type = Numeric + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = [ColumnSchema(logical_type=Integer, semantic_tags={'numeric'})] def __init__(self, fitted_encoder, category): self.mapping = fitted_encoder.get_mapping(category) diff --git a/categorical_encoding/primitives/one_hot_enc.py b/categorical_encoding/primitives/one_hot_enc.py index 81c2db2..131c631 100644 --- a/categorical_encoding/primitives/one_hot_enc.py +++ b/categorical_encoding/primitives/one_hot_enc.py @@ -2,8 +2,8 @@ from featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Numeric - +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class OneHotEnc(TransformPrimitive): """Applies one hot encoding for the specific category value to the column. @@ -20,8 +20,8 @@ class OneHotEnc(TransformPrimitive): [0, 0, 1, 1] """ name = "one_hot_enc" - input_types = [Categorical] - return_type = [Numeric] + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = [ColumnSchema(logical_type=Integer, semantic_tags={'numeric'})] def __init__(self, value=None): self.value = value diff --git a/categorical_encoding/primitives/ordinal_enc.py b/categorical_encoding/primitives/ordinal_enc.py index 3f2962d..34c333e 100644 --- a/categorical_encoding/primitives/ordinal_enc.py +++ b/categorical_encoding/primitives/ordinal_enc.py @@ -1,8 +1,8 @@ from featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Ordinal - +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class OrdinalEnc(TransformPrimitive): """Applies a fitted Ordinal Encoder to the values. 
@@ -23,8 +23,8 @@ class OrdinalEnc(TransformPrimitive): [2, 3, 1, 1] """ name = "ordinal_enc" - input_types = [Categorical] - return_type = Ordinal + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = [ColumnSchema(logical_type=Integer, semantic_tags={'numeric'})] def __init__(self, fitted_encoder, category): self.mapping = fitted_encoder.get_mapping(category) diff --git a/categorical_encoding/primitives/target_enc.py b/categorical_encoding/primitives/target_enc.py index fb12e7d..e62e6d6 100644 --- a/categorical_encoding/primitives/target_enc.py +++ b/categorical_encoding/primitives/target_enc.py @@ -1,8 +1,8 @@ from featuretools.primitives.base.transform_primitive_base import ( TransformPrimitive ) -from featuretools.variable_types import Categorical, Numeric - +from woodwork.column_schema import ColumnSchema +from woodwork.logical_types import Categorical, Integer class TargetEnc(TransformPrimitive): """Applies a fitted Target Encoder to the values. @@ -23,8 +23,8 @@ class TargetEnc(TransformPrimitive): [2, 3, 1, 1] """ name = "target_enc" - input_types = [Categorical] - return_type = Numeric + input_types = [ColumnSchema(logical_type=Categorical)] + return_type = [ColumnSchema(logical_type=Integer, semantic_tags={'numeric'})] def __init__(self, fitted_encoder, category): self.mapping, self.mapping_ord = fitted_encoder.get_mapping(category) diff --git a/contributing.md b/contributing.md new file mode 100644 index 0000000..1d57955 --- /dev/null +++ b/contributing.md @@ -0,0 +1,68 @@ +# Contributing to Categorical Encoding + +:+1::tada: First off, thank you for taking the time to contribute! :tada::+1: + +Whether you are a novice or experienced software developer, all contributions and suggestions are welcome! + +There are many ways to contribute to Categorical Encoding, with the most common ones being contribution of code or documentation to the project. + +**To contribute, you can:** +1. 
Help users on our [Slack channel](https://join.slack.com/t/featuretools/shared_invite/enQtNTEwODEzOTEwMjg4LTQ1MjZlOWFmZDk2YzAwMjEzNTkwZTZkN2NmOGFjOGI4YzE5OGMyMGM5NGIxNTE4NjkzYWI3OWEwZjkyZGExYmQ). + +2. Submit a pull request for one of the [Good First Issues](https://github.com/alteryx/categorical_encoding/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). + +3. Make changes to the codebase, see [Contributing to the codebase](#Contributing-to-the-Codebase). + +4. [Report issues](#Report-issues) you're facing, and give a "thumbs up" on issues that others reported and that are relevant to you. Issues should be used for bugs and feature requests only. + +## Contributing to the Codebase + +#### 1. Clone repo +* The code is hosted on GitHub, so you will need to use Git to clone the project and make changes to the codebase. Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. +* You can run the following steps to clone the code, create a separate virtual environment, and install Categorical Encoding in editable mode. + ```bash + git clone https://github.com/alteryx/categorical_encoding.git && cd categorical_encoding + python -m venv venv + source venv/bin/activate + python -m pip install -e . + python -m pip install -r dev-requirements.txt + ``` +#### 2. Implement your Pull Request + +* Implement your pull request. If needed, add new tests or update the documentation. +* Before submitting to GitHub, verify that the tests run and the code lints properly: + ```bash + # runs test + make test + + # runs linting + make lint + + # will fix some common linting issues automatically + make lint-fix + ``` +* If you made changes to the documentation, build the documentation locally. + ```bash + # go to docs and build + cd docs + make html + + # view docs locally + open build/html/index.html + ``` + +#### 3. 
Submit your Pull Request + +* Once your changes are ready to be submitted, make sure to push your changes to GitHub before creating a pull request. Create a pull request, and our continuous integration will run automatically. +* Update the "Future Release" section of the release notes (`docs/source/release_notes.rst`) to include your pull request and add your GitHub username to the list of contributors. Add a description of your PR to the subsection that most closely matches your contribution: + * Enhancements: new features or additions to Categorical Encoding. + * Fixes: things like bugfixes or adding more descriptive error messages. + * Changes: modifications to an existing part of Categorical Encoding. + * Documentation Changes + * Testing Changes + + Documentation or testing changes rarely warrant an individual release notes entry; the PR number can be added to their respective "Miscellaneous changes" entries. +* We will review your changes, and you will most likely be asked to make additional changes before they are finally ready to merge. However, once your pull request is reviewed by a maintainer of Categorical Encoding and passes continuous integration, we will merge it, and you will have successfully contributed to Categorical Encoding! + +## Report issues +When reporting issues, please include as much detail as possible about your operating system, your Categorical Encoding and Featuretools versions, and your Python version. Whenever possible, please also include a brief, self-contained code example that demonstrates the problem. 
\ No newline at end of file diff --git a/dev-requirements.txt b/dev-requirements.txt index 745e663..126804a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,5 @@ -r test-requirements.txt -codecov==2.0.15 -flake8==3.7.0 -autopep8==1.4.3 -isort==4.3.4 +codecov==2.1.8 +flake8==3.7.8 +autopep8==1.4.4 +isort==4.3.21 \ No newline at end of file diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst deleted file mode 100644 index 4c04ceb..0000000 --- a/docs/source/changelog.rst +++ /dev/null @@ -1,18 +0,0 @@ -========= -Changelog -========= - -**v0.4.1** - * Fix conflicting jupyter and nbconvert requirement - * Removed unused requirements (jupyter, nbconvert, nbsphinx) - * Pinned category_encoders to 2.0.0 - -**v0.2.0** - * Supports Bayesian encoders: Target and LeaveOneOut - * Encoder class independent of primitives - -**v0.1.0** - * Supports all 4 Classic encoders: OneHotEncoder, OrdinalEncoder, HashingEncoder, BinaryEncoder - -**v0.0.0** - * Initial Release diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst new file mode 100644 index 0000000..4eea23a --- /dev/null +++ b/docs/source/release_notes.rst @@ -0,0 +1,33 @@ +.. _release_notes: + +Release Notes +------------- +.. Future Release + ============== + * Enhancements + * Fixes + * Changes + * Documentation Changes + * Testing Changes + +.. 
Thanks to the following people for contributing to this release: + + +v0.4.1 Apr 27, 2020 +=================== + * Fix conflicting jupyter and nbconvert requirement + * Removed unused requirements (jupyter, nbconvert, nbsphinx) + * Pinned category_encoders to 2.0.0 + +v0.2.0 Aug 13, 2019 +=================== + * Supports Bayesian encoders: Target and LeaveOneOut + * Encoder class independent of primitives + +v0.1.0 Aug 12, 2019 +=================== + * Supports all 4 Classic encoders: OneHotEncoder, OrdinalEncoder, HashingEncoder, BinaryEncoder + +v0.0.0 Aug 7, 2019 +================== + * Initial Release diff --git a/test-requirements.txt b/test-requirements.txt index 4efa87f..f5f32fd 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,3 @@ -pytest==4.4.1 -pytest-xdist==1.26.1 -pytest-cov==2.6.1 +pytest>=5.2.0 +pytest-xdist>=1.26.1 +pytest-cov>=2.6.1