alteryx · chukarsten · Aug 10, 2022 · Jul 24, 2022 · Jul 25, 2022 · Jul 25, 2022
diff --git a/core-requirements.txt b/core-requirements.txt
@@ -11,7 +11,7 @@ requirements-parser>=0.2.0
 shap>=0.40.0
 statsmodels>=0.12.2
 texttable>=1.6.2
-woodwork>=0.16.2, < 0.17.0
+woodwork>=0.17.0
 dask>=2021.10.0
 nlp-primitives>=2.1.0,!=2.6.0
 featuretools>=1.7.0

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -1,6 +1,18 @@
 Release Notes
 -------------
 **Future Releases**
+    * Enhancements
+    * Fixes
+    * Changes
+    * Documentation Changes
+    * Testing Changes
+
+.. warning::
+
+    **Breaking Changes**
+
+
+**v0.55.0 July. 24, 2022**
     * Enhancements
         * Increased the amount of logical type information passed to Woodwork when calling ``ww.init()`` in transformers :pr:`3604`
         * Added ability to log how long each batch and pipeline take in ``automl.search()`` :pr:`3577`
@@ -13,16 +25,11 @@ Release Notes
-        * Bump minimum scikit-optimize version to 0.9.0 `:pr:`3614`
+        * Bump minimum scikit-optimize version to 0.9.0 :pr:`3614`
     * Changes
         * Add pre-commit hooks for linting :pr:`3608`
-    * Documentation Changes
     * Testing Changes
         * Pinned GraphViz version for Windows CI Test :pr:`3596`
         * Removed ``pytest.mark.skip_if_39`` pytest marker :pr:`3602` :pr:`3607`
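-        * Refactored test cases that iterate over all components to use ``pytest.mark.parametrise`` and changed the corresponding ``if...continue`` blocks to ``pytest.mark.xfail`` :pr:`3622`
+        * Refactored test cases that iterate over all components to use ``pytest.mark.parametrize`` and changed the corresponding ``if...continue`` blocks to ``pytest.mark.xfail`` :pr:`3622`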
 
-.. warning::
-
-    **Breaking Changes**
-
 
 **v0.54.0 June. 23, 2022**
     * Fixes

diff --git a/evalml/__init__.py b/evalml/__init__.py
@@ -23,4 +23,4 @@
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-__version__ = "0.54.0"
+__version__ = "0.55.0"
diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
@@ -4016,7 +4016,7 @@ def test_automl_baseline_pipeline_predictions_and_scores_time_series(problem_typ
         expected_predictions = pd.Series(expected_predictions, name="target_delay_1")
 
     preds = baseline.predict(X_validation, None, X_train, y_train)
-    pd.testing.assert_series_equal(expected_predictions, preds)
+    pd.testing.assert_series_equal(expected_predictions, preds, check_dtype=False)
     if is_classification(problem_type):
         pd.testing.assert_frame_equal(
             expected_predictions_proba,

diff --git a/evalml/tests/component_tests/test_datetime_featurizer.py b/evalml/tests/component_tests/test_datetime_featurizer.py
@@ -288,10 +288,17 @@ def test_datetime_featurizer_with_inconsistent_date_format():
     answer = pd.DataFrame(
         {
             "numerical": [0] * len(dates),
-            "date col_year": [2021.0] * 18 + [np.nan] * 2,
-            "date col_month": [9.0] * 18 + [np.nan] * 2,
+            "date col_year": [2021] * 18 + [pd.NA] * 2,
+            "date col_month": [9] * 18 + [pd.NA] * 2,
             "date col_day_of_week": expected_dow,
-            "date col_hour": [0.0] * 18 + [np.nan] * 2,
+            "date col_hour": [0] * 18 + [pd.NA] * 2,
+        },
+    ).astype(
+        dtype={
+            "date col_year": "Int64",
+            "date col_month": "Int64",
+            "date col_day_of_week": "Int64",
+            "date col_hour": "Int64",
         },
     )
     pd.testing.assert_frame_equal(answer, expected)

diff --git a/evalml/tests/component_tests/test_drop_nan_rows_transformer.py b/evalml/tests/component_tests/test_drop_nan_rows_transformer.py
@@ -16,8 +16,7 @@ def test_drop_rows_transformer():
     X_expected = pd.DataFrame(
         {"a column": [3], "another col": [6]},
         index=[2],
-        dtype=np.float64,
-    )
+    ).astype("Int64")
     drop_rows_transformer = DropNaNRowsTransformer()
     drop_rows_transformer.fit(X)
     transformed_X, _ = drop_rows_transformer.transform(X)

diff --git a/evalml/tests/component_tests/test_drop_null_columns_transformer.py b/evalml/tests/component_tests/test_drop_null_columns_transformer.py
@@ -45,7 +45,7 @@ def test_drop_null_transformer_transform_default_pct_null_threshold():
     X = pd.DataFrame(
         {"lots_of_null": [None, None, None, None, 5], "no_null": [1, 2, 3, 4, 5]},
     )
-    X_expected = X.astype({"lots_of_null": "float64", "no_null": "int64"})
+    X_expected = X.astype({"lots_of_null": "Int64", "no_null": "int64"})
     drop_null_transformer.fit(X)
     X_t = drop_null_transformer.transform(X)
     assert_frame_equal(X_expected, X_t)
@@ -94,7 +94,12 @@ def test_drop_null_transformer_transform_boundary_pct_null_threshold():
     drop_null_transformer = DropNullColumns(pct_null_threshold=1.0)
     drop_null_transformer.fit(X)
     X_t = drop_null_transformer.transform(X)
-    assert_frame_equal(X_t, X.drop(["all_null"], axis=1))
+    assert_frame_equal(
+        X_t,
+        X.drop(columns=["all_null"]).astype(
+            {"some_null": "Int64", "lots_of_null": "Int64"},
+        ),
+    )
     # check that X is untouched
     assert X.equals(
         pd.DataFrame(
@@ -112,7 +117,7 @@ def test_drop_null_transformer_fit_transform():
     X = pd.DataFrame(
         {"lots_of_null": [None, None, None, None, 5], "no_null": [1, 2, 3, 4, 5]},
     )
-    X_expected = X.astype({"lots_of_null": "float64", "no_null": "int64"})
+    X_expected = X.astype({"lots_of_null": "Int64", "no_null": "int64"})
     X_t = drop_null_transformer.fit_transform(X)
     assert_frame_equal(X_expected, X_t)
 
@@ -152,6 +157,11 @@ def test_drop_null_transformer_fit_transform():
             "lots_of_null": [None, None, None, None, 5],
             "some_null": [None, 0, 3, 4, 5],
         },
+    ).astype(
+        {
+            "lots_of_null": "Int64",
+            "some_null": "Int64",
+        },
     )
     drop_null_transformer = DropNullColumns(pct_null_threshold=1.0)
     X_t = drop_null_transformer.fit_transform(X)

diff --git a/evalml/tests/component_tests/test_imputer.py b/evalml/tests/component_tests/test_imputer.py
@@ -8,9 +8,11 @@
 from pandas.testing import assert_frame_equal
 from woodwork.logical_types import (
     Boolean,
+    BooleanNullable,
     Categorical,
     Double,
     Integer,
+    IntegerNullable,
     NaturalLanguage,
 )
 
@@ -512,7 +514,7 @@ def test_imputer_all_bool_return_original(data_type, make_data_type):
 def test_imputer_bool_dtype_object(data_type, make_data_type):
     X = pd.DataFrame([True, np.nan, False, np.nan, True] * 4)
     y = pd.Series([1, 0, 0, 1, 0] * 4)
-    X_expected_arr = pd.DataFrame([True, True, False, True, True] * 4, dtype="category")
+    X_expected_arr = pd.DataFrame([True, True, False, True, True] * 4, dtype="boolean")
     X = make_data_type(data_type, X)
     y = make_data_type(data_type, y)
     imputer = Imputer()
@@ -537,7 +539,7 @@ def test_imputer_multitype_with_one_bool(data_type, make_data_type):
         {
             "bool with nan": pd.Series(
                 [True, False, False, False, False] * 4,
-                dtype="category",
+                dtype="boolean",
             ),
             "bool no nan": pd.Series(
                 [False, False, False, False, True] * 4,
@@ -563,7 +565,9 @@ def test_imputer_int_preserved():
         transformed,
         pd.DataFrame(pd.Series([1, 2, 11, 14 / 3])),
     )
-    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {0: Double}
+    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {
+        0: IntegerNullable,
+    }
 
     X = pd.DataFrame(pd.Series([1, 2, 3, np.nan]))
     imputer = Imputer(numeric_impute_strategy="mean")
@@ -573,7 +577,9 @@ def test_imputer_int_preserved():
         pd.DataFrame(pd.Series([1, 2, 3, 2])),
         check_dtype=False,
     )
-    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {0: Double}
+    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {
+        0: IntegerNullable,
+    }
 
     X = pd.DataFrame(pd.Series([1, 2, 3, 4], dtype="int"))
     imputer = Imputer(numeric_impute_strategy="mean")
@@ -595,9 +601,9 @@ def test_imputer_bool_preserved(test_case, null_type):
         ]
         X = pd.DataFrame(pd.Series([True, False, True, null_type] * 4))
         expected = pd.DataFrame(
-            pd.Series([True, False, True, True] * 4, dtype="category"),
+            pd.Series([True, False, True, True] * 4, dtype="boolean"),
         )
-        expected_ww_dtype = Categorical
+        expected_ww_dtype = BooleanNullable
         check_dtype = True
     elif test_case == "boolean_without_null":
         X = pd.DataFrame(pd.Series([True, False, True, False] * 4))

diff --git a/evalml/tests/component_tests/test_per_column_imputer.py b/evalml/tests/component_tests/test_per_column_imputer.py
@@ -219,15 +219,18 @@ def test_fit_transform_drop_all_nan_columns():
         "another_col": {"impute_strategy": "most_frequent"},
     }
     transformer = PerColumnImputer(impute_strategies=strategies)
-    X_expected_arr = pd.DataFrame({"some_nan": [0, 1, 0], "another_col": [0, 1, 2]})
+    X_expected_arr = pd.DataFrame(
+        {"some_nan": [0, 1, 0], "another_col": [0, 1, 2]},
+    ).astype({"some_nan": "Int64"})
     X_t = transformer.fit_transform(X)
     assert_frame_equal(X_expected_arr, X_t, check_dtype=False)
+    # Check that original dataframe remains unchanged
     assert_frame_equal(
         X,
         pd.DataFrame(
             {
                 "all_nan": [np.nan, np.nan, np.nan],
-                "some_nan": [0.0, 1.0, 0.0],
+                "some_nan": [0, 1, 0],
                 "another_col": [0, 1, 2],
             },
         ),
@@ -259,7 +262,7 @@ def test_transform_drop_all_nan_columns():
         pd.DataFrame(
             {
                 "all_nan": [np.nan, np.nan, np.nan],
-                "some_nan": [0.0, 1.0, 0.0],
+                "some_nan": [0, 1, 0],
                 "another_col": [0, 1, 2],
             },
         ),
@@ -347,8 +350,9 @@ def test_per_column_imputer_column_subset():
     )
     X_expected.ww.init(
         logical_types={
-            "all_nan_not_included": "double",
-            "column_with_nan_included": "double",
+            "all_nan_not_included": "Double",
+            "column_with_nan_not_included": "IntegerNullable",
+            "column_with_nan_included": "IntegerNullable",
         },
     )
     X.ww.init(
@@ -362,11 +366,10 @@ def test_per_column_imputer_column_subset():
             {
                 "all_nan_not_included": [np.nan, np.nan, np.nan],
                 "all_nan_included": [np.nan, np.nan, np.nan],
-                "column_with_nan_not_included": [np.nan, 1, 0],
-                # Because of https://github.com/alteryx/evalml/issues/2055
-                "column_with_nan_included": [0.0, 1.0, 0.0],
+                "column_with_nan_not_included": [pd.NA, 1, 0],
+                "column_with_nan_included": [0, 1, 0],
             },
-        ),
+        ).astype({"column_with_nan_not_included": "Int64"}),
     )
 
 

diff --git a/evalml/tests/component_tests/test_simple_imputer.py b/evalml/tests/component_tests/test_simple_imputer.py
@@ -531,6 +531,7 @@ def test_simple_imputer_ignores_natural_language(
 
     if has_nan == "has_nan":
         X_df.iloc[-1, :] = None
+        X_df.astype({"int col": "Int64"})  # NOTE(review): astype() result is discarded, so this line has no effect; assign it or drop it
         X_df.ww.init()
     y = pd.Series([x for x in range(X_df.shape[1])])
 
@@ -551,10 +552,16 @@ def test_simple_imputer_ignores_natural_language(
         if numeric_impute_strategy == "mean" and has_nan == "has_nan":
             ans = X_df.mean()
             ans["natural language col"] = pd.NA
+            X_df = X_df.astype(
+                {"int col": float},
+            )  # Convert to float as the imputer will do this as we're requesting the mean
             X_df.iloc[-1, :] = ans
         elif numeric_impute_strategy == "median" and has_nan == "has_nan":
             ans = X_df.median()
             ans["natural language col"] = pd.NA
+            X_df = X_df.astype(
+                {"int col": float},
+            )  # Convert to float as the imputer will do this as we're requesting the median
             X_df.iloc[-1, :] = ans
         elif numeric_impute_strategy == "constant" and has_nan == "has_nan":
             X_df.iloc[-1, 0:2] = fill_value

diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py
@@ -1982,7 +1982,7 @@ def imputer_test_data():
             ),
             "int col": [0, 1, 2, 0, 3] * 4,
             "object col": ["b", "b", "a", "c", "d"] * 4,
-            "float col": [0.0, 1.0, 0.0, -2.0, 5.0] * 4,
+            "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4,
             "bool col": [True, False, False, True, True] * 4,
             "categorical with nan": pd.Series(
                 [np.nan, "1", "0", "0", "3"] * 4,

diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -29,5 +29,5 @@ sktime==0.13.0
 statsmodels==0.13.2
 texttable==1.6.4
 vowpalwabbit==9.2.0
-woodwork==0.16.4
+woodwork==0.17.0
 xgboost==1.6.1
diff --git a/evalml/tests/dependency_update_check/minimum_requirements.txt b/evalml/tests/dependency_update_check/minimum_requirements.txt
@@ -29,5 +29,5 @@ sktime==0.7.0
 statsmodels==0.12.2
 texttable==1.6.2
 vowpalwabbit==8.11.0
-woodwork==0.16.2
+woodwork==0.17.0
 xgboost==1.5.1
diff --git a/evalml/tests/dependency_update_check/minimum_test_requirements.txt b/evalml/tests/dependency_update_check/minimum_test_requirements.txt
@@ -38,5 +38,5 @@ sktime==0.7.0
 statsmodels==0.12.2
 texttable==1.6.2
 vowpalwabbit==8.11.0
-woodwork==0.16.2
+woodwork==0.17.0
 xgboost==1.5.1
diff --git a/setup.cfg b/setup.cfg
@@ -53,7 +53,7 @@ install_requires =
     shap >= 0.40.0
     statsmodels >= 0.12.2
     texttable >= 1.6.2
-    woodwork >= 0.16.2, < 0.17.0
+    woodwork >= 0.17.0
     dask >= 2021.10.0
     nlp-primitives >= 2.1.0,!=2.6.0
     featuretools >= 1.7.0