diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ec5027840dfd5..bdcafb1f789fe 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -942,6 +942,7 @@ Other - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) - Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) +- Bug in :func:`pandas.testing.assert_series_equal` incorrectly failing when comparing :class:`Categorical` series with null values and different category orders when ``check_category_order=False`` (:issue:`62008`) - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) - Bug in ``Series.list`` methods not preserving the original :class:`Index`. 
(:issue:`58425`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index daa5187cdb636..f7d3caae83e75 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -495,8 +495,8 @@ def assert_categorical_equal( lc, rc = left.categories, right.categories assert_index_equal(lc, rc, obj=f"{obj}.categories", exact=exact) assert_index_equal( - left.categories.take(left.codes), - right.categories.take(right.codes), + left.categories.take(left.codes, allow_fill=True, fill_value=np.nan), + right.categories.take(right.codes, allow_fill=True, fill_value=np.nan), obj=f"{obj}.values", exact=exact, ) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 8c9fff8e6ae2d..20276c37d30d2 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -507,3 +507,19 @@ def test_assert_series_equal_check_exact_index_default(left_idx, right_idx): ser2 = Series(np.zeros(6, dtype=int), right_idx) tm.assert_series_equal(ser1, ser2) tm.assert_frame_equal(ser1.to_frame(), ser2.to_frame()) + + +def test_assert_series_equal_categorical_nulls_different_order(): + # https://github.com/pandas-dev/pandas/issues/62008 + values = ["B", np.nan, "D"] + categorical_left = ["B", "D"] + categorical_right = categorical_left[::-1] # Different Order + + left = Series(Categorical(values, categories=categorical_left)) + right = Series(Categorical(values, categories=categorical_right)) + + tm.assert_series_equal(left, right, check_category_order=False) + + msg = "Categorical.categories are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_category_order=True)