diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst index 50550862e7436..2d2553e7b32be 100644 --- a/doc/source/whatsnew/v2.3.2.rst +++ b/doc/source/whatsnew/v2.3.2.rst @@ -26,8 +26,8 @@ Bug fixes "string" type in the JSON Table Schema for :class:`StringDtype` columns (:issue:`61889`) - Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`) -- Fixed ``~Series.str.match`` and ``~Series.str.fullmatch`` with compiled regex - for the Arrow-backed string dtype (:issue:`61964`) +- Fixed :meth:`Series.str.match`, :meth:`Series.str.fullmatch` and :meth:`Series.str.contains` + with compiled regex for the Arrow-backed string dtype (:issue:`61964`, :issue:`61942`) .. --------------------------------------------------------------------------- .. _whatsnew_232.contributors: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index af446eb0da04a..39e84a3abdcf5 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -346,6 +346,8 @@ def _str_contains( ): if flags: return super()._str_contains(pat, case, flags, na, regex) + if isinstance(pat, re.Pattern): + pat = pat.pattern return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 598626318e66b..cce96f38d216a 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -281,6 +281,19 @@ def test_contains_nan(any_string_dtype): tm.assert_series_equal(result, expected) +def test_contains_compiled_regex(any_string_dtype): + # GH#61942 + ser = Series(["foo", "bar", "baz"], dtype=any_string_dtype) + pat = re.compile("ba.") + result = ser.str.contains(pat) + + expected_dtype = ( + np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else 
"boolean" + ) + expected = Series([False, True, True], dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + # -------------------------------------------------------------------------------------- # str.startswith # --------------------------------------------------------------------------------------