From 9a9c20382291ce422ad89798f01001ae4c7b068d Mon Sep 17 00:00:00 2001 From: iabhi4 Date: Sun, 1 Jun 2025 15:06:19 -0700 Subject: [PATCH 1/2] BUG: Fix Series.str.zfill for ArrowDtype string arrays #61485 --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/arrays/arrow/array.py | 7 +++++++ pandas/core/arrays/string_.py | 3 +++ pandas/core/strings/accessor.py | 4 ++-- pandas/core/strings/object_array.py | 3 +++ pandas/tests/strings/test_string_array.py | 16 ++++++++++++++++ 6 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 099e5bc48353a..1d11d0c72acac 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -725,8 +725,8 @@ Conversion Strings ^^^^^^^ +- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for ``ArrowDtype(pa.string())``. Now supported via ``_str_zfill`` implementation in ``ArrowExtensionArray`` (:issue:`61485`) - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`) -- Interval ^^^^^^^^ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0b90bcea35100..ac55a42ad4a6e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2601,6 +2601,13 @@ def _str_wrap(self, width: int, **kwargs) -> Self: result = self._apply_elementwise(predicate) return type(self)(pa.chunked_array(result)) + def _str_zfill(self, width: int) -> Self: + # TODO: Replace with pc.utf8_zfill when supported by arrow + # Arrow ENH - https://github.com/apache/arrow/issues/46683 + predicate = lambda val: val.zfill(width) + result = self._apply_elementwise(predicate) + return type(self)(pa.chunked_array(result)) + @property def _dt_days(self) -> Self: return type(self)( diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8048306df91a2..0efa8550fc78d 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1077,6 +1077,9 @@ def _cmp_method(self, other, op): _arith_method = _cmp_method + def _str_zfill(self, width: int) -> Self: + return self._str_map(lambda x: x.zfill(width)) + class StringArrayNumpySemantics(StringArray): _storage = "python" diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 81f7441846589..636a8c997f2a5 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1938,8 +1938,8 @@ def zfill(self, width: int): if not is_integer(width): msg = f"width must be of integer type, not {type(width).__name__}" raise TypeError(msg) - f = lambda x: x.zfill(width) - result = self._data.array._str_map(f) + + result = self._data.array._str_zfill(width) return self._wrap_result(result) def slice(self, start=None, stop=None, step=None): diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 0adb7b51cf2b7..bbb73928bf6fb 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -537,3 +537,6 @@ def f(x): return empty_row return [f(val) for val in np.asarray(self)] + + def _str_zfill(self, width: int): + return self._str_map(lambda x: x.zfill(width)) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index c5414022e664b..11fc3034cf290 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -110,3 +110,19 @@ def test_string_array_extract(nullable_string_dtype): result = result.astype(object) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "values, width, expected", + [ + (["a", "ab", "abc", None], 4, ["000a", "00ab", "0abc", None]), + (["1", "-1", "+1", None], 4, ["0001", "-001", "+001", None]), + (["1234", "-1234"], 3, ["1234", "-1234"]), + ], +) +def test_string_array_zfill(nullable_string_dtype, values, width, expected): + # GH #61485 + s = Series(values, dtype=nullable_string_dtype) + result = s.str.zfill(width) + expected = Series(expected, dtype=nullable_string_dtype) + tm.assert_series_equal(result, expected) From 4ebf1eb6a7a6fba010711dba3603f7c6f2564202 Mon Sep 17 00:00:00 2001 From: Abhinav <61010675+iabhi4@users.noreply.github.com> Date: Mon, 9 Jun 2025 10:59:18 -0700 Subject: [PATCH 2/2] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d11d0c72acac..6104baeb5b5f3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -725,7 +725,7 @@ Conversion Strings ^^^^^^^ -- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for ``ArrowDtype(pa.string())``. Now supported via ``_str_zfill`` implementation in ``ArrowExtensionArray`` (:issue:`61485`) +- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`) - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`) Interval