diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 473d67acf6e74..7ccafbb4cf1df 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -34,7 +34,7 @@ Other enhancements - The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been - updated to work correctly with NumPy >= 2 (:issue:`57739`) + updated to raise a ``FutureWarning`` with NumPy >= 2 when ``copy=False`` is passed but the conversion requires a copy (:issue:`60340`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) - diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0c1e1d0c63c85..16dfa7e051d0d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -12,6 +12,7 @@ cast, ) import unicodedata +import warnings import numpy as np @@ -28,6 +29,7 @@ pa_version_under13p0, ) from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.cast import ( @@ -663,9 +665,15 @@ def __array__( ) -> np.ndarray: """Correctly construct numpy arrays when passed to `np.asarray()`.""" if copy is False: - # TODO: By using `zero_copy_only` it may be possible to implement this - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) elif copy is None: # `to_numpy(copy=False)` has the meaning of NumPy `copy=None`. 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3383f35bb7d55..0fe69f6d1ebc2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1672,8 +1672,15 @@ def __array__( array(['a', 'b'], dtype=object) """ if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) ret = take_nd(self.categories._values, self._codes) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 990116bad13d1..cfe1f3acd9143 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -359,9 +359,17 @@ def __array__( # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow this " + "behavior starting with pandas 3.0.\nThis conversion to NumPy " + "requires a copy, but 'copy=False' was passed. 
Consider using " + "'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) + return np.array(list(self), dtype=object) if copy is True: diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 5aac3d3b28db5..da57e4ceed87e 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -42,6 +42,7 @@ from pandas.compat.numpy import function as nv from pandas.errors import IntCastingNaNError from pandas.util._decorators import Appender +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( LossySetitemError, @@ -1575,8 +1576,15 @@ def __array__( objects (with dtype='object') """ if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) left = self._left diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 0e839dc7a80bb..da656a2768901 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -38,6 +38,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.base import ExtensionDtype @@ -604,8 +605,16 @@ def __array__( if not self._hasna: # special case, here we can simply return the underlying data return np.array(self._data, dtype=dtype, copy=copy) - raise ValueError( - "Unable to avoid copy while creating an array as requested." 
+ + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) if copy is None: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index aad7737b8dd94..2947ba7b8c72a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -415,8 +415,15 @@ def __array__( return np.array(self.asi8, dtype=dtype) if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) if dtype == bool: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 13577e366d54b..07ff592f491a8 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -562,8 +562,15 @@ def __array__( return self.sp_values if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. 
Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) fill_value = self.fill_value diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e55a54112ee72..70b72577dd5d1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2151,9 +2151,16 @@ def __array__( ) -> np.ndarray: if copy is False and not self._mgr.is_single_block and not self.empty: # check this manually, otherwise ._values will already return a copy - # and np.array(values, copy=False) will not raise an error - raise ValueError( - "Unable to avoid copy while creating an array as requested." + # and np.array(values, copy=False) will not raise a warning + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) values = self._values if copy is None: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7cb28214c7289..8954d49649a2b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1314,8 +1314,15 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray: """the array interface, return my values""" if copy is False: # self.values is always a newly construct array, so raise. - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. 
Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) if copy is True: # explicit np.array call to ensure a copy is made and unique objects diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 1e8d36b184e48..b2a570b14df3c 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -500,8 +500,8 @@ def test_array_interface(arr_data, arr): # copy=False semantics are only supported in NumPy>=2. return - # for sparse arrays, copy=False is never allowed - with pytest.raises(ValueError, match="Unable to avoid copy while creating"): + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with tm.assert_produces_warning(FutureWarning, match=msg): np.array(arr, copy=False) # except when there are actually no sparse filled values diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index a65ab4d287d11..4d0e2d1ce0e07 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -378,8 +378,8 @@ def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array): return if not zero_copy: - with pytest.raises(ValueError, match="Unable to avoid copy while creating"): - # An error is always acceptable for `copy=False` + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with tm.assert_produces_warning(FutureWarning, match=msg): np.array(thing, copy=False) else: diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 06d9424450011..0dabec6014b0d 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -187,7 +187,8 @@ def test_dataframe_multiple_numpy_dtypes(): if np_version_gt2: # copy=False semantics are only supported in NumPy>=2. 
- with pytest.raises(ValueError, match="Unable to avoid copy while creating"): + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with pytest.raises(FutureWarning, match=msg): arr = np.array(df, copy=False) arr = np.array(df, copy=True) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 79eb64b5a654f..38cece7da3308 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -82,15 +84,27 @@ def test_array_interface_copy(self, data): # copy=False semantics are only supported in NumPy>=2. return - try: + warning_raised = False + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") result_nocopy1 = np.array(data, copy=False) - except ValueError: - # An error is always acceptable for `copy=False` - return - - result_nocopy2 = np.array(data, copy=False) - # If copy=False was given and did not raise, these must share the same data - assert np.may_share_memory(result_nocopy1, result_nocopy2) + assert len(w) <= 1 + if len(w): + warning_raised = True + assert msg in str(w[0].message) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result_nocopy2 = np.array(data, copy=False) + assert len(w) <= 1 + if len(w): + warning_raised = True + assert msg in str(w[0].message) + + if not warning_raised: + # If copy=False was given and did not raise, these must share the same data + assert np.may_share_memory(result_nocopy1, result_nocopy2) def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 8afb989508e04..8590cd7fdc235 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ 
b/pandas/tests/extension/decimal/test_decimal.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + import pandas as pd import pandas._testing as tm from pandas.tests.extension import base @@ -289,6 +291,24 @@ def test_series_repr(self, data): def test_unary_ufunc_dunder_equivalence(self, data, ufunc): super().test_unary_ufunc_dunder_equivalence(data, ufunc) + def test_array_interface_copy(self, data): + result_copy1 = np.array(data, copy=True) + result_copy2 = np.array(data, copy=True) + assert not np.may_share_memory(result_copy1, result_copy2) + if not np_version_gt2: + # copy=False semantics are only supported in NumPy>=2. + return + + try: + result_nocopy1 = np.array(data, copy=False) + except ValueError: + # An error is always acceptable for `copy=False` + return + + result_nocopy2 = np.array(data, copy=False) + # If copy=False was given and did not raise, these must share the same data + assert np.may_share_memory(result_nocopy1, result_nocopy2) + def test_take_na_value_other_decimal(): arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b6d72c10712f2..5ff99589a1961 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -25,9 +25,12 @@ TYPE_CHECKING, Any, ) +import warnings import numpy as np +from pandas.util._exceptions import find_stack_level + from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( is_bool_dtype, @@ -148,8 +151,15 @@ def __ne__(self, other): def __array__(self, dtype=None, copy=None): if copy is False: - raise ValueError( - "Unable to avoid copy while creating an array as requested." + warnings.warn( + "Starting with NumPy 2.0, the behavior of the 'copy' keyword has " + "changed and passing 'copy=False' raises an error when returning " + "a zero-copy NumPy array is not possible. 
pandas will follow " + "this behavior starting with pandas 3.0.\nThis conversion to " + "NumPy requires a copy, but 'copy=False' was passed. Consider " + "using 'np.asarray(..)' instead.", + FutureWarning, + stacklevel=find_stack_level(), ) if dtype is None: diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 58a2dc00f937d..d62bd5438a1e3 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -47,7 +47,8 @@ def test_array_interface(idx): return # for MultiIndex, copy=False is never allowed - with pytest.raises(ValueError, match="Unable to avoid copy while creating"): + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with tm.assert_produces_warning(FutureWarning, match=msg): np.array(idx, copy=False) diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index cf59e3e4c4934..e0d652facb8fc 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -5,8 +5,6 @@ from pandas._config import using_string_dtype -from pandas.compat import HAS_PYARROW - from pandas import ( DataFrame, date_range, @@ -170,9 +168,6 @@ def test_excel_options(fsspectest): assert fsspectest.test[0] == "read" -@pytest.mark.xfail( - using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string) fastparquet" -) def test_to_parquet_new_file(cleared_fs, df1): """Regression test for writing to a not-yet-existent GCS Parquet file.""" pytest.importorskip("fastparquet")