Skip to content

Commit

Permalink
GH-37574: [Python] Compatibility with numpy 2.0 (#38040)
Browse files Browse the repository at this point in the history
### What changes are included in this PR?

- Add support for NumPy 2.0 in pyarrow.
- In CI, install the NumPy 2 and pandas 2.2.0.dev0 nightly builds from the scientific-python-nightly-wheels index (replacing scipy-wheels-nightly).

### Are these changes tested?
Yes. I tested this locally with numpy==2.0.0.dev0 and pandas==2.2.0.dev0+325.g6c58a217f5.

### Are there any user-facing changes?
No

* Closes: #37574

Authored-by: Thomas Grainger <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
graingert authored Oct 5, 2023
1 parent 1b262a2 commit 1eec38d
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 13 deletions.
4 changes: 2 additions & 2 deletions ci/scripts/install_pandas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pandas=$1
numpy=${2:-"latest"}

if [ "${numpy}" = "nightly" ]; then
pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre numpy
elif [ "${numpy}" = "latest" ]; then
pip install numpy
else
Expand All @@ -38,7 +38,7 @@ fi
if [ "${pandas}" = "upstream_devel" ]; then
pip install git+https://github.com/pandas-dev/pandas.git
elif [ "${pandas}" = "nightly" ]; then
pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas
elif [ "${pandas}" = "latest" ]; then
pip install pandas
else
Expand Down
7 changes: 4 additions & 3 deletions python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import warnings

import numpy as np
from numpy.core.numerictypes import sctypes as _np_sctypes

import pyarrow as pa
from pyarrow.lib import _pandas_api, frombytes # noqa
Expand Down Expand Up @@ -98,7 +99,7 @@ def get_logical_type(arrow_type):
np.float32: 'float32',
np.float64: 'float64',
'datetime64[D]': 'date',
np.unicode_: 'string',
np.str_: 'string',
np.bytes_: 'bytes',
}

Expand Down Expand Up @@ -780,7 +781,7 @@ def table_to_blockmanager(options, table, categories=None,
# dataframe (complex not included since not supported by Arrow)
_pandas_supported_numpy_types = {
str(np.dtype(typ))
for typ in (np.sctypes['int'] + np.sctypes['uint'] + np.sctypes['float'] +
for typ in (_np_sctypes['int'] + _np_sctypes['uint'] + _np_sctypes['float'] +
['object', 'bool'])
}

Expand Down Expand Up @@ -1010,7 +1011,7 @@ def _is_generated_index_name(name):
'date': 'datetime64[D]',
'datetime': 'datetime64[ns]',
'datetimetz': 'datetime64[ns]',
'unicode': np.unicode_,
'unicode': np.str_,
'bytes': np.bytes_,
'string': np.str_,
'integer': np.int64,
Expand Down
22 changes: 15 additions & 7 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@
pass


try:
_np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
except AttributeError:
from numpy.exceptions import (
VisibleDeprecationWarning as _np_VisibleDeprecationWarning
)


# Marks all of the tests in this module
pytestmark = pytest.mark.pandas

Expand Down Expand Up @@ -706,7 +714,7 @@ def test_float_nulls(self):

def test_float_nulls_to_ints(self):
# ARROW-2135
df = pd.DataFrame({"a": [1.0, 2.0, np.NaN]})
df = pd.DataFrame({"a": [1.0, 2.0, np.nan]})
schema = pa.schema([pa.field("a", pa.int16(), nullable=True)])
table = pa.Table.from_pandas(df, schema=schema, safe=False)
assert table[0].to_pylist() == [1, 2, None]
Expand Down Expand Up @@ -2329,7 +2337,7 @@ def test_nested_large_list(self):
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
"Creating an ndarray from ragged nested",
np.VisibleDeprecationWarning)
_np_VisibleDeprecationWarning)
warnings.filterwarnings("ignore", "elementwise comparison failed",
DeprecationWarning)
tm.assert_series_equal(
Expand Down Expand Up @@ -2441,26 +2449,26 @@ def test_list_no_duplicate_base(self):
np_arr = chunked_arr.to_numpy()

expected = np.array([[1., 2.], [3., 4., 5.], None,
[6., np.NaN]], dtype="object")
[6., np.nan]], dtype="object")
for left, right in zip(np_arr, expected):
if right is None:
assert left == right
else:
npt.assert_array_equal(left, right)

expected_base = np.array([[1., 2., 3., 4., 5., 6., np.NaN]])
expected_base = np.array([[1., 2., 3., 4., 5., 6., np.nan]])
npt.assert_array_equal(np_arr[0].base, expected_base)

np_arr_sliced = chunked_arr.slice(1, 3).to_numpy()

expected = np.array([[3, 4, 5], None, [6, np.NaN]], dtype="object")
expected = np.array([[3, 4, 5], None, [6, np.nan]], dtype="object")
for left, right in zip(np_arr_sliced, expected):
if right is None:
assert left == right
else:
npt.assert_array_equal(left, right)

expected_base = np.array([[3., 4., 5., 6., np.NaN]])
expected_base = np.array([[3., 4., 5., 6., np.nan]])
npt.assert_array_equal(np_arr_sliced[0].base, expected_base)

def test_list_values_behind_null(self):
Expand All @@ -2471,7 +2479,7 @@ def test_list_values_behind_null(self):
)
np_arr = arr.to_numpy(zero_copy_only=False)

expected = np.array([[1., 2.], None, [3., np.NaN]], dtype="object")
expected = np.array([[1., 2.], None, [3., np.nan]], dtype="object")
for left, right in zip(np_arr, expected):
if right is None:
assert left == right
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/tests/test_scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def test_numerics():
# float16
s = pa.scalar(np.float16(0.5), type='float16')
assert isinstance(s, pa.HalfFloatScalar)
assert repr(s) == "<pyarrow.HalfFloatScalar: 0.5>"
# on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
# on numpy1 repr(np.float16(0.5)) == "0.5"
assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
assert str(s) == "0.5"
assert s.as_py() == 0.5

Expand Down

0 comments on commit 1eec38d

Please sign in to comment.