Skip to content

Commit

Permalink
Closes Bears-R-Us#3917: copy function to match numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed Dec 3, 2024
1 parent 15bd176 commit fbaec58
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 5 deletions.
13 changes: 13 additions & 0 deletions arkouda/pdarrayclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,19 @@ def max_bits(self, max_bits):
generic_msg(cmd="set_max_bits", args={"array": self, "max_bits": max_bits})
self._max_bits = max_bits

def copy(self) -> pdarray:
    """
    Create a duplicate of this array.

    The work is delegated to ``arkouda.pdarraycreation.array`` with
    ``copy=True``.

    Returns
    -------
    pdarray
        A deep copy of the pdarray.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import -- confirm).
    from arkouda.pdarraycreation import array as _build_array

    duplicate = _build_array(self, copy=True)
    return duplicate

def equals(self, other) -> bool_scalars:
"""
Whether pdarrays are the same size and all entries are equal.
Expand Down
16 changes: 15 additions & 1 deletion arkouda/pdarraycreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) ->
def array(
a: Union[pdarray, np.ndarray, Iterable],
dtype: Union[np.dtype, type, str, None] = None,
copy: bool = True,
max_bits: int = -1,
) -> Union[pdarray, Strings]:
"""
Expand All @@ -153,6 +154,11 @@ def array(
Rank-1 array of a supported dtype
dtype: np.dtype, type, or str
The target dtype to cast values to
copy: bool=True, optional
If True (default), then the array data is copied.
Note that any copy of the data is deep, which differs from numpy.
For False it raises a ValueError if a copy cannot be avoided. Default: True.
max_bits: int
Specifies the maximum number of bits; only used for bigint pdarrays
Expand Down Expand Up @@ -206,9 +212,17 @@ def array(
"""
from arkouda.numpy import cast as akcast

if copy is False:
if isinstance(a, (Strings, pdarray)):
return a
else:
raise ValueError(
"In ak.array, copy=False can only used with applied to Strings and pdarray objects."
)

# If a is already a pdarray, copy it (or cast it when a dtype is given)
if isinstance(a, pdarray):
casted = a if dtype is None else akcast(a, dtype)
casted = a[:] if dtype is None else akcast(a, dtype)
if dtype == bigint and max_bits != -1:
casted.max_bits = max_bits
return casted
Expand Down
37 changes: 33 additions & 4 deletions arkouda/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import builtins
import json
from typing import TYPE_CHECKING, Sequence, Tuple, Union, cast
from typing import TYPE_CHECKING, Iterable, Sequence, Tuple, TypeVar, Union, cast
from warnings import warn

import numpy as np
from typeguard import typechecked

from arkouda.categorical import Categorical
Expand All @@ -24,12 +25,16 @@
from arkouda.pdarraysetops import unique
from arkouda.segarray import SegArray
from arkouda.sorting import coargsort
from arkouda.strings import Strings
from arkouda.timeclass import Datetime, Timedelta

if TYPE_CHECKING:
from arkouda.index import Index
from arkouda.series import Series
from arkouda.strings import Strings
else:
Strings = TypeVar("Strings")
Series = TypeVar("Series")
Categorical = TypeVar("Categorical")


def identity(x):
Expand Down Expand Up @@ -529,9 +534,10 @@ def is_int(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]): # no
return False


@typechecked
def map(
values: Union[pdarray, Strings, Categorical], mapping: Union[dict, "Series"]
) -> Union[pdarray, Strings]:
values: Union[pdarray, "Strings", "Categorical"], mapping: Union[dict, "Series"]
) -> Union[pdarray, "Strings"]:
"""
Map values of an array according to an input mapping.
Expand Down Expand Up @@ -573,7 +579,9 @@ def map(
import numpy as np

from arkouda import Series, array, broadcast, full
from arkouda.categorical import Categorical
from arkouda.pdarraysetops import in1d
from arkouda.strings import Strings

keys = values
gb = GroupBy(keys, dropna=False)
Expand Down Expand Up @@ -623,3 +631,24 @@ def _infer_shape_from_size(size):
shape = full_size
ndim = 1
return shape, ndim, full_size


@typechecked
def copy(a: Union[pdarray, np.ndarray, Iterable, "Strings"]) -> Union[pdarray, "Strings"]:
    """
    Return an array copy of the given object.

    Parameters
    ----------
    a : pdarray, np.ndarray, Iterable, or Strings
        The object to copy.

    Returns
    -------
    pdarray or Strings
        For a Strings input, the result of taking a full slice ``a[:]``.
        For any other input, the result of ``array(a, copy=True)``.
    """
    from arkouda.strings import Strings

    if not isinstance(a, Strings):
        # Everything that is not a Strings goes through the array constructor.
        from arkouda.pdarraycreation import array

        return array(a, copy=True)

    # Strings are duplicated with a full slice.
    return a[:]
14 changes: 14 additions & 0 deletions tests/pdarray_creation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,20 @@ def test_array_creation_misc(self):
with pytest.raises(TypeError):
ak.array(list(list(0)))

@pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint])
def test_array_copy(self, dtype):
    """
    Check the ``copy`` flag of ``ak.array`` for each parametrized dtype.

    ``copy=True`` must hand back a different object with equal contents;
    ``copy=False`` must hand back the very same object.
    """
    from arkouda.testing import assert_equal as ak_assert_equal

    a = ak.arange(100, dtype=dtype)

    # copy=True: distinct object, equal contents.
    b = ak.array(a, copy=True)
    assert a is not b
    ak_assert_equal(a, b)

    # copy=False: the input object itself is returned.
    c = ak.array(a, copy=False)
    assert a is c
    ak_assert_equal(a, c)

@pytest.mark.skip_if_max_rank_less_than(2)
def test_array_creation_transpose_bug_reproducer(self):

Expand Down
11 changes: 11 additions & 0 deletions tests/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import arkouda as ak
from arkouda.util import is_float, is_int, is_numeric, map
import pytest


class TestUtil:
Expand Down Expand Up @@ -119,3 +120,13 @@ def test_map(self):

result = map(d, {"1": 7.0})
assert np.allclose(result.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)

@pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint, ak.str_])
def test_copy(self, dtype):
    """
    Check that ``ak.util.copy`` returns a new object equal to its input.

    Runs once per parametrized dtype, including ``ak.str_``.
    """
    from arkouda import assert_equal as ak_assert_equal

    a = ak.arange(10, dtype=dtype)
    b = ak.util.copy(a)

    # The copy must be a different Python object with equal contents.
    assert a is not b
    ak_assert_equal(a, b)

0 comments on commit fbaec58

Please sign in to comment.