diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py
index 7086335903..1501f51563 100644
--- a/arkouda/pdarrayclass.py
+++ b/arkouda/pdarrayclass.py
@@ -450,6 +450,19 @@ def max_bits(self, max_bits):
             generic_msg(cmd="set_max_bits", args={"array": self, "max_bits": max_bits})
         self._max_bits = max_bits
 
+    def copy(self) -> pdarray:
+        """
+        Return an array copy of the given object.
+
+        Returns
+        -------
+        pdarray
+            A deep copy of the pdarray.
+        """
+        from arkouda.pdarraycreation import array
+
+        return array(self, copy=True)
+
     def equals(self, other) -> bool_scalars:
         """
         Whether pdarrays are the same size and all entries are equal.
diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py
index 5031094ed0..b19bb534bb 100644
--- a/arkouda/pdarraycreation.py
+++ b/arkouda/pdarraycreation.py
@@ -141,6 +141,7 @@ def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) ->
 def array(
     a: Union[pdarray, np.ndarray, Iterable],
     dtype: Union[np.dtype, type, str, None] = None,
     max_bits: int = -1,
+    copy: bool = True,
 ) -> Union[pdarray, Strings]:
     """
@@ -153,6 +154,11 @@ def array(
         Rank-1 array of a supported dtype
     dtype: np.dtype, type, or str
         The target dtype to cast values to
+    copy: bool, default=True
+        If True (default), then the array data is copied.
+        Note that any copy of the data is deep, which differs from numpy.
+        If False, a pdarray or Strings input is returned as-is, without applying
+        dtype or max_bits; any other input raises a ValueError.
     max_bits: int
         Specifies the maximum number of bits; only used for bigint pdarrays
 
@@ -206,9 +212,17 @@ def array(
     """
     from arkouda.numpy import cast as akcast
 
+    if copy is False:
+        if not isinstance(a, (Strings, pdarray)):
+            raise ValueError(
+                "In ak.array, copy=False can only be used with Strings and pdarray objects."
+            )
+        # Hand back the input object itself; dtype and max_bits are not applied on this path.
+        return a
+
-    # If a is already a pdarray, do nothing
+    # If a is already a pdarray, copy it with a full slice, or cast it when a dtype is given
     if isinstance(a, pdarray):
-        casted = a if dtype is None else akcast(a, dtype)
+        casted = a[:] if dtype is None else akcast(a, dtype)
         if dtype == bigint and max_bits != -1:
             casted.max_bits = max_bits
         return casted
diff --git a/arkouda/util.py b/arkouda/util.py
index e84d7dfa5f..d84d25a4b2 100644
--- a/arkouda/util.py
+++ b/arkouda/util.py
@@ -2,9 +2,10 @@
 
 import builtins
 import json
-from typing import TYPE_CHECKING, Sequence, Tuple, Union, cast
+from typing import TYPE_CHECKING, Iterable, Sequence, Tuple, TypeVar, Union, cast
 from warnings import warn
 
+import numpy as np
 from typeguard import typechecked
 
 from arkouda.categorical import Categorical
@@ -24,12 +25,15 @@
 from arkouda.pdarraysetops import unique
 from arkouda.segarray import SegArray
 from arkouda.sorting import coargsort
-from arkouda.strings import Strings
 from arkouda.timeclass import Datetime, Timedelta
 
 if TYPE_CHECKING:
     from arkouda.index import Index
     from arkouda.series import Series
+    from arkouda.strings import Strings
+else:
+    Strings = TypeVar("Strings")
+    Series = TypeVar("Series")
 
 
 def identity(x):
@@ -529,9 +533,10 @@ def is_int(arry: Union[pdarray, Strings, Categorical, "Series", "Index"]):  # no
     return False
 
 
+@typechecked
 def map(
-    values: Union[pdarray, Strings, Categorical], mapping: Union[dict, "Series"]
-) -> Union[pdarray, Strings]:
+    values: Union[pdarray, "Strings", "Categorical"], mapping: Union[dict, "Series"]
+) -> Union[pdarray, "Strings"]:
     """
     Map values of an array according to an input mapping.
 
@@ -573,7 +578,9 @@ def map(
     import numpy as np
 
     from arkouda import Series, array, broadcast, full
+    from arkouda.categorical import Categorical
     from arkouda.pdarraysetops import in1d
+    from arkouda.strings import Strings
 
     keys = values
     gb = GroupBy(keys, dropna=False)
@@ -623,3 +630,28 @@ def _infer_shape_from_size(size):
         shape = full_size
         ndim = 1
     return shape, ndim, full_size
+
+
+@typechecked
+def copy(a: Union[pdarray, np.ndarray, Iterable, "Strings"]) -> Union[pdarray, "Strings"]:
+    """
+    Return an array copy of the given object.
+
+    Parameters
+    ----------
+    a : pdarray, np.ndarray, Iterable, or Strings
+        The object to copy.
+
+    Returns
+    -------
+    pdarray or Strings
+        A copy of a as an arkouda array.
+    """
+    from arkouda.pdarraycreation import array
+    from arkouda.strings import Strings
+
+    # Strings are copied with a full slice instead of going through ak.array.
+    if isinstance(a, Strings):
+        return a[:]
+
+    return array(a, copy=True)
diff --git a/tests/pdarray_creation_test.py b/tests/pdarray_creation_test.py
index f3878eb069..e32750b4cc 100644
--- a/tests/pdarray_creation_test.py
+++ b/tests/pdarray_creation_test.py
@@ -105,6 +105,20 @@ def test_array_creation_misc(self):
         with pytest.raises(TypeError):
             ak.array(list(list(0)))
 
+    @pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint])
+    def test_array_copy(self, dtype):
+        from arkouda.testing import assert_equal as ak_assert_equal
+
+        a = ak.arange(100, dtype=dtype)
+
+        b = ak.array(a, copy=True)
+        assert a is not b
+        ak_assert_equal(a, b)
+
+        c = ak.array(a, copy=False)
+        assert a is c
+        ak_assert_equal(a, c)
+
     @pytest.mark.skip_if_max_rank_less_than(2)
     def test_array_creation_transpose_bug_reproducer(self):
 
diff --git a/tests/util_test.py b/tests/util_test.py
index 5482bb7a67..a255be921a 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -2,6 +2,7 @@
 
 import arkouda as ak
 from arkouda.util import is_float, is_int, is_numeric, map
+import pytest
 
 
 class TestUtil:
@@ -119,3 +120,13 @@ def test_map(self):
 
         result = map(d, {"1": 7.0})
         assert np.allclose(result.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)
+
+    @pytest.mark.parametrize("dtype", [ak.int64, ak.float64, ak.bool_, ak.bigint, ak.str_])
+    def test_copy(self, dtype):
+        from arkouda.testing import assert_equal as ak_assert_equal
+
+        a = ak.arange(10, dtype=dtype)
+        b = ak.util.copy(a)
+
+        assert a is not b
+        ak_assert_equal(a, b)