diff --git a/src/finch/tensor.py b/src/finch/tensor.py
index b37be04..4124f36 100644
--- a/src/finch/tensor.py
+++ b/src/finch/tensor.py
@@ -1,6 +1,5 @@
 import builtins
 from typing import Any, Callable, Optional, Iterable, Literal
-import warnings
 
 import numpy as np
 from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple
@@ -52,6 +51,9 @@ class Tensor(_Display, SparseArray):
         order. Default: row-major.
     fill_value : np.number, optional
         Only used when `arr : np.ndarray` is passed.
+    copy : bool, optional
+        If ``True``, the object is always copied. If ``None``, a copy is made only if needed.
+        If ``False``, a ``ValueError`` is raised if a copy cannot be avoided. Default: ``None``.
 
     Returns
     -------
@@ -85,24 +87,30 @@ def __init__(
         /,
         *,
         fill_value: np.number = 0.0,
+        copy: bool | None = None,
     ):
         if isinstance(obj, (int, float, complex, bool)):
-            obj = np.array(obj)
-
+            obj = np.asarray(obj)
         if _is_scipy_sparse_obj(obj):  # scipy constructor
-            jl_data = self._from_scipy_sparse(obj)
+            jl_data = self._from_scipy_sparse(obj, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, np.ndarray):  # numpy constructor
-            jl_data = self._from_numpy(obj, fill_value=fill_value)
+            jl_data = self._from_numpy(obj, fill_value=fill_value, copy=copy)
             self._obj = jl_data
         elif isinstance(obj, Storage):  # from-storage constructor
+            if copy:
+                self._raise_julia_copy_not_supported()
             order = self.preprocess_order(
                 obj.order, self.get_lvl_ndim(obj.levels_descr._obj)
             )
             self._obj = jl.swizzle(jl.Tensor(obj.levels_descr._obj), *order)
         elif jl.isa(obj, jl.Finch.Tensor):  # raw-Julia-object constructors
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = jl.swizzle(obj, *tuple(range(1, jl.ndims(obj) + 1)))
         elif jl.isa(obj, jl.Finch.SwizzleArray) or jl.isa(obj, jl.Finch.LazyTensor):
+            if copy:
+                self._raise_julia_copy_not_supported()
             self._obj = obj
         else:
             raise ValueError(
@@ -339,7 +347,9 @@ def _from_other_tensor(cls, tensor: "Tensor", storage: Storage | None) -> JuliaObj:
         )
 
     @classmethod
-    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
+    def _from_numpy(cls, arr: np.ndarray, fill_value: np.number, copy: bool | None = None) -> JuliaObj:
+        if copy:
+            arr = arr.copy()
         order_char = "F" if np.isfortran(arr) else "C"
         order = cls.preprocess_order(order_char, arr.ndim)
         inv_order = tuple(i - 1 for i in jl.invperm(order))
@@ -355,21 +365,20 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
         return jl.swizzle(jl.Tensor(lvl._obj), *order)
 
     @classmethod
-    def from_scipy_sparse(cls, x) -> "Tensor":
+    def from_scipy_sparse(cls, x, copy: bool | None = None) -> "Tensor":
         if not _is_scipy_sparse_obj(x):
-            raise ValueError("{x} is not a SciPy sparse object.")
-        return Tensor(x)
+            raise ValueError(f"{x} is not a SciPy sparse object.")
+        return Tensor(x, copy=copy)
 
     @classmethod
-    def _from_scipy_sparse(cls, x) -> JuliaObj:
+    def _from_scipy_sparse(cls, x, *, copy: bool | None = None) -> JuliaObj:
+        if copy is False and not (x.has_canonical_format and x.format in ("coo", "csr", "csc")):
+            raise ValueError("Unable to avoid copy while creating an array as requested.")
+        if copy or not x.has_canonical_format:
+            x = x.copy()
         if x.format not in ("coo", "csr", "csc"):
             x = x.asformat("coo")
         if not x.has_canonical_format:
-            warnings.warn(
-                "SciPy sparse input must be in a canonical format. "
-                "Calling `sum_duplicates`."
-            )
-            x = x.copy()
             x.sum_duplicates()
         assert x.has_canonical_format
 
@@ -531,6 +540,10 @@ def to_scipy_sparse(self):
         else:
             raise ValueError("Tensor can't be converted to scipy.sparse object.")
 
+    @staticmethod
+    def _raise_julia_copy_not_supported() -> None:
+        raise ValueError("copy=True isn't supported for Julia object inputs")
+
     def __array_namespace__(self, *, api_version: str | None = None) -> Any:
         if api_version is None:
             api_version = "2023.12"
@@ -555,7 +568,14 @@ def random(shape, density=0.01, random_state=None):
     return Tensor(jl.fsprand(*args))
 
 
-def asarray(obj, /, *, dtype=None, format=None):
+def asarray(
+    obj,
+    /,
+    *,
+    dtype: DType | None = None,
+    format: str | None = None,
+    copy: bool | None = None,
+) -> Tensor:
     if format not in {"coo", "csr", "csc", "csf", "dense", None}:
         raise ValueError(f"{format} format not supported.")
-    tensor = obj if isinstance(obj, Tensor) else Tensor(obj)
+    tensor = obj if isinstance(obj, Tensor) else Tensor(obj, copy=copy)
@@ -647,12 +667,12 @@ def permute_dims(x: Tensor, axes: tuple[int, ...]):
     return x.permute_dims(axes)
 
 
-def astype(x: Tensor, dtype: DType, /, *, copy: bool = True):
-    if not copy:
-        if x.dtype == dtype:
-            return x
-        else:
-            raise ValueError("Unable to avoid a copy while casting in no-copy mode.")
+def astype(x: Tensor, dtype: DType, /, *, copy: bool | None = None):
+    if copy is False and x.dtype != dtype:
+        raise ValueError("Unable to avoid a copy while casting in no-copy mode.")
+
+    if not copy and x.dtype == dtype:
+        return x
     else:
         finch_tns = x._obj.body
         result = jl.copyto_b(
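For reference, a quick sketch of how the new `copy` keyword behaves from the Python side. This is illustrative only, not part of the patch, and assumes a build of `finch` that includes the changes above:

    import numpy as np
    import scipy.sparse as sp
    import finch

    arr = np.zeros((2, 3))
    finch.Tensor(arr, copy=True)   # always copies the input buffer
    finch.Tensor(arr, copy=None)   # default: copies only if needed
    finch.Tensor(arr, copy=False)  # dense ndarray input, so no copy is required

    # A COO matrix with duplicate entries is not canonical, so a copy
    # (on which sum_duplicates is called) is unavoidable and copy=False raises.
    coo = sp.coo_matrix((np.ones(2), ([0, 0], [1, 1])), shape=(2, 3))
    try:
        finch.Tensor(coo, copy=False)
    except ValueError as e:
        print(e)  # Unable to avoid copy while creating an array as requested.
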
diff --git a/tests/test_sparse.py b/tests/test_sparse.py
index 9f154a3..7cb28d8 100644
--- a/tests/test_sparse.py
+++ b/tests/test_sparse.py
@@ -44,9 +44,10 @@ def test_wrappers(dtype, jl_dtype, order):
 
 @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
 @pytest.mark.parametrize("order", ["C", "F", None])
-def test_no_copy_fully_dense(dtype, order, arr3d):
+@pytest.mark.parametrize("copy", [True, False, None])
+def test_no_copy_fully_dense(dtype, order, copy, arr3d):
     arr = np.array(arr3d, dtype=dtype, order=order)
-    arr_finch = finch.Tensor(arr)
+    arr_finch = finch.Tensor(arr, copy=copy)
     arr_todense = arr_finch.todense()
     assert_equal(arr_todense, arr)
 
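And a matching sketch for the Array API style semantics that `astype` gains here, under the same assumption that the patched `finch` module is installed:

    import numpy as np
    import finch

    x = finch.asarray(np.arange(6).reshape(2, 3))

    assert finch.astype(x, x.dtype, copy=False) is x  # same dtype: x is returned as-is
    y = finch.astype(x, np.float64)  # copy=None and a cast is needed, so a copy is made

    try:
        finch.astype(x, np.float64, copy=False)  # the cast cannot avoid a copy
    except ValueError as e:
        print(e)  # Unable to avoid a copy while casting in no-copy mode.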