diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index bab53958d..bb24f0eb7 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -55,7 +55,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.10.3 + - uses: pypa/gh-action-pypi-publish@v1.11.0 with: user: __token__ password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2866ed4b8..c388ba31d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,15 +33,14 @@ jobs: numpy-version: '2.1' dependency-set: 'optional' os: 'macos-latest' - # https://github.com/zarr-developers/zarr-python/issues/2438 - # - python-version: '3.11' - # numpy-version: '1.25' - # dependency-set: 'optional' - # os: 'windows-latest' - # - python-version: '3.13' - # numpy-version: '2.1' - # dependency-set: 'optional' - # os: 'windows-latest' + - python-version: '3.11' + numpy-version: '1.25' + dependency-set: 'optional' + os: 'windows-latest' + - python-version: '3.13' + numpy-version: '2.1' + dependency-set: 'optional' + os: 'windows-latest' runs-on: ${{ matrix.os }} steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 40cddb50c..1c5edaec5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.0 + rev: v0.7.2 hooks: - id: ruff args: ["--fix", "--show-fixes"] @@ -22,7 +22,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.12.1 + rev: v1.13.0 hooks: - id: mypy files: src|tests diff --git a/pyproject.toml b/pyproject.toml index 7be199948..dc0e4730e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ gpu = [ docs = [ 'sphinx==8.1.3', 'sphinx-autobuild>=2021.3.14', - 'sphinx-autoapi==3.3.2', + 'sphinx-autoapi==3.3.3', 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', @@ -269,19 +269,25 @@ extend-exclude = [ extend-select = [ "ANN", # flake8-annotations "B", # flake8-bugbear "C4", # flake8-comprehensions + "EXE", # flake8-executable + "FA", # flake8-future-annotations "FLY", # flynt "FURB", # refurb "G", # flake8-logging-format "I", # isort "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging "PERF", # Perflint "PGH", # pygrep-hooks + "PIE", # flake8-pie "PT", # flake8-pytest-style "PYI", # flake8-pyi - "RSE", # flake8-raise "RET", # flake8-return + "RSE", # flake8-raise "RUF", + "SIM", # flake8-simplify + "SLOT", # flake8-slots "TCH", # flake8-type-checking "TRY", # tryceratops "UP", # pyupgrade @@ -298,6 +304,7 @@ ignore = [ "RET505", "RET506", "RUF005", + "SIM108", "TRY003", "UP027", # deprecated "UP038", # https://github.com/astral-sh/ruff/issues/7871 @@ -319,7 +326,7 @@ ignore = [ ] [tool.ruff.lint.extend-per-file-ignores] -"tests/**" = ["ANN001", "ANN201"] +"tests/**" = ["ANN001", "ANN201", "RUF029", "SIM117", "SIM300"] [tool.mypy] python_version = "3.11" diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index f27152e84..fabd042db 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -106,7 +106,6 @@ def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: Chun chunk_grid : ChunkGrid The array chunk grid """ - ...
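The `- ...` deletions in the hunk above, and in several hunks below, rely on the fact that a docstring is itself a complete statement: a method whose body is only a docstring needs no trailing `...` placeholder, which is presumably what the newly enabled flake8-pie checks flag as redundant. A minimal sketch of the pattern, using a hypothetical class that is not part of this diff:

from abc import ABC, abstractmethod

class Reader(ABC):  # hypothetical example, not from this diff
    @abstractmethod
    def read(self) -> bytes:
        """Read and return all bytes."""
        # No trailing `...` is needed: the docstring alone is a valid body.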
async def _decode_single(self, chunk_data: CodecOutput, chunk_spec: ArraySpec) -> CodecInput: raise NotImplementedError diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py index 291ceb459..a56f98664 100644 --- a/src/zarr/abc/metadata.py +++ b/src/zarr/abc/metadata.py @@ -42,6 +42,5 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: """ Create an instance of the model from a dictionary """ - ... return cls(**data) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 055fe1b3a..424214986 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -250,7 +250,6 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: Insert multiple (key, value) pairs into storage. """ await gather(*starmap(self.set, values)) - return @property @abstractmethod @@ -296,20 +295,19 @@ def supports_listing(self) -> bool: ... @abstractmethod - def list(self) -> AsyncGenerator[str, None]: + def list(self) -> AsyncGenerator[str]: """Retrieve all keys in the store. Returns ------- AsyncGenerator[str, None] """ - ... @abstractmethod - def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + def list_prefix(self, prefix: str) -> AsyncGenerator[str]: """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned as - absolute paths (i.e. including the prefix). + Retrieve all keys in the store that begin with a given prefix. Keys are returned relative + to the root of the store. Parameters ---------- @@ -319,10 +317,9 @@ def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: ------- AsyncGenerator[str, None] """ - ... @abstractmethod - def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + def list_dir(self, prefix: str) -> AsyncGenerator[str]: """ Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix. @@ -335,7 +332,20 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: ------- AsyncGenerator[str, None] """ - ... + + async def delete_dir(self, prefix: str) -> None: + """ + Remove all keys and prefixes in the store that begin with a given prefix. 
+ """ + if not self.supports_deletes: + raise NotImplementedError + if not self.supports_listing: + raise NotImplementedError + self._check_writable() + if not prefix.endswith("/"): + prefix += "/" + async for key in self.list_prefix(prefix): + await self.delete(key) async def delete_dir(self, prefix: str) -> None: """ diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 69c44611c..9f6721e86 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -9,6 +9,7 @@ import numpy.typing as npt from zarr.core.array import Array, AsyncArray, get_array_metadata +from zarr.core.buffer import NDArrayLike from zarr.core.common import ( JSON, AccessModeLiteral, @@ -422,6 +423,8 @@ async def save_array( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_version() ) + if not isinstance(arr, NDArrayLike): + raise TypeError("arr argument must be numpy or other NDArrayLike array") mode = kwargs.pop("mode", "a") store_mode = _handle_store_mode(mode) @@ -484,16 +487,27 @@ async def save_group( or _default_zarr_version() ) + for arg in args: + if not isinstance(arg, NDArrayLike): + raise TypeError( + "All arguments must be numpy or other NDArrayLike arrays (except store, path, storage_options, and zarr_format)" + ) + for k, v in kwargs.items(): + if not isinstance(v, NDArrayLike): + raise TypeError(f"Keyword argument '{k}' must be a numpy or other NDArrayLike array") + if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") aws = [] for i, arr in enumerate(args): + _path = f"{path}/arr_{i}" if path is not None else f"arr_{i}" aws.append( save_array( store_path, arr, zarr_format=zarr_format, - path=f"arr_{i}", + path=_path, + storage_options=storage_options, ) ) for k, arr in kwargs.items(): @@ -923,7 +937,6 @@ async def create( store, path=path, mode=store_mode, storage_options=storage_options ) await store_path._init(mode) - return await AsyncArray.create( store_path, shape=shape, diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index dc6c3f915..e407d9489 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -9,7 +9,6 @@ from zarr.codecs.bytes import BytesCodec, Endian from zarr.codecs.crc32c_ import Crc32cCodec from zarr.codecs.gzip import GzipCodec -from zarr.codecs.pipeline import BatchedCodecPipeline from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation from zarr.codecs.transpose import TransposeCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec @@ -17,7 +16,6 @@ from zarr.core.metadata.v3 import DataType __all__ = [ - "BatchedCodecPipeline", "BloscCname", "BloscCodec", "BloscShuffle", diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index c0ad5e138..b6e693148 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -21,7 +21,7 @@ def parse_gzip_level(data: JSON) -> int: if not isinstance(data, (int)): raise TypeError(f"Expected int, got {type(data)}") - if data not in range(0, 10): + if data not in range(10): raise ValueError( f"Expected an integer from the inclusive range (0, 9). Got {data} instead." 
) diff --git a/src/zarr/codecs/registry.py b/src/zarr/codecs/registry.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index e7fd14ecb..5372d5ec5 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -252,7 +252,7 @@ def create_empty( def __setitem__(self, chunk_coords: ChunkCoords, value: Buffer) -> None: chunk_start = len(self.buf) chunk_length = len(value) - self.buf = self.buf + value + self.buf += value self.index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) def __delitem__(self, chunk_coords: ChunkCoords) -> None: diff --git a/src/zarr/core/__init__.py b/src/zarr/core/__init__.py index e69de29bb..cbacfe342 100644 --- a/src/zarr/core/__init__.py +++ b/src/zarr/core/__init__.py @@ -0,0 +1,4 @@ +from __future__ import annotations + +from zarr.core.buffer import Buffer, NDBuffer # noqa: F401 +from zarr.core.codec_pipeline import BatchedCodecPipeline # noqa: F401 diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index d8500cc1d..72c22611b 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -77,7 +77,7 @@ T_ArrayMetadata, ) from zarr.core.metadata.v3 import parse_node_type_array -from zarr.core.sync import collect_aiterator, sync +from zarr.core.sync import sync from zarr.errors import MetadataValidationError from zarr.registry import get_pipeline_class from zarr.storage import StoreLike, make_store_path @@ -839,17 +839,31 @@ def nchunks(self) -> int: """ return product(self.cdata_shape) - @property - def nchunks_initialized(self) -> int: + async def nchunks_initialized(self) -> int: """ - The number of chunks that have been persisted in storage. + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. Returns ------- - int - The number of initialized chunks in the array. + nchunks_initialized : int + The number of chunks that have been initialized. + + Notes + ----- + On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) + property :attr:`Array.nchunks_initialized`. + + Examples + -------- + >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,)) + >>> await arr.nchunks_initialized() + 0 + >>> await arr.setitem(slice(5), 1) + >>> await arr.nchunks_initialized() + 3 """ - return nchunks_initialized(self) + return len(await chunks_initialized(self)) def _iter_chunk_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -1502,9 +1516,29 @@ def nbytes(self) -> int: @property def nchunks_initialized(self) -> int: """ - The number of chunks that have been initialized in the stored representation of this array. + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. + + Returns + ------- + nchunks_initialized : int + The number of chunks that have been initialized. + + Notes + ----- + On :class:`Array` this is a (synchronous) property, unlike the asynchronous + method :meth:`AsyncArray.nchunks_initialized`.
+ + Examples + -------- + >>> arr = zarr.create(shape=(10,), chunks=(2,)) + >>> arr.nchunks_initialized + 0 + >>> arr[:5] = 1 + >>> arr.nchunks_initialized + 3 """ - return self._async_array.nchunks_initialized + return sync(self._async_array.nchunks_initialized()) def _iter_chunk_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2915,39 +2949,15 @@ def info(self) -> None: ) -def nchunks_initialized( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array, -) -> int: - """ - Calculate the number of chunks that have been initialized, i.e. the number of chunks that have - been persisted to the storage backend. - - Parameters - ---------- - array : Array - The array to inspect. - - Returns - ------- - nchunks_initialized : int - The number of chunks that have been initialized. - - See Also - -------- - chunks_initialized - """ - return len(chunks_initialized(array)) - - -def chunks_initialized( - array: Array | AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], +async def chunks_initialized( + array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], ) -> tuple[str, ...]: """ Return the keys of the chunks that have been persisted to the storage backend. Parameters ---------- - array : Array + array : AsyncArray The array to inspect. Returns @@ -2960,10 +2970,9 @@ def chunks_initialized( nchunks_initialized """ - # TODO: make this compose with the underlying async iterator - store_contents = list( - collect_aiterator(array.store_path.store.list_prefix(prefix=array.store_path.path)) - ) + store_contents = [ + x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) + ] return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index ed7f8a1f4..afecc6824 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -182,7 +182,7 @@ def to_dict(self) -> dict[str, JSON]: def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]: return itertools.product( - *(range(0, ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) + *(range(ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) ) def get_nchunks(self, array_shape: ChunkCoords) -> int: diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/core/codec_pipeline.py similarity index 100% rename from src/zarr/codecs/pipeline.py rename to src/zarr/core/codec_pipeline.py diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index ec2c8c47a..29f5e139f 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -47,7 +47,7 @@ def reset(self) -> None: "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { - "path": "zarr.codecs.pipeline.BatchedCodecPipeline", + "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, "codecs": { diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index a9985ef2e..683f7b936 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -967,7 +967,7 @@ async def create_array( @deprecated("Use AsyncGroup.create_array instead.") async def create_dataset( - self, name: str, **kwargs: Any + self, name: str, *, shape: ShapeLike, **kwargs: Any ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array. @@ -988,7 +988,7 @@ async def create_dataset( .. deprecated:: 3.0.0 The h5py compatibility methods will be removed in 3.1.0.
Use `AsyncGroup.create_array` instead. """ - return await self.create_array(name, **kwargs) + return await self.create_array(name, shape=shape, **kwargs) @deprecated("Use AsyncGroup.require_array instead.") async def require_dataset( @@ -1225,7 +1225,7 @@ def _members_consolidated( # we kind of just want the top-level keys. if consolidated_metadata is not None: - for key in consolidated_metadata.metadata.keys(): + for key in consolidated_metadata.metadata: obj = self._getitem_consolidated( self.store_path, key, prefix=self.name ) # Metadata -> Group/Array @@ -1666,7 +1666,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.create_dataset(name, **kwargs))) @deprecated("Use Group.require_array instead.") - def require_dataset(self, name: str, **kwargs: Any) -> Array: + def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. Arrays are known as "datasets" in HDF5 terminology. For compatibility @@ -1688,9 +1688,9 @@ def require_dataset(self, name: str, **kwargs: Any) -> Array: .. deprecated:: 3.0.0 The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead. """ - return Array(self._sync(self._async_group.require_array(name, **kwargs))) + return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs))) - def require_array(self, name: str, **kwargs: Any) -> Array: + def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array: """Obtain an array, creating if it doesn't exist. @@ -1707,7 +1707,7 @@ def require_array(self, name: str, **kwargs: Any) -> Array: ------- a : Array """ - return Array(self._sync(self._async_group.require_array(name, **kwargs))) + return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs))) @_deprecate_positional_args def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index 1873d5c83..723dadfb4 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -241,12 +241,13 @@ def is_pure_fancy_indexing(selection: Any, ndim: int) -> bool: # is mask selection return True - if ndim == 1: - if is_integer_list(selection) or is_integer_array(selection) or is_bool_list(selection): - return True + if ndim == 1 and ( + is_integer_list(selection) or is_integer_array(selection) or is_bool_list(selection) + ): + return True - # if not, we go through the normal path below, because a 1-tuple - # of integers is also allowed. + # if not, we go through the normal path below, because a 1-tuple + # of integers is also allowed. 
no_slicing = ( isinstance(selection, tuple) and len(selection) == ndim @@ -675,7 +676,7 @@ def check(a: npt.NDArray[Any]) -> Order: def wraparound_indices(x: npt.NDArray[Any], dim_len: int) -> None: loc_neg = x < 0 if np.any(loc_neg): - x[loc_neg] = x[loc_neg] + dim_len + x[loc_neg] += dim_len def boundscheck_indices(x: npt.NDArray[Any], dim_len: int) -> None: @@ -1000,8 +1001,8 @@ def __init__( if stop < 0: stop = dim_numchunks + stop - start = start * dim_chunk_size - stop = stop * dim_chunk_size + start *= dim_chunk_size + stop *= dim_chunk_size slice_ = slice(start, stop) else: diff --git a/src/zarr/core/metadata/__init__.py b/src/zarr/core/metadata/__init__.py index f4374d9ab..43b5ec98f 100644 --- a/src/zarr/core/metadata/__init__.py +++ b/src/zarr/core/metadata/__init__.py @@ -8,10 +8,10 @@ T_ArrayMetadata = TypeVar("T_ArrayMetadata", ArrayV2Metadata, ArrayV3Metadata) __all__ = [ - "ArrayV2Metadata", - "ArrayV3Metadata", "ArrayMetadata", "ArrayMetadataDict", - "ArrayV3MetadataDict", + "ArrayV2Metadata", "ArrayV2MetadataDict", + "ArrayV3Metadata", + "ArrayV3MetadataDict", ] diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 7a38e9fd7..6ea9ed69f 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -481,9 +481,9 @@ def parse_fill_value( except (ValueError, OverflowError, TypeError) as e: raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}") from e # Check if the value is still representable by the dtype - if fill_value == "NaN" and np.isnan(casted_value): - pass - elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): + if (fill_value == "NaN" and np.isnan(casted_value)) or ( + fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value) + ): pass elif np_dtype.kind == "f": # float comparison is not exact, especially when dtype <float64 diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ ... @@ def with_mode(self, mode: AccessModeLiteral) -> Self: return type(self)(root=self.root, mode=mode) def __str__(self) -> str: - return f"file://{self.root}" + return f"file://{self.root.as_posix()}" def __repr__(self) -> str: return f"LocalStore({str(self)!r})" @@ -202,14 +202,14 @@ async def exists(self, key: str) -> bool: path = self.root / key return await asyncio.to_thread(path.is_file) - async def list(self) -> AsyncGenerator[str, None]: + async def list(self) -> AsyncGenerator[str]: # docstring inherited to_strip = self.root.as_posix() + "/" for p in list(self.root.rglob("*")): if p.is_file(): yield p.as_posix().replace(to_strip, "") - async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited to_strip = self.root.as_posix() + "/" prefix = prefix.rstrip("/") @@ -217,14 +217,12 @@ if p.is_file(): yield p.as_posix().replace(to_strip, "") - async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited base = self.root / prefix - to_strip = str(base) + "/" - try: key_iter = base.iterdir() for key in key_iter: - yield key.as_posix().replace(to_strip, "") + yield key.relative_to(base).as_posix() except (FileNotFoundError, NotADirectoryError): pass diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index c07aff13d..7ec4cbf66 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -54,7 +54,7 @@ def _configure_logger( self, log_level: str = "DEBUG", log_handler: logging.Handler | None = None )
-> None: self.log_level = log_level - self.logger = logging.getLogger(f"LoggingStore({self._store!s})") + self.logger = logging.getLogger(f"LoggingStore({self._store})") self.logger.setLevel(log_level) if not self.logger.hasHandlers(): @@ -146,7 +146,7 @@ async def clear(self) -> None: return await self._store.clear() def __str__(self) -> str: - return f"logging-{self._store!s}" + return f"logging-{self._store}" def __repr__(self) -> str: return f"LoggingStore({repr(self._store)!r})" @@ -203,19 +203,19 @@ async def set_partial_values( with self.log(keys): return await self._store.set_partial_values(key_start_values=key_start_values) - async def list(self) -> AsyncGenerator[str, None]: + async def list(self) -> AsyncGenerator[str]: # docstring inherited with self.log(): async for key in self._store.list(): yield key - async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited with self.log(prefix): async for key in self._store.list_prefix(prefix=prefix): yield key - async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited with self.log(prefix): async for key in self._store.list_dir(prefix=prefix): yield key diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 5164be0ca..b33a2140b 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -17,6 +17,9 @@ +logger = getLogger(__name__) + + class MemoryStore(Store): """ In-memory store for testing purposes. @@ -142,19 +145,19 @@ async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, by # docstring inherited raise NotImplementedError - async def list(self) -> AsyncGenerator[str, None]: + async def list(self) -> AsyncGenerator[str]: # docstring inherited for key in self._store_dict: yield key - async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited # note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g.
in delete_prefix) for key in list(self._store_dict): if key.startswith(prefix): yield key - async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited prefix = prefix.rstrip("/") diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 8e8970bdf..9c050f1ff 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -312,13 +312,13 @@ async def set_partial_values( # docstring inherited raise NotImplementedError - async def list(self) -> AsyncGenerator[str, None]: + async def list(self) -> AsyncGenerator[str]: # docstring inherited allfiles = await self.fs._find(self.path, detail=False, withdirs=False) for onefile in (a.replace(self.path + "/", "") for a in allfiles): yield onefile - async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited prefix = f"{self.path}/{prefix.rstrip('/')}" try: @@ -328,7 +328,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: for onefile in (a.replace(prefix + "/", "") for a in allfiles): yield onefile.removeprefix(self.path).removeprefix("/") - async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited for onefile in await self.fs._find( f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 85f6fe041..7a95f857d 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -234,19 +234,19 @@ async def exists(self, key: str) -> bool: else: return True - async def list(self) -> AsyncGenerator[str, None]: + async def list(self) -> AsyncGenerator[str]: # docstring inherited with self._lock: for key in self._zf.namelist(): yield key - async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited async for key in self.list(): if key.startswith(prefix): yield key - async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str]: # docstring inherited prefix = prefix.rstrip("/") diff --git a/tests/test_api.py b/tests/test_api.py index bca816f4e..0436469f9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -132,6 +132,33 @@ async def test_open_group_unspecified_version( assert g2.metadata.zarr_format == zarr_format +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +@pytest.mark.parametrize("n_args", [10, 1, 0]) +@pytest.mark.parametrize("n_kwargs", [10, 1, 0]) +def test_save(store: Store, n_args: int, n_kwargs: int) -> None: + data = np.arange(10) + args = [np.arange(10) for _ in range(n_args)] + kwargs = {f"arg_{i}": data for i in range(n_kwargs)} + + if n_kwargs == 0 and n_args == 0: + with pytest.raises(ValueError): + save(store) + elif n_args == 1 and n_kwargs == 0: + save(store, *args) + array = open(store) + assert isinstance(array, Array) + assert_array_equal(array[:], data) + else: + save(store, *args, **kwargs) # type: ignore[arg-type] + group = open(store) + assert isinstance(group, Group) + for array in group.array_values(): + assert_array_equal(array[:], data) + for k in kwargs: + assert k in group + assert group.nmembers() == n_args + n_kwargs + + def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays 
provided @@ -142,6 +169,10 @@ def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays provided save("data/group.zarr") + with pytest.raises(TypeError): + # mode is not a valid argument and would get handled as an array + a = np.arange(10) + zarr.save("data/example.zarr", a, mode="w") def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: diff --git a/tests/test_array.py b/tests/test_array.py index 96475d43e..b8af26133 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -325,7 +325,7 @@ def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: """ Test that nchunks_initialized accurately returns the number of stored chunks. """ @@ -339,7 +339,7 @@ def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> N if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() assert observed == expected # delete chunks @@ -348,13 +348,12 @@ def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> N if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() expected = arr.nchunks - idx - 1 assert observed == expected -@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_chunks_initialized() -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks.
""" @@ -366,12 +365,7 @@ def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> No ) for keys, region in zip(chunks_accumulated, arr._iter_chunk_regions(), strict=False): arr[region] = 1 - - if test_cls == Array: - observed = sorted(chunks_initialized(arr)) - else: - observed = sorted(chunks_initialized(arr._async_array)) - + observed = sorted(await chunks_initialized(arr._async_array)) expected = sorted(keys) assert observed == expected diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 0f2f89291..dfb8e1c59 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -56,7 +56,6 @@ def test_sharding_pickle() -> None: """ Test that sharding codecs can be pickled """ - pass @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 85315c878..78f32fef0 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -229,7 +229,7 @@ def test_sharding_partial_overwrite( read_data = a[0:10, 0:10, 0:10] assert np.array_equal(data, read_data) - data = data + 10 + data += 10 a[:10, :10, :10] = data read_data = a[0:10, 0:10, 0:10] assert np.array_equal(data, read_data) diff --git a/tests/test_config.py b/tests/test_config.py index c4cf794c5..ddabffb46 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,9 +11,10 @@ from zarr import Array, zeros from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline from zarr.abc.store import ByteSetter, Store -from zarr.codecs import BatchedCodecPipeline, BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec +from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import NDBuffer +from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config from zarr.core.indexing import SelectorTuple from zarr.registry import ( @@ -45,7 +46,7 @@ def test_config_defaults_set() -> None: "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { - "path": "zarr.codecs.pipeline.BatchedCodecPipeline", + "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, "buffer": "zarr.core.buffer.cpu.Buffer", @@ -96,8 +97,8 @@ def test_config_codec_pipeline_class(store: Store) -> None: # has default value assert get_pipeline_class().__name__ != "" - config.set({"codec_pipeline.name": "zarr.codecs.pipeline.BatchedCodecPipeline"}) - assert get_pipeline_class() == zarr.codecs.pipeline.BatchedCodecPipeline + config.set({"codec_pipeline.name": "zarr.core.codec_pipeline.BatchedCodecPipeline"}) + assert get_pipeline_class() == zarr.core.codec_pipeline.BatchedCodecPipeline _mock = Mock() diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c0218602f..d9143d09d 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -87,31 +87,27 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: metadata={ "air": ArrayV3Metadata.from_dict( { - **{ - "shape": (1, 2, 3), - "chunk_grid": { - "configuration": {"chunk_shape": (1, 2, 3)}, - "name": "regular", - }, + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { - **{ - "shape": (1,), - "chunk_grid": { - "configuration": 
{"chunk_shape": (1,)}, - "name": "regular", - }, + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", }, **array_metadata, } ), "lon": ArrayV3Metadata.from_dict( { - **{"shape": (2,)}, + "shape": (2,), "chunk_grid": { "configuration": {"chunk_shape": (2,)}, "name": "regular", @@ -121,12 +117,10 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: ), "time": ArrayV3Metadata.from_dict( { - **{ - "shape": (3,), - "chunk_grid": { - "configuration": {"chunk_shape": (3,)}, - "name": "regular", - }, + "shape": (3,), + "chunk_grid": { + "configuration": {"chunk_shape": (3,)}, + "name": "regular", }, **array_metadata, } @@ -138,13 +132,11 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: "array": ArrayV3Metadata.from_dict( { **array_metadata, - **{ - "attributes": {"key": "child"}, - "shape": (4, 4), - "chunk_grid": { - "configuration": {"chunk_shape": (4, 4)}, - "name": "regular", - }, + "attributes": {"key": "child"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", }, } ), @@ -162,15 +154,11 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: "array": ArrayV3Metadata.from_dict( { **array_metadata, - **{ - "attributes": {"key": "grandchild"}, - "shape": (4, 4), - "chunk_grid": { - "configuration": { - "chunk_shape": (4, 4) - }, - "name": "regular", - }, + "attributes": {"key": "grandchild"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", }, } ), @@ -243,31 +231,27 @@ def test_consolidated_sync(self, memory_store): metadata={ "air": ArrayV3Metadata.from_dict( { - **{ - "shape": (1, 2, 3), - "chunk_grid": { - "configuration": {"chunk_shape": (1, 2, 3)}, - "name": "regular", - }, + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { - **{ - "shape": (1,), - "chunk_grid": { - "configuration": {"chunk_shape": (1,)}, - "name": "regular", - }, + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", }, **array_metadata, } ), "lon": ArrayV3Metadata.from_dict( { - **{"shape": (2,)}, + "shape": (2,), "chunk_grid": { "configuration": {"chunk_shape": (2,)}, "name": "regular", @@ -277,12 +261,10 @@ def test_consolidated_sync(self, memory_store): ), "time": ArrayV3Metadata.from_dict( { - **{ - "shape": (3,), - "chunk_grid": { - "configuration": {"chunk_shape": (3,)}, - "name": "regular", - }, + "shape": (3,), + "chunk_grid": { + "configuration": {"chunk_shape": (3,)}, + "name": "regular", }, **array_metadata, } @@ -357,24 +339,20 @@ def test_flatten(self): metadata={ "air": ArrayV3Metadata.from_dict( { - **{ - "shape": (1, 2, 3), - "chunk_grid": { - "configuration": {"chunk_shape": (1, 2, 3)}, - "name": "regular", - }, + "shape": (1, 2, 3), + "chunk_grid": { + "configuration": {"chunk_shape": (1, 2, 3)}, + "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { - **{ - "shape": (1,), - "chunk_grid": { - "configuration": {"chunk_shape": (1,)}, - "name": "regular", - }, + "shape": (1,), + "chunk_grid": { + "configuration": {"chunk_shape": (1,)}, + "name": "regular", }, **array_metadata, } @@ -386,13 +364,11 @@ def test_flatten(self): "array": ArrayV3Metadata.from_dict( { **array_metadata, - **{ - "attributes": {"key": "child"}, - "shape": (4, 4), - "chunk_grid": { - "configuration": 
{"chunk_shape": (4, 4)}, - "name": "regular", - }, + "attributes": {"key": "child"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", }, } ), @@ -403,13 +379,11 @@ def test_flatten(self): "array": ArrayV3Metadata.from_dict( { **array_metadata, - **{ - "attributes": {"key": "grandchild"}, - "shape": (4, 4), - "chunk_grid": { - "configuration": {"chunk_shape": (4, 4)}, - "name": "regular", - }, + "attributes": {"key": "grandchild"}, + "shape": (4, 4), + "chunk_grid": { + "configuration": {"chunk_shape": (4, 4)}, + "name": "regular", }, } ) diff --git a/tests/test_store/test_local.py b/tests/test_store/test_local.py index 1733ee2df..42cb6d63a 100644 --- a/tests/test_store/test_local.py +++ b/tests/test_store/test_local.py @@ -31,7 +31,7 @@ def store_kwargs(self, tmpdir) -> dict[str, str]: return {"root": str(tmpdir), "mode": "w"} def test_store_repr(self, store: LocalStore) -> None: - assert str(store) == f"file://{store.root!s}" + assert str(store) == f"file://{store.root.as_posix()}" def test_store_supports_writes(self, store: LocalStore) -> None: assert store.supports_writes diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index cba80165f..30f478ac4 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -54,7 +54,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> await store.set("foo", cpu.Buffer.from_bytes(b"bar")) def test_store_repr(self, store: ZipStore) -> None: - assert str(store) == f"zip://{store.path!s}" + assert str(store) == f"zip://{store.path}" def test_store_supports_writes(self, store: ZipStore) -> None: assert store.supports_writes diff --git a/tests/test_sync.py b/tests/test_sync.py index a6216a485..bff3837e2 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -147,7 +147,7 @@ def test_open_positional_args_deprecate(): zarr.open(store, "w", shape=(1,)) -@pytest.mark.parametrize("workers", [None, 1, 2]) # +@pytest.mark.parametrize("workers", [None, 1, 2]) def test_get_executor(clean_state, workers) -> None: with zarr.config.set({"threading.max_workers": workers}): e = _get_executor()