diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffa3c94ef..578a66fd0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,14 +6,14 @@ default_stages: [commit, push] default_language_version: python: python3 repos: - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.4.3' + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: 'v0.4.5' hooks: - id: ruff args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] @@ -31,3 +31,5 @@ repos: - types-setuptools - pytest - numpy + - numcodecs + - zstandard diff --git a/pyproject.toml b/pyproject.toml index 947bec936..9f50c33db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -187,48 +187,32 @@ python_version = "3.10" ignore_missing_imports = true namespace_packages = false -warn_unused_configs = true -warn_redundant_casts = true -warn_unused_ignores = true -strict_equality = true -strict_concatenate = true +strict = true -check_untyped_defs = true -disallow_untyped_decorators = true -disallow_any_generics = true -disallow_incomplete_defs = true -disallow_untyped_calls = true - -disallow_untyped_defs = true +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] [[tool.mypy.overrides]] module = [ "zarr.v2.*", - "zarr.group", - "tests.*", ] -check_untyped_defs = false +ignore_errors = true [[tool.mypy.overrides]] module = [ - "zarr.v2.*", - "zarr.array_v2", + "zarr.group", + "tests.*", ] -disallow_any_generics = false +check_untyped_defs = false [[tool.mypy.overrides]] module = [ - "zarr.v2.*", - "zarr.array_v2", "zarr.group" ] disallow_incomplete_defs = false [[tool.mypy.overrides]] module = [ - "zarr.v2.*", - "zarr.array_v2", "zarr.array", "zarr.buffer" ] @@ -236,13 +220,19 @@ disallow_untyped_calls = false [[tool.mypy.overrides]] module = [ - "zarr.v2.*", - "zarr.array_v2", "zarr.array", "zarr.group", ] disallow_untyped_defs = false + +[[tool.mypy.overrides]] +module = [ + "zarr.metadata", + "zarr.store.remote" +] +warn_return_any = false + [tool.pytest.ini_options] minversion = "7" testpaths = ["tests"] diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index d555070b0..3f65e628f 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -11,9 +11,10 @@ from zarr.abc.codec import Codec from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike +from zarr.chunk_key_encodings import ChunkKeyEncoding from zarr.common import JSON, MEMORY_ORDER, ChunkCoords, OpenMode, ZarrFormat from zarr.group import AsyncGroup -from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata, ChunkKeyEncoding +from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.store import ( StoreLike, make_store_path, diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 470c1d028..7f3dd8dba 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -3,7 +3,7 @@ from typing import Any import zarr.api.asynchronous as async_api -from zarr.array import Array +from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike from zarr.common import JSON, OpenMode, ZarrFormat from zarr.group import Group @@ -106,7 +106,7 @@ def open( **kwargs, ) ) - if isinstance(obj, async_api.AsyncArray): + if isinstance(obj, AsyncArray): return Array(obj) else: return Group(obj) diff --git a/src/zarr/array.py b/src/zarr/array.py index 215251fef..dea398414 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -34,9 +34,9 @@ ZarrFormat, concurrent_map, ) -from zarr.config import config +from zarr.config import config, parse_indexing_order from zarr.indexing import BasicIndexer -from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata, parse_indexing_order +from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import sync diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 0f055093c..59994e70d 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -79,7 +79,7 @@ def ravel(self, order: Literal["K", "A", "C", "F"] = "C") -> Self: ... def all(self) -> bool: ... - def __eq__(self, other: Any) -> Self: # type: ignore + def __eq__(self, other: Any) -> Self: # type: ignore[explicit-override, override] """Element-wise equal Notice diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index 0f0ff55df..3ef3a87db 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -1,10 +1,25 @@ from __future__ import annotations -from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle # noqa: F401 -from zarr.codecs.bytes import BytesCodec, Endian # noqa: F401 -from zarr.codecs.crc32c_ import Crc32cCodec # noqa: F401 -from zarr.codecs.gzip import GzipCodec # noqa: F401 -from zarr.codecs.pipeline import BatchedCodecPipeline # noqa: F401 -from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation # noqa: F401 -from zarr.codecs.transpose import TransposeCodec # noqa: F401 -from zarr.codecs.zstd import ZstdCodec # noqa: F401 +from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle +from zarr.codecs.bytes import BytesCodec, Endian +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.gzip import GzipCodec +from zarr.codecs.pipeline import BatchedCodecPipeline +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.codecs.transpose import TransposeCodec +from zarr.codecs.zstd import ZstdCodec + +__all__ = [ + "BatchedCodecPipeline", + "BloscCodec", + "BloscCname", + "BloscShuffle", + "BytesCodec", + "Endian", + "Crc32cCodec", + "GzipCodec", + "ShardingCodec", + "ShardingCodecIndexLocation", + "TransposeCodec", + "ZstdCodec", +] diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 893cbc8b4..6f493c9e8 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -11,12 +11,11 @@ ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, - ByteGetter, BytesBytesCodec, - ByteSetter, Codec, CodecPipeline, ) +from zarr.abc.store import ByteGetter, ByteSetter from zarr.buffer import Buffer, NDBuffer from zarr.codecs.registry import get_codec_class from zarr.common import JSON, concurrent_map, parse_named_configuration diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 3d7ab4a23..a7b6edc3b 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -14,11 +14,10 @@ ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, - ByteGetter, - ByteSetter, Codec, CodecPipeline, ) +from zarr.abc.store import ByteGetter, ByteSetter from zarr.buffer import Buffer, NDBuffer from zarr.chunk_grids import RegularChunkGrid from zarr.codecs.bytes import BytesCodec @@ -102,7 +101,7 @@ def is_all_empty(self) -> bool: return bool(np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)) def get_full_chunk_map(self) -> npt.NDArray[np.bool_]: - return self.offsets_and_lengths[..., 0] != MAX_UINT_64 + return np.not_equal(self.offsets_and_lengths[..., 0], MAX_UINT_64) def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None: localized_chunk = self._localize_chunk(chunk_coords) @@ -206,25 +205,7 @@ def merge_with_morton_order( ) -> _ShardBuilder: obj = cls.create_empty(chunks_per_shard) for chunk_coords in morton_order_iter(chunks_per_shard): - if tombstones is not None and chunk_coords in tombstones: - continue - for shard_dict in shard_dicts: - maybe_value = shard_dict.get(chunk_coords, None) - if maybe_value is not None: - obj[chunk_coords] = maybe_value - break - return obj - - @classmethod - def merge_with_c_order( - cls, - chunks_per_shard: ChunkCoords, - tombstones: set[ChunkCoords], - *shard_dicts: ShardMapping, - ) -> _ShardBuilder: - obj = cls.create_empty(chunks_per_shard) - for chunk_coords in c_order_iter(chunks_per_shard): - if tombstones is not None and chunk_coords in tombstones: + if chunk_coords in tombstones: continue for shard_dict in shard_dicts: maybe_value = shard_dict.get(chunk_coords, None) @@ -302,8 +283,7 @@ async def finalize( index_location: ShardingCodecIndexLocation, index_encoder: Callable[[_ShardIndex], Awaitable[Buffer]], ) -> Buffer: - print("merging shards with c order") - shard_builder = _ShardBuilder.merge_with_c_order( + shard_builder = _ShardBuilder.merge_with_morton_order( self.new_dict.index.chunks_per_shard, self.tombstones, self.new_dict, diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 76e625ad6..451fae8b3 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -55,11 +55,11 @@ def to_dict(self) -> dict[str, JSON]: def _compress(self, data: npt.NDArray[Any]) -> bytes: ctx = ZstdCompressor(level=self.level, write_checksum=self.checksum) - return ctx.compress(data) + return ctx.compress(data.tobytes()) def _decompress(self, data: npt.NDArray[Any]) -> bytes: ctx = ZstdDecompressor() - return ctx.decompress(data) + return ctx.decompress(data.tobytes()) async def _decode_single( self, diff --git a/src/zarr/common.py b/src/zarr/common.py index d2d86f30c..8dc2edbf7 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -7,7 +7,15 @@ from collections.abc import Iterable from dataclasses import dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypeVar, overload +from typing import ( + TYPE_CHECKING, + Any, + Literal, + ParamSpec, + TypeVar, + cast, + overload, +) if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -181,5 +189,5 @@ def parse_fill_value(data: Any) -> Any: def parse_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): - return data + return cast(Literal["C", "F"], data) raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") diff --git a/src/zarr/config.py b/src/zarr/config.py index 5b1640bd5..7c5b48a16 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Literal +from typing import Any, Literal, cast from donfig import Config @@ -18,6 +18,6 @@ def parse_indexing_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): - return data + return cast(Literal["C", "F"], data) msg = f"Expected one of ('C', 'F'), got {data} instead." raise ValueError(msg) diff --git a/src/zarr/group.py b/src/zarr/group.py index e0061846a..55390bac9 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -5,7 +5,7 @@ import logging from collections.abc import Iterator from dataclasses import asdict, dataclass, field, replace -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING, Literal, cast, overload import numpy.typing as npt @@ -37,7 +37,7 @@ def parse_zarr_format(data: Any) -> ZarrFormat: if data in (2, 3): - return data + return cast(Literal[2, 3], data) msg = msg = f"Invalid zarr_format. Expected one 2 or 3. Got {data}." raise ValueError(msg) diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 45413bc5b..6bc83d506 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -199,11 +199,8 @@ def is_total_slice(item: Selection, shape: ChunkCoords) -> bool: if isinstance(item, tuple): return all( ( - isinstance(dim_sel, slice) - and ( - (dim_sel == slice(None)) - or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None])) - ) + (dim_sel == slice(None)) + or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None])) ) for dim_sel, dim_len in zip(item, shape, strict=False) ) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 09764a576..2c2cebd38 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -42,6 +42,9 @@ _bool = bool +__all__ = ["ArrayMetadata"] + + class DataType(Enum): bool = "bool" int8 = "int8" @@ -432,10 +435,11 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self: def parse_dimension_names(data: None | Iterable[str]) -> tuple[str, ...] | None: if data is None: return data - if isinstance(data, Iterable) and all([isinstance(x, str) for x in data]): + elif all([isinstance(x, str) for x in data]): return tuple(data) - msg = f"Expected either None or a iterable of str, got {type(data)}" - raise TypeError(msg) + else: + msg = f"Expected either None or a iterable of str, got {type(data)}" + raise TypeError(msg) # todo: real validation diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 533c7d3ad..ea1f9b1c2 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -47,7 +47,7 @@ def test_store_mode(self, store: S, store_kwargs: dict[str, Any]) -> None: assert store.writeable with pytest.raises(AttributeError): - store.mode = "w" # type: ignore + store.mode = "w" # type: ignore[misc] # read-only kwargs = {**store_kwargs, "mode": "r"} diff --git a/src/zarr/v2/n5.py b/src/zarr/v2/n5.py index 4ea5e4572..a6fd39f5b 100644 --- a/src/zarr/v2/n5.py +++ b/src/zarr/v2/n5.py @@ -780,7 +780,7 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic return zarr_config -class N5ChunkWrapper(Codec): +class N5ChunkWrapper(Codec): # type: ignore[misc] codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): diff --git a/src/zarr/v2/util.py b/src/zarr/v2/util.py index 48d7d30d8..6926bb2d1 100644 --- a/src/zarr/v2/util.py +++ b/src/zarr/v2/util.py @@ -444,7 +444,7 @@ def get_type(self): return type(self.obj).__name__ -class TreeTraversal(Traversal): +class TreeTraversal(Traversal): # type: ignore[misc] def get_children(self, node): return node.get_children()