Merge branch 'main' into creation-from-other-zarr

scalableminds · Jan 15, 2025 · addb785 · addb785
2 parents 4f3e156 + 45146ca
commit addb785
Show file tree

Hide file tree

Showing 13 changed files with 201 additions and 67 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -60,7 +60,12 @@ jobs:
         hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
     - name: Run Tests
       run: |
-        hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run
+        hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage
+    - name: Upload coverage
+      uses: codecov/codecov-action@v5
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        verbose: true # optional (default = false)
 
   test-upstream-and-min-deps:
     name: py=${{ matrix.python-version }}-${{ matrix.dependency-set }}

diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,10 @@
+coverage:
+  status:
+    patch:
+      default:
+        target: auto
+    project:
+      default:
+        target: auto
+        threshold: 0.1
+comment: false
diff --git a/docs/conf.py b/docs/conf.py
@@ -91,7 +91,7 @@ def skip_submodules(
 
 # General information about the project.
 project = "zarr"
-copyright = "2024, Zarr Developers"
+copyright = "2025, Zarr Developers"
 author = "Zarr Developers"
 
 version = get_version("zarr")
@@ -181,6 +181,7 @@ def skip_submodules(
     ],
     "collapse_navigation": True,
     "navigation_with_keys": False,
+    "announcement": "Zarr-Python 3 is here! Check out the release announcement <a href='https://zarr.dev/blog/zarr-python-3-release/'>here.</a>",
 }
 
 # Add any paths that contain custom themes here, relative to this directory.

diff --git a/docs/developers/contributing.rst b/docs/developers/contributing.rst
@@ -329,10 +329,16 @@ Release procedure
    Most of the release process is now handled by GitHub workflow which should
    automatically push a release to PyPI if a tag is pushed.
 
-Before releasing, make sure that all pull requests which will be
-included in the release have been properly documented in
-`docs/release.rst`.
-
+Pre-release
+"""""""""""
+1. Make sure that all pull requests which will be
+   included in the release have been properly documented in
+   :file:`docs/release-notes.rst`.
+2. Rename the "Unreleased" section heading in :file:`docs/release-notes.rst`
+   to the version you are about to release.
+
+Releasing
+"""""""""
 To make a new release, go to
 https://github.com/zarr-developers/zarr-python/releases and
 click "Draft a new release". Choose a version number prefixed
@@ -355,5 +361,8 @@ https://readthedocs.io. Full releases will be available under
 pre-releases will be available under
 `/latest <https://zarr.readthedocs.io/en/latest>`_.
 
-Also review and merge the https://github.com/conda-forge/zarr-feedstock
-pull request that will be automatically generated.
+Post-release
+""""""""""""
+
+- Review and merge the pull request on the `conda-forge feedstock <https://github.com/conda-forge/zarr-feedstock>`_ that will be automatically generated.
+- Create a new "Unreleased" section in the release notes.
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
@@ -74,7 +74,7 @@ Zarr supports data compression and filters. For example, to use Blosc compressio
     ...    "data/example-3.zarr",
     ...    mode="w", shape=(100, 100),
     ...    chunks=(10, 10), dtype="f4",
-    ...    compressor=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.SHUFFLE)
+    ...    compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
     ... )
     >>> z[:, :] = np.random.random((100, 100))
     >>>
@@ -101,7 +101,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> root = zarr.group("data/example-2.zarr")
     >>> foo = root.create_group(name="foo")
     >>> bar = root.create_array(
-    ...     name="bar", shape=(100, 10), chunks=(10, 10)
+    ...     name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4"
     ... )
     >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4")
     >>>
@@ -112,6 +112,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> # print the hierarchy
     >>> root.tree()
     /
+    ├── bar (100, 10) float32
     └── foo
         └── spam (10,) int32
     <BLANKLINE>
@@ -130,7 +131,7 @@ using external libraries like `s3fs <https://s3fs.readthedocs.io>`_ or
 
     >>> import s3fs # doctest: +SKIP
     >>>
-    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10)) # doctest: +SKIP
+    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") # doctest: +SKIP
     >>> z[:, :] = np.random.random((100, 100)) # doctest: +SKIP
 
 A single-file store can also be created using the :class:`zarr.storage.ZipStore`::

diff --git a/docs/release-notes.rst b/docs/release-notes.rst
@@ -1,6 +1,29 @@
 Release notes
 =============
 
+Unreleased
+----------
+
+New features
+~~~~~~~~~~~~
+
+Bug fixes
+~~~~~~~~~
+* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`).
+
+* Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using ``zarr-python`` version 2 (:issue:`2694`).
+
+* Ensures that ``compressor=None`` results in no compression when writing Zarr format 2 data (:issue:`2708`).
+
+Behaviour changes
+~~~~~~~~~~~~~~~~~
+
+Other
+~~~~~
+* Removed some unnecessary files from the source distribution
+  to reduce its size. (:issue:`2686`)
+
+
 .. _release_3.0.0:
 
 3.0.0

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,6 +2,13 @@
 requires = ["hatchling", "hatch-vcs"]
 build-backend = "hatchling.build"
 
+[tool.hatch.build.targets.sdist]
+exclude = [
+  "/.github",
+  "/bench",
+  "/docs",
+  "/notebooks"
+]
 
 [project]
 name = "zarr"
@@ -103,13 +110,13 @@ Homepage = "https://github.com/zarr-developers/zarr-python"
 [tool.coverage.report]
 exclude_lines = [
     "pragma: no cover",
+    "if TYPE_CHECKING:",
     "pragma: ${PY_MAJOR_VERSION} no cover",
     '.*\.\.\.' # Ignore "..." lines
 ]
 
 [tool.coverage.run]
 omit = [
-    "src/zarr/meta_v1.py",
     "bench/compress_normal.py",
 ]
 
@@ -140,8 +147,8 @@ numpy = ["1.25", "2.1"]
 features = ["gpu"]
 
 [tool.hatch.envs.test.scripts]
-run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=src"
-run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=src"
+run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
@@ -170,7 +177,7 @@ numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [tool.hatch.envs.gputest.scripts]
-run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=src"
+run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -4,7 +4,7 @@
 import warnings
 from asyncio import gather
 from collections.abc import Iterable
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field, replace
 from itertools import starmap
 from logging import getLogger
 from typing import (
@@ -1227,14 +1227,17 @@ async def _get_selection(
                 fill_value=self.metadata.fill_value,
             )
         if product(indexer.shape) > 0:
+            # need to use the order from the metadata for v2
+            _config = self._config
+            if self.metadata.zarr_format == 2:
+                _config = replace(_config, order=self.metadata.order)
+
             # reading chunks and decoding them
             await self.codec_pipeline.read(
                 [
                     (
                         self.store_path / self.metadata.encode_chunk_key(chunk_coords),
-                        self.metadata.get_chunk_spec(
-                            chunk_coords, self._config, prototype=prototype
-                        ),
+                        self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
                         chunk_selection,
                         out_selection,
                     )
@@ -1351,12 +1354,17 @@ async def _set_selection(
         # Buffer and NDBuffer between components.
         value_buffer = prototype.nd_buffer.from_ndarray_like(value)
 
+        # need to use the order from the metadata for v2
+        _config = self._config
+        if self.metadata.zarr_format == 2:
+            _config = replace(_config, order=self.metadata.order)
+
         # merging with existing data and encoding chunks
         await self.codec_pipeline.write(
             [
                 (
                     self.store_path / self.metadata.encode_chunk_key(chunk_coords),
-                    self.metadata.get_chunk_spec(chunk_coords, self._config, prototype),
+                    self.metadata.get_chunk_spec(chunk_coords, _config, prototype),
                     chunk_selection,
                     out_selection,
                 )
@@ -4393,15 +4401,22 @@ def _parse_chunk_encoding_v3(
 
 
 def _parse_deprecated_compressor(
-    compressor: CompressorLike | None, compressors: CompressorsLike
+    compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3
 ) -> CompressorsLike | None:
-    if compressor:
+    if compressor != "auto":
         if compressors != "auto":
             raise ValueError("Cannot specify both `compressor` and `compressors`.")
-        warn(
-            "The `compressor` argument is deprecated. Use `compressors` instead.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-        compressors = (compressor,)
+        if zarr_format == 3:
+            warn(
+                "The `compressor` argument is deprecated. Use `compressors` instead.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+        if compressor is None:
+            # "no compression"
+            compressors = ()
+        else:
+            compressors = (compressor,)
+    elif zarr_format == 2 and compressor == compressors == "auto":
+        compressors = ({"id": "blosc"},)
     return compressors
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
@@ -573,8 +573,8 @@ def _from_bytes_v2(
             v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes())
             v2_consolidated_metadata = v2_consolidated_metadata["metadata"]
             # We already read zattrs and zgroup. Should we ignore these?
-            v2_consolidated_metadata.pop(".zattrs")
-            v2_consolidated_metadata.pop(".zgroup")
+            v2_consolidated_metadata.pop(".zattrs", None)
+            v2_consolidated_metadata.pop(".zgroup", None)
 
             consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict)
 
@@ -1011,7 +1011,7 @@ async def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = None,
@@ -1114,8 +1114,9 @@ async def create_array(
         AsyncArray
 
         """
-
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return await create_array(
             store=self.store_path,
             name=name,
@@ -2244,7 +2245,7 @@ def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = "C",
@@ -2346,7 +2347,9 @@ def create_array(
         -------
         AsyncArray
         """
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return Array(
             self._sync(
                 self._async_group.create_array(

diff --git a/tests/test_group.py b/tests/test_group.py
@@ -9,7 +9,7 @@
 
 import numpy as np
 import pytest
-from numcodecs import Zstd
+from numcodecs import Blosc
 
 import zarr
 import zarr.api.asynchronous
@@ -499,7 +499,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                     "chunks": (1,),
                     "order": "C",
                     "filters": None,
-                    "compressor": Zstd(level=0),
+                    "compressor": Blosc(),
                     "zarr_format": zarr_format,
                 },
                 "subgroup": {
@@ -1505,13 +1505,3 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None:
         elapsed = time.time() - start
 
         assert elapsed > num_groups * get_latency
-
-
-@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
-def test_deprecated_compressor(store: Store) -> None:
-    g = zarr.group(store=store, zarr_format=2)
-    with pytest.warns(UserWarning, match="The `compressor` argument is deprecated.*"):
-        a = g.create_array(
-            "foo", shape=(100,), chunks=(10,), dtype="i4", compressor={"id": "blosc"}
-        )
-        assert a.metadata.compressor.codec_id == "blosc"