diff --git a/docs/release-notes.rst b/docs/release-notes.rst index 2e57328293..ecd413510b 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -13,6 +13,8 @@ Bug fixes * Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using ``zarr-python`` version 2 (:issue:`2694`). +* Ensure that compressor=None results in no compression when writing Zarr format 2 data (:issue:`2708`) + Behaviour changes ~~~~~~~~~~~~~~~~~ diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6f67b612d5..0f1fb42f79 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4131,15 +4131,22 @@ def _parse_chunk_encoding_v3( def _parse_deprecated_compressor( - compressor: CompressorLike | None, compressors: CompressorsLike + compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3 ) -> CompressorsLike | None: - if compressor: + if compressor != "auto": if compressors != "auto": raise ValueError("Cannot specify both `compressor` and `compressors`.") - warn( - "The `compressor` argument is deprecated. Use `compressors` instead.", - category=UserWarning, - stacklevel=2, - ) - compressors = (compressor,) + if zarr_format == 3: + warn( + "The `compressor` argument is deprecated. Use `compressors` instead.", + category=UserWarning, + stacklevel=2, + ) + if compressor is None: + # "no compression" + compressors = () + else: + compressors = (compressor,) + elif zarr_format == 2 and compressor == compressors == "auto": + compressors = ({"id": "blosc"},) return compressors diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 775e8ac0bb..93e55da377 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1011,7 +1011,7 @@ async def create_array( shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", - compressor: CompressorLike = None, + compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = 0, order: MemoryOrder | None = None, @@ -1114,8 +1114,9 @@ async def create_array( AsyncArray """ - - compressors = _parse_deprecated_compressor(compressor, compressors) + compressors = _parse_deprecated_compressor( + compressor, compressors, zarr_format=self.metadata.zarr_format + ) return await create_array( store=self.store_path, name=name, @@ -2244,7 +2245,7 @@ def create_array( shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", - compressor: CompressorLike = None, + compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = 0, order: MemoryOrder | None = "C", @@ -2346,7 +2347,9 @@ def create_array( ------- AsyncArray """ - compressors = _parse_deprecated_compressor(compressor, compressors) + compressors = _parse_deprecated_compressor( + compressor, compressors, zarr_format=self.metadata.zarr_format + ) return Array( self._sync( self._async_group.create_array( diff --git a/tests/test_group.py b/tests/test_group.py index 788e81e603..144054605e 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from numcodecs import Zstd +from numcodecs import Blosc import zarr import zarr.api.asynchronous @@ -499,7 +499,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat "chunks": (1,), "order": "C", "filters": None, - "compressor": Zstd(level=0), + "compressor": Blosc(), "zarr_format": zarr_format, }, "subgroup": { @@ -1505,13 +1505,3 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None: elapsed = time.time() - start assert elapsed > num_groups * get_latency - - -@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) -def test_deprecated_compressor(store: Store) -> None: - g = zarr.group(store=store, zarr_format=2) - with pytest.warns(UserWarning, match="The `compressor` argument is deprecated.*"): - a = g.create_array( - "foo", shape=(100,), chunks=(10,), dtype="i4", compressor={"id": "blosc"} - ) - assert a.metadata.compressor.codec_id == "blosc" diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 4065f35120..c1ff2e130a 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from numcodecs import Zstd +from numcodecs import Blosc import zarr.api.asynchronous import zarr.api.synchronous @@ -522,7 +522,7 @@ async def test_consolidated_metadata_v2(self): attributes={"key": "a"}, chunks=(1,), fill_value=0, - compressor=Zstd(level=0), + compressor=Blosc(), order="C", ), "g1": GroupMetadata( diff --git a/tests/test_v2.py b/tests/test_v2.py index 9fe31956f8..b657af9c47 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -7,11 +7,13 @@ import pytest from numcodecs import Delta from numcodecs.blosc import Blosc +from numcodecs.zstd import Zstd import zarr import zarr.core.buffer import zarr.storage from zarr import config +from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype from zarr.core.sync import sync from zarr.storage import MemoryStore, StorePath @@ -93,11 +95,7 @@ async def test_v2_encode_decode(dtype): store = zarr.storage.MemoryStore() g = zarr.group(store=store, zarr_format=2) g.create_array( - name="foo", - shape=(3,), - chunks=(3,), - dtype=dtype, - fill_value=b"X", + name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=b"X", compressor=None ) result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype()) @@ -166,6 +164,29 @@ def test_v2_filters_codecs(filters: Any, order: Literal["C", "F"]) -> None: np.testing.assert_array_equal(result, array_fixture) +@pytest.mark.filterwarnings("ignore") +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_create_array_defaults(store: Store): + """ + Test that passing compressor=None results in no compressor. Also test that the default value of the compressor + parameter does produce a compressor. + """ + g = zarr.open(store, mode="w", zarr_format=2) + arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None) + assert arr._async_array.compressor is None + assert not (arr.filters) + arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,)) + assert arr._async_array.compressor is not None + assert not (arr.filters) + arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd()) + assert arr._async_array.compressor is not None + assert not (arr.filters) + with pytest.raises(ValueError): + g.create_array( + "four", dtype="i8", shape=(1,), chunks=(1,), compressor=None, compressors=None + ) + + @pytest.mark.parametrize("array_order", ["C", "F"]) @pytest.mark.parametrize("data_order", ["C", "F"]) @pytest.mark.parametrize("memory_order", ["C", "F"])