Skip to content

Commit

Permalink
Ensure compressor=None results in no compression for V2 (zarr-develop…
Browse files Browse the repository at this point in the history
…ers#2709)

* Ensure compressor=None results in no compression for V2

* rename argument

* Update tests/test_v2.py

Co-authored-by: Davis Bennett <[email protected]>

* fix

* coverage

* add release note

* Update release note

---------

Co-authored-by: Davis Bennett <[email protected]>
Co-authored-by: David Stansby <[email protected]>
  • Loading branch information
3 people authored Jan 14, 2025
1 parent 168999c commit 45146ca
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 32 deletions.
2 changes: 2 additions & 0 deletions docs/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Bug fixes

* Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using ``zarr-python`` version 2 (:issue:`2694`).

* Ensure that ``compressor=None`` results in no compression when writing Zarr format 2 data (:issue:`2708`).

Behaviour changes
~~~~~~~~~~~~~~~~~

Expand Down
23 changes: 15 additions & 8 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4131,15 +4131,22 @@ def _parse_chunk_encoding_v3(


def _parse_deprecated_compressor(
compressor: CompressorLike | None, compressors: CompressorsLike
compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3
) -> CompressorsLike | None:
if compressor:
if compressor != "auto":
if compressors != "auto":
raise ValueError("Cannot specify both `compressor` and `compressors`.")
warn(
"The `compressor` argument is deprecated. Use `compressors` instead.",
category=UserWarning,
stacklevel=2,
)
compressors = (compressor,)
if zarr_format == 3:
warn(
"The `compressor` argument is deprecated. Use `compressors` instead.",
category=UserWarning,
stacklevel=2,
)
if compressor is None:
# "no compression"
compressors = ()
else:
compressors = (compressor,)
elif zarr_format == 2 and compressor == compressors == "auto":
compressors = ({"id": "blosc"},)
return compressors
13 changes: 8 additions & 5 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ async def create_array(
shards: ShardsLike | None = None,
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
compressor: CompressorLike = None,
compressor: CompressorLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = 0,
order: MemoryOrder | None = None,
Expand Down Expand Up @@ -1114,8 +1114,9 @@ async def create_array(
AsyncArray
"""

compressors = _parse_deprecated_compressor(compressor, compressors)
compressors = _parse_deprecated_compressor(
compressor, compressors, zarr_format=self.metadata.zarr_format
)
return await create_array(
store=self.store_path,
name=name,
Expand Down Expand Up @@ -2244,7 +2245,7 @@ def create_array(
shards: ShardsLike | None = None,
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
compressor: CompressorLike = None,
compressor: CompressorLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = 0,
order: MemoryOrder | None = "C",
Expand Down Expand Up @@ -2346,7 +2347,9 @@ def create_array(
-------
AsyncArray
"""
compressors = _parse_deprecated_compressor(compressor, compressors)
compressors = _parse_deprecated_compressor(
compressor, compressors, zarr_format=self.metadata.zarr_format
)
return Array(
self._sync(
self._async_group.create_array(
Expand Down
14 changes: 2 additions & 12 deletions tests/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import numpy as np
import pytest
from numcodecs import Zstd
from numcodecs import Blosc

import zarr
import zarr.api.asynchronous
Expand Down Expand Up @@ -499,7 +499,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
"chunks": (1,),
"order": "C",
"filters": None,
"compressor": Zstd(level=0),
"compressor": Blosc(),
"zarr_format": zarr_format,
},
"subgroup": {
Expand Down Expand Up @@ -1505,13 +1505,3 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None:
elapsed = time.time() - start

assert elapsed > num_groups * get_latency


@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
def test_deprecated_compressor(store: Store) -> None:
    """Creating a format-2 array through ``compressor=`` warns and applies the codec."""
    # NOTE(review): the hunk header for this file (12 deletions) suggests this
    # test is removed by the commit -- confirm before relying on it.
    group = zarr.group(store=store, zarr_format=2)
    expected_message = "The `compressor` argument is deprecated.*"
    with pytest.warns(UserWarning, match=expected_message):
        arr = group.create_array(
            "foo", shape=(100,), chunks=(10,), dtype="i4", compressor={"id": "blosc"}
        )
    assert arr.metadata.compressor.codec_id == "blosc"
4 changes: 2 additions & 2 deletions tests/test_metadata/test_consolidated.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np
import pytest
from numcodecs import Zstd
from numcodecs import Blosc

import zarr.api.asynchronous
import zarr.api.synchronous
Expand Down Expand Up @@ -522,7 +522,7 @@ async def test_consolidated_metadata_v2(self):
attributes={"key": "a"},
chunks=(1,),
fill_value=0,
compressor=Zstd(level=0),
compressor=Blosc(),
order="C",
),
"g1": GroupMetadata(
Expand Down
31 changes: 26 additions & 5 deletions tests/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
import pytest
from numcodecs import Delta
from numcodecs.blosc import Blosc
from numcodecs.zstd import Zstd

import zarr
import zarr.core.buffer
import zarr.storage
from zarr import config
from zarr.abc.store import Store
from zarr.core.buffer.core import default_buffer_prototype
from zarr.core.sync import sync
from zarr.storage import MemoryStore, StorePath
Expand Down Expand Up @@ -93,11 +95,7 @@ async def test_v2_encode_decode(dtype):
store = zarr.storage.MemoryStore()
g = zarr.group(store=store, zarr_format=2)
g.create_array(
name="foo",
shape=(3,),
chunks=(3,),
dtype=dtype,
fill_value=b"X",
name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=b"X", compressor=None
)

result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
Expand Down Expand Up @@ -166,6 +164,29 @@ def test_v2_filters_codecs(filters: Any, order: Literal["C", "F"]) -> None:
np.testing.assert_array_equal(result, array_fixture)


@pytest.mark.filterwarnings("ignore")
@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_create_array_defaults(store: Store) -> None:
    """
    Check how the ``compressor`` argument is interpreted for Zarr format 2 arrays.

    * ``compressor=None`` yields an array with no compressor.
    * Omitting ``compressor`` yields an array that has a compressor.
    * Passing an explicit codec yields an array that has a compressor.
    * Passing both ``compressor`` and ``compressors`` raises ``ValueError``.

    None of the successful cases is expected to configure any filters.
    """
    g = zarr.open(store, mode="w", zarr_format=2)

    # Explicit None: no compression at all.
    arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None)
    assert arr._async_array.compressor is None
    assert not arr.filters

    # Argument omitted: a compressor is still chosen.
    arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,))
    assert arr._async_array.compressor is not None
    assert not arr.filters

    # Explicit codec instance.
    arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd())
    assert arr._async_array.compressor is not None
    assert not arr.filters

    # Both spellings supplied: match on the message so that an unrelated
    # ValueError cannot make this check pass by accident.
    with pytest.raises(ValueError, match="Cannot specify both"):
        g.create_array(
            "four", dtype="i8", shape=(1,), chunks=(1,), compressor=None, compressors=None
        )


@pytest.mark.parametrize("array_order", ["C", "F"])
@pytest.mark.parametrize("data_order", ["C", "F"])
@pytest.mark.parametrize("memory_order", ["C", "F"])
Expand Down

0 comments on commit 45146ca

Please sign in to comment.