diff --git a/docs/api/v3.md b/docs/api/v3.md
index 4934fa4..04cb984 100644
--- a/docs/api/v3.md
+++ b/docs/api/v3.md
@@ -1 +1 @@
-::: pydantic_zarr.v3
\ No newline at end of file
+::: pydantic_zarr.v3
diff --git a/docs/api/v3/codecs.md b/docs/api/v3/codecs.md
new file mode 100644
index 0000000..b74ab60
--- /dev/null
+++ b/docs/api/v3/codecs.md
@@ -0,0 +1 @@
+::: pydantic_zarr.v3.codecs
diff --git a/mkdocs.yaml b/mkdocs.yaml
index ddf31c8..3cdeb9e 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -33,7 +33,12 @@ nav:
   - API:
     - core: api/core.md
     - v2: api/v2.md
-    - v3: api/v3.md
+    - v3:
+      - Core: api/v3.md
+      - Codecs: api/v3/codecs.md
+
+watch:
+  - src
 
 plugins:
   - mkdocstrings:
@@ -47,6 +52,8 @@ plugins:
             docstring_options:
               ignore_init_summary: true
             merge_init_into_class: true
+            extensions:
+              - griffe_pydantic:
 
 markdown_extensions:
   - pymdownx.highlight:
diff --git a/pyproject.toml b/pyproject.toml
index e9fd4b8..ac54387 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,11 +31,12 @@ Source = "https://github.com/zarr-developers/pydantic-zarr"
 test = ["coverage", "pytest<8.4", "pytest-cov", "pytest-examples"]
 
 docs = [
+    "griffe-pydantic",
     "mkdocs-material",
     "mkdocstrings[python]",
     "pytest-examples",
     "pydantic==2.11",
-    "zarr>=3.1.0"
+    "zarr>=3.1.0",
 ]
 
 [tool.hatch]
diff --git a/src/pydantic_zarr/v3.py b/src/pydantic_zarr/v3/__init__.py
similarity index 99%
rename from src/pydantic_zarr/v3.py
rename to src/pydantic_zarr/v3/__init__.py
index da40886..1e2ef8b 100644
--- a/src/pydantic_zarr/v3.py
+++ b/src/pydantic_zarr/v3/__init__.py
@@ -35,6 +35,7 @@
     model_like,
     tuplify_json,
 )
+from pydantic_zarr.v3.codecs import Codec
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -95,7 +96,7 @@ class AnyNamedConfig(NamedConfig[str, Mapping[str, object]]):
     """
 
 
-CodecLike = str | AnyNamedConfig
+CodecLike = str | Codec
 """A type modelling the permissible declarations for codecs"""
 
 
diff --git a/src/pydantic_zarr/v3/codecs.py b/src/pydantic_zarr/v3/codecs.py
new file mode 100644
index 0000000..24b5960
--- /dev/null
+++ b/src/pydantic_zarr/v3/codecs.py
@@ -0,0 +1,148 @@
+"""
+Models for Zarr v3 codecs."""
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field, PositiveInt, field_validator, model_serializer
+
+
+class Codec(BaseModel):
+    """
+    Base class for codec models.
+    """
+
+    name: str
+    configuration: BaseModel
+
+
+class BloscConfiguration(BaseModel):
+    """
+    Configuration for blosc codec.
+    """
+
+    cname: Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
+    clevel: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+    shuffle: Literal["noshuffle", "shuffle", "bitshuffle"]
+    typesize: PositiveInt
+    blocksize: int
+
+
+class Blosc(Codec):
+    """
+    Blosc codec.
+    """
+
+    name: Literal["blosc"] = "blosc"
+    configuration: BloscConfiguration
+
+
+class BytesConfig(BaseModel):
+    """
+    Configuration for bytes codec.
+    """
+
+    endian: Literal["big", "little"] | None = None
+
+    @model_serializer
+    def ser_model(self) -> dict[str, Any]:
+        """
+        Serialize to an empty mapping when `endian` is unset, otherwise to `{"endian": ...}`.
+        """
+        # Build the mapping by hand: calling `model_dump()` from inside a plain-mode
+        # model serializer would invoke this same serializer again.
+        if self.endian is None:
+            return {}
+        return {"endian": self.endian}
+
+
+class Bytes(Codec):
+    """
+    Bytes codec.
+    """
+
+    name: Literal["bytes"] = "bytes"
+    configuration: BytesConfig
+
+
+class CRC32CConfig(BaseModel):
+    """
+    Configuration for crc32c codec.
+    """
+
+
+class CRC32C(Codec):
+    """
+    CRC32C codec.
+    """
+
+    name: Literal["crc32c"] = "crc32c"
+    configuration: CRC32CConfig = Field(default=CRC32CConfig())
+
+
+class GzipConfig(BaseModel):
+    """
+    Configuration for gzip codec.
+    """
+
+    level: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+
+class Gzip(Codec):
+    """
+    Gzip codec.
+    """
+
+    name: Literal["gzip"] = "gzip"
+    configuration: GzipConfig
+
+
+class ShardingConfig(BaseModel):
+    """
+    Configuration for sharding codec.
+    """
+
+    chunk_shape: tuple[int, ...]
+    codecs: tuple[Codec, ...]
+    # Default is recommended in the specification
+    index_codecs: tuple[Codec, ...] = Field(
+        default=(Bytes(configuration=BytesConfig(endian="little")), CRC32C())
+    )
+    index_location: Literal["start", "end"] = "end"
+
+
+class Sharding(Codec):
+    """
+    Sharding codec.
+    """
+
+    name: Literal["sharding_indexed"] = "sharding_indexed"
+    configuration: ShardingConfig
+
+
+class TransposeConfig(BaseModel):
+    """
+    Configuration for transpose codec.
+    """
+
+    order: tuple[int, ...]
+
+    @field_validator("order")
+    @classmethod
+    def check_order(cls, order: tuple[int, ...]) -> tuple[int, ...]:
+        """
+        Ensure that `order` contains each integer in `range(len(order))` exactly once.
+        """
+        if set(range(len(order))) != set(order):
+            raise ValueError(
+                "order must be a permutation of non-negative integers starting from 0"
+            )
+        return order
+
+
+class Transpose(Codec):
+    """
+    Transpose codec.
+    """
+
+    name: Literal["transpose"] = "transpose"
+    configuration: TransposeConfig
diff --git a/tests/test_pydantic_zarr/test_v3.py b/tests/test_pydantic_zarr/test_v3.py
index 74f8bdd..ca6b223 100644
--- a/tests/test_pydantic_zarr/test_v3.py
+++ b/tests/test_pydantic_zarr/test_v3.py
@@ -22,6 +22,7 @@
     RegularChunkingConfig,
     auto_codecs,
 )
+from pydantic_zarr.v3.codecs import Bytes, BytesConfig, Gzip, GzipConfig
 
 from .conftest import DTYPE_EXAMPLES_V3, DTypeExample
 
@@ -31,14 +32,14 @@ def test_serialize_deserialize() -> None:
 
     group_attributes = {"group": True}
 
-    array_spec = ArraySpec(
+    array_spec: AnyArraySpec = ArraySpec(
         attributes=array_attributes,
         shape=[1000, 1000],
         dimension_names=["rows", "columns"],
         data_type="float64",
         chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}),
         chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}),
-        codecs=[NamedConfig(name="GZip", configuration={"level": 1})],
+        codecs=[Gzip(configuration=GzipConfig(level=1))],
         fill_value="NaN",
         storage_transformers=[],
     )
@@ -205,7 +206,7 @@ def test_from_flat() -> None:
 
     @staticmethod
     def test_from_zarr_depth() -> None:
-        codecs = ({"name": "bytes", "configuration": {}},)
+        codecs = (Bytes(configuration=BytesConfig()),)
         tree: dict[str, AnyGroupSpec | AnyArraySpec] = {
             "": GroupSpec(members=None, attributes={"level": 0, "type": "group"}),
             "/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}),