From 1224aed42496846e232d15cd57e0ef42c5f8c729 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 26 Jun 2023 20:46:18 +0100 Subject: [PATCH] Run mypy in CI (#229) * Run mypy in CI * Fix type errors in vendored Dask functions * Fix type errors in rechunk * Fix type errors in cubed.utils * Fix type errors in cubed.core.array * Add xarray dependency for mypy type checking --- .github/workflows/array-api-tests.yml | 2 ++ .github/workflows/mypy.yml | 49 +++++++++++++++++++++++++++ .github/workflows/tests.yml | 2 ++ cubed/core/array.py | 8 ++--- cubed/primitive/rechunk.py | 7 ++-- cubed/primitive/types.py | 9 +++++ cubed/runtime/pipeline.py | 12 +++---- cubed/utils.py | 10 +++--- cubed/vendor/dask/utils.py | 2 +- cubed/vendor/dask/widgets/__init__.py | 4 +-- 10 files changed, 82 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/mypy.yml diff --git a/.github/workflows/array-api-tests.yml b/.github/workflows/array-api-tests.yml index ee480821..28a84fb0 100644 --- a/.github/workflows/array-api-tests.yml +++ b/.github/workflows/array-api-tests.yml @@ -2,6 +2,8 @@ name: Array API Tests on: push: + branches: + - "main" pull_request: schedule: # Every weekday at 03:19 UTC, see https://crontab.guru/ diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 00000000..7bbd09f5 --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,49 @@ +name: Mypy + +on: + push: + branches: + - "main" + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + python-version: ["3.8"] + + steps: + - name: Checkout source + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + + - name: Install + run: | + python -m pip install -e .[test] xarray + + - name: Install mypy + run: | + python -m pip install mypy + + - name: Run mypy + run: | + python -m mypy cubed diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d3457ec8..35471524 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,6 +2,8 @@ name: Tests on: push: + branches: + - "main" pull_request: workflow_dispatch: diff --git a/cubed/core/array.py b/cubed/core/array.py index 44d0afff..82f1166c 100644 --- a/cubed/core/array.py +++ b/cubed/core/array.py @@ -254,7 +254,7 @@ def __init__( self._work_dir = work_dir - self._reserved_mem = convert_to_bytes(reserved_mem) + self._reserved_mem = convert_to_bytes(reserved_mem or 0) if allowed_mem is None: self._allowed_mem = (max_mem or 0) + self.reserved_mem else: @@ -264,7 +264,7 @@ def __init__( self._storage_options = storage_options @property - def work_dir(self) -> str: + def work_dir(self) -> Optional[str]: """The directory path (specified as an fsspec URL) used for storing intermediate data.""" return self._work_dir @@ -289,12 +289,12 @@ def reserved_mem(self) -> int: return self._reserved_mem @property - def executor(self) -> Executor: + def executor(self) -> Optional[Executor]: """The default executor for running computations.""" return self._executor @property - def storage_options(self) -> dict: + def storage_options(self) -> Optional[dict]: """Storage options to be passed to fsspec.""" return self._storage_options diff --git a/cubed/primitive/rechunk.py b/cubed/primitive/rechunk.py index 8ec2873b..35affd8c 100644 --- a/cubed/primitive/rechunk.py +++ b/cubed/primitive/rechunk.py @@ -3,7 +3,7 @@ import zarr import cubed -from cubed.primitive.types import CubedArrayProxy +from cubed.primitive.types import CubedArrayProxy, CubedCopySpec from cubed.runtime.pipeline import spec_to_pipeline from cubed.storage.zarr import lazy_empty from cubed.vendor.rechunker.algorithm import rechunking_plan @@ -12,7 +12,6 @@ _shape_dict_to_tuple, _validate_options, ) -from cubed.vendor.rechunker.types import CopySpec def rechunk( @@ -103,7 +102,7 @@ def _setup_array_rechunk( temp_store_or_group=None, temp_options=None, name=None, -) -> CopySpec: +) -> CubedCopySpec: _validate_options(target_options) _validate_options(temp_options) shape = source_array.shape @@ -184,7 +183,7 @@ def _setup_array_rechunk( read_proxy = CubedArrayProxy(source_array, read_chunks) int_proxy = CubedArrayProxy(int_array, int_chunks) write_proxy = CubedArrayProxy(target_array, write_chunks) - return CopySpec(read_proxy, int_proxy, write_proxy) + return CubedCopySpec(read_proxy, int_proxy, write_proxy) def total_chunks(shape, chunks): diff --git a/cubed/primitive/types.py b/cubed/primitive/types.py index ff5ccbc1..accb4ef2 100644 --- a/cubed/primitive/types.py +++ b/cubed/primitive/types.py @@ -26,3 +26,12 @@ def __init__(self, array, chunks): def open(self): return open_if_lazy_zarr_array(self.array) + + +@dataclass(frozen=True) +class CubedCopySpec: + """Generalisation of rechunker ``CopySpec`` with support for ``LazyZarrArray``.""" + + read: CubedArrayProxy + intermediate: CubedArrayProxy + write: CubedArrayProxy diff --git a/cubed/runtime/pipeline.py b/cubed/runtime/pipeline.py index ceb9b7ad..93e7bf26 100644 --- a/cubed/runtime/pipeline.py +++ b/cubed/runtime/pipeline.py @@ -4,9 +4,9 @@ import numpy as np -from cubed.primitive.types import CubedPipeline +from cubed.primitive.types import CubedCopySpec, CubedPipeline from cubed.storage.zarr import open_if_lazy_zarr_array -from cubed.vendor.rechunker.types import CopySpec, Stage +from cubed.vendor.rechunker.types import Stage from .utils import gensym @@ -39,7 +39,7 @@ def __iter__(self): return chunk_keys(self.shape, self.chunks) -def copy_read_to_write(chunk_key, *, config=CopySpec): +def copy_read_to_write(chunk_key, *, config: CubedCopySpec): # workaround limitation of lithops.utils.verify_args if isinstance(chunk_key, list): chunk_key = tuple(chunk_key) @@ -47,7 +47,7 @@ def copy_read_to_write(chunk_key, *, config=CopySpec): config.write.open()[chunk_key] = data -def copy_read_to_intermediate(chunk_key, *, config=CopySpec): +def copy_read_to_intermediate(chunk_key, *, config: CubedCopySpec): # workaround limitation of lithops.utils.verify_args if isinstance(chunk_key, list): chunk_key = tuple(chunk_key) @@ -55,7 +55,7 @@ def copy_read_to_intermediate(chunk_key, *, config=CopySpec): config.intermediate.open()[chunk_key] = data -def copy_intermediate_to_write(chunk_key, *, config=CopySpec): +def copy_intermediate_to_write(chunk_key, *, config: CubedCopySpec): # workaround limitation of lithops.utils.verify_args if isinstance(chunk_key, list): chunk_key = tuple(chunk_key) @@ -64,7 +64,7 @@ def copy_intermediate_to_write(chunk_key, *, config=CopySpec): def spec_to_pipeline( - spec: CopySpec, target_array: Any, projected_mem: int, num_tasks: int + spec: CubedCopySpec, target_array: Any, projected_mem: int, num_tasks: int ) -> CubedPipeline: # typing won't work until we start using numpy types shape = spec.read.array.shape # type: ignore diff --git a/cubed/utils.py b/cubed/utils.py index 08fbb4dd..31696130 100644 --- a/cubed/utils.py +++ b/cubed/utils.py @@ -213,10 +213,8 @@ def convert_to_bytes(size: Union[int, str]) -> int: ) if unit in units and value.isdigit(): - value = int(value) # convert to bytes - return value * (1000 ** units[unit]) - else: - raise ValueError( - f"Invalid value: {size}. Expected a positive integer or a string ending with an SI prefix." - ) + return int(value) * (1000 ** units[unit]) + raise ValueError( + f"Invalid value: {size}. Expected a positive integer or a string ending with an SI prefix." + ) diff --git a/cubed/vendor/dask/utils.py b/cubed/vendor/dask/utils.py index 3b3903dc..cad4f61b 100644 --- a/cubed/vendor/dask/utils.py +++ b/cubed/vendor/dask/utils.py @@ -1,9 +1,9 @@ from __future__ import annotations -import datetime import functools import inspect import re +from datetime import datetime from numbers import Integral from typing import Any diff --git a/cubed/vendor/dask/widgets/__init__.py b/cubed/vendor/dask/widgets/__init__.py index 833be047..73280677 100644 --- a/cubed/vendor/dask/widgets/__init__.py +++ b/cubed/vendor/dask/widgets/__init__.py @@ -17,8 +17,8 @@ FILTERS = {} TEMPLATE_PATHS = [] - def get_environment(): + def get_environment(): # type: ignore raise ImportError(msg) from exception - def get_template(name: str): + def get_template(name: str): # type: ignore raise ImportError(msg) from exception