Commit ee1d801
Merge branch 'main' into update_seqfish

LucaMarconato committed Dec 10, 2024
2 parents d9ec1f5 + 5d537d6
Showing 28 changed files with 858 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .flake8
@@ -38,6 +38,8 @@ ignore =
D400
# First line should be in imperative mood; try rephrasing
D401
# Abstract base class without abstractmethod.
B024
exclude = .git,__pycache__,build,docs/_build,dist
per-file-ignores =
tests/*: D
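The new B024 ignore silences flake8-bugbear's warning about abstract base classes that define no `@abstractmethod`. A minimal sketch of the pattern that triggers the rule (class names here are hypothetical, for illustration only):

```python
from abc import ABC


# B024 would flag this: an ABC subclass with no @abstractmethod.
# Such "marker" base classes are still useful for isinstance checks
# and shared concrete helpers, which is why the rule is ignored.
class BaseReaderConfig(ABC):
    def describe(self) -> str:
        return type(self).__name__


class VisiumConfig(BaseReaderConfig):
    pass


print(VisiumConfig().describe())  # VisiumConfig
```

Note that without any abstract methods the base class is instantiable, which is exactly what B024 warns about; ignoring it is a deliberate choice when the class exists only to define a shared interface.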
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.9", "3.10"]
python: ["3.10", "3.12"]
os: [ubuntu-latest]

env:
@@ -52,15 +52,15 @@ jobs:
pip install --pre -e ".[dev,test]"
- name: Download artifact of test data
if: matrix.python == '3.10'
if: matrix.python == '3.12'
uses: dawidd6/action-download-artifact@v2
with:
workflow: prepare_test_data.yaml
name: data
path: ./data

- name: List the data directory
if: matrix.python == '3.10'
if: matrix.python == '3.12'
run: |
ls -l ./data
pwd
2 changes: 1 addition & 1 deletion .mypy.ini
@@ -1,5 +1,5 @@
[mypy]
python_version = 3.9
python_version = 3.10
plugins = numpy.typing.mypy_plugin

ignore_errors = False
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -2,8 +2,8 @@ fail_fast: false
default_language_version:
python: python3
default_stages:
- commit
- push
- pre-commit
- pre-push
minimum_pre_commit_version: 2.16.0
repos:
- repo: https://github.com/psf/black
@@ -73,7 +73,7 @@ repos:
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py39-plus, --keep-runtime-typing]
args: [--py3-plus, --py310-plus, --keep-runtime-typing]
- repo: local
hooks:
- id: forbid-to-commit
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -3,7 +3,7 @@ version: 2
build:
os: ubuntu-20.04
tools:
python: "3.9"
python: "3.10"
sphinx:
configuration: docs/conf.py
fail_on_warning: true
9 changes: 8 additions & 1 deletion CHANGELOG.md
@@ -8,9 +8,16 @@ and this project adheres to [Semantic Versioning][].
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html

## [0.1.6] - xxxx-xx-xx
## incoming release

- (Visium/Visium HD) lowres and hires images now mapped also to the 'global' coordinate system #230
- (Macsima) added support @berombau #224

## [0.1.6] - 2024-11-26

- (MERSCOPE) added `feature_key` attribute for points (i.e., the `'gene'` column) #210
- (Visium HD) get transformation matrices even when only images are parsed #215
- Support for `xarray.DataTree` (which was moved from `datatree.DataTree`) #232

## [0.1.5] - 2024-09-25

15 changes: 15 additions & 0 deletions README.md
@@ -24,9 +24,24 @@ This package contains reader functions to load common spatial omics formats into
- Steinbock (output data)
- STOmics Stereo-seq®
- Vizgen MERSCOPE® (MERFISH)
- MACSima® (MACS® iQ View output)

Note: all mentioned technologies are registered trademarks of their respective companies.

## Known limitations

Contributions addressing the limitations below are very welcome.

- Only Stereo-seq 7.x is supported; 8.x is not currently supported. https://github.com/scverse/spatialdata-io/issues/161

### How to Contribute

1. **Open a GitHub Issue**: Start by opening a new issue or commenting on an existing one in the repository. Clearly describe the problem and your proposed changes to avoid overlapping efforts with others.

2. **Submit a Pull Request (PR)**: Once the issue is discussed, submit a PR to the `spatialdata-io` repository. Ensure your PR includes information about a suitable dataset for testing the reader, ideally no larger than 10 GB. Include clear instructions for accessing the data, preferably with a `curl` or `wget` command for easy downloading.

3. **Optional Enhancements**: To facilitate reproducibility and ease of data access, consider adding a folder in the [spatialdata-sandbox](https://github.com/giovp/spatialdata-sandbox) repository. Include a `download.py` and `to_zarr.py` script (refer to examples in the repository) to enable others to reproduce your reader by simply running these scripts sequentially.

## Getting started

Please refer to the [documentation][link-docs]. In particular, the
1 change: 1 addition & 0 deletions docs/conf.py
@@ -90,6 +90,7 @@
}

intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"anndata": ("https://anndata.readthedocs.io/en/stable/", None),
"spatialdata": ("https://scverse-spatialdata.readthedocs.io/en/latest/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
7 changes: 4 additions & 3 deletions pyproject.toml
@@ -10,7 +10,7 @@ dynamic= [
]
description = "SpatialData IO for common techs"
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = {file = "LICENSE"}
authors = [
{name = "scverse"},
@@ -25,7 +25,7 @@ dependencies = [
"anndata",
"numpy",
"scanpy",
"spatialdata>=0.2.3",
"spatialdata>=0.2.6",
"scikit-image",
"h5py",
"joblib",
@@ -34,6 +34,7 @@ dependencies = [
"pyarrow",
"readfcs",
"tifffile>=2023.8.12",
"ome-types",
]

[project.optional-dependencies]
@@ -83,7 +84,7 @@ skip_glob = ["docs/*"]

[tool.black]
line-length = 120
target-version = ['py39']
target-version = ['py310']
include = '\.pyi?$'
exclude = '''
(
2 changes: 2 additions & 0 deletions src/spatialdata_io/__init__.py
@@ -4,6 +4,7 @@
from spatialdata_io.readers.cosmx import cosmx
from spatialdata_io.readers.curio import curio
from spatialdata_io.readers.dbit import dbit
from spatialdata_io.readers.macsima import macsima
from spatialdata_io.readers.mcmicro import mcmicro
from spatialdata_io.readers.merscope import merscope
from spatialdata_io.readers.seqfish import seqfish
@@ -32,6 +33,7 @@
"xenium_explorer_selection",
"dbit",
"visium_hd",
"macsima",
]

__version__ = version("spatialdata-io")
3 changes: 2 additions & 1 deletion src/spatialdata_io/_constants/_enum.py
@@ -1,7 +1,8 @@
from abc import ABC, ABCMeta
from collections.abc import Callable
from enum import Enum, EnumMeta
from functools import wraps
from typing import Any, Callable
from typing import Any


class PrettyEnum(Enum):
3 changes: 2 additions & 1 deletion src/spatialdata_io/_docs.py
@@ -1,7 +1,8 @@
from __future__ import annotations

from collections.abc import Callable
from textwrap import dedent
from typing import Any, Callable
from typing import Any


def inject_docs(**kwargs: Any) -> Callable[..., Any]: # noqa: D103
3 changes: 2 additions & 1 deletion src/spatialdata_io/_utils.py
@@ -2,7 +2,8 @@

import functools
import warnings
from typing import Any, Callable, TypeVar
from collections.abc import Callable
from typing import Any, TypeVar

RT = TypeVar("RT")

8 changes: 4 additions & 4 deletions src/spatialdata_io/readers/_utils/_read_10x_h5.py
@@ -31,7 +31,7 @@
# code below taken from https://github.com/scverse/scanpy/blob/master/scanpy/readwrite.py

from pathlib import Path
from typing import Any, Optional, Union
from typing import Any

import h5py
import numpy as np
@@ -40,8 +40,8 @@


def _read_10x_h5(
filename: Union[str, Path],
genome: Optional[str] = None,
filename: str | Path,
genome: str | None = None,
gex_only: bool = True,
) -> AnnData:
"""
@@ -96,7 +96,7 @@ def _read_10x_h5(
return adata


def _read_v3_10x_h5(filename: Union[str, Path], *, start: Optional[Any] = None) -> AnnData:
def _read_v3_10x_h5(filename: str | Path, *, start: Any | None = None) -> AnnData:
"""Read hdf5 file from Cell Ranger v3 or later versions."""
with h5py.File(str(filename), "r") as f:
try:
60 changes: 50 additions & 10 deletions src/spatialdata_io/readers/_utils/_utils.py
@@ -3,28 +3,23 @@
import os
from collections.abc import Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import Any, Union

import numpy as np
from anndata import AnnData, read_text
from h5py import File
from ome_types import from_tiff
from ome_types.model import Pixels, UnitsLength
from spatialdata._logging import logger

from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5

PathLike = Union[os.PathLike, str] # type:ignore[type-arg]

if TYPE_CHECKING:
from numpy.typing import NDArray

NDArrayA = NDArray[Any]
else:
NDArrayA = np.ndarray


def _read_counts(
path: str | Path,
counts_file: str,
library_id: Optional[str] = None,
library_id: str | None = None,
**kwargs: Any,
) -> tuple[AnnData, str]:
path = Path(path)
@@ -83,3 +78,48 @@ def _initialize_raster_models_kwargs(
if "scale_factors" not in labels_models_kwargs:
labels_models_kwargs["scale_factors"] = [2, 2, 2, 2]
return image_models_kwargs, labels_models_kwargs


def calc_scale_factors(lower_scale_limit: float, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]:
"""Calculate scale factors based on image size to get lowest resolution under min_size pixels."""
# get lowest dimension, ignoring channels
scale_factor: int = default_scale_factor
scale_factors = [scale_factor]
lower_scale_limit /= scale_factor
while lower_scale_limit >= min_size:
# scale_factors are cumulative, so we don't need to do e.g. scale_factor *= 2
scale_factors.append(scale_factor)
lower_scale_limit /= scale_factor
return scale_factors
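As a quick check of the cumulative-factor logic, the function (reproduced from the diff above) emits one extra factor each time the running size, divided by the product of factors so far, still meets `min_size`:

```python
def calc_scale_factors(lower_scale_limit: float, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]:
    """Calculate scale factors so the lowest pyramid level is under min_size pixels."""
    scale_factor: int = default_scale_factor
    scale_factors = [scale_factor]
    lower_scale_limit /= scale_factor
    while lower_scale_limit >= min_size:
        # factors are cumulative: each entry further downsamples the previous level
        scale_factors.append(scale_factor)
        lower_scale_limit /= scale_factor
    return scale_factors


# An 8000 px image needs four halvings to drop below min_size=1000:
# 8000 -> 4000 -> 2000 -> 1000 -> 500
print(calc_scale_factors(8000))  # [2, 2, 2, 2]
print(calc_scale_factors(900))   # [2] (already small; one level is still emitted)
```

Note that at least one factor is always returned, even when the image is already below `min_size`.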


def parse_channels(path: Path) -> list[str]:
"""Parse channel names from an OME-TIFF file."""
images = from_tiff(path).images
if len(images) > 1:
logger.warning("Found multiple images in OME-TIFF file. Only the first one will be used.")
channels = images[0].pixels.channels
logger.debug(channels)
names = [c.name for c in channels if c.name is not None]
return names


def parse_physical_size(path: Path | None = None, ome_pixels: Pixels | None = None) -> float:
"""Parse physical size from OME-TIFF to micrometer."""
pixels = ome_pixels or from_tiff(path).images[0].pixels
logger.debug(pixels)
if pixels.physical_size_x_unit != pixels.physical_size_y_unit:
logger.error("Physical units for x and y dimensions are not the same.")
raise NotImplementedError
if pixels.physical_size_x != pixels.physical_size_y:
logger.error("Physical sizes for x and y dimensions are not the same.")
raise NotImplementedError
# convert to micrometer if needed
if pixels.physical_size_x_unit == UnitsLength.NANOMETER:
physical_size = pixels.physical_size_x / 1000
elif pixels.physical_size_x_unit == UnitsLength.MICROMETER:
physical_size = pixels.physical_size_x
else:
logger.error(f"Physical unit not recognized: '{pixels.physical_size_x_unit}'.")
raise NotImplementedError
return float(physical_size)
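The unit branch in `parse_physical_size` reduces to a small conversion table; a stand-in sketch using plain strings in place of the `ome_types.model.UnitsLength` enum members (an assumption made here purely for illustration, since the real code compares enum values parsed from OME-TIFF metadata):

```python
def to_micrometer(size: float, unit: str) -> float:
    """Convert a physical pixel size to micrometers (simplified stand-in)."""
    if unit == "nm":
        return size / 1000  # nanometer -> micrometer
    if unit == "µm":
        return size  # already in micrometers
    raise NotImplementedError(f"Physical unit not recognized: '{unit}'.")


print(to_micrometer(650.0, "nm"))  # 0.65
print(to_micrometer(0.65, "µm"))   # 0.65
```

As in the parser above, unsupported units fail loudly rather than silently passing through an unconverted value.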
4 changes: 2 additions & 2 deletions src/spatialdata_io/readers/cosmx.py
@@ -5,7 +5,7 @@
from collections.abc import Mapping
from pathlib import Path
from types import MappingProxyType
from typing import Any, Optional
from typing import Any

import dask.array as da
import numpy as np
@@ -38,7 +38,7 @@
@inject_docs(cx=CosmxKeys)
def cosmx(
path: str | Path,
dataset_id: Optional[str] = None,
dataset_id: str | None = None,
transcripts: bool = True,
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
15 changes: 7 additions & 8 deletions src/spatialdata_io/readers/dbit.py
@@ -4,7 +4,6 @@
import re
from pathlib import Path
from re import Pattern
from typing import Optional, Union

import anndata as ad
import numpy as np
@@ -27,9 +26,9 @@ def _check_path(
path: Path,
pattern: Pattern[str],
key: DbitKeys,
path_specific: Optional[str | Path] = None,
path_specific: str | Path | None = None,
optional_arg: bool = False,
) -> tuple[Union[Path, None], bool]:
) -> tuple[Path | None, bool]:
"""
Check that the path is valid and match a regex pattern.
@@ -218,11 +217,11 @@ def _xy2edges(xy: list[int], scale: float = 1.0, border: bool = True, border_sca

@inject_docs(vx=DbitKeys)
def dbit(
path: Optional[str | Path] = None,
anndata_path: Optional[str] = None,
barcode_position: Optional[str] = None,
image_path: Optional[str] = None,
dataset_id: Optional[str] = None,
path: str | Path | None = None,
anndata_path: str | None = None,
barcode_position: str | None = None,
image_path: str | None = None,
dataset_id: str | None = None,
border: bool = True,
border_scale: float = 1,
) -> SpatialData:
(diff for the remaining changed files not shown)