From 601a1223d7255e754b1f90046b08d2d1d8d0df75 Mon Sep 17 00:00:00 2001
From: LLehner <laurenslehner@googlemail.com>
Date: Thu, 24 Oct 2024 16:10:13 +0200
Subject: [PATCH 01/11] update seqfish reader

---
 src/spatialdata_io/_constants/_constants.py |  11 +-
 src/spatialdata_io/readers/_utils/_utils.py |   9 +-
 src/spatialdata_io/readers/seqfish.py       | 136 +++++++++++++-------
 3 files changed, 99 insertions(+), 57 deletions(-)

diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index 34848137..57f50db0 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -66,14 +66,15 @@ class SeqfishKeys(ModeEnum):
     # file extensions
     CSV_FILE = ".csv"
     TIFF_FILE = ".tiff"
-    OME_TIFF_FILE = ".ome.tiff"
+    GEOJSON_FILE = ".geojson"
     # file identifiers
-    SECTION = "section"
-    TRANSCRIPT_COORDINATES = "TranscriptCoordinates"
+    SECTION = "_section"
+    TRANSCRIPT_COORDINATES = "TranscriptList"
     DAPI = "DAPI"
-    COUNTS_FILE = "CxG"
-    CELL_MASK_FILE = "CellMask"
+    COUNTS_FILE = "CellxGene"
+    SEGMENTATION = "Segmentation"
     CELL_COORDINATES = "CellCoordinates"
+    BOUNDARIES = "Boundaries"
     # transcripts
     TRANSCRIPTS_X = "x"
     TRANSCRIPTS_Y = "y"
diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py
index 4623fced..8c9a412a 100644
--- a/src/spatialdata_io/readers/_utils/_utils.py
+++ b/src/spatialdata_io/readers/_utils/_utils.py
@@ -3,7 +3,7 @@
 import os
 from collections.abc import Mapping
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import numpy as np
 from anndata import AnnData, read_text
@@ -13,15 +13,14 @@
 
 PathLike = Union[os.PathLike, str]  # type:ignore[type-arg]
 
-try:
+if TYPE_CHECKING:
     from numpy.typing import NDArray
-
     NDArrayA = NDArray[Any]
-except (ImportError, TypeError):
-    NDArray = np.ndarray
+else:
     NDArrayA = np.ndarray
 
 
+
 def _read_counts(
     path: str | Path,
     counts_file: str,
diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index b82b2eef..e1f97f42 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -5,7 +5,7 @@
 from collections.abc import Mapping
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any
+from typing import Any, Literal, Optional, Union
 
 import anndata as ad
 import numpy as np
@@ -19,7 +19,7 @@
     ShapesModel,
     TableModel,
 )
-from spatialdata.transformations import Identity
+from spatialdata.transformations.transformations import Identity
 
 from spatialdata_io._constants._constants import SeqfishKeys as SK
 from spatialdata_io._docs import inject_docs
@@ -33,7 +33,9 @@ def seqfish(
     load_images: bool = True,
     load_labels: bool = True,
     load_points: bool = True,
-    sections: list[int] | None = None,
+    load_shapes: bool = True,
+    load_additional_shapes: Union[Literal["segmentation", "boundaries", "all"], str, None] = None,
+    sections: Optional[list[int]] = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> SpatialData:
     """
@@ -43,8 +45,8 @@ def seqfish(
 
         - ```{vx.COUNTS_FILE!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Counts and metadata file.
         - ```{vx.CELL_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Cell coordinates file.
-        - ```{vx.DAPI!r}{vx.SECTION!r}{vx.OME_TIFF_FILE!r}```: High resolution tiff image.
-        - ```{vx.CELL_MASK_FILE!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: Cell mask file.
+        - ```{vx.DAPI!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: High resolution tiff image.
+        - ```{vx.SEGMENTATION!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: Cell mask file.
         - ```{vx.TRANSCRIPT_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Transcript coordinates file.
 
     .. seealso::
@@ -58,11 +60,16 @@ def seqfish(
     load_images
         Whether to load the images.
     load_labels
-        Whether to load the labels.
+        Whether to load cell segmentation.
     load_points
-        Whether to load the points.
+        Whether to load the transcript locations.
+    load_shapes
+        Whether to load cells as shape.
+    load_additional_shapes
+        Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei.
     sections
-        Which sections (specified as integers) to load. By default, all sections are loaded.
+        Which sections (specified as integers) to load. Only necessary if multiple sections present.
+        If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
     imread_kwargs
         Keyword arguments to pass to :func:`dask_image.imread.imread`.
 
@@ -71,7 +78,7 @@ def seqfish(
     :class:`spatialdata.SpatialData`
     """
     path = Path(path)
-    count_file_pattern = re.compile(rf"(.*?)_{SK.CELL_COORDINATES}_{SK.SECTION}[0-9]+" + re.escape(SK.CSV_FILE))
+    count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}.*{re.escape(SK.CSV_FILE)}$")
     count_files = [i for i in os.listdir(path) if count_file_pattern.match(i)]
     if not count_files:
         # no file matching tbe pattern found
@@ -85,52 +92,56 @@ def seqfish(
 
     n = len(count_files)
     all_sections = list(range(1, n + 1))
-    if sections is None:
-        sections = all_sections
-    else:
+    sections_str: list[Optional[str]] = []
+    if sections is not None:
         for section in sections:
             if section not in all_sections:
                 raise ValueError(f"Section {section} not found in the data.")
-    sections_str = [f"{SK.SECTION}{x}" for x in sections]
+            else:
+                sections_str.append(f"{SK.SECTION}{section}")
+    elif sections is None:
+        sections_str.append(None)
+    else:
+        raise ValueError("Invalid type for 'section'. Must be list[int] or None.")
 
-    def get_cell_file(section: str) -> str:
-        return f"{prefix}_{SK.CELL_COORDINATES}_{section}{SK.CSV_FILE}"
+    def get_cell_file(section: str | None) -> str:
+        return f"{prefix}{SK.CELL_COORDINATES}{section or ''}{SK.CSV_FILE}"
 
-    def get_count_file(section: str) -> str:
-        return f"{prefix}_{SK.COUNTS_FILE}_{section}{SK.CSV_FILE}"
+    def get_count_file(section: str | None) -> str:
+        return f"{prefix}{SK.COUNTS_FILE}{section or ''}{SK.CSV_FILE}"
 
-    def get_dapi_file(section: str) -> str:
-        return f"{prefix}_{SK.DAPI}_{section}{SK.OME_TIFF_FILE}"
+    def get_dapi_file(section: str | None) -> str:
+        return f"{prefix}{SK.DAPI}{section or ''}{SK.TIFF_FILE}"
 
-    def get_cell_mask_file(section: str) -> str:
-        return f"{prefix}_{SK.CELL_MASK_FILE}_{section}{SK.TIFF_FILE}"
+    def get_cell_mask_file(section: str | None) -> str:
+        return f"{prefix}{SK.SEGMENTATION}{section or ''}{SK.TIFF_FILE}"
 
-    def get_transcript_file(section: str) -> str:
-        return f"{prefix}_{SK.TRANSCRIPT_COORDINATES}_{section}{SK.CSV_FILE}"
+    def get_transcript_file(section: str | None) -> str:
+        return f"{prefix}{SK.TRANSCRIPT_COORDINATES}{section or ''}{SK.CSV_FILE}"
 
-    adatas: dict[str, ad.AnnData] = {}
-    for section in sections_str:  # type: ignore[assignment]
-        assert isinstance(section, str)
-        cell_file = get_cell_file(section)
-        count_matrix = get_count_file(section)
+    adatas: dict[Optional[str], ad.AnnData] = {}
+    for section_str in sections_str:
+        assert isinstance(section_str, str) or section_str is None
+        cell_file = get_cell_file(section_str)
+        count_matrix = get_count_file(section_str)
         adata = ad.read_csv(path / count_matrix, delimiter=",")
         cell_info = pd.read_csv(path / cell_file, delimiter=",")
         adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
         adata.obs[SK.AREA] = np.reshape(cell_info[SK.AREA].to_numpy(), (-1, 1))
-        region = f"cells_{section}"
+        region = f"cells_{section_str}"
         adata.obs[SK.REGION_KEY] = region
         adata.obs[SK.INSTANCE_KEY_TABLE] = adata.obs.index.astype(int)
-        adatas[section] = adata
+        adatas[section_str] = adata
 
     scale_factors = [2, 2, 2, 2]
 
     if load_images:
         images = {
-            f"image_{x}": Image2DModel.parse(
+            f"{os.path.splitext(get_dapi_file(x))[0]}": Image2DModel.parse(
                 imread(path / get_dapi_file(x), **imread_kwargs),
                 dims=("c", "y", "x"),
                 scale_factors=scale_factors,
-                transformations={x: Identity()},
+                transformations={"global": Identity()},
             )
             for x in sections_str
         }
@@ -139,11 +150,11 @@ def get_transcript_file(section: str) -> str:
 
     if load_labels:
         labels = {
-            f"labels_{x}": Labels2DModel.parse(
+            f"{os.path.splitext(get_cell_mask_file(x))[0]}": Labels2DModel.parse(
                 imread(path / get_cell_mask_file(x), **imread_kwargs).squeeze(),
                 dims=("y", "x"),
                 scale_factors=scale_factors,
-                transformations={x: Identity()},
+                transformations={"global": Identity()},
             )
             for x in sections_str
         }
@@ -152,12 +163,12 @@ def get_transcript_file(section: str) -> str:
 
     if load_points:
         points = {
-            f"transcripts_{x}": PointsModel.parse(
+            f"{os.path.splitext(get_transcript_file(x))[0]}": PointsModel.parse(
                 pd.read_csv(path / get_transcript_file(x), delimiter=","),
                 coordinates={"x": SK.TRANSCRIPTS_X, "y": SK.TRANSCRIPTS_Y},
                 feature_key=SK.FEATURE_KEY.value,
-                instance_key=SK.INSTANCE_KEY_POINTS.value,
-                transformations={x: Identity()},
+                # instance_key=SK.INSTANCE_KEY_POINTS.value, # TODO: should be optional but parameter might get deprecated anyway
+                transformations={"global": Identity()},
             )
             for x in sections_str
         }
@@ -174,16 +185,47 @@ def get_transcript_file(section: str) -> str:
         instance_key=SK.INSTANCE_KEY_TABLE.value,
     )
 
-    shapes = {
-        f"cells_{x}": ShapesModel.parse(
-            adata.obsm[SK.SPATIAL_KEY],
-            geometry=0,
-            radius=np.sqrt(adata.obs[SK.AREA].to_numpy() / np.pi),
-            index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
-            transformations={x: Identity()},
-        )
-        for x, adata in adatas.items()
-    }
+    if load_shapes:
+        shapes = {
+            f"{os.path.splitext(get_cell_file(x))[0]}": ShapesModel.parse(
+                adata.obsm[SK.SPATIAL_KEY],
+                geometry=0,
+                radius=np.sqrt(adata.obs[SK.AREA].to_numpy() / np.pi),
+                index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
+                transformations={"global": Identity()},
+            )
+            for x, adata in adatas.items()
+        }
+    else:
+        shapes = {}
+
+    if load_additional_shapes is not None:
+        shape_file_names = []
+        for filename in os.listdir(path):
+            if filename.endswith((SK.TIFF_FILE, SK.GEOJSON_FILE)):
+                if load_additional_shapes == "all":
+                    if not any(key in filename for key in images.keys()) and not any(
+                        key in filename for key in labels.keys()
+                    ):
+                        shape_file_names.append(filename)
+                elif load_additional_shapes == "segmentation":
+                    if SK.SEGMENTATION in filename and not any(key in filename for key in labels.keys()):
+                        shape_file_names.append(filename)
+                elif load_additional_shapes == "boundaries":
+                    if SK.BOUNDARIES in filename:
+                        shape_file_names.append(filename)
+                elif isinstance(load_additional_shapes, str):
+                    if load_additional_shapes in filename:
+                        shape_file_names.append(filename)
+                else:
+                    raise ValueError(f"No file found with identifier {load_additional_shapes}")
+
+        for x in range(len(shape_file_names)):
+            shapes[f"{os.path.splitext(shape_file_names[x])[0]}"] = ShapesModel.parse(
+                path / shape_file_names[x],
+                index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
+                transformations={"global": Identity()},
+            )
 
     sdata = SpatialData(images=images, labels=labels, points=points, table=table, shapes=shapes)
 

From 044ab9d168c2d755e44f4480e9f1f77b8d75a4d0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 24 Oct 2024 14:19:08 +0000
Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata_io/readers/_utils/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py
index 8c9a412a..f7f3abe5 100644
--- a/src/spatialdata_io/readers/_utils/_utils.py
+++ b/src/spatialdata_io/readers/_utils/_utils.py
@@ -15,12 +15,12 @@
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
+
     NDArrayA = NDArray[Any]
 else:
     NDArrayA = np.ndarray
 
 
-
 def _read_counts(
     path: str | Path,
     counts_file: str,

From 233946ae30b97665274bf197544929aaab601252 Mon Sep 17 00:00:00 2001
From: Laurens Lehner <laurens.lehner@helmholtz-munich.de>
Date: Tue, 12 Nov 2024 18:32:59 +0100
Subject: [PATCH 03/11] Update prefixes; Add scalefactors

---
 src/spatialdata_io/_constants/_constants.py |   4 +-
 src/spatialdata_io/readers/seqfish.py       | 129 ++++++++++++--------
 2 files changed, 78 insertions(+), 55 deletions(-)

diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index 57f50db0..92d26db3 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -68,7 +68,7 @@ class SeqfishKeys(ModeEnum):
     TIFF_FILE = ".tiff"
     GEOJSON_FILE = ".geojson"
     # file identifiers
-    SECTION = "_section"
+    ROI = "Roi"
     TRANSCRIPT_COORDINATES = "TranscriptList"
     DAPI = "DAPI"
     COUNTS_FILE = "CellxGene"
@@ -88,6 +88,8 @@ class SeqfishKeys(ModeEnum):
     SPATIAL_KEY = "spatial"
     REGION_KEY = "region"
     INSTANCE_KEY_TABLE = "instance_id"
+    SCALEFEFACTOR_X = "PhysicalSizeX"
+    SCALEFEFACTOR_Y = "PhysicalSizeY"
 
 
 @unique
diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index e1f97f42..0ce6f144 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -2,6 +2,7 @@
 
 import os
 import re
+import xml.etree.ElementTree as ET
 from collections.abc import Mapping
 from pathlib import Path
 from types import MappingProxyType
@@ -10,6 +11,7 @@
 import anndata as ad
 import numpy as np
 import pandas as pd
+import tifffile
 from dask_image.imread import imread
 from spatialdata import SpatialData
 from spatialdata.models import (
@@ -19,7 +21,7 @@
     ShapesModel,
     TableModel,
 )
-from spatialdata.transformations.transformations import Identity
+from spatialdata.transformations.transformations import Identity, Scale
 
 from spatialdata_io._constants._constants import SeqfishKeys as SK
 from spatialdata_io._docs import inject_docs
@@ -35,7 +37,7 @@ def seqfish(
     load_points: bool = True,
     load_shapes: bool = True,
     load_additional_shapes: Union[Literal["segmentation", "boundaries", "all"], str, None] = None,
-    sections: Optional[list[int]] = None,
+    ROIs: Optional[list[int]] = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> SpatialData:
     """
@@ -43,11 +45,11 @@ def seqfish(
 
     This function reads the following files:
 
-        - ```{vx.COUNTS_FILE!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Counts and metadata file.
-        - ```{vx.CELL_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Cell coordinates file.
-        - ```{vx.DAPI!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: High resolution tiff image.
-        - ```{vx.SEGMENTATION!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: Cell mask file.
-        - ```{vx.TRANSCRIPT_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Transcript coordinates file.
+        - ```{vx.ROI!r}{vx.COUNTS_FILE!r}{vx.CSV_FILE!r}```: Counts and metadata file.
+        - ```{vx.ROI!r}{vx.CELL_COORDINATES!r}{vx.CSV_FILE!r}```: Cell coordinates file.
+        - ```{vx.ROI!r}{vx.DAPI!r}{vx.TIFF_FILE!r}```: High resolution tiff image.
+        - ```{vx.ROI!r}{vx.SEGMENTATION!r}{vx.TIFF_FILE!r}```: Cell mask file.
+        - ```{vx.ROI!r}{vx.TRANSCRIPT_COORDINATES!r}{vx.CSV_FILE!r}```: Transcript coordinates file.
 
     .. seealso::
 
@@ -67,8 +69,8 @@ def seqfish(
         Whether to load cells as shape.
     load_additional_shapes
         Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei.
-    sections
-        Which sections (specified as integers) to load. Only necessary if multiple sections present.
+    ROIs
+        Which ROIs (specified as integers) to load. Only necessary if multiple ROIs present.
         If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
     imread_kwargs
         Keyword arguments to pass to :func:`dask_image.imread.imread`.
@@ -80,6 +82,9 @@ def seqfish(
     path = Path(path)
     count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}.*{re.escape(SK.CSV_FILE)}$")
     count_files = [i for i in os.listdir(path) if count_file_pattern.match(i)]
+    roi_pattern = re.compile(f"^{SK.ROI}(\\d+)")
+    # n_rois = {roi_pattern.match(i).group(1) for i in os.listdir(path) if roi_pattern.match(i)}
+    n_rois = {m.group(1) for i in os.listdir(path) if (m := roi_pattern.match(i))}
     if not count_files:
         # no file matching tbe pattern found
         raise ValueError(
@@ -88,50 +93,53 @@ def seqfish(
     matched = count_file_pattern.match(count_files[0])
     if matched is None:
         raise ValueError(f"File {count_files[0]} does not match the pattern {count_file_pattern}")
-    prefix = matched.group(1)
-
-    n = len(count_files)
-    all_sections = list(range(1, n + 1))
-    sections_str: list[Optional[str]] = []
-    if sections is not None:
-        for section in sections:
-            if section not in all_sections:
-                raise ValueError(f"Section {section} not found in the data.")
-            else:
-                sections_str.append(f"{SK.SECTION}{section}")
-    elif sections is None:
-        sections_str.append(None)
+    # prefix = matched.group(1)
+
+    rois_str: list[str] = []
+    if ROIs is None:
+        for roi in n_rois:
+            rois_str.append(f"{SK.ROI}{roi}")
+    elif isinstance(ROIs, list):
+        for roi in ROIs:
+            if str(roi) not in n_rois:
+                raise ValueError(f"ROI{roi} not found.")
+            rois_str.append(f"{SK.ROI}{roi}")
     else:
-        raise ValueError("Invalid type for 'section'. Must be list[int] or None.")
+        raise ValueError("Invalid type for 'roi'. Must be list[int] or None.")
 
-    def get_cell_file(section: str | None) -> str:
-        return f"{prefix}{SK.CELL_COORDINATES}{section or ''}{SK.CSV_FILE}"
+    def get_cell_file(roi: str) -> str:
+        return f"{roi}_{SK.CELL_COORDINATES}{SK.CSV_FILE}"
 
-    def get_count_file(section: str | None) -> str:
-        return f"{prefix}{SK.COUNTS_FILE}{section or ''}{SK.CSV_FILE}"
+    def get_count_file(roi: str) -> str:
+        return f"{roi}_{SK.COUNTS_FILE}{SK.CSV_FILE}"
 
-    def get_dapi_file(section: str | None) -> str:
-        return f"{prefix}{SK.DAPI}{section or ''}{SK.TIFF_FILE}"
+    def get_dapi_file(roi: str) -> str:
+        return f"{roi}_{SK.DAPI}{SK.TIFF_FILE}"
 
-    def get_cell_mask_file(section: str | None) -> str:
-        return f"{prefix}{SK.SEGMENTATION}{section or ''}{SK.TIFF_FILE}"
+    def get_cell_mask_file(roi: str) -> str:
+        return f"{roi}_{SK.SEGMENTATION}{SK.TIFF_FILE}"
 
-    def get_transcript_file(section: str | None) -> str:
-        return f"{prefix}{SK.TRANSCRIPT_COORDINATES}{section or ''}{SK.CSV_FILE}"
+    def get_transcript_file(roi: str) -> str:
+        return f"{roi}_{SK.TRANSCRIPT_COORDINATES}{SK.CSV_FILE}"
 
+    scaled = {}
     adatas: dict[Optional[str], ad.AnnData] = {}
-    for section_str in sections_str:
-        assert isinstance(section_str, str) or section_str is None
-        cell_file = get_cell_file(section_str)
-        count_matrix = get_count_file(section_str)
+    for roi_str in rois_str:
+        assert isinstance(roi_str, str) or roi_str is None
+        cell_file = get_cell_file(roi_str)
+        count_matrix = get_count_file(roi_str)
         adata = ad.read_csv(path / count_matrix, delimiter=",")
         cell_info = pd.read_csv(path / cell_file, delimiter=",")
         adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
         adata.obs[SK.AREA] = np.reshape(cell_info[SK.AREA].to_numpy(), (-1, 1))
-        region = f"cells_{section_str}"
+        region = f"cells_{roi_str}"
         adata.obs[SK.REGION_KEY] = region
         adata.obs[SK.INSTANCE_KEY_TABLE] = adata.obs.index.astype(int)
-        adatas[section_str] = adata
+        adatas[roi_str] = adata
+        scaled[roi_str] = Scale(
+            np.array(_get_scale_factors(path / get_dapi_file(roi_str), SK.SCALEFEFACTOR_X, SK.SCALEFEFACTOR_Y)),
+            axes=("y", "x"),
+        )
 
     scale_factors = [2, 2, 2, 2]
 
@@ -141,9 +149,9 @@ def get_transcript_file(section: str | None) -> str:
                 imread(path / get_dapi_file(x), **imread_kwargs),
                 dims=("c", "y", "x"),
                 scale_factors=scale_factors,
-                transformations={"global": Identity()},
+                transformations={"global": scaled[x]},
             )
-            for x in sections_str
+            for x in rois_str
         }
     else:
         images = {}
@@ -156,7 +164,7 @@ def get_transcript_file(section: str | None) -> str:
                 scale_factors=scale_factors,
                 transformations={"global": Identity()},
             )
-            for x in sections_str
+            for x in rois_str
         }
     else:
         labels = {}
@@ -170,20 +178,22 @@ def get_transcript_file(section: str | None) -> str:
                 # instance_key=SK.INSTANCE_KEY_POINTS.value, # TODO: should be optional but parameter might get deprecated anyway
                 transformations={"global": Identity()},
             )
-            for x in sections_str
+            for x in rois_str
         }
     else:
         points = {}
 
-    adata = ad.concat(adatas.values())
-    adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
-    adata.obs = adata.obs.reset_index(drop=True)
-    table = TableModel.parse(
-        adata,
-        region=[f"cells_{x}" for x in sections_str],
-        region_key=SK.REGION_KEY.value,
-        instance_key=SK.INSTANCE_KEY_TABLE.value,
-    )
+    # adata = ad.concat(adatas.values())
+    tables = {}
+    for name, adata in adatas.items():
+        adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
+        adata.obs = adata.obs.reset_index(drop=True)
+        tables[name] = TableModel.parse(
+            adata,
+            region=f"cells_{name}",
+            region_key=SK.REGION_KEY.value,
+            instance_key=SK.INSTANCE_KEY_TABLE.value,
+        )
 
     if load_shapes:
         shapes = {
@@ -194,7 +204,7 @@ def get_transcript_file(section: str | None) -> str:
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
                 transformations={"global": Identity()},
             )
-            for x, adata in adatas.items()
+            for x, adata in zip(rois_str, adatas.values())
         }
     else:
         shapes = {}
@@ -227,6 +237,17 @@ def get_transcript_file(section: str | None) -> str:
                 transformations={"global": Identity()},
             )
 
-    sdata = SpatialData(images=images, labels=labels, points=points, table=table, shapes=shapes)
+    sdata = SpatialData(images=images, labels=labels, points=points, tables=tables, shapes=shapes)
 
     return sdata
+
+
+def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_key: str) -> list[float]:
+    with tifffile.TiffFile(DAPI_path) as tif:
+        ome_metadata = tif.ome_metadata
+        root = ET.fromstring(ome_metadata)
+        for element in root.iter():
+            if scalefactor_x_key in element.attrib.keys():
+                scalefactor_x = element.attrib[scalefactor_x_key]
+                scalefactor_y = element.attrib[scalefactor_y_key]
+    return [float(scalefactor_x), float(scalefactor_y)]

From d9ec1f51dcd889ab8fd6919b22e3e42df78467b6 Mon Sep 17 00:00:00 2001
From: Laurens Lehner <laurens.lehner@helmholtz-munich.de>
Date: Tue, 12 Nov 2024 18:42:56 +0100
Subject: [PATCH 04/11] Remove unused code

---
 src/spatialdata_io/readers/seqfish.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index 0ce6f144..dac52f99 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -83,17 +83,14 @@ def seqfish(
     count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}.*{re.escape(SK.CSV_FILE)}$")
     count_files = [i for i in os.listdir(path) if count_file_pattern.match(i)]
     roi_pattern = re.compile(f"^{SK.ROI}(\\d+)")
-    # n_rois = {roi_pattern.match(i).group(1) for i in os.listdir(path) if roi_pattern.match(i)}
     n_rois = {m.group(1) for i in os.listdir(path) if (m := roi_pattern.match(i))}
     if not count_files:
-        # no file matching tbe pattern found
         raise ValueError(
             f"No files matching the pattern {count_file_pattern} were found. Cannot infer the naming scheme."
         )
     matched = count_file_pattern.match(count_files[0])
     if matched is None:
         raise ValueError(f"File {count_files[0]} does not match the pattern {count_file_pattern}")
-    # prefix = matched.group(1)
 
     rois_str: list[str] = []
     if ROIs is None:
@@ -183,7 +180,6 @@ def get_transcript_file(roi: str) -> str:
     else:
         points = {}
 
-    # adata = ad.concat(adatas.values())
     tables = {}
     for name, adata in adatas.items():
         adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")

From c8fc779ca51559e2c2593953de93b50114b57ea6 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Tue, 10 Dec 2024 16:13:33 +0100
Subject: [PATCH 05/11] rename variable

---
 src/spatialdata_io/readers/seqfish.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index 028753fb..187c178b 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -37,7 +37,7 @@ def seqfish(
     load_points: bool = True,
     load_shapes: bool = True,
     load_additional_shapes: Literal["segmentation", "boundaries", "all"] | str | None = None,
-    ROIs: list[int] | None = None,
+    rois: list[int] | None = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> SpatialData:
     """
@@ -69,7 +69,7 @@ def seqfish(
         Whether to load cells as shape.
     load_additional_shapes
         Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei.
-    ROIs
+    rois
         Which ROIs (specified as integers) to load. Only necessary if multiple ROIs present.
         If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
     imread_kwargs
@@ -93,11 +93,11 @@ def seqfish(
         raise ValueError(f"File {count_files[0]} does not match the pattern {count_file_pattern}")
 
     rois_str: list[str] = []
-    if ROIs is None:
+    if rois is None:
         for roi in n_rois:
             rois_str.append(f"{SK.ROI}{roi}")
-    elif isinstance(ROIs, list):
-        for roi in ROIs:
+    elif isinstance(rois, list):
+        for roi in rois:
             if str(roi) not in n_rois:
                 raise ValueError(f"ROI{roi} not found.")
             rois_str.append(f"{SK.ROI}{roi}")

From f219088cc055f5c6bf66f14232ffdbad05450b5a Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Wed, 11 Dec 2024 20:16:54 +0100
Subject: [PATCH 06/11] fix scale factor labels; fix table

---
 src/spatialdata_io/readers/seqfish.py | 133 ++++++++++++++++++--------
 1 file changed, 93 insertions(+), 40 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index 187c178b..d7aa656a 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -37,8 +37,10 @@ def seqfish(
     load_points: bool = True,
     load_shapes: bool = True,
     load_additional_shapes: Literal["segmentation", "boundaries", "all"] | str | None = None,
+    cells_as_circles: bool = False,
     rois: list[int] | None = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    raster_models_scale_factors: list[float] | None = None,
 ) -> SpatialData:
     """
     Read *seqfish* formatted dataset.
@@ -69,38 +71,47 @@ def seqfish(
         Whether to load cells as shape.
     load_additional_shapes
         Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei.
+        If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
+    cells_as_circles
+        Whether to read cells also as circles instead of labels.
     rois
         Which ROIs (specified as integers) to load. Only necessary if multiple ROIs present.
-        If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
     imread_kwargs
         Keyword arguments to pass to :func:`dask_image.imread.imread`.
 
     Returns
     -------
     :class:`spatialdata.SpatialData`
+
+    Examples
+    --------
+    This code shows how to change the annotation target of the table from the cell labels to the cell cirlces.
+    Please check that Roi1 is present in your dataset, otherwise adjust the code below.
+    >>> from spatialdata_io import seqfish
+    >>> sdata = seqfish("path/to/raw/data")
+    >>> sdata["table"].obs["region"] = "Roi1_CellCoordinates"
+    >>> sdata.set_table_annotates_spatialelement(
+    ...     table_name="Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
+    ... )
+    >>> sdata.write("path/to/data.zarr")
     """
     path = Path(path)
-    count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}.*{re.escape(SK.CSV_FILE)}$")
-    count_files = [i for i in os.listdir(path) if count_file_pattern.match(i)]
-    roi_pattern = re.compile(f"^{SK.ROI}(\\d+)")
-    n_rois = {m.group(1) for i in os.listdir(path) if (m := roi_pattern.match(i))}
+    count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}{re.escape(SK.CSV_FILE)}$")
+    count_files = [f for f in os.listdir(path) if count_file_pattern.match(f)]
     if not count_files:
         raise ValueError(
             f"No files matching the pattern {count_file_pattern} were found. Cannot infer the naming scheme."
         )
-    matched = count_file_pattern.match(count_files[0])
-    if matched is None:
-        raise ValueError(f"File {count_files[0]} does not match the pattern {count_file_pattern}")
 
-    rois_str: list[str] = []
+    roi_pattern = re.compile(f"^{SK.ROI}(\\d+)")
+    found_rois = {m.group(1) for i in os.listdir(path) if (m := roi_pattern.match(i))}
     if rois is None:
-        for roi in n_rois:
-            rois_str.append(f"{SK.ROI}{roi}")
+        rois_str = [f"{SK.ROI}{roi}" for roi in found_rois]
     elif isinstance(rois, list):
         for roi in rois:
-            if str(roi) not in n_rois:
+            if str(roi) not in found_rois:
                 raise ValueError(f"ROI{roi} not found.")
-            rois_str.append(f"{SK.ROI}{roi}")
+        rois_str = [f"{SK.ROI}{roi}" for roi in rois]
     else:
         raise ValueError("Invalid type for 'roi'. Must be list[int] or None.")
 
@@ -119,33 +130,54 @@ def get_cell_mask_file(roi: str) -> str:
     def get_transcript_file(roi: str) -> str:
         return f"{roi}_{SK.TRANSCRIPT_COORDINATES}{SK.CSV_FILE}"
 
-    scaled = {}
-    adatas: dict[str, ad.AnnData] = {}
+    # parse table information
+    tables: dict[str, ad.AnnData] = {}
     for roi_str in rois_str:
-        assert isinstance(roi_str, str) or roi_str is None
-        cell_file = get_cell_file(roi_str)
+        # parse cell gene expression data
         count_matrix = get_count_file(roi_str)
-        adata = ad.read_csv(path / count_matrix, delimiter=",")
+        df = pd.read_csv(path / count_matrix, delimiter=",")
+        instance_id = df.iloc[:, 0].astype(str)
+        expression = df.drop(columns=["Unnamed: 0"])
+        expression.set_index(instance_id, inplace=True)
+        adata = ad.AnnData(expression)
+
+        # parse cell spatial information
+        cell_file = get_cell_file(roi_str)
         cell_info = pd.read_csv(path / cell_file, delimiter=",")
+        cell_info["label"] = cell_info["label"].astype("str")
+        # below, the obsm are assigned by position, not by index. Here we check that we can do it
+        assert cell_info["label"].to_numpy().tolist() == adata.obs.index.to_numpy().tolist()
+        cell_info.set_index("label", inplace=True)
+        adata.obs[SK.AREA] = cell_info[SK.AREA]
         adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
-        adata.obs[SK.AREA] = np.reshape(cell_info[SK.AREA].to_numpy(), (-1, 1))
-        region = f"cells_{roi_str}"
+
+        # map tables to cell labels (defined later)
+        region = os.path.splitext(get_cell_mask_file(roi_str))[0]
         adata.obs[SK.REGION_KEY] = region
-        adata.obs[SK.INSTANCE_KEY_TABLE] = adata.obs.index.astype(int)
-        adatas[roi_str] = adata
+        adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
+        adata.obs[SK.INSTANCE_KEY_TABLE] = instance_id.to_numpy().astype(np.uint16)
+        adata.obs = adata.obs.reset_index(drop=True)
+        tables[f"table_{roi_str}"] = TableModel.parse(
+            adata,
+            region=region,
+            region_key=SK.REGION_KEY.value,
+            instance_key=SK.INSTANCE_KEY_TABLE.value,
+        )
+
+    # parse scale factors to scale images and labels
+    scaled = {}
+    for roi_str in rois_str:
         scaled[roi_str] = Scale(
             np.array(_get_scale_factors(path / get_dapi_file(roi_str), SK.SCALEFEFACTOR_X, SK.SCALEFEFACTOR_Y)),
             axes=("y", "x"),
         )
 
-    scale_factors = [2, 2, 2, 2]
-
     if load_images:
         images = {
             f"{os.path.splitext(get_dapi_file(x))[0]}": Image2DModel.parse(
                 imread(path / get_dapi_file(x), **imread_kwargs),
                 dims=("c", "y", "x"),
-                scale_factors=scale_factors,
+                scale_factors=raster_models_scale_factors,
                 transformations={"global": scaled[x]},
             )
             for x in rois_str
@@ -158,8 +190,8 @@ def get_transcript_file(roi: str) -> str:
             f"{os.path.splitext(get_cell_mask_file(x))[0]}": Labels2DModel.parse(
                 imread(path / get_cell_mask_file(x), **imread_kwargs).squeeze(),
                 dims=("y", "x"),
-                scale_factors=scale_factors,
-                transformations={"global": Identity()},
+                scale_factors=raster_models_scale_factors,
+                transformations={"global": scaled[x]},
             )
             for x in rois_str
         }
@@ -172,7 +204,7 @@ def get_transcript_file(roi: str) -> str:
                 pd.read_csv(path / get_transcript_file(x), delimiter=","),
                 coordinates={"x": SK.TRANSCRIPTS_X, "y": SK.TRANSCRIPTS_Y},
                 feature_key=SK.FEATURE_KEY.value,
-                # instance_key=SK.INSTANCE_KEY_POINTS.value, # TODO: should be optional but parameter might get deprecated anyway
+                instance_key=None,
                 transformations={"global": Identity()},
             )
             for x in rois_str
@@ -180,17 +212,6 @@ def get_transcript_file(roi: str) -> str:
     else:
         points = {}
 
-    tables = {}
-    for name, adata in adatas.items():
-        adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
-        adata.obs = adata.obs.reset_index(drop=True)
-        tables[name] = TableModel.parse(
-            adata,
-            region=f"cells_{name}",
-            region_key=SK.REGION_KEY.value,
-            instance_key=SK.INSTANCE_KEY_TABLE.value,
-        )
-
     if load_shapes:
         shapes = {
             f"{os.path.splitext(get_cell_file(x))[0]}": ShapesModel.parse(
@@ -200,7 +221,7 @@ def get_transcript_file(roi: str) -> str:
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
                 transformations={"global": Identity()},
             )
-            for x, adata in zip(rois_str, adatas.values())
+            for x, adata in zip(rois_str, tables.values())
         }
     else:
         shapes = {}
@@ -247,3 +268,35 @@ def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_ke
                 scalefactor_x = element.attrib[scalefactor_x_key]
                 scalefactor_y = element.attrib[scalefactor_y_key]
     return [float(scalefactor_x), float(scalefactor_y)]
+
+
+if __name__ == "__main__":
+    path_read = "/Users/macbook/ssd/biodata/seqfish/instrument 2 official"
+    sdata = seqfish(
+        path=path_read,
+        # load_images=True,
+        # load_labels=True,
+        # load_points=True,
+        # rois=[1],
+    )
+    # sdata.pl.render_labels(color='Arg1').pl.show()
+    import matplotlib.pyplot as plt
+
+    # plt.show()
+    # sdata["table_Roi1"].obs["region"] = "Roi1_CellCoordinates"
+    # sdata.set_table_annotates_spatialelement(
+    #     table_name="table_Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
+    # )
+
+    gene_name = "Arg1"
+    (
+        sdata.pl.render_images("Roi1_DAPI", cmap="gray")
+        .pl.render_labels("Roi1_Segmentation", color=gene_name)
+        .pl.render_points("Roi1_TranscriptList", color="name", groups=gene_name, palette="orange")
+        .pl.show(title=f"{gene_name} expression over DAPI image", coordinate_systems="global", figsize=(10, 5))
+    )
+    plt.show()
+
+    from napari_spatialdata import Interactive
+
+    Interactive(sdata)

From ff14eed2b0b144085c4758f1a286c3c7d3b0b0f2 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Thu, 12 Dec 2024 11:24:57 +0100
Subject: [PATCH 07/11] before removing 'load_additional_shapes'

---
 src/spatialdata_io/readers/seqfish.py | 90 ++++++++++++++++-----------
 1 file changed, 55 insertions(+), 35 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index d7aa656a..c41a940c 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -36,7 +36,7 @@ def seqfish(
     load_labels: bool = True,
     load_points: bool = True,
     load_shapes: bool = True,
-    load_additional_shapes: Literal["segmentation", "boundaries", "all"] | str | None = None,
+    load_additional_geometries: Literal["segmentation", "boundaries", "all"] | str | None = None,
     cells_as_circles: bool = False,
     rois: list[int] | None = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -69,9 +69,11 @@ def seqfish(
         Whether to load the transcript locations.
     load_shapes
         Whether to load cells as shape.
-    load_additional_shapes
-        Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei.
-        If "all" is specified, reads all remaining .tiff and .geojson files in the directory.
+    load_additional_geometries
+        Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei. If "all" is
+        specified (default), it reads all remaining .tiff and .geojson files in the directory. Other options are
+        "segmentation", "boundaries", or a string to match in the filename (as a subset). If `None` is passed,
+        no additional geometry is loaded.
     cells_as_circles
         Whether to read cells also as circles instead of labels.
     rois
@@ -198,19 +200,24 @@ def get_transcript_file(roi: str) -> str:
     else:
         labels = {}
 
+    points = {}
     if load_points:
-        points = {
-            f"{os.path.splitext(get_transcript_file(x))[0]}": PointsModel.parse(
-                pd.read_csv(path / get_transcript_file(x), delimiter=","),
+        for x in rois_str:
+
+
+            # prepare data
+            name = f"{os.path.splitext(get_transcript_file(x))[0]}"
+            p = pd.read_csv(path / get_transcript_file(x), delimiter=",")
+            instance_key_points = SK.INSTANCE_KEY_POINTS.value if SK.INSTANCE_KEY_POINTS.value in p.columns else None
+
+            # call parser
+            points[name] = PointsModel.parse(
+                p,
                 coordinates={"x": SK.TRANSCRIPTS_X, "y": SK.TRANSCRIPTS_Y},
                 feature_key=SK.FEATURE_KEY.value,
-                instance_key=None,
+                instance_key=instance_key_points,
                 transformations={"global": Identity()},
             )
-            for x in rois_str
-        }
-    else:
-        points = {}
 
     if load_shapes:
         shapes = {
@@ -226,33 +233,45 @@ def get_transcript_file(roi: str) -> str:
     else:
         shapes = {}
 
-    if load_additional_shapes is not None:
-        shape_file_names = []
+    if load_additional_geometries is not None:
+        labels_filenames = []
+        shapes_filenames = []
+
         for filename in os.listdir(path):
-            if filename.endswith((SK.TIFF_FILE, SK.GEOJSON_FILE)):
-                if load_additional_shapes == "all":
-                    if not any(key in filename for key in images.keys()) and not any(
-                        key in filename for key in labels.keys()
-                    ):
-                        shape_file_names.append(filename)
-                elif load_additional_shapes == "segmentation":
-                    if SK.SEGMENTATION in filename and not any(key in filename for key in labels.keys()):
-                        shape_file_names.append(filename)
-                elif load_additional_shapes == "boundaries":
-                    if SK.BOUNDARIES in filename:
-                        shape_file_names.append(filename)
-                elif isinstance(load_additional_shapes, str):
-                    if load_additional_shapes in filename:
-                        shape_file_names.append(filename)
-                else:
-                    raise ValueError(f"No file found with identifier {load_additional_shapes}")
-
-        for x in range(len(shape_file_names)):
-            shapes[f"{os.path.splitext(shape_file_names[x])[0]}"] = ShapesModel.parse(
-                path / shape_file_names[x],
+            if filename.endswith(SK.TIFF_FILE):
+                if (
+                    load_additional_geometries == "all"
+                    or load_additional_geometries == "segmentation" and SK.SEGMENTATION in filename
+                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
+                ):
+                    labels_filenames.append(filename)
+                    continue
+            if filename.endswith(SK.GEOJSON_FILE):
+                if (
+                    load_additional_geometries == "all"
+                    or load_additional_geometries == "boundaries" and SK.BOUNDARIES in filename
+                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
+                ):
+                    shapes_filenames.append(filename)
+                    continue
+            raise ValueError(f"No file found with identifier {load_additional_geometries}")
+
+        for labels_filename in labels_filenames:
+            labels[f"{os.path.splitext(labels_filename)[0]}"] = Labels2DModel.parse(
+                imread(path / labels_filename, **imread_kwargs).squeeze(),
+                dims=("y", "x"),
+                scale_factors=raster_models_scale_factors,
+                transformations={"global": scaled[x]},
+            )
+
+        for shape_filename in shapes_filenames:
+            # TODO: check that indices of newly parsed shapes match the indices of the existing shapes
+            shapes[f"{os.path.splitext(shape_filename)[0]}"] = ShapesModel.parse(
+                path / shape_filename,
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
                 transformations={"global": Identity()},
             )
+            pass
 
     sdata = SpatialData(images=images, labels=labels, points=points, tables=tables, shapes=shapes)
 
@@ -287,6 +306,7 @@ def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_ke
     # sdata.set_table_annotates_spatialelement(
     #     table_name="table_Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
     # )
+    import spatialdata_plot
 
     gene_name = "Arg1"
     (

From a4434695d18829171cd804a1eb932aef9ec71491 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 12 Dec 2024 10:25:18 +0000
Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata_io/readers/seqfish.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index c41a940c..996c6d4d 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -204,7 +204,6 @@ def get_transcript_file(roi: str) -> str:
     if load_points:
         for x in rois_str:
 
-
             # prepare data
             name = f"{os.path.splitext(get_transcript_file(x))[0]}"
             p = pd.read_csv(path / get_transcript_file(x), delimiter=",")
@@ -241,16 +240,20 @@ def get_transcript_file(roi: str) -> str:
             if filename.endswith(SK.TIFF_FILE):
                 if (
                     load_additional_geometries == "all"
-                    or load_additional_geometries == "segmentation" and SK.SEGMENTATION in filename
-                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
+                    or load_additional_geometries == "segmentation"
+                    and SK.SEGMENTATION in filename
+                    or isinstance(load_additional_geometries, str)
+                    and load_additional_geometries in filename
                 ):
                     labels_filenames.append(filename)
                     continue
             if filename.endswith(SK.GEOJSON_FILE):
                 if (
                     load_additional_geometries == "all"
-                    or load_additional_geometries == "boundaries" and SK.BOUNDARIES in filename
-                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
+                    or load_additional_geometries == "boundaries"
+                    and SK.BOUNDARIES in filename
+                    or isinstance(load_additional_geometries, str)
+                    and load_additional_geometries in filename
                 ):
                     shapes_filenames.append(filename)
                     continue
@@ -271,7 +274,6 @@ def get_transcript_file(roi: str) -> str:
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
                 transformations={"global": Identity()},
             )
-            pass
 
     sdata = SpatialData(images=images, labels=labels, points=points, tables=tables, shapes=shapes)
 
@@ -306,7 +308,6 @@ def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_ke
     # sdata.set_table_annotates_spatialelement(
     #     table_name="table_Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
     # )
-    import spatialdata_plot
 
     gene_name = "Arg1"
     (

From 375fa5cc25b3666d4fc7077d18bf256e0f1e7903 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Thu, 12 Dec 2024 13:21:48 +0100
Subject: [PATCH 09/11] finished code review

---
 src/spatialdata_io/readers/seqfish.py | 115 +++++---------------------
 1 file changed, 22 insertions(+), 93 deletions(-)

diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py
index c41a940c..27431c09 100644
--- a/src/spatialdata_io/readers/seqfish.py
+++ b/src/spatialdata_io/readers/seqfish.py
@@ -6,7 +6,7 @@
 from collections.abc import Mapping
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any, Literal
+from typing import Any
 
 import anndata as ad
 import numpy as np
@@ -36,7 +36,6 @@ def seqfish(
     load_labels: bool = True,
     load_points: bool = True,
     load_shapes: bool = True,
-    load_additional_geometries: Literal["segmentation", "boundaries", "all"] | str | None = None,
     cells_as_circles: bool = False,
     rois: list[int] | None = None,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -69,11 +68,6 @@ def seqfish(
         Whether to load the transcript locations.
     load_shapes
         Whether to load cells as shape.
-    load_additional_geometries
-        Whether to load additional shapes such as segmentation or boundaries for both cells and nuclei. If "all" is
-        specified (default), it reads all remaining .tiff and .geojson files in the directory. Other options are
-        "segmentation", "boundaries", or a string to match in the filename (as a subset). If `None` is passed,
-        no additional geometry is loaded.
     cells_as_circles
         Whether to read cells also as circles instead of labels.
     rois
@@ -87,13 +81,13 @@ def seqfish(
 
     Examples
     --------
-    This code shows how to change the annotation target of the table from the cell labels to the cell cirlces.
-    Please check that Roi1 is present in your dataset, otherwise adjust the code below.
+    This code shows how to change the annotation target of the table from the cell labels to the cell boundaries.
+    Please check that the string Roi1 is used in the naming of your dataset, otherwise adjust the code below.
     >>> from spatialdata_io import seqfish
     >>> sdata = seqfish("path/to/raw/data")
-    >>> sdata["table"].obs["region"] = "Roi1_CellCoordinates"
+    >>> sdata["table_Roi1"].obs["region"] = "Roi1_Boundaries"
     >>> sdata.set_table_annotates_spatialelement(
-    ...     table_name="Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
+    ...     table_name="table_Roi1", region="Roi1_Boundaries", region_key="region", instance_key="instance_id"
     ... )
     >>> sdata.write("path/to/data.zarr")
     """
@@ -126,9 +120,12 @@ def get_count_file(roi: str) -> str:
     def get_dapi_file(roi: str) -> str:
         return f"{roi}_{SK.DAPI}{SK.TIFF_FILE}"
 
-    def get_cell_mask_file(roi: str) -> str:
+    def get_cell_segmentation_labels_file(roi: str) -> str:
         return f"{roi}_{SK.SEGMENTATION}{SK.TIFF_FILE}"
 
+    def get_cell_segmentation_shapes_file(roi: str) -> str:
+        return f"{roi}_{SK.BOUNDARIES}{SK.GEOJSON_FILE}"
+
     def get_transcript_file(roi: str) -> str:
         return f"{roi}_{SK.TRANSCRIPT_COORDINATES}{SK.CSV_FILE}"
 
@@ -154,7 +151,7 @@ def get_transcript_file(roi: str) -> str:
         adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
 
         # map tables to cell labels (defined later)
-        region = os.path.splitext(get_cell_mask_file(roi_str))[0]
+        region = os.path.splitext(get_cell_segmentation_labels_file(roi_str))[0]
         adata.obs[SK.REGION_KEY] = region
         adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
         adata.obs[SK.INSTANCE_KEY_TABLE] = instance_id.to_numpy().astype(np.uint16)
@@ -189,8 +186,8 @@ def get_transcript_file(roi: str) -> str:
 
     if load_labels:
         labels = {
-            f"{os.path.splitext(get_cell_mask_file(x))[0]}": Labels2DModel.parse(
-                imread(path / get_cell_mask_file(x), **imread_kwargs).squeeze(),
+            f"{os.path.splitext(get_cell_segmentation_labels_file(x))[0]}": Labels2DModel.parse(
+                imread(path / get_cell_segmentation_labels_file(x), **imread_kwargs).squeeze(),
                 dims=("y", "x"),
                 scale_factors=raster_models_scale_factors,
                 transformations={"global": scaled[x]},
@@ -204,7 +201,6 @@ def get_transcript_file(roi: str) -> str:
     if load_points:
         for x in rois_str:
 
-
             # prepare data
             name = f"{os.path.splitext(get_transcript_file(x))[0]}"
             p = pd.read_csv(path / get_transcript_file(x), delimiter=",")
@@ -219,59 +215,25 @@ def get_transcript_file(roi: str) -> str:
                 transformations={"global": Identity()},
             )
 
-    if load_shapes:
-        shapes = {
-            f"{os.path.splitext(get_cell_file(x))[0]}": ShapesModel.parse(
+    shapes = {}
+    if cells_as_circles:
+        for x, adata in zip(rois_str, tables.values()):
+            shapes[f"{os.path.splitext(get_cell_file(x))[0]}"] = ShapesModel.parse(
                 adata.obsm[SK.SPATIAL_KEY],
                 geometry=0,
                 radius=np.sqrt(adata.obs[SK.AREA].to_numpy() / np.pi),
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
                 transformations={"global": Identity()},
             )
-            for x, adata in zip(rois_str, tables.values())
-        }
-    else:
-        shapes = {}
-
-    if load_additional_geometries is not None:
-        labels_filenames = []
-        shapes_filenames = []
-
-        for filename in os.listdir(path):
-            if filename.endswith(SK.TIFF_FILE):
-                if (
-                    load_additional_geometries == "all"
-                    or load_additional_geometries == "segmentation" and SK.SEGMENTATION in filename
-                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
-                ):
-                    labels_filenames.append(filename)
-                    continue
-            if filename.endswith(SK.GEOJSON_FILE):
-                if (
-                    load_additional_geometries == "all"
-                    or load_additional_geometries == "boundaries" and SK.BOUNDARIES in filename
-                    or isinstance(load_additional_geometries, str) and load_additional_geometries in filename
-                ):
-                    shapes_filenames.append(filename)
-                    continue
-            raise ValueError(f"No file found with identifier {load_additional_geometries}")
-
-        for labels_filename in labels_filenames:
-            labels[f"{os.path.splitext(labels_filename)[0]}"] = Labels2DModel.parse(
-                imread(path / labels_filename, **imread_kwargs).squeeze(),
-                dims=("y", "x"),
-                scale_factors=raster_models_scale_factors,
+    if load_shapes:
+        for x in rois_str:
+            # this assumes that the index matches the instance key of the table. A more robust approach could be
+            # implemented, as described here https://github.com/scverse/spatialdata-io/issues/249
+            shapes[f"{os.path.splitext(get_cell_segmentation_shapes_file(x))[0]}"] = ShapesModel.parse(
+                path / get_cell_segmentation_shapes_file(x),
                 transformations={"global": scaled[x]},
-            )
-
-        for shape_filename in shapes_filenames:
-            # TODO: check that indices of newly parsed shapes match the indices of the existing shapes
-            shapes[f"{os.path.splitext(shape_filename)[0]}"] = ShapesModel.parse(
-                path / shape_filename,
                 index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
-                transformations={"global": Identity()},
             )
-            pass
 
     sdata = SpatialData(images=images, labels=labels, points=points, tables=tables, shapes=shapes)
 
@@ -287,36 +249,3 @@ def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_ke
                 scalefactor_x = element.attrib[scalefactor_x_key]
                 scalefactor_y = element.attrib[scalefactor_y_key]
     return [float(scalefactor_x), float(scalefactor_y)]
-
-
-if __name__ == "__main__":
-    path_read = "/Users/macbook/ssd/biodata/seqfish/instrument 2 official"
-    sdata = seqfish(
-        path=path_read,
-        # load_images=True,
-        # load_labels=True,
-        # load_points=True,
-        # rois=[1],
-    )
-    # sdata.pl.render_labels(color='Arg1').pl.show()
-    import matplotlib.pyplot as plt
-
-    # plt.show()
-    # sdata["table_Roi1"].obs["region"] = "Roi1_CellCoordinates"
-    # sdata.set_table_annotates_spatialelement(
-    #     table_name="table_Roi1", region="Roi1_CellCoordinates", region_key="region", instance_key="instance_id"
-    # )
-    import spatialdata_plot
-
-    gene_name = "Arg1"
-    (
-        sdata.pl.render_images("Roi1_DAPI", cmap="gray")
-        .pl.render_labels("Roi1_Segmentation", color=gene_name)
-        .pl.render_points("Roi1_TranscriptList", color="name", groups=gene_name, palette="orange")
-        .pl.show(title=f"{gene_name} expression over DAPI image", coordinate_systems="global", figsize=(10, 5))
-    )
-    plt.show()
-
-    from napari_spatialdata import Interactive
-
-    Interactive(sdata)

From 28b0eb3b0d42bc014e75f53b61c316504d84c375 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Thu, 12 Dec 2024 16:30:48 +0100
Subject: [PATCH 10/11] tests for seqfish

---
 tests/test_seqfish.py | 27 +++++++++++++++++++++++++++
 tests/test_xenium.py  |  8 ++------
 2 files changed, 29 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_seqfish.py

diff --git a/tests/test_seqfish.py b/tests/test_seqfish.py
new file mode 100644
index 00000000..e0d66655
--- /dev/null
+++ b/tests/test_seqfish.py
@@ -0,0 +1,27 @@
+import math
+from pathlib import Path
+
+import pytest
+
+from spatialdata_io.readers.seqfish import seqfish
+from tests._utils import skip_if_below_python_version
+
+
+# See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml for instructions on
+# how to download and place the data on disk
+@skip_if_below_python_version()
+@pytest.mark.parametrize("dataset,expected", [("instrument 2 official", "{'y': (0, 108), 'x': (0, 108)}")])
+@pytest.mark.parametrize("rois", [[1], None])
+@pytest.mark.parametrize("cells_as_circles", [False, True])
+def test_example_data(dataset: str, expected: str, rois: list[int] | None, cells_as_circles: bool) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = seqfish(f, cells_as_circles=cells_as_circles, rois=rois)
+    from spatialdata import get_extent
+
+    extent = get_extent(sdata, exact=False)
+    extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent}
+    if cells_as_circles:
+        # manual correction required to take into account for the circle radii
+        expected = "{'y': (-2, 109), 'x': (-2, 109)}"
+    assert str(extent) == expected
diff --git a/tests/test_xenium.py b/tests/test_xenium.py
index 2fb35ce1..32408323 100644
--- a/tests/test_xenium.py
+++ b/tests/test_xenium.py
@@ -40,12 +40,8 @@ def test_roundtrip_with_data_limits() -> None:
     assert np.array_equal(cell_id_str, f0(*f1(cell_id_str)))
 
 
-# The datasets should be downloaded from
-# https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data#test-data
-# and placed in the "data" directory; if you run the tests locally you may need to create a symlink in "tests/data"
-# pointing to "data".
-# The GitHub workflow "prepare_test_data.yaml" takes care of downloading the datasets and uploading an artifact for the
-# tests to use
+# See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml for instructions on
+# how to download and place the data on disk
 @skip_if_below_python_version()
 @pytest.mark.parametrize(
     "dataset,expected",

From 91efd4519072aab66b405cd69ec40c8a59b229a0 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Thu, 12 Dec 2024 17:01:04 +0100
Subject: [PATCH 11/11] fix seqfish test path

---
 tests/test_seqfish.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_seqfish.py b/tests/test_seqfish.py
index e0d66655..bdbf2a60 100644
--- a/tests/test_seqfish.py
+++ b/tests/test_seqfish.py
@@ -10,7 +10,9 @@
 # See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml for instructions on
 # how to download and place the data on disk
 @skip_if_below_python_version()
-@pytest.mark.parametrize("dataset,expected", [("instrument 2 official", "{'y': (0, 108), 'x': (0, 108)}")])
+@pytest.mark.parametrize(
+    "dataset,expected", [("seqfish-2-test-dataset/instrument 2 official", "{'y': (0, 108), 'x': (0, 108)}")]
+)
 @pytest.mark.parametrize("rois", [[1], None])
 @pytest.mark.parametrize("cells_as_circles", [False, True])
 def test_example_data(dataset: str, expected: str, rois: list[int] | None, cells_as_circles: bool) -> None: