From b99fa5f3e9f1a11c70b368bc37ac5dbd4913f0d0 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Wed, 7 Aug 2024 17:29:44 -0400 Subject: [PATCH 01/20] MVP for internal calling of extra props --- src/squidpy/im/__init__.py | 2 +- src/squidpy/im/_feature.py | 168 +++++++++++++++++++++++++++++++++++-- 2 files changed, 163 insertions(+), 7 deletions(-) diff --git a/src/squidpy/im/__init__.py b/src/squidpy/im/__init__.py index fcf54cf94..e0de6618e 100644 --- a/src/squidpy/im/__init__.py +++ b/src/squidpy/im/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations from squidpy.im._container import ImageContainer -from squidpy.im._feature import calculate_image_features +from squidpy.im._feature import calculate_image_features, quantify_morphology from squidpy.im._process import process from squidpy.im._segment import ( SegmentationCustom, diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 91cf4a8c3..54df0edbc 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -6,6 +6,7 @@ import pandas as pd from anndata import AnnData +from spatialdata import SpatialData from scanpy import logging as logg from squidpy._constants._constants import ImageFeature @@ -14,7 +15,94 @@ from squidpy.gr._utils import _save_data from squidpy.im._container import ImageContainer -__all__ = ["calculate_image_features"] +import xarray as xr +import numpy as np +from collections.abc import Generator +from typing import Tuple, List +import matplotlib.pyplot as plt +from skimage.measure import label, perimeter, regionprops + +__all__ = ["calculate_image_features", "quantify_morphology"] + + +def circularity(regionmask) -> float: + """ + Calculate the circularity of the region. + + :param region: Region properties object + :return: circularity of the region + """ + perim = perimeter(regionmask) + if perim == 0: + return 0 + area = np.sum(regionmask) + return float((4 * np.pi * area) / (perim**2)) + + +def _get_region_props( + label_element: xr.DataArray, + image_element: xr.DataArray, + props: List[str] | None = None, +) -> pd.DataFrame: + + if props is None: + # if we didn't get any properties, we'll do the bare minimum + props = ["label"] + + np_bitmap = label_element.values + np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) + + labeled_image = label(np_bitmap) + # can't use regionprops_table because it only returns int + regions = regionprops( + label_image=labeled_image, + intensity_image=np_rgb_image, + extra_properties=(circularity,), + ) + # dynamically extract specified properties and create a df + extracted_props = {prop: [] for prop in props} + for region in regions: + for prop in props: + try: + extracted_props[prop].append(getattr(region, prop)) + except AttributeError as e: + # Handle custom properties or missing attributes + if prop == "circularity": + extracted_props[prop].append(circularity(region)) + else: + raise ValueError( + f"Property '{prop}' is not available in the region properties." + ) from e + + return pd.DataFrame(extracted_props) + + +def _subset_image_using_label( + label_element: xr.DataArray, image_element: xr.DataArray +) -> Generator[Tuple[int, xr.DataArray], None, None]: + """ + A generator that extracts subsets of the RGB image based on the bitmap. 
+ + :param label: xarray.DataArray with cell identifiers + :param image: xarray.DataArray with RGB image data + :yield: Subsets of the RGB image corresponding to each cell in the bitmap + """ + unique_cells = np.unique(label_element.values) + + for cell_id in unique_cells: + if cell_id == 0: + # Assuming 0 is the background or non-cell area, skip it + continue + + cell_mask = xr.DataArray( + label_element.values == cell_id, + dims=label_element.dims, + coords=label_element.coords, + ) + + subset = image_element.where(cell_mask, drop=True) + + yield cell_id, subset @d.dedent @@ -85,7 +173,9 @@ def calculate_image_features( features = sorted({ImageFeature(f).s for f in features}) n_jobs = _get_n_cores(n_jobs) - start = logg.info(f"Calculating features `{list(features)}` using `{n_jobs}` core(s)") + start = logg.info( + f"Calculating features `{list(features)}` using `{n_jobs}` core(s)" + ) res = parallelize( _calculate_image_features_helper, @@ -94,7 +184,15 @@ def calculate_image_features( n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, - )(adata, img, layer=layer, library_id=library_id, features=features, features_kwargs=features_kwargs, **kwargs) + )( + adata, + img, + layer=layer, + library_id=library_id, + features=features, + features_kwargs=features_kwargs, + **kwargs, + ) if copy: logg.info("Finish", time=start) @@ -103,6 +201,55 @@ def calculate_image_features( _save_data(adata, attr="obsm", key=key_added, data=res, time=start) +def quantify_morphology( + sdata: SpatialData, + label: str | None = None, + image: str | None = None, + methods: list[str] | str = None, + **kwargs: Any, +) -> pd.DataFrame | None: + + if label is None and image is None: + raise ValueError("Either `label` or `image` must be provided.") + + if image is not None and label is None: + raise ValueError( + "If `image` is provided, a `label` with matching segmentation borders must be provided." 
+ ) + + if methods is None: + # default case but without mutable argument as default value + methods = ["label", "area", "eccentricity", "perimeter", "sphericity"] + elif isinstance(methods, str): + methods = [methods] + + if not isinstance(methods, list): + raise ValueError("Argument `methods` must be a list of strings.") + + if not all(isinstance(method, str) for method in methods): + raise ValueError("All elements in `methods` must be strings.") + + for element in [label, image]: + if element is not None and element not in sdata: + raise KeyError(f"Key `{element}` not found in `sdata`.") + + # from here on we should be certain that we have a label + label_element = sdata[label] + image_element = sdata[image] if image is not None else None + + # cell_generator = _subset_image_using_label(label_element, image_element) + + # for cell_id, subset in cell_generator: + # print(f"Cell ID: {cell_id}") + # print(subset) + # plt.imshow(subset.transpose("y", "x", "c")) + + props = _get_region_props(label_element, image_element, props=methods) + + # Print some properties for each region + return props + + def _calculate_image_features_helper( obs_ids: Sequence[str], adata: AnnData, @@ -116,7 +263,12 @@ def _calculate_image_features_helper( ) -> pd.DataFrame: features_list = [] for crop in img.generate_spot_crops( - adata, obs_names=obs_ids, library_id=library_id, return_obs=False, as_array=False, **kwargs + adata, + obs_names=obs_ids, + library_id=library_id, + return_obs=False, + as_array=False, + **kwargs, ): if TYPE_CHECKING: assert isinstance(crop, ImageContainer) @@ -135,12 +287,16 @@ def _calculate_image_features_helper( elif feature == ImageFeature.SUMMARY: res = crop.features_summary(layer=layer, **feature_kwargs) elif feature == ImageFeature.SEGMENTATION: - res = crop.features_segmentation(intensity_layer=layer, **feature_kwargs) + res = crop.features_segmentation( + intensity_layer=layer, **feature_kwargs + ) elif feature == ImageFeature.CUSTOM: res = crop.features_custom(layer=layer, **feature_kwargs) else: # should never get here - raise NotImplementedError(f"Feature `{feature}` is not yet implemented.") + raise NotImplementedError( + f"Feature `{feature}` is not yet implemented." 
+ ) features_dict.update(res) features_list.append(features_dict) From bbecec3c6693ca52fe55f87790e0da1e1330dcf5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 21:32:30 +0000 Subject: [PATCH 02/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/im/_feature.py | 39 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 54df0edbc..ef17b860b 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -1,13 +1,17 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +from collections.abc import Generator, Mapping, Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List, Tuple +import matplotlib.pyplot as plt +import numpy as np import pandas as pd +import xarray as xr from anndata import AnnData -from spatialdata import SpatialData from scanpy import logging as logg +from skimage.measure import label, perimeter, regionprops +from spatialdata import SpatialData from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs @@ -15,13 +19,6 @@ from squidpy.gr._utils import _save_data from squidpy.im._container import ImageContainer -import xarray as xr -import numpy as np -from collections.abc import Generator -from typing import Tuple, List -import matplotlib.pyplot as plt -from skimage.measure import label, perimeter, regionprops - __all__ = ["calculate_image_features", "quantify_morphology"] @@ -44,7 +41,6 @@ def _get_region_props( image_element: xr.DataArray, props: List[str] | None = None, ) -> pd.DataFrame: - if props is None: # if we didn't get any properties, we'll do the bare minimum props = ["label"] @@ -70,9 +66,7 @@ def _get_region_props( if prop == "circularity": extracted_props[prop].append(circularity(region)) else: - raise ValueError( - f"Property '{prop}' is not available in the region properties." - ) from e + raise ValueError(f"Property '{prop}' is not available in the region properties.") from e return pd.DataFrame(extracted_props) @@ -173,9 +167,7 @@ def calculate_image_features( features = sorted({ImageFeature(f).s for f in features}) n_jobs = _get_n_cores(n_jobs) - start = logg.info( - f"Calculating features `{list(features)}` using `{n_jobs}` core(s)" - ) + start = logg.info(f"Calculating features `{list(features)}` using `{n_jobs}` core(s)") res = parallelize( _calculate_image_features_helper, @@ -208,14 +200,11 @@ def quantify_morphology( methods: list[str] | str = None, **kwargs: Any, ) -> pd.DataFrame | None: - if label is None and image is None: raise ValueError("Either `label` or `image` must be provided.") if image is not None and label is None: - raise ValueError( - "If `image` is provided, a `label` with matching segmentation borders must be provided." 
- ) + raise ValueError("If `image` is provided, a `label` with matching segmentation borders must be provided.") if methods is None: # default case but without mutable argument as default value @@ -287,16 +276,12 @@ def _calculate_image_features_helper( elif feature == ImageFeature.SUMMARY: res = crop.features_summary(layer=layer, **feature_kwargs) elif feature == ImageFeature.SEGMENTATION: - res = crop.features_segmentation( - intensity_layer=layer, **feature_kwargs - ) + res = crop.features_segmentation(intensity_layer=layer, **feature_kwargs) elif feature == ImageFeature.CUSTOM: res = crop.features_custom(layer=layer, **feature_kwargs) else: # should never get here - raise NotImplementedError( - f"Feature `{feature}` is not yet implemented." - ) + raise NotImplementedError(f"Feature `{feature}` is not yet implemented.") features_dict.update(res) features_list.append(features_dict) From 6e3d652476edefd672f2252623e02dfc46c0739f Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Thu, 8 Aug 2024 19:06:08 -0400 Subject: [PATCH 03/20] Added option to externally feed in functions --- src/squidpy/im/_feature.py | 56 +++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 54df0edbc..55c18280b 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -2,7 +2,7 @@ from collections.abc import Mapping, Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Callable import pandas as pd from anndata import AnnData @@ -43,6 +43,7 @@ def _get_region_props( label_element: xr.DataArray, image_element: xr.DataArray, props: List[str] | None = None, + extra_methods: List[Callable] = [], ) -> pd.DataFrame: if props is None: @@ -51,18 +52,18 @@ def _get_region_props( np_bitmap = label_element.values np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) - + a = [circularity,] + extra_methods labeled_image = label(np_bitmap) # can't use regionprops_table because it only returns int regions = regionprops( label_image=labeled_image, intensity_image=np_rgb_image, - extra_properties=(circularity,), + extra_properties=a, ) # dynamically extract specified properties and create a df - extracted_props = {prop: [] for prop in props} + extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} for region in regions: - for prop in props: + for prop in props + [e.__name__ for e in extra_methods]: try: extracted_props[prop].append(getattr(region, prop)) except AttributeError as e: @@ -205,7 +206,8 @@ def quantify_morphology( sdata: SpatialData, label: str | None = None, image: str | None = None, - methods: list[str] | str = None, + methods: list[str | Callable] | str | Callable = None, + split_by_channels: bool = False, **kwargs: Any, ) -> pd.DataFrame | None: @@ -220,14 +222,20 @@ def quantify_morphology( if methods is None: # default case but without mutable argument as default value methods = ["label", "area", "eccentricity", "perimeter", "sphericity"] - elif isinstance(methods, str): + elif isinstance(methods, (str, Callable)): methods = [methods] if not isinstance(methods, list): raise ValueError("Argument `methods` must be a list of strings.") - if not all(isinstance(method, str) for method in methods): - raise ValueError("All elements in `methods` must be strings.") + if not all(isinstance(method, (str, Callable)) for method in methods): + raise ValueError("All elements in 
`methods` must be strings or callables.") + + extra_methods = [] + for method in methods: + if callable(method): + extra_methods.append(method) + methods.remove(method) for element in [label, image]: if element is not None and element not in sdata: @@ -237,17 +245,27 @@ def quantify_morphology( label_element = sdata[label] image_element = sdata[image] if image is not None else None - # cell_generator = _subset_image_using_label(label_element, image_element) - - # for cell_id, subset in cell_generator: - # print(f"Cell ID: {cell_id}") - # print(subset) - # plt.imshow(subset.transpose("y", "x", "c")) - - props = _get_region_props(label_element, image_element, props=methods) + region_props = _get_region_props( + label_element, + image_element, + props=methods, + extra_methods=extra_methods, + ) - # Print some properties for each region - return props + if split_by_channels: + channels = image_element.c.values + for col in region_props.columns: + # did the method return a list of values? + if isinstance(region_props[col].values[0], (list, tuple, np.ndarray)): + # are all lists of the length of the channel list? + if all(len(val) == len(channels) for val in region_props[col].values): + for i, channel in enumerate(channels): + region_props[f"{col}_ch{channel}"] = [ + val[i] for val in region_props[col].values + ] + region_props.drop(columns=[col], inplace=True) + + return region_props def _calculate_image_features_helper( From 753ec3a7d745d9fd061261fa783fc1b2fd18a1e6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:07:34 +0000 Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/im/_feature.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 824656fbe..b7d678a3e 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -2,10 +2,8 @@ from collections.abc import Generator, Mapping, Sequence from types import MappingProxyType - from typing import TYPE_CHECKING, Any, Callable, List, Tuple - import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -50,7 +48,9 @@ def _get_region_props( np_bitmap = label_element.values np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) - a = [circularity,] + extra_methods + a = [ + circularity, + ] + extra_methods labeled_image = label(np_bitmap) # can't use regionprops_table because it only returns int regions = regionprops( @@ -251,9 +251,7 @@ def quantify_morphology( # are all lists of the length of the channel list? 
if all(len(val) == len(channels) for val in region_props[col].values): for i, channel in enumerate(channels): - region_props[f"{col}_ch{channel}"] = [ - val[i] for val in region_props[col].values - ] + region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] region_props.drop(columns=[col], inplace=True) return region_props From 34f745f530410d11cd3870055130bd0f86180490 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Fri, 16 Aug 2024 15:10:22 +0200 Subject: [PATCH 05/20] add rough functionality to write regionprops into sdata["table"].obsm as numpy array --- src/squidpy/im/_feature.py | 53 +++++++++++++++++++++++++++------ tests/image/test_morphology.py | 54 ++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 tests/image/test_morphology.py diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index b7d678a3e..0d1dde85a 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -1,8 +1,10 @@ from __future__ import annotations +import re +import warnings from collections.abc import Generator, Mapping, Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Any, Callable, List, Tuple +from typing import TYPE_CHECKING, Any, Callable import matplotlib.pyplot as plt import numpy as np @@ -10,7 +12,7 @@ import xarray as xr from anndata import AnnData from scanpy import logging as logg -from skimage.measure import label, perimeter, regionprops +from skimage.measure import perimeter, regionprops from spatialdata import SpatialData from squidpy._constants._constants import ImageFeature @@ -22,7 +24,7 @@ __all__ = ["calculate_image_features", "quantify_morphology"] -def circularity(regionmask) -> float: +def circularity(regionmask: np.ndarray) -> float: """ Calculate the circularity of the region. @@ -39,26 +41,30 @@ def circularity(regionmask) -> float: def _get_region_props( label_element: xr.DataArray, image_element: xr.DataArray, - props: List[str] | None = None, - extra_methods: List[Callable] = [], + props: list[str] | None = None, + extra_methods: list[Callable[[np.ndarray, np.ndarray], int | float | list[int | float]]] | None = None, ) -> pd.DataFrame: + if not extra_methods: + extra_methods = [] if props is None: # if we didn't get any properties, we'll do the bare minimum props = ["label"] - np_bitmap = label_element.values np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) a = [ circularity, ] + extra_methods - labeled_image = label(np_bitmap) # can't use regionprops_table because it only returns int regions = regionprops( - label_image=labeled_image, + label_image=label_element.values, intensity_image=np_rgb_image, extra_properties=a, ) # dynamically extract specified properties and create a df + # if np.any(label_element.values == 0): + # # initialize with label 0 because regionprops drops label 0 + # extracted_props = {prop: [0] for prop in props + [e.__name__ for e in extra_methods]} + # else: extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} for region in regions: for prop in props + [e.__name__ for e in extra_methods]: @@ -76,7 +82,7 @@ def _get_region_props( def _subset_image_using_label( label_element: xr.DataArray, image_element: xr.DataArray -) -> Generator[Tuple[int, xr.DataArray], None, None]: +) -> Generator[tuple[int, xr.DataArray], None, None]: """ A generator that extracts subsets of the RGB image based on the bitmap. 
@@ -254,6 +260,35 @@ def quantify_morphology( region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] region_props.drop(columns=[col], inplace=True) + if len(sdata.tables) > 1: + warnings.warn( + "Multiple tables detected in `sdata`, " + "using first table to store regionprops from sq.im.quantify_morphology", + stacklevel=1, + ) + + table_key = next(iter(sdata.tables)) + # columns = sdata[table_key].obs.columns + # id_regex = re.compile(".*id.*", re.IGNORECASE) + # + # matches = [] + # for column in columns: + # match = id_regex.match(column) + # if match: + # matches.append(match.group(0)) + # + # if len(matches) != 1: + # raise AttributeError("Unable to find ID column in `sdata`.") + # + # else: + # id_column = matches[0] + + # df = region_props.set_index("label", drop=False) + # df.index.name = id_column + # df.rename({"label": id_column}, axis="columns", inplace=True) + + sdata[table_key].obsm["morphology"] = region_props.to_numpy() + return region_props diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py new file mode 100644 index 000000000..8f6ce5d35 --- /dev/null +++ b/tests/image/test_morphology.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import numpy as np +import numpy.testing as npt +import pytest +import skimage.measure +import squidpy as sq +from spatialdata import SpatialData +from spatialdata.datasets import blobs, raccoon + +# noinspection PyProtectedMember +from squidpy.im._feature import _get_region_props + + +@pytest.fixture +def sdata_racoon() -> SpatialData: + return raccoon() + + +@pytest.fixture +def sdata_blobs() -> SpatialData: + return blobs() + + +@pytest.mark.xfail( + raises=AssertionError, reason="calling skimage.measure.label on segmentation produces additional labels" +) +class TestSkimageBackend: + def test_label_bug_present(self, sdata_blobs): + npt.assert_array_equal( + np.unique(skimage.measure.label(sdata_blobs["blobs_labels"])), + np.unique(sdata_blobs["blobs_labels"]), + ) + + +class TestMorphology: + def test__get_region_props(self, sdata_blobs): + region_props = _get_region_props( + label_element=sdata_blobs["blobs_labels"], + image_element=sdata_blobs["blobs_image"], + props=["label", "area", "eccentricity"], + ) + assert len(region_props) == len(sdata_blobs["table"].obs) + + def test_quantify_morphology_callables(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=["label", "area", "eccentricity"], + split_by_channels=True, + ) + + assert "morphology" in sdata_blobs["table"].obsm.keys() From 07606e2157d2c5a1266924e1d52f052519fbf09b Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Fri, 23 Aug 2024 15:12:56 +0200 Subject: [PATCH 06/20] DataFrame now written to obsm instead of numpy array Also fix some failing code checks --- src/squidpy/im/_feature.py | 61 +++++++++++++++------------------- tests/image/test_morphology.py | 2 ++ 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 0d1dde85a..b5b7397a1 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -1,14 +1,14 @@ from __future__ import annotations -import re import warnings from collections.abc import Generator, Mapping, Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any, Callable, TypeVar, Union -import matplotlib.pyplot as plt import numpy as np +import numpy.typing as npt 
import pandas as pd +import spatialdata as sd import xarray as xr from anndata import AnnData from scanpy import logging as logg @@ -23,12 +23,18 @@ __all__ = ["calculate_image_features", "quantify_morphology"] +IntegerNDArrayType = TypeVar("IntegerNDArrayType", bound=npt.NDArray[np.integer[Any]]) +FloatNDArrayType = TypeVar("FloatNDArrayType", bound=npt.NDArray[np.floating[Any]]) +RegionPropsCallableType = Callable[ + [IntegerNDArrayType, FloatNDArrayType], Union[Union[int, float, list[Union[int, float]]]] +] -def circularity(regionmask: np.ndarray) -> float: + +def circularity(regionmask: IntegerNDArrayType) -> float: """ Calculate the circularity of the region. - :param region: Region properties object + :param regionmask: Region properties object :return: circularity of the region """ perim = perimeter(regionmask) @@ -42,7 +48,7 @@ def _get_region_props( label_element: xr.DataArray, image_element: xr.DataArray, props: list[str] | None = None, - extra_methods: list[Callable[[np.ndarray, np.ndarray], int | float | list[int | float]]] | None = None, + extra_methods: list[RegionPropsCallableType] | None = None, ) -> pd.DataFrame: if not extra_methods: extra_methods = [] @@ -61,11 +67,7 @@ def _get_region_props( extra_properties=a, ) # dynamically extract specified properties and create a df - # if np.any(label_element.values == 0): - # # initialize with label 0 because regionprops drops label 0 - # extracted_props = {prop: [0] for prop in props + [e.__name__ for e in extra_methods]} - # else: - extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} + extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} # type: dict[str, list[int | float]] for region in regions: for prop in props + [e.__name__ for e in extra_methods]: try: @@ -86,8 +88,8 @@ def _subset_image_using_label( """ A generator that extracts subsets of the RGB image based on the bitmap. 
- :param label: xarray.DataArray with cell identifiers - :param image: xarray.DataArray with RGB image data + :param label_element: xarray.DataArray with cell identifiers + :param image_element: xarray.DataArray with RGB image data :yield: Subsets of the RGB image corresponding to each cell in the bitmap """ unique_cells = np.unique(label_element.values) @@ -206,7 +208,7 @@ def quantify_morphology( sdata: SpatialData, label: str | None = None, image: str | None = None, - methods: list[str | Callable] | str | Callable = None, + methods: list[str | Callable] | None = None, split_by_channels: bool = False, **kwargs: Any, ) -> pd.DataFrame | None: @@ -228,6 +230,9 @@ def quantify_morphology( if not all(isinstance(method, (str, Callable)) for method in methods): raise ValueError("All elements in `methods` must be strings or callables.") + if "label" not in methods: + methods = ["label"].extend(methods) + extra_methods = [] for method in methods: if callable(method): @@ -267,27 +272,13 @@ def quantify_morphology( stacklevel=1, ) - table_key = next(iter(sdata.tables)) - # columns = sdata[table_key].obs.columns - # id_regex = re.compile(".*id.*", re.IGNORECASE) - # - # matches = [] - # for column in columns: - # match = id_regex.match(column) - # if match: - # matches.append(match.group(0)) - # - # if len(matches) != 1: - # raise AttributeError("Unable to find ID column in `sdata`.") - # - # else: - # id_column = matches[0] - - # df = region_props.set_index("label", drop=False) - # df.index.name = id_column - # df.rename({"label": id_column}, axis="columns", inplace=True) - - sdata[table_key].obsm["morphology"] = region_props.to_numpy() + table_key = next(iter(sd.get_element_annotators(sdata, "blobs_labels"))) + + region_props = region_props.set_index("label", drop=True) + region_props.index.name = None + region_props.index = region_props.index.astype(str) + + sdata[table_key].obsm["morphology"] = region_props return region_props diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 8f6ce5d35..10e56cbc4 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -2,6 +2,7 @@ import numpy as np import numpy.testing as npt +import pandas as pd import pytest import skimage.measure import squidpy as sq @@ -52,3 +53,4 @@ def test_quantify_morphology_callables(self, sdata_blobs): ) assert "morphology" in sdata_blobs["table"].obsm.keys() + assert isinstance(sdata_blobs["table"].obsm["morphology"], pd.DataFrame) From 0f0ea7c75164b9f6cd359c89d829b11dd8e2f8c0 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Tue, 27 Aug 2024 16:11:09 +0200 Subject: [PATCH 07/20] add granularity measurement from afermg/cp_measurements https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py --- src/squidpy/im/_feature.py | 27 +-- src/squidpy/im/_measurements.py | 306 ++++++++++++++++++++++++++++++++ tests/image/test_morphology.py | 6 +- 3 files changed, 327 insertions(+), 12 deletions(-) create mode 100644 src/squidpy/im/_measurements.py diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index b5b7397a1..e782a3793 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -20,6 +20,7 @@ from squidpy._utils import Signal, SigQueue, _get_n_cores, parallelize from squidpy.gr._utils import _save_data from squidpy.im._container import ImageContainer +from squidpy.im import _measurements __all__ = ["calculate_image_features", "quantify_morphology"] @@ -57,14 +58,18 @@ def _get_region_props( props = 
["label"] np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) - a = [ + + # Add custom extra methods here + rp_extra_methods = [ circularity, + _measurements.granularity, ] + extra_methods + # can't use regionprops_table because it only returns int regions = regionprops( label_image=label_element.values, intensity_image=np_rgb_image, - extra_properties=a, + extra_properties=rp_extra_methods, ) # dynamically extract specified properties and create a df extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} # type: dict[str, list[int | float]] @@ -73,11 +78,11 @@ def _get_region_props( try: extracted_props[prop].append(getattr(region, prop)) except AttributeError as e: - # Handle custom properties or missing attributes - if prop == "circularity": - extracted_props[prop].append(circularity(region)) - else: - raise ValueError(f"Property '{prop}' is not available in the region properties.") from e + # # Handle custom properties or missing attributes + # if prop == "circularity": + # extracted_props[prop].append(circularity(region)) + # else: + raise ValueError(f"Property '{prop}' is not available in the region properties.") from e return pd.DataFrame(extracted_props) @@ -265,14 +270,14 @@ def quantify_morphology( region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] region_props.drop(columns=[col], inplace=True) - if len(sdata.tables) > 1: + tables = sd.get_element_annotators(sdata, label) + if len(tables) > 1: warnings.warn( - "Multiple tables detected in `sdata`, " + f"Multiple tables detected in `sdata` matching , {label}" "using first table to store regionprops from sq.im.quantify_morphology", stacklevel=1, ) - - table_key = next(iter(sd.get_element_annotators(sdata, "blobs_labels"))) + table_key = next(iter(tables)) region_props = region_props.set_index("label", drop=True) region_props.index.name = None diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py new file mode 100644 index 000000000..c2e4d2432 --- /dev/null +++ b/src/squidpy/im/_measurements.py @@ -0,0 +1,306 @@ +# Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py + +import numpy +import scipy.ndimage +import skimage.morphology + +__doc__ = """\ +MeasureGranularity +================== +**MeasureGranularity** outputs spectra of size measurements of the +textures in the image. + +Image granularity is a texture measurement that tries to fit a series of +structure elements of increasing size into the texture of the image and outputs a spectrum of measures +based on how well they fit. +Granularity is measured as described by Ilya Ravkin (references below). + +Basically, MeasureGranularity: +1 - Downsamples the image (if you tell it to). This is set in +**Subsampling factor for granularity measurements** or **Subsampling factor for background reduction**. +2 - Background subtracts anything larger than the radius in pixels set in +**Radius of structuring element.** +3 - For as many times as you set in **Range of the granular spectrum**, it gets rid of bright areas +that are only 1 pixel across, reports how much signal was lost by doing that, then repeats. +i.e. The first time it removes one pixel from all bright areas in the image, +(effectively deleting those that are only 1 pixel in size) and then reports what % of the signal was lost. 
+It then takes the first-iteration image and repeats the removal and reporting (effectively reporting +the amount of signal that is two pixels in size). etc. + +|MeasureGranularity_example| + +As of **CellProfiler 4.0** the settings for this module have been changed to simplify +configuration. A single set of parameters is now applied to all images and objects within the module, +rather than each image needing individual configuration. +Pipelines from older versions will be converted to match this format. If multiple sets of parameters +were defined CellProfiler will apply the first set from the older pipeline version. +Specifying multiple sets of parameters can still be achieved by running multiple copies of this module. + + +| + +============ ============ =============== +Supports 2D? Supports 3D? Respects masks? +============ ============ =============== +YES YES YES +============ ============ =============== + +Measurements made by this module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- *Granularity:* The module returns one measurement for each instance + of the granularity spectrum set in **Range of the granular spectrum**. + +References +^^^^^^^^^^ + +- Serra J. (1989) *Image Analysis and Mathematical Morphology*, Vol. 1. + Academic Press, London +- Maragos P. “Pattern spectrum and multiscale shape representation”, + *IEEE Transactions on Pattern Analysis and Machine Intelligence*, 11, + N 7, pp. 701-716, 1989 +- Vincent L. (2000) “Granulometries and Opening Trees”, *Fundamenta + Informaticae*, 41, No. 1-2, pp. 57-90, IOS Press, 2000. +- Vincent L. (1992) “Morphological Area Opening and Closing for + Grayscale Images”, *Proc. NATO Shape in Picture Workshop*, + Driebergen, The Netherlands, pp. 197-208. +- Ravkin I, Temov V. (1988) “Bit representation techniques and image + processing”, *Applied Informatics*, v.14, pp. 41-90, Finances and + Statistics, Moskow, (in Russian) +""" + + +def granularity( + mask: numpy.ndarray, + pixels: numpy.ndarray, + subsample_size: float = 0.25, + image_sample_size: float = 0.25, + element_size: int = 10, + granular_spectrum_length: int = 1, # default 16 +): + """ + Parameters + ---------- + subsample_size : float, optional + Subsampling factor for granularity measurements. + If the textures of interest are larger than a few pixels, we recommend + you subsample the image with a factor <1 to speed up the processing. + Downsampling the image will let you detect larger structures with a + smaller sized structure element. A factor >1 might increase the accuracy + but also require more processing time. Images are typically of higher + resolution than is required for granularity measurements, so the default + value is 0.25. For low-resolution images, increase the subsampling + fraction; for high-resolution images, decrease the subsampling fraction. + Subsampling by 1/4 reduces computation time by (1/4) :sup:`3` because the + size of the image is (1/4) :sup:`2` of original and the range of granular + spectrum can be 1/4 of original. Moreover, the results are sometimes + actually a little better with subsampling, which is probably because + with subsampling the individual granular spectrum components can be used + as features, whereas without subsampling a feature should be a sum of + several adjacent granular spectrum components. The recommendation on the + numerical value cannot be determined in advance; an analysis as in this + reference may be required before running the whole set. See this `pdf`_, + slides 27-31, 49-50. + + .. 
_pdf: http://www.ravkin.net/presentations/Statistical%20properties%20of%20algorithms%20for%20analysis%20of%20cell%20images.pdf" + + image_sample_size : float, optional + Subsampling factor for background reduction. + It is important to remove low frequency image background variations as + they will affect the final granularity measurement. Any method can be + used as a pre-processing step prior to this module; we have chosen to + simply subtract a highly open image. To do it quickly, we subsample the + image first. The subsampling factor for background reduction is usually + [0.125 – 0.25]. This is highly empirical, but a small factor should be + used if the structures of interest are large. The significance of + background removal in the context of granulometry is that image volume + at certain granular size is normalized by total image volume, which + depends on how the background was removed. + + element_size : int, optional + Radius of structuring element. + This radius should correspond to the radius of the textures of interest + *after* subsampling; i.e., if textures in the original image scale have + a radius of 40 pixels, and a subsampling factor of 0.25 is used, the + structuring element size should be 10 or slightly smaller, and the range + of the spectrum defined below will cover more sizes. + + granular_spectrum_length : int, optional + Range of the granular spectrum. + You may need a trial run to see which granular + spectrum range yields informative measurements. Start by using a wide + spectrum and narrow it down to the informative range to save time. + + Returns + ------- + None + + """ + # + # Downsample the image and mask + # + new_shape = numpy.array(pixels.shape) + if subsample_size < 1: + new_shape = new_shape * subsample_size + if pixels.ndim == 2: + i, j = ( + numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) + / subsample_size + ) + pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) + mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 + else: + k, i, j = ( + numpy.mgrid[ + 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] + ].astype(float) + / subsample_size + ) + pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) + mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 + else: + pixels = pixels.copy() + mask = mask.copy() + # + # Remove background pixels using a greyscale tophat filter + # + if image_sample_size < 1: + back_shape = new_shape * image_sample_size + if pixels.ndim == 2: + i, j = ( + numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) + / image_sample_size + ) + back_pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) + back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 + else: + k, i, j = ( + numpy.mgrid[ + 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] + ].astype(float) + / subsample_size + ) + back_pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) + back_mask = ( + scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 + ) + else: + back_pixels = pixels + back_mask = mask + back_shape = new_shape + radius = element_size + if pixels.ndim == 2: + footprint = skimage.morphology.disk(radius, dtype=bool) + else: + footprint = skimage.morphology.ball(radius, dtype=bool) + back_pixels_mask = numpy.zeros_like(back_pixels) + back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] + back_pixels = skimage.morphology.erosion(back_pixels_mask, footprint=footprint) + back_pixels_mask = 
numpy.zeros_like(back_pixels) + back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] + back_pixels = skimage.morphology.dilation(back_pixels_mask, footprint=footprint) + if image_sample_size < 1: + if pixels.ndim == 2: + i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. + # + i *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) + else: + k, i, j = numpy.mgrid[ + 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] + ].astype(float) + k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) + pixels -= back_pixels + pixels[pixels < 0] = 0 + + # Transcribed from the Matlab module: granspectr function + # + # CALCULATES GRANULAR SPECTRUM, ALSO KNOWN AS SIZE DISTRIBUTION, + # GRANULOMETRY, AND PATTERN SPECTRUM, SEE REF.: + # J.Serra, Image Analysis and Mathematical Morphology, Vol. 1. Academic Press, London, 1989 + # Maragos,P. "Pattern spectrum and multiscale shape representation", IEEE Transactions on Pattern Analysis and Machine Intelligence, 11, N 7, pp. 701-716, 1989 + # L.Vincent "Granulometries and Opening Trees", Fundamenta Informaticae, 41, No. 1-2, pp. 57-90, IOS Press, 2000. + # L.Vincent "Morphological Area Opening and Closing for Grayscale Images", Proc. NATO Shape in Picture Workshop, Driebergen, The Netherlands, pp. 197-208, 1992. + # I.Ravkin, V.Temov "Bit representation techniques and image processing", Applied Informatics, v.14, pp. 41-90, Finances and Statistics, Moskow, 1988 (in Russian) + # THIS IMPLEMENTATION INSTEAD OF OPENING USES EROSION FOLLOWED BY RECONSTRUCTION + # + ng = granular_spectrum_length + startmean = numpy.mean(pixels[mask]) + ero = pixels.copy() + # Mask the test image so that masked pixels will have no effect + # during reconstruction + # + ero[~mask] = 0 + currentmean = startmean + startmean = max(startmean, numpy.finfo(float).eps) + + if pixels.ndim == 2: + footprint = skimage.morphology.disk(1, dtype=bool) + else: + footprint = skimage.morphology.ball(1, dtype=bool) + results = [] + for i in range(1, ng + 1): + prevmean = currentmean + ero_mask = numpy.zeros_like(ero) + ero_mask[mask == True] = ero[mask == True] + ero = skimage.morphology.erosion(ero_mask, footprint=footprint) + rec = skimage.morphology.reconstruction(ero, pixels, footprint=footprint) + currentmean = numpy.mean(rec[mask]) + gs = (prevmean - currentmean) * 100 / startmean + + # TODO find better solution for the return + # results[f"Granularity_{str(i)}"] = gs + results.append(gs) + # Restore the reconstructed image to the shape of the + # original image so we can match against object labels + # + orig_shape = pixels.shape + if pixels.ndim == 2: + i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. 
+ # + i *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) + else: + k, i, j = numpy.mgrid[ + 0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2] + ].astype(float) + k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) + rec = scipy.ndimage.map_coordinates(rec, (k, i, j), order=1) + # TODO check if this is necessary + # Calculate the means for the objects + # + # for object_record in object_records: + # assert isinstance(object_record, ObjectRecord) + # if object_record.nobjects > 0: + # new_mean = fix( + # scipy.ndimage.mean( + # rec, object_record.labels, object_record.range + # ) + # ) + # gss = ( + # (object_record.current_mean - new_mean) + # * 100 + # / object_record.start_mean + # ) + # object_record.current_mean = new_mean + # else: + # gss = numpy.zeros((0,)) + # measurements.add_measurement(object_record.name, feature, gss) + + if len(results) == 1: + return results[0] + else: + return results diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 10e56cbc4..1c8e47238 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -48,9 +48,13 @@ def test_quantify_morphology_callables(self, sdata_blobs): sdata=sdata_blobs, label="blobs_labels", image="blobs_image", - methods=["label", "area", "eccentricity"], + methods=["label", "area", "eccentricity", "circularity", "granularity"], split_by_channels=True, ) assert "morphology" in sdata_blobs["table"].obsm.keys() assert isinstance(sdata_blobs["table"].obsm["morphology"], pd.DataFrame) + assert "circularity" in sdata_blobs["table"].obsm["morphology"].columns + assert "granularity_ch0" in sdata_blobs["table"].obsm["morphology"].columns + assert "granularity_ch1" in sdata_blobs["table"].obsm["morphology"].columns + assert "granularity_ch2" in sdata_blobs["table"].obsm["morphology"].columns From b33895bf0e3f6fba66f27aebd1156ce1a5393b60 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Thu, 29 Aug 2024 16:28:15 +0200 Subject: [PATCH 08/20] add border_occupied_factor --- src/squidpy/im/_feature.py | 26 +++-- src/squidpy/im/_measurements.py | 177 +++++++++++++++++++++++++------- tests/image/test_morphology.py | 64 +++++++++++- 3 files changed, 222 insertions(+), 45 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index e782a3793..ef94182c4 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -30,6 +30,10 @@ [IntegerNDArrayType, FloatNDArrayType], Union[Union[int, float, list[Union[int, float]]]] ] +RegionPropsImageCallableType = Callable[ + [IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]] +] + def circularity(regionmask: IntegerNDArrayType) -> float: """ @@ -62,9 +66,15 @@ def _get_region_props( # Add custom extra methods here rp_extra_methods = [ circularity, - _measurements.granularity, + _measurements.granularity, # <--- Add additional measurements here that handle individual regions ] + extra_methods + image_extra_methods = [ + _measurements.border_occupied_factor # <--- Add additional measurements here that calculate on the entire label image + ] # type: list[RegionPropsImageCallableType] + image_extra_methods = {method.__name__: method for method in image_extra_methods} + + # can't use regionprops_table because it only returns int regions = regionprops( 
label_image=label_element.values, @@ -73,15 +83,17 @@ def _get_region_props( ) # dynamically extract specified properties and create a df extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} # type: dict[str, list[int | float]] - for region in regions: - for prop in props + [e.__name__ for e in extra_methods]: + for prop in props + [e.__name__ for e in extra_methods]: + if prop in image_extra_methods: + im_extra_result = image_extra_methods[prop](label_element.values, np_rgb_image) + for region in regions: + extracted_props[prop].append(im_extra_result.get(region.label)) + continue + + for region in regions: try: extracted_props[prop].append(getattr(region, prop)) except AttributeError as e: - # # Handle custom properties or missing attributes - # if prop == "circularity": - # extracted_props[prop].append(circularity(region)) - # else: raise ValueError(f"Property '{prop}' is not available in the region properties.") from e return pd.DataFrame(extracted_props) diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index c2e4d2432..a423753e0 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -1,9 +1,100 @@ -# Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py - import numpy +import numpy as np import scipy.ndimage import skimage.morphology +def border_occupied_factor( + mask: numpy.ndarray, + *args, + **kwargs +) -> dict[int, float]: + """ + Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label + Takes ~1.7s/megapixels to calculate + + Parameters + ---------- + mask: numpy.ndarray integer grayscale image with the labels + args None + kwargs None + + Returns + ------- + Dict of percentages for each label key + + """ + n_border_pixels = {} + n_border_occupied_pixels = {} + + adjacent_indices = [ + [-1, 0], + [0, -1], + [1, 0], + [0, 1], + ] + + for coordinates, pixel in np.ndenumerate(mask): + if pixel == 0: + continue + is_border = False + is_occupied = False + + for adjacent_index in adjacent_indices: + try: + adjacent_pixel = mask[ + coordinates[0] + adjacent_index[0], + coordinates[1] + adjacent_index[1] + ] + except IndexError: + # At image border, don't count image borders + continue + if adjacent_pixel == pixel: + continue + + is_border = True + if adjacent_pixel != 0: + is_occupied = True + + if is_border: + try: + n_border_pixels[pixel] += 1 + except KeyError: + n_border_pixels[pixel] = 1 + n_border_occupied_pixels[pixel] = 0 + if is_occupied: + n_border_occupied_pixels[pixel] += 1 + + result = {key: n_border_occupied_pixels[key] / n_border_pixels[key] for key in n_border_pixels.keys()} + + return result + + +if __name__ == '__main__': + import time + from spatialdata.datasets import blobs + # label_image = np.array([ + # [0, 0, 0, 0, 0, 0, 0, 0], + # [0, 1, 1, 1, 0, 0, 0, 0], + # [0, 1, 1, 1, 2, 2, 2, 0], + # [0, 1, 1, 1, 2, 2, 2, 0], + # [0, 0, 3, 3, 2, 2, 2, 0], + # [0, 0, 3, 3, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0], + # ]) + label_image = blobs(length=512).labels["blobs_labels"].values + + start_time = time.perf_counter() + actual = border_occupied_factor(label_image) + end_time = time.perf_counter() + print(end_time - start_time) + assert len(actual) == len(np.unique(label_image)) - 1 + # for idx, actual_value in enumerate(actual): + # assert actual_value == expected[idx] + + +# Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py 
+ + __doc__ = """\ MeasureGranularity ================== @@ -199,24 +290,29 @@ def granularity( back_pixels_mask = numpy.zeros_like(back_pixels) back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] back_pixels = skimage.morphology.dilation(back_pixels_mask, footprint=footprint) - if image_sample_size < 1: - if pixels.ndim == 2: - i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) - # - # Make sure the mapping only references the index range of - # back_pixels. - # - i *= float(back_shape[0] - 1) / float(new_shape[0] - 1) - j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) - back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) - else: - k, i, j = numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) - k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) - i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) - j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) - back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) + try: + if image_sample_size < 1: + if pixels.ndim == 2: + i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. + # + i *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) + else: + k, i, j = numpy.mgrid[ + 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] + ].astype(float) + k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) + # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset + # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html + except ZeroDivisionError: + return -1 pixels -= back_pixels pixels[pixels < 0] = 0 @@ -262,23 +358,30 @@ def granularity( # original image so we can match against object labels # orig_shape = pixels.shape - if pixels.ndim == 2: - i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) - # - # Make sure the mapping only references the index range of - # back_pixels. - # - i *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) - j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) - rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) - else: - k, i, j = numpy.mgrid[ - 0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2] - ].astype(float) - k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) - i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) - j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) - rec = scipy.ndimage.map_coordinates(rec, (k, i, j), order=1) + try: + if pixels.ndim == 2: + i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. 
+ # + i *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) + else: + k, i, j = numpy.mgrid[ + 0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2] + ].astype(float) + k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) + rec = scipy.ndimage.map_coordinates(rec, (k, i, j), order=1) + + # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset + # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html + except ZeroDivisionError: + return -1 + # TODO check if this is necessary # Calculate the means for the objects # diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 1c8e47238..7f2e79a8b 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -1,5 +1,7 @@ from __future__ import annotations +import time + import numpy as np import numpy.testing as npt import pandas as pd @@ -8,9 +10,12 @@ import squidpy as sq from spatialdata import SpatialData from spatialdata.datasets import blobs, raccoon +import spatialdata as sd # noinspection PyProtectedMember from squidpy.im._feature import _get_region_props +# noinspection PyProtectedMember +from squidpy.im._measurements import border_occupied_factor @pytest.fixture @@ -35,6 +40,16 @@ def test_label_bug_present(self, sdata_blobs): class TestMorphology: + def test_quantify_morphology_border_occupied_factor(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=["label", "border_occupied_factor"], + split_by_channels=True, + ) + assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + def test__get_region_props(self, sdata_blobs): region_props = _get_region_props( label_element=sdata_blobs["blobs_labels"], @@ -48,7 +63,7 @@ def test_quantify_morphology_callables(self, sdata_blobs): sdata=sdata_blobs, label="blobs_labels", image="blobs_image", - methods=["label", "area", "eccentricity", "circularity", "granularity"], + methods=["label", "area", "eccentricity", "circularity", "granularity", "border_occupied_factor"], split_by_channels=True, ) @@ -58,3 +73,50 @@ def test_quantify_morphology_callables(self, sdata_blobs): assert "granularity_ch0" in sdata_blobs["table"].obsm["morphology"].columns assert "granularity_ch1" in sdata_blobs["table"].obsm["morphology"].columns assert "granularity_ch2" in sdata_blobs["table"].obsm["morphology"].columns + assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + + +@pytest.fixture +def sdata_merfish() -> SpatialData: + zarr_path = "./data/merfish.zarr" + return sd.read_zarr(zarr_path) + + +@pytest.fixture +def sdata_mibitof() -> SpatialData: + zarr_path = "./data/mibitof.zarr" + return sd.read_zarr(zarr_path) + + +class TestMorphologyPerformance: + def test_performance(self, sdata_mibitof): + start_time = time.perf_counter() + sq.im.quantify_morphology( + sdata=sdata_mibitof, + label="point8_labels", + image="point8_image", + methods=["label", "area", "eccentricity", "circularity", "granularity"], + split_by_channels=True, + ) + end_time = time.perf_counter() + assert end_time - start_time < 10 + + +class TestMeasurements: + def test_border_occupied_factor(self): + label_image = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0], + 
[0, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 0, 3, 3, 2, 2, 2, 0], + [0, 0, 3, 3, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ]) + + expected = [3/8, 3/8, 2/4] + actual = border_occupied_factor(label_image) + assert len(actual) == len(expected) + for idx, actual_value in enumerate(actual): + assert actual_value == expected[idx] + From ebc08d8f1ab6129f60d950dd1d8cbb70c179800b Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Fri, 30 Aug 2024 00:06:41 +0200 Subject: [PATCH 09/20] add sanity checks and make multiple coordinate systems work --- src/squidpy/im/_feature.py | 55 ++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index ef94182c4..2cb2d39eb 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -260,9 +260,38 @@ def quantify_morphology( if element is not None and element not in sdata: raise KeyError(f"Key `{element}` not found in `sdata`.") + table_key = kwargs.get("table_key", None) + if table_key is None: + tables = sd.get_element_annotators(sdata, label) + if len(tables) > 1: + raise ValueError( + f"Multiple tables detected in `sdata` for {label}, " + f"please specify a specific table with the `table_key` parameter" + ) + table_key = next(iter(tables)) + + region_key = sdata[table_key].uns["spatialdata_attrs"]["region_key"] + if not np.any(sdata[table_key].obs[region_key] == label): + raise ValueError(f"Label {label} not found in region key ({region_key}) column of sdata table `{table_key}`") + + instance_key = sdata[table_key].uns["spatialdata_attrs"]["instance_key"] + + label_transform = sdata[label].transform + image_transform = sdata[image].transform + + for transform in [label_transform, image_transform]: + if len(transform) != 1: + raise ValueError("More than one coordinate system detected") + + coord_sys_label = next(iter(label_transform)) + coord_sys_image = next(iter(image_transform)) + + if coord_sys_label != coord_sys_image: + raise ValueError(f"Coordinate system do not match! 
label: {coord_sys_label}, image: {coord_sys_image}") + # from here on we should be certain that we have a label - label_element = sdata[label] - image_element = sdata[image] if image is not None else None + label_element = sdata.transform_element_to_coordinate_system(label, coord_sys_label) + image_element = sdata.transform_element_to_coordinate_system(image, coord_sys_image) if image is not None else None region_props = _get_region_props( label_element, @@ -282,20 +311,18 @@ def quantify_morphology( region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] region_props.drop(columns=[col], inplace=True) - tables = sd.get_element_annotators(sdata, label) - if len(tables) > 1: - warnings.warn( - f"Multiple tables detected in `sdata` matching , {label}" - "using first table to store regionprops from sq.im.quantify_morphology", - stacklevel=1, - ) - table_key = next(iter(tables)) + region_props.rename(columns={"label": instance_key}, inplace=True) + region_props[region_key] = label + region_props.set_index([region_key, instance_key], inplace=True) + + results = sdata[table_key].obs[[region_key, instance_key]] + results = results.join(region_props, how="left", on=[region_key, instance_key]) - region_props = region_props.set_index("label", drop=True) - region_props.index.name = None - region_props.index = region_props.index.astype(str) + # region_props = region_props.set_index("label", drop=True) + # region_props.index.name = None + # region_props.index = region_props.index.astype(str) - sdata[table_key].obsm["morphology"] = region_props + sdata[table_key].obsm["morphology"] = results return region_props From 50d58d27a3031b1233a961c3874c00bf6faf8e5f Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Fri, 30 Aug 2024 00:16:45 +0200 Subject: [PATCH 10/20] fix assertion to accommodate new interface --- tests/image/test_morphology.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 7f2e79a8b..24b7e66fe 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -114,9 +114,6 @@ def test_border_occupied_factor(self): [0, 0, 0, 0, 0, 0, 0, 0], ]) - expected = [3/8, 3/8, 2/4] + expected = {1: 3/8, 2: 3/8, 3: 2/4} actual = border_occupied_factor(label_image) - assert len(actual) == len(expected) - for idx, actual_value in enumerate(actual): - assert actual_value == expected[idx] - + assert actual == expected From 328ba164ac22878e2d0b7f3830f194b2d5f40473 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 22:17:20 +0000 Subject: [PATCH 11/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/im/_feature.py | 9 ++---- src/squidpy/im/_measurements.py | 54 +++++++++------------------------ tests/image/test_morphology.py | 27 +++++++++-------- 3 files changed, 33 insertions(+), 57 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 2cb2d39eb..4df70366b 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -19,8 +19,8 @@ from squidpy._docs import d, inject_docs from squidpy._utils import Signal, SigQueue, _get_n_cores, parallelize from squidpy.gr._utils import _save_data -from squidpy.im._container import ImageContainer from squidpy.im import _measurements +from squidpy.im._container import ImageContainer __all__ = ["calculate_image_features", "quantify_morphology"] @@ 
-30,9 +30,7 @@ [IntegerNDArrayType, FloatNDArrayType], Union[Union[int, float, list[Union[int, float]]]] ] -RegionPropsImageCallableType = Callable[ - [IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]] -] +RegionPropsImageCallableType = Callable[[IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]]] def circularity(regionmask: IntegerNDArrayType) -> float: @@ -71,10 +69,9 @@ def _get_region_props( image_extra_methods = [ _measurements.border_occupied_factor # <--- Add additional measurements here that calculate on the entire label image - ] # type: list[RegionPropsImageCallableType] + ] # type: list[RegionPropsImageCallableType] image_extra_methods = {method.__name__: method for method in image_extra_methods} - # can't use regionprops_table because it only returns int regions = regionprops( label_image=label_element.values, diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index a423753e0..fbcf32324 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -1,13 +1,12 @@ +from __future__ import annotations + import numpy import numpy as np import scipy.ndimage import skimage.morphology -def border_occupied_factor( - mask: numpy.ndarray, - *args, - **kwargs -) -> dict[int, float]: + +def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, float]: """ Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label Takes ~1.7s/megapixels to calculate @@ -41,10 +40,7 @@ def border_occupied_factor( for adjacent_index in adjacent_indices: try: - adjacent_pixel = mask[ - coordinates[0] + adjacent_index[0], - coordinates[1] + adjacent_index[1] - ] + adjacent_pixel = mask[coordinates[0] + adjacent_index[0], coordinates[1] + adjacent_index[1]] except IndexError: # At image border, don't count image borders continue @@ -69,9 +65,11 @@ def border_occupied_factor( return result -if __name__ == '__main__': +if __name__ == "__main__": import time + from spatialdata.datasets import blobs + # label_image = np.array([ # [0, 0, 0, 0, 0, 0, 0, 0], # [0, 1, 1, 1, 0, 0, 0, 0], @@ -234,19 +232,11 @@ def granularity( if subsample_size < 1: new_shape = new_shape * subsample_size if pixels.ndim == 2: - i, j = ( - numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) - / subsample_size - ) + i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = ( - numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) - / subsample_size - ) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: @@ -258,23 +248,13 @@ def granularity( if image_sample_size < 1: back_shape = new_shape * image_sample_size if pixels.ndim == 2: - i, j = ( - numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) - / image_sample_size - ) + i, j = numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) / image_sample_size back_pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = ( - numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) - / 
subsample_size - ) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size back_pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) - back_mask = ( - scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 - ) + back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: back_pixels = pixels back_mask = mask @@ -302,9 +282,7 @@ def granularity( j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) else: - k, i, j = numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) @@ -369,9 +347,7 @@ def granularity( j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) else: - k, i, j = numpy.mgrid[ - 0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2] - ].astype(float) + k, i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2]].astype(float) k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 24b7e66fe..3f48c67d2 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -7,13 +7,14 @@ import pandas as pd import pytest import skimage.measure +import spatialdata as sd import squidpy as sq from spatialdata import SpatialData from spatialdata.datasets import blobs, raccoon -import spatialdata as sd # noinspection PyProtectedMember from squidpy.im._feature import _get_region_props + # noinspection PyProtectedMember from squidpy.im._measurements import border_occupied_factor @@ -104,16 +105,18 @@ def test_performance(self, sdata_mibitof): class TestMeasurements: def test_border_occupied_factor(self): - label_image = np.array([ - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 0], - [0, 1, 1, 1, 2, 2, 2, 0], - [0, 1, 1, 1, 2, 2, 2, 0], - [0, 0, 3, 3, 2, 2, 2, 0], - [0, 0, 3, 3, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0], - ]) - - expected = {1: 3/8, 2: 3/8, 3: 2/4} + label_image = np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 0, 3, 3, 2, 2, 2, 0], + [0, 0, 3, 3, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ] + ) + + expected = {1: 3 / 8, 2: 3 / 8, 3: 2 / 4} actual = border_occupied_factor(label_image) assert actual == expected From f7b52708de6065a6374d66af22b3e54e18b5b34b Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Tue, 3 Sep 2024 14:57:12 +0200 Subject: [PATCH 12/20] add possibility for granularity to return multiple values per channel and label --- src/squidpy/im/_feature.py | 62 +++++++++++++++++++-------- src/squidpy/im/_measurements.py | 64 ++++++++++----------------- tests/image/test_morphology.py | 76 ++++++++++++++++++++++++--------- 3 files changed, 123 insertions(+), 79 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 2cb2d39eb..9eaeda421 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -1,5 +1,6 @@ from __future__ import annotations +import typing import warnings from 
collections.abc import Generator, Mapping, Sequence from types import MappingProxyType @@ -19,8 +20,8 @@ from squidpy._docs import d, inject_docs from squidpy._utils import Signal, SigQueue, _get_n_cores, parallelize from squidpy.gr._utils import _save_data -from squidpy.im._container import ImageContainer from squidpy.im import _measurements +from squidpy.im._container import ImageContainer __all__ = ["calculate_image_features", "quantify_morphology"] @@ -30,9 +31,7 @@ [IntegerNDArrayType, FloatNDArrayType], Union[Union[int, float, list[Union[int, float]]]] ] -RegionPropsImageCallableType = Callable[ - [IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]] -] +RegionPropsImageCallableType = Callable[[IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]]] def circularity(regionmask: IntegerNDArrayType) -> float: @@ -71,10 +70,9 @@ def _get_region_props( image_extra_methods = [ _measurements.border_occupied_factor # <--- Add additional measurements here that calculate on the entire label image - ] # type: list[RegionPropsImageCallableType] + ] # type: list[RegionPropsImageCallableType] image_extra_methods = {method.__name__: method for method in image_extra_methods} - # can't use regionprops_table because it only returns int regions = regionprops( label_image=label_element.values, @@ -260,19 +258,11 @@ def quantify_morphology( if element is not None and element not in sdata: raise KeyError(f"Key `{element}` not found in `sdata`.") - table_key = kwargs.get("table_key", None) - if table_key is None: - tables = sd.get_element_annotators(sdata, label) - if len(tables) > 1: - raise ValueError( - f"Multiple tables detected in `sdata` for {label}, " - f"please specify a specific table with the `table_key` parameter" - ) - table_key = next(iter(tables)) + table_key = _get_table_key(sdata, label, kwargs) region_key = sdata[table_key].uns["spatialdata_attrs"]["region_key"] if not np.any(sdata[table_key].obs[region_key] == label): - raise ValueError(f"Label {label} not found in region key ({region_key}) column of sdata table `{table_key}`") + raise ValueError(f"Label '{label}' not found in region key ({region_key}) column of sdata table '{table_key}'") instance_key = sdata[table_key].uns["spatialdata_attrs"]["instance_key"] @@ -304,12 +294,32 @@ def quantify_morphology( channels = image_element.c.values for col in region_props.columns: # did the method return a list of values? - if isinstance(region_props[col].values[0], (list, tuple, np.ndarray)): + if isinstance(region_props[col].values[0], (list, tuple)): # are all lists of the length of the channel list? if all(len(val) == len(channels) for val in region_props[col].values): for i, channel in enumerate(channels): region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] region_props.drop(columns=[col], inplace=True) + if isinstance(region_props[col].values[0], np.ndarray): + shape = region_props[col].values[0].shape + + if not all(val.shape == shape for val in region_props[col].values): + raise ValueError( + f"The results of the morphology method {col} have different shapes, " f"this cannot be handled" + ) + if not shape[1] == len(channels): + raise ValueError( + f"Number of channels {len(channels)} do not match " + f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " + f"It is expected that shape[1] should be equal to number of channels." 
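                        # Illustrative note (not part of the diff): per the tests added in this
                        # patch, granularity with the default 16-step spectrum on the 3-channel
                        # blobs image is expected to return arrays of shape (16, 3), which the
                        # loop below expands into granularity_ch{c}_{i} columns.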
+ ) + + for channel_idx, channel in enumerate(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ + val[prop_idx, channel_idx] for val in region_props[col].values + ] + region_props.drop(columns=[col], inplace=True) region_props.rename(columns={"label": instance_key}, inplace=True) region_props[region_key] = label @@ -327,6 +337,24 @@ def quantify_morphology( return region_props +def _get_table_key(sdata: sd.SpatialData, label: str, kwargs: dict[str, typing.Any]) -> str: + table_key = kwargs.get("table_key", None) + if table_key is None: + tables = sd.get_element_annotators(sdata, label) + if len(tables) > 1: + raise ValueError( + f"Multiple tables detected in `sdata` for {label}, " + f"please specify a specific table with the `table_key` parameter" + ) + if len(tables) == 0: + raise ValueError( + f"No tables automatically detected in `sdata` for {label}, " + f"please specify a specific table with the `table_key` parameter" + ) + table_key = next(iter(tables)) + return table_key + + def _calculate_image_features_helper( obs_ids: Sequence[str], adata: AnnData, diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index a423753e0..29aa8e6b1 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -1,13 +1,12 @@ +from __future__ import annotations + import numpy import numpy as np import scipy.ndimage import skimage.morphology -def border_occupied_factor( - mask: numpy.ndarray, - *args, - **kwargs -) -> dict[int, float]: + +def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, float]: """ Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label Takes ~1.7s/megapixels to calculate @@ -41,10 +40,7 @@ def border_occupied_factor( for adjacent_index in adjacent_indices: try: - adjacent_pixel = mask[ - coordinates[0] + adjacent_index[0], - coordinates[1] + adjacent_index[1] - ] + adjacent_pixel = mask[coordinates[0] + adjacent_index[0], coordinates[1] + adjacent_index[1]] except IndexError: # At image border, don't count image borders continue @@ -69,9 +65,11 @@ def border_occupied_factor( return result -if __name__ == '__main__': +if __name__ == "__main__": import time + from spatialdata.datasets import blobs + # label_image = np.array([ # [0, 0, 0, 0, 0, 0, 0, 0], # [0, 1, 1, 1, 0, 0, 0, 0], @@ -167,7 +165,7 @@ def granularity( subsample_size: float = 0.25, image_sample_size: float = 0.25, element_size: int = 10, - granular_spectrum_length: int = 1, # default 16 + granular_spectrum_length: int = 16, # default 16 ): """ Parameters @@ -234,19 +232,11 @@ def granularity( if subsample_size < 1: new_shape = new_shape * subsample_size if pixels.ndim == 2: - i, j = ( - numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) - / subsample_size - ) + i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = ( - numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) - / subsample_size - ) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: @@ -258,23 +248,13 @@ def granularity( if image_sample_size < 1: back_shape = new_shape 
* image_sample_size if pixels.ndim == 2: - i, j = ( - numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) - / image_sample_size - ) + i, j = numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) / image_sample_size back_pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = ( - numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) - / subsample_size - ) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size back_pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) - back_mask = ( - scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 - ) + back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: back_pixels = pixels back_mask = mask @@ -302,9 +282,7 @@ def granularity( j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) else: - k, i, j = numpy.mgrid[ - 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] - ].astype(float) + k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) @@ -312,7 +290,7 @@ def granularity( # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html except ZeroDivisionError: - return -1 + return _handle_granularity_error(granular_spectrum_length) pixels -= back_pixels pixels[pixels < 0] = 0 @@ -369,9 +347,7 @@ def granularity( j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) else: - k, i, j = numpy.mgrid[ - 0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2] - ].astype(float) + k, i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2]].astype(float) k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) @@ -380,7 +356,7 @@ def granularity( # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html except ZeroDivisionError: - return -1 + return _handle_granularity_error(granular_spectrum_length) # TODO check if this is necessary # Calculate the means for the objects @@ -407,3 +383,7 @@ def granularity( return results[0] else: return results + + +def _handle_granularity_error(granular_spectrum_length: int): + return [None for _ in range(granular_spectrum_length)] diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 24b7e66fe..dd66b3daa 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -7,13 +7,14 @@ import pandas as pd import pytest import skimage.measure +import spatialdata as sd import squidpy as sq from spatialdata import SpatialData from spatialdata.datasets import blobs, raccoon -import spatialdata as sd # noinspection PyProtectedMember from squidpy.im._feature import _get_region_props + # noinspection PyProtectedMember from squidpy.im._measurements import border_occupied_factor @@ -28,6 +29,11 @@ def 
sdata_blobs() -> SpatialData: return blobs() +@pytest.fixture +def morphology_methods() -> list[str]: + return ["label", "area", "eccentricity", "circularity", "granularity", "border_occupied_factor"] + + @pytest.mark.xfail( raises=AssertionError, reason="calling skimage.measure.label on segmentation produces additional labels" ) @@ -40,6 +46,22 @@ def test_label_bug_present(self, sdata_blobs): class TestMorphology: + def test_quantify_morphology_granularity(self, sdata_blobs): + granular_spectrum_length = 16 + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=["label", "granularity"], + split_by_channels=True, + granularity_granular_spectrum_length=granular_spectrum_length, + ) + + columns = sdata_blobs["table"].obsm["morphology"].columns + for channel in range(3): + for granular_spectrum in range(granular_spectrum_length): + assert f"granularity_ch{channel}_{granular_spectrum}" in columns + def test_quantify_morphology_border_occupied_factor(self, sdata_blobs): sq.im.quantify_morphology( sdata=sdata_blobs, @@ -58,30 +80,42 @@ def test__get_region_props(self, sdata_blobs): ) assert len(region_props) == len(sdata_blobs["table"].obs) - def test_quantify_morphology_callables(self, sdata_blobs): + def test_quantify_morphology_callables(self, sdata_blobs, morphology_methods): sq.im.quantify_morphology( sdata=sdata_blobs, label="blobs_labels", image="blobs_image", - methods=["label", "area", "eccentricity", "circularity", "granularity", "border_occupied_factor"], + methods=morphology_methods, split_by_channels=True, ) assert "morphology" in sdata_blobs["table"].obsm.keys() assert isinstance(sdata_blobs["table"].obsm["morphology"], pd.DataFrame) assert "circularity" in sdata_blobs["table"].obsm["morphology"].columns - assert "granularity_ch0" in sdata_blobs["table"].obsm["morphology"].columns - assert "granularity_ch1" in sdata_blobs["table"].obsm["morphology"].columns - assert "granularity_ch2" in sdata_blobs["table"].obsm["morphology"].columns assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + @pytest.mark.xfail( + raises=ValueError, + reason="For the moment, there is no association " "between the multiscale labels and a table in blobs.", + ) + def test_quantify_morphology_multiscale(self, sdata_blobs, morphology_methods): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_multiscale_labels", + image="blobs_multiscale_image", + methods=morphology_methods, + split_by_channels=True, + ) + +# TODO Remove for release @pytest.fixture def sdata_merfish() -> SpatialData: zarr_path = "./data/merfish.zarr" return sd.read_zarr(zarr_path) +# TODO Remove for release @pytest.fixture def sdata_mibitof() -> SpatialData: zarr_path = "./data/mibitof.zarr" @@ -89,31 +123,33 @@ def sdata_mibitof() -> SpatialData: class TestMorphologyPerformance: - def test_performance(self, sdata_mibitof): + def test_performance(self, sdata_mibitof, morphology_methods): start_time = time.perf_counter() sq.im.quantify_morphology( sdata=sdata_mibitof, label="point8_labels", image="point8_image", - methods=["label", "area", "eccentricity", "circularity", "granularity"], + methods=morphology_methods, split_by_channels=True, ) end_time = time.perf_counter() - assert end_time - start_time < 10 + assert end_time - start_time < 60 class TestMeasurements: def test_border_occupied_factor(self): - label_image = np.array([ - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 0], - [0, 1, 1, 1, 2, 2, 2, 0], - [0, 1, 1, 1, 2, 2, 2, 0], - [0, 
0, 3, 3, 2, 2, 2, 0], - [0, 0, 3, 3, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0], - ]) - - expected = {1: 3/8, 2: 3/8, 3: 2/4} + label_image = np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 0, 3, 3, 2, 2, 2, 0], + [0, 0, 3, 3, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ] + ) + + expected = {1: 3 / 8, 2: 3 / 8, 3: 2 / 4} actual = border_occupied_factor(label_image) assert actual == expected From 112d1a964c5098bb7be4222e0bbbccc3d15da185 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:58:32 +0000 Subject: [PATCH 13/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/image/test_morphology.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index dd66b3daa..7f6a11d79 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -8,10 +8,11 @@ import pytest import skimage.measure import spatialdata as sd -import squidpy as sq from spatialdata import SpatialData from spatialdata.datasets import blobs, raccoon +import squidpy as sq + # noinspection PyProtectedMember from squidpy.im._feature import _get_region_props From 8047ddec011f96b775ec0d0772d78577649a54ec Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Thu, 5 Sep 2024 15:13:08 +0200 Subject: [PATCH 14/20] add "all" option for regionprops through using methods=None --- src/squidpy/im/_feature.py | 84 +++++++++++++++++++++++---------- src/squidpy/im/_measurements.py | 35 +++++++++++++- tests/image/test_morphology.py | 14 ++++++ 3 files changed, 108 insertions(+), 25 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 9eaeda421..89bfd14c4 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -221,21 +221,16 @@ def calculate_image_features( def quantify_morphology( sdata: SpatialData, - label: str | None = None, - image: str | None = None, + label: str, + image: str, methods: list[str | Callable] | None = None, - split_by_channels: bool = False, + split_by_channels: bool = True, **kwargs: Any, ) -> pd.DataFrame | None: - if label is None and image is None: - raise ValueError("Either `label` or `image` must be provided.") - - if image is not None and label is None: - raise ValueError("If `image` is provided, a `label` with matching segmentation borders must be provided.") - if methods is None: # default case but without mutable argument as default value - methods = ["label", "area", "eccentricity", "perimeter", "sphericity"] + # noinspection PyProtectedMember + methods = _measurements._all_regionprops_names() elif isinstance(methods, (str, Callable)): methods = [methods] @@ -246,7 +241,7 @@ def quantify_morphology( raise ValueError("All elements in `methods` must be strings or callables.") if "label" not in methods: - methods = ["label"].extend(methods) + methods.append("label") extra_methods = [] for method in methods: @@ -293,33 +288,74 @@ def quantify_morphology( if split_by_channels: channels = image_element.c.values for col in region_props.columns: + if isinstance(region_props[col].values[0], (int, float, np.integer, np.floating)): + continue # ignore single value returns + + is_processed = False + # did the method return a list of values? if isinstance(region_props[col].values[0], (list, tuple)): # are all lists of the length of the channel list? 
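                # Illustrative note (not part of the diff): for a 3-channel image, a custom
                # method that returns one value per channel, e.g. (0.12, 0.55, 0.90), is split
                # into <col>_ch<channel> columns below, while a list whose length differs from
                # the channel count (e.g. a 16-element spectrum) becomes <col>_0 ... <col>_15.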
- if all(len(val) == len(channels) for val in region_props[col].values): + length = len(region_props[col].values[0]) + if not all(len(val) == length for val in region_props[col].values): + raise ValueError( + f"The results of the morphology method {col} have different lengths, this cannot be handled" + ) + + if length == len(channels): for i, channel in enumerate(channels): region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] - region_props.drop(columns=[col], inplace=True) + is_processed = True + + if length != len(channels): + for prop_idx in range(length): + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + if isinstance(region_props[col].values[0], np.ndarray): shape = region_props[col].values[0].shape if not all(val.shape == shape for val in region_props[col].values): raise ValueError( - f"The results of the morphology method {col} have different shapes, " f"this cannot be handled" - ) - if not shape[1] == len(channels): - raise ValueError( - f"Number of channels {len(channels)} do not match " - f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " - f"It is expected that shape[1] should be equal to number of channels." + f"The results of the morphology method {col} have different shapes, this cannot be handled" ) - for channel_idx, channel in enumerate(channels): + # Handle cases like centroids which return coordinates for each region + if len(shape) == 1 and shape[0] != len(channels): for prop_idx in range(shape[0]): - region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ - val[prop_idx, channel_idx] for val in region_props[col].values - ] + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + + # Handle cases like intensity which return one value per channel for each region + if len(shape) == 1 and shape[0] == len(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + + # Handle cases like granularity which return many values for each channel for each region + if len(shape) == 2: + if not shape[1] == len(channels): + raise ValueError( + f"Number of channels {len(channels)} do not match " + f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " + f"It is expected that shape[1] should be equal to number of channels." + ) + + for channel_idx, channel in enumerate(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ + val[prop_idx, channel_idx] for val in region_props[col].values + ] + + is_processed = True + if is_processed: region_props.drop(columns=[col], inplace=True) + else: + raise NotImplementedError( + f"Result of morphology method '{col}' cannot be interpreted, " + f"as its dtype ({type(region_props[col].values[0])} and shape " + f"does not conform to the expected shapes and dtypes." 
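                    # Illustrative note (not part of the diff): this branch is reached when a
                    # method returns something per region that is neither a scalar, a list or
                    # tuple, nor a 1-D/2-D numpy array, for example a dict, a string, or an
                    # array whose ndim is not 1 or 2.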
+ ) region_props.rename(columns={"label": instance_key}, inplace=True) region_props[region_key] = label diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index 29aa8e6b1..a94c5b6f7 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -6,6 +6,40 @@ import skimage.morphology +def _all_regionprops_names() -> list[str]: + names = [ + "area", + "area_bbox", + "area_convex", + "area_filled", + "axis_major_length", + "axis_minor_length", + "centroid", # TODO might drop centroids + "centroid_local", + "centroid_weighted_local", + "eccentricity", + "equivalent_diameter_area", + "euler_number", + "extent", + "feret_diameter_max", + # "inertia_tensor", + "inertia_tensor_eigvals", + "intensity_max", + "intensity_min", + "intensity_mean", + "intensity_std", + # "moments", # TODO evaluate if more moments necessary + "moments_hu", + # "moments_normalized", + "num_pixels", + "orientation", + "perimeter", + "perimeter_crofton", + "solidity", + ] + return names + + def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, float]: """ Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label @@ -89,7 +123,6 @@ def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, fl # for idx, actual_value in enumerate(actual): # assert actual_value == expected[idx] - # Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 7f6a11d79..5bd7ec1a2 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -12,6 +12,7 @@ from spatialdata.datasets import blobs, raccoon import squidpy as sq +from squidpy.im import _measurements # noinspection PyProtectedMember from squidpy.im._feature import _get_region_props @@ -95,6 +96,19 @@ def test_quantify_morphology_callables(self, sdata_blobs, morphology_methods): assert "circularity" in sdata_blobs["table"].obsm["morphology"].columns assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + def test_quantify_morphology_all(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + split_by_channels=True, + ) + + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in sdata_blobs["table"].obsm["morphology"].columns]) + # for column in sdata_blobs["table"].obsm["morphology"].columns: + # assert any(column.startswith(name) for name in _measurements._all_regionprops_names()) + @pytest.mark.xfail( raises=ValueError, reason="For the moment, there is no association " "between the multiscale labels and a table in blobs.", From 39f9db81192412c13d16ce8f57f20715f6ac47f1 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Tue, 24 Sep 2024 15:41:49 +0200 Subject: [PATCH 15/20] add granularity and border_occupied_factor to _all_regionprops_names --- src/squidpy/im/_measurements.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index a94c5b6f7..f92ab0a58 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -36,6 +36,8 @@ def _all_regionprops_names() -> list[str]: "perimeter", "perimeter_crofton", "solidity", + "border_occupied_factor", + "granularity", ] return names From 02d21c80e645ee7b10c2ea446ddaa64741183de1 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: 
Tue, 8 Oct 2024 17:06:00 +0200 Subject: [PATCH 16/20] refactor quantify_morphology --- src/squidpy/im/_feature.py | 191 +++++++++++++++++---------------- tests/image/test_morphology.py | 78 +++++++++++++- 2 files changed, 175 insertions(+), 94 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 89bfd14c4..9eb74e39f 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -45,7 +45,7 @@ def circularity(regionmask: IntegerNDArrayType) -> float: if perim == 0: return 0 area = np.sum(regionmask) - return float((4 * np.pi * area) / (perim**2)) + return float((4 * np.pi * area) / (perim ** 2)) def _get_region_props( @@ -63,13 +63,16 @@ def _get_region_props( np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) # Add custom extra methods here + # Add additional measurements here that handle individual regions rp_extra_methods = [ circularity, - _measurements.granularity, # <--- Add additional measurements here that handle individual regions + _measurements.granularity, ] + extra_methods + # Add additional measurements here that calculate on the entire label image image_extra_methods = [ - _measurements.border_occupied_factor # <--- Add additional measurements here that calculate on the entire label image + _measurements.border_occupied_factor, + _measurements.zernike, ] # type: list[RegionPropsImageCallableType] image_extra_methods = {method.__name__: method for method in image_extra_methods} @@ -227,27 +230,7 @@ def quantify_morphology( split_by_channels: bool = True, **kwargs: Any, ) -> pd.DataFrame | None: - if methods is None: - # default case but without mutable argument as default value - # noinspection PyProtectedMember - methods = _measurements._all_regionprops_names() - elif isinstance(methods, (str, Callable)): - methods = [methods] - - if not isinstance(methods, list): - raise ValueError("Argument `methods` must be a list of strings.") - - if not all(isinstance(method, (str, Callable)) for method in methods): - raise ValueError("All elements in `methods` must be strings or callables.") - - if "label" not in methods: - methods.append("label") - - extra_methods = [] - for method in methods: - if callable(method): - extra_methods.append(method) - methods.remove(method) + extra_methods, methods = _validate_methods(methods) for element in [label, image]: if element is not None and element not in sdata: @@ -261,22 +244,7 @@ def quantify_morphology( instance_key = sdata[table_key].uns["spatialdata_attrs"]["instance_key"] - label_transform = sdata[label].transform - image_transform = sdata[image].transform - - for transform in [label_transform, image_transform]: - if len(transform) != 1: - raise ValueError("More than one coordinate system detected") - - coord_sys_label = next(iter(label_transform)) - coord_sys_image = next(iter(image_transform)) - - if coord_sys_label != coord_sys_image: - raise ValueError(f"Coordinate system do not match! label: {coord_sys_label}, image: {coord_sys_image}") - - # from here on we should be certain that we have a label - label_element = sdata.transform_element_to_coordinate_system(label, coord_sys_label) - image_element = sdata.transform_element_to_coordinate_system(image, coord_sys_image) if image is not None else None + image_element, label_element = _apply_transformations(image, label, sdata) region_props = _get_region_props( label_element, @@ -295,59 +263,10 @@ def quantify_morphology( # did the method return a list of values? 
if isinstance(region_props[col].values[0], (list, tuple)): - # are all lists of the length of the channel list? - length = len(region_props[col].values[0]) - if not all(len(val) == length for val in region_props[col].values): - raise ValueError( - f"The results of the morphology method {col} have different lengths, this cannot be handled" - ) - - if length == len(channels): - for i, channel in enumerate(channels): - region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] - is_processed = True - - if length != len(channels): - for prop_idx in range(length): - region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] - is_processed = True + is_processed = _extract_from_list_like(channels, col, is_processed, region_props) if isinstance(region_props[col].values[0], np.ndarray): - shape = region_props[col].values[0].shape - - if not all(val.shape == shape for val in region_props[col].values): - raise ValueError( - f"The results of the morphology method {col} have different shapes, this cannot be handled" - ) - - # Handle cases like centroids which return coordinates for each region - if len(shape) == 1 and shape[0] != len(channels): - for prop_idx in range(shape[0]): - region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] - is_processed = True - - # Handle cases like intensity which return one value per channel for each region - if len(shape) == 1 and shape[0] == len(channels): - for prop_idx in range(shape[0]): - region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] - is_processed = True - - # Handle cases like granularity which return many values for each channel for each region - if len(shape) == 2: - if not shape[1] == len(channels): - raise ValueError( - f"Number of channels {len(channels)} do not match " - f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " - f"It is expected that shape[1] should be equal to number of channels." - ) - - for channel_idx, channel in enumerate(channels): - for prop_idx in range(shape[0]): - region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ - val[prop_idx, channel_idx] for val in region_props[col].values - ] - - is_processed = True + is_processed = _extract_from_ndarray(channels, col, is_processed, region_props) if is_processed: region_props.drop(columns=[col], inplace=True) else: @@ -373,6 +292,96 @@ def quantify_morphology( return region_props +def _apply_transformations(image, label, sdata): + label_transform = sdata[label].transform + image_transform = sdata[image].transform + for transform in [label_transform, image_transform]: + if len(transform) != 1: + raise ValueError("More than one coordinate system detected") + coord_sys_label = next(iter(label_transform)) + coord_sys_image = next(iter(image_transform)) + if coord_sys_label != coord_sys_image: + raise ValueError(f"Coordinate system do not match! 
label: {coord_sys_label}, image: {coord_sys_image}") + # from here on we should be certain that we have a label + label_element = sdata.transform_element_to_coordinate_system(label, coord_sys_label) + image_element = sdata.transform_element_to_coordinate_system(image, coord_sys_image) if image is not None else None + return image_element, label_element + + +def _validate_methods(methods): + if methods is None: + # default case but without mutable argument as default value + # noinspection PyProtectedMember + methods = _measurements._all_regionprops_names() + elif isinstance(methods, (str, Callable)): + methods = [methods] + if not isinstance(methods, list): + raise ValueError("Argument `methods` must be a list of strings.") + if not all(isinstance(method, (str, Callable)) for method in methods): + raise ValueError("All elements in `methods` must be strings or callables.") + if "label" not in methods: + methods.append("label") + extra_methods = [] + for method in methods: + if callable(method): + extra_methods.append(method) + methods.remove(method) + return extra_methods, methods + + +def _extract_from_ndarray(channels, col, is_processed, region_props): + shape = region_props[col].values[0].shape + if not all(val.shape == shape for val in region_props[col].values): + raise ValueError( + f"The results of the morphology method {col} have different shapes, this cannot be handled" + ) + # Handle cases like centroids which return coordinates for each region + if len(shape) == 1 and shape[0] != len(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + # Handle cases like intensity which return one value per channel for each region + if len(shape) == 1 and shape[0] == len(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + # Handle cases like granularity which return many values for each channel for each region + if len(shape) == 2: + if not shape[1] == len(channels): + raise ValueError( + f"Number of channels {len(channels)} do not match " + f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " + f"It is expected that shape[1] should be equal to number of channels." + ) + + for channel_idx, channel in enumerate(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ + val[prop_idx, channel_idx] for val in region_props[col].values + ] + + is_processed = True + return is_processed + + +def _extract_from_list_like(channels, col, is_processed, region_props): + # are all lists of the length of the channel list? 
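    # Note (not part of the diff): like _extract_from_ndarray above, this helper mutates
    # the region_props DataFrame in place when it adds the per-channel or per-index
    # columns; the returned flag only tells quantify_morphology whether the original
    # list-valued column should be dropped.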
+ length = len(region_props[col].values[0]) + if not all(len(val) == length for val in region_props[col].values): + raise ValueError( + f"The results of the morphology method {col} have different lengths, this cannot be handled" + ) + if length == len(channels): + for i, channel in enumerate(channels): + region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] + is_processed = True + if length != len(channels): + for prop_idx in range(length): + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + return is_processed + + def _get_table_key(sdata: sd.SpatialData, label: str, kwargs: dict[str, typing.Any]) -> str: table_key = kwargs.get("table_key", None) if table_key is None: diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 5bd7ec1a2..0b7c4367a 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -1,6 +1,7 @@ from __future__ import annotations import time +import typing import numpy as np import numpy.testing as npt @@ -15,7 +16,7 @@ from squidpy.im import _measurements # noinspection PyProtectedMember -from squidpy.im._feature import _get_region_props +from squidpy.im._feature import _get_region_props, _get_table_key # noinspection PyProtectedMember from squidpy.im._measurements import border_occupied_factor @@ -47,7 +48,79 @@ def test_label_bug_present(self, sdata_blobs): ) +def dummy_callable(): + pass + + +@pytest.fixture +def malformed_morphology_methods() -> dict[str, typing.Any]: + methods = { + "wrong_container": [ + ("label", "area"), + "label,area" + ], + "wrong_method_type": [ + ["test", dummy_callable, 42.42], + ["test", dummy_callable, 42] + ] + } + return methods + + class TestMorphology: + # @pytest.mark.parametrize( + # "sdata,methods", + # pytest.param( + # sdata_blobs, ("label", "area"), id="tuple" + # ), + # pytest.param( + # sdata_blobs, "label,area", id="string" + # ), + # ) + def test_sanity_method_list(self, sdata_blobs, malformed_morphology_methods): + with pytest.raises(ValueError, match="Argument `methods` must be a list of strings."): + for methods in malformed_morphology_methods["wrong_container"]: + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=methods + ) + + # @pytest.mark.parametrize( + # "sdata,methods", + # pytest.param(sdata_blobs, ["test", dummy_callable, 42.42], id="float"), + # pytest.param(sdata_blobs, ["test", dummy_callable, 42], id="int"), + # ) + def test_sanity_method_list_types(self, sdata_blobs, malformed_morphology_methods): + with pytest.raises(ValueError, match="All elements in `methods` must be strings or callables."): + for methods in malformed_morphology_methods["wrong_method_type"]: + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=methods + ) + + def test_get_table_key_no_annotators(self, sdata_blobs): + label = "blobs_multiscale_labels" + with pytest.raises(ValueError, match=f"No tables automatically detected in `sdata` for {label}"): + _get_table_key( + sdata=sdata_blobs, + label=label, + kwargs={} + ) + + def test_get_table_key_multiple_annotators(self, sdata_blobs): + sdata_blobs.tables["multi_table"] = sd.deepcopy(sdata_blobs["table"]) + label = "blobs_labels" + with pytest.raises(ValueError, match=f"Multiple tables detected in `sdata` for {label}"): + _get_table_key( + sdata=sdata_blobs, + label=label, + kwargs={} + ) + def test_quantify_morphology_granularity(self, 
sdata_blobs): granular_spectrum_length = 16 sq.im.quantify_morphology( @@ -56,7 +129,6 @@ def test_quantify_morphology_granularity(self, sdata_blobs): image="blobs_image", methods=["label", "granularity"], split_by_channels=True, - granularity_granular_spectrum_length=granular_spectrum_length, ) columns = sdata_blobs["table"].obsm["morphology"].columns @@ -144,7 +216,7 @@ def test_performance(self, sdata_mibitof, morphology_methods): sdata=sdata_mibitof, label="point8_labels", image="point8_image", - methods=morphology_methods, + methods=None, split_by_channels=True, ) end_time = time.perf_counter() From 59d8137ff29ce8d8b11305b41e939207bcc5d151 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Tue, 8 Oct 2024 17:06:13 +0200 Subject: [PATCH 17/20] add zernike --- pyproject.toml | 1 + src/squidpy/im/_measurements.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8cc85ffd2..898252a2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ dependencies = [ "xarray>=0.16.1", "zarr>=2.6.1", "spatialdata>=0.2.0", + "centrosome>=1.2.3" ] [project.optional-dependencies] diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index f92ab0a58..3b141a9a5 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -1,5 +1,6 @@ from __future__ import annotations +import centrosome.zernike import numpy import numpy as np import scipy.ndimage @@ -27,7 +28,7 @@ def _all_regionprops_names() -> list[str]: "intensity_max", "intensity_min", "intensity_mean", - "intensity_std", + # "intensity_std", # TODO either bump version for skimage to >=0.23 for intensity_std to be included in regionprops or drop std # "moments", # TODO evaluate if more moments necessary "moments_hu", # "moments_normalized", @@ -38,6 +39,7 @@ def _all_regionprops_names() -> list[str]: "solidity", "border_occupied_factor", "granularity", + "zernike", ] return names @@ -422,3 +424,31 @@ def granularity( def _handle_granularity_error(granular_spectrum_length: int): return [None for _ in range(granular_spectrum_length)] + + +# Copied from https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectsizeshape.py +def zernike(masks: numpy.ndarray, pixels: numpy.ndarray, zernike_numbers: int = 9) -> dict[int, numpy.ndarray]: + unique_indices = numpy.unique(masks) + unique_indices = unique_indices[unique_indices != 0] + res = centrosome.zernike.zernike( + zernike_indexes=centrosome.zernike.get_zernike_indexes(zernike_numbers + 1), + labels=masks, + indexes=unique_indices, + ) + return {orig_idx: res[res_idx] for orig_idx, res_idx in zip(unique_indices, range(res.shape[0]))} + + # # + # # Zernike features + # # + # unique_indices = numpy.unique(masks) + # unique_indices = unique_indices[unique_indices>0] + # indices = list(range(1,len(unique_indices) + 1)) + # labels = masks + # zernike_numbers = centrosome.zernike.get_zernike_indexes(zernike_numbers + 1) + # + # zf_l = centrosome.zernike.zernike(zernike_numbers, labels, indices) + # results = {} + # for (n, m), z in zip(zernike_numbers, zf_l.transpose()): + # results[f"Zernike_{n}_{m}"] = z + + # return results From bc035d2678ac73f2681e64dffbb6f18e1d4d1fa1 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Thu, 14 Nov 2024 18:05:01 +0100 Subject: [PATCH 18/20] status update --- pyproject.toml | 3 +- src/squidpy/im/_feature.py | 34 ++- src/squidpy/im/_measurements.py | 421 +++++++++++++++++++++++++++----- 
tests/image/test_morphology.py | 266 +++++++++++++++++--- 4 files changed, 620 insertions(+), 104 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 898252a2e..d0254185d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,8 @@ dependencies = [ "xarray>=0.16.1", "zarr>=2.6.1", "spatialdata>=0.2.0", - "centrosome>=1.2.3" + "centrosome>=1.2.3", + "cp-measure>=0.1.4" ] [project.optional-dependencies] diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 9eb74e39f..02ed3a2bf 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -16,6 +16,7 @@ from skimage.measure import perimeter, regionprops from spatialdata import SpatialData +import squidpy._utils from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, SigQueue, _get_n_cores, parallelize @@ -45,7 +46,7 @@ def circularity(regionmask: IntegerNDArrayType) -> float: if perim == 0: return 0 area = np.sum(regionmask) - return float((4 * np.pi * area) / (perim ** 2)) + return float((4 * np.pi * area) / (perim**2)) def _get_region_props( @@ -67,6 +68,7 @@ def _get_region_props( rp_extra_methods = [ circularity, _measurements.granularity, + _measurements.radial_distribution, ] + extra_methods # Add additional measurements here that calculate on the entire label image @@ -130,6 +132,7 @@ def _subset_image_using_label( @d.dedent @inject_docs(f=ImageFeature) +@squidpy._utils.deprecated def calculate_image_features( adata: AnnData, img: ImageContainer, @@ -222,6 +225,23 @@ def calculate_image_features( _save_data(adata, attr="obsm", key=key_added, data=res, time=start) +def _sdata_image_features_helper( + adata: AnnData, + img: ImageContainer, + layer: str | None = None, + library_id: str | Sequence[str] | None = None, + features: str | Sequence[str] = ImageFeature.SUMMARY.s, + features_kwargs: Mapping[str, Mapping[str, Any]] = MappingProxyType({}), + key_added: str = "img_features", + copy: bool = False, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True, + **kwargs: Any, +) -> pd.DataFrame | None: + return None + + def quantify_morphology( sdata: SpatialData, label: str, @@ -332,9 +352,7 @@ def _validate_methods(methods): def _extract_from_ndarray(channels, col, is_processed, region_props): shape = region_props[col].values[0].shape if not all(val.shape == shape for val in region_props[col].values): - raise ValueError( - f"The results of the morphology method {col} have different shapes, this cannot be handled" - ) + raise ValueError(f"The results of the morphology method {col} have different shapes, this cannot be handled") # Handle cases like centroids which return coordinates for each region if len(shape) == 1 and shape[0] != len(channels): for prop_idx in range(shape[0]): @@ -343,6 +361,8 @@ def _extract_from_ndarray(channels, col, is_processed, region_props): # Handle cases like intensity which return one value per channel for each region if len(shape) == 1 and shape[0] == len(channels): for prop_idx in range(shape[0]): + if region_props[col]: + pass region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] is_processed = True # Handle cases like granularity which return many values for each channel for each region @@ -368,9 +388,7 @@ def _extract_from_list_like(channels, col, is_processed, region_props): # are all lists of the length of the channel list? 
length = len(region_props[col].values[0]) if not all(len(val) == length for val in region_props[col].values): - raise ValueError( - f"The results of the morphology method {col} have different lengths, this cannot be handled" - ) + raise ValueError(f"The results of the morphology method {col} have different lengths, this cannot be handled") if length == len(channels): for i, channel in enumerate(channels): region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] @@ -400,6 +418,7 @@ def _get_table_key(sdata: sd.SpatialData, label: str, kwargs: dict[str, typing.A return table_key +@squidpy._utils.deprecated def _calculate_image_features_helper( obs_ids: Sequence[str], adata: AnnData, @@ -437,6 +456,7 @@ def _calculate_image_features_helper( elif feature == ImageFeature.SUMMARY: res = crop.features_summary(layer=layer, **feature_kwargs) elif feature == ImageFeature.SEGMENTATION: + # TODO: Potential bug here, should be label_layer and intensity layer must be specified via feature_kwargs res = crop.features_segmentation(intensity_layer=layer, **feature_kwargs) elif feature == ImageFeature.CUSTOM: res = crop.features_custom(layer=layer, **feature_kwargs) diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index 3b141a9a5..5d9d7d2c4 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -1,11 +1,17 @@ from __future__ import annotations +import typing +from typing import Protocol + +import centrosome.cpmorphology +import centrosome.propagate import centrosome.zernike -import numpy import numpy as np import scipy.ndimage import skimage.morphology +from squidpy.im import ImageContainer + def _all_regionprops_names() -> list[str]: names = [ @@ -40,18 +46,138 @@ def _all_regionprops_names() -> list[str]: "border_occupied_factor", "granularity", "zernike", + "radial_distribution", ] return names -def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, float]: +# class CalcImageFeatureCallable(Protocol): +# def __call__(self, mask: np.ndarray, pixels: np.ndarray, **kwargs: dict[str, typing.Any]) -> dict: ... 
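# Illustrative sketch (not part of the diff): a minimal, self-contained reference for
# the per-label intensity quantiles that calculate_image_feature/calculate_quantiles
# below implement. It assumes a single 2-D channel and 0 as the background label; the
# helpers below add the (y, x, z, channel) multichannel handling. The name
# quantiles_per_label is used only for illustration and is not defined by this patch.
import numpy as np


def quantiles_per_label(mask, pixels, quantiles=(0.1, 0.5, 0.9)):
    """Return {label: {quantile: value}} for every non-background label."""
    out = {}
    for label_id in np.unique(mask):
        if label_id == 0:
            continue
        values = pixels[mask == label_id]
        out[int(label_id)] = {str(q): float(np.quantile(values, q)) for q in quantiles}
    return out


# Example usage (kept as a comment to avoid import-time side effects):
# rng = np.random.default_rng(0)
# labels = np.zeros((16, 16), dtype=int)
# labels[2:6, 2:6] = 1
# labels[9:14, 9:14] = 2
# print(quantiles_per_label(labels, rng.random((16, 16))))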
+ + +def calculate_image_feature(feature: typing.Callable, mask: np.ndarray, pixels: np.ndarray, *args, **kwargs) -> dict: + result = {} + for label in np.unique(mask): + if label == 0: + continue + if _is_multichannel(pixels): + result[label] = {} + for channel in range(_get_n_channels(pixels)): + result[label][channel] = feature( + mask=mask, pixels=pixels[..., channel][mask[..., 0] == label], **kwargs + ) + else: + result[label] = feature(mask=mask, pixels=pixels[mask == label], **kwargs) + # result[label] = {quantile: np.quantile(pixels[mask == label], quantile) for quantile in quantiles} + + return result + + +def calculate_histogram(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict: + """ + This is way too slow of an implementation + + + Parameters + ---------- + mask + pixels + kwargs + + Returns + ------- + + """ + bins = kwargs.get("bins", 10) + v_range = kwargs.get("v_range", None) + # if v_range is None, use whole-image range + if v_range is None: + v_range = np.min(pixels), np.max(pixels) + hist, _ = np.histogram( + pixels, + bins=bins, + range=v_range, + weights=None, + density=False, + ) + result = {str(i): count for i, count in enumerate(hist)} + return result + + +def calculate_quantiles(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict[str, float]: + quantiles = kwargs.get("quantiles", [0.1, 0.5, 0.9]) + result = {str(quantile): np.quantile(pixels, quantile) for quantile in quantiles} + return result + + +# def calculate_quantiles(mask: np.ndarray, pixels: np.ndarray, *args, **kwargs) -> dict: +# quantiles = kwargs.get("quantiles", [0.1, 0.5, 0.9]) +# +# result = {} +# for label in np.unique(mask): +# if label == 0: +# continue +# if _is_multichannel(pixels): +# result[label] = {} +# for channel in range(_get_n_channels(pixels)): +# # result[label][channel] = _get_quantile(label, mask, pixels[..., channel], quantiles) +# result[label][channel] = {quantile: np.quantile(pixels[..., channel][mask[..., 0] == label], quantile) for quantile in quantiles} +# # current_pixels = pixels[mask == label] +# # result = {} +# # for quantile in quantiles: +# # result[quantile] = np.quantile(current_pixels, quantile) +# # return result +# else: +# result[label] = {quantile: np.quantile(pixels[mask == label], quantile) for quantile in quantiles} +# # result[label] = _get_quantile(label, mask, pixels, quantiles) + + +def _get_n_channels(img: np.ndarray | ImageContainer) -> int: + """ + Returns the number of channels in the image. + Right now, ImageContainer's layer organization is assumed meaning + (y, x, z, channels) + + Parameters + ---------- + img : np.ndarray or ImageContainer The image to test + + Returns + ------- + The number of channels in the image + """ + if len(img.shape) == 4: + return img.shape[3] + else: + raise NotImplementedError(f"Unexpected image shape (got {img.shape}, but expected (y, x, z, channels))") + + +def _is_multichannel(img: np.ndarray) -> bool: + """ + Determines if the image is multichannel, i.e. if it has more than 1 intensity layer. 
+ Right now, ImageContainer's layer organization is assumed meaning + (y, x, z, channels) + + Parameters + ---------- + img : np.ndarray The image to test + + Returns + ------- + True if img has more than one channel + + """ + return _get_n_channels(img) > 1 + + +def border_occupied_factor(mask: np.ndarray, *args, **kwargs) -> dict[int, float]: """ Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label Takes ~1.7s/megapixels to calculate Parameters ---------- - mask: numpy.ndarray integer grayscale image with the labels + mask: np.ndarray integer grayscale image with the labels args None kwargs None @@ -103,33 +229,7 @@ def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, fl return result -if __name__ == "__main__": - import time - - from spatialdata.datasets import blobs - - # label_image = np.array([ - # [0, 0, 0, 0, 0, 0, 0, 0], - # [0, 1, 1, 1, 0, 0, 0, 0], - # [0, 1, 1, 1, 2, 2, 2, 0], - # [0, 1, 1, 1, 2, 2, 2, 0], - # [0, 0, 3, 3, 2, 2, 2, 0], - # [0, 0, 3, 3, 0, 0, 0, 0], - # [0, 0, 0, 0, 0, 0, 0, 0], - # ]) - label_image = blobs(length=512).labels["blobs_labels"].values - - start_time = time.perf_counter() - actual = border_occupied_factor(label_image) - end_time = time.perf_counter() - print(end_time - start_time) - assert len(actual) == len(np.unique(label_image)) - 1 - # for idx, actual_value in enumerate(actual): - # assert actual_value == expected[idx] - # Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py - - __doc__ = """\ MeasureGranularity ================== @@ -197,8 +297,8 @@ def border_occupied_factor(mask: numpy.ndarray, *args, **kwargs) -> dict[int, fl def granularity( - mask: numpy.ndarray, - pixels: numpy.ndarray, + mask: np.ndarray, + pixels: np.ndarray, subsample_size: float = 0.25, image_sample_size: float = 0.25, element_size: int = 10, @@ -265,15 +365,15 @@ def granularity( # # Downsample the image and mask # - new_shape = numpy.array(pixels.shape) + new_shape = np.array(pixels.shape) if subsample_size < 1: new_shape = new_shape * subsample_size if pixels.ndim == 2: - i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) / subsample_size + i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: @@ -285,11 +385,11 @@ def granularity( if image_sample_size < 1: back_shape = new_shape * image_sample_size if pixels.ndim == 2: - i, j = numpy.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) / image_sample_size + i, j = np.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) / image_sample_size back_pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 else: - k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size back_pixels = 
scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 else: @@ -301,16 +401,16 @@ def granularity( footprint = skimage.morphology.disk(radius, dtype=bool) else: footprint = skimage.morphology.ball(radius, dtype=bool) - back_pixels_mask = numpy.zeros_like(back_pixels) + back_pixels_mask = np.zeros_like(back_pixels) back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] back_pixels = skimage.morphology.erosion(back_pixels_mask, footprint=footprint) - back_pixels_mask = numpy.zeros_like(back_pixels) + back_pixels_mask = np.zeros_like(back_pixels) back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] back_pixels = skimage.morphology.dilation(back_pixels_mask, footprint=footprint) try: if image_sample_size < 1: if pixels.ndim == 2: - i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) + i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) # # Make sure the mapping only references the index range of # back_pixels. @@ -319,7 +419,7 @@ def granularity( j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) else: - k, i, j = numpy.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) @@ -343,14 +443,14 @@ def granularity( # THIS IMPLEMENTATION INSTEAD OF OPENING USES EROSION FOLLOWED BY RECONSTRUCTION # ng = granular_spectrum_length - startmean = numpy.mean(pixels[mask]) + startmean = np.mean(pixels[mask]) ero = pixels.copy() # Mask the test image so that masked pixels will have no effect # during reconstruction # ero[~mask] = 0 currentmean = startmean - startmean = max(startmean, numpy.finfo(float).eps) + startmean = max(startmean, np.finfo(float).eps) if pixels.ndim == 2: footprint = skimage.morphology.disk(1, dtype=bool) @@ -359,11 +459,11 @@ def granularity( results = [] for i in range(1, ng + 1): prevmean = currentmean - ero_mask = numpy.zeros_like(ero) + ero_mask = np.zeros_like(ero) ero_mask[mask == True] = ero[mask == True] ero = skimage.morphology.erosion(ero_mask, footprint=footprint) rec = skimage.morphology.reconstruction(ero, pixels, footprint=footprint) - currentmean = numpy.mean(rec[mask]) + currentmean = np.mean(rec[mask]) gs = (prevmean - currentmean) * 100 / startmean # TODO find better solution for the return @@ -375,7 +475,7 @@ def granularity( orig_shape = pixels.shape try: if pixels.ndim == 2: - i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) + i, j = np.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) # # Make sure the mapping only references the index range of # back_pixels. 
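As a reference for the granularity hunks above, here is a self-contained sketch of the erosion-plus-reconstruction spectrum that this adapted CellProfiler code computes. It assumes a 2D image, skips the subsampling and background-removal steps, and uses a hypothetical name `toy_granularity`; it is an illustration, not the patch's implementation.

import numpy as np
import skimage.morphology


def toy_granularity(pixels: np.ndarray, mask: np.ndarray, n_scales: int = 4) -> list[float]:
    # fraction of the masked mean intensity removed at each erosion scale
    footprint = skimage.morphology.disk(1)
    masked = np.where(mask, pixels, 0).astype(float)
    start_mean = max(float(np.mean(masked[mask])), np.finfo(float).eps)
    prev_mean = start_mean
    ero = masked.copy()
    spectrum = []
    for _ in range(n_scales):
        ero = skimage.morphology.erosion(ero, footprint=footprint)
        ero[~mask] = 0  # keep pixels outside the mask from influencing the reconstruction
        rec = skimage.morphology.reconstruction(ero, masked, footprint=footprint)
        cur_mean = float(np.mean(rec[mask]))
        spectrum.append((prev_mean - cur_mean) * 100 / start_mean)
        prev_mean = cur_mean
    return spectrum


rng = np.random.default_rng(0)
img = rng.random((64, 64))
lbl = np.zeros((64, 64), dtype=bool)
lbl[8:56, 8:56] = True
print(toy_granularity(img, lbl))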
@@ -384,7 +484,7 @@ def granularity( j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) else: - k, i, j = numpy.mgrid[0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2]].astype(float) + k, i, j = np.mgrid[0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2]].astype(float) k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) @@ -413,7 +513,7 @@ def granularity( # ) # object_record.current_mean = new_mean # else: - # gss = numpy.zeros((0,)) + # gss = np.zeros((0,)) # measurements.add_measurement(object_record.name, feature, gss) if len(results) == 1: @@ -426,9 +526,9 @@ def _handle_granularity_error(granular_spectrum_length: int): return [None for _ in range(granular_spectrum_length)] -# Copied from https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectsizeshape.py -def zernike(masks: numpy.ndarray, pixels: numpy.ndarray, zernike_numbers: int = 9) -> dict[int, numpy.ndarray]: - unique_indices = numpy.unique(masks) +# Inspired by https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectsizeshape.py +def zernike(masks: np.ndarray, pixels: np.ndarray, zernike_numbers: int = 9) -> dict[int, np.ndarray]: + unique_indices = np.unique(masks) unique_indices = unique_indices[unique_indices != 0] res = centrosome.zernike.zernike( zernike_indexes=centrosome.zernike.get_zernike_indexes(zernike_numbers + 1), @@ -437,18 +537,213 @@ def zernike(masks: numpy.ndarray, pixels: numpy.ndarray, zernike_numbers: int = ) return {orig_idx: res[res_idx] for orig_idx, res_idx in zip(unique_indices, range(res.shape[0]))} - # # - # # Zernike features - # # - # unique_indices = numpy.unique(masks) - # unique_indices = unique_indices[unique_indices>0] - # indices = list(range(1,len(unique_indices) + 1)) - # labels = masks - # zernike_numbers = centrosome.zernike.get_zernike_indexes(zernike_numbers + 1) + +# Copied from https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectintensitydistribution.py +def radial_distribution( + labels: np.ndarray, + pixels: np.ndarray, + scaled: bool = True, + bin_count: int = 4, + maximum_radius: int = 100, +): + """ + zernike_degree : int + Maximum zernike moment. + + This is the maximum radial moment that will be calculated. There are + increasing numbers of azimuthal moments as you increase the radial + moment, so higher values are increasingly expensive to calculate. + + scaled : bool + Scale the bins? + + When True divide the object radially into the number of bins + that you specify. Otherwise create the number of bins you specify + based on distance. If True, it will use a maximum distance so + that each object will have the same measurements (which might be zero + for small objects) and so that the measurements can be taken without + knowing the maximum object radius before the run starts. + + bin_count : int + Number of bins + + Specify the number of bins that you want to use to measure the distribution. + Radial distribution is measured with respect to a series of concentric + rings starting from the object center (or more generally, between contours + at a normalized distance from the object center). This number specifies + the number of rings into which the distribution is to be divided. + + maximum_radius : int + Maximum radius + + Specify the maximum radius for the unscaled bins. 
The unscaled binning method + creates the number of bins that you specify and creates equally spaced bin + boundaries up to the maximum radius. Parts of the object that are beyond this + radius will be counted in an overflow bin. The radius is measured in pixels. + """ + + if labels.dtype == bool: + labels = labels.astype(np.integer) + + M_CATEGORY = "RadialDistribution" + F_FRAC_AT_D = "FracAtD" + F_MEAN_FRAC = "MeanFrac" + F_RADIAL_CV = "RadialCV" + + FF_SCALE = "%dof%d" + FF_OVERFLOW = "Overflow" + FF_GENERIC = FF_SCALE + + MF_FRAC_AT_D = "_".join((M_CATEGORY, F_FRAC_AT_D, FF_GENERIC)) + MF_MEAN_FRAC = "_".join((M_CATEGORY, F_MEAN_FRAC, FF_GENERIC)) + MF_RADIAL_CV = "_".join((M_CATEGORY, F_RADIAL_CV, FF_GENERIC)) + OF_FRAC_AT_D = "_".join((M_CATEGORY, F_FRAC_AT_D, FF_OVERFLOW)) + OF_MEAN_FRAC = "_".join((M_CATEGORY, F_MEAN_FRAC, FF_OVERFLOW)) + OF_RADIAL_CV = "_".join((M_CATEGORY, F_RADIAL_CV, FF_OVERFLOW)) + + unique_labels = np.unique(labels) + n_objects = len(unique_labels[unique_labels > 0]) + d_to_edge = centrosome.cpmorphology.distance_to_edge(labels) + + # Find the point in each object farthest away from the edge. + # This does better than the centroid: + # * The center is within the object + # * The center tends to be an interesting point, like the + # center of the nucleus or the center of one or the other + # of two touching cells. + # + # MODIFICATION: Delegated label indices to maximum_position_of_labels + # This should not affect this one-mask/object function + i, j = centrosome.cpmorphology.maximum_position_of_labels( + # d_to_edge, labels, indices=[1] + d_to_edge, + labels, + indices=[1], + ) + + center_labels = np.zeros(labels.shape, int) + + center_labels[i, j] = labels[i, j] + + # + # Use the coloring trick here to process touching objects + # in separate operations + # + colors = centrosome.cpmorphology.color_labels(labels) + + n_colors = np.max(colors) + + d_from_center = np.zeros(labels.shape) + + cl = np.zeros(labels.shape, int) + + for color in range(1, n_colors + 1): + mask = colors == color + l, d = centrosome.propagate.propagate(np.zeros(center_labels.shape), center_labels, mask, 1) + + d_from_center[mask] = d[mask] + + cl[mask] = l[mask] + + good_mask = cl > 0 + + i_center = np.zeros(cl.shape) + + i_center[good_mask] = i[cl[good_mask] - 1] + + j_center = np.zeros(cl.shape) + + j_center[good_mask] = j[cl[good_mask] - 1] + + normalized_distance = np.zeros(labels.shape) + + if scaled: + total_distance = d_from_center + d_to_edge + + normalized_distance[good_mask] = d_from_center[good_mask] / (total_distance[good_mask] + 0.001) + else: + normalized_distance[good_mask] = d_from_center[good_mask] / maximum_radius + + n_good_pixels = np.sum(good_mask) + + good_labels = labels[good_mask] + + bin_indexes = (normalized_distance * bin_count).astype(int) + + bin_indexes[bin_indexes > bin_count] = bin_count + + labels_and_bins = (good_labels - 1, bin_indexes[good_mask]) + + histogram = scipy.sparse.coo_matrix((pixels[good_mask], labels_and_bins), (n_objects, bin_count + 1)).toarray() + + sum_by_object = np.sum(histogram, 1) + + sum_by_object_per_bin = np.dstack([sum_by_object] * (bin_count + 1))[0] + + fraction_at_distance = histogram / sum_by_object_per_bin + + number_at_distance = scipy.sparse.coo_matrix( + (np.ones(n_good_pixels), labels_and_bins), (n_objects, bin_count + 1) + ).toarray() + + sum_by_object = np.sum(number_at_distance, 1) + + sum_by_object_per_bin = np.dstack([sum_by_object] * (bin_count + 1))[0] + + fraction_at_bin = number_at_distance / 
sum_by_object_per_bin + + mean_pixel_fraction = fraction_at_distance / (fraction_at_bin + np.finfo(float).eps) + + # Anisotropy calculation. Split each cell into eight wedges, then + # compute coefficient of variation of the wedges' mean intensities + # in each ring. # - # zf_l = centrosome.zernike.zernike(zernike_numbers, labels, indices) - # results = {} - # for (n, m), z in zip(zernike_numbers, zf_l.transpose()): - # results[f"Zernike_{n}_{m}"] = z + # Compute each pixel's delta from the center object's centroid + i, j = np.mgrid[0 : labels.shape[0], 0 : labels.shape[1]] + + i_mask = i[good_mask] > i_center[good_mask] + + j_mask = j[good_mask] > j_center[good_mask] + + abs_mask = abs(i[good_mask] - i_center[good_mask]) > abs(j[good_mask] - j_center[good_mask]) + + radial_index = i_mask.astype(int) + j_mask.astype(int) * 2 + abs_mask.astype(int) * 4 + + results = {} + + for bin_idx in range(bin_count + (0 if scaled else 1)): + bin_mask = good_mask & (bin_indexes == bin_idx) + + bin_pixels = np.sum(bin_mask) + + bin_labels = labels[bin_mask] + + bin_radial_index = radial_index[bin_indexes[good_mask] == bin_idx] + + labels_and_radii = (bin_labels - 1, bin_radial_index) + + radial_values = scipy.sparse.coo_matrix((pixels[bin_mask], labels_and_radii), (n_objects, 8)).toarray() + + pixel_count = scipy.sparse.coo_matrix((np.ones(bin_pixels), labels_and_radii), (n_objects, 8)).toarray() + + mask = pixel_count == 0 + + radial_means = np.ma.masked_array(radial_values / pixel_count, mask) + + radial_cv = np.std(radial_means, 1) / np.mean(radial_means, 1) + + radial_cv[np.sum(~mask, 1) == 0] = 0 + + for measurement, feature, overflow_feature in ( + (fraction_at_distance[:, bin_idx], MF_FRAC_AT_D, OF_FRAC_AT_D), + (mean_pixel_fraction[:, bin_idx], MF_MEAN_FRAC, OF_MEAN_FRAC), + (np.array(radial_cv), MF_RADIAL_CV, OF_RADIAL_CV), + ): + if bin_idx == bin_count: + measurement_name = overflow_feature + else: + measurement_name = feature % (bin_idx + 1, bin_count) + + results[measurement_name] = measurement - # return results + return results diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 0b7c4367a..095980a96 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -2,6 +2,7 @@ import time import typing +from pyexpat import features import numpy as np import numpy.testing as npt @@ -9,17 +10,31 @@ import pytest import skimage.measure import spatialdata as sd +from anndata import AnnData +from skimage.measure import regionprops from spatialdata import SpatialData from spatialdata.datasets import blobs, raccoon import squidpy as sq -from squidpy.im import _measurements # noinspection PyProtectedMember -from squidpy.im._feature import _get_region_props, _get_table_key +from squidpy.im import ImageContainer, _measurements, quantify_morphology # noinspection PyProtectedMember -from squidpy.im._measurements import border_occupied_factor +from squidpy.im._feature import ( + _get_region_props, + _get_table_key, + _sdata_image_features_helper, + calculate_image_features, +) + +# noinspection PyProtectedMember +from squidpy.im._measurements import ( + border_occupied_factor, + calculate_histogram, + calculate_image_feature, + calculate_quantiles, +) @pytest.fixture @@ -55,14 +70,8 @@ def dummy_callable(): @pytest.fixture def malformed_morphology_methods() -> dict[str, typing.Any]: methods = { - "wrong_container": [ - ("label", "area"), - "label,area" - ], - "wrong_method_type": [ - ["test", dummy_callable, 42.42], - ["test", dummy_callable, 42] - ] + 
"wrong_container": [("label", "area"), "label,area"], + "wrong_method_type": [["test", dummy_callable, 42.42], ["test", dummy_callable, 42]], } return methods @@ -80,12 +89,7 @@ class TestMorphology: def test_sanity_method_list(self, sdata_blobs, malformed_morphology_methods): with pytest.raises(ValueError, match="Argument `methods` must be a list of strings."): for methods in malformed_morphology_methods["wrong_container"]: - sq.im.quantify_morphology( - sdata=sdata_blobs, - label="blobs_labels", - image="blobs_image", - methods=methods - ) + sq.im.quantify_morphology(sdata=sdata_blobs, label="blobs_labels", image="blobs_image", methods=methods) # @pytest.mark.parametrize( # "sdata,methods", @@ -95,31 +99,18 @@ def test_sanity_method_list(self, sdata_blobs, malformed_morphology_methods): def test_sanity_method_list_types(self, sdata_blobs, malformed_morphology_methods): with pytest.raises(ValueError, match="All elements in `methods` must be strings or callables."): for methods in malformed_morphology_methods["wrong_method_type"]: - sq.im.quantify_morphology( - sdata=sdata_blobs, - label="blobs_labels", - image="blobs_image", - methods=methods - ) + sq.im.quantify_morphology(sdata=sdata_blobs, label="blobs_labels", image="blobs_image", methods=methods) def test_get_table_key_no_annotators(self, sdata_blobs): label = "blobs_multiscale_labels" with pytest.raises(ValueError, match=f"No tables automatically detected in `sdata` for {label}"): - _get_table_key( - sdata=sdata_blobs, - label=label, - kwargs={} - ) + _get_table_key(sdata=sdata_blobs, label=label, kwargs={}) def test_get_table_key_multiple_annotators(self, sdata_blobs): sdata_blobs.tables["multi_table"] = sd.deepcopy(sdata_blobs["table"]) label = "blobs_labels" with pytest.raises(ValueError, match=f"Multiple tables detected in `sdata` for {label}"): - _get_table_key( - sdata=sdata_blobs, - label=label, - kwargs={} - ) + _get_table_key(sdata=sdata_blobs, label=label, kwargs={}) def test_quantify_morphology_granularity(self, sdata_blobs): granular_spectrum_length = 16 @@ -194,6 +185,17 @@ def test_quantify_morphology_multiscale(self, sdata_blobs, morphology_methods): split_by_channels=True, ) + def test_quantify_morphology_cp_measure(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + split_by_channels=True, + methods=["label", "radial_distribution"], + ) + + print(sdata_blobs["table"].obsm["morphology"].columns) + # TODO Remove for release @pytest.fixture @@ -240,3 +242,201 @@ def test_border_occupied_factor(self): expected = {1: 3 / 8, 2: 3 / 8, 3: 2 / 4} actual = border_occupied_factor(label_image) assert actual == expected + + def test_radial_distribution(self, sdata_mibitof): + pixels = np.random.randint(100, size=64**2).reshape((64, 64)) + mask = np.zeros_like(pixels, dtype=bool) + mask[2:-3, 2:-3] = True + + result = _measurements.radial_distribution( + labels=pixels, + pixels=mask, + ) + + # result = _measurements.radial_distribution( + # labels=np.array(sdata_mibitof["point8_labels"]), + # pixels=np.array(sdata_mibitof["point8_image"]), + # ) + + print(result) + + def test_cp_measure(self, sdata_blobs): + from cp_measure.bulk import get_fast_measurements + + measurements = get_fast_measurements() + + size = 200 + rng = np.random.default_rng(42) + pixels = rng.integers(low=0, high=10, size=(size, size)) + + masks = np.zeros_like(pixels) + masks[5:-6, 5:-6] = 1 + + results = {} + for measurement_name, measurement in measurements.items(): + 
results[measurement_name] = measurement(masks, pixels) + + print(results) + + +@pytest.fixture() +def blobs_as_image_container(sdata_blobs: SpatialData) -> ImageContainer: + img_layer_name = "blobs_image" + seg_layer_name = "blobs_labels" + img = ImageContainer(sdata_blobs[img_layer_name].to_numpy(), layer=img_layer_name) + img.add_img(sdata_blobs[seg_layer_name].to_numpy(), layer=seg_layer_name) + + return img + + +@pytest.fixture() +def blobs_as_adata(sdata_blobs: SpatialData) -> AnnData: + s_adata = sdata_blobs.tables["table"] + # print(s_adata.uns) + return s_adata + + +@pytest.fixture() +def mibitof_as_image_container(sdata_mibitof: SpatialData) -> ImageContainer: + img_layer_name = "point8_image" + seg_layer_name = "point8_labels" + img = ImageContainer(sdata_mibitof[img_layer_name].to_numpy(), layer=img_layer_name) + img.add_img(sdata_mibitof[seg_layer_name].to_numpy(), layer=seg_layer_name) + return img + + +@pytest.fixture() +def mibitof_as_adata(sdata_mibitof: SpatialData) -> AnnData: + s_adata = sdata_mibitof.tables["table"] + s_adata.uns["spatial"] = {"point8_labels": {"scalefactors": {"spot_diameter_fullres": 7.0}}} + return s_adata + + +@pytest.fixture() +def visium_adata() -> AnnData: + return sq.datasets.visium_fluo_adata_crop() + + +@pytest.fixture() +def visium_img() -> ImageContainer: + img = sq.datasets.visium_fluo_image_crop() + sq.im.segment( + img=img, + layer="image", + layer_added="segmented_watershed", + method="watershed", + channel=0, + ) + return img + + +@pytest.fixture() +def calc_im_features_kwargs() -> dict[str, typing.Any]: + kwargs = { + # "adata": visium_adata, + # "img": visium_img, + # "features": features, + "layer": "image", + # "library_id": "point8_labels", + "key_added": "segmentation_features", + "features_kwargs": { + "segmentation": { + "label_layer": "segmented_watershed", + "props": ["label", "area", "mean_intensity"], + "channels": [1, 2], + } + }, + "copy": True, + "mask_circle": True, + } + return kwargs + + +class TestMorphologyImageFeaturesCompatibility: + def test_quantiles( + self, calc_im_features_kwargs: dict[str, typing.Any], visium_adata: AnnData, visium_img: ImageContainer + ): + calc_im_features_kwargs.update( + { + "features": ["summary"], + "adata": visium_adata, + "img": visium_img, + } + ) + + # expected = calculate_image_features( + # **calc_im_features_kwargs + # ) + actual = calculate_quantiles( + mask=visium_img["segmented_watershed"].to_numpy(), pixels=visium_img["image"].to_numpy() + ) + print(actual) + + def test_calculate_image_features(self, visium_adata: AnnData, visium_img: ImageContainer): + actual = calculate_image_feature( + feature=calculate_histogram, + mask=visium_img["segmented_watershed"].to_numpy(), + pixels=visium_img["image"].to_numpy(), + ) + print(actual) + + def test_calculate_image_features_performance(self, visium_adata: AnnData, visium_img: ImageContainer): + start_time = time.perf_counter() + props = regionprops( + label_image=visium_img["segmented_watershed"].to_numpy()[:, :, 0, 0], + intensity_image=visium_img["image"].to_numpy()[:, :, 0, 0], + extra_properties=calculate_histogram, + ) + actual = {prop.label: prop.calculate_histogram for prop in props} + end_time = time.perf_counter() + # calc_im_features_result = calculate_image_feature( + # feature=calculate_histogram, + # mask=visium_img["segmented_watershed"].to_numpy(), + # pixels=visium_img["image"].to_numpy() + # ) + # end_time = time.perf_counter() + # assert end_time - start_time < 60 + + print(end_time - start_time) + + def 
test_im_features_morphology_equivalence( + self, + # blobs_as_adata: AnnData, blobs_as_image_container: ImageContainer, + # mibitof_as_adata: AnnData, mibitof_as_image_container: ImageContainer, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + # adata: AnnData + ): + # print(adata.uns.spatial) + # expected = calculate_image_features( + # adata=mibitof_as_adata, + # img=mibitof_as_image_container, + # layer="point8_labels", + # library_id="point8_labels", + # features=features, + # key_added="foo", + # copy=True, + # features_kwargs={"segmentation": {"label_layer": "point8_labels", "intensity_layer": "point8_image"}}, + # ) + + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + } + ) + + expected = calculate_image_features(**calc_im_features_kwargs) + + actual = _sdata_image_features_helper(**calc_im_features_kwargs) + # morphology = quantify_morphology() + + pd.testing.assert_frame_equal(actual, expected) + + # def test_helper_equivalence(self, adata: AnnData, cont: ImageContainer): + # expected = _calculate_image_features_helper( + # adata=adata, + # img=cont["image"], + # ) From cde052646b0138732ab04a13e3ee115f181386c8 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Fri, 22 Nov 2024 19:04:09 +0100 Subject: [PATCH 19/20] handle dict returns from calculate_* features --- src/squidpy/im/_feature.py | 13 ++++++++++--- src/squidpy/im/_measurements.py | 32 +++++++++++++++++++++++++++++++- tests/image/test_morphology.py | 4 +++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 02ed3a2bf..088927ba4 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -69,6 +69,9 @@ def _get_region_props( circularity, _measurements.granularity, _measurements.radial_distribution, + _measurements.calculate_image_texture, + _measurements.calculate_histogram, + _measurements.calculate_quantiles, ] + extra_methods # Add additional measurements here that calculate on the entire label image @@ -361,9 +364,13 @@ def _extract_from_ndarray(channels, col, is_processed, region_props): # Handle cases like intensity which return one value per channel for each region if len(shape) == 1 and shape[0] == len(channels): for prop_idx in range(shape[0]): - if region_props[col]: - pass - region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + if all(isinstance(val[prop_idx], dict) for val in region_props[col].values): + for key in region_props[col][0][prop_idx].keys(): + region_props[f"{col}_ch{prop_idx}_{key}"] = [ + float(val[prop_idx][key]) for val in region_props[col].values + ] + else: + region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] is_processed = True # Handle cases like granularity which return many values for each channel for each region if len(shape) == 2: diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py index 5d9d7d2c4..1a7442900 100644 --- a/src/squidpy/im/_measurements.py +++ b/src/squidpy/im/_measurements.py @@ -9,6 +9,8 @@ import numpy as np import scipy.ndimage import skimage.morphology +from skimage.feature import graycomatrix, graycoprops +from skimage.util import img_as_ubyte from squidpy.im import ImageContainer @@ -47,6 +49,9 @@ def _all_regionprops_names() -> list[str]: "granularity", "zernike", "radial_distribution", + "calculate_image_texture", + 
"calculate_histogram", + "calculate_quantiles", ] return names @@ -68,11 +73,36 @@ def calculate_image_feature(feature: typing.Callable, mask: np.ndarray, pixels: ) else: result[label] = feature(mask=mask, pixels=pixels[mask == label], **kwargs) - # result[label] = {quantile: np.quantile(pixels[mask == label], quantile) for quantile in quantiles} return result +def calculate_image_texture(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict: + distances = kwargs.get("image_texture_distances", (1,)) + angles = kwargs.get("image_texture_angles", (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)) + props = kwargs.get( + "image_texture_graycoprops", + ( + "contrast", + "dissimilarity", + "homogeneity", + "correlation", + "ASM", + ), + ) + if not np.issubdtype(pixels.dtype, np.uint8): + pixels = img_as_ubyte(pixels, force_copy=False) # values must be in [0, 255] + + features = {} + comatrix = graycomatrix(pixels, distances=distances, angles=angles, levels=256) + for prop in props: + tmp_features = graycoprops(comatrix, prop=prop) + for distance_idx, dist in enumerate(distances): + for angle_idx, a in enumerate(angles): + features[f"{prop}_dist-{dist}_angle-{a:.2f}"] = tmp_features[distance_idx, angle_idx] + return features + + def calculate_histogram(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict: """ This is way too slow of an implementation diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index 095980a96..d7624d4ba 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -33,6 +33,7 @@ border_occupied_factor, calculate_histogram, calculate_image_feature, + calculate_image_texture, calculate_quantiles, ) @@ -374,7 +375,8 @@ def test_quantiles( def test_calculate_image_features(self, visium_adata: AnnData, visium_img: ImageContainer): actual = calculate_image_feature( - feature=calculate_histogram, + # feature=calculate_histogram, + feature=calculate_image_texture, mask=visium_img["segmented_watershed"].to_numpy(), pixels=visium_img["image"].to_numpy(), ) From 9a9511f73f50db67a466469b5c4e0c4bae72aa54 Mon Sep 17 00:00:00 2001 From: Nicolas Peschke Date: Tue, 21 Jan 2025 14:24:16 +0100 Subject: [PATCH 20/20] add _sdata_image_features_helper --- src/squidpy/im/_feature.py | 73 ++++++++++++++++++++++- tests/image/test_morphology.py | 104 ++++++++++++++++++++++++++++++--- 2 files changed, 167 insertions(+), 10 deletions(-) diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 088927ba4..cd56be207 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -15,6 +15,7 @@ from scanpy import logging as logg from skimage.measure import perimeter, regionprops from spatialdata import SpatialData +from spatialdata.models import Image2DModel, Labels2DModel, TableModel import squidpy._utils from squidpy._constants._constants import ImageFeature @@ -228,7 +229,7 @@ def calculate_image_features( _save_data(adata, attr="obsm", key=key_added, data=res, time=start) -def _sdata_image_features_helper( +def sdata_image_features_helper( adata: AnnData, img: ImageContainer, layer: str | None = None, @@ -240,9 +241,75 @@ def _sdata_image_features_helper( n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, + return_results_as_new_adata: bool = False, **kwargs: Any, -) -> pd.DataFrame | None: - return None +) -> pd.DataFrame | AnnData | None: + segmentation_kwargs = features_kwargs.get("segmentation", None) + if segmentation_kwargs is None: + raise ValueError( + "A segmentation layer with its " + 
"`features_kwargs['segmentation']['label_layer']` " + "is necessary for the morphology quantification" + ) + + label_layer = segmentation_kwargs.get("label_layer", None) + if label_layer is None: + raise ValueError( + "`features_kwargs['segmentation']['label_layer']` " "is necessary for the morphology quantification" + ) + + image_layer = layer + if image_layer is None: + raise ValueError( + "A `layer` name for the image layer in the ImageContainer " "is necessary for the morphology quantification" + ) + + adata.uns.update( + {"spatialdata_attrs": {"region": label_layer, "region_key": "region", "instance_key": "instance_id"}} + ) + + adata.obs["region"] = label_layer + adata.obs["instance_id"] = -1 + # adata.uns["spatialdata_attrs"]["region"] = label_layer + # adata.uns["spatialdata_attrs"]["region_key"] = "region" + # adata.uns["spatialdata_attrs"]["instance_key"] = "instance_id" + + try: + sdata = SpatialData( + images={ + image_layer: Image2DModel.parse( + data=img[image_layer].rename({"channels": "c"}).transpose("z", "c", "y", "x")[0, ...] + ), + }, + labels={ + label_layer: Labels2DModel.parse( + data=img[label_layer].rename({"channels:0": "c"}).transpose("z", "c", "y", "x")[0, 0, ...] + ), + }, + tables={"table": TableModel.parse(adata)}, + ) + except ValueError as exc: + raise ValueError( + "Failed to create SpatialData object, " + "possibly due to dimensions in xarray not being named " + "('c', 'z', 'y', 'x') or them not being in this particular order" + ) from exc + + start = logg.info("Calculating features") + + res = quantify_morphology( + sdata, + label=label_layer, + image=image_layer, + ) + + if copy: + return res + + if return_results_as_new_adata: + return AnnData(res) + + _save_data(adata, attr="obsm", key=key_added, data=res, time=start) def quantify_morphology( diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py index d7624d4ba..08c0332e2 100644 --- a/tests/image/test_morphology.py +++ b/tests/image/test_morphology.py @@ -24,8 +24,8 @@ from squidpy.im._feature import ( _get_region_props, _get_table_key, - _sdata_image_features_helper, calculate_image_features, + sdata_image_features_helper, ) # noinspection PyProtectedMember @@ -432,13 +432,103 @@ def test_im_features_morphology_equivalence( expected = calculate_image_features(**calc_im_features_kwargs) - actual = _sdata_image_features_helper(**calc_im_features_kwargs) + actual = sdata_image_features_helper(**calc_im_features_kwargs) # morphology = quantify_morphology() pd.testing.assert_frame_equal(actual, expected) - # def test_helper_equivalence(self, adata: AnnData, cont: ImageContainer): - # expected = _calculate_image_features_helper( - # adata=adata, - # img=cont["image"], - # ) + +class TestImageContainerSDataCompatibility: + @pytest.mark.xfail( + raises=AssertionError, + reason="There cannot exist two xarray datasets in one ImageContainer " + "with the same 'channel' name, while SpatialData enforces that dimensions " + "must be named ('c', 'z', 'y', 'x')", + ) + def test_image_container_enforcing_unique_channel_naming( + self, calc_im_features_kwargs: dict[str, typing.Any], visium_img: ImageContainer + ): + from spatialdata.models._utils import _validate_dims + + label_layer = calc_im_features_kwargs["features_kwargs"]["segmentation"]["label_layer"] + image_layer = calc_im_features_kwargs["layer"] + + visium_img[label_layer] = visium_img[label_layer].rename({"channels:0": "c"}) + visium_img[image_layer] = visium_img[image_layer].rename({"channels": "c"}) + + assert 
visium_img[label_layer].dims[-1] == "c" + assert visium_img[label_layer].dims[-1] == visium_img[image_layer].dims[-1] + # "WARNING: Channel dimension cannot be aligned with an existing one, using `c_0`" + # "c_0" + + +class TestSdataHelper: + def test_sdata_helper_copy( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + } + ) + + # label_layer = calc_im_features_kwargs["features_kwargs"]["segmentation"]["label_layer"] + # image_layer = calc_im_features_kwargs["layer"] + # + # visium_img[label_layer] = visium_img[label_layer].rename({"channels:0": "c"}) + # visium_img[label_layer] = visium_img[label_layer].transpose("c", "z", "y", "x") + # + # visium_img[image_layer] = visium_img[image_layer].rename({"channels": "c"}) + # visium_img[image_layer] = visium_img[image_layer].transpose("c", "z", "y", "x") + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert isinstance(actual, pd.DataFrame) + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in actual.columns]) + + def test_sdata_helper_obsm( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + "copy": False, + } + ) + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert actual is None + for name in _measurements._all_regionprops_names(): + assert any( + [column.startswith(name) for column in visium_adata.obsm[calc_im_features_kwargs["key_added"]].columns] + ) + + def test_sdata_helper_adata( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + "copy": False, + } + ) + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert isinstance(actual, AnnData) + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in actual.var_names])
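To close, a hedged usage sketch of the new public entry point, mirroring `test_quantify_morphology_cp_measure` above. The blobs dataset, the keyword arguments, and the `obsm["morphology"]` key are taken from those tests; the exact set of returned columns depends on the selected methods.

import squidpy as sq
from spatialdata.datasets import blobs

# toy SpatialData with "blobs_image", "blobs_labels" and an annotating "table"
sdata = blobs()
sq.im.quantify_morphology(
    sdata=sdata,
    label="blobs_labels",
    image="blobs_image",
    methods=["label", "radial_distribution"],
    split_by_channels=True,
)
# per-object morphology features are written to the annotating table
print(sdata["table"].obsm["morphology"].columns)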