From ac6bcc273c159ee4ab25b86f9bcf3faa32d5285b Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 18 Dec 2023 17:30:03 +0000 Subject: [PATCH 1/9] Colocalization module --- metaspace_converter/__init__.py | 8 +- metaspace_converter/colocalization.py | 101 ++++++++++++++++++ .../tests/colocalization_test.py | 96 +++++++++++++++++ pyproject.toml | 2 +- 4 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 metaspace_converter/colocalization.py create mode 100644 metaspace_converter/tests/colocalization_test.py diff --git a/metaspace_converter/__init__.py b/metaspace_converter/__init__.py index 87c9805..9f5dabe 100644 --- a/metaspace_converter/__init__.py +++ b/metaspace_converter/__init__.py @@ -1,5 +1,11 @@ +from metaspace_converter import colocalization from metaspace_converter.anndata_to_array import anndata_to_image_array from metaspace_converter.to_anndata import metaspace_to_anndata from metaspace_converter.to_spatialdata import metaspace_to_spatialdata -__all__ = ["metaspace_to_anndata", "metaspace_to_spatialdata", "anndata_to_image_array"] +__all__ = [ + "metaspace_to_anndata", + "metaspace_to_spatialdata", + "anndata_to_image_array", + "colocalization", +] diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py new file mode 100644 index 0000000..b476413 --- /dev/null +++ b/metaspace_converter/colocalization.py @@ -0,0 +1,101 @@ +from typing import Optional, Tuple + +import numpy as np +from anndata import AnnData +from scipy import ndimage + +from metaspace_converter.anndata_to_array import anndata_to_image_array + + +def colocML_preprocessing( + adata: AnnData, + layer: Optional[str] = "colocml_preprocessing", + median_filter_size: Tuple[int, int] = (3, 3), + quantile_threshold: float = 0.5, +): + """ + Preprocessing for colocalization analysis according to the colocML publication + (https://doi.org/10.1093/bioinformatics/btaa085). + + In the publication the authors evaluated colocalization metrics and preprocessing approaches. + They found the best performance for + 1) median filtering of ion images with a (3, 3) kernel size and + 2) quantile thresholding ad 50%, meaning all pixels with intensities below the 50% + quantile set to 0. + + This function performs the same preprocessing steps. + Recommended for call before running the ``colocalization`` function. + + Args: + adata: An AnnData object. + layer: Key for the adata.layers dict in which the processed data will be saved. + Default value is ``colocml_preprocessing``. + If None, ``adata.X`` will be overwritten with the processed data. + median_filter_size: 2-dimensional filter size for the median filtering that + will be performed per ion image. + quantile_threshold: Float between 0 and 1. The function computes the quantile value per + ion image and all pixels below the quantile threshold will be set to 0. + + Returns: + None. The processed data is saved in ``layer``. If layer is net to None, ``adata.X`` will + be overwritten + + """ + + # Extract image array from anndata: + imarray = anndata_to_image_array(adata=adata) + + # Median filtering + imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) + + # reshaping + imarray = imarray.reshape((imarray.shape[0], -1)) + + # Quantile thresholding + mask = imarray < np.percentile(imarray, q=quantile_threshold * 100, axis=1)[:, np.newaxis] + imarray[mask] = 0 + + # Inserting of processed data into adata object + if layer is None: + adata.X = imarray.transpose() + else: + adata.layers[layer] = imarray.transpose() + + +def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing"): + """ + Colocalization of ion images using the cosine similarity metric. + + In combination with ``colocML_preprocessing`` this metric performed best in the + colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). + + We recommend to call the the ``colocML_preprocessing`` function beforehand. + + Args: + adata: An AnnData object. + layer: Key for ``adata.layer`` from which the ionimage_data for preprocessing taken. + If ``None``, ``adata.X`` is used. ``colocML_preprocessing`` will save the preprocessed + data per default in ``adata.layer['colocml_preprocessing']``. + + Returns: + None. The processed data is saved in ``adata.uns['colocalization]``. + """ + + if layer == None or layer not in adata.layers.keys(): + data = adata.X.transpose() + else: + data = adata.layers[layer].transpose() + + coloc = _pairwise_cosine_similarity(data) + + adata.uns["colocalization"] = coloc + + +def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: + + norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) + + # Compute pairwise cosine similarity + similarity_matrix = np.dot(norm_data, norm_data.T) + + return similarity_matrix diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py new file mode 100644 index 0000000..2fd3e9e --- /dev/null +++ b/metaspace_converter/tests/colocalization_test.py @@ -0,0 +1,96 @@ +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd +import pytest +from anndata import AnnData + +if TYPE_CHECKING: + import _pytest.fixtures + +from metaspace_converter import anndata_to_image_array +from metaspace_converter.colocalization import colocalization, colocML_preprocessing +from metaspace_converter.constants import COL, METASPACE_KEY, X, Y +from metaspace_converter.to_anndata import all_image_pixel_coordinates + + +@pytest.fixture +def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: + num_ions = request.param.get("num_ions", 0) + height = request.param.get("height", 0) + width = request.param.get("width", 0) + shape = (num_ions, height, width) + # Create a stack of ion images where every pixel has a different value + ion_images_stack = np.arange(np.prod(shape)).reshape(shape) + coordinates = all_image_pixel_coordinates(shape[1:]) + # Create an AnnData with pixel coordinates + obs = pd.DataFrame( + {COL.ion_image_pixel_y: coordinates[:, 0], COL.ion_image_pixel_x: coordinates[:, 1]} + ) + adata = AnnData( + X=ion_images_stack.reshape((height * width, num_ions)), + obs=obs, + uns={METASPACE_KEY: {"image_size": {X: width, Y: height}}}, + ) + return adata + + +@pytest.mark.parametrize( + "adata_dummy", + [dict(num_ions=5, height=10, width=10), dict(num_ions=4, height=4, width=4)], + indirect=["adata_dummy"], +) +def test_colocml_preprocessing(adata_dummy): + + adata = adata_dummy + + # Test median filtering + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer="colocml_preprocessing" + ) + + actual = anndata_to_image_array(adata) + + # median filter + + expected_preprocessing = np.median(actual[0][:3, :3]) + observed = adata.layers["colocml_preprocessing"][ + adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0 + ] + assert observed == expected_preprocessing + + expected_preprocessing = np.median(actual[1][:3, :3]) + observed = adata.layers["colocml_preprocessing"][ + adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1 + ] + assert observed == expected_preprocessing + + # Quantile thresholding + adata.X[0].reshape(-1) + + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer="colocml_preprocessing" + ) + + assert all(np.sum(adata.layers["colocml_preprocessing"] == 0, axis=0) <= 0.5 * adata.X.shape[0]) + + # Layer exists + assert "colocml_preprocessing" in adata.layers.keys() + + # Layer sizes match + assert adata.X.shape == adata.layers["colocml_preprocessing"].shape + + +def test_colocalization(): + + arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose() + adata = AnnData(X=arr) + + colocalization(adata, layer=None) + + assert "colocalization" in adata.uns.keys() + + coloc = adata.uns["colocalization"] + + expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + assert np.all(coloc == expected) diff --git a/pyproject.toml b/pyproject.toml index 8f1dbf6..860d151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "metaspace-converter" -version = "1.0.1" +version = "1.1.0" authors = [ {name = "Tim Daniel Rose", email = "tim.rose@embl.de"}, {name = "Andreas Eisenbarth", email = "andreas.eisenbarth@embl.de"}, From 3968687cea8a1a36f2f05a9e10eec9ea9f481395 Mon Sep 17 00:00:00 2001 From: tdrose Date: Tue, 19 Dec 2023 17:02:26 +0100 Subject: [PATCH 2/9] Documentation for coloc module --- README.md | 6 +- docs/examples.rst | 37 ++++++++++ metaspace_converter/colocalization.py | 23 ++++-- metaspace_converter/constants.py | 2 + .../tests/colocalization_test.py | 73 +++++++++++-------- 5 files changed, 102 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 6054664..e902323 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ of many packages of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) su [squidpy](https://squidpy.readthedocs.io/en/stable/index.html) for spatial omics analysis. Another supported format that is part of the [scverse](https://doi.org/10.1038/s41587-023-01733-8) -is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. +is [SpatialData](https://spatialdata.scverse.org/en/latest/) for storing, aligning, and processing spatial omics data. This enables users to easily align and integrate METASPACE datasets to other spatial omics modalities. +Additionally, the commonly used colocalization analysis for spatial metabolomics can be performed with the package. + If you encounter any bugs or have suggestions for new features, please open an issue in the [GitHub repository](https://github.com/metaspace2020/metaspace-converter). @@ -28,6 +30,7 @@ If you encounter any bugs or have suggestions for new features, please open an i Our package requires `python >= 3.9`. You can install the package directly from [PyPI](https://pypi.org/project/metaspace-converter/): + ```bash pip install metaspace-converter ``` @@ -161,7 +164,6 @@ sdata.points["maldi_points"] = sdata.transform_element_to_coordinate_system( Unless specified otherwise in file headers or LICENSE files present in subdirectories, all files are licensed under the [Apache 2.0 license](https://github.com/metaspace2020/metaspace/blob/master/LICENSE). - [badge-docs]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/docs.yml?label=documentation [badge-pypi]: https://img.shields.io/pypi/v/metaspace-converter [badge-tests]: https://img.shields.io/github/actions/workflow/status/metaspace2020/metaspace-converter/tests.yml?branch=master&label=tests diff --git a/docs/examples.rst b/docs/examples.rst index 2a4dfae..14a0cf4 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -114,3 +114,40 @@ Here using a reversed colormap which better represents intense values on bright .. image:: ./_static/img/example_img_sd.png :alt: Visualization with SpatialData + + +Colocalization Analysis +----------------------- + +A popular type of analysis for imaging Mass Spectrometry data or spatial metabolomics +is colocalization analysis. +In the `ColocML`_ publication, +the authors evaluated data processing and colocalization metrics. +Their best method (median filtering, quantile thresholding, and cosine similarity), +can be executed with our package: + +.. testcode:: + + from metaspace_converter import metaspace_to_anndata, colocalization + + # Download data + adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1) + + # Perform median filtering and quantile thresholding + # The processed data is saved as a layer `adata.layers["colocml_preprocessing"]` + # It has the same dimensions as `adata.X` + colocalization.colocML_preprocessing(adata, layer="colocml_preprocessing") + + # Compute the pairwise colocalization metrix between all ion images + # As an input, the processed data from `adata.layers["colocml_preprocessing"]` is used + # The colocalization matrix is saved in `adata.uns["colocalization"]` + colocalization.colocalization(adata, layer="colocml_preprocessing") + +.. testoutput:: + :hide: + + ... + + + +.. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085 diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py index b476413..56a7c9a 100644 --- a/metaspace_converter/colocalization.py +++ b/metaspace_converter/colocalization.py @@ -5,6 +5,7 @@ from scipy import ndimage from metaspace_converter.anndata_to_array import anndata_to_image_array +from metaspace_converter.constants import COLOCALIZATION def colocML_preprocessing( @@ -40,11 +41,17 @@ def colocML_preprocessing( None. The processed data is saved in ``layer``. If layer is net to None, ``adata.X`` will be overwritten + Raises: + ValueError: If no annotations are available in ``adata.X``. + """ # Extract image array from anndata: imarray = anndata_to_image_array(adata=adata) + if len(imarray) == 0: + raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.") + # Median filtering imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) @@ -66,10 +73,10 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing """ Colocalization of ion images using the cosine similarity metric. - In combination with ``colocML_preprocessing`` this metric performed best in the + In combination with the ``colocML_preprocessing`` function, this metric performed best in the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). - We recommend to call the the ``colocML_preprocessing`` function beforehand. + It is recommended to call the the ``colocML_preprocessing`` function beforehand. Args: adata: An AnnData object. @@ -78,21 +85,25 @@ def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing data per default in ``adata.layer['colocml_preprocessing']``. Returns: - None. The processed data is saved in ``adata.uns['colocalization]``. + None. The processed data is saved in ``adata.uns['colocalization']``. """ + # Select data if layer == None or layer not in adata.layers.keys(): - data = adata.X.transpose() + data = np.array(adata.X).transpose() else: - data = adata.layers[layer].transpose() + data = np.array(adata.layers[layer]).transpose() + # Compute colocalization coloc = _pairwise_cosine_similarity(data) - adata.uns["colocalization"] = coloc + # Save colocalization + adata.uns[COLOCALIZATION] = coloc def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: + # Divide image vectors by euclidean norm norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) # Compute pairwise cosine similarity diff --git a/metaspace_converter/constants.py b/metaspace_converter/constants.py index 99bf75e..1d4f4f9 100644 --- a/metaspace_converter/constants.py +++ b/metaspace_converter/constants.py @@ -28,3 +28,5 @@ class COL: XY: Final = (X, Y) YX: Final = (Y, X) YXC: Final = (Y, X, C) + +COLOCALIZATION = "colocalization" diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 2fd3e9e..408cb8a 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -10,7 +10,7 @@ from metaspace_converter import anndata_to_image_array from metaspace_converter.colocalization import colocalization, colocML_preprocessing -from metaspace_converter.constants import COL, METASPACE_KEY, X, Y +from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y from metaspace_converter.to_anndata import all_image_pixel_coordinates @@ -37,60 +37,71 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: @pytest.mark.parametrize( "adata_dummy", - [dict(num_ions=5, height=10, width=10), dict(num_ions=4, height=4, width=4)], + [ + dict(num_ions=5, height=10, width=10), + dict(num_ions=0, height=2, width=3), + dict(num_ions=4, height=4, width=4), + ], indirect=["adata_dummy"], ) def test_colocml_preprocessing(adata_dummy): + COLOCML_LAYER = "colocml_preprocessing" + adata = adata_dummy - # Test median filtering - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0, layer="colocml_preprocessing" - ) + if adata.X.shape[1] == 0: + with pytest.raises(ValueError) as e_info: + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) + else: - actual = anndata_to_image_array(adata) + # Test median filtering + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + ) - # median filter + actual = anndata_to_image_array(adata) - expected_preprocessing = np.median(actual[0][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0 - ] - assert observed == expected_preprocessing + # median filter - expected_preprocessing = np.median(actual[1][:3, :3]) - observed = adata.layers["colocml_preprocessing"][ - adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1 - ] - assert observed == expected_preprocessing + expected_preprocessing = np.median(actual[0][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] + assert observed == expected_preprocessing - # Quantile thresholding - adata.X[0].reshape(-1) + expected_preprocessing = np.median(actual[1][:3, :3]) + observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] + assert observed == expected_preprocessing - colocML_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer="colocml_preprocessing" - ) + # Quantile thresholding + adata.X[0].reshape(-1) + + colocML_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER + ) - assert all(np.sum(adata.layers["colocml_preprocessing"] == 0, axis=0) <= 0.5 * adata.X.shape[0]) + assert all(np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) <= 0.5 * adata.X.shape[0]) - # Layer exists - assert "colocml_preprocessing" in adata.layers.keys() + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() - # Layer sizes match - assert adata.X.shape == adata.layers["colocml_preprocessing"].shape + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape def test_colocalization(): arr = np.array([[0.0, 1.0, 0.0], [0.0, 2.0, 0.0], [1.0, 0.0, 0.0]]).transpose() + + expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + adata = AnnData(X=arr) colocalization(adata, layer=None) - assert "colocalization" in adata.uns.keys() + assert COLOCALIZATION in adata.uns.keys() - coloc = adata.uns["colocalization"] + coloc = adata.uns[COLOCALIZATION] - expected = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) assert np.all(coloc == expected) From a12792fd6dea7b19276338770238f0ad93a2e9e1 Mon Sep 17 00:00:00 2001 From: tdrose Date: Tue, 19 Dec 2023 17:26:14 +0100 Subject: [PATCH 3/9] Docs examples fix. --- docs/examples.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index 14a0cf4..b686ebe 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -143,11 +143,6 @@ can be executed with our package: # The colocalization matrix is saved in `adata.uns["colocalization"]` colocalization.colocalization(adata, layer="colocml_preprocessing") -.. testoutput:: - :hide: - - ... - .. _ColocML: https://doi.org/10.1093/bioinformatics/btaa085 From 1de5c51ff0e919acafe262b41cf258f0f14d22bd Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 5 Feb 2024 10:56:00 +0100 Subject: [PATCH 4/9] renaming of coloc_ml_processing, testing, and moving colocs to varp --- docs/examples.rst | 10 +++---- metaspace_converter/colocalization.py | 25 ++++++++++------- .../tests/colocalization_test.py | 28 ++++++++++--------- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index b686ebe..d07920e 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -134,13 +134,13 @@ can be executed with our package: adata = metaspace_to_anndata(dataset_id="2023-11-14_21h58m39s", fdr=0.1) # Perform median filtering and quantile thresholding - # The processed data is saved as a layer `adata.layers["colocml_preprocessing"]` + # The processed data is saved as a layer `adata.layers["coloc_ml_preprocessing"]` # It has the same dimensions as `adata.X` - colocalization.colocML_preprocessing(adata, layer="colocml_preprocessing") + colocalization.coloc_ml_preprocessing(adata, layer="colocml_preprocessing") - # Compute the pairwise colocalization metrix between all ion images - # As an input, the processed data from `adata.layers["colocml_preprocessing"]` is used - # The colocalization matrix is saved in `adata.uns["colocalization"]` + # Compute the pairwise colocalization matrix between all ion images + # As an input, the processed data from `adata.layers["coloc_ml_preprocessing"]` is used + # The colocalization matrix is saved in `adata.varp["colocalization"]` colocalization.colocalization(adata, layer="colocml_preprocessing") diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py index 56a7c9a..606139f 100644 --- a/metaspace_converter/colocalization.py +++ b/metaspace_converter/colocalization.py @@ -8,9 +8,9 @@ from metaspace_converter.constants import COLOCALIZATION -def colocML_preprocessing( +def coloc_ml_preprocessing( adata: AnnData, - layer: Optional[str] = "colocml_preprocessing", + layer: Optional[str] = "coloc_ml_preprocessing", median_filter_size: Tuple[int, int] = (3, 3), quantile_threshold: float = 0.5, ): @@ -69,36 +69,41 @@ def colocML_preprocessing( adata.layers[layer] = imarray.transpose() -def colocalization(adata: AnnData, layer: Optional[str] = "colocml_preprocessing"): +def colocalization(adata: AnnData, layer: Optional[str] = "coloc_ml_preprocessing"): """ Colocalization of ion images using the cosine similarity metric. In combination with the ``colocML_preprocessing`` function, this metric performed best in the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). - It is recommended to call the the ``colocML_preprocessing`` function beforehand. + It is recommended to call the the ``coloc_ml_preprocessing`` function beforehand. Args: adata: An AnnData object. layer: Key for ``adata.layer`` from which the ionimage_data for preprocessing taken. - If ``None``, ``adata.X`` is used. ``colocML_preprocessing`` will save the preprocessed - data per default in ``adata.layer['colocml_preprocessing']``. + If ``None``, ``adata.X`` is used. ``coloc_ml_preprocessing`` will save the preprocessed + data per default in ``adata.layer['coloc_ml_preprocessing']``. Returns: - None. The processed data is saved in ``adata.uns['colocalization']``. + None. The processed data is saved in ``adata.varp['colocalization']``. + + Raises: + ValueError: If layer is not found in adata.layers. """ # Select data - if layer == None or layer not in adata.layers.keys(): + if layer is None: data = np.array(adata.X).transpose() - else: + elif layer in adata.layers.keys(): data = np.array(adata.layers[layer]).transpose() + else: + raise ValueError(f"Layer `{layer}` not found in adata.layers.") # Compute colocalization coloc = _pairwise_cosine_similarity(data) # Save colocalization - adata.uns[COLOCALIZATION] = coloc + adata.varp[COLOCALIZATION] = coloc def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 408cb8a..238bd75 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -9,7 +9,7 @@ import _pytest.fixtures from metaspace_converter import anndata_to_image_array -from metaspace_converter.colocalization import colocalization, colocML_preprocessing +from metaspace_converter.colocalization import colocalization, coloc_ml_preprocessing from metaspace_converter.constants import COL, COLOCALIZATION, METASPACE_KEY, X, Y from metaspace_converter.to_anndata import all_image_pixel_coordinates @@ -46,26 +46,32 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: ) def test_colocml_preprocessing(adata_dummy): - COLOCML_LAYER = "colocml_preprocessing" + COLOCML_LAYER = "coloc_ml_preprocessing" adata = adata_dummy if adata.X.shape[1] == 0: with pytest.raises(ValueError) as e_info: - colocML_preprocessing( + coloc_ml_preprocessing( adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER ) else: + # Test median filtering - colocML_preprocessing( + coloc_ml_preprocessing( adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER ) actual = anndata_to_image_array(adata) - # median filter + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() + + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape + # median filter expected_preprocessing = np.median(actual[0][:3, :3]) observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] assert observed == expected_preprocessing @@ -77,17 +83,13 @@ def test_colocml_preprocessing(adata_dummy): # Quantile thresholding adata.X[0].reshape(-1) - colocML_preprocessing( + coloc_ml_preprocessing( adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER ) assert all(np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) <= 0.5 * adata.X.shape[0]) - # Layer exists - assert COLOCML_LAYER in adata.layers.keys() - - # Layer sizes match - assert adata.X.shape == adata.layers[COLOCML_LAYER].shape + def test_colocalization(): @@ -100,8 +102,8 @@ def test_colocalization(): colocalization(adata, layer=None) - assert COLOCALIZATION in adata.uns.keys() + assert COLOCALIZATION in adata.varp.keys() - coloc = adata.uns[COLOCALIZATION] + coloc = adata.varp[COLOCALIZATION] assert np.all(coloc == expected) From 7bc4dca4bc6dc06f35347aa712f642bbcbcb4465 Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 5 Feb 2024 11:56:25 +0100 Subject: [PATCH 5/9] Changed test scenarios. --- metaspace_converter/tests/colocalization_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 238bd75..3296f2d 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -39,8 +39,8 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: "adata_dummy", [ dict(num_ions=5, height=10, width=10), - dict(num_ions=0, height=2, width=3), - dict(num_ions=4, height=4, width=4), + dict(num_ions=0, height=10, width=10), + dict(num_ions=5, height=3, width=4), ], indirect=["adata_dummy"], ) @@ -81,7 +81,6 @@ def test_colocml_preprocessing(adata_dummy): assert observed == expected_preprocessing # Quantile thresholding - adata.X[0].reshape(-1) coloc_ml_preprocessing( adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER From 94a220352c11aa4db08abf20e38059fcd0921129 Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 5 Feb 2024 12:46:31 +0100 Subject: [PATCH 6/9] Testing median filter for all ions of one pixel. --- .../tests/colocalization_test.py | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 3296f2d..2b0de37 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -72,13 +72,22 @@ def test_colocml_preprocessing(adata_dummy): assert adata.X.shape == adata.layers[COLOCML_LAYER].shape # median filter - expected_preprocessing = np.median(actual[0][:3, :3]) - observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] - assert observed == expected_preprocessing - - expected_preprocessing = np.median(actual[1][:3, :3]) - observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] - assert observed == expected_preprocessing + # Check that preprocessing was done correctly. + # Probe a single pixel of the resulting array and compute the expected median filter. + x = 1 + y = 1 + width = adata.uns[METASPACE_KEY]["image_size"][X] + expected_pixel_features = np.median(actual[:, y - 1:y + 2, x - 1:x + 2], axis=[1, 2]) + actual_pixel_features = adata.layers[COLOCML_LAYER][y * width + x, :] + np.testing.assert_allclose(actual_pixel_features, expected_pixel_features) + + # expected_preprocessing = np.median(actual[0][:3, :3]) + # observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] + # assert observed == expected_preprocessing + + # expected_preprocessing = np.median(actual[1][:3, :3]) + # observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] + # assert observed == expected_preprocessing # Quantile thresholding From 7561423929c0ec7ae1b33a0e624ce059602d3bcc Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 5 Feb 2024 13:23:54 +0100 Subject: [PATCH 7/9] Separated quantile thresholding and median filtering in test. --- .../tests/colocalization_test.py | 77 ++++++++++--------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/metaspace_converter/tests/colocalization_test.py b/metaspace_converter/tests/colocalization_test.py index 2b0de37..fca5fd5 100644 --- a/metaspace_converter/tests/colocalization_test.py +++ b/metaspace_converter/tests/colocalization_test.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Tuple import numpy as np import pandas as pd @@ -15,10 +15,11 @@ @pytest.fixture -def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: +def adata_dummy(request: "_pytest.fixtures.SubRequest") -> Tuple[AnnData, float]: num_ions = request.param.get("num_ions", 0) height = request.param.get("height", 0) width = request.param.get("width", 0) + quantile_threshold = request.param.get("quantile_threshold", 0) shape = (num_ions, height, width) # Create a stack of ion images where every pixel has a different value ion_images_stack = np.arange(np.prod(shape)).reshape(shape) @@ -32,15 +33,16 @@ def adata_dummy(request: "_pytest.fixtures.SubRequest") -> AnnData: obs=obs, uns={METASPACE_KEY: {"image_size": {X: width, Y: height}}}, ) - return adata + return adata, quantile_threshold @pytest.mark.parametrize( "adata_dummy", [ - dict(num_ions=5, height=10, width=10), - dict(num_ions=0, height=10, width=10), - dict(num_ions=5, height=3, width=4), + dict(num_ions=5, height=10, width=10, quantile_threshold=0), + dict(num_ions=0, height=10, width=10, quantile_threshold=0), + dict(num_ions=5, height=3, width=4, quantile_threshold=0), + dict(num_ions=5, height=3, width=4, quantile_threshold=0.5) ], indirect=["adata_dummy"], ) @@ -48,54 +50,53 @@ def test_colocml_preprocessing(adata_dummy): COLOCML_LAYER = "coloc_ml_preprocessing" - adata = adata_dummy + adata, quantile_threshold = adata_dummy if adata.X.shape[1] == 0: with pytest.raises(ValueError) as e_info: coloc_ml_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER ) else: + # Quantile thresholding and median filtering is tested separately + if quantile_threshold == 0: # Test median filtering - coloc_ml_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0, layer=COLOCML_LAYER - ) - - actual = anndata_to_image_array(adata) - - # Layer exists - assert COLOCML_LAYER in adata.layers.keys() + coloc_ml_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER + ) - # Layer sizes match - assert adata.X.shape == adata.layers[COLOCML_LAYER].shape + actual = anndata_to_image_array(adata) - # median filter - # Check that preprocessing was done correctly. - # Probe a single pixel of the resulting array and compute the expected median filter. - x = 1 - y = 1 - width = adata.uns[METASPACE_KEY]["image_size"][X] - expected_pixel_features = np.median(actual[:, y - 1:y + 2, x - 1:x + 2], axis=[1, 2]) - actual_pixel_features = adata.layers[COLOCML_LAYER][y * width + x, :] - np.testing.assert_allclose(actual_pixel_features, expected_pixel_features) + # Layer exists + assert COLOCML_LAYER in adata.layers.keys() - # expected_preprocessing = np.median(actual[0][:3, :3]) - # observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 0] - # assert observed == expected_preprocessing + # Layer sizes match + assert adata.X.shape == adata.layers[COLOCML_LAYER].shape - # expected_preprocessing = np.median(actual[1][:3, :3]) - # observed = adata.layers[COLOCML_LAYER][adata.uns[METASPACE_KEY]["image_size"][X] + 1, 1] - # assert observed == expected_preprocessing + # median filter + # Probe a single pixel of the resulting array and compute the expected median filter. + x = 1 + y = 1 + width = adata.uns[METASPACE_KEY]["image_size"][X] + print(actual[:, y - 1:y + 2, x - 1:x + 2]) + expected_pixel_features = np.median(actual[:, y - 1:y + 2, x - 1:x + 2], axis=[1, 2]) + actual_pixel_features = adata.layers[COLOCML_LAYER][y * width + x, :] + np.testing.assert_allclose(actual_pixel_features, expected_pixel_features) # Quantile thresholding + else: + coloc_ml_preprocessing( + adata, median_filter_size=(3, 3), quantile_threshold=quantile_threshold, + layer=COLOCML_LAYER + ) + quantile_value = quantile_threshold * adata.X.shape[0] + actual_thresholded = np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) + assert all(actual_thresholded <= quantile_value) - coloc_ml_preprocessing( - adata, median_filter_size=(3, 3), quantile_threshold=0.5, layer=COLOCML_LAYER - ) - - assert all(np.sum(adata.layers[COLOCML_LAYER] == 0, axis=0) <= 0.5 * adata.X.shape[0]) From 178e56cf85c73e20af58940b3a9721038a46158b Mon Sep 17 00:00:00 2001 From: tdrose Date: Mon, 5 Feb 2024 13:25:42 +0100 Subject: [PATCH 8/9] Added layer argument to anndata_to_image_array function. --- metaspace_converter/anndata_to_array.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/metaspace_converter/anndata_to_array.py b/metaspace_converter/anndata_to_array.py index 61d98a2..2399895 100644 --- a/metaspace_converter/anndata_to_array.py +++ b/metaspace_converter/anndata_to_array.py @@ -1,3 +1,4 @@ +from typing import Optional import numpy as np from anndata import AnnData @@ -16,13 +17,15 @@ def _check_pixel_coordinates(adata: AnnData) -> bool: return np.all(np.equal(pixel_list, required_pixels)) -def anndata_to_image_array(adata: AnnData) -> np.ndarray: +def anndata_to_image_array(adata: AnnData, layer: Optional[str]=None) -> np.ndarray: """ Extracts an array of ion images from an AnnData object (that has been generated through the ``metaspace_to_anndata`` function). Args: adata: An AnnData object. + layer: ``AnnData.layer`` that should be extracted to an image array. + Default is None, which means that ``adata.X`` will be used. Returns: A three-dimensional Numpy array in the following shape @@ -36,8 +39,13 @@ def anndata_to_image_array(adata: AnnData) -> np.ndarray: E.g. Pixel have been removed/added and the number of pixels and their coordinates do not match the original image dimensions. """ - - pixel_array = adata.X.transpose().copy() + if layer is None: + pixel_array = np.array(adata.X).transpose().copy() + elif layer in adata.layers.keys(): + pixel_array = np.array(adata.layers[layer]).transpose().copy() + else: + raise ValueError(f"Layer `{layer}` not found in adata.layers.") + img_size = adata.uns[METASPACE_KEY]["image_size"] # Check if image dimensions are okay From 9d09565c03ac2569ac6ec94824b13bf9552071bf Mon Sep 17 00:00:00 2001 From: tdrose Date: Tue, 6 Feb 2024 10:02:43 +0100 Subject: [PATCH 9/9] Fixed typo --- metaspace_converter/colocalization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaspace_converter/colocalization.py b/metaspace_converter/colocalization.py index 606139f..8230390 100644 --- a/metaspace_converter/colocalization.py +++ b/metaspace_converter/colocalization.py @@ -38,7 +38,7 @@ def coloc_ml_preprocessing( ion image and all pixels below the quantile threshold will be set to 0. Returns: - None. The processed data is saved in ``layer``. If layer is net to None, ``adata.X`` will + None. The processed data is saved in ``layer``. If layer is set to None, ``adata.X`` will be overwritten Raises: