Skip to content

Commit

Permalink
feat: differential tissue position row/col max sizes for visium and visium 11 (#1143)
Browse files Browse the repository at this point in the history

Co-authored-by: Evan Molinelli <[email protected]>
  • Loading branch information
ejmolinelli and Evan Molinelli authored Dec 6, 2024
1 parent 3d2fa6d commit b517b86
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 25 deletions.
29 changes: 26 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
from datetime import datetime
from typing import Dict, List, Mapping, Optional, Union
from typing import Dict, List, Mapping, Optional, Tuple, Union

import anndata
import matplotlib.colors as mcolors
Expand All @@ -29,6 +29,10 @@

VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 14336
VISIUM_TISSUE_POSITION_MAX_ROW = 77
VISIUM_TISSUE_POSITION_MAX_COL = 127
VISIUM_11MM_TISSUE_POSITION_MAX_ROW = 127
VISIUM_11MM_TISSUE_POSITION_MAX_COL = 223
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000

Expand Down Expand Up @@ -57,6 +61,7 @@ def __init__(self, ignore_labels=False):
self._visium_and_is_single_true_matrix_size = None
self._hires_max_dimension_size = None
self._visium_error_suffix = None
self._visium_tissue_position_max = None

# Values will be instances of gencode.GeneChecker,
# keys will be one of gencode.SupportedOrganisms
Expand Down Expand Up @@ -122,6 +127,24 @@ def hires_max_dimension_size(self) -> Optional[int]:
self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
return self._hires_max_dimension_size

@property
def tissue_position_maxes(self) -> Optional[Tuple[int, int]]:
    """
    Maximum allowed tissue position values as a ``(max_row, max_col)`` pair.

    The pair is resolved on first access and cached in ``self._visium_tissue_position_max``.
    It is only resolved when ``self._is_visium_and_is_single_true`` is truthy; otherwise the
    cached value is returned unchanged, which is ``None`` if nothing has been resolved yet.
    Callers that index the result must therefore only do so when the limits apply.

    :return: ``(max_row, max_col)`` or ``None`` when no limits have been resolved
    :rtype: Optional[Tuple[int, int]]
    """
    # NOTE(review): `_is_visium_and_is_single_true` is read here as a plain attribute. If it is
    # actually defined as a method, the bound method is always truthy -- confirm.
    if self._visium_tissue_position_max is None and self._is_visium_and_is_single_true:
        # visium 11 has different requirements than other visium
        if (
            self.adata.obs["assay_ontology_term_id"]
            .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
            .astype(bool)
            .any()
        ):
            self._visium_tissue_position_max = (
                VISIUM_11MM_TISSUE_POSITION_MAX_ROW,
                VISIUM_11MM_TISSUE_POSITION_MAX_COL,
            )
        else:
            self._visium_tissue_position_max = (VISIUM_TISSUE_POSITION_MAX_ROW, VISIUM_TISSUE_POSITION_MAX_COL)
    return self._visium_tissue_position_max

def _is_single(self) -> bool | None:
"""
Determine value of uns.spatial.is_single. None if non-spatial.
Expand Down Expand Up @@ -1732,8 +1755,8 @@ def _validate_spatial_tissue_positions(self):
:rtype none
"""
self._validate_spatial_tissue_position("array_col", 0, 127)
self._validate_spatial_tissue_position("array_row", 0, 77)
self._validate_spatial_tissue_position("array_col", 0, self.tissue_position_maxes[1])
self._validate_spatial_tissue_position("array_row", 0, self.tissue_position_maxes[0])
self._validate_spatial_tissue_position("in_tissue", 0, 1)

def _check_spatial_uns(self):
Expand Down
65 changes: 43 additions & 22 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import os
import re
import tempfile
from typing import Union
from unittest import mock
Expand Down Expand Up @@ -1011,21 +1012,32 @@ def test__validate_tissue_position_required(self, tissue_position_name):
validator.adata = adata_visium.copy()
validator.adata.obs.pop(tissue_position_name)

# check visium
validator.adata.obs["assay_ontology_term_id"] = "EFO:0010961"
validator._check_spatial_obs()
assert validator.errors
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
)
validator.reset()

# check visium descendant
validator.adata.obs["assay_ontology_term_id"] = "EFO:0022860"
validator._check_spatial_obs()
assert validator.errors
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
)
validator.reset()

@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062"])
@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062", "EFO:0022860"])
def test__validate_tissue_position_not_required(self, assay_ontology_term_id):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_slide_seqv2.copy()
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.uns["spatial"]["is_single"] = False
validator.adata.uns["spatial"]["is_single"] = False # setting to false removes the requirement
validator.adata.obs["is_primary_data"] = False

validator._check_spatial_obs()
assert not validator.errors

Expand All @@ -1041,43 +1053,52 @@ def test__validate_tissue_position_int_error(self, tissue_position_name):
assert validator.errors
assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0]

@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0022860", "EFO:0022859"])
@pytest.mark.parametrize("tissue_position_name, min", [("array_col", 0), ("array_row", 0), ("in_tissue", 0)])
def test__validate_tissue_position_int_min_error(self, assay_ontology_term_id, tissue_position_name, min):
    """
    A tissue position value one below its minimum must be reported as an error.

    The upper bound in the message is matched loosely (any integer) so that the same
    assertion holds for every parametrized assay.
    """
    validator: Validator = Validator()
    validator._set_schema_def()
    validator.adata = adata_visium.copy()
    validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
    validator.adata.obs[tissue_position_name] = min - 1

    # Confirm tissue_position is identified as invalid.
    validator._check_spatial_obs()
    assert validator.errors
    # Raw f-string: `\[` / `\]` must reach the regex engine verbatim; in a non-raw
    # literal they are invalid escape sequences.
    assert (
        re.match(rf"^obs\['{tissue_position_name}'\] must be (between )?{min} (and|or) [0-9]+", validator.errors[0])
        is not None
    )

@pytest.mark.parametrize(
    "assay_ontology_term_id, tissue_position_name, tissue_position_max",
    [
        ("EFO:0010961", "array_col", 127),
        ("EFO:0010961", "array_row", 77),
        ("EFO:0022860", "array_col", 223),
        ("EFO:0022860", "array_row", 127),
        ("EFO:0022859", "array_col", 127),
        ("EFO:0022859", "array_row", 77),
        ("EFO:0022859", "in_tissue", 1),
    ],
)
def test__validate_tissue_position_int_max_error(
    self, assay_ontology_term_id, tissue_position_name, tissue_position_max
):
    """
    A tissue position value one above its assay-specific maximum must be reported as an error.

    Each parametrized case pairs an assay term with the maximum expected for that
    tissue position field; the error message must name that maximum.
    """
    validator: Validator = Validator()
    validator._set_schema_def()
    validator.adata = adata_visium.copy()
    validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
    validator.adata.obs[tissue_position_name] = tissue_position_max + 1

    # Confirm tissue_position is identified as invalid.
    validator._check_spatial_obs()
    assert validator.errors
    # Raw f-string: `\[` / `\]` must reach the regex engine verbatim; in a non-raw
    # literal they are invalid escape sequences.
    assert (
        re.match(
            rf"^obs\['{tissue_position_name}'\] must be (between )?[0-9]+ (and|or) {tissue_position_max}",
            validator.errors[0],
        )
        is not None
    )

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
Expand Down

0 comments on commit b517b86

Please sign in to comment.