Skip to content

Commit

Permalink
Merge pull request #93 from yutanagano/unified_standardisation
Browse files Browse the repository at this point in the history
Implement species-agnostic TR and MH standardisation
  • Loading branch information
yutanagano authored Jan 19, 2025
2 parents e5e14a3 + 7451bce commit 98fc971
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 30 deletions.
66 changes: 52 additions & 14 deletions src/tidytcells/mh/_standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,13 @@ def standardize(
:type symbol:
str
:param species:
Species to which the MH gene belongs (see above for supported species).
Can be specified to standardise to an MH symbol that is known to be valid for that species (see above for supported species).
If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*.
Defaults to ``"homosapiens"``.
.. note::
From version 3, the default behaviour will change to ``"any"``.
:type species:
str
:param precision:
Expand Down Expand Up @@ -188,30 +193,63 @@ def standardize(

species = _utils.clean_and_lowercase(species)

species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS
if not species_is_supported:
if log_failures:
_utils.warn_unsupported_species(species, "MH", logger)
return symbol
if species == "any":
best_attempt_invalid_reason = None
best_attempt_standardised_symbol = None
best_attempt_species = None

StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_mh_symbol = StandardizedMhSymbolClass(symbol)
for species, StandardizedMhSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items():
standardized_tr_symbol = StandardizedMhSymbolClass(symbol)
invalid_reason = standardized_tr_symbol.get_reason_why_invalid()

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if species == "homosapiens":
best_attempt_invalid_reason = invalid_reason
best_attempt_standardised_symbol = standardized_tr_symbol
best_attempt_species = species

invalid_reason = standardized_mh_symbol.get_reason_why_invalid()
if invalid_reason is not None:
if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
reason_for_failure=best_attempt_invalid_reason,
original_input=symbol,
attempted_fix=standardized_mh_symbol.compile("allele"),
species=species,
attempted_fix=best_attempt_standardised_symbol.compile("allele"),
species=best_attempt_species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol

return standardized_mh_symbol.compile(precision)
if species not in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS:
if log_failures:
_utils.warn_unsupported_species(species, "MH", logger)
return symbol

StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_mh_symbol = StandardizedMhSymbolClass(symbol)

invalid_reason = standardized_mh_symbol.get_reason_why_invalid()

if invalid_reason is None:
return standardized_mh_symbol.compile(precision)

if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
original_input=symbol,
attempted_fix=standardized_mh_symbol.compile("allele"),
species=species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol


def standardise(*args, **kwargs):
Expand Down
64 changes: 50 additions & 14 deletions src/tidytcells/tr/_standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,13 @@ def standardize(
:type symbol:
str
:param species:
Species to which the TR gene / allele belongs (see above for supported species).
Can be specified to standardise to a TR symbol that is known to be valid for that species (see above for supported species).
If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*.
Defaults to ``"homosapiens"``.
.. note::
From version 3, the default behaviour will change to ``"any"``.
:type species:
str
:param enforce_functional:
Expand Down Expand Up @@ -212,30 +217,61 @@ def standardize(

species = _utils.clean_and_lowercase(species)

species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS
if not species_is_supported:
if log_failures:
_utils.warn_unsupported_species(species, "TR", logger)
return symbol
if species == "any":
best_attempt_invalid_reason = None
best_attempt_standardised_symbol = None
best_attempt_species = None

StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)
for species, StandardizedTrSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items():
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)
invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if species == "homosapiens":
best_attempt_invalid_reason = invalid_reason
best_attempt_standardised_symbol = standardized_tr_symbol
best_attempt_species = species

invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)
if invalid_reason is not None:
if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
reason_for_failure=best_attempt_invalid_reason,
original_input=symbol,
attempted_fix=standardized_tr_symbol.compile("allele"),
species=species,
attempted_fix=best_attempt_standardised_symbol.compile("allele"),
species=best_attempt_species,
logger=logger,
)
if on_fail == "reject":
return None
return symbol

return standardized_tr_symbol.compile(precision)
if not species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS:
if log_failures:
_utils.warn_unsupported_species(species, "TR", logger)
return symbol

StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)

invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
original_input=symbol,
attempted_fix=standardized_tr_symbol.compile("allele"),
species=species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol


def standardise(*args, **kwargs):
Expand Down
14 changes: 13 additions & 1 deletion tests/test_mh.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@ def test_bad_type(self, symbol):

def test_default_homosapiens(self):
    """Standardising "HLA-B*07" with no species argument returns it unchanged."""
    standardised = mh.standardize("HLA-B*07")
    assert standardised == "HLA-B*07"

@pytest.mark.parametrize(
("symbol", "expected"),
(
("HLA-B8", "HLA-B*08"),
("A1", "HLA-A*01"),
("H-2Eb1", "MH2-EB1"),
("H-2Aa", "MH2-AA")
)
)
def test_any_species(self, symbol, expected):
    """Each parametrised symbol standardises to ``expected`` when species is "any"."""
    standardised = mh.standardize(symbol, species="any")
    assert standardised == expected

@pytest.mark.parametrize(
("symbol", "expected", "precision"),
(
Expand Down
14 changes: 13 additions & 1 deletion tests/test_tr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@ def test_bad_type(self, symbol):

def test_default_homosapiens(self):
    """Standardising "TRBV20/OR9-2*01" with no species argument returns it unchanged."""
    standardised = tr.standardize("TRBV20/OR9-2*01")
    assert standardised == "TRBV20/OR9-2*01"

@pytest.mark.parametrize(
("symbol", "expected"),
(
("TRBV20/OR9-2*01", "TRBV20/OR9-2*01"),
("TCRAV14/4", "TRAV14/DV4"),
("TRAV15-1-DV6-1", "TRAV15-1/DV6-1"),
("TRAV15/DV6", "TRAV15-1/DV6-1"),
)
)
def test_any_species(self, symbol, expected):
    """Each parametrised symbol standardises to ``expected`` when species is "any"."""
    standardised = tr.standardize(symbol, species="any")
    assert standardised == expected

@pytest.mark.parametrize(
("symbol", "expected"),
(("TRAV3*01 ", "TRAV3*01"), (" TRAV3 * 01 ", "TRAV3*01")),
Expand Down

0 comments on commit 98fc971

Please sign in to comment.