From 7a3af400ce093ee13370d9614cddbd8b1428e773 Mon Sep 17 00:00:00 2001 From: Yuta Nagano <52748151+yutanagano@users.noreply.github.com> Date: Sun, 19 Jan 2025 16:36:17 +0000 Subject: [PATCH 1/2] implement species-agnostic TR standardisation --- src/tidytcells/tr/_standardize.py | 64 ++++++++++++++++++++++++------- tests/test_tr.py | 14 ++++++- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/tidytcells/tr/_standardize.py b/src/tidytcells/tr/_standardize.py index 9452448..97255a5 100644 --- a/src/tidytcells/tr/_standardize.py +++ b/src/tidytcells/tr/_standardize.py @@ -41,8 +41,13 @@ def standardize( :type symbol: str :param species: - Species to which the TR gene / allele belongs (see above for supported species). + Can be specified to standardise to a TR symbol that is known to be valid for that species (see above for supported species). + If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*. Defaults to ``"homosapiens"``. + + .. note:: + From version 3, the default behaviour will change to ``"any"``. 
+ :type species: str :param enforce_functional: @@ -212,30 +217,61 @@ def standardize( species = _utils.clean_and_lowercase(species) - species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS - if not species_is_supported: - if log_failures: - _utils.warn_unsupported_species(species, "TR", logger) - return symbol + if species == "any": + best_attempt_invalid_reason = None + best_attempt_standardised_symbol = None + best_attempt_species = None - StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species] - standardized_tr_symbol = StandardizedTrSymbolClass(symbol) + for species, StandardizedTrSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items(): + standardized_tr_symbol = StandardizedTrSymbolClass(symbol) + invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional) + + if invalid_reason is None: + return standardized_tr_symbol.compile(precision) + + if species == "homosapiens": + best_attempt_invalid_reason = invalid_reason + best_attempt_standardised_symbol = standardized_tr_symbol + best_attempt_species = species - invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional) - if invalid_reason is not None: if log_failures: _utils.warn_failure( - reason_for_failure=invalid_reason, + reason_for_failure=best_attempt_invalid_reason, original_input=symbol, - attempted_fix=standardized_tr_symbol.compile("allele"), - species=species, + attempted_fix=best_attempt_standardised_symbol.compile("allele"), + species=best_attempt_species, logger=logger, ) if on_fail == "reject": return None return symbol - return standardized_tr_symbol.compile(precision) + if not species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS: + if log_failures: + _utils.warn_unsupported_species(species, "TR", logger) + return symbol + + StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species] + standardized_tr_symbol = StandardizedTrSymbolClass(symbol) + + invalid_reason = 
standardized_tr_symbol.get_reason_why_invalid(enforce_functional) + + if invalid_reason is None: + return standardized_tr_symbol.compile(precision) + + if log_failures: + _utils.warn_failure( + reason_for_failure=invalid_reason, + original_input=symbol, + attempted_fix=standardized_tr_symbol.compile("allele"), + species=species, + logger=logger, + ) + + if on_fail == "reject": + return None + + return symbol def standardise(*args, **kwargs): diff --git a/tests/test_tr.py b/tests/test_tr.py index 204e61b..f07b53d 100644 --- a/tests/test_tr.py +++ b/tests/test_tr.py @@ -17,9 +17,21 @@ def test_bad_type(self, symbol): def test_default_homosapiens(self): result = tr.standardize("TRBV20/OR9-2*01") - assert result == "TRBV20/OR9-2*01" + @pytest.mark.parametrize( + ("symbol", "expected"), + ( + ("TRBV20/OR9-2*01", "TRBV20/OR9-2*01"), + ("TCRAV14/4", "TRAV14/DV4"), + ("TRAV15-1-DV6-1", "TRAV15-1/DV6-1"), + ("TRAV15/DV6", "TRAV15-1/DV6-1"), + ) + ) + def test_any_species(self, symbol, expected): + result = tr.standardize(symbol, species="any") + assert result == expected + @pytest.mark.parametrize( ("symbol", "expected"), (("TRAV3*01 ", "TRAV3*01"), (" TRAV3 * 01 ", "TRAV3*01")), From 7451bcef267263dc844fc781041fd270274ec5f1 Mon Sep 17 00:00:00 2001 From: Yuta Nagano <52748151+yutanagano@users.noreply.github.com> Date: Sun, 19 Jan 2025 16:44:02 +0000 Subject: [PATCH 2/2] implement species-agnostic MH standardisation --- src/tidytcells/mh/_standardize.py | 66 ++++++++++++++++++++++++------- tests/test_mh.py | 14 ++++++- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/tidytcells/mh/_standardize.py b/src/tidytcells/mh/_standardize.py index ca5219f..70588da 100644 --- a/src/tidytcells/mh/_standardize.py +++ b/src/tidytcells/mh/_standardize.py @@ -45,8 +45,13 @@ def standardize( :type symbol: str :param species: - Species to which the MH gene belongs (see above for supported species). 
+ Can be specified to standardise to a MH symbol that is known to be valid for that species (see above for supported species). + If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*. Defaults to ``"homosapiens"``. + + .. note:: + From version 3, the default behaviour will change to ``"any"``. + :type species: str :param precision: @@ -188,30 +193,63 @@ def standardize( species = _utils.clean_and_lowercase(species) - species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS - if not species_is_supported: - if log_failures: - _utils.warn_unsupported_species(species, "MH", logger) - return symbol + if species == "any": + best_attempt_invalid_reason = None + best_attempt_standardised_symbol = None + best_attempt_species = None - StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species] - standardized_mh_symbol = StandardizedMhSymbolClass(symbol) + for species, StandardizedMhSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items(): + standardized_tr_symbol = StandardizedMhSymbolClass(symbol) + invalid_reason = standardized_tr_symbol.get_reason_why_invalid() + + if invalid_reason is None: + return standardized_tr_symbol.compile(precision) + + if species == "homosapiens": + best_attempt_invalid_reason = invalid_reason + best_attempt_standardised_symbol = standardized_tr_symbol + best_attempt_species = species - invalid_reason = standardized_mh_symbol.get_reason_why_invalid() - if invalid_reason is not None: if log_failures: _utils.warn_failure( - reason_for_failure=invalid_reason, + reason_for_failure=best_attempt_invalid_reason, original_input=symbol, - attempted_fix=standardized_mh_symbol.compile("allele"), - species=species, + attempted_fix=best_attempt_standardised_symbol.compile("allele"), + species=best_attempt_species, logger=logger, ) + if on_fail == "reject": return None + return symbol - return standardized_mh_symbol.compile(precision) + if species not in
SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS: + if log_failures: + _utils.warn_unsupported_species(species, "MH", logger) + return symbol + + StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species] + standardized_mh_symbol = StandardizedMhSymbolClass(symbol) + + invalid_reason = standardized_mh_symbol.get_reason_why_invalid() + + if invalid_reason is None: + return standardized_mh_symbol.compile(precision) + + if log_failures: + _utils.warn_failure( + reason_for_failure=invalid_reason, + original_input=symbol, + attempted_fix=standardized_mh_symbol.compile("allele"), + species=species, + logger=logger, + ) + + if on_fail == "reject": + return None + + return symbol def standardise(*args, **kwargs): diff --git a/tests/test_mh.py b/tests/test_mh.py index ea043be..52665c1 100644 --- a/tests/test_mh.py +++ b/tests/test_mh.py @@ -17,9 +17,21 @@ def test_bad_type(self, symbol): def test_default_homosapiens(self): result = mh.standardize("HLA-B*07") - assert result == "HLA-B*07" + @pytest.mark.parametrize( + ("symbol", "expected"), + ( + ("HLA-B8", "HLA-B*08"), + ("A1", "HLA-A*01"), + ("H-2Eb1", "MH2-EB1"), + ("H-2Aa", "MH2-AA") + ) + ) + def test_any_species(self, symbol, expected): + result = mh.standardize(symbol, species="any") + assert result == expected + @pytest.mark.parametrize( ("symbol", "expected", "precision"), (