Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement species-agnostic TR and MH standardisation #93

Merged
merged 2 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 52 additions & 14 deletions src/tidytcells/mh/_standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,13 @@ def standardize(
:type symbol:
str
:param species:
Species to which the MH gene belongs (see above for supported species).
Can be specified to standardise to an MH symbol that is known to be valid for that species (see above for supported species).
If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*.
Defaults to ``"homosapiens"``.

.. note::
From version 3, the default behaviour will change to ``"any"``.

:type species:
str
:param precision:
Expand Down Expand Up @@ -188,30 +193,63 @@ def standardize(

species = _utils.clean_and_lowercase(species)

species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS
if not species_is_supported:
if log_failures:
_utils.warn_unsupported_species(species, "MH", logger)
return symbol
if species == "any":
best_attempt_invalid_reason = None
best_attempt_standardised_symbol = None
best_attempt_species = None

StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_mh_symbol = StandardizedMhSymbolClass(symbol)
for species, StandardizedMhSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items():
standardized_tr_symbol = StandardizedMhSymbolClass(symbol)
invalid_reason = standardized_tr_symbol.get_reason_why_invalid()

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if species == "homosapiens":
best_attempt_invalid_reason = invalid_reason
best_attempt_standardised_symbol = standardized_tr_symbol
best_attempt_species = species

invalid_reason = standardized_mh_symbol.get_reason_why_invalid()
if invalid_reason is not None:
if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
reason_for_failure=best_attempt_invalid_reason,
original_input=symbol,
attempted_fix=standardized_mh_symbol.compile("allele"),
species=species,
attempted_fix=best_attempt_standardised_symbol.compile("allele"),
species=best_attempt_species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol

return standardized_mh_symbol.compile(precision)
if species not in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS:
if log_failures:
_utils.warn_unsupported_species(species, "MH", logger)
return symbol

StandardizedMhSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_mh_symbol = StandardizedMhSymbolClass(symbol)

invalid_reason = standardized_mh_symbol.get_reason_why_invalid()

if invalid_reason is None:
return standardized_mh_symbol.compile(precision)

if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
original_input=symbol,
attempted_fix=standardized_mh_symbol.compile("allele"),
species=species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol


def standardise(*args, **kwargs):
Expand Down
64 changes: 50 additions & 14 deletions src/tidytcells/tr/_standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,13 @@ def standardize(
:type symbol:
str
:param species:
Species to which the TR gene / allele belongs (see above for supported species).
Can be specified to standardise to a TR symbol that is known to be valid for that species (see above for supported species).
If set to ``"any"``, then first attempts standardisation for *Homo sapiens*, then *Mus musculus*.
Defaults to ``"homosapiens"``.

.. note::
From version 3, the default behaviour will change to ``"any"``.

:type species:
str
:param enforce_functional:
Expand Down Expand Up @@ -212,30 +217,61 @@ def standardize(

species = _utils.clean_and_lowercase(species)

species_is_supported = species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS
if not species_is_supported:
if log_failures:
_utils.warn_unsupported_species(species, "TR", logger)
return symbol
if species == "any":
best_attempt_invalid_reason = None
best_attempt_standardised_symbol = None
best_attempt_species = None

StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)
for species, StandardizedTrSymbolClass in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS.items():
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)
invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if species == "homosapiens":
best_attempt_invalid_reason = invalid_reason
best_attempt_standardised_symbol = standardized_tr_symbol
best_attempt_species = species

invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)
if invalid_reason is not None:
if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
reason_for_failure=best_attempt_invalid_reason,
original_input=symbol,
attempted_fix=standardized_tr_symbol.compile("allele"),
species=species,
attempted_fix=best_attempt_standardised_symbol.compile("allele"),
species=best_attempt_species,
logger=logger,
)
if on_fail == "reject":
return None
return symbol

return standardized_tr_symbol.compile(precision)
if not species in SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS:
if log_failures:
_utils.warn_unsupported_species(species, "TR", logger)
return symbol

StandardizedTrSymbolClass = SUPPORTED_SPECIES_AND_THEIR_STANDARDIZERS[species]
standardized_tr_symbol = StandardizedTrSymbolClass(symbol)

invalid_reason = standardized_tr_symbol.get_reason_why_invalid(enforce_functional)

if invalid_reason is None:
return standardized_tr_symbol.compile(precision)

if log_failures:
_utils.warn_failure(
reason_for_failure=invalid_reason,
original_input=symbol,
attempted_fix=standardized_tr_symbol.compile("allele"),
species=species,
logger=logger,
)

if on_fail == "reject":
return None

return symbol


def standardise(*args, **kwargs):
Expand Down
14 changes: 13 additions & 1 deletion tests/test_mh.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@ def test_bad_type(self, symbol):

def test_default_homosapiens(self):
result = mh.standardize("HLA-B*07")

assert result == "HLA-B*07"

@pytest.mark.parametrize(
("symbol", "expected"),
(
("HLA-B8", "HLA-B*08"),
("A1", "HLA-A*01"),
("H-2Eb1", "MH2-EB1"),
("H-2Aa", "MH2-AA")
)
)
def test_any_species(self, symbol, expected):
result = mh.standardize(symbol, species="any")
assert result == expected

@pytest.mark.parametrize(
("symbol", "expected", "precision"),
(
Expand Down
14 changes: 13 additions & 1 deletion tests/test_tr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,21 @@ def test_bad_type(self, symbol):

def test_default_homosapiens(self):
result = tr.standardize("TRBV20/OR9-2*01")

assert result == "TRBV20/OR9-2*01"

@pytest.mark.parametrize(
("symbol", "expected"),
(
("TRBV20/OR9-2*01", "TRBV20/OR9-2*01"),
("TCRAV14/4", "TRAV14/DV4"),
("TRAV15-1-DV6-1", "TRAV15-1/DV6-1"),
("TRAV15/DV6", "TRAV15-1/DV6-1"),
)
)
def test_any_species(self, symbol, expected):
result = tr.standardize(symbol, species="any")
assert result == expected

@pytest.mark.parametrize(
("symbol", "expected"),
(("TRAV3*01 ", "TRAV3*01"), (" TRAV3 * 01 ", "TRAV3*01")),
Expand Down
Loading