diff --git a/src/pheval/prepare/prepare_corpus.py b/src/pheval/prepare/prepare_corpus.py
index d749d0dd0..df0beda29 100644
--- a/src/pheval/prepare/prepare_corpus.py
+++ b/src/pheval/prepare/prepare_corpus.py
@@ -57,6 +57,12 @@ def prepare_corpus(
                     f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
                 )
                 continue
+            elif phenopacket_util.check_variant_alleles():
+                info_log.warning(
+                    f"Removed {phenopacket_path.name} from the corpus due to identical "
+                    "reference and alternate allele fields."
+                )
+                continue
         if gene_analysis:
             if phenopacket_util.check_incomplete_gene_record():
                 info_log.warning(
diff --git a/src/pheval/utils/phenopacket_utils.py b/src/pheval/utils/phenopacket_utils.py
index bd4097053..1727472af 100644
--- a/src/pheval/utils/phenopacket_utils.py
+++ b/src/pheval/utils/phenopacket_utils.py
@@ -503,6 +503,19 @@ def check_incomplete_variant_record(self) -> bool:
                 return True
         return False
 
+    def check_variant_alleles(self) -> bool:
+        """
+        Check if any variant record in the phenopacket has identical reference and alternate alleles.
+
+        Returns:
+            bool: True if the reference and alternate alleles are identical, False otherwise.
+        """
+        variants = self.diagnosed_variants()
+        for variant in variants:
+            if variant.ref == variant.alt:
+                return True
+        return False
+
     def check_incomplete_gene_record(self) -> bool:
         """
         Check if any gene record in the phenopacket has incomplete information.
diff --git a/tests/test_phenopacket_utils.py b/tests/test_phenopacket_utils.py
index 492496a87..c720cdb9c 100644
--- a/tests/test_phenopacket_utils.py
+++ b/tests/test_phenopacket_utils.py
@@ -1,4 +1,5 @@
 import unittest
+from copy import deepcopy
 from pathlib import Path
 
 from phenopackets import (
@@ -606,6 +607,21 @@ def test_check_incomplete_disease_record(self):
     def test_check_incomplete_disease_record_missing_records(self):
         self.assertTrue(self.structural_variant_phenopacket.check_incomplete_disease_record())
 
+    def test_check_variant_alleles(self):
+        self.assertFalse(self.phenopacket.check_variant_alleles())
+
+    def test_check_variant_alleles_duplicate(self):
+        phenopacket_copy = deepcopy(self.phenopacket)
+        vcf_record = (
+            phenopacket_copy.phenopacket_contents.interpretations[0]
+            .diagnosis.genomic_interpretations[0]
+            .variant_interpretation.variation_descriptor.vcf_record
+        )
+        # Copy ref into alt rather than hard-coding a base, so the test does not
+        # depend on which reference allele the fixture happens to use.
+        vcf_record.alt = vcf_record.ref
+        self.assertTrue(phenopacket_copy.check_variant_alleles())
+
 
 class TestPhenopacketRebuilder(unittest.TestCase):
     @classmethod