From b506926b2fdeafee939ebc5e44cb7ab6152113c3 Mon Sep 17 00:00:00 2001 From: Jeremy Arbesfeld <50678786+jarbesfeld@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:53:04 -0500 Subject: [PATCH] fix: Add gene symbol check in translator methods (#214) --- src/fusor/translator.py | 45 +++++++++++++++++++++++++++++++++++++++ tests/test_translators.py | 9 ++++++++ 2 files changed, 54 insertions(+) diff --git a/src/fusor/translator.py b/src/fusor/translator.py index ca13eb6..faf9cb6 100644 --- a/src/fusor/translator.py +++ b/src/fusor/translator.py @@ -141,6 +141,23 @@ def _get_gene_element(self, genes: str, caller: Caller) -> GeneElement: ge = self.fusor.gene_element(gene=gene) return ge[0] if ge[0] else self._get_gene_element_unnormalized(gene) + def _are_fusion_partners_different( + self, gene_5prime: str, gene_3prime: str + ) -> bool: + """Check if the normalized gene symbols for the two fusion partners + are different. If not, this event is not a fusion + + :param gene_5prime: The 5' gene partner + :param gene_3prime: The 3' gene partner + :return ``True`` if the gene symbols are different, ``False`` if not + """ + if gene_5prime != gene_3prime: + return True + _logger.error( + "The supplied fusion is not valid as the two fusion partners are the same" + ) + return False + def _get_genomic_ac(self, chrom: str, build: Assembly) -> str: """Return a RefSeq genomic accession given a chromosome and a reference build @@ -190,6 +207,9 @@ async def from_jaffa( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac(chrom1, rb), @@ -244,6 +264,9 @@ async def from_star_fusion( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + five_prime = left_breakpoint.split(":") three_prime = right_breakpoint.split(":") @@ -296,6 +319,10 @@ async def from_fusion_catcher( gene_3prime_element = self._get_gene_element( three_prime_partner, Caller.FUSION_CATCHER ) + if not self._are_fusion_partners_different( + gene_5prime_element.gene.label, gene_3prime_element.gene.label + ): + return None five_prime = five_prime_fusion_point.split(":") three_prime = three_prime_fusion_point.split(":") @@ -335,6 +362,9 @@ async def from_fusion_map( gene_5prime = self._get_gene_element(gene1, "fusion_map").gene.label gene_3prime = self._get_gene_element(gene2, "fusion_map").gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac( @@ -414,6 +444,9 @@ async def from_arriba( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + strand1 = strand1.split("/")[1] # Determine strand that is transcribed strand2 = strand2.split("/")[1] # Determine strand that is transcribed if strand1 == "+": @@ -509,6 +542,9 @@ async def from_cicero( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac(chr_5prime, rb), @@ -559,6 +595,9 @@ async def from_mapsplice( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac(mapsplice_row[0].split("~")[0], rb), @@ -606,6 +645,9 @@ async def from_enfusion( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac(chr_5prime, rb), @@ -660,6 +702,9 @@ async def from_genie( gene_5prime = gene_5prime_element.gene.label gene_3prime = gene_3prime_element.gene.label + if not self._are_fusion_partners_different(gene_5prime, gene_3prime): + return None + tr_5prime = await self.fusor.transcript_segment_element( tx_to_genomic_coords=False, genomic_ac=self._get_genomic_ac(site1_chrom, rb), diff --git a/tests/test_translators.py b/tests/test_translators.py index 5eb8c0f..98dfd91 100644 --- a/tests/test_translators.py +++ b/tests/test_translators.py @@ -115,6 +115,15 @@ def test_gene_element_arriba(translator_instance): assert gene.gene.label == "MIR3672" +def test_valid_fusion_partners(translator_instance): + """Test that the fusion partners supplied to the translator are different""" + partners_check = translator_instance._are_fusion_partners_different("BCR", "ABL1") + assert partners_check + + partners_check = translator_instance._are_fusion_partners_different("BCR", "BCR") + assert not partners_check + + @pytest.mark.asyncio() async def test_jaffa( fusion_data_example, fusion_data_example_nonexonic, translator_instance