Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: change arriba fusion detection algorithm input parameters #204

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 48 additions & 13 deletions src/fusor/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,53 +374,88 @@ async def from_fusion_map(
)

async def from_arriba(
    self,
    gene1: str,
    gene2: str,
    strand1: str,
    strand2: str,
    breakpoint1: str,
    breakpoint2: str,
    event: str,
    confidence: str,
    direction1: str,
    direction2: str,
    rf: str,
    rb: Assembly,
) -> AssayedFusion:
    """Parse Arriba output to create AssayedFusion object

    :param gene1: The 5' gene fusion partner
    :param gene2: The 3' gene fusion partner
    :param strand1: The strand information for the 5' gene fusion partner,
        formatted as ``<gene strand>/<transcribed strand>`` (e.g. ``-/-``)
    :param strand2: The strand information for the 3' gene fusion partner,
        formatted as ``<gene strand>/<transcribed strand>`` (e.g. ``-/-``)
    :param breakpoint1: The chromosome and breakpoint for gene1, formatted as
        ``<chromosome>:<position>``
    :param breakpoint2: The chromosome and breakpoint for gene2, formatted as
        ``<chromosome>:<position>``
    :param event: An inference about the type of fusion event
    :param confidence: A metric describing the confidence of the fusion prediction
    :param direction1: A description that indicates if the transcript segment
        starts or ends at breakpoint1 (``upstream`` or ``downstream``)
    :param direction2: A description that indicates if the transcript segment
        starts or ends at breakpoint2 (``upstream`` or ``downstream``)
    :param rf: A description if the reading frame is preserved for the fusion.
        ``.`` is treated as "not reported"
    :param rb: The reference build used to call the fusion
    :return: An AssayedFusion object, if construction is successful
    :raise IndexError: if a strand value contains no ``/`` or a breakpoint
        contains no ``:``
    :raise ValueError: if the position part of a breakpoint is not an integer
    """
    # Arriba reports two gene symbols if a breakpoint occurs in an intergenic
    # space. We select the gene symbol with the smallest distance from the
    # breakpoint.
    gene_5prime = self._get_gene_element(gene1, "arriba")[0].gene.label
    gene_3prime = self._get_gene_element(gene2, "arriba")[0].gene.label

    # The value after the slash is the strand that is transcribed.
    transcribed1 = strand1.split("/")[1]
    transcribed2 = strand2.split("/")[1]

    # On the minus strand an "upstream" direction means the segment starts at
    # the breakpoint; on any other strand value "downstream" does.
    gene1_seg_start = direction1 == (
        "upstream" if transcribed1 == "-" else "downstream"
    )
    gene2_seg_start = direction2 == (
        "upstream" if transcribed2 == "-" else "downstream"
    )

    bp1_parts = breakpoint1.split(":")
    bp2_parts = breakpoint2.split(":")
    bp1_chrom, bp1_pos = bp1_parts[0], int(bp1_parts[1])
    bp2_chrom, bp2_pos = bp2_parts[0], int(bp2_parts[1])

    # Exactly one of seg_start_genomic / seg_end_genomic is supplied per
    # partner, depending on whether the segment starts or ends at the breakpoint.
    tr_5prime = await self.fusor.transcript_segment_element(
        tx_to_genomic_coords=False,
        genomic_ac=self._get_genomic_ac(bp1_chrom, rb),
        seg_start_genomic=bp1_pos if gene1_seg_start else None,
        seg_end_genomic=None if gene1_seg_start else bp1_pos,
        gene=gene_5prime,
        get_nearest_transcript_junction=True,
    )

    tr_3prime = await self.fusor.transcript_segment_element(
        tx_to_genomic_coords=False,
        genomic_ac=self._get_genomic_ac(bp2_chrom, rb),
        seg_start_genomic=bp2_pos if gene2_seg_start else None,
        seg_end_genomic=None if gene2_seg_start else bp2_pos,
        gene=gene_3prime,
        get_nearest_transcript_junction=True,
    )

    # Arriba spells this flag with a hyphen (e.g. "deletion/read-through");
    # the underscore spelling is still accepted for backward compatibility.
    is_read_through = "read-through" in event or "read_through" in event
    ce = CausativeEvent(
        eventType=EventType("read-through" if is_read_through else "rearrangement"),
        eventDescription=confidence,
    )

    # "." means no reading-frame call was reported, so the frame is unknown.
    in_frame = None if rf == "." else rf == "in-frame"
    return self._format_fusion(
        gene_5prime, gene_3prime, tr_5prime, tr_3prime, ce, in_frame
    )
Expand Down
72 changes: 48 additions & 24 deletions tests/test_translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,36 +304,60 @@ async def test_arriba(
):
"""Test Arriba translator"""
# Test exonic breakpoint
arriba_data = pl.DataFrame(
{
"#gene1": "TPM3",
"gene2": "PDGFRB",
"breakpoint1": "1:154170465",
"breakpoint2": "5:150126612",
"type": ".",
"confidence": "high",
"reading_frame": "in-frame",
}
)
gene1 = "TPM3"
gene2 = "PDGFRB"
strand1 = "-/-"
strand2 = "-/-"
breakpoint1 = "1:154170465"
breakpoint2 = "5:150126612"
event = "translocation"
confidence = "high"
direction1 = "dowstream"
direction2 = "upstream"
rf = "in-frame"

arriba_fusor = await translator_instance.from_arriba(
arriba_data, Assembly.GRCH38.value
gene1,
gene2,
strand1,
strand2,
breakpoint1,
breakpoint2,
event,
confidence,
direction1,
direction2,
rf,
Assembly.GRCH38.value,
)
assert arriba_fusor.structure == fusion_data_example.structure

# Test non-exonic breakpoint
arriba_data_nonexonic = pl.DataFrame(
{
"#gene1": "TPM3",
"gene2": "PDGFRB",
"breakpoint1": "1:154173078",
"breakpoint2": "5:150127173",
"type": ".",
"confidence": "high",
"reading_frame": "in-frame",
}
)
gene1 = "TPM3"
gene2 = "PDGFRB"
strand1 = "-/-"
strand2 = "-/-"
breakpoint1 = "1:154173078"
breakpoint2 = "5:150127173"
event = "translocation"
confidence = "high"
direction1 = "dowstream"
direction2 = "upstream"
rf = "in-frame"

arriba_fusor_nonexonic = await translator_instance.from_arriba(
arriba_data_nonexonic, Assembly.GRCH38.value
gene1,
gene2,
strand1,
strand2,
breakpoint1,
breakpoint2,
event,
confidence,
direction1,
direction2,
rf,
Assembly.GRCH38.value,
)
assert arriba_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure

Expand Down