Skip to content

Commit

Permalink
feat!: change CICERO fusion detection algorithm input parameters (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
jarbesfeld authored Nov 25, 2024
1 parent 1b067f0 commit 4013480
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 38 deletions.
59 changes: 45 additions & 14 deletions src/fusor/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,44 +461,75 @@ async def from_arriba(
)

async def from_cicero(
self, cicero_row: pl.DataFrame, rb: Assembly
) -> AssayedFusion:
self,
gene_5prime: str,
gene_3prime: str,
chr_5prime: str,
chr_3prime: str,
pos_5prime: int,
pos_3prime: int,
sv_ort: str,
event_type: str,
rb: Assembly,
) -> AssayedFusion | str:
"""Parse CICERO output to create AssayedFusion object
:param cicero_row: A row of CICERO output
:param gene_5prime: The gene symbol for the 5' partner
:param gene_3prime: The gene symbol for the 3' partner
:param chr_5prime: The chromosome for the 5' partner
:param chr_3prime: The chromosome for the 3' partner
:param pos_5prime: The genomic breakpoint for the 5' partner
:param pos_3prime: The genomic breakpoint for the 3' partner
:param sv_ort: Whether the mapping orientation of the assembled contig (driven by
structural variation) has confident biological meaning. Only events whose
value is ``>`` are processed
:param event_type: The structural variation event that created the called fusion
:param rb: The reference build used to call the fusion
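:return: An AssayedFusion object, if construction is successful. Otherwise, a
string describing why the event was not processed (ambiguous gene symbols,
or an ``sv_ort`` value other than ``>``)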
"""
gene1 = cicero_row.get_column("geneA").item()
gene2 = cicero_row.get_column("geneB").item()
gene_5prime = self._get_gene_element(gene1, "cicero")[0].gene.label
gene_3prime = self._get_gene_element(gene2, "cicero")[0].gene.label
# Check if gene symbols have valid formatting. CICERO can output two or more
# gene symbols for `gene_5prime` or `gene_3prime`, which are separated by a comma. As
# there is not a precise way to resolve this ambiguity, we do not process
# these events
if "," in gene_5prime or "," in gene_3prime:
msg = "Ambiguous gene symbols are reported by CICERO for at least one of the fusion partners"
_logger.warning(msg)
return msg

# Check CICERO annotation regarding the confidence that the called fusion
# has biological meaning
if sv_ort != ">":
msg = "CICERO annotation indicates that this event does not have confident biological meaning"
_logger.warning(msg)
return msg

gene_5prime = self._get_gene_element(gene_5prime, "cicero")[0].gene.label
gene_3prime = self._get_gene_element(gene_3prime, "cicero")[0].gene.label

tr_5prime = await self.fusor.transcript_segment_element(
tx_to_genomic_coords=False,
genomic_ac=self._get_genomic_ac(cicero_row.get_column("chrA").item(), rb),
seg_end_genomic=int(cicero_row.get_column("posA").item()),
genomic_ac=self._get_genomic_ac(chr_5prime, rb),
seg_end_genomic=pos_5prime,
gene=gene_5prime,
get_nearest_transcript_junction=True,
)

tr_3prime = await self.fusor.transcript_segment_element(
tx_to_genomic_coords=False,
genomic_ac=self._get_genomic_ac(cicero_row.get_column("chrB").item(), rb),
seg_start_genomic=int(cicero_row.get_column("posB").item()),
genomic_ac=self._get_genomic_ac(chr_3prime, rb),
seg_start_genomic=pos_3prime,
gene=gene_3prime,
get_nearest_transcript_junction=True,
)

if cicero_row.get_column("type").item() == "read_through":
if event_type == "read_through":
ce = CausativeEvent(
eventType=EventType("read-through"),
eventDescription=cicero_row.get_column("type").item(),
eventDescription=event_type,
)
else:
ce = CausativeEvent(
eventType=EventType("rearrangement"),
eventDescription=cicero_row.get_column("type").item(),
eventDescription=event_type,
)
return self._format_fusion(
gene_5prime,
Expand Down
112 changes: 88 additions & 24 deletions tests/test_translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,39 +368,103 @@ async def test_cicero(
):
"""Test CICERO translator"""
# Test exonic breakpoint
cicero_data = pl.DataFrame(
{
"geneA": "TPM3",
"geneB": "PDGFRB",
"chrA": "1",
"posA": "154170465",
"chrB": "5",
"posB": "150126612",
"type": "CTX",
}
)
gene_5prime = "TPM3"
gene_3prime = "PDGFRB"
chr_5prime = "1"
chr_3prime = "5"
pos_5prime = 154170465
pos_3prime = 150126612
sv_ort = ">"
event_type = "CTX"

cicero_fusor = await translator_instance.from_cicero(
cicero_data, Assembly.GRCH38.value
gene_5prime,
gene_3prime,
chr_5prime,
chr_3prime,
pos_5prime,
pos_3prime,
sv_ort,
event_type,
Assembly.GRCH38.value,
)
assert cicero_fusor.structure == fusion_data_example.structure

# Test non-exonic breakpoint
cicero_data_nonexonic = pl.DataFrame(
{
"geneA": "TPM3",
"geneB": "PDGFRB",
"chrA": "1",
"posA": "154173078",
"chrB": "5",
"posB": "150127173",
"type": "CTX",
}
)
gene_5prime = "TPM3"
gene_3prime = "PDGFRB"
chr_5prime = "1"
chr_3prime = "5"
pos_5prime = 154173078
pos_3prime = 150127173
sv_ort = ">"
event_type = "CTX"

cicero_fusor_nonexonic = await translator_instance.from_cicero(
cicero_data_nonexonic, Assembly.GRCH38.value
gene_5prime,
gene_3prime,
chr_5prime,
chr_3prime,
pos_5prime,
pos_3prime,
sv_ort,
event_type,
Assembly.GRCH38.value,
)
assert cicero_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure

# Test case where the called fusion does not have confident biological meaning
gene_5prime = "TPM3"
gene_3prime = "PDGFRB"
chr_5prime = "1"
chr_3prime = "5"
pos_5prime = 154173078
pos_3prime = 150127173
sv_ort = "?"
event_type = "CTX"

non_confident_bio = await translator_instance.from_cicero(
gene_5prime,
gene_3prime,
chr_5prime,
chr_3prime,
pos_5prime,
pos_3prime,
sv_ort,
event_type,
Assembly.GRCH38.value,
)
assert (
non_confident_bio
== "CICERO annotation indicates that this event does not have confident biological meaning"
)

# Test case where multiple gene symbols are reported for a fusion partner
gene_5prime = "TPM3"
gene_3prime = "PDGFRB,PDGFRB-FGFR4,FGFR4"
chr_5prime = "1"
chr_3prime = "5"
pos_5prime = 154173078
pos_3prime = 150127173
sv_ort = "?"
event_type = "CTX"

multiple_genes_fusion_partner = await translator_instance.from_cicero(
gene_5prime,
gene_3prime,
chr_5prime,
chr_3prime,
pos_5prime,
pos_3prime,
sv_ort,
event_type,
Assembly.GRCH38.value,
)
assert (
multiple_genes_fusion_partner
== "Ambiguous gene symbols are reported by CICERO for at least one of the fusion partners"
)


@pytest.mark.asyncio()
async def test_enfusion(
Expand Down

0 comments on commit 4013480

Please sign in to comment.