From 0341b3be77604fe04ba34f264d8efd4c958c6b05 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 22 Aug 2024 10:17:55 -0400 Subject: [PATCH] fix: `SequenceLocation` `start`/`end` for `TranscriptSegmentElement` (#172) close #171 The tests for transcript segment elements are kind of confusing IMO. `transcript_segments` test fixture in `test_models` leverages `sequence_locations`. Some of these sequence locations have both start/end, but we only use start or end. I didn't change these because I don't think the values are actually tested. We can revisit in the future. --- src/fusor/models.py | 26 +++++++++++++------------- tests/conftest.py | 20 ++++++++++---------- tests/test_fusor.py | 16 ++++++++-------- tests/test_nomenclature.py | 14 +++++--------- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/src/fusor/models.py b/src/fusor/models.py index 544df0b..faea494 100644 --- a/src/fusor/models.py +++ b/src/fusor/models.py @@ -170,18 +170,19 @@ def check_exons(cls, values): "label": "TPM3", }, "elementGenomicStart": { - "id": "ga4gh:SL.2K1vML0ofuYrYncrzzXUQOISRFJldZrO", + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", "type": "SequenceReference", "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", }, - "start": 154192135, - "end": 154192136, + "end": 154192135, }, "elementGenomicEnd": { - "id": "ga4gh:SL.rtR6x2NnJEpROlxiT_DY9C-spf6ijYQi", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", @@ -189,7 +190,6 @@ def check_exons(cls, values): "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", }, "start": 154170399, - "end": 154170400, }, } }, @@ -703,26 +703,26 @@ class CategoricalFusion(AbstractFusion): "label": "TPM3", }, "elementGenomicStart": { - "id": "ga4gh:SL.2K1vML0ofuYrYncrzzXUQOISRFJldZrO", + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", - "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", + "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", }, - "start": 154192135, - "end": 154192136, + "end": 154192135, }, "elementGenomicEnd": { - "id": "ga4gh:SL.m5_Spfzt1v4sfVw9u4kmuYn7dM7gyNeb", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", - "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", + "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", }, - "start": 154170398, - "end": 154170399, + "start": 154170399, }, }, { diff --git a/tests/conftest.py b/tests/conftest.py index 7a8d720..cc7d947 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -743,7 +743,8 @@ def exhaustive_example(alk_gene, braf_gene, tpm3_gene): "exonEndOffset": 0, "gene": tpm3_gene, "elementGenomicStart": { - "id": "ga4gh:SL.2K1vML0ofuYrYncrzzXUQOISRFJldZrO", + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "description": None, "xrefs": None, "alternativeLabels": None, @@ -754,11 +755,11 @@ def exhaustive_example(alk_gene, braf_gene, tpm3_gene): "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "start": 154192135, - "end": 154192136, + "end": 154192135, }, "elementGenomicEnd": { - "id": "ga4gh:SL.rtR6x2NnJEpROlxiT_DY9C-spf6ijYQi", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "description": None, "xrefs": None, "alternativeLabels": None, @@ -770,7 +771,6 @@ def exhaustive_example(alk_gene, braf_gene, tpm3_gene): "type": "SequenceReference", }, "start": 154170399, - "end": 154170400, }, }, { @@ -868,7 +868,8 @@ def fusion_example(): "id": "hgnc:12012", }, "elementGenomicStart": { - "id": "ga4gh:SL.2K1vML0ofuYrYncrzzXUQOISRFJldZrO", + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "description": None, "xrefs": None, "alternativeLabels": None, @@ -879,11 +880,11 @@ def fusion_example(): "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "start": 154192135, - "end": 154192136, + "end": 154192135, }, "elementGenomicEnd": { - "id": "ga4gh:SL.rtR6x2NnJEpROlxiT_DY9C-spf6ijYQi", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "description": None, "xrefs": None, "alternativeLabels": None, @@ -895,7 +896,6 @@ def fusion_example(): "type": "SequenceReference", }, "start": 154170399, - "end": 154170400, }, }, { diff --git a/tests/test_fusor.py b/tests/test_fusor.py index 93d612d..a3f6304 100644 --- a/tests/test_fusor.py +++ b/tests/test_fusor.py @@ -194,27 +194,27 @@ def transcript_segment_element(): "type": "Gene", }, "transcript": "refseq:NM_152263.3", - "elementGenomicEnd": { - "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", - "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "elementGenomicStart": { + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "start": 154170399, + "end": 154192135, }, - "elementGenomicStart": { - "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", - "digest": "Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", + "elementGenomicEnd": { + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", + "digest": "Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "end": 154192135, + "start": 154170399, }, } return TranscriptSegmentElement(**params) diff --git a/tests/test_nomenclature.py b/tests/test_nomenclature.py index 44ddf62..83dc313 100644 --- a/tests/test_nomenclature.py +++ b/tests/test_nomenclature.py @@ -127,8 +127,7 @@ def exon_offset_example(): "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "start": 156864428, - "end": 156864429, + "start": 156864353, }, }, ], @@ -147,18 +146,17 @@ def tx_seg_example(): exonEndOffset=0, gene=Gene(id="hgnc:12012", label="TPM3"), elementGenomicStart={ - "id": "ga4gh:SL.2K1vML0ofuYrYncrzzXUQOISRFJldZrO", + "id": "ga4gh:SL.Q8vkGp7_xR9vI0PQ7g1IvUUeQ4JlJG8l", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", "type": "SequenceReference", }, - "start": 154192135, - "end": 154192136, + "end": 154192135, }, elementGenomicEnd={ - "id": "ga4gh:SL.rtR6x2NnJEpROlxiT_DY9C-spf6ijYQi", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", @@ -166,7 +164,6 @@ def tx_seg_example(): "type": "SequenceReference", }, "start": 154170399, - "end": 154170400, }, ) @@ -185,7 +182,7 @@ def junction_example(): "id": "hgnc:12012", }, elementGenomicEnd={ - "id": "ga4gh:SL.rtR6x2NnJEpROlxiT_DY9C-spf6ijYQi", + "id": "ga4gh:SL.Lnne0bSsgjzmNkKsNnXg98FeJSrDJuLb", "type": "SequenceLocation", "sequenceReference": { "id": "refseq:NC_000001.11", @@ -193,7 +190,6 @@ def junction_example(): "type": "SequenceReference", }, "start": 154170399, - "end": 154170400, }, )