Skip to content

Commit

Permalink
feat: Add new metadata classes for AssayedFusion events (#219)
Browse files Browse the repository at this point in the history
  • Loading branch information
jarbesfeld authored Jan 10, 2025
1 parent 69c1654 commit 4b66e7a
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 1 deletion.
77 changes: 76 additions & 1 deletion src/fusor/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
StrictBool,
StrictInt,
StrictStr,
StringConstraints,
model_validator,
)

Expand All @@ -36,6 +37,10 @@ class FUSORTypes(str, Enum):
GENE_ELEMENT = "GeneElement"
UNKNOWN_GENE_ELEMENT = "UnknownGeneElement"
MULTIPLE_POSSIBLE_GENES_ELEMENT = "MultiplePossibleGenesElement"
BREAKPOINT_COVERAGE = "BreakpointCoverage"
CONTIG_SEQUENCE = "ContigSequence"
SPLIT_READS = "SplitReads"
SPANNING_READS = "SpanningReads"
REGULATORY_ELEMENT = "RegulatoryElement"
CATEGORICAL_FUSION = "CategoricalFusion"
ASSAYED_FUSION = "AssayedFusion"
Expand Down Expand Up @@ -112,6 +117,73 @@ class BaseStructuralElement(ABC, BaseModel):
type: StructuralElementType


class BreakpointCoverage(BaseStructuralElement):
    """Model the fragment coverage observed at a fusion breakpoint.

    Breakpoint coverage is the number of fragments that are retained near the
    breakpoint for a fusion partner.
    """

    # Discriminator value identifying this element type.
    type: Literal[FUSORTypes.BREAKPOINT_COVERAGE] = FUSORTypes.BREAKPOINT_COVERAGE
    # Fragment count; ``ge=0`` rejects negative values.
    fragmentCoverage: int = Field(ge=0)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "BreakpointCoverage",
                "fragmentCoverage": 180,
            },
        },
    )


class ContigSequence(BaseStructuralElement):
    """Model the assembled contig sequence supporting a reported fusion event.

    The ``contig`` value is constrained to be a non-empty string made up of
    the nucleotide characters A, C, G, and T.
    """

    # Discriminator value identifying this element type.
    type: Literal[FUSORTypes.CONTIG_SEQUENCE] = FUSORTypes.CONTIG_SEQUENCE
    # Constraints: strip surrounding whitespace, upper-case, and match the
    # nucleotide pattern.
    # NOTE(review): confirm whether pydantic checks ``pattern`` before or
    # after applying ``to_upper``; if before, lowercase input is rejected.
    contig: Annotated[
        str,
        StringConstraints(
            strip_whitespace=True,
            to_upper=True,
            pattern=r"^[ACGT]+$",
        ),
    ]

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "ContigSequence",
                "contig": "GTACTACTGATCTAGCATCTAGTA",
            },
        },
    )


class SplitReads(BaseStructuralElement):
    """Model the split-read support for a fusion.

    Split reads are the reads that cover the junction between the detected
    partners in the fusion.
    """

    # Discriminator value identifying this element type.
    type: Literal[FUSORTypes.SPLIT_READS] = FUSORTypes.SPLIT_READS
    # Read count; ``ge=0`` rejects negative values.
    splitReads: int = Field(ge=0)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "SplitReads",
                "splitReads": 100,
            },
        },
    )


class SpanningReads(BaseStructuralElement):
    """Model the spanning-read support for a fusion.

    Spanning reads are the pairs of reads that support the reported fusion
    event.
    """

    # Discriminator value identifying this element type.
    type: Literal[FUSORTypes.SPANNING_READS] = FUSORTypes.SPANNING_READS
    # Read-pair count; ``ge=0`` rejects negative values.
    spanningReads: int = Field(ge=0)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "SpanningReads",
                "spanningReads": 100,
            },
        },
    )


class TranscriptSegmentElement(BaseStructuralElement):
"""Define TranscriptSegment class"""

Expand All @@ -126,6 +198,7 @@ class TranscriptSegmentElement(BaseStructuralElement):
gene: Gene
elementGenomicStart: SequenceLocation | None = None
elementGenomicEnd: SequenceLocation | None = None
coverage: BreakpointCoverage | None = None

@model_validator(mode="before")
def check_exons(cls, values):
Expand Down Expand Up @@ -571,7 +644,8 @@ class Assay(BaseModelForbidExtra):
| GeneElement
| TemplatedSequenceElement
| LinkerElement
| UnknownGeneElement,
| UnknownGeneElement
| ContigSequence,
Field(discriminator="type"),
]

Expand Down Expand Up @@ -620,6 +694,7 @@ class AssayedFusion(AbstractFusion):
structure: list[AssayedFusionElement]
causativeEvent: CausativeEvent | None = None
assay: Assay | None = None
contig: ContigSequence | None = None

model_config = ConfigDict(
json_schema_extra={
Expand Down
52 changes: 52 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@
AbstractFusion,
Assay,
AssayedFusion,
BreakpointCoverage,
CategoricalFusion,
CausativeEvent,
ContigSequence,
EventType,
FunctionalDomain,
GeneElement,
LinkerElement,
MultiplePossibleGenesElement,
RegulatoryElement,
SpanningReads,
SplitReads,
TemplatedSequenceElement,
TranscriptSegmentElement,
UnknownGeneElement,
Expand Down Expand Up @@ -612,6 +616,54 @@ def test_mult_gene_element():
check_validation_error(exc_info, msg)


def test_coverage():
    """Test that BreakpointCoverage initializes and validates its ``type``."""
    coverage = BreakpointCoverage(fragmentCoverage=100)
    assert coverage.fragmentCoverage == 100

    # An unrecognized ``type`` must be rejected. The constructor is called
    # bare (not wrapped in ``assert``) so the call still runs under ``-O``.
    with pytest.raises(ValidationError) as exc_info:
        BreakpointCoverage(type="coverage")
    msg = "Input should be <FUSORTypes.BREAKPOINT_COVERAGE: 'BreakpointCoverage'>"
    check_validation_error(exc_info, msg)


def test_contig():
    """Test that ContigSequence initializes and validates its ``type``."""
    contig_sequence = ContigSequence(contig="GTATACTATGATCAGT")
    assert contig_sequence.contig == "GTATACTATGATCAGT"

    # An unrecognized ``type`` must be rejected. The constructor is called
    # bare (not wrapped in ``assert``) so the call still runs under ``-O``.
    with pytest.raises(ValidationError) as exc_info:
        ContigSequence(type="contig")
    msg = "Input should be <FUSORTypes.CONTIG_SEQUENCE: 'ContigSequence'>"
    check_validation_error(exc_info, msg)


def test_split_reads():
    """Test that SplitReads initializes and validates its ``type``."""
    split_reads = SplitReads(splitReads=97)
    assert split_reads.splitReads == 97

    # An unrecognized ``type`` must be rejected. The constructor is called
    # bare (not wrapped in ``assert``) so the call still runs under ``-O``.
    with pytest.raises(ValidationError) as exc_info:
        SplitReads(type="splitreads")
    msg = "Input should be <FUSORTypes.SPLIT_READS: 'SplitReads'>"
    check_validation_error(exc_info, msg)


def test_spanning_reads():
    """Test that SpanningReads initializes and validates its ``type``."""
    spanning_reads = SpanningReads(spanningReads=97)
    assert spanning_reads.spanningReads == 97

    # An unrecognized ``type`` must be rejected. The constructor is called
    # bare (not wrapped in ``assert``) so the call still runs under ``-O``.
    with pytest.raises(ValidationError) as exc_info:
        SpanningReads(type="spanningreads")
    msg = "Input should be <FUSORTypes.SPANNING_READS: 'SpanningReads'>"
    check_validation_error(exc_info, msg)


def test_event():
"""Test Event object initializes correctly"""
rearrangement = EventType.REARRANGEMENT
Expand Down

0 comments on commit 4b66e7a

Please sign in to comment.