Skip to content

Commit

Permalink
feat: Add bedrecord/interval converters (#27)
Browse files Browse the repository at this point in the history
* feat: add bedrecord/interval converters

* fix: resolve circular import

* refactor: use new constructor

* refactor: fix circular import

* doc: fix docstring

* doc: fix docstrings
  • Loading branch information
msto authored Mar 20, 2024
1 parent 9bbabbe commit 674567c
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 59 deletions.
32 changes: 32 additions & 0 deletions pybedlite/bed_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,20 @@
- :class:`~pybedlite.bed_record.BedRecord` -- Lightweight class for storing information
pertaining to a BED record.
"""

from __future__ import annotations

import attr
import enum
from typing import Optional
from typing import Tuple
from typing import List
from typing import ClassVar
from typing import Type
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pybedlite.overlap_detector import Interval


"""Maximum BED fields that can be present in a well formed BED file written to specification"""
Expand Down Expand Up @@ -188,3 +196,27 @@ def as_bed_line(self, number_of_output_fields: Optional[int] = None) -> str:
)
fields = self.bed_fields[:number_of_output_fields]
return "\t".join(fields)

@classmethod
def from_interval(cls: Type["BedRecord"], interval: Interval) -> "BedRecord":
    """
    Construct a `BedRecord` from an `Interval` instance.

    **Note that `Interval` cannot represent a `BedRecord` with a missing strand.**
    Converting a record with no strand to `Interval` and then back to `BedRecord` will result in
    a record with **positive strand**.

    Only `chrom`, `start`, `end`, `strand`, and `name` are taken from the interval; no other
    fields are passed to the constructor.

    Args:
        interval: The `Interval` instance to convert.

    Returns:
        A `BedRecord` corresponding to the same region specified in the interval.
    """
    # Build through `cls` rather than naming `BedRecord` directly, so that calling this
    # alternate constructor on a subclass yields an instance of that subclass.
    return cls(
        chrom=interval.refname,
        start=interval.start,
        end=interval.end,
        # The interval's `negative` flag is the only strand information read here, so
        # anything that is not negative maps to the positive strand.
        strand=BedStrand.Negative if interval.negative else BedStrand.Positive,
        name=interval.name,
    )
1 change: 1 addition & 0 deletions pybedlite/bed_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- :class:`~pybedlite.bed_source.BedSource` -- Reader class for parsing BED files and iterating
over their contained records
"""

import io
from typing import IO
from typing import Optional
Expand Down
40 changes: 30 additions & 10 deletions pybedlite/overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,14 @@
from typing import List
from typing import Optional
from typing import Set
from typing import Type

import attr
import cgranges as cr

from pybedlite.bed_record import BedStrand
from pybedlite.bed_source import BedSource
from pybedlite.bed_record import BedRecord


@attr.s(frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -100,11 +102,34 @@ def length(self) -> int:
"""Returns the length of the interval."""
return self.end - self.start

@classmethod
def from_bedrecord(cls: Type["Interval"], record: BedRecord) -> "Interval":
    """
    Build an `Interval` covering the same region as a `BedRecord`.

    The interval's `negative` attribute is True only when the record's strand is
    `BedStrand.Negative`. A record with no strand specified therefore yields an interval with
    `negative` set to False, mirroring how `OverlapDetector.from_bed()` treats such records.

    Args:
        record: The `BedRecord` instance to convert.

    Returns:
        An `Interval` corresponding to the same region specified in the record.
    """
    # Identity check against the enum member: an unset strand is simply "not negative".
    on_negative_strand = record.strand is BedStrand.Negative

    return cls(
        refname=record.chrom,
        start=record.start,
        end=record.end,
        negative=on_negative_strand,
        name=record.name,
    )


class OverlapDetector(Iterable[Interval]):
"""Detects and returns overlaps between a set of genomic regions and another genomic region.
Since :class:`~samwell.overlap_detector.Interval` objects are used both to populate the
Since :class:`~pybedlite.overlap_detector.Interval` objects are used both to populate the
overlap detector and to query it, the coordinate system in use is also 0-based open-ended.
The same interval may be added multiple times, but only a single instance will be returned
Expand Down Expand Up @@ -236,20 +261,15 @@ def get_enclosed(self, interval: Interval) -> List[Interval]:

@classmethod
def from_bed(cls, path: Path) -> "OverlapDetector":
    """Builds a :class:`~pybedlite.overlap_detector.OverlapDetector` from a BED file.

    Each record read from the file is converted with `Interval.from_bedrecord` and added to
    the detector.

    Args:
        path: the path to the BED file

    Returns:
        An overlap detector for the regions in the BED file.
    """
    # Instantiate through `cls` so that a subclass calling `from_bed` gets its own type back.
    detector = cls()

    # NOTE(review): the BedSource is not explicitly closed here -- confirm whether it needs to be.
    for region in BedSource(path):
        detector.add(Interval.from_bedrecord(region))

    return detector
52 changes: 52 additions & 0 deletions pybedlite/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pytest
from typing import List
from pybedlite.bed_record import BedRecord
from pybedlite.bed_record import BedStrand


@pytest.fixture
def bed_records() -> List[BedRecord]:
    """
    Provide three sample `BedRecord` instances shared by the test modules.

    The first two records populate every field (one on the positive strand, one on the negative
    strand); the third sets only `chrom`, `start`, `end`, and `name`, with every other field
    explicitly `None`.
    """
    positive_record = BedRecord(
        chrom="1",
        start=100,
        end=150,
        name="test_record1",
        score=100,
        strand=BedStrand.Positive,
        thick_start=100,
        thick_end=100,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[50],
        block_starts=[0],
    )

    negative_record = BedRecord(
        chrom="1",
        start=200,
        end=300,
        name="test_record2",
        score=100,
        strand=BedStrand.Negative,
        thick_start=210,
        thick_end=290,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[100],
        block_starts=[0],
    )

    # Record with no strand and no optional fields set.
    unstranded_record = BedRecord(
        chrom="2",
        start=200,
        end=300,
        name="test_record3",
        score=None,
        strand=None,
        thick_start=None,
        thick_end=None,
        item_rgb=None,
        block_count=None,
        block_sizes=None,
        block_starts=None,
    )

    return [positive_record, negative_record, unstranded_record]
40 changes: 40 additions & 0 deletions pybedlite/tests/test_overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from pybedlite.overlap_detector import Interval
from pybedlite.overlap_detector import OverlapDetector
from pybedlite.bed_record import BedStrand
from pybedlite.bed_record import BedRecord


def run_test(targets: List[Interval], query: Interval, results: List[Interval]) -> None:
Expand Down Expand Up @@ -148,3 +150,41 @@ def test_iterable() -> None:
assert list(detector) == [a]
detector.add_all([a, b, c, d, e])
assert list(detector) == [a, a, b, c, d, e]


def test_conversion_to_interval(bed_records: List[BedRecord]) -> None:
    """
    Check that `Interval.from_bedrecord` carries over every field it maps from a `BedRecord`.
    """

    # The fixture returns a list, so each record is checked in a plain loop rather than via
    # pytest.mark.parametrize.
    for source_record in bed_records:
        converted = Interval.from_bedrecord(source_record)
        expect_negative = source_record.strand is BedStrand.Negative

        assert converted.refname == source_record.chrom
        assert converted.start == source_record.start
        assert converted.end == source_record.end
        assert converted.negative is expect_negative
        assert converted.name == source_record.name


def test_construction_from_interval(bed_records: List[BedRecord]) -> None:
    """
    Check the `BedRecord` -> `Interval` -> `BedRecord` round trip.

    Region and name must survive unchanged. A record with no strand is expected to come back
    on the positive strand.
    """

    # The fixture returns a list, so each record is checked in a plain loop rather than via
    # pytest.mark.parametrize.
    for original in bed_records:
        as_interval = Interval.from_bedrecord(original)
        round_tripped = BedRecord.from_interval(as_interval)

        assert round_tripped.chrom == original.chrom
        assert round_tripped.start == original.start
        assert round_tripped.end == original.end
        assert round_tripped.name == original.name

        # A missing strand is expected to come back as positive after the round trip.
        expected_strand = BedStrand.Positive if original.strand is None else original.strand
        assert round_tripped.strand is expected_strand
49 changes: 0 additions & 49 deletions pybedlite/tests/test_pybedlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,6 @@
from pybedlite.bed_writer import BedWriter
from pybedlite.bed_source import BedSource
from pybedlite.bed_record import BedRecord
from pybedlite.bed_record import BedStrand


@pytest.fixture
def bed_records() -> List[BedRecord]:
    """
    Provide three sample `BedRecord` instances for the tests in this module.

    The first two records populate every field (one on the positive strand, one on the negative
    strand); the third sets only `chrom`, `start`, `end`, and `name`, with every other field
    explicitly `None`.
    """
    positive_record = BedRecord(
        chrom="1",
        start=100,
        end=150,
        name="test_record1",
        score=100,
        strand=BedStrand.Positive,
        thick_start=100,
        thick_end=100,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[50],
        block_starts=[0],
    )

    negative_record = BedRecord(
        chrom="1",
        start=200,
        end=300,
        name="test_record2",
        score=100,
        strand=BedStrand.Negative,
        thick_start=210,
        thick_end=290,
        item_rgb=(0, 0, 0),
        block_count=1,
        block_sizes=[100],
        block_starts=[0],
    )

    # Record with no strand and no optional fields set.
    unstranded_record = BedRecord(
        chrom="2",
        start=200,
        end=300,
        name="test_record3",
        score=None,
        strand=None,
        thick_start=None,
        thick_end=None,
        item_rgb=None,
        block_count=None,
        block_sizes=None,
        block_starts=None,
    )

    return [positive_record, negative_record, unstranded_record]


SNIPPET_BED = """\
Expand Down

0 comments on commit 674567c

Please sign in to comment.