Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Jul 31, 2024
1 parent 8283030 commit cf99c00
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pybedlite/bed_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def refname(self) -> str:
@property
def negative(self) -> bool:
"""True if the interval is on the negative strand, False otherwise"""
return self.strand is not None and self.strand == BedStrand.Positive
return self.strand is BedStrand.Negative

Check warning on line 193 in pybedlite/bed_record.py

View check run for this annotation

Codecov / codecov/patch

pybedlite/bed_record.py#L193

Added line #L193 was not covered by tests

def as_bed_line(self, number_of_output_fields: Optional[int] = None) -> str:
"""
Expand Down
37 changes: 25 additions & 12 deletions pybedlite/overlap_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,35 +175,48 @@ def from_bedrecord(cls: Type["Interval"], record: BedRecord) -> "Interval":
)


GenericGenomicsSpan = TypeVar("GenericGenomicsSpan", bound=Union[GenomicSpan, StrandedGenomicSpan])
GenericGenomicSpan = TypeVar("GenericGenomicSpan", bound=Union[GenomicSpan, StrandedGenomicSpan])
"""
A generic genomic feature. This type variable is used for describing the
generic type contained within the :class:`~pybedlite.overlap_detector.OverlapDetector`.
"""


class OverlapDetector(Generic[GenericGenomicsSpan], Iterable[GenericGenomicsSpan]):
class OverlapDetector(Generic[GenericGenomicSpan], Iterable[GenericGenomicSpan]):
"""Detects and returns overlaps between a set of genomic regions and another genomic region.
The overlap detector may contain any interval-like Python objects that have the following
properties:
* `chrom` or `contig` or `refname`: The reference sequence name
* `start`: A 0-based start position
* `end`: A 0-based exclusive end position
Interval-like Python objects may also contain strandedness information which will be used
for sorting them in :func:`~pybedlite.overlap_detector.OverlapDetector.get_overlaps` using
either of the following properties if they are present:
* `negative (bool)`: Whether the feature is on the negative strand
* `strand (BedStrand)`: The BED strand of the feature
* `strand (str)`: The strand of the feature (`"-"` for negative)
The same interval may be added multiple times, but only a single instance will be returned
when querying for overlaps.
This detector is most efficient when all intervals are added ahead of time.
"""

def __init__(self, intervals: Optional[Iterable[GenericGenomicsSpan]] = None) -> None:
def __init__(self, intervals: Optional[Iterable[GenericGenomicSpan]] = None) -> None:
# A mapping from the contig/chromosome name to the associated interval tree
self._refname_to_tree: Dict[str, cr.cgranges] = {} # type: ignore
self._refname_to_indexed: Dict[str, bool] = {}
self._refname_to_intervals: Dict[str, List[GenericGenomicsSpan]] = {}
self._refname_to_intervals: Dict[str, List[GenericGenomicSpan]] = {}
if intervals is not None:
self.add_all(intervals)

def __iter__(self) -> Iterator[GenericGenomicsSpan]:
def __iter__(self) -> Iterator[GenericGenomicSpan]:
"""Iterates over the intervals in the overlap detector."""
return itertools.chain(*self._refname_to_intervals.values())

def add(self, interval: GenericGenomicsSpan) -> None:
def add(self, interval: GenericGenomicSpan) -> None:
"""Adds an interval to this detector.
Args:
Expand Down Expand Up @@ -231,7 +244,7 @@ def add(self, interval: GenericGenomicsSpan) -> None:
# indexing
self._refname_to_indexed[refname] = False

def add_all(self, intervals: Iterable[GenericGenomicsSpan]) -> None:
def add_all(self, intervals: Iterable[GenericGenomicSpan]) -> None:
"""Adds one or more intervals to this detector.
Args:
Expand Down Expand Up @@ -264,7 +277,7 @@ def overlaps_any(self, interval: GenomicSpan) -> bool:
else:
return True

def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicSpan]:
"""Returns any intervals in this detector that overlap the given interval.
Args:
Expand All @@ -281,9 +294,9 @@ def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
else:
if not self._refname_to_indexed[refname]:
tree.index()
ref_intervals: List[GenericGenomicsSpan] = self._refname_to_intervals[refname]
ref_intervals: List[GenericGenomicSpan] = self._refname_to_intervals[refname]
# NB: only return unique instances of intervals
intervals: Set[GenericGenomicsSpan] = {
intervals: Set[GenericGenomicSpan] = {
ref_intervals[index]
for _, _, index in tree.overlap(refname, interval.start, interval.end)
}
Expand All @@ -301,7 +314,7 @@ def get_overlaps(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
def _negative(interval: GenomicSpan) -> bool:
return getattr(interval, "negative", False)

def get_enclosing_intervals(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
def get_enclosing_intervals(self, interval: GenomicSpan) -> List[GenericGenomicSpan]:
"""Returns the set of intervals in this detector that wholly enclose the query interval.
i.e. `query.start >= target.start` and `query.end <= target.end`.
Expand All @@ -314,7 +327,7 @@ def get_enclosing_intervals(self, interval: GenomicSpan) -> List[GenericGenomics
results = self.get_overlaps(interval)
return [i for i in results if interval.start >= i.start and interval.end <= i.end]

def get_enclosed(self, interval: GenomicSpan) -> List[GenericGenomicsSpan]:
def get_enclosed(self, interval: GenomicSpan) -> List[GenericGenomicSpan]:
"""Returns the set of intervals in this detector that are enclosed by the query
interval. I.e. target.start >= query.start and target.end <= query.end.
Expand Down

0 comments on commit cf99c00

Please sign in to comment.