Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Check against HN tag instead of is_unmapped property #69

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ jobs:
- name: Set up miniconda
uses: conda-incubator/setup-miniconda@v3
with:
miniforge-variant: Mambaforge
miniforge-version: latest
channels: conda-forge,bioconda
activate-environment: prymer
Expand Down
36 changes: 22 additions & 14 deletions prymer/offtarget/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,10 @@ def __str__(self) -> str:

@dataclass(init=True, frozen=True)
class BwaResult:
"""Represents zero or more hits or alignments found by BWA for the given query. The number of
hits found may be more than the number of hits listed in the `hits` attribute.
"""
Represents zero or more hits or alignments found by BWA for the given query.

The number of hits found may be more than the number of hits listed in the `hits` attribute.

Attributes:
query: the query (as given, no RC'ing)
Expand Down Expand Up @@ -346,7 +348,8 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]:
return results

def _to_result(self, query: Query, rec: pysam.AlignedSegment) -> BwaResult:
"""Converts the query and alignment to a result.
"""
Convert the query and alignment to a result.

Args:
query: the original query
Expand All @@ -357,17 +360,17 @@ def _to_result(self, query: Query, rec: pysam.AlignedSegment) -> BwaResult:
"Query and Results are out of order" f"Query=${query.id}, Result=${rec.query_name}"
)

num_hits: Optional[int] = int(rec.get_tag("HN")) if rec.has_tag("HN") else None
if rec.is_unmapped:
if num_hits is not None and num_hits > 0:
raise ValueError(f"Read was unmapped but num_hits > 0: {rec}")
return BwaResult(query=query, hit_count=0, hits=[])
elif num_hits > self.max_hits:
return BwaResult(query=query, hit_count=num_hits, hits=[])
else:
hits = self.to_hits(rec=rec)
hit_count = num_hits if len(hits) == 0 else len(hits)
return BwaResult(query=query, hit_count=hit_count, hits=hits)
num_hits: int = int(rec.get_tag("HN")) if rec.has_tag("HN") else 0
hits: list[BwaHit] = self.to_hits(rec=rec) if 0 < num_hits < self.max_hits else []

# `to_hits()` removes artifactual hits which span the boundary between concatenated
# reference sequences. If we are reporting a list of hits, the number of hits should match
# the size of this list. Otherwise, we either have zero hits, or more than the maximum
# number of hits. In both of the latter cases, we have to rely on the count reported in the
# `HN` tag.
num_hits = len(hits) if hits else num_hits
msto marked this conversation as resolved.
Show resolved Hide resolved

return BwaResult(query=query, hit_count=num_hits, hits=hits)

def to_hits(self, rec: AlignedSegment) -> list[BwaHit]:
"""Extracts the hits from the given alignment. Beyond the current alignment
Expand Down Expand Up @@ -412,4 +415,9 @@ def to_hits(self, rec: AlignedSegment) -> list[BwaHit]:
if not self.include_alt_hits:
hits = [hit for hit in hits if not hit.refname.endswith("_alt")]

# Remove hits that extend beyond the end of a contig - these are artifacts of `bwa aln`'s
# alignment process, which concatenates all reference sequences and reports hits which span
# across contigs.
hits = [hit for hit in hits if hit.end <= self.header.get_reference_length(hit.refname)]

return hits
Loading