Skip to content

Commit

Permalink
builder
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Dec 13, 2024
1 parent beb22ae commit 4d204e2
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 17 deletions.
1 change: 1 addition & 0 deletions bwapy/libbwapy.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ cdef extern from "bwt.h":

cdef extern from "bwtaln.h":
int BWA_TYPE_NO_MATCH
int BWA_MODE_LOGGAP

int __cigar_op(uint16_t __cigar)
int __cigar_len(uint16_t __cigar)
Expand Down
28 changes: 21 additions & 7 deletions bwapy/libbwapy.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,29 @@ from pysam import AlignmentHeader
from pysam import FastxRecord

class BwaOptions:
def __init__(self) -> None: ...
@property
def trim_qual(self) -> int: ...
max_hits: int
def __init__(self, max_hits: int = 3) -> None: ...

class BwaOptionsBuilder:
def __init__(self) -> None: ...
_options: BwaOptions
def __init__(self, options: BwaOptions | None = None) -> None: ...
def build(self) -> BwaOptions: ...
def max_mismatches(self, value: int) -> BwaOptionsBuilder: ... # -n <int>
# def fnr(self, value: float) -> BwaOptionsBuilder: ... # -n <float>
def max_gap_opens(self, value: int) -> BwaOptionsBuilder: ... # -o <int>
def max_gap_extensions(self, value: int) -> BwaOptionsBuilder: ... # -e <int>
def min_indel_to_end_distance(self, value: int) -> BwaOptionsBuilder: ... # -i <int>
def max_occurences_for_extending_long_deletion(
self, value: int
) -> BwaOptionsBuilder: ... # -d <int>
def seed_length(self, value: int) -> BwaOptionsBuilder: ... # -l <int>
def max_mismatches_in_seed(self, value: int) -> BwaOptionsBuilder: ... # -k <int>
def mismatch_penalty(self, value: int) -> BwaOptionsBuilder: ... # -M <int>
def gap_open_penalty(self, value: int) -> BwaOptionsBuilder: ... # -O <int>
def gap_extension_penalty(self, value: int) -> BwaOptionsBuilder: ... # -E <int>
def stop_at_max_best_hits(self, value: int) -> BwaOptionsBuilder: ... # -R <int>
def max_hits(self, value: int) -> BwaOptionsBuilder: ... # bwa samse -n <int>
def log_scaled_gap_penalty(self, value: bool = True) -> BwaOptionsBuilder: ... # -L

ERROR_HANDLER: str
TEXT_ENCODING: str
Expand All @@ -23,6 +39,4 @@ class BwaIndex:

class Bwa:
def __init__(self, prefix: str | Path | None = None, index: BwaIndex | None = None) -> None: ...
def align(
self, opt: BwaOptions, max_hits: int, queries: List[FastxRecord]
) -> List[AlignedSegment]: ...
def align(self, opt: BwaOptions, queries: List[FastxRecord]) -> List[AlignedSegment]: ...
87 changes: 79 additions & 8 deletions bwapy/libbwapy.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ __all__ = [

cdef class BwaOptions:
cdef gap_opt_t * _delegate
max_hits: int

def __init__(self):
def __init__(self, max_hits: int = 3):
self.max_hits = max_hits
self._cinit()

cdef _cinit(self):
Expand All @@ -33,13 +35,82 @@ cdef class BwaOptions:
return self._delegate.trim_qual


class BwaOptionsBuilder:
def __init__(self) -> None:
self._options = BwaOptions()
cdef class BwaOptionsBuilder:
def __init__(self, options: BwaOptions | None = None) -> None:
    """Create a builder around an options object.

    Args:
        options: an existing ``BwaOptions`` to configure; when ``None`` a new
            ``BwaOptions()`` is created. The setters on this builder modify the
            held object directly.
    """
    self._options: BwaOptions = BwaOptions() if options is None else options

def build(self) -> BwaOptions:
    """Return the options object held by this builder (the same instance, not a copy)."""
    return self._options

def max_mismatches(self, value: int):
    """Set the maximum number of differences allowed (``bwa aln -n <int>``).

    Declared with ``def`` (not ``cdef``) so that it is callable from Python, matching
    the type stub and the other builder setters.

    Args:
        value: the maximum edit distance, given as an integer.

    Returns:
        This builder, to allow call chaining.
    """
    # `-n <int>` maps to `max_diff`; `s_mm` is the mismatch *penalty* (`-M`), which
    # `mismatch_penalty` sets. bwa's own option parsing also sets `fnr` to -1 when an
    # integer is given, so the fractional rate does not take precedence over `max_diff`.
    # NOTE(review): requires `fnr` and `max_diff` to be declared on `gap_opt_t` in
    # libbwapy.pxd -- confirm.
    self._options._delegate.fnr = -1.0
    self._options._delegate.max_diff = value
    return self
#cdef fnr(self, value: float) -> BwaOptionsBuilder: ... # -n <float>
def max_gap_opens(self, value: int):
    """Set the maximum number of gap opens (``bwa aln -o <int>``).

    Declared with ``def`` (not ``cdef``) so that it is callable from Python, matching
    the type stub and the other builder setters.

    Args:
        value: the maximum number of gap opens.

    Returns:
        This builder, to allow call chaining.
    """
    # `-o` maps to `max_gapo`; `s_gapo` is the gap open *penalty* (`-O`), which
    # `gap_open_penalty` sets.
    # NOTE(review): requires `max_gapo` to be declared on `gap_opt_t` in
    # libbwapy.pxd -- confirm.
    self._options._delegate.max_gapo = value
    return self

def max_gap_extensions(self, value: int):
    """Set the maximum number of gap extensions (``bwa aln -e <int>``).

    Declared with ``def`` (not ``cdef``) so that it is callable from Python, matching
    the type stub and the other builder setters.

    Args:
        value: the maximum number of gap extensions.

    Returns:
        This builder, to allow call chaining.
    """
    # `-e` maps to `max_gape`; `s_gape` is the gap extension *penalty* (`-E`), which
    # `gap_extension_penalty` sets.
    # NOTE(review): the bwa command line additionally clears BWA_MODE_GAPE from `mode`
    # when a positive `-e` is given; that constant is not visibly declared here, so it
    # is not touched -- confirm whether it should be. Also requires `max_gape` to be
    # declared on `gap_opt_t` in libbwapy.pxd.
    self._options._delegate.max_gape = value
    return self

def min_indel_to_end_distance(self, value: int):
    """Set ``indel_end_skip`` on the underlying options (``bwa aln -i <int>``).

    Declared with ``def`` (not ``cdef``) so that it is callable from Python, matching
    the type stub and the other builder setters.

    Args:
        value: the value to store in ``indel_end_skip``.

    Returns:
        This builder, to allow call chaining.
    """
    self._options._delegate.indel_end_skip = value
    return self

def max_occurences_for_extending_long_deletion(self, value: int):
    """Set ``max_del_occ`` on the underlying options (``bwa aln -d <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.max_del_occ = value
    return self

def seed_length(self, value: int):
    """Set ``seed_len`` on the underlying options (``bwa aln -l <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.seed_len = value
    return self

def max_mismatches_in_seed(self, value: int):
    """Set ``max_seed_diff`` on the underlying options (``bwa aln -k <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.max_seed_diff = value
    return self

def mismatch_penalty(self, value: int):
    """Set ``s_mm`` on the underlying options (``bwa aln -M <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.s_mm = value
    return self

def gap_open_penalty(self, value: int):
    """Set ``s_gapo`` on the underlying options (``bwa aln -O <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.s_gapo = value
    return self

def gap_extension_penalty(self, value: int):
    """Set ``s_gape`` on the underlying options (``bwa aln -E <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.s_gape = value
    return self

def stop_at_max_best_hits(self, value: int):
    """Set ``max_top2`` on the underlying options (``bwa aln -R <int>``).

    Returns this builder so that calls can be chained.
    """
    self._options._delegate.max_top2 = value
    return self

def max_hits(self, value: int):
    """Set ``max_hits`` on the held ``BwaOptions`` (``bwa samse -n <int>``).

    Unlike the other setters, this writes an attribute of the options object itself
    rather than a field of its ``_delegate``. Returns this builder for chaining.
    """
    self._options.max_hits = value
    return self

def log_scaled_gap_penalty(self, value: bool = True):
    """Toggle the ``BWA_MODE_LOGGAP`` bit in the options ``mode`` (``bwa aln -L``).

    Args:
        value: when true the bit is set; when false it is cleared.

    Returns:
        This builder, to allow call chaining.
    """
    if not value:
        # Clear only the log-gap bit, leaving any other mode bits untouched.
        self._options._delegate.mode &= ~BWA_MODE_LOGGAP
        return self
    self._options._delegate.mode |= BWA_MODE_LOGGAP
    return self


cdef str ERROR_HANDLER = 'strict'
cdef str TEXT_ENCODING = 'utf-8'

Expand Down Expand Up @@ -127,8 +198,8 @@ cdef class Bwa:
err_fread_noeof(self._pacseq, 1, self._index._bns.l_pac//4+1, self._index._bns.fp_pac)

# TODO: a list of records...
def align(self, opt: BwaOptions, max_hits: int, queries: List[FastxRecord]) -> List[AlignedSegment]:
return self._calign(opt, max_hits, queries)
def align(self, opt: BwaOptions, queries: List[FastxRecord]) -> List[AlignedSegment]:
return self._calign(opt, queries)

cdef _copy_seq(self, q: FastxRecord, bwa_seq_t* s):
seq_len = len(q.sequence)
Expand All @@ -152,7 +223,7 @@ cdef class Bwa:
strncpy(s.name, force_bytes(q.name), len(q.name))
s.name[len(q.name)] = b'\0'

cdef _calign(self, opt: BwaOptions, max_hits: int, queries: List[FastxRecord]):
cdef _calign(self, opt: BwaOptions, queries: List[FastxRecord]):
cdef bwa_seq_t* seqs
cdef bwa_seq_t* s
cdef char* s_char
Expand All @@ -179,7 +250,7 @@ cdef class Bwa:
# bwa_cal_sa_reg_gap frees name, seq, rseq, and qual, so add them back in again
self._copy_seq(queries[i], s)
print(f"[bwa_aln2seq_core] {i}")
bwa_aln2seq_core(s.n_aln, s.aln, s, 1, max_hits)
bwa_aln2seq_core(s.n_aln, s.aln, s, 1, opt.max_hits)

# calculate the genomic position given the suffix array offset
print("[bwa_cal_pac_pos_with_bwt]")
Expand Down
9 changes: 7 additions & 2 deletions tests/test_bwapy.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from pathlib import Path

import pytest
from pysam import FastxRecord

from bwapy.libbwapy import Bwa
from bwapy.libbwapy import BwaIndex
from bwapy.libbwapy import BwaOptions
from bwapy.libbwapy import BwaOptionsBuilder


@pytest.fixture()
Expand All @@ -19,6 +18,12 @@ def test_bwapy_options() -> None:
BwaOptions()


def test_bwapy_options_builder() -> None:
    """Smoke test: a default builder can be constructed and built without error."""
    BwaOptionsBuilder().build()
    # TODO: test setting individual options...


def test_bwapy_index(ref_fasta: Path) -> None:
BwaIndex(prefix=ref_fasta)

Expand Down

0 comments on commit 4d204e2

Please sign in to comment.