Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit primer pairs in penalty order. #87

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.DS_Store
.vscode/
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
103 changes: 62 additions & 41 deletions prymer/api/picking.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

"""

from collections.abc import Sequence
from pathlib import Path
from typing import Iterator
from typing import Optional
from typing import Tuple

from pysam import FastaFile

Expand Down Expand Up @@ -79,20 +81,21 @@ def score(
# The penalty for the amplicon melting temperature.
# The difference in melting temperature between the calculated and optimal is weighted by the
# product melting temperature.
tm = amplicon_tm
tm_penalty: float
if tm > amplicon_tms.opt:
tm_penalty = (tm - amplicon_tms.opt) * weights.product_tm_gt
if amplicon_tms.opt == 0.0:
tm_penalty = 0.0
elif amplicon_tm > amplicon_tms.opt:
tm_penalty = (amplicon_tm - amplicon_tms.opt) * weights.product_tm_gt
else:
tm_penalty = (amplicon_tms.opt - tm) * weights.product_tm_lt
tm_penalty = (amplicon_tms.opt - amplicon_tm) * weights.product_tm_lt

# Put it all together
return left_primer.penalty + right_primer.penalty + size_penalty + tm_penalty


def build_primer_pairs(
left_primers: list[Oligo],
right_primers: list[Oligo],
def build_primer_pairs( # noqa: C901
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion I think this is a good sign that now would be an appropriate time to consider implementing #85

left_primers: Sequence[Oligo],
right_primers: Sequence[Oligo],
target: Span,
amplicon_sizes: MinOptMax[int],
amplicon_tms: MinOptMax[float],
Expand Down Expand Up @@ -136,42 +139,60 @@ def build_primer_pairs(
region_end = max(p.span.end for p in right_primers)
bases = fasta.fetch(target.refname, region_start - 1, region_end)

# Each tuple is left_idx, right_idx, penalty, tm
pairings: list[Tuple[int, int, float, float]] = []
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion Could we make this a dataclass? A four element tuple with multiple elements of the same type seems destined to be mis-indexed.


for i in range(0, len(left_primers)):
for j in range(0, len(right_primers)):
lp = left_primers[i]
rp = right_primers[j]

# Ignore pairings with amplicons too large
if rp.span.end - lp.span.start + 1 > amplicon_sizes.max:
continue

# Ignore pairings with amplicons too small
if rp.span.end - lp.span.start + 1 < amplicon_sizes.min:
continue

amp_mapping = Span(refname=target.refname, start=lp.span.start, end=rp.span.end)
amp_bases = bases[amp_mapping.start - region_start : amp_mapping.end - region_start + 1]
amp_tm = calculate_long_seq_tm(amp_bases)

if amp_tm < amplicon_tms.min or amp_tm > amplicon_tms.max:
continue

penalty = score(
left_primer=lp,
right_primer=rp,
amplicon=amp_mapping,
amplicon_tm=amp_tm,
amplicon_sizes=amplicon_sizes,
amplicon_tms=amplicon_tms,
weights=weights,
)

pairings.append((i, j, penalty, amp_tm))

pairings.sort(key=lambda tup: tup[2])

with NtThermoAlign() as ntthal:
# generate all the primer pairs that don't violate hard size and Tm constraints
for lp in left_primers:
for rp in right_primers:
if rp.span.end - lp.span.start + 1 > amplicon_sizes.max:
for i, j, penalty, tm in pairings:
lp = left_primers[i]
rp = right_primers[j]

if max_heterodimer_tm is not None:
if ntthal.duplex_tm(lp.bases, rp.bases) > max_heterodimer_tm:
continue

amp_mapping = Span(refname=target.refname, start=lp.span.start, end=rp.span.end)
amp_bases = bases[
amp_mapping.start - region_start : amp_mapping.end - region_start + 1
]
amp_tm = calculate_long_seq_tm(amp_bases)
amp_bases = bases[lp.span.start - region_start : rp.span.end - region_start + 1]

if amp_tm < amplicon_tms.min or amp_tm > amplicon_tms.max:
continue
pp = PrimerPair(
left_primer=lp,
right_primer=rp,
amplicon_sequence=amp_bases,
amplicon_tm=tm,
penalty=penalty,
)

if max_heterodimer_tm is not None:
if ntthal.duplex_tm(lp.bases, rp.bases) > max_heterodimer_tm:
continue

penalty = score(
left_primer=lp,
right_primer=rp,
amplicon=amp_mapping,
amplicon_tm=amp_tm,
amplicon_sizes=amplicon_sizes,
amplicon_tms=amplicon_tms,
weights=weights,
)

pp = PrimerPair(
left_primer=lp,
right_primer=rp,
amplicon_sequence=amp_bases,
amplicon_tm=amp_tm,
penalty=penalty,
)

yield pp
yield pp
44 changes: 24 additions & 20 deletions tests/api/test_picking.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,25 +383,29 @@ def test_build_primer_pairs_fails_when_primers_on_wrong_reference(
assert next(picks) is not None

with pytest.raises(ValueError, match="Left primers exist on different reference"):
_picks = list(picking.build_primer_pairs(
left_primers=invalid_lefts,
right_primers=valid_rights,
target=target,
amplicon_sizes=MinOptMax(0, 100, 500),
amplicon_tms=MinOptMax(0, 80, 150),
max_heterodimer_tm=None,
weights=weights,
fasta_path=fasta,
))
_picks = list(
picking.build_primer_pairs(
left_primers=invalid_lefts,
right_primers=valid_rights,
target=target,
amplicon_sizes=MinOptMax(0, 100, 500),
amplicon_tms=MinOptMax(0, 80, 150),
max_heterodimer_tm=None,
weights=weights,
fasta_path=fasta,
)
)

with pytest.raises(ValueError, match="Right primers exist on different reference"):
_picks = list(picking.build_primer_pairs(
left_primers=valid_lefts,
right_primers=invalid_rights,
target=target,
amplicon_sizes=MinOptMax(0, 100, 500),
amplicon_tms=MinOptMax(0, 80, 150),
max_heterodimer_tm=None,
weights=weights,
fasta_path=fasta,
))
_picks = list(
picking.build_primer_pairs(
left_primers=valid_lefts,
right_primers=invalid_rights,
target=target,
amplicon_sizes=MinOptMax(0, 100, 500),
amplicon_tms=MinOptMax(0, 80, 150),
max_heterodimer_tm=None,
weights=weights,
fasta_path=fasta,
)
)
Loading