Skip to content

Commit 94d171b

Browse files
committed
r1276: skip unnecessary reverse spliced alignment
for short-read RNA-seq only
1 parent ff312a2 commit 94d171b

File tree

2 files changed

+25
-20
lines changed

2 files changed

+25
-20
lines changed

align.c

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,30 +1012,35 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
10121012
n_a = mm_squeeze_a(km, n_regs, regs, a);
10131013
memset(&ez, 0, sizeof(ksw_extz_t));
10141014
for (i = 0; i < n_regs; ++i) {
1015-
mm_reg1_t r2;
1015+
mm_reg1_t r2; // only used for inversion
10161016
if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) { // then do two rounds of alignments for both strands
10171017
mm_reg1_t s[2], s2[2], *r;
1018-
int which, trans_strand;
10191018
s[0] = s[1] = regs[i];
1020-
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR);
1021-
mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], n_a, a, &ez, MM_F_SPLICE_REV);
1022-
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
1023-
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
1024-
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
1025-
if (which == 0) {
1019+
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], n_a, a, &ez, MM_F_SPLICE_FOR); // assume the transcript is on the + strand of the genome
1020+
if ((opt->flag&MM_F_SR_RNA) && regs[i].qe - regs[i].qs == regs[i].re - regs[i].rs && s[0].qe - s[0].qs == s[0].re - s[0].rs && s[0].qs == 0 && s[0].qe == qlen) {
10261021
regs[i] = s[0], r2 = s2[0];
1027-
free(s[1].p);
1022+
regs[i].p->trans_strand = 0;
10281023
} else {
1029-
regs[i] = s[1], r2 = s2[1];
1030-
free(s[0].p);
1031-
}
1032-
r = &regs[i];
1033-
r->p->trans_strand = trans_strand;
1034-
if (r->is_spliced) {
1035-
if (trans_strand == 1 || trans_strand == 2) // this is an *approximate* way to tell if there are splice signals.
1036-
r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
1037-
else if (trans_strand == 3)
1038-
r->p->dp_max -= opt->a + opt->b;
1024+
int which, trans_strand;
1025+
mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], n_a, a, &ez, MM_F_SPLICE_REV); // assume the transcript on the - strand
1026+
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
1027+
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
1028+
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
1029+
if (which == 0) {
1030+
regs[i] = s[0], r2 = s2[0];
1031+
free(s[1].p);
1032+
} else {
1033+
regs[i] = s[1], r2 = s2[1];
1034+
free(s[0].p);
1035+
}
1036+
r = &regs[i];
1037+
r->p->trans_strand = trans_strand;
1038+
if (r->is_spliced) {
1039+
if (trans_strand == 1 || trans_strand == 2) // this is an *approximate* way to tell if there are splice signals.
1040+
r->p->dp_max += (opt->a + opt->b) + ((opt->a + opt->b) >> 1);
1041+
else if (trans_strand == 3)
1042+
r->p->dp_max -= opt->a + opt->b;
1043+
}
10391044
}
10401045
} else { // one round of alignment
10411046
mm_align1(km, opt, mi, qlen, qseq0, &regs[i], &r2, n_a, a, &ez, opt->flag);

minimap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <stdio.h>
66
#include <sys/types.h>
77

8-
#define MM_VERSION "2.28-r1275-dirty"
8+
#define MM_VERSION "2.28-r1276-dirty"
99

1010
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
1111
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name

0 commit comments

Comments
 (0)