Skip to content

Commit a5411fc

Browse files
committed
r1269: added --write-junc
in preparation for 2-pass
1 parent bd03d97 commit a5411fc

File tree

5 files changed

+59
-2
lines changed

5 files changed

+59
-2
lines changed

format.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,52 @@ static void write_cs_ds_core(kstring_t *s, const uint8_t *tseq, const uint8_t *q
253253
assert(t_off == r->re - r->rs && q_off == r->qe - r->qs);
254254
}
255255

256+
static inline void revcomp_splice(uint8_t s[2])
257+
{
258+
uint8_t c = s[1] < 4? 3 - s[1] : 4;
259+
s[1] = s[0] < 4? 3 - s[0] : 4;
260+
s[0] = c;
261+
}
262+
263+
void mm_write_junc(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r)
264+
{
265+
int32_t i, t_off, swritten = 0;
266+
s->l = 0;
267+
if (!r->is_spliced || r->p == 0) return; // no junctions
268+
if (r->p->trans_strand != 1 && r->p->trans_strand != 2) return; // no preferred strand
269+
for (i = 0, t_off = r->rs; i < (int)r->p->n_cigar; ++i) {
270+
int op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
271+
if (op == MM_CIGAR_MATCH || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH || op == MM_CIGAR_DEL) {
272+
t_off += len;
273+
} else if (op == MM_CIGAR_N_SKIP) { // intron
274+
uint8_t donor[2], acceptor[2];
275+
int32_t score1 = 0, score2 = 0, rev;
276+
assert(len >= 2);
277+
rev = (r->p->trans_strand == 2) ^ r->rev;
278+
if (!rev) {
279+
mm_idx_getseq(mi, r->rid, t_off, t_off + 2, donor);
280+
mm_idx_getseq(mi, r->rid, t_off + len - 2, t_off + len, acceptor);
281+
} else {
282+
mm_idx_getseq(mi, r->rid, t_off, t_off + 2, acceptor);
283+
mm_idx_getseq(mi, r->rid, t_off + len - 2, t_off + len, donor);
284+
revcomp_splice(donor);
285+
revcomp_splice(acceptor);
286+
}
287+
//fprintf(stderr, "%c%c-%c%c\n", "ACGTN"[donor[0]], "ACGTN"[donor[1]], "ACGTN"[acceptor[0]], "ACGTN"[acceptor[1]]);
288+
if (donor[0] == 2 && donor[1] == 3) score1 = 3;
289+
else if (donor[0] == 2 && donor[1] == 1) score1 = 2;
290+
else if (donor[0] == 0 && donor[1] == 3) score1 = 1;
291+
if (acceptor[0] == 0 && acceptor[1] == 2) score2 = 3;
292+
else if (acceptor[0] == 0 && acceptor[1] == 1) score2 = 1;
293+
if (swritten) mm_sprintf_lite(s, "\n");
294+
else swritten = 1;
295+
mm_sprintf_lite(s, "%s\t%d\t%d\t%s\t%d\t%c", mi->seq[r->rid].name, t_off, t_off + len, t->name, score1 + score2, "+-"[rev]);
296+
t_off += len;
297+
}
298+
}
299+
assert(t_off == r->re);
300+
}
301+
256302
static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq, const mm_reg1_t *r, char *tmp, int write_tag)
257303
{
258304
int i, q_off, t_off, l_MD = 0;

main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ static ko_longopt_t long_options[] = {
8383
{ "junc-pen", ko_required_argument, 358 },
8484
{ "pairing", ko_required_argument, 359 },
8585
{ "jump-min-match", ko_required_argument, 360 },
86+
{ "write-junc", ko_no_argument, 361 },
8687
{ "dbg-seed-occ", ko_no_argument, 501 },
8788
{ "help", ko_no_argument, 'h' },
8889
{ "max-intron-len", ko_required_argument, 'G' },
@@ -255,6 +256,7 @@ int main(int argc, char *argv[])
255256
else if (c == 356) opt.rmq_inner_dist = mm_parse_num(o.arg); // --rmq-inner
256257
else if (c == 357) fn_spsc = o.arg; // --spsc
257258
else if (c == 360) opt.jump_min_match = mm_parse_num(o.arg); // --jump-min-match
259+
else if (c == 361) opt.flag |= MM_F_OUT_JUNC | MM_F_CIGAR; // --write-junc
258260
else if (c == 501) mm_dbg_flag |= MM_DBG_SEED_FREQ; // --dbg-seed-occ
259261
else if (c == 330) {
260262
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");

map.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,9 +594,16 @@ static void *worker_pipeline(void *shared, int step, void *in)
594594
mm_err_fwrite(r->p, r->p->capacity, 4, p->fp_split);
595595
}
596596
}
597+
} else if (p->opt->flag & MM_F_OUT_JUNC) { // extra logic for --write-junc
598+
for (j = 0; j < s->n_reg[i]; ++j) {
599+
const mm_reg1_t *r = &s->reg[i][j];
600+
if (r->id != r->parent) continue;
601+
mm_write_junc(&p->str, mi, t, r);
602+
if (p->str.l > 0) mm_err_puts(p->str.s);
603+
}
597604
} else if (s->n_reg[i] > 0) { // the query has at least one hit
598605
for (j = 0; j < s->n_reg[i]; ++j) {
599-
mm_reg1_t *r = &s->reg[i][j];
606+
const mm_reg1_t *r = &s->reg[i][j];
600607
assert(!r->sam_pri || r->id == r->parent);
601608
if ((p->opt->flag & MM_F_NO_PRINT_2ND) && r->id != r->parent)
602609
continue;

minimap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <stdio.h>
66
#include <sys/types.h>
77

8-
#define MM_VERSION "2.28-r1268-dirty"
8+
#define MM_VERSION "2.28-r1269-dirty"
99

1010
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
1111
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
@@ -47,6 +47,7 @@
4747
#define MM_F_OUT_DS (0x2000000000LL)
4848
#define MM_F_WEAK_PAIRING (0x4000000000LL)
4949
#define MM_F_SR_RNA (0x8000000000LL)
50+
#define MM_F_OUT_JUNC (0x10000000000LL)
5051

5152
#define MM_I_HPC 0x1
5253
#define MM_I_NO_SEQ 0x2

mmpriv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ void mm_write_paf4(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const
7878
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
7979
void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regs, const mm_reg1_t *const* regs, void *km, int64_t opt_flag);
8080
void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int64_t opt_flag, int rep_len);
81+
void mm_write_junc(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r);
8182

8283
void mm_idxopt_init(mm_idxopt_t *opt);
8384
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);

0 commit comments

Comments
 (0)