forked from yangao07/abPOA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sub_example.c
133 lines (123 loc) · 4.98 KB
/
sub_example.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* sub_example.c libabpoa usage example
To compile:
gcc -O2 sub_example.c -I ./include -L ./lib -labpoa -lz -o sub_example
Or: gcc -O2 sub_example.c -I ./include ./lib/libabpoa.a -lz -o sub_example
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "include/abpoa.h"
// AaCcGgTtNn ==> 0,1,2,3,4
unsigned char _nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
};
int main(void) {
int i, j, n_seqs = 6;
char seqs[100][1000] = {
// 0 1 2 3
// 23456789012345678901234567890123
"CGTCAATCTATCGAAGCATACGCGGGCAGAGC",
"CCACGTCAATCTATCGAAGCATACGCGGCAGC",
"AATCTATCGAAGCATACG",
"CAATGCTAGTCGAAGCAGCTGCGGCAG",
"CGTCAATCTATCGAAGCATTCTACGCGGCAGAGC",
"CGTCAATCTAGAAGCATACGCGGCAAGAGC",
"CGTCAATCTATCGGTAAAGCATACGCTCTGTAGC",
"CGTCAATCTATCTTCAAGCATACGCGGCAGAGC",
"CGTCAATGGATCGAGTACGCGGCAGAGC",
"CGTCAATCTAATCGAAGCATACGCGGCAGAGC"
};
int beg_end_id[100][2] = {
{0, 1},
{2, 33},
{6, 23},
{5, 30},
{0, 1},
{0, 1},
{0, 1},
{0, 1},
{0, 1},
{0, 1},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52},
//{2, 52}
};
// initialize variables
abpoa_t *ab = abpoa_init();
abpoa_para_t *abpt = abpoa_init_para();
// alignment parameters
// abpt->align_mode = 0; // 0:global 1:local, 2:extension
// abpt->match = 2; // match score
// abpt->mismatch = 4; // mismatch penalty
// abpt->gap_mode = ABPOA_CONVEX_GAP; // gap penalty mode
// abpt->gap_open1 = 4; // gap open penalty #1
// abpt->gap_ext1 = 2; // gap extension penalty #1
// abpt->gap_open2 = 24; // gap open penalty #2
// abpt->gap_ext2 = 1; // gap extension penalty #2
// gap_penalty = min{gap_open1 + gap_len * gap_ext1, gap_open2 + gap_len * gap_ext2}
// abpt->bw = 10; // extra band used in adaptive banded DP
// abpt->bf = 0.01;
// output options
abpt->out_msa = 1; // generate Row-Column multiple sequence alignment(RC-MSA), set 0 to disable
abpt->out_cons = 1; // generate consensus sequence, set 0 to disable
abpoa_post_set_para(abpt);
// collect sequence length, trasform ACGT to 0123
int *seq_lens = (int*)malloc(sizeof(int) * n_seqs);
uint8_t **bseqs = (uint8_t**)malloc(sizeof(uint8_t*) * n_seqs);
for (i = 0; i < n_seqs; ++i) {
seq_lens[i] = strlen(seqs[i]);
bseqs[i] = (uint8_t*)malloc(sizeof(uint8_t) * seq_lens[i]);
for (j = 0; j < seq_lens[i]; ++j)
bseqs[i][j] = _nt4_table[(int)seqs[i][j]];
}
// perform abpoa-msa
ab->abs->n_seq = n_seqs;
abpoa_res_t res;
for (i = 0; i < n_seqs; ++i) {
res.graph_cigar = 0, res.n_cigar = 0; res.is_rc = 0;
int exc_beg, exc_end;
if (i != 0) abpoa_subgraph_nodes(ab, abpt, beg_end_id[i][0], beg_end_id[i][1], &exc_beg, &exc_end);
else exc_beg = 0, exc_end = 1;
fprintf(stderr, "i: %d, beg: %d, end: %d\n", i, exc_beg, exc_end);
abpoa_align_sequence_to_subgraph(ab, abpt, exc_beg, exc_end, bseqs[i], seq_lens[i], &res);
abpoa_add_subgraph_alignment(ab, abpt, exc_beg, exc_end, bseqs[i], seq_lens[i], NULL, res, i, n_seqs, 0);
if (res.n_cigar) free(res.graph_cigar);
}
if (abpt->out_cons) {
abpoa_generate_consensus(ab, abpt, stdout, NULL, NULL, NULL, NULL);
if (ab->abg->is_called_cons == 0)
fprintf(stderr, "Warning: no consensus sequence generated.\n");
}
if (abpt->out_msa) {
abpoa_generate_rc_msa(ab, abpt, stdout, NULL, NULL);
}
// abpoa_reset_graph(ab, abpt, seq_lens[0]); // reset graph before re-use
/* generate DOT partial order graph plot */
abpt->out_pog = strdup("sub_example.png"); // dump parital order graph to file
if (abpt->out_pog != NULL) abpoa_dump_pog(ab, abpt);
for (i = 0; i < n_seqs; ++i) free(bseqs[i]); free(bseqs); free(seq_lens);
abpoa_free(ab); abpoa_free_para(abpt);
return 0;
}