Skip to content

Commit

Permalink
Refactor and added Utests
Browse files Browse the repository at this point in the history
- BiWFA code refactor
- Add Utests for correctness, memory, and performance
  • Loading branch information
smarco committed Jul 24, 2022
1 parent 06af50c commit 6e45427
Show file tree
Hide file tree
Showing 12 changed files with 398 additions and 274 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ WFA2's heuristics are classified into the following categories: ['wf-adaptive'](
attributes.heuristic.steps_between_cutoffs = 100;
```

        **Z-drop** implements the Z-drop heuristic (as described in Minimap2). This heuristic halts the alignment process if the score drops too fast in the diagonal direction. Let $sw_{max}$, computed at cell ($i'$,$j'$), be the maximum observed score so far. Then, let $sw$, computed at cell ($i$,$j$), the maximum score found in the last computed wavefront. The Z-drop heuristic stops the alignment process if $sw_{max} - sw > zdrop + gap_e·|(i-i')-(j-j')|$, being $gap_e$ the gap-extension penalty and $zdrop$ a parameter of the heuristic.
        **Z-drop** implements the Z-drop heuristic (as described in Minimap2). This heuristic halts the alignment process if the score drops too fast in the diagonal direction. Let $sw_{max}$ be the maximum observed score so far, computed at cell ($i'$,$j'$). Then, let $sw$ be the maximum score found in the last computed wavefront, computed at cell ($i$,$j$). The Z-drop heuristic stops the alignment process if $sw_{max} - sw > zdrop + gap_e·|(i-i')-(j-j')|$, being $gap_e$ the gap-extension penalty and $zdrop$ a parameter of the heuristic.


```C
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v2.2
v2.3
16 changes: 11 additions & 5 deletions tools/align_benchmark/align_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ void align_benchmark_sequential() {
++seqs_processed;
if (++progress == parameters.progress) {
progress = 0;
align_benchmark_print_progress(seqs_processed);
if (parameters.verbose >= 0) align_benchmark_print_progress(seqs_processed);
}
// DEBUG
//mm_allocator_print(stderr,align_input.wf_aligner->mm_allocator,false);
Expand All @@ -621,7 +621,7 @@ void align_benchmark_sequential() {
}
// Print benchmark results
timer_stop(&parameters.timer_global);
align_benchmark_print_results(&align_input,seqs_processed,true);
if (parameters.verbose >= 0) align_benchmark_print_results(&align_input,seqs_processed,true);
// Free
align_benchmark_free(&align_input);
fclose(parameters.input_file);
Expand Down Expand Up @@ -687,14 +687,14 @@ void align_benchmark_parallel() {
progress += seqs_batch;
if (progress >= parameters.progress) {
progress -= parameters.progress;
align_benchmark_print_progress(seqs_processed);
if (parameters.verbose >= 0) align_benchmark_print_progress(seqs_processed);
}
// DEBUG
//mm_allocator_print(stderr,align_input->wf_aligner->mm_allocator,false);
}
// Print benchmark results
timer_stop(&parameters.timer_global);
align_benchmark_print_results(align_input,seqs_processed,true);
if (parameters.verbose >= 0) align_benchmark_print_results(align_input,seqs_processed,true);
// Free
for (int tid=0;tid<parameters.num_threads;++tid) {
align_benchmark_free(align_input+tid);
Expand Down Expand Up @@ -775,6 +775,8 @@ void usage() {
" --num-threads|t INT \n"
" --batch-size INT \n"
//" --progress|P INT \n"
" --verbose|v INT \n"
" --quiet|q \n"
" --help|h \n");
}
void parse_arguments(int argc,char** argv) {
Expand Down Expand Up @@ -809,6 +811,7 @@ void parse_arguments(int argc,char** argv) {
{ "batch-size", required_argument, 0, 4000 },
{ "progress", required_argument, 0, 'P' },
{ "verbose", optional_argument, 0, 'v' },
{ "quiet", no_argument, 0, 'q' },
{ "help", no_argument, 0, 'h' },
{ 0, 0, 0, 0 } };
int c,option_index;
Expand All @@ -817,7 +820,7 @@ void parse_arguments(int argc,char** argv) {
exit(0);
}
while (1) {
c=getopt_long(argc,argv,"a:i:o:p:g:P:c:v::t:h",long_options,&option_index);
c=getopt_long(argc,argv,"a:i:o:p:g:P:c:v::qt:h",long_options,&option_index);
if (c==-1) break;
switch (c) {
/*
Expand Down Expand Up @@ -1068,6 +1071,9 @@ void parse_arguments(int argc,char** argv) {
}
}
break;
case 'q':
parameters.verbose = -1;
break;
case 'h':
usage();
exit(1);
Expand Down
59 changes: 39 additions & 20 deletions wavefront/wavefront_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,29 @@
/*
* Checks
*/
void wavefront_align_checks(
wavefront_aligner_t* const wf_aligner) {
alignment_form_t* const form = &wf_aligner->alignment_form;
if (wf_aligner->bialigner != NULL) {
const bool ends_free =
form->pattern_begin_free > 0 ||
form->pattern_end_free > 0 ||
form->text_begin_free > 0 ||
form->text_end_free > 0;
if (ends_free) {
fprintf(stderr,"[WFA] BiWFA and ends-free is not supported yet\n");
exit(1);
}
}
const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric;
const bool is_heuristic_drop =
(wf_aligner->heuristic.strategy & wf_heuristic_xdrop) ||
(wf_aligner->heuristic.strategy & wf_heuristic_zdrop);
if (is_heuristic_drop && (distance_metric==edit || distance_metric==indel)) {
fprintf(stderr,"[WFA] Heuristics drops are not compatible with 'edit'/'indel' distance metrics\n");
exit(1);
}
}
void wavefront_check_endsfree_form(
wavefront_aligner_t* const wf_aligner,
const int pattern_length,
Expand Down Expand Up @@ -394,20 +417,18 @@ void wavefront_align_start(
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
const bool subalignment) {
const int text_length) {
// DEBUG
wavefront_debug_prologue(wf_aligner,
pattern,pattern_length,text,text_length,subalignment);
pattern,pattern_length,text,text_length);
}
void wavefront_align_finish(
wavefront_aligner_t* const wf_aligner,
const bool subalignment) {
wavefront_aligner_t* const wf_aligner) {
// Compute memory used
uint64_t memory_used = wavefront_aligner_get_size(wf_aligner);
wf_aligner->align_status.memory_used = memory_used;
// DEBUG
wavefront_debug_epilogue(wf_aligner,subalignment);
wavefront_debug_epilogue(wf_aligner);
// Reap memory (controlled reaping)
if (memory_used > wf_aligner->system.max_memory_resident) {
// Wavefront components
Expand All @@ -418,11 +439,8 @@ void wavefront_align_finish(
// Slab
if (memory_used > wf_aligner->system.max_memory_resident) {
wavefront_slab_reap(wf_aligner->wavefront_slab);
if (wf_aligner->aligner_forward != NULL) {
wavefront_slab_reap(wf_aligner->aligner_forward->wavefront_slab);
}
if (wf_aligner->aligner_reverse != NULL) {
wavefront_slab_reap(wf_aligner->aligner_reverse->wavefront_slab);
if (wf_aligner->bialigner != NULL) {
wavefront_bialigner_reap(wf_aligner->bialigner);
}
}
}
Expand All @@ -435,17 +453,16 @@ void wavefront_align_unidirectional(
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
const bool subalignment) {
const int text_length) {
// Start alignment
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length,subalignment);
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length);
// Prepare alignment
wavefront_align_sequences_init(wf_aligner,pattern,pattern_length,text,text_length);
// Wavefront align sequences
wavefront_align_sequences(wf_aligner);
// Finish alignment
if (wf_aligner->align_status.status == WF_STATUS_MAX_SCORE_REACHED) return; // Alignment paused
wavefront_align_finish(wf_aligner,subalignment);
wavefront_align_finish(wf_aligner);
}
void wavefront_align_bidirectional(
wavefront_aligner_t* const wf_aligner,
Expand All @@ -454,23 +471,25 @@ void wavefront_align_bidirectional(
const char* const text,
const int text_length) {
// Start alignment
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length,false);
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length);
// Bidirectional alignment
wavefront_bialign(wf_aligner,pattern,pattern_length,text,text_length);
// Finish alignment
wavefront_align_finish(wf_aligner,false);
wavefront_align_finish(wf_aligner);
}
int wavefront_align(
wavefront_aligner_t* const wf_aligner,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length) {
// Checks
wavefront_align_checks(wf_aligner);
// Dispatcher
if (wf_aligner->bidirectional_alignment) {
if (wf_aligner->align_mode == wavefront_align_biwfa) {
wavefront_align_bidirectional(wf_aligner,pattern,pattern_length,text,text_length);
} else {
wavefront_align_unidirectional(wf_aligner,pattern,pattern_length,text,text_length,false);
wavefront_align_unidirectional(wf_aligner,pattern,pattern_length,text,text_length);
}
// Return
return wf_aligner->align_status.status;
Expand All @@ -490,7 +509,7 @@ int wavefront_align_resume(
if (align_status->status == WF_STATUS_MAX_SCORE_REACHED) {
return WF_STATUS_MAX_SCORE_REACHED; // Alignment paused
}
wavefront_align_finish(wf_aligner,false);
wavefront_align_finish(wf_aligner);
return align_status->status;
}

Loading

0 comments on commit 6e45427

Please sign in to comment.