Skip to content

Commit

Permalink
Implemented BiWFA+Heuristics+CutOffs and patched cases when heuristic…
Browse files Browse the repository at this point in the history
…s make impossible find the alignment (WFA and BiWFA)
  • Loading branch information
smarco committed Jul 10, 2022
1 parent b2c0af1 commit adba5b0
Show file tree
Hide file tree
Showing 21 changed files with 237 additions and 139 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Section [WFA2-lib features](#wfa2.features) explores the most relevant options a
* [Alignment Span](#wfa2.span)
* [Memory modes](#wfa2.mem)
* [Heuristic modes](#wfa2.heuristics)
<!-- * [Other features](#wfa2.other) -->
* [Technical notes](#wfa2.other.notes)
* [Reporting Bugs and Feature Request](#wfa2.complains)
* [License](#wfa2.licence)
* [Citation](#wfa2.cite)
Expand Down Expand Up @@ -502,7 +502,7 @@ The WFA algorithm can be used combined with many heuristics to reduce the alignm

### <a name="wfa2.other.notes"></a> 3.6 Some technical notes

- Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. Although, in practice, M=0 is still the most efficient choice as it takes advantage of matches between the sequences to accelerate the alignment computation.
- Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. Although, in practice, M=0 is still the most efficient choice.

- All WFA2-lib algorithms/variants are stable. That is, for alignments having the same score, the library always resolves ties (between M, X, I,and D) using the same criteria: M (highest prio) > X > D > I (lowest prio). Nevertheless, the memory mode `ultralow` (BiWFA) is optimal (always reports the best alignment) but not stable.

Expand Down
1 change: 1 addition & 0 deletions bindings/cpp/WFAligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ WFAligner::WFAligner(
default: this->attributes.memory_mode = wavefront_memory_high; break;
}
this->attributes.alignment_scope = (alignmentScope==Score) ? compute_score : compute_alignment;
//this->attributes.system.verbose = 2;
this->wfAligner = nullptr;
}
WFAligner::~WFAligner() {
Expand Down
2 changes: 1 addition & 1 deletion bindings/cpp/WFAligner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class WFAligner {
};
enum AlignmentStatus {
StatusSuccessful = WF_STATUS_SUCCESSFUL,
StatusDropped = WF_STATUS_HEURISTICALY_DROPPED,
StatusUnfeasible = WF_STATUS_UNFEASIBLE,
StatusMaxScoreReached = WF_STATUS_MAX_SCORE_REACHED,
StatusOOM = WF_STATUS_OOM,
};
Expand Down
9 changes: 6 additions & 3 deletions scripts/wfa.alg.cmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,12 @@ def plot_score_distribution(stats,input_path1,input_path2):
# ax1.xaxis.grid(True)
# ax1.yaxis.grid(True)
# Plot score histogram
b=range(min(stats.scores1+stats.scores2), max(stats.scores1+stats.scores2) + 10, 10)
ax1.hist(stats.scores1,bins=b,color="royalblue",edgecolor = 'black',alpha=0.75)
ax1.hist(stats.scores2,bins=b,color="darkorange",edgecolor = 'black',alpha=0.75)
range_min = min(min(stats.scores1),min(stats.scores2))
range_max = max(max(stats.scores1),max(stats.scores2))
n, bins, patches = ax1.hist(stats.scores1,50,range=[range_min,range_max],color="royalblue",edgecolor='black',alpha=0.5)
n, bins, patches = ax1.hist(stats.scores2,50,range=[range_min,range_max],color="darkorange",edgecolor='black',alpha=0.5)
start, end = ax1.get_xlim()
ax1.set_xticks(np.arange(start,end,(end-start)/10))
# Leyend
handles = [Rectangle((0,0),1,1,color=c,ec="k") for c in ["royalblue","darkorange"]]
labels= [input_path1,input_path2]
Expand Down
4 changes: 2 additions & 2 deletions tools/align_benchmark/align_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -1060,8 +1060,8 @@ void parse_arguments(int argc,char** argv) {
parameters.verbose = 1;
} else {
parameters.verbose = atoi(optarg);
if (parameters.verbose < 0 || parameters.verbose > 3) {
fprintf(stderr,"Option '--verbose' must be in {0,1,2,3}\n");
if (parameters.verbose < 0 || parameters.verbose > 4) {
fprintf(stderr,"Option '--verbose' must be in {0,1,2,3,4}\n");
exit(1);
}
}
Expand Down
103 changes: 54 additions & 49 deletions wavefront/wavefront_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ bool wavefront_align_reached_limits(
wavefront_aligner_print_status(stderr,wf_aligner,score); // DEBUG
}
// BT-Buffer
wavefront_components_t*const wf_components = &wf_aligner->wf_components;
wavefront_components_t* const wf_components = &wf_aligner->wf_components;
if (wf_components->bt_buffer!=NULL && (score%system->probe_interval_compact)==0) {
uint64_t bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer);
// Check BT-buffer memory
Expand Down Expand Up @@ -297,41 +297,42 @@ void wavefront_align_terminate(
int wavefront_align_sequences(
wavefront_aligner_t* const wf_aligner) {
// Parameters
wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status;
void (*wf_align_compute)(wavefront_aligner_t* const,const int) = wf_align_status->wf_align_compute;
int (*wf_align_extend)(wavefront_aligner_t* const,const int) = wf_align_status->wf_align_extend;
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
void (*wf_align_compute)(wavefront_aligner_t* const,const int) = align_status->wf_align_compute;
int (*wf_align_extend)(wavefront_aligner_t* const,const int) = align_status->wf_align_extend;
// Compute wavefronts of increasing score
int score = wf_aligner->align_status.score;
align_status->num_null_steps = 0;
int score = align_status->score;
while (true) {
// Exact extend s-wavefront
const int finished = (*wf_align_extend)(wf_aligner,score);
if (finished) {
// DEBUG
// wavefront_aligner_print(stderr,wf_aligner,0,score,7,0);
if (wf_aligner->align_status.status == WF_STATUS_SUCCESSFUL) {
if (align_status->status == WF_STATUS_SUCCESSFUL) {
wavefront_align_terminate(wf_aligner,score);
}
wf_aligner->align_status.score = score;
return wf_aligner->align_status.status;
align_status->score = score;
return align_status->status;
}
// Compute (s+1)-wavefront
++score;
(*wf_align_compute)(wf_aligner,score);
// Probe limits
if (wavefront_align_reached_limits(wf_aligner,score)) {
wf_aligner->align_status.score = score;
return wf_aligner->align_status.status;
align_status->score = score;
return align_status->status;
}
// PROFILE
if (wf_aligner->plot_params.plot_enabled) {
wavefront_plot(wf_aligner,wf_aligner->pattern,wf_aligner->text,score);
}
// DEBUG
// wavefront_aligner_print(stderr,wf_aligner,0,score,7,0);
//wavefront_aligner_print(stderr,wf_aligner,score,score,7,0);
}
// Return OK
wf_aligner->align_status.score = score;
wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL;
align_status->score = score;
align_status->status = WF_STATUS_SUCCESSFUL;
return WF_STATUS_SUCCESSFUL;
}
/*
Expand All @@ -344,23 +345,23 @@ void wavefront_align_sequences_init(
const char* const text,
const int text_length) {
// Parameters
wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status;
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
// Resize wavefront aligner
wavefront_aligner_resize(wf_aligner,pattern,pattern_length,text,text_length,false);
// Configure WF-compute function
switch (wf_aligner->penalties.distance_metric) {
case indel:
case edit:
wf_align_status->wf_align_compute = &wavefront_compute_edit;
align_status->wf_align_compute = &wavefront_compute_edit;
break;
case gap_linear:
wf_align_status->wf_align_compute = &wavefront_compute_linear;
align_status->wf_align_compute = &wavefront_compute_linear;
break;
case gap_affine:
wf_align_status->wf_align_compute = &wavefront_compute_affine;
align_status->wf_align_compute = &wavefront_compute_affine;
break;
case gap_affine_2p:
wf_align_status->wf_align_compute = &wavefront_compute_affine2p;
align_status->wf_align_compute = &wavefront_compute_affine2p;
break;
default:
fprintf(stderr,"[WFA] Distance function not implemented\n");
Expand All @@ -370,11 +371,11 @@ void wavefront_align_sequences_init(
// Configure WF-extend function
const bool end2end = (wf_aligner->alignment_form.span == alignment_end2end);
if (wf_aligner->match_funct != NULL) {
wf_align_status->wf_align_extend = &wavefront_extend_custom;
align_status->wf_align_extend = &wavefront_extend_custom;
} else if (end2end) {
wf_align_status->wf_align_extend = &wavefront_extend_end2end;
align_status->wf_align_extend = &wavefront_extend_end2end;
} else {
wf_align_status->wf_align_extend = &wavefront_extend_endsfree;
align_status->wf_align_extend = &wavefront_extend_endsfree;
}
// Initialize wavefront
wf_aligner->alignment_end_pos.score = -1; // Not aligned
Expand All @@ -391,17 +392,24 @@ void wavefront_align_sequences_init(
}
}
void wavefront_align_start(
wavefront_aligner_t* const wf_aligner) {
wavefront_aligner_t* const wf_aligner,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
const bool subalignment) {
// DEBUG
wavefront_debug_prologue(wf_aligner);
wavefront_debug_prologue(wf_aligner,
pattern,pattern_length,text,text_length,subalignment);
}
void wavefront_align_finish(
wavefront_aligner_t* const wf_aligner) {
wavefront_aligner_t* const wf_aligner,
const bool subalignment) {
// Compute memory used
uint64_t memory_used = wavefront_aligner_get_size(wf_aligner);
wf_aligner->align_status.memory_used = memory_used;
// DEBUG
wavefront_debug_epilogue(wf_aligner);
wavefront_debug_epilogue(wf_aligner,subalignment);
// Reap memory (controlled reaping)
if (memory_used > wf_aligner->system.max_memory_resident) {
// Wavefront components
Expand Down Expand Up @@ -429,24 +437,31 @@ void wavefront_align_unidirectional(
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length) {
const int text_length,
const bool subalignment) {
// Start alignment
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length,subalignment);
// Prepare alignment
wavefront_align_sequences_init(wf_aligner,pattern,pattern_length,text,text_length);
// Wavefront align sequences
wavefront_align_sequences(wf_aligner);
// Finish alignment
if (wf_aligner->align_status.status == WF_STATUS_MAX_SCORE_REACHED) return; // Alignment paused
wavefront_align_finish(wf_aligner,subalignment);
}
void wavefront_align_bidirectional(
wavefront_aligner_t* const wf_aligner,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length) {
// Parameters
wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status;
// Start alignment
wavefront_align_start(wf_aligner,pattern,pattern_length,text,text_length,false);
// Allocate cigar
cigar_t cigar;
cigar_allocate(&cigar,2*(pattern_length+text_length),wf_aligner->mm_allocator);
// Bidirectional alignment
wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; // Init OK
wavefront_bialign(wf_aligner,
pattern,pattern_length,text,text_length,
&wf_aligner->alignment_form,
Expand All @@ -455,8 +470,8 @@ void wavefront_align_bidirectional(
// Swap and free cigar
SWAP(wf_aligner->cigar,cigar);
cigar_free(&cigar);
// Finish
wf_align_status->status = WF_STATUS_SUCCESSFUL; // For the moment, all good
// Finish alignment
wavefront_align_finish(wf_aligner,false);
}
int wavefront_align(
wavefront_aligner_t* const wf_aligner,
Expand All @@ -465,42 +480,32 @@ int wavefront_align(
const char* const text,
const int text_length) {
// Parameters
wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status;
// Start alignment
wavefront_align_start(wf_aligner);
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
// Dispatcher
if (wf_aligner->bidirectional_alignment) {
wavefront_align_bidirectional(wf_aligner,pattern,pattern_length,text,text_length);
} else {
wavefront_align_unidirectional(wf_aligner,pattern,pattern_length,text,text_length);
// Check pause condition
if (wf_align_status->status == WF_STATUS_MAX_SCORE_REACHED) {
return WF_STATUS_MAX_SCORE_REACHED; // Alignment paused
}
wavefront_align_unidirectional(wf_aligner,pattern,pattern_length,text,text_length,false);
}
// Finish alignment
wavefront_align_finish(wf_aligner);
// Return
return wf_align_status->status;
return align_status->status;
}
int wavefront_align_resume(
wavefront_aligner_t* const wf_aligner) {
// Parameters
wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status;
wavefront_align_status_t* const align_status = &wf_aligner->align_status;
// Check current alignment status
if (wf_align_status->status != WF_STATUS_MAX_SCORE_REACHED) {
if (align_status->status != WF_STATUS_MAX_SCORE_REACHED) {
fprintf(stderr,"[WFA] Alignment cannot be resumed (already finished)\n");
exit(1);
}
// Resume aligning sequences
wavefront_align_sequences(wf_aligner);
// Check pause condition
if (wf_align_status->status == WF_STATUS_MAX_SCORE_REACHED) {
// Finish alignment
if (align_status->status == WF_STATUS_MAX_SCORE_REACHED) {
return WF_STATUS_MAX_SCORE_REACHED; // Alignment paused
}
// Finish alignment
wavefront_align_finish(wf_aligner);
// Return
return wf_align_status->status;
wavefront_align_finish(wf_aligner,false);
return align_status->status;
}

6 changes: 3 additions & 3 deletions wavefront/wavefront_aligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ char* wf_error_msg[] =
{
/* WF_STATUS_OOM == -3 */ "[WFA] Alignment failed. Maximum memory threshold reached",
/* WF_STATUS_MAX_SCORE_REACHED == -2 */ "[WFA] Alignment failed. Maximum score reached",
/* WF_STATUS_HEURISTICALY_DROPPED == -1 */ "[WFA] Alignment dropped heuristically",
/* WF_STATUS_UNFEASIBLE == -1 */ "[WFA] Alignment unfeasible (possible due to heuristic parameters)",
/* WF_STATUS_SUCCESSFUL == 0 */ "[WFA] Alignment successful",
/* WF_STATUS_IN_PROGRESS == 1 */ "[WFA] Alignment in progress",
};
Expand Down Expand Up @@ -159,8 +159,8 @@ void wavefront_aligner_init_alignment(
subsidiary_attr.match_funct = attributes->match_funct;
subsidiary_attr.match_funct_arguments = attributes->match_funct_arguments;
// Set specifics for subsidiary aligners
subsidiary_attr.heuristic.strategy = wf_heuristic_none;
subsidiary_attr.memory_mode = wavefront_memory_high;
subsidiary_attr.heuristic = attributes->heuristic; // Inherit same heuristic
subsidiary_attr.memory_mode = wavefront_memory_high; // Classic WFA
subsidiary_attr.alignment_scope = compute_score;
// Set other parameter for subsidiary aligners
subsidiary_attr.system = attributes->system;
Expand Down
11 changes: 6 additions & 5 deletions wavefront/wavefront_aligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@
/*
* Error codes & messages
*/
#define WF_STATUS_SUCCESSFUL 0
#define WF_STATUS_IN_PROGRESS 1
#define WF_STATUS_HEURISTICALY_DROPPED -1
#define WF_STATUS_MAX_SCORE_REACHED -2
#define WF_STATUS_OOM -3
#define WF_STATUS_SUCCESSFUL 0
#define WF_STATUS_IN_PROGRESS 1
#define WF_STATUS_UNFEASIBLE -1
#define WF_STATUS_MAX_SCORE_REACHED -2
#define WF_STATUS_OOM -3
extern char* wf_error_msg[5];
char* wavefront_align_strerror(const int wf_error_code);

Expand All @@ -66,6 +66,7 @@ typedef struct {
// Status
int status; // Status code
int score; // Current WF-alignment score
int num_null_steps; // Total contiguous null-steps performed
uint64_t memory_used; // Total memory used
// Wavefront alignment functions
void (*wf_align_compute)(wavefront_aligner_t* const,const int); // WF Compute function
Expand Down
7 changes: 4 additions & 3 deletions wavefront/wavefront_attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,10 @@ typedef struct {
uint64_t max_memory_abort; // Maximum memory allowed to be used before aborting alignment
// Verbose
// 0 - Quiet
// 1 - Report WFA progress and heavy tasks
// 2 - Report each sequence aligned (brief)
// 3 - Report each sequence aligned (very verbose)
// 1 - Report WFA progress and heavy tasks (brief)
// 2 - Report each sequence aligned (succinct)
// 3 - Report each sequence aligned (verbose)
// 4 - Report each sequence aligned and BiWFA sub-alignments (very verbose)
int verbose; // Verbose (regulates messages during alignment)
// Debug
bool check_alignment_correct; // Verify that the alignment CIGAR output is correct
Expand Down
Loading

0 comments on commit adba5b0

Please sign in to comment.