Skip to content

Commit

Permalink
Implemented exact pruning on edit-distance
Browse files Browse the repository at this point in the history
  • Loading branch information
smarco committed Apr 4, 2022
1 parent 347e1b4 commit 965dbfc
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 8 deletions.
84 changes: 84 additions & 0 deletions wavefront/wavefront_compute_edit.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,85 @@ void wavefront_compute_edit_idm_piggyback(
curr_offsets[k] = max;
}
}
/*
* Exact pruning paths
*/
/*
 * Best-case (lowest) score still needed to finish the alignment from
 * diagonal @k at @offset.
 *
 * The amounts left to consume are obtained by subtracting WAVEFRONT_V(k,offset)
 * from the pattern length and WAVEFRONT_H(k,offset) from the text length
 * (presumably the vertical/horizontal positions reached -- confirm against the
 * macro definitions). The result is the absolute difference between both
 * remainders.
 */
int wf_compute_edit_best_score(
    const int pattern_length,
    const int text_length,
    const int k,
    const wf_offset_t offset) {
  const int remaining_pattern = pattern_length - WAVEFRONT_V(k,offset);
  const int remaining_text = text_length - WAVEFRONT_H(k,offset);
  // Absolute difference of the two remainders
  if (remaining_pattern < remaining_text) {
    return remaining_text - remaining_pattern;
  }
  return remaining_pattern - remaining_text;
}
/*
 * Worst-case (highest) score still needed to finish the alignment from
 * diagonal @k at @offset.
 *
 * Uses the same two remainders as the best-case computation (pattern length
 * minus WAVEFRONT_V(k,offset), text length minus WAVEFRONT_H(k,offset)) and
 * returns the larger of them.
 */
int wf_compute_edit_worst_score(
    const int pattern_length,
    const int text_length,
    const int k,
    const wf_offset_t offset) {
  const int remaining_pattern = pattern_length - WAVEFRONT_V(k,offset);
  const int remaining_text = text_length - WAVEFRONT_H(k,offset);
  // Larger of the two remainders
  if (remaining_pattern > remaining_text) {
    return remaining_pattern;
  }
  return remaining_text;
}
/*
 * Exact pruning of a wavefront (edit distance).
 *
 * Narrows [wavefront->lo, wavefront->hi] by dropping diagonals at either end
 * whose best-case remaining score is strictly greater than the smallest
 * worst-case remaining score found among the non-null diagonals.
 * Negative offsets are treated as null entries.
 *
 * Suggested by Heng Li as an effective exact-pruning technique for sequences
 * of very different length, where some diagonals can be proven impossible to
 * yield better alignments.
 *
 * @param wf_aligner  Aligner; only pattern_length and text_length are read.
 * @param wavefront   Wavefront whose lo/hi limits may be rewritten in place.
 */
void wavefront_compute_edit_exact_prune(
    wavefront_aligner_t* const wf_aligner,
    wavefront_t* const wavefront) {
  // Parameters
  const int pattern_length = wf_aligner->pattern_length;
  const int text_length = wf_aligner->text_length;
  wf_offset_t* const offsets = wavefront->offsets;
  const int lo = wavefront->lo;
  const int hi = wavefront->hi;
  // Wavefronts shorter than this threshold are never pruned
  const int min_prune_length = 1000;
  if (WAVEFRONT_LENGTH(lo,hi) < min_prune_length) return;
  // Speculative check: probe the middle diagonal before paying for a full scan
  const int mid_k = lo + (hi-lo)/2;
  const wf_offset_t mid_offset = offsets[mid_k];
  if (mid_offset < 0) return; // Null in the middle; give up on this wavefront
  const int mid_worst =
      wf_compute_edit_worst_score(pattern_length,text_length,mid_k,mid_offset);
  const int lo_best =
      wf_compute_edit_best_score(pattern_length,text_length,lo,offsets[lo]);
  const int hi_best =
      wf_compute_edit_best_score(pattern_length,text_length,hi,offsets[hi]);
  // Continue only if at least one end looks prunable against the sample
  if (!(lo_best > mid_worst || hi_best > mid_worst)) return;
  // Smallest worst-case score over all non-null diagonals
  int min_worst = INT_MAX;
  int k = lo;
  while (k <= hi) {
    const wf_offset_t offset = offsets[k];
    if (offset >= 0) {
      const int worst =
          wf_compute_edit_worst_score(pattern_length,text_length,k,offset);
      if (worst < min_worst) min_worst = worst;
    }
    ++k;
  }
  // Prune from the bottom: advance lo past diagonals that cannot win
  int new_lo = lo;
  while (new_lo <= hi &&
         wf_compute_edit_best_score(
             pattern_length,text_length,new_lo,offsets[new_lo]) > min_worst) {
    ++new_lo;
  }
  wavefront->lo = new_lo;
  // Prune from the top: retreat hi, never moving below the new lo
  int new_hi = hi;
  while (new_hi > new_lo &&
         wf_compute_edit_best_score(
             pattern_length,text_length,new_hi,offsets[new_hi]) > min_worst) {
    --new_hi;
  }
  wavefront->hi = new_hi;
}
/*
* Compute next wavefront
*/
Expand Down Expand Up @@ -268,6 +347,11 @@ void wavefront_compute_edit(
}
// Trim wavefront ends
wavefront_compute_trim_ends(wf_aligner,wf_curr);
// Exact pruning paths
if (wf_aligner->alignment_form.span == alignment_end2end &&
wf_aligner->penalties.distance_metric == edit) {
wavefront_compute_edit_exact_prune(wf_aligner,wf_curr);
}
}


7 changes: 0 additions & 7 deletions wavefront/wavefront_heuristic.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,6 @@ void wavefront_heuristic_clear(
/*
* Utils
*/
//int wf_compute_antidiagonal(
// const wf_offset_t offset,
// const int k) {
// const int v = WAVEFRONT_V(k,offset);
// const int h = WAVEFRONT_H(k,offset);
// return v + h;
//}
int wf_compute_distance_end2end(
const wf_offset_t offset,
const int k,
Expand Down
2 changes: 1 addition & 1 deletion wavefront/wavefront_slab.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ wavefront_t* wavefront_slab_allocate(
void wavefront_slab_free(
wavefront_slab_t* const wavefront_slab,
wavefront_t* const wavefront) {
// Check reasons to repurpose wavefront
// Check reasons to repurpose wavefront (NOTE: Tight-mode never slab_frees())
// (A) Reuse-mode and wavefront has current wf-length
// (B) Tight-mode and wavefront has init wf-length
const int wf_length = wavefront->wf_elements_allocated;
Expand Down

0 comments on commit 965dbfc

Please sign in to comment.