From 4b4d88f39ed460467449bf80ef91d72e8d1293a7 Mon Sep 17 00:00:00 2001 From: smarco Date: Mon, 25 Jul 2022 14:43:00 +0200 Subject: [PATCH] Added bialigner/unialigner --- wavefront/wavefront_bialigner.c | 97 ++++++ wavefront/wavefront_bialigner.h | 81 +++++ wavefront/wavefront_unialign.c | 516 ++++++++++++++++++++++++++++++++ wavefront/wavefront_unialign.h | 79 +++++ 4 files changed, 773 insertions(+) create mode 100644 wavefront/wavefront_bialigner.c create mode 100644 wavefront/wavefront_bialigner.h create mode 100644 wavefront/wavefront_unialign.c create mode 100644 wavefront/wavefront_unialign.h diff --git a/wavefront/wavefront_bialigner.c b/wavefront/wavefront_bialigner.c new file mode 100644 index 0000000..5c4745b --- /dev/null +++ b/wavefront/wavefront_bialigner.c @@ -0,0 +1,97 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#include "wavefront_bialigner.h" +#include "wavefront_aligner.h" +#include "wavefront_attributes.h" +#include "wavefront_heuristic.h" + +/* + * Setup + */ +wavefront_bialigner_t* wavefront_bialigner_new( + wavefront_aligner_attr_t* const attributes) { + // Allocate + wavefront_bialigner_t* const wf_bialigner = malloc(sizeof(wavefront_bialigner_t)); + // Configure subsidiary aligners + wavefront_aligner_attr_t subsidiary_attr = wavefront_aligner_attr_default; + // Inherit attributes from master aligner + subsidiary_attr.distance_metric = attributes->distance_metric; + subsidiary_attr.linear_penalties = attributes->linear_penalties; + subsidiary_attr.affine_penalties = attributes->affine_penalties; + subsidiary_attr.affine2p_penalties = attributes->affine2p_penalties; + subsidiary_attr.match_funct = attributes->match_funct; + subsidiary_attr.match_funct_arguments = attributes->match_funct_arguments; + // Set specifics for subsidiary aligners + subsidiary_attr.heuristic = attributes->heuristic; // Inherit same heuristic + subsidiary_attr.memory_mode = wavefront_memory_high; // Classic WFA + subsidiary_attr.alignment_scope = compute_score; + // Set other parameter for subsidiary aligners + subsidiary_attr.system = attributes->system; + // Allocate forward/reverse aligners + wf_bialigner->alg_forward = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->alg_forward->align_mode_tag = "BiWFA::Breakpoint_f"; + wf_bialigner->alg_reverse = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->alg_reverse->align_mode_tag = "BiWFA::Breakpoint_r"; + // Allocate subsidiary aligner + subsidiary_attr.alignment_scope = compute_alignment; + wf_bialigner->alg_subsidiary = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->alg_subsidiary->align_mode_tag = "BiWFA::SubWFA"; + // Return + return wf_bialigner; +} +void wavefront_bialigner_reap( + wavefront_bialigner_t* const wf_bialigner) { + wavefront_aligner_reap(wf_bialigner->alg_forward); + wavefront_aligner_reap(wf_bialigner->alg_reverse); + wavefront_aligner_reap(wf_bialigner->alg_subsidiary); +} +void wavefront_bialigner_delete( + wavefront_bialigner_t* const wf_bialigner) { + wavefront_aligner_delete(wf_bialigner->alg_forward); + wavefront_aligner_delete(wf_bialigner->alg_reverse); + wavefront_aligner_delete(wf_bialigner->alg_subsidiary); +} +/* + * Accessors + */ +uint64_t wavefront_bialigner_get_size( + wavefront_bialigner_t* const wf_bialigner) { + return wavefront_aligner_get_size(wf_bialigner->alg_forward) + + wavefront_aligner_get_size(wf_bialigner->alg_reverse) + + wavefront_aligner_get_size(wf_bialigner->alg_subsidiary); +} +void wavefront_bialigner_heuristic_inherit( + wavefront_bialigner_t* const wf_bialigner, + wavefront_heuristic_t* const heuristic) { + wf_bialigner->alg_forward->heuristic = *heuristic; + wf_bialigner->alg_reverse->heuristic = *heuristic; + wf_bialigner->alg_subsidiary->heuristic = *heuristic; +} diff --git a/wavefront/wavefront_bialigner.h b/wavefront/wavefront_bialigner.h new file mode 100644 index 0000000..fe32fe1 --- /dev/null +++ b/wavefront/wavefront_bialigner.h @@ -0,0 +1,81 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#ifndef WAVEFRONT_BIALIGNER_H_ +#define WAVEFRONT_BIALIGNER_H_ + +#include "utils/commons.h" +#include "wavefront_penalties.h" +#include "wavefront_attributes.h" +#include "wavefront_heuristic.h" +#include "wavefront_offset.h" + +// Wavefront ahead definition +typedef struct _wavefront_aligner_t wavefront_aligner_t; + +typedef struct { + // Scores + int score; // Score total + int score_forward; // Score (forward) + int score_reverse; // Score (reverse) + // Location + int k_forward; // Breakpoint diagonal (forward) + int k_reverse; // Breakpoint diagonal (reverse) + wf_offset_t offset_forward; // Offset (forward) + wf_offset_t offset_reverse; // Offset (reverse) + affine2p_matrix_type component; // Component (M/I/D) +} wf_bialign_breakpoint_t; + +typedef struct { + wavefront_aligner_t* alg_forward; // Forward aligner + wavefront_aligner_t* alg_reverse; // Reverse aligner + wavefront_aligner_t* alg_subsidiary; // Subsidiary aligner +} wavefront_bialigner_t; + +/* + * Setup + */ +wavefront_bialigner_t* wavefront_bialigner_new( + wavefront_aligner_attr_t* const attributes); +void wavefront_bialigner_reap( + wavefront_bialigner_t* const wf_bialigner); +void wavefront_bialigner_delete( + wavefront_bialigner_t* const wf_bialigner); + +/* + * Accessors + */ +uint64_t wavefront_bialigner_get_size( + wavefront_bialigner_t* const wf_bialigner); +void wavefront_bialigner_heuristic_inherit( + wavefront_bialigner_t* const wf_bialigner, + wavefront_heuristic_t* const heuristic); + +#endif /* WAVEFRONT_BIALIGNER_H_ */ diff --git a/wavefront/wavefront_unialign.c b/wavefront/wavefront_unialign.c new file mode 100644 index 0000000..a0c5317 --- /dev/null +++ b/wavefront/wavefront_unialign.c @@ -0,0 +1,516 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#include "wavefront_unialign.h" +#include "wavefront.h" +#include "wavefront_attributes.h" +#include "wavefront_offset.h" +#include "wavefront_penalties.h" +#include "wavefront_plot.h" +#include "wavefront_slab.h" + +#include "wavefront_components.h" +#include "wavefront_compute_affine.h" +#include "wavefront_compute_affine2p.h" +#include "wavefront_compute_edit.h" +#include "wavefront_compute_linear.h" +#include "wavefront_extend.h" +#include "wavefront_backtrace.h" +#include "wavefront_backtrace_buffer.h" + +/* + * Configuration + */ +#define SEQUENCES_PADDING 10 + +/* + * Setup + */ +void wavefront_unialign_status_clear( + wavefront_align_status_t* const align_status) { + align_status->status = WF_STATUS_SUCCESSFUL; + align_status->score = 0; +} +void wavefront_unialigner_system_clear( + wavefront_aligner_t* const wf_aligner) { + // Reset effective limits + wf_aligner->system.max_memory_compact = BUFFER_SIZE_256M; + wf_aligner->system.max_memory_resident = BUFFER_SIZE_256M + BUFFER_SIZE_256M; + switch (wf_aligner->memory_mode) { + case wavefront_memory_med: + wf_aligner->system.max_partial_compacts = 4; + break; + case wavefront_memory_low: + wf_aligner->system.max_partial_compacts = 1; + break; + default: + break; + } + // Profile + timer_reset(&wf_aligner->system.timer); +} +/* + * Resize + */ +void wavefront_unialign_resize( + wavefront_aligner_t* const wf_aligner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse_sequences) { + // Parameters + const bool score_only = (wf_aligner->alignment_scope == compute_score); + // Configure sequences and status + wf_aligner->pattern_length = pattern_length; + wf_aligner->text_length = text_length; + if (wf_aligner->match_funct == NULL) { + if (wf_aligner->sequences != NULL) strings_padded_delete(wf_aligner->sequences); + wf_aligner->sequences = strings_padded_new_rhomb( + pattern,pattern_length,text,text_length, + SEQUENCES_PADDING,reverse_sequences, + wf_aligner->mm_allocator); + wf_aligner->pattern = wf_aligner->sequences->pattern_padded; + wf_aligner->text = wf_aligner->sequences->text_padded; + } else { + wf_aligner->sequences = NULL; + wf_aligner->pattern = NULL; + wf_aligner->text = NULL; + } + wavefront_unialign_status_clear(&wf_aligner->align_status); + // Heuristics clear + wavefront_heuristic_clear(&wf_aligner->heuristic); + // Wavefront components + wavefront_components_resize(&wf_aligner->wf_components, + pattern_length,text_length,&wf_aligner->penalties); + // CIGAR + if (!score_only) { + cigar_resize(&wf_aligner->cigar,2*(pattern_length+text_length)); + } + // Slab + wavefront_slab_clear(wf_aligner->wavefront_slab); + // Display + if (wf_aligner->plot_params.plot_enabled) { + wavefront_plot_free(&wf_aligner->wf_plot); + wavefront_plot_allocate(&wf_aligner->wf_plot, + wf_aligner->penalties.distance_metric, + pattern_length,text_length, + &wf_aligner->plot_params); + } + // System + wavefront_unialigner_system_clear(wf_aligner); +} +/* + * Initialize alignment + */ +void wavefront_unialign_initialize_end2end( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + const int max_score_scope = wf_components->max_score_scope; + const int effective_lo = -(max_score_scope+1); + const int effective_hi = (max_score_scope+1); + // Init wavefronts + switch (wf_aligner->component_begin) { + case affine2p_matrix_M: + wf_components->mwavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->mwavefronts[0]->offsets[0] = 0; + wf_components->mwavefronts[0]->lo = 0; + wf_components->mwavefronts[0]->hi = 0; + if (wf_components->bt_piggyback) { // Store initial BT-piggypack element + wf_components->mwavefronts[0]->bt_pcigar[0] = 0; + wf_components->mwavefronts[0]->bt_prev[0] = + wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); + } + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_I1: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->i1wavefronts[0]->offsets[0] = 0; + wf_components->i1wavefronts[0]->lo = 0; + wf_components->i1wavefronts[0]->hi = 0; + wf_components->d1wavefronts[0] = NULL; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_I2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->i2wavefronts[0]->offsets[0] = 0; + wf_components->i2wavefronts[0]->lo = 0; + wf_components->i2wavefronts[0]->hi = 0; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D1: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->d1wavefronts[0]->offsets[0] = 0; + wf_components->d1wavefronts[0]->lo = 0; + wf_components->d1wavefronts[0]->hi = 0; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->d2wavefronts[0]->offsets[0] = 0; + wf_components->d2wavefronts[0]->lo = 0; + wf_components->d2wavefronts[0]->hi = 0; + break; + default: + break; + } +} +void wavefront_unialign_endsfree_check( + wavefront_aligner_t* const wf_aligner, + const int pattern_length, + const int text_length) { + alignment_form_t* const form = &wf_aligner->alignment_form; + if (form->pattern_begin_free > pattern_length || + form->pattern_end_free > pattern_length || + form->text_begin_free > text_length || + form->text_end_free > text_length) { + fprintf(stderr,"[WFA] Ends-free parameters must be not larger than the sequences " + "(P0=%d,Pf=%d,T0=%d,Tf=%d). Must be (P0<=|P|,Pf<=|P|,T0<=|T|,Tf<=|T|) where (|P|,|T|)=(%d,%d)\n", + form->pattern_begin_free,form->pattern_end_free, + form->text_begin_free,form->text_end_free, + pattern_length,text_length); + exit(1); + } +} +void wavefront_unialign_initialize_endsfree( + wavefront_aligner_t* const wf_aligner, + const int pattern_length, + const int text_length) { + // Check + wavefront_unialign_endsfree_check(wf_aligner,pattern_length,text_length); + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + const int text_begin_free = wf_aligner->alignment_form.text_begin_free; + const int pattern_begin_free = wf_aligner->alignment_form.pattern_begin_free; + const int max_score_scope = wf_components->max_score_scope; + // Init wavefront zero + const int effective_lo = -pattern_begin_free - (max_score_scope+1); + const int effective_hi = text_begin_free + (max_score_scope+1); + wf_components->mwavefronts[0] = wavefront_slab_allocate( + wf_aligner->wavefront_slab,effective_lo,effective_hi); + wf_components->mwavefronts[0]->offsets[0] = 0; + wf_components->mwavefronts[0]->lo = -pattern_begin_free; + wf_components->mwavefronts[0]->hi = text_begin_free; + // Store initial BT-piggypack element + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); + wf_components->mwavefronts[0]->bt_pcigar[0] = 0; + wf_components->mwavefronts[0]->bt_prev[0] = block_idx; + } + // Init text begin-free + int h; + for (h=1;h<=text_begin_free;++h) { + const int k = DPMATRIX_DIAGONAL(h,0); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(h,0); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,h); + wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } + // Init pattern begin-free + int v; + for (v=1;v<=pattern_begin_free;++v) { + const int k = DPMATRIX_DIAGONAL(0,v); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(0,v); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,0); + wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->d1wavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + if (distance_metric==gap_affine) return; + wf_components->d2wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; +} +void wavefront_unialign_init( + wavefront_aligner_t* const wf_aligner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length) { + // Parameters + wavefront_align_status_t* const align_status = &wf_aligner->align_status; + // Resize wavefront aligner + wavefront_unialign_resize(wf_aligner,pattern,pattern_length,text,text_length,false); + // Configure WF-compute function + switch (wf_aligner->penalties.distance_metric) { + case indel: + case edit: + align_status->wf_align_compute = &wavefront_compute_edit; + break; + case gap_linear: + align_status->wf_align_compute = &wavefront_compute_linear; + break; + case gap_affine: + align_status->wf_align_compute = &wavefront_compute_affine; + break; + case gap_affine_2p: + align_status->wf_align_compute = &wavefront_compute_affine2p; + break; + default: + fprintf(stderr,"[WFA] Distance function not implemented\n"); + exit(1); + break; + } + // Configure WF-extend function + const bool end2end = (wf_aligner->alignment_form.span == alignment_end2end); + if (wf_aligner->match_funct != NULL) { + align_status->wf_align_extend = &wavefront_extend_custom; + } else if (end2end) { + align_status->wf_align_extend = &wavefront_extend_end2end; + } else { + align_status->wf_align_extend = &wavefront_extend_endsfree; + } + // Initialize wavefront + wf_aligner->alignment_end_pos.score = -1; // Not aligned + wf_aligner->alignment_end_pos.k = DPMATRIX_DIAGONAL_NULL; + if (end2end) { + wavefront_unialign_initialize_end2end(wf_aligner); + } else { + wavefront_unialign_initialize_endsfree(wf_aligner,pattern_length,text_length); + } + // Plot WF-0 + const bool plot = wf_aligner->plot_params.plot_enabled; + if (plot) { + wavefront_plot(wf_aligner,pattern,text,0); + } +} +/* + * Limits + */ +bool wavefront_unialign_reached_limits( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Check alignment-score limit + if (score >= wf_aligner->system.max_alignment_score) { + wf_aligner->cigar.score = wf_aligner->system.max_alignment_score; + wf_aligner->align_status.status = WF_STATUS_MAX_SCORE_REACHED; + wf_aligner->align_status.score = score; + return true; // Stop + } + // Global probing interval + alignment_system_t* const system = &wf_aligner->system; + if (score % system->probe_interval_global != 0) return false; // Continue + if (system->verbose >= 1) { + wavefront_unialign_print_status(stderr,wf_aligner,score); // DEBUG + } + // BT-Buffer + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + if (wf_components->bt_buffer!=NULL && (score%system->probe_interval_compact)==0) { + uint64_t bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); + // Check BT-buffer memory + if (bt_memory > system->max_memory_compact) { + // Compact BT-buffer + wavefront_components_compact_bt_buffer(wf_components,score,wf_aligner->system.verbose); + // Set new buffer limit + bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); + uint64_t proposed_mem = (double)bt_memory * TELESCOPIC_FACTOR; + if (system->max_memory_compact < proposed_mem && proposed_mem < system->max_memory_abort) { + proposed_mem = system->max_memory_compact; + } + // Reset (if maximum compacts has been performed) + if (wf_components->bt_buffer->num_compactions >= system->max_partial_compacts) { + wf_backtrace_buffer_reset_compaction(wf_components->bt_buffer); + } + } + } + // Check overall memory used + const uint64_t wf_memory_used = wavefront_aligner_get_size(wf_aligner); + if (wf_memory_used > system->max_memory_abort) { + wf_aligner->align_status.status = WF_STATUS_OOM; + wf_aligner->align_status.score = score; + return true; // Stop + } + // Otherwise continue + return false; +} +/* + * Terminate alignment (backtrace) + */ +void wavefront_unialign_terminate( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + const int pattern_length = wf_aligner->pattern_length; + const int text_length = wf_aligner->text_length; + // Retrieve alignment + if (wf_aligner->alignment_scope == compute_score) { + cigar_clear(&wf_aligner->cigar); + wf_aligner->cigar.score = + wavefront_get_classic_score(wf_aligner,pattern_length,text_length,score); + } else { + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const int alignment_end_k = wf_aligner->alignment_end_pos.k; + const wf_offset_t alignment_end_offset = wf_aligner->alignment_end_pos.offset; + if (wf_components->bt_piggyback) { + // Fetch wavefront + const bool memory_modular = wf_aligner->wf_components.memory_modular; + const int max_score_scope = wf_aligner->wf_components.max_score_scope; + const int score_mod = (memory_modular) ? score % max_score_scope : score; + wavefront_t* const mwavefront = wf_components->mwavefronts[score_mod]; + // Backtrace alignment from buffer (unpacking pcigar) + wavefront_backtrace_pcigar( + wf_aligner,alignment_end_k,alignment_end_offset, + mwavefront->bt_pcigar[alignment_end_k], + mwavefront->bt_prev[alignment_end_k]); + } else { + // Backtrace alignment + if (wf_aligner->penalties.distance_metric <= gap_linear) { + wavefront_backtrace_linear(wf_aligner, + score,alignment_end_k,alignment_end_offset); + } else { + wavefront_backtrace_affine(wf_aligner, + wf_aligner->component_begin,wf_aligner->component_end, + score,alignment_end_k,alignment_end_offset); + } + } + // Set score & finish + wf_aligner->cigar.score = + wavefront_get_classic_score(wf_aligner,pattern_length,text_length,score); + } + // Set successful + wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; +} +/* + * Classic WF-Alignment (Unidirectional) + */ +int wavefront_unialign( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_align_status_t* const align_status = &wf_aligner->align_status; + void (*wf_align_compute)(wavefront_aligner_t* const,const int) = align_status->wf_align_compute; + int (*wf_align_extend)(wavefront_aligner_t* const,const int) = align_status->wf_align_extend; + // Compute wavefronts of increasing score + align_status->num_null_steps = 0; + int score = align_status->score; + while (true) { + // Exact extend s-wavefront + const int finished = (*wf_align_extend)(wf_aligner,score); + if (finished) { + // DEBUG + // wavefront_aligner_print(stderr,wf_aligner,0,score,7,0); + if (align_status->status == WF_STATUS_END_REACHED) { + wavefront_unialign_terminate(wf_aligner,score); + } + return align_status->status; + } + // Compute (s+1)-wavefront + ++score; + (*wf_align_compute)(wf_aligner,score); + // Probe limits + if (wavefront_unialign_reached_limits(wf_aligner,score)) return align_status->status; + // PROFILE + if (wf_aligner->plot_params.plot_enabled) { + wavefront_plot(wf_aligner,wf_aligner->pattern,wf_aligner->text,score); + } + // DEBUG + //wavefront_aligner_print(stderr,wf_aligner,score,score,7,0); + } + // Return OK + align_status->score = score; + align_status->status = WF_STATUS_SUCCESSFUL; + return WF_STATUS_SUCCESSFUL; +} +/* + * Display + */ +void wavefront_unialign_print_status( + FILE* const stream, + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + // Approximate progress + const int dist_total = MAX(wf_aligner->text_length,wf_aligner->pattern_length); + int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score; + wavefront_t* wavefront = wf_components->mwavefronts[s]; + if (wavefront==NULL && s>0) { + s = (wf_components->memory_modular) ? (score-1)%wf_components->max_score_scope : (score-1); + wavefront = wf_components->mwavefronts[s]; + } + int dist_max = -1, wf_len = -1, k; + if (wavefront!=NULL) { + wf_offset_t* const offsets = wavefront->offsets; + for (k=wavefront->lo;k<=wavefront->hi;++k) { + const int dist = MAX(WAVEFRONT_V(k,offsets[k]),WAVEFRONT_H(k,offsets[k])); + dist_max = MAX(dist_max,dist); + } + wf_len = wavefront->hi-wavefront->lo+1; + } + // Memory used + const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab); + const uint64_t bt_buffer_used = (wf_components->bt_buffer) ? + wf_backtrace_buffer_get_size_used(wf_components->bt_buffer) : 0; + // Progress + const float aligned_progress = (dist_max>=0) ? (100.0f*(float)dist_max/(float)dist_total) : -1.0f; + const float million_offsets = (wf_len>=0) ? (float)wf_len/1000000.0f : -1.0f; + // Print one-line status + fprintf(stream,"["); + wavefront_aligner_print_type(stream,wf_aligner); + fprintf(stream, + "] SequenceLength=(%d,%d) Score %d (~ %2.3f%% aligned). " + "MemoryUsed(WF-Slab,BT-buffer)=(%lu MB,%lu MB). " + "Wavefronts ~ %2.3f Moffsets\n", + wf_aligner->pattern_length,wf_aligner->text_length,score,aligned_progress, + CONVERT_B_TO_MB(slab_size),CONVERT_B_TO_MB(bt_buffer_used),million_offsets); +} + diff --git a/wavefront/wavefront_unialign.h b/wavefront/wavefront_unialign.h new file mode 100644 index 0000000..c45f402 --- /dev/null +++ b/wavefront/wavefront_unialign.h @@ -0,0 +1,79 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#ifndef WAVEFRONT_UNIALIGN_H_ +#define WAVEFRONT_UNIALIGN_H_ + +#include "utils/commons.h" +#include "wavefront_aligner.h" + +/* + * Resize + */ +void wavefront_unialign_resize( + wavefront_aligner_t* const wf_aligner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse_sequences); + +/* + * Initialize alignment + */ +void wavefront_unialign_initialize_end2end( + wavefront_aligner_t* const wf_aligner); +void wavefront_unialign_initialize_endsfree( + wavefront_aligner_t* const wf_aligner, + const int pattern_length, + const int text_length); +void wavefront_unialign_init( + wavefront_aligner_t* const wf_aligner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length); + +/* + * Classic WF-Alignment (Unidirectional) + */ +int wavefront_unialign( + wavefront_aligner_t* const wf_aligner); + +/* + * Display + */ +void wavefront_unialign_print_status( + FILE* const stream, + wavefront_aligner_t* const wf_aligner, + const int current_score); + +#endif /* WAVEFRONT_UNIALIGN_H_ */ +