diff --git a/Makefile b/Makefile index c7af4a60..1a6442c5 100644 --- a/Makefile +++ b/Makefile @@ -16,13 +16,10 @@ CC_FLAGS=-Wall -g AR=ar AR_FLAGS=-rsc -ifndef BUILD_EXAMPLES -BUILD_EXAMPLES=1 -endif - -ifndef BUILD_TOOLS +BUILD_EXAMPLES=0 BUILD_TOOLS=1 -endif +BUILD_WFA_PARALLEL=1 + ############################################################################### # Configuration rules ############################################################################### @@ -41,10 +38,7 @@ ifeq ($(BUILD_EXAMPLES),1) APPS+=examples endif -release: CC_FLAGS+=-O3 -march=native -flto -release: build - -all: CC_FLAGS+=-O3 -march=native +all: CC_FLAGS+=-O3 -march=native#-flto all: build debug: build diff --git a/tools/align_benchmark/align_benchmark.c b/tools/align_benchmark/align_benchmark.c index a09a7e18..418796ae 100644 --- a/tools/align_benchmark/align_benchmark.c +++ b/tools/align_benchmark/align_benchmark.c @@ -118,6 +118,7 @@ typedef struct { alignment_match_funct_t wfa_match_funct; void* wfa_match_funct_arguments; uint64_t wfa_max_memory; + int wfa_max_threads; // Other algorithms parameters int bandwidth; // Misc @@ -186,6 +187,7 @@ benchmark_args parameters = { .wfa_match_funct = NULL, .wfa_match_funct_arguments = NULL, .wfa_max_memory = UINT64_MAX, + .wfa_max_threads = 1, // Other algorithms parameters .bandwidth = -1, // Misc @@ -370,6 +372,7 @@ wavefront_aligner_t* align_input_configure_wavefront( attributes.plot_params.resolution_points = parameters.plot; attributes.system.verbose = parameters.verbose; attributes.system.max_memory_abort = parameters.wfa_max_memory; + attributes.system.max_num_threads = parameters.wfa_max_threads; // Allocate return wavefront_aligner_new(&attributes); } @@ -760,6 +763,7 @@ void usage() { " P1 = z-drop \n" " P2 = steps-between-cutoffs \n" " --wfa-max-memory \n" + " --wfa-max-threads (intra-parallelism; default=1) \n" " [Other Parameters] \n" " --bandwidth \n" " [Misc] \n" @@ -792,6 +796,7 @@ void parse_arguments(int argc,char** argv) { { "wfa-heuristic-parameters", required_argument, 0, 1003 }, { "wfa-custom-match-funct", no_argument, 0, 1004 }, { "wfa-max-memory", required_argument, 0, 1005 }, + { "wfa-max-threads", required_argument, 0, 1006 }, /* Other alignment parameters */ { "bandwidth", required_argument, 0, 2000 }, /* Misc */ @@ -977,9 +982,12 @@ void parse_arguments(int argc,char** argv) { parameters.wfa_match_funct = match_function; parameters.wfa_match_funct_arguments = &match_function_params; break; - case 1005: + case 1005: // --wfa-max-memory parameters.wfa_max_memory = atol(optarg); break; + case 1006: // --wfa-max-threads + parameters.wfa_max_threads = atoi(optarg); + break; /* * Other alignment parameters */ diff --git a/utils/commons.h b/utils/commons.h index ebc32085..528801b7 100644 --- a/utils/commons.h +++ b/utils/commons.h @@ -243,6 +243,12 @@ uint64_t rand_iid(const uint64_t min,const uint64_t max); uint32_t nominal_prop_u32(const uint32_t base,const double factor); uint64_t nominal_prop_u64(const uint64_t base,const double factor); +/* + * Inline + */ +#define FORCE_INLINE __attribute__((always_inline)) inline +#define FORCE_NO_INLINE __attribute__ ((noinline)) + /* * Vectorize */ diff --git a/wavefront/Makefile b/wavefront/Makefile index 2395ad14..2d3bdbed 100644 --- a/wavefront/Makefile +++ b/wavefront/Makefile @@ -32,7 +32,10 @@ MODULES=wavefront_align \ SRCS=$(addsuffix .c, $(MODULES)) OBJS=$(addprefix $(FOLDER_BUILD)/, $(SRCS:.c=.o)) -#CC_XFLAGS=-fopt-info-vec-optimized +ifeq ($(BUILD_WFA_PARALLEL),1) +PFLAGS=-DWFA_PARALLEL -fopenmp +endif +#CC_XFLAGS+=-fopt-info-vec-optimized ############################################################################### # Rules @@ -41,6 +44,6 @@ all: $(OBJS) # General building rule $(FOLDER_BUILD)/%.o : %.c - $(CC) $(CC_FLAGS) $(CC_XFLAGS) -I$(FOLDER_ROOT) -c $< -o $@ + $(CC) $(CC_FLAGS) $(PFLAGS) -I$(FOLDER_ROOT) -c $< -o $@ diff --git a/wavefront/wavefront_align.c b/wavefront/wavefront_align.c index f190a148..943c47b3 100644 --- a/wavefront/wavefront_align.c +++ b/wavefront/wavefront_align.c @@ -67,8 +67,8 @@ bool wavefront_align_reached_limits( wavefront_aligner_t* const wf_aligner, const int score) { // Check alignment-score limit - if (score >= wf_aligner->alignment_form.max_alignment_score) { - wf_aligner->cigar.score = wf_aligner->alignment_form.max_alignment_score; + if (score >= wf_aligner->system.max_alignment_score) { + wf_aligner->cigar.score = wf_aligner->system.max_alignment_score; wf_aligner->align_status.status = WF_STATUS_MAX_SCORE_REACHED; return true; // Stop } diff --git a/wavefront/wavefront_aligner.c b/wavefront/wavefront_aligner.c index 07d5eb3b..da262b4e 100644 --- a/wavefront/wavefront_aligner.c +++ b/wavefront/wavefront_aligner.c @@ -355,7 +355,7 @@ void wavefront_aligner_set_match_funct( void wavefront_aligner_set_max_alignment_score( wavefront_aligner_t* const wf_aligner, const int max_alignment_score) { - wf_aligner->alignment_form.max_alignment_score = max_alignment_score; + wf_aligner->system.max_alignment_score = max_alignment_score; } void wavefront_aligner_set_max_memory( wavefront_aligner_t* const wf_aligner, diff --git a/wavefront/wavefront_attributes.c b/wavefront/wavefront_attributes.c index e31d0bb3..6c93d145 100644 --- a/wavefront/wavefront_attributes.c +++ b/wavefront/wavefront_attributes.c @@ -44,7 +44,6 @@ wavefront_aligner_attr_t wavefront_aligner_attr_default = { .pattern_end_free = 0, .text_begin_free = 0, .text_end_free = 0, - .max_alignment_score = INT_MAX, // Unlimited }, // Custom matching functions .match_funct = NULL, // Use default match-compare function @@ -91,12 +90,15 @@ wavefront_aligner_attr_t wavefront_aligner_attr_default = { }, // System .system = { - .check_alignment_correct = false, + .max_alignment_score = INT_MAX, // Unlimited .probe_interval_global = 2000, .probe_interval_compact = 6000, .max_memory_compact = -1, // Automatically set based on memory-mode .max_memory_resident = -1, // Automatically set based on memory-mode .max_memory_abort = UINT64_MAX, // Unlimited .verbose = 0, // Quiet + .check_alignment_correct = false, + .max_num_threads = 1, // Single thread by default + .min_offsets_per_thread = 1000 // Minimum WF-length to spawn a thread }, }; diff --git a/wavefront/wavefront_attributes.h b/wavefront/wavefront_attributes.h index 2c16bf9d..6a5eab02 100644 --- a/wavefront/wavefront_attributes.h +++ b/wavefront/wavefront_attributes.h @@ -64,8 +64,6 @@ typedef struct { int pattern_end_free; // Allow free-gap at the end of the pattern int text_begin_free; // Allow free-gap at the beginning of the text int text_end_free; // Allow free-gap at the end of the text - // Limits - int max_alignment_score; // Maximum score allowed before quit } alignment_form_t; /* @@ -92,8 +90,8 @@ typedef int (*alignment_match_funct_t)(int,int,void*); * Alignment system configuration */ typedef struct { - // Debug - bool check_alignment_correct; // Verify that the alignment CIGAR output is correct + // Limits + int max_alignment_score; // Maximum score allowed before quit // Probing intervals int probe_interval_global; // Score-ticks interval to check any limits int probe_interval_compact; // Score-ticks interval to check BT-buffer compacting @@ -108,8 +106,13 @@ typedef struct { // 2 - Report each sequence aligned (brief) // 3 - Report each sequence aligned (very verbose) int verbose; // Verbose (regulates messages during alignment) + // Debug + bool check_alignment_correct; // Verify that the alignment CIGAR output is correct // Profile profiler_timer_t timer; // Time alignment + // OS + int max_num_threads; // Maximum number of threads to use to compute/extend WFs + int min_offsets_per_thread; // Minimum amount of offsets to spawn a thread } alignment_system_t; /* diff --git a/wavefront/wavefront_compute.c b/wavefront/wavefront_compute.c index 82727ec5..ae54bf89 100644 --- a/wavefront/wavefront_compute.c +++ b/wavefront/wavefront_compute.c @@ -415,3 +415,34 @@ void wavefront_compute_trim_ends_set( if (wavefront_set->out_i2wavefront) wavefront_compute_trim_ends(wf_aligner,wavefront_set->out_i2wavefront); if (wavefront_set->out_d2wavefront) wavefront_compute_trim_ends(wf_aligner,wavefront_set->out_d2wavefront); } +/* + * Multithread dispatcher + */ +#ifdef WFA_PARALLEL +int wavefront_compute_num_threads( + wavefront_aligner_t* const wf_aligner, + const int lo, + const int hi) { + // Parameters + const int max_num_threads = wf_aligner->system.max_num_threads; + const int min_offsets_per_thread = wf_aligner->system.min_offsets_per_thread; + // Compute minimum work-chunks worth spawning threads + const int num_chunks = WAVEFRONT_LENGTH(lo,hi)/min_offsets_per_thread; + const int max_workers = MIN(num_chunks,max_num_threads); + return MAX(max_workers,1); +} +void wavefront_compute_thread_limits( + const int thread_id, + const int num_theads, + const int lo, + const int hi, + int* const thread_lo, + int* const thread_hi) { + const int chunk_size = WAVEFRONT_LENGTH(lo,hi)/num_theads; + const int t_lo = lo + thread_id*chunk_size; + const int t_hi = (thread_id+1 == num_theads) ? hi : t_lo + chunk_size - 1; + *thread_lo = t_lo; + *thread_hi = t_hi; +} +#endif + diff --git a/wavefront/wavefront_compute.h b/wavefront/wavefront_compute.h index bdcb85ba..12fad795 100644 --- a/wavefront/wavefront_compute.h +++ b/wavefront/wavefront_compute.h @@ -83,4 +83,23 @@ void wavefront_compute_trim_ends_set( wavefront_aligner_t* const wf_aligner, wavefront_set_t* const wavefront_set); +/* + * Multithread dispatcher + */ +#ifdef WFA_PARALLEL +int wavefront_compute_num_threads( + wavefront_aligner_t* const wf_aligner, + const int lo, + const int hi); +void wavefront_compute_thread_limits( + const int thread_id, + const int num_theads, + const int lo, + const int hi, + int* const thread_lo, + int* const thread_hi); +#else +#define wavefront_compute_num_threads(wf_aligner,lo,hi) 1 +#endif + #endif /* WAVEFRONT_COMPUTE_H_ */ diff --git a/wavefront/wavefront_compute_affine.c b/wavefront/wavefront_compute_affine.c index 3b3fe219..ea4b4642 100644 --- a/wavefront/wavefront_compute_affine.c +++ b/wavefront/wavefront_compute_affine.c @@ -33,6 +33,10 @@ #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" +#ifdef WFA_PARALLEL +#include +#endif + /* * Compute Kernels */ @@ -181,8 +185,6 @@ void wavefront_compute_affine_idm_piggyback( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; out_m[k] = max; } - // Offload backtrace - wavefront_backtrace_offload_affine(wf_aligner,wavefront_set,lo,hi); } /* * Compute next wavefront @@ -201,18 +203,43 @@ void wavefront_compute_affine( wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront return; } - // Set limits + // Parameters + const bool bt_piggyback = wf_aligner->wf_components.bt_piggyback; int hi, lo; + // Set limits wavefront_compute_limits(wf_aligner,&wavefront_set,&lo,&hi); // Allocate wavefronts wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); // Init wavefront ends wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Compute next wavefront - if (wf_aligner->wf_components.bt_piggyback) { - wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + // Multithreading dispatcher + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Compute next wavefront + if (bt_piggyback) { + wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + } else { + wavefront_compute_affine_idm(wf_aligner,&wavefront_set,lo,hi); + } } else { - wavefront_compute_affine_idm(wf_aligner,&wavefront_set,lo,hi); +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (bt_piggyback) { + wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,t_lo,t_hi); + } else { + wavefront_compute_affine_idm(wf_aligner,&wavefront_set,t_lo,t_hi); + } + } +#endif + } + // Offload backtrace (if necessary) + if (bt_piggyback) { + wavefront_backtrace_offload_affine(wf_aligner,&wavefront_set,lo,hi); } // Trim wavefront ends wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); diff --git a/wavefront/wavefront_compute_affine2p.c b/wavefront/wavefront_compute_affine2p.c index f113b81d..82eaf9d0 100644 --- a/wavefront/wavefront_compute_affine2p.c +++ b/wavefront/wavefront_compute_affine2p.c @@ -34,6 +34,10 @@ #include "wavefront_compute_affine.h" #include "wavefront_backtrace_offload.h" +#ifdef WFA_PARALLEL +#include +#endif + /* * Compute Kernels */ @@ -272,12 +276,33 @@ void wavefront_compute_affine2p_idm_piggyback( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; out_m[k] = max; } - // Offload backtrace - wavefront_backtrace_offload_affine(wf_aligner,wavefront_set,lo,hi); } /* * Compute next wavefront */ +void wavefront_compute_affine2p_dispatcher( + wavefront_aligner_t* const wf_aligner, + wavefront_set_t* const wavefront_set, + const int lo, + const int hi) { + if (wavefront_set->in_mwavefront_open2->null && + wavefront_set->in_i2wavefront_ext->null && + wavefront_set->in_d2wavefront_ext->null) { + // Delegate to regular gap-affine + if (wf_aligner->wf_components.bt_piggyback) { + wavefront_compute_affine_idm_piggyback(wf_aligner,wavefront_set,lo,hi); + } else { + wavefront_compute_affine_idm(wf_aligner,wavefront_set,lo,hi); + } + } else { + // Full gap-affine-2p + if (wf_aligner->wf_components.bt_piggyback) { + wavefront_compute_affine2p_idm_piggyback(wf_aligner,wavefront_set,lo,hi); + } else { + wavefront_compute_affine2p_idm(wf_aligner,wavefront_set,lo,hi); + } + } +} void wavefront_compute_affine2p( wavefront_aligner_t* const wf_aligner, const int score) { @@ -302,27 +327,28 @@ void wavefront_compute_affine2p( wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); // Init wavefront ends wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Compute next wavefront - if (wavefront_set.in_mwavefront_open2->null && - wavefront_set.in_i2wavefront_ext->null && - wavefront_set.in_d2wavefront_ext->null) { - // Derive to regular gap-affine - if (wf_aligner->wf_components.bt_piggyback) { - wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); - } else { - wavefront_compute_affine_idm(wf_aligner,&wavefront_set,lo,hi); - } + // Multithreading dispatcher + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Compute next wavefront + wavefront_compute_affine2p_dispatcher(wf_aligner,&wavefront_set,lo,hi); } else { - // Full gap-affine-2p - if (wf_aligner->wf_components.bt_piggyback) { - wavefront_compute_affine2p_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); - } else { - wavefront_compute_affine2p_idm(wf_aligner,&wavefront_set,lo,hi); +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wavefront_compute_affine2p_dispatcher(wf_aligner,&wavefront_set,t_lo,t_hi); } +#endif + } + // Offload backtrace (if necessary) + if (wf_aligner->wf_components.bt_piggyback) { + wavefront_backtrace_offload_affine(wf_aligner,&wavefront_set,lo,hi); } // Trim wavefront ends wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); } - - diff --git a/wavefront/wavefront_compute_edit.c b/wavefront/wavefront_compute_edit.c index d02c460e..61d42f6c 100644 --- a/wavefront/wavefront_compute_edit.c +++ b/wavefront/wavefront_compute_edit.c @@ -33,6 +33,10 @@ #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" +#ifdef WFA_PARALLEL +#include +#endif + /* * Compute Kernels */ @@ -47,12 +51,10 @@ void wavefront_compute_indel_idm( const int text_length = wf_aligner->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; - // Loop peeling (k=lo) - curr_offsets[lo] = prev_offsets[lo+1]; // Compute-Next kernel loop int k; PRAGMA_LOOP_VECTORIZE - for (k=lo+1;k<=hi-1;++k) { + for (k=lo;k<=hi;++k) { // Compute maximum offset const wf_offset_t ins = prev_offsets[k-1] + 1; const wf_offset_t del = prev_offsets[k+1]; @@ -64,8 +66,6 @@ void wavefront_compute_indel_idm( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; curr_offsets[k] = max; } - // Loop peeling (k=hi) - curr_offsets[hi] = prev_offsets[hi-1] + 1; } void wavefront_compute_edit_idm( wavefront_aligner_t* const wf_aligner, @@ -78,12 +78,10 @@ void wavefront_compute_edit_idm( const int text_length = wf_aligner->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; - // Loop peeling (k=lo) - curr_offsets[lo] = prev_offsets[lo+1]; // Compute-Next kernel loop int k; PRAGMA_LOOP_VECTORIZE - for (k=lo+1;k<=hi-1;++k) { + for (k=lo;k<=hi;++k) { // Compute maximum offset const wf_offset_t ins = prev_offsets[k-1]; // Lower const wf_offset_t del = prev_offsets[k+1]; // Upper @@ -96,8 +94,6 @@ void wavefront_compute_edit_idm( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; curr_offsets[k] = max; } - // Loop peeling (k=hi) - curr_offsets[hi] = prev_offsets[hi-1] + 1; } /* * Compute Kernel (Piggyback) @@ -120,14 +116,10 @@ void wavefront_compute_indel_idm_piggyback( wf_offset_t* const curr_offsets = wf_curr->offsets; pcigar_t* const curr_pcigar = wf_curr->bt_pcigar; bt_block_idx_t* const curr_bt_idx = wf_curr->bt_prev; - // Loop peeling (k=lo) - curr_offsets[lo] = prev_offsets[lo+1]; - curr_pcigar[lo] = PCIGAR_PUSH_BACK_DEL(prev_pcigar[lo+1]); - curr_bt_idx[lo] = prev_bt_idx[lo+1]; // Compute-Next kernel loop int k; PRAGMA_LOOP_VECTORIZE // Ifs predicated by the compiler - for (k=lo+1;k<=hi-1;++k) { + for (k=lo;k<=hi;++k) { // Compute maximum offset const wf_offset_t ins = prev_offsets[k-1] + 1; const wf_offset_t del = prev_offsets[k+1]; @@ -147,15 +139,6 @@ void wavefront_compute_indel_idm_piggyback( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; curr_offsets[k] = max; } - // Loop peeling (k=hi) - curr_offsets[hi] = prev_offsets[hi-1] + 1; - curr_pcigar[hi] = PCIGAR_PUSH_BACK_INS(prev_pcigar[hi-1]); - curr_bt_idx[hi] = prev_bt_idx[hi-1]; - // Offload backtrace (if necessary) - if (score % PCIGAR_MAX_LENGTH == 0) { - wavefront_backtrace_offload_blocks_linear( - wf_aligner,curr_offsets,curr_pcigar,curr_bt_idx,lo,hi); - } } void wavefront_compute_edit_idm_piggyback( wavefront_aligner_t* const wf_aligner, @@ -175,14 +158,10 @@ void wavefront_compute_edit_idm_piggyback( wf_offset_t* const curr_offsets = wf_curr->offsets; pcigar_t* const curr_pcigar = wf_curr->bt_pcigar; bt_block_idx_t* const curr_bt_idx = wf_curr->bt_prev; - // Loop peeling (k=lo) - curr_offsets[lo] = prev_offsets[lo+1]; - curr_pcigar[lo] = PCIGAR_PUSH_BACK_DEL(prev_pcigar[lo+1]); - curr_bt_idx[lo] = prev_bt_idx[lo+1]; // Compute-Next kernel loop int k; PRAGMA_LOOP_VECTORIZE // Ifs predicated by the compiler - for (k=lo+1;k<=hi-1;++k) { + for (k=lo;k<=hi;++k) { // Compute maximum offset const wf_offset_t ins = prev_offsets[k-1] + 1; // Lower const wf_offset_t del = prev_offsets[k+1]; // Upper @@ -208,19 +187,31 @@ void wavefront_compute_edit_idm_piggyback( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; curr_offsets[k] = max; } - // Loop peeling (k=hi) - curr_offsets[hi] = prev_offsets[hi-1] + 1; - curr_pcigar[hi] = PCIGAR_PUSH_BACK_INS(prev_pcigar[hi-1]); - curr_bt_idx[hi] = prev_bt_idx[hi-1]; - // Offload backtrace (if necessary) - if (score % PCIGAR_MAX_LENGTH == 0) { - wavefront_backtrace_offload_blocks_linear( - wf_aligner,curr_offsets,curr_pcigar,curr_bt_idx,lo,hi); - } } /* * Compute next wavefront */ +void wavefront_compute_edit_dispatcher( + wavefront_aligner_t* const wf_aligner, + const int score, + wavefront_t* const wf_prev, + wavefront_t* const wf_curr, + const int lo, + const int hi) { + if (wf_aligner->wf_components.bt_piggyback) { + if (wf_aligner->penalties.distance_metric == indel) { + wavefront_compute_indel_idm_piggyback(wf_aligner,wf_prev,wf_curr,lo,hi,score); + } else { + wavefront_compute_edit_idm_piggyback(wf_aligner,wf_prev,wf_curr,lo,hi,score); + } + } else { + if (wf_aligner->penalties.distance_metric == indel) { + wavefront_compute_indel_idm(wf_aligner,wf_prev,wf_curr,lo,hi); + } else { + wavefront_compute_edit_idm(wf_aligner,wf_prev,wf_curr,lo,hi); + } + } +} void wavefront_compute_edit( wavefront_aligner_t* const wf_aligner, const int score) { @@ -242,26 +233,38 @@ void wavefront_compute_edit( const int hi = wf_prev->hi + 1; // wf_components->historic_min_lo = min_lo; // wf_components->historic_max_hi = max_hi; + wf_prev->offsets[lo-1] = WAVEFRONT_OFFSET_NULL; wf_prev->offsets[lo] = WAVEFRONT_OFFSET_NULL; wf_prev->offsets[hi] = WAVEFRONT_OFFSET_NULL; + wf_prev->offsets[hi+1] = WAVEFRONT_OFFSET_NULL; // Allocate output wavefront - wavefront_t* const wf_curr = wavefront_slab_allocate(wf_aligner->wavefront_slab,lo-1,hi+1); + wavefront_t* const wf_curr = wavefront_slab_allocate(wf_aligner->wavefront_slab,lo-2,hi+2); wf_components->mwavefronts[score_curr] = wf_curr; wf_components->mwavefronts[score_curr]->lo = lo; wf_components->mwavefronts[score_curr]->hi = hi; - // Compute next wavefront - if (wf_aligner->wf_components.bt_piggyback) { - if (wf_aligner->penalties.distance_metric == indel) { - wavefront_compute_indel_idm_piggyback(wf_aligner,wf_prev,wf_curr,lo,hi,score); - } else { - wavefront_compute_edit_idm_piggyback(wf_aligner,wf_prev,wf_curr,lo,hi,score); - } + // Multithreading dispatcher + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Compute next wavefront + wavefront_compute_edit_dispatcher( + wf_aligner,score,wf_prev,wf_curr,lo,hi); } else { - if (wf_aligner->penalties.distance_metric == indel) { - wavefront_compute_indel_idm(wf_aligner,wf_prev,wf_curr,lo,hi); - } else { - wavefront_compute_edit_idm(wf_aligner,wf_prev,wf_curr,lo,hi); +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wavefront_compute_edit_dispatcher( + wf_aligner,score,wf_prev,wf_curr,t_lo,t_hi); } +#endif + } + // Offload backtrace (if necessary) + if (wf_components->bt_piggyback && score % PCIGAR_MAX_LENGTH == 0) { + wavefront_backtrace_offload_blocks_linear( + wf_aligner,wf_curr->offsets,wf_curr->bt_pcigar,wf_curr->bt_prev,lo,hi); } // Trim wavefront ends wavefront_compute_trim_ends(wf_aligner,wf_curr); diff --git a/wavefront/wavefront_compute_linear.c b/wavefront/wavefront_compute_linear.c index 6788eeb3..1f290e6b 100644 --- a/wavefront/wavefront_compute_linear.c +++ b/wavefront/wavefront_compute_linear.c @@ -33,6 +33,10 @@ #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" +#ifdef WFA_PARALLEL +#include +#endif + /* * Compute Kernels */ @@ -118,8 +122,6 @@ void wavefront_compute_linear_idm_piggyback( if (v > pattern_length) max = WAVEFRONT_OFFSET_NULL; out_m[k] = max; } - // Offload backtrace - wavefront_backtrace_offload_linear(wf_aligner,wavefront_set,lo,hi); } /* * Compute next wavefront @@ -136,18 +138,43 @@ void wavefront_compute_linear( wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront return; } - // Set limits + // Parameters + const bool bt_piggyback = wf_aligner->wf_components.bt_piggyback; int hi, lo; + // Set limits wavefront_compute_limits(wf_aligner,&wavefront_set,&lo,&hi); // Allocate wavefronts wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); // Init wavefront ends wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Compute next wavefront - if (wf_aligner->wf_components.bt_piggyback) { - wavefront_compute_linear_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + // Multithreading dispatcher + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Compute next wavefront + if (bt_piggyback) { + wavefront_compute_linear_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + } else { + wavefront_compute_linear_idm(wf_aligner,&wavefront_set,lo,hi); + } } else { - wavefront_compute_linear_idm(wf_aligner,&wavefront_set,lo,hi); +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (bt_piggyback) { + wavefront_compute_linear_idm_piggyback(wf_aligner,&wavefront_set,t_lo,t_hi); + } else { + wavefront_compute_linear_idm(wf_aligner,&wavefront_set,t_lo,t_hi); + } + } +#endif + } + // Offload backtrace (if necessary) + if (bt_piggyback) { + wavefront_backtrace_offload_linear(wf_aligner,&wavefront_set,lo,hi); } // Trim wavefront ends wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); diff --git a/wavefront/wavefront_debug.c b/wavefront/wavefront_debug.c index 7fe88225..8cce88df 100644 --- a/wavefront/wavefront_debug.c +++ b/wavefront/wavefront_debug.c @@ -171,7 +171,7 @@ void wavefront_report_verbose_begin( wf_aligner->alignment_form.text_end_free); } fprintf(stream,"[WFA::Debug]\tMax-score\t%d\n", - wf_aligner->alignment_form.max_alignment_score); + wf_aligner->system.max_alignment_score); // Penalties fprintf(stream,"[WFA::Debug]\tPenalties\t"); wavefronts_penalties_print(stream,&wf_aligner->penalties); diff --git a/wavefront/wavefront_extend.c b/wavefront/wavefront_extend.c index 3e097d65..d1165b74 100644 --- a/wavefront/wavefront_extend.c +++ b/wavefront/wavefront_extend.c @@ -35,6 +35,10 @@ #include "wavefront_compute.h" #include "wavefront_heuristic.h" +#ifdef WFA_PARALLEL +#include +#endif + /* * Wavefront check termination (detect end of alignment) */ @@ -72,7 +76,12 @@ bool wavefront_extend_endsfree_check_termination( const int pattern_left = pattern_length - v_pos; const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; if (pattern_left <= pattern_end_free) { - mwavefront->k_alignment_end = k; + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + mwavefront->k_alignment_end = k; + } return true; // Quit (we are done) } } @@ -81,80 +90,100 @@ bool wavefront_extend_endsfree_check_termination( const int text_left = text_length - h_pos; const int text_end_free = wf_aligner->alignment_form.text_end_free; if (text_left <= text_end_free) { - mwavefront->k_alignment_end = k; + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + mwavefront->k_alignment_end = k; + } return true; // Quit (we are done) } } // Not done return false; } +FORCE_INLINE wf_offset_t wavefront_extend_matches_packed_kernel( + wavefront_aligner_t* const wf_aligner, + const int k, + wf_offset_t offset) { + // Fetch pattern/text blocks + uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->pattern+WAVEFRONT_V(k,offset)); + uint64_t* text_blocks = (uint64_t*)(wf_aligner->text+WAVEFRONT_H(k,offset)); + // Compare 64-bits blocks + uint64_t cmp = *pattern_blocks ^ *text_blocks; + while (__builtin_expect(cmp==0,0)) { + // Increment offset (full block) + offset += 8; + // Next blocks + ++pattern_blocks; + ++text_blocks; + // Compare + cmp = *pattern_blocks ^ *text_blocks; + } + // Count equal characters + const int equal_right_bits = __builtin_ctzl(cmp); + const int equal_chars = DIV_FLOOR(equal_right_bits,8); + offset += equal_chars; + // Return extended offset + return offset; +} /* * Wavefront offset extension comparing characters + * Remember: + * - No offset is out of boundaries !(h>tlen,v>plen) + * - if (h==tlen,v==plen) extension won't increment (sentinels) */ -bool wavefront_extend_matches_packed( +FORCE_NO_INLINE void wavefront_extend_matches_packed_end2end( wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, const int score, - const bool endsfree) { - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score]; - if (mwavefront==NULL) return false; - // Extend diagonally each wavefront point + const int lo, + const int hi) { wf_offset_t* const offsets = mwavefront->offsets; - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; int k; for (k=lo;k<=hi;++k) { - // Check offset & positions - // - No offset should be out of boundaries !(h>tlen,v>plen) - // - if (h==tlen,v==plen) extension won't increment (sentinels) + // Fetch offset + const wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); + } +} +FORCE_NO_INLINE bool wavefront_extend_matches_packed_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + wf_offset_t* const offsets = mwavefront->offsets; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset wf_offset_t offset = offsets[k]; if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Fetch pattern/text blocks - uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->pattern+WAVEFRONT_V(k,offset)); - uint64_t* text_blocks = (uint64_t*)(wf_aligner->text+WAVEFRONT_H(k,offset)); - // Compare 64-bits blocks - uint64_t cmp = *pattern_blocks ^ *text_blocks; - while (__builtin_expect(cmp==0,0)) { - // Increment offset (full block) - offset += 8; - // Next blocks - ++pattern_blocks; - ++text_blocks; - // Compare - cmp = *pattern_blocks ^ *text_blocks; - } - // Count equal characters - const int equal_right_bits = __builtin_ctzl(cmp); - const int equal_chars = DIV_FLOOR(equal_right_bits,8); - offset += equal_chars; - // Update offset + // Extend offset + offset = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); offsets[k] = offset; // Check ends-free reaching boundaries - if (endsfree && wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,offset,k)) { + if (wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,offset,k)) { return true; // Quit (we are done) } } - // Check end-to-end finished - if (!endsfree) { - return wavefront_extend_end2end_check_termination(wf_aligner,mwavefront); - } // Alignment not finished return false; } bool wavefront_extend_matches_custom( wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, const int score, + const int lo, + const int hi, const bool endsfree) { - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score]; - if (mwavefront==NULL) return false; // Parameters (custom matching function) alignment_match_funct_t match_funct = wf_aligner->match_funct; void* const func_arguments = wf_aligner->match_funct_arguments; // Extend diagonally each wavefront point wf_offset_t* const offsets = mwavefront->offsets; - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; int k; for (k=lo;k<=hi;++k) { // Check offset @@ -173,10 +202,6 @@ bool wavefront_extend_matches_custom( return true; // Quit (we are done) } } - // Check end-to-end finished - if (!endsfree) { - return wavefront_extend_end2end_check_termination(wf_aligner,mwavefront); - } // Alignment not finished return false; } @@ -188,8 +213,33 @@ bool wavefront_extend_end2end( int score) { // Modular wavefront if (wf_aligner->wf_components.memory_modular) score = score % wf_aligner->wf_components.max_score_scope; - // Extend wavefront - const bool end_reached = wavefront_extend_matches_packed(wf_aligner,score,false); + // Fetch m-wavefront + wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score]; + if (mwavefront==NULL) return false; + // Multithreading dispatcher + const int lo = mwavefront->lo; + const int hi = mwavefront->hi; + bool end_reached = false; + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Extend wavefront + wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,score,lo,hi); + // Check end-to-end finished + end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront); + } else { +#ifdef WFA_PARALLEL + // Extend wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,score,t_lo,t_hi); + } + // Check end-to-end finished + end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront); +#endif + } if (end_reached) { wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; return true; // Done @@ -209,8 +259,31 @@ bool wavefront_extend_endsfree( int score) { // Modular wavefront if (wf_aligner->wf_components.memory_modular) score = score % wf_aligner->wf_components.max_score_scope; - // Extend wavefront - const bool end_reached = wavefront_extend_matches_packed(wf_aligner,score,true); + // Fetch m-wavefront + wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score]; + if (mwavefront==NULL) return false; + // Multithreading dispatcher + const int lo = mwavefront->lo; + const int hi = mwavefront->hi; + bool end_reached = false; + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Extend wavefront + end_reached = wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,lo,hi); + } else { +#ifdef WFA_PARALLEL + // Extend wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,t_lo,t_hi)) { + end_reached = true; + } + } +#endif + } if (end_reached) { wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; return true; // Done @@ -230,9 +303,36 @@ bool wavefront_extend_custom( int score) { // Modular wavefront if (wf_aligner->wf_components.memory_modular) score = score % wf_aligner->wf_components.max_score_scope; - // Extend wavefront + // Fetch m-wavefront + wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score]; + if (mwavefront==NULL) return false; + // Multithreading dispatcher const bool endsfree = (wf_aligner->alignment_form.span == alignment_endsfree); - const bool end_reached = wavefront_extend_matches_custom(wf_aligner,score,endsfree); + const int lo = mwavefront->lo; + const int hi = mwavefront->hi; + bool end_reached = false; + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Extend wavefront + end_reached = wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,endsfree); + } else { +#ifdef WFA_PARALLEL + // Extend wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits( + omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (wavefront_extend_matches_custom(wf_aligner,mwavefront,score,t_lo,t_hi,endsfree)) { + end_reached = true; + } + } +#endif + } + // Check end-to-end finished + if (!endsfree) { + end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront); + } if (end_reached) { wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; return true; // Done