diff --git a/src/sketch/compute_sketches.cpp b/src/sketch/compute_sketches.cpp
index e1d63d10..e92371c9 100644
--- a/src/sketch/compute_sketches.cpp
+++ b/src/sketch/compute_sketches.cpp
@@ -45,6 +45,40 @@ void compute_sketches(config const & config,
     sketch::estimate_kmer_counts(sketches, kmer_counts);
 }
 
+// Records that too few k-mers were available so that the error can be raised outside of the OpenMP loop.
+// Threads inside the parallel loop call set() instead of throwing; check_and_throw() is called after the loop.
+struct too_few_kmers_handler
+{
+    // Raised by the first call to set().
+    std::atomic_flag flag{};
+    // k-mer count passed to the first call to set(); stays 0 if set() was never called.
+    size_t available{};
+
+    // Returns true once set() has been called by any thread.
+    [[nodiscard]] bool test() const noexcept
+    {
+        return flag.test();
+    }
+
+    // Raises the flag and, for the first caller only, stores the number of available k-mers.
+    void set(size_t const available_kmers) noexcept
+    {
+        // test_and_set() returns the previous value, so only the one caller that observes `false` writes
+        // `available`. Callers that find the flag already raised skip the write.
+        if (!flag.test_and_set())
+            available = available_kmers;
+    }
+
+    // Throws std::runtime_error with the recorded k-mer count if set() has been called; otherwise does nothing.
+    void check_and_throw() const
+    {
+        if (test())
+            throw std::runtime_error{"Not enough kmers (" + std::to_string(available) + ") to get "
+                                     + std::to_string(minHashes::num_sketches * minHashes::sketch_size)
+                                     + " hashes for all minHash sketches."};
+    }
+};
+
 // minHash_sketches data structure:
 // Vector L1 : number of user bins
 // Vector L2 : number_of_max_minHash_sketches (LSH ADD+OR parameter b)
@@ -59,15 +93,14 @@ void compute_sketches_with_minhash(config const & config,
 
     // compute sketches
     robin_hood::unordered_flat_set kmers;
+
     // OMP does not allow throwing in a thread (it requires that the throw is handled in the same thread).
-    // So we use an atomic flag to signal that we have too few kmers and we should stop the computation.
-    std::atomic_flag too_few_kmers{};
-    size_t too_few_kmers_size{};
-    size_t too_few_kmers_required{};
+    too_few_kmers_handler too_few_kmers{};
 
 #pragma omp parallel for schedule(dynamic) num_threads(config.threads) private(kmers)
     for (size_t i = 0; i < config.number_of_user_bins; ++i)
     {
+        // Skip work if we already know that we have too few kmers.
         if (too_few_kmers.test())
             continue;
 
@@ -98,9 +131,7 @@ void compute_sketches_with_minhash(config const & config,
 
             if (heap_size > kmers.size())
             {
-                too_few_kmers.test_and_set();
-                too_few_kmers_size = kmers.size();
-                too_few_kmers_required = minHash_sketch.table.size() * minHashes::sketch_size;
+                too_few_kmers.set(kmers.size());
                 break;
             }
 
@@ -122,9 +153,7 @@ void compute_sketches_with_minhash(config const & config,
         minHash_sketches[i] = minHash_sketch;
     }
 
-    if (too_few_kmers.test())
-        throw std::runtime_error{"Not enough kmers (" + std::to_string(too_few_kmers_size) + ") to get "
-                                 + std::to_string(too_few_kmers_required) + " hashes for all minHash sketches."};
+    too_few_kmers.check_and_throw();
 }
 
 void compute_sketches_with_minhash(config const & config,