From e5c21eab17adc8a8bc043b9ce0b2f48f670e2f2c Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 09:03:12 +0100 Subject: [PATCH 01/80] pre-increment --- src/tree_distances.cpp | 76 +++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index a610f9012..73646e5ba 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -35,14 +35,14 @@ List cpp_robinson_foulds_distance (const RawMatrix x, const RawMatrix y, bool all_match = true, all_complement = true; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b_complement[bi][bin])) { all_complement = false; break; @@ -83,25 +83,25 @@ List cpp_robinson_foulds_info (const RawMatrix x, const RawMatrix y, /* Dynamic allocation 20% faster for 105 tips, but VLA not permitted in C11 */ splitbit b_complement[MAX_SPLITS][MAX_BINS]; for (int16 i = 0; i != b.n_splits; i++) { - for (int16 bin = 0; bin != last_bin; bin++) { + for (int16 bin = 0; bin != last_bin; ++bin) { b_complement[i][bin] = ~b.state[i][bin]; } b_complement[i][last_bin] = b.state[i][last_bin] ^ unset_mask; } - for (int16 ai = 0; ai != a.n_splits; ai++) { - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { bool all_match = true, all_complement = true; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b_complement[bi][bin])) { all_complement = false; break; @@ -110,7 +110,7 @@ List cpp_robinson_foulds_info (const RawMatrix x, const RawMatrix y, } if (all_match || all_complement) { int16 leaves_in_split = 0; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { leaves_in_split += count_bits(a.state[ai][bin]); } @@ -149,20 +149,20 @@ List cpp_matching_split_distance (const RawMatrix x, const RawMatrix y, cost** score = new cost*[most_splits]; for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - for (int16 ai = 0; ai != a.n_splits; ai++) { - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { score[ai][bi] = 0; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { score[ai][bi] += count_bits(a.state[ai][bin] ^ b.state[bi][bin]); } if (score[ai][bi] > half_tips) score[ai][bi] = n_tips - score[ai][bi]; } - for (int16 bi = b.n_splits; bi < most_splits; bi++) { + for (int16 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ai++) { - for (int16 bi = 0; bi != most_splits; bi++) { + for (int16 ai = a.n_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -212,18 +212,18 @@ List cpp_jaccard_similarity (const RawMatrix x, const RawMatrix y, cost** score = new cost*[most_splits]; for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - for (int16 ai = 0; ai != a.n_splits; ai++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { const int16 na = a.in_split[ai], nA = n_tips - na ; - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { // x divides tips into a|A; y divides tips into b|B int16 a_and_b = 0; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); } @@ -277,12 +277,12 @@ List cpp_jaccard_similarity (const RawMatrix x, const RawMatrix y, } } } - for (int16 bi = b.n_splits; bi < most_splits; bi++) { + for (int16 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ai++) { - for (int16 bi = 0; bi != most_splits; bi++) { + for (int16 ai = a.n_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -327,14 +327,14 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, splitbit different[MAX_BINS]; - for (int16 ai = 0; ai != a.n_splits; ai++) { - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { int16 n_different = 0, n_a_only = 0, n_a_and_b = 0 ; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { different[bin] = a.state[ai][bin] ^ b.state[bi][bin]; n_different += count_bits(different[bin]); n_a_only += count_bits(a.state[ai][bin] & different[bin]); @@ -346,12 +346,12 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, ((max_score / max_possible) * mmsi_score(n_same, n_a_and_b, n_different, n_a_only)); } - for (int16 bi = b.n_splits; bi < most_splits; bi++) { + for (int16 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ai++) { - for (int16 bi = 0; bi < most_splits; bi++) { + for (int16 ai = a.n_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -405,18 +405,18 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, NumericVector a_match(a.n_splits); std::unique_ptr b_match = std::make_unique(b.n_splits); - for (int16 ai = 0; ai != a.n_splits; ai++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { if (a_match[ai]) continue; const int16 na = a.in_split[ai], nA = n_tips - na ; - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { // x divides tips into a|A; y divides tips into b|B int16 a_and_b = 0; - for (int16 bin = 0; bin != a.n_bins; bin++) { + for (int16 bin = 0; bin != a.n_bins; ++bin) { a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); } @@ -452,7 +452,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, ); } } - for (int16 bi = b.n_splits; bi < most_splits; bi++) { + for (int16 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -473,21 +473,21 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, if (exact_matches) { int16 a_pos = 0; - for (int16 ai = 0; ai != a.n_splits; ai++) { + for (int16 ai = 0; ai != a.n_splits; ++ai) { if (a_match[ai]) continue; int16 b_pos = 0; - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { if (b_match[bi]) continue; score[a_pos][b_pos] = score[ai][bi]; b_pos++; } - for (int16 bi = lap_dim - a_extra_splits; bi < lap_dim; bi++) { + for (int16 bi = lap_dim - a_extra_splits; bi < lap_dim; ++bi) { score[a_pos][bi] = max_score; } a_pos++; } - for (int16 ai = lap_dim - b_extra_splits; ai < lap_dim; ai++) { - for (int16 bi = 0; bi != lap_dim; bi++) { + for (int16 ai = lap_dim - b_extra_splits; ai < lap_dim; ++ai) { + for (int16 bi = 0; bi != lap_dim; ++bi) { score[ai][bi] = max_score; } } @@ -503,7 +503,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, std::unique_ptr lap_decode = std::make_unique(lap_dim); int16 fuzzy_match = 0; - for (int16 bi = 0; bi != b.n_splits; bi++) { + for (int16 bi = 0; bi != b.n_splits; ++bi) { if (!b_match[bi]) { lap_decode[fuzzy_match++] = bi + 1; } else { @@ -537,8 +537,8 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, return List::create(Named("score") = final_score, _["matching"] = final_matching); } else { - for (int16 ai = a.n_splits; ai < most_splits; ai++) { - for (int16 bi = 0; bi != most_splits; bi++) { + for (int16 ai = a.n_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } From c56b4ab5f4bef1efc7eab56f202aa85d62fe4bcb Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 10:22:11 +0100 Subject: [PATCH 02/80] Header guards --- src/tree_distances.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tree_distances.h b/src/tree_distances.h index 543c848c5..dc56a925e 100644 --- a/src/tree_distances.h +++ b/src/tree_distances.h @@ -1,3 +1,6 @@ +#ifndef _TREEDIST_TREE_DISTANCES_H +#define _TREEDIST_TREE_DISTANCES_H + #include #include #include "ints.h" @@ -77,3 +80,5 @@ extern double extern List cpp_robinson_foulds_distance (RawMatrix x, RawMatrix y, IntegerVector nTip); + +#endif \ No newline at end of file From daeb1d75d56c6e6a27faa3366cdc1df2095572b5 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 10:22:29 +0100 Subject: [PATCH 03/80] alpha --- src/tree_distances.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 73646e5ba..26e478d3b 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -1,5 +1,5 @@ -#include /* for unique_ptr, make_unique */ #include +#include /* for unique_ptr, make_unique */ #include #include "tree_distances.h" #include "SplitList.h" From 530f43adfa4e6b507f513f7f4c88395ffb575382 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 10:59:08 +0100 Subject: [PATCH 04/80] start work on cpp_all_pairs_mci() --- R/tree_distance_info.R | 9 +++++-- src/RcppExports.cpp | 14 +++++++++++ src/SplitList.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++ src/SplitList.h | 8 ++++++ src/tree_distances.cpp | 20 +++++++++++++++ 5 files changed, 104 insertions(+), 2 deletions(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index 9f7548e55..7f3a2fa49 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -297,8 +297,13 @@ ExpectedVariation <- function (tree1, tree2, samples = 1e+4) { #' @export MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, reportMatching = FALSE, diag = TRUE) { - unnormalized <- CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, - reportMatching) + unnormalized <- if (is.null(tree2) && is.list(tree1) && + !inherits(tree1, 'phylo')) { + cpp_all_pairs_msi(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) + } else { + CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, + reportMatching) + } if (diag && is.null(tree2)) { unnormalized <- as.matrix(unnormalized) diag(unnormalized) <- ClusteringEntropy(tree1) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 9989fd522..852f0f7e0 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -177,6 +177,19 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// cpp_all_pairs_mci +List cpp_all_pairs_mci(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); +RcppExport SEXP _TreeDist_cpp_all_pairs_mci(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); + Rcpp::traits::input_parameter< const RawMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< const IntegerVector >::type nTip(nTipSEXP); + rcpp_result_gen = Rcpp::wrap(cpp_all_pairs_mci(x, y, nTip)); + return rcpp_result_gen; +END_RCPP +} // cpp_mutual_clustering List cpp_mutual_clustering(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); RcppExport SEXP _TreeDist_cpp_mutual_clustering(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { @@ -219,6 +232,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3}, {"_TreeDist_cpp_jaccard_similarity", (DL_FUNC) &_TreeDist_cpp_jaccard_similarity, 5}, {"_TreeDist_cpp_msi_distance", (DL_FUNC) &_TreeDist_cpp_msi_distance, 3}, + {"_TreeDist_cpp_all_pairs_mci", (DL_FUNC) &_TreeDist_cpp_all_pairs_mci, 3}, {"_TreeDist_cpp_mutual_clustering", (DL_FUNC) &_TreeDist_cpp_mutual_clustering, 3}, {"_TreeDist_cpp_shared_phylo", (DL_FUNC) &_TreeDist_cpp_shared_phylo, 3}, {NULL, NULL, 0} diff --git a/src/SplitList.cpp b/src/SplitList.cpp index bb315f530..b416fce5e 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -2,6 +2,7 @@ using namespace Rcpp; #include "tree_distances.h" // Before SplitList.h, for int16 #include +#include "timsort.hpp" #include "SplitList.h" SplitList::SplitList(RawMatrix x) { @@ -49,3 +50,57 @@ SplitList::SplitList(RawMatrix x) { } } } + +void SplitList::swap(int16 a, int16 b) { + for (int16 bin = n_bins; bin--; ) { + const splitbit tmp = state[a][bin]; + state[a][bin] = state[b][bin]; + state[b][bin] = tmp; + } +} + +bool SplitList::less_than(int16 a, int16 b) { + for (int16 bin = 0; bin != n_bins; ++bin) { + if (state[a][bin] < state[b][bin]) { + return true; + } + } + return false; +} + +bool SplitList::greater_than(int16 a, int16 b) { + for (int16 bin = 0; bin != n_bins; ++bin) { + if (state[a][bin] > state[b][bin]) { + return true; + } + } + return false; +} + +int16 SplitList::partition(int16 lo, int16 hi) { + const int16 pivot = (hi + lo) / 2; + for (int16 i = lo - 1, j = hi + 1; ; ) { + do { + ++i; + } while (less_than(i, pivot)); + do { + --j; + } while (greater_than(j, pivot)); + if (i >= j) { + return j; + } + swap(i, j); + } +} + +void SplitList::quicksort(int16 lo, int16 hi) { + if (lo < hi) { + const int16 p = partition(lo, hi); + quicksort(lo, p); + quicksort(p + 1, hi); + } +} + +void SplitList::quicksort() { + quicksort(0, n_splits - 1); +} diff --git a/src/SplitList.h b/src/SplitList.h index 7481975a4..18605cd94 100644 --- a/src/SplitList.h +++ b/src/SplitList.h @@ -3,16 +3,24 @@ #include #include +#include "tree_distances.h" + using namespace Rcpp; const int16 R_BIN_SIZE = 8; class SplitList { + int16 partition(int16 lo, int16 hi); + void swap(int16 a, int16 b); + bool less_than(int16 a, int16 b); + bool greater_than(int16 a, int16 b); public: int16 n_splits, n_bins; splitbit state[MAX_SPLITS][MAX_BINS]; splitbit in_split[MAX_SPLITS]; SplitList(RawMatrix); + void quicksort(); + void quicksort(int16 lo, int16 hi); }; #endif diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 26e478d3b..b0a1cf83d 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -380,6 +380,26 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, } +// [[Rcpp::export]] +List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { + const int16 + n_tips = nTip[0], + n_trees = x.length() + ; + auto splits = std::make_unique(n_trees); + for (int16 i = x.length(); i--; ) { + const RawMatrix a = x[i]; + splits[i] = SplitList(a); + splits[i].quicksort(); + } + + const int16 max_splits = n_tips - 3; + auto split_library = std::make_unique(n_trees * max_splits * + splits[0].n_bins); + + const int16 tournament_size = 2 ^ n_trees +} + // [[Rcpp::export]] List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, const IntegerVector nTip) { From 1a73ebdf7775df90647bd6a9723121378033574d Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 12:02:58 +0100 Subject: [PATCH 05/80] Constructing tournament tree --- src/tree_distances.cpp | 60 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index b0a1cf83d..214aa54cc 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -378,6 +378,37 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, return List::create(Named("score") = final_score, _["matching"] = final_matching); +} + +inline int16 game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, + const int16* n_bins) { + for (int16 bin = 0; bin != *n_bins; ++bin) { + if (a[split_a][bin] < b[split_b][bin]) { + return 1; + } else if (a[split_a][bin] > b[split_b][bin]) { + return -1; + } + } + return 0; +} + +inline void play_game(int16 *node, + std::unique_ptr &which_tree, + std::unique_ptr &which_split, + std::unique_ptr &tie, + std::unique_ptr &splits, + const int16* n_bins) { + const int16 + child1 = *node * 2, + child2 = child1 + 1, + result = game_result(splits[which_tree[child1]].state, which_split[child1], + splits[which_tree[child2]].state, which_split[child2], + n_bins); + } + + + } // [[Rcpp::export]] @@ -392,12 +423,39 @@ List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { splits[i] = SplitList(a); splits[i].quicksort(); } + const int16 n_bins = splits[0].n_bins; const int16 max_splits = n_tips - 3; auto split_library = std::make_unique(n_trees * max_splits * splits[0].n_bins); - const int16 tournament_size = 2 ^ n_trees + const int16 + tournament_rounds = std::ceil(std::log2(double(n_trees))) + 1, + tournament_games = n_trees - 1, + tournament_nodes = n_trees + tournament_games + ; + + auto which_tree = std::make_unique(tournament_nodes); + auto which_split = std::make_unique(tournament_nodes); + auto tie = std::make_unique(tournament_games); + + + // Populate children of tree + for (int16 i = 0; i != n_trees; ++i) { + const int16 this_node = i + tournament_games; + which_tree[this_node] = i; + which_split[this_node] = splits[i].n_splits - 1; + } + for (int16 i = n_trees + tournament_games; i != tournament_nodes; ++i) { + which_split[i] = -1; + } + + // Initial games + for (int16 i = tournament_games; i--; ) { + play_game(&i; &which_tree; &which_split, &tie, &splits); + } + + } // [[Rcpp::export]] From 2bbd1b56ca948d3c08e0b47884f119be9791e243 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Mon, 19 Jul 2021 12:13:46 +0100 Subject: [PATCH 06/80] Play first games --- src/tree_distances.cpp | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 214aa54cc..e90d1b035 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -393,7 +393,7 @@ inline int16 game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 return 0; } -inline void play_game(int16 *node, +inline void play_game(const int16 *node, std::unique_ptr &which_tree, std::unique_ptr &which_split, std::unique_ptr &tie, @@ -401,14 +401,37 @@ inline void play_game(int16 *node, const int16* n_bins) { const int16 child1 = *node * 2, - child2 = child1 + 1, + child2 = child1 + 1 + ; + + int16 result; + if (which_split[child1] < 0) { // child 1 undefined; child 2 wins (or ties) + result = 1; + } else if (which_split[child2] < 0) { // child 2 is undefined; child 1 wins + result = -1; + } else { result = game_result(splits[which_tree[child1]].state, which_split[child1], splits[which_tree[child2]].state, which_split[child2], n_bins); - } + } + switch (result) { + case -1: // child 1 wins (greater, or defined) + tie[*node] = false; + which_tree[*node] = which_tree[child2]; + which_split[*node] = which_split[child2]; + break; + case 1: // child 2 wins (greater, or defined) + tie[*node] = false; + which_tree[*node] = which_tree[child1]; + which_split[*node] = which_split[child1]; + break; + case 0: //it's a tie + tie[*node] = true; + which_tree[*node] = which_tree[child1]; + which_split[*node] = which_split[child1]; + } - } // [[Rcpp::export]] @@ -452,7 +475,7 @@ List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { // Initial games for (int16 i = tournament_games; i--; ) { - play_game(&i; &which_tree; &which_split, &tie, &splits); + play_game(&i, which_tree, which_split, tie, splits, &n_bins); } From cc459a9158af971e5587c1e0b0535cf1eb2ffe7e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 04:49:56 +0100 Subject: [PATCH 07/80] Test for equality at end --- src/tree_distances.cpp | 64 ++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index e90d1b035..6d83bb55e 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -380,23 +380,20 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, } -inline int16 game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, +inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, const int16* n_bins) { for (int16 bin = 0; bin != *n_bins; ++bin) { - if (a[split_a][bin] < b[split_b][bin]) { - return 1; - } else if (a[split_a][bin] > b[split_b][bin]) { - return -1; + if (a[split_a][bin] > b[split_b][bin]) { + return true; } } - return 0; + return false; } -inline void play_game(const int16 *node, +inline int16 play_game(const int16 *node, std::unique_ptr &which_tree, std::unique_ptr &which_split, - std::unique_ptr &tie, std::unique_ptr &splits, const int16* n_bins) { const int16 @@ -404,34 +401,22 @@ inline void play_game(const int16 *node, child2 = child1 + 1 ; - int16 result; - if (which_split[child1] < 0) { // child 1 undefined; child 2 wins (or ties) - result = 1; - } else if (which_split[child2] < 0) { // child 2 is undefined; child 1 wins - result = -1; + bool child1_greater; + if (which_split[child1] < 0) { + child1_greater = false; + } else if (which_split[child2] < 0) { + child1_greater = true; } else { - result = game_result(splits[which_tree[child1]].state, which_split[child1], - splits[which_tree[child2]].state, which_split[child2], - n_bins); - } - - switch (result) { - case -1: // child 1 wins (greater, or defined) - tie[*node] = false; - which_tree[*node] = which_tree[child2]; - which_split[*node] = which_split[child2]; - break; - case 1: // child 2 wins (greater, or defined) - tie[*node] = false; - which_tree[*node] = which_tree[child1]; - which_split[*node] = which_split[child1]; - break; - case 0: //it's a tie - tie[*node] = true; - which_tree[*node] = which_tree[child1]; - which_split[*node] = which_split[child1]; + child1_greater = game_result( + splits[which_tree[child1]].state, which_split[child1], + splits[which_tree[child2]].state, which_split[child2], + n_bins); } + const int16 loser = child1_greater ? child2 : child1; + which_tree[*node] = which_tree[loser]; + which_split[*node] = which_split[loser]; + return child1_greater ? child1 : child2; } // [[Rcpp::export]] @@ -460,7 +445,6 @@ List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { auto which_tree = std::make_unique(tournament_nodes); auto which_split = std::make_unique(tournament_nodes); - auto tie = std::make_unique(tournament_games); // Populate children of tree @@ -474,10 +458,18 @@ List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { } // Initial games - for (int16 i = tournament_games; i--; ) { - play_game(&i, which_tree, which_split, tie, splits, &n_bins); + for (int16 i = tournament_games; --i; ) { + // TODO duplicate function as void to avoid overhead of return + play_game(&i, which_tree, which_split, splits, &n_bins); } + int16 library_position = 0, winning_split; + do { + update_library(&library_position, + } while (winning_split > -1); + + + } From 79ddbf68a3b50105d09d05c73ef8486c9f08463c Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 04:57:21 +0100 Subject: [PATCH 08/80] File reorganization --- src/{SplitList.h => SplitList.hpp} | 0 src/SplitRoster.cpp | 95 ++++++++++++++++++++++++++++++ src/SplitRoster.hpp | 16 +++++ src/tree_distances.cpp | 93 ----------------------------- 4 files changed, 111 insertions(+), 93 deletions(-) rename src/{SplitList.h => SplitList.hpp} (100%) create mode 100644 src/SplitRoster.cpp create mode 100644 src/SplitRoster.hpp diff --git a/src/SplitList.h b/src/SplitList.hpp similarity index 100% rename from src/SplitList.h rename to src/SplitList.hpp diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp new file mode 100644 index 000000000..93bb26729 --- /dev/null +++ b/src/SplitRoster.cpp @@ -0,0 +1,95 @@ +#include "SplitRoster.hpp" + +inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, + const int16* n_bins) { + for (int16 bin = 0; bin != *n_bins; ++bin) { + if (a[split_a][bin] > b[split_b][bin]) { + return true; + } + } + return false; +} + +inline int16 play_game(const int16 *node, + std::unique_ptr &which_tree, + std::unique_ptr &which_split, + std::unique_ptr &splits, + const int16* n_bins) { + const int16 + child1 = *node * 2, + child2 = child1 + 1 + ; + + bool child1_greater; + if (which_split[child1] < 0) { + child1_greater = false; + } else if (which_split[child2] < 0) { + child1_greater = true; + } else { + child1_greater = game_result( + splits[which_tree[child1]].state, which_split[child1], + splits[which_tree[child2]].state, which_split[child2], + n_bins); + } + + const int16 loser = child1_greater ? child2 : child1; + which_tree[*node] = which_tree[loser]; + which_split[*node] = which_split[loser]; + return child1_greater ? child1 : child2; +} + +// [[Rcpp::export]] +SplitRoster::SplitRoster (const List x, const IntegerVector nTip) { + const int16 + n_tips = nTip[0], + n_trees = x.length() + ; + auto splits = std::make_unique(n_trees); + for (int16 i = x.length(); i--; ) { + const RawMatrix a = x[i]; + splits[i] = SplitList(a); + splits[i].quicksort(); + } + const int16 n_bins = splits[0].n_bins; + + const int16 max_splits = n_tips - 3; + auto split_library = std::make_unique(n_trees * max_splits * + splits[0].n_bins); + + const int16 + tournament_rounds = std::ceil(std::log2(double(n_trees))) + 1, + tournament_games = n_trees - 1, + tournament_nodes = n_trees + tournament_games + ; + + auto which_tree = std::make_unique(tournament_nodes); + auto which_split = std::make_unique(tournament_nodes); + + + // Populate children of tree + for (int16 i = 0; i != n_trees; ++i) { + const int16 this_node = i + tournament_games; + which_tree[this_node] = i; + which_split[this_node] = splits[i].n_splits - 1; + } + for (int16 i = n_trees + tournament_games; i != tournament_nodes; ++i) { + which_split[i] = -1; + } + + // Initial games + for (int16 i = tournament_games; --i; ) { + // TODO duplicate function as void to avoid overhead of return + play_game(&i, which_tree, which_split, splits, &n_bins); + } + + int16 library_position = 0, winning_split; + do { + update_library(&library_position, + } while (winning_split > -1); + + + + + } + \ No newline at end of file diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp new file mode 100644 index 000000000..c8dd0b1ac --- /dev/null +++ b/src/SplitRoster.hpp @@ -0,0 +1,16 @@ +#ifndef _TREEDIST_SPLITROSTER_HPP +#define _TREEDIST_SPLITROSTER_HPP + +#include +#include +#include "tree_distances.h" +#include "SplitList.h" + +using namespace Rcpp; + +class SplitRoster { +public: + int16 n_splits, n_trees; +}; + +#endif diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 6d83bb55e..26e478d3b 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -378,99 +378,6 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, return List::create(Named("score") = final_score, _["matching"] = final_matching); -} - -inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, - const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, - const int16* n_bins) { - for (int16 bin = 0; bin != *n_bins; ++bin) { - if (a[split_a][bin] > b[split_b][bin]) { - return true; - } - } - return false; -} - -inline int16 play_game(const int16 *node, - std::unique_ptr &which_tree, - std::unique_ptr &which_split, - std::unique_ptr &splits, - const int16* n_bins) { - const int16 - child1 = *node * 2, - child2 = child1 + 1 - ; - - bool child1_greater; - if (which_split[child1] < 0) { - child1_greater = false; - } else if (which_split[child2] < 0) { - child1_greater = true; - } else { - child1_greater = game_result( - splits[which_tree[child1]].state, which_split[child1], - splits[which_tree[child2]].state, which_split[child2], - n_bins); - } - - const int16 loser = child1_greater ? child2 : child1; - which_tree[*node] = which_tree[loser]; - which_split[*node] = which_split[loser]; - return child1_greater ? child1 : child2; -} - -// [[Rcpp::export]] -List cpp_all_pairs_mci (const List x, const IntegerVector nTip) { - const int16 - n_tips = nTip[0], - n_trees = x.length() - ; - auto splits = std::make_unique(n_trees); - for (int16 i = x.length(); i--; ) { - const RawMatrix a = x[i]; - splits[i] = SplitList(a); - splits[i].quicksort(); - } - const int16 n_bins = splits[0].n_bins; - - const int16 max_splits = n_tips - 3; - auto split_library = std::make_unique(n_trees * max_splits * - splits[0].n_bins); - - const int16 - tournament_rounds = std::ceil(std::log2(double(n_trees))) + 1, - tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; - - auto which_tree = std::make_unique(tournament_nodes); - auto which_split = std::make_unique(tournament_nodes); - - - // Populate children of tree - for (int16 i = 0; i != n_trees; ++i) { - const int16 this_node = i + tournament_games; - which_tree[this_node] = i; - which_split[this_node] = splits[i].n_splits - 1; - } - for (int16 i = n_trees + tournament_games; i != tournament_nodes; ++i) { - which_split[i] = -1; - } - - // Initial games - for (int16 i = tournament_games; --i; ) { - // TODO duplicate function as void to avoid overhead of return - play_game(&i, which_tree, which_split, splits, &n_bins); - } - - int16 library_position = 0, winning_split; - do { - update_library(&library_position, - } while (winning_split > -1); - - - - } // [[Rcpp::export]] From 99f7ec846a6f32d890d6b487e2f4f36958d68208 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 05:01:11 +0100 Subject: [PATCH 09/80] =?UTF-8?q?.h=20=E2=86=92=20.hpp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SplitList.cpp | 5 ++--- src/SplitList.hpp | 2 +- src/SplitRoster.cpp | 2 ++ src/SplitRoster.hpp | 4 ++-- src/day_1985.cpp | 6 +++--- src/{information.h => information.hpp} | 6 +++--- src/{ints.h => ints.hpp} | 4 ++-- src/nni_distance.cpp | 4 ++-- src/tree_distance_functions.cpp | 4 ++-- src/tree_distances.cpp | 4 ++-- src/{tree_distances.h => tree_distances.hpp} | 0 11 files changed, 21 insertions(+), 20 deletions(-) rename src/{information.h => information.hpp} (95%) rename src/{ints.h => ints.hpp} (85%) rename src/{tree_distances.h => tree_distances.hpp} (100%) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index b416fce5e..0f9545e6d 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -1,9 +1,8 @@ #include using namespace Rcpp; -#include "tree_distances.h" // Before SplitList.h, for int16 #include -#include "timsort.hpp" -#include "SplitList.h" +#include "tree_distances.hpp" // Before SplitList.h, for int16 +#include "SplitList.hpp" SplitList::SplitList(RawMatrix x) { n_splits = x.rows(); diff --git a/src/SplitList.hpp b/src/SplitList.hpp index 18605cd94..0a7a68a14 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.hpp @@ -3,7 +3,7 @@ #include #include -#include "tree_distances.h" +#include "tree_distances.hpp" using namespace Rcpp; diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 93bb26729..b3efcc031 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -1,3 +1,5 @@ +#include "tree_distances.hpp" +#include "SplitList.hpp" #include "SplitRoster.hpp" inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index c8dd0b1ac..2f3a1d075 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -3,8 +3,8 @@ #include #include -#include "tree_distances.h" -#include "SplitList.h" +#include "tree_distances.hpp" +#include "SplitList.hpp" using namespace Rcpp; diff --git a/src/day_1985.cpp b/src/day_1985.cpp index 90999bd03..a2f5bd299 100644 --- a/src/day_1985.cpp +++ b/src/day_1985.cpp @@ -6,9 +6,9 @@ using namespace Rcpp; #include /* for bitset */ #include /* for log2() */ #include /* for unique_ptr, make_unique */ -#include "tree_distances.h" -#include "SplitList.h" -#include "information.h" +#include "tree_distances.hpp" +#include "SplitList.hpp" +#include "information.hpp" const int16 INF = INTX_MAX, diff --git a/src/information.h b/src/information.hpp similarity index 95% rename from src/information.h rename to src/information.hpp index 1e169e544..5993e4a7b 100644 --- a/src/information.h +++ b/src/information.hpp @@ -1,8 +1,8 @@ -#ifndef _TREEDIST_INFO_H -#define _TREEDIST_INFO_H +#ifndef _TREEDIST_INFO_HPP +#define _TREEDIST_INFO_HPP #include /* for log2() */ -#include "ints.h" /* for int16 */ +#include "ints.hpp" /* for int16 */ const int_fast32_t DAY_MAX_LEAVES = 16383, diff --git a/src/ints.h b/src/ints.hpp similarity index 85% rename from src/ints.h rename to src/ints.hpp index ea7c6e62a..878eee602 100644 --- a/src/ints.h +++ b/src/ints.hpp @@ -1,5 +1,5 @@ -#ifndef _TREEDIST_INT_H -#define _TREEDIST_INT_H +#ifndef _TREEDIST_INT_HPP +#define _TREEDIST_INT_HPP #include diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp index 05fcd4a8a..f94ab25a4 100644 --- a/src/nni_distance.cpp +++ b/src/nni_distance.cpp @@ -1,7 +1,7 @@ #include #include -#include "tree_distances.h" -#include "SplitList.h" +#include "tree_distances.hpp" +#include "SplitList.hpp" using namespace Rcpp; diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp index 133762d17..df59a870d 100644 --- a/src/tree_distance_functions.cpp +++ b/src/tree_distance_functions.cpp @@ -1,8 +1,8 @@ #include using namespace Rcpp; #include /* for log2() */ -#include "tree_distances.h" -#include "SplitList.h" +#include "tree_distances.hpp" +#include "SplitList.hpp" uint_fast32_t bitcounts[65536]; // the bytes representing bit count of each number 0-65535 diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 26e478d3b..bfc9efc0c 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -1,8 +1,8 @@ #include #include /* for unique_ptr, make_unique */ #include -#include "tree_distances.h" -#include "SplitList.h" +#include "tree_distances.hpp" +#include "SplitList.hpp" using namespace Rcpp; diff --git a/src/tree_distances.h b/src/tree_distances.hpp similarity index 100% rename from src/tree_distances.h rename to src/tree_distances.hpp From 210db89aa48b89f16e94dbfc09e7f02b96d03d5e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 05:08:58 +0100 Subject: [PATCH 10/80] Initialize SplitRoster() --- src/SplitList.hpp | 1 + src/SplitRoster.cpp | 27 ++++++++++++++------------- src/SplitRoster.hpp | 6 +++++- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/SplitList.hpp b/src/SplitList.hpp index 0a7a68a14..2173bd560 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.hpp @@ -3,6 +3,7 @@ #include #include +#include "ints.hpp" #include "tree_distances.hpp" using namespace Rcpp; diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index b3efcc031..99449049f 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -1,6 +1,8 @@ +#include #include "tree_distances.hpp" #include "SplitList.hpp" #include "SplitRoster.hpp" +using namespace Rcpp; inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, @@ -42,28 +44,27 @@ inline int16 play_game(const int16 *node, } // [[Rcpp::export]] -SplitRoster::SplitRoster (const List x, const IntegerVector nTip) { - const int16 - n_tips = nTip[0], - n_trees = x.length() - ; - auto splits = std::make_unique(n_trees); - for (int16 i = x.length(); i--; ) { +SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { + n_tips = nTip[0]; + n_trees = x.length(); + splits = std::make_unique(n_trees); + + for (int16 i = n_trees; i--; ) { const RawMatrix a = x[i]; splits[i] = SplitList(a); splits[i].quicksort(); } - const int16 n_bins = splits[0].n_bins; + n_bins = splits[0].n_bins; const int16 max_splits = n_tips - 3; - auto split_library = std::make_unique(n_trees * max_splits * - splits[0].n_bins); + roster = std::make_unique(n_trees * max_splits * n_bins); + // Populate roster using k-way merge with tournament tree const int16 tournament_rounds = std::ceil(std::log2(double(n_trees))) + 1, - tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; + tournament_games = n_trees - 1, + tournament_nodes = n_trees + tournament_games + ; auto which_tree = std::make_unique(tournament_nodes); auto which_split = std::make_unique(tournament_nodes); diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 2f3a1d075..85ddf7658 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -3,14 +3,18 @@ #include #include +#include "ints.hpp" #include "tree_distances.hpp" #include "SplitList.hpp" using namespace Rcpp; class SplitRoster { + std::unique_ptr splits; + std::unique_ptr roster; + int16 n_bins; public: - int16 n_splits, n_trees; + int16 n_tips, n_splits, n_trees; }; #endif From 4f777955315955cdd08464f3a6c69b4b9debc966 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 05:37:24 +0100 Subject: [PATCH 11/80] int32s --- src/SplitRoster.cpp | 68 ++++++++++++++++++++++++++++----------------- src/SplitRoster.hpp | 16 +++++++++-- 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 99449049f..856699a0a 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -1,13 +1,14 @@ #include +#include "ints.hpp" #include "tree_distances.hpp" #include "SplitList.hpp" #include "SplitRoster.hpp" using namespace Rcpp; -inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, - const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b, - const int16* n_bins) { - for (int16 bin = 0; bin != *n_bins; ++bin) { +inline bool SplitRoster::game_result( + const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { + for (int16 bin = 0; bin != n_bins; ++bin) { if (a[split_a][bin] > b[split_b][bin]) { return true; } @@ -15,13 +16,12 @@ inline bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 s return false; } -inline int16 play_game(const int16 *node, - std::unique_ptr &which_tree, - std::unique_ptr &which_split, - std::unique_ptr &splits, - const int16* n_bins) { - const int16 - child1 = *node * 2, +inline int32 SplitRoster::play_game( + const int32 *node, + std::unique_ptr &which_tree, + std::unique_ptr &which_split) { + const int32 + child1 = *node * 2, child2 = child1 + 1 ; @@ -33,23 +33,32 @@ inline int16 play_game(const int16 *node, } else { child1_greater = game_result( splits[which_tree[child1]].state, which_split[child1], - splits[which_tree[child2]].state, which_split[child2], - n_bins); + splits[which_tree[child2]].state, which_split[child2])); } - const int16 loser = child1_greater ? child2 : child1; + const int32 loser = child1_greater ? child2 : child1; which_tree[*node] = which_tree[loser]; which_split[*node] = which_split[loser]; return child1_greater ? child1 : child2; } +inline void SplitRoster::push( + const int32 node, + std::unique_ptr &which_tree, + std::unique_ptr &which_split) { + + roster_tree[roster_pos] = which_tree[node]; + roster_split[roster_pos] = which_split[node]; + ++roster_pos; +} + // [[Rcpp::export]] SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_tips = nTip[0]; n_trees = x.length(); splits = std::make_unique(n_trees); - for (int16 i = n_trees; i--; ) { + for (int32 i = n_trees; i--; ) { const RawMatrix a = x[i]; splits[i] = SplitList(a); splits[i].quicksort(); @@ -57,11 +66,12 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_bins = splits[0].n_bins; const int16 max_splits = n_tips - 3; - roster = std::make_unique(n_trees * max_splits * n_bins); + roster_tree = std::make_unique(n_trees * max_splits); + roster_split = std::make_unique(n_trees * max_splits); + roster_hits = std::make_unique(n_trees * max_splits); // Populate roster using k-way merge with tournament tree - const int16 - tournament_rounds = std::ceil(std::log2(double(n_trees))) + 1, + const int32 tournament_games = n_trees - 1, tournament_nodes = n_trees + tournament_games ; @@ -71,24 +81,30 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { // Populate children of tree - for (int16 i = 0; i != n_trees; ++i) { - const int16 this_node = i + tournament_games; + for (int32 i = 0; i != n_trees; ++i) { + const int32 this_node = i + tournament_games; which_tree[this_node] = i; which_split[this_node] = splits[i].n_splits - 1; } - for (int16 i = n_trees + tournament_games; i != tournament_nodes; ++i) { + for (int32 i = n_trees + tournament_games; i != tournament_nodes; ++i) { which_split[i] = -1; } // Initial games - for (int16 i = tournament_games; --i; ) { - // TODO duplicate function as void to avoid overhead of return - play_game(&i, which_tree, which_split, splits, &n_bins); + for (int32 i = tournament_games; --i; ) { + // TODO duplicate function as void to avoid overhead of unused return + play_game(&i, which_tree, which_split); } - int16 library_position = 0, winning_split; + const int16 zero = 0; + const int16 winner = play_game(&zero, which_tree, which_split); + + roster_tree[0] = which_tree[winner]; + roster_split[0] = which_split[winner]; + roster_hits[0] = 1; + roster_pos = 1; do { - update_library(&library_position, + push(&library_position, } while (winning_split > -1); diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 85ddf7658..b80b27cff 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -10,11 +10,21 @@ using namespace Rcpp; class SplitRoster { + bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); + int32 play_game( + const int32 *node, + std::unique_ptr &which_tree, + std::unique_ptr &which_split); + void push(); std::unique_ptr splits; - std::unique_ptr roster; - int16 n_bins; + std::unique_ptr roster_tree; + std::unique_ptr roster_split; + std::unique_ptr roster_hits; + int16 n_bins, roster_pos; public: - int16 n_tips, n_splits, n_trees; + int16 n_tips, n_splits; + int32 n_trees; }; #endif From fc86ba253572c33f7bdb995c5eaf8c13486654eb Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 05:55:17 +0100 Subject: [PATCH 12/80] Log splits --- src/SplitRoster.cpp | 42 +++++++++++++++++++++++++++++++++++------- src/SplitRoster.hpp | 5 ++++- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 856699a0a..4ea660699 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -16,6 +16,18 @@ inline bool SplitRoster::game_result( return false; } +inline bool SplitRoster::splits_equal( + const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { + for (int16 bin = n_bins; bin--; ) { + if (a[split_a][bin] != b[split_b][bin]) { + return false; + } + } + return true; +} + + inline int32 SplitRoster::play_game( const int32 *node, std::unique_ptr &which_tree, @@ -46,10 +58,21 @@ inline void SplitRoster::push( const int32 node, std::unique_ptr &which_tree, std::unique_ptr &which_split) { - - roster_tree[roster_pos] = which_tree[node]; - roster_split[roster_pos] = which_split[node]; - ++roster_pos; + const int32 new_tree = which_tree[node]; + const int16 new_split = which_split[node]; + if (splits_equal(splits[which_tree[new_tree]].state, + which_split[new_split], + splits[roster_tree[roster_pos]].state, + roster_split[roster_pos])) { + ++roster_hits[roster_pos]; + } else { + ++roster_pos; + roster_tree[roster_pos] = new_tree; + roster_split[roster_pos] = new_split; + roster_size[roster_pos] = splits[which_tree[new_tree]].in_split; + roster_hits[roster_pos] = 1; + } + index[new_tree][new_split] = roster_pos; } // [[Rcpp::export]] @@ -68,7 +91,12 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { const int16 max_splits = n_tips - 3; roster_tree = std::make_unique(n_trees * max_splits); roster_split = std::make_unique(n_trees * max_splits); + roster_size = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); + index = std::make_unique[]>(n_trees); + for (int32 i = n_trees; i--; ) { + index[i] = std::make_unique(max_splits); + } // Populate roster using k-way merge with tournament tree const int32 @@ -96,13 +124,13 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { play_game(&i, which_tree, which_split); } - const int16 zero = 0; - const int16 winner = play_game(&zero, which_tree, which_split); + roster_pos = 0; + const int16 winner = play_game(&roster_pos, which_tree, which_split); roster_tree[0] = which_tree[winner]; roster_split[0] = which_split[winner]; roster_hits[0] = 1; - roster_pos = 1; + do { push(&library_position, } while (winning_split > -1); diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index b80b27cff..076e12bf2 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -20,8 +20,11 @@ class SplitRoster { std::unique_ptr splits; std::unique_ptr roster_tree; std::unique_ptr roster_split; + std::unique_ptr roster_size; std::unique_ptr roster_hits; - int16 n_bins, roster_pos; + std::unique_ptr[]> index; + int16 n_bins; + int32 roster_pos; public: int16 n_tips, n_splits; int32 n_trees; From 2e02e24b1089e36259cef0c698e6e779ab936b44 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 06:23:46 +0100 Subject: [PATCH 13/80] Tournament implementation --- src/SplitRoster.cpp | 32 ++++++++++++++++++++------------ src/SplitRoster.hpp | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 4ea660699..3b962dedf 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -28,7 +28,7 @@ inline bool SplitRoster::splits_equal( } -inline int32 SplitRoster::play_game( +inline int32 SplitRoster::game_winner( const int32 *node, std::unique_ptr &which_tree, std::unique_ptr &which_split) { @@ -106,6 +106,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { auto which_tree = std::make_unique(tournament_nodes); auto which_split = std::make_unique(tournament_nodes); + auto winner_index = std::make_unique(tournament_nodes); // Populate children of tree @@ -113,30 +114,37 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { const int32 this_node = i + tournament_games; which_tree[this_node] = i; which_split[this_node] = splits[i].n_splits - 1; + winner_index[this_node] = this_node; } for (int32 i = n_trees + tournament_games; i != tournament_nodes; ++i) { which_split[i] = -1; } // Initial games - for (int32 i = tournament_games; --i; ) { + for (int32 i = tournament_games; i--; ) { // TODO duplicate function as void to avoid overhead of unused return - play_game(&i, which_tree, which_split); + winner_index[i] = game_winner(&i, which_tree, which_split); } roster_pos = 0; - const int16 winner = play_game(&roster_pos, which_tree, which_split); - - roster_tree[0] = which_tree[winner]; - roster_split[0] = which_split[winner]; + roster_tree[0] = which_tree[winner_index[0]]; + roster_split[0] = which_split[winner_index[0]]; roster_hits[0] = 1; - do { - push(&library_position, - } while (winning_split > -1); + for (; ; ) { + const int32 winner = winner_index[0]; + + + int32 i = winner; + do { + i /= 2; + winner_index[i] = game_winner(&i, which_tree, which_split); + } while (i); + + push(winner_index[0], which_tree, which_split); + } - } - \ No newline at end of file +} diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 076e12bf2..bbbac3c95 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -12,7 +12,7 @@ using namespace Rcpp; class SplitRoster { bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); - int32 play_game( + int32 game_winner( const int32 *node, std::unique_ptr &which_tree, std::unique_ptr &which_split); From b1d0276a2b2ceb993452e399608fcc7c23c848c6 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:30:05 +0100 Subject: [PATCH 14/80] Save winners & losers --- src/SplitRoster.cpp | 60 +++++++++++++++++++++++++-------------------- src/SplitRoster.hpp | 7 +++--- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 3b962dedf..ec5abf316 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -28,30 +28,36 @@ inline bool SplitRoster::splits_equal( } -inline int32 SplitRoster::game_winner( +inline void SplitRoster::play_game( const int32 *node, - std::unique_ptr &which_tree, + std::unique_ptr &winners, + std::unique_ptr &losers, std::unique_ptr &which_split) { const int32 child1 = *node * 2, - child2 = child1 + 1 + child2 = child1 + 1, + tree1 = winners[child1], + tree2 = winners[child2] ; bool child1_greater; - if (which_split[child1] < 0) { + if (tree1 < 0 || which_split[tree1] < 0) { child1_greater = false; - } else if (which_split[child2] < 0) { + } else if (tree2 < 0 || which_split[tree2] < 0) { child1_greater = true; } else { child1_greater = game_result( - splits[which_tree[child1]].state, which_split[child1], - splits[which_tree[child2]].state, which_split[child2])); + splits[tree1].state, which_split[tree1], + splits[tree2].state, which_split[tree2])); } - const int32 loser = child1_greater ? child2 : child1; - which_tree[*node] = which_tree[loser]; - which_split[*node] = which_split[loser]; - return child1_greater ? child1 : child2; + if (child1_greater) { + winners[*node] = tree1; + losers[*node] = tree2; + } else { + winners[*node] = tree2; + losers[*node] = tree1; + } } inline void SplitRoster::push( @@ -75,7 +81,7 @@ inline void SplitRoster::push( index[new_tree][new_split] = roster_pos; } -// [[Rcpp::export]] + SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_tips = nTip[0]; n_trees = x.length(); @@ -104,44 +110,44 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { tournament_nodes = n_trees + tournament_games ; - auto which_tree = std::make_unique(tournament_nodes); - auto which_split = std::make_unique(tournament_nodes); - auto winner_index = std::make_unique(tournament_nodes); + auto which_split = std::make_unique(n_trees); + auto winners = std::make_unique(tournament_nodes); + auto losers = std::make_unique(tournament_nodes); // Populate children of tree for (int32 i = 0; i != n_trees; ++i) { - const int32 this_node = i + tournament_games; - which_tree[this_node] = i; - which_split[this_node] = splits[i].n_splits - 1; - winner_index[this_node] = this_node; + which_split[i] = splits[i].n_splits - 1; + winners[i + tournament_games] = i; } for (int32 i = n_trees + tournament_games; i != tournament_nodes; ++i) { which_split[i] = -1; + winners[i] = -1; } // Initial games for (int32 i = tournament_games; i--; ) { - // TODO duplicate function as void to avoid overhead of unused return - winner_index[i] = game_winner(&i, which_tree, which_split); + play_game(&i, which_split, winners, losers); } roster_pos = 0; - roster_tree[0] = which_tree[winner_index[0]]; - roster_split[0] = which_split[winner_index[0]]; + roster_tree[0] = winners[0]; + roster_split[0] = which_split[winners[0]]; roster_hits[0] = 1; for (; ; ) { - const int32 winner = winner_index[0]; - + const int32 winner = winners[0]; + --which_split[winner]; int32 i = winner; do { i /= 2; - winner_index[i] = game_winner(&i, which_tree, which_split); + play_game(&i, which_tree, which_split, winners, losers); } while (i); - push(winner_index[0], which_tree, which_split); + if (which_split[winners[0]] < 0) break; + + push(winners[0], which_tree, which_split); } diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index bbbac3c95..11a94c76b 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -12,10 +12,11 @@ using namespace Rcpp; class SplitRoster { bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); - int32 game_winner( + void SplitRoster::play_game( const int32 *node, - std::unique_ptr &which_tree, - std::unique_ptr &which_split); + std::unique_ptr &winners, + std::unique_ptr &losers, + std::unique_ptr &which_split) void push(); std::unique_ptr splits; std::unique_ptr roster_tree; From 06bae678aef3e5ab975236dd2d594565eccac3cf Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:38:25 +0100 Subject: [PATCH 15/80] Simplify --- src/SplitRoster.cpp | 20 +++++++++----------- src/SplitRoster.hpp | 8 +++++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index ec5abf316..3ab09ba28 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -61,24 +61,22 @@ inline void SplitRoster::play_game( } inline void SplitRoster::push( - const int32 node, - std::unique_ptr &which_tree, + const int32 tree, std::unique_ptr &which_split) { - const int32 new_tree = which_tree[node]; - const int16 new_split = which_split[node]; - if (splits_equal(splits[which_tree[new_tree]].state, + const int16 new_split = which_split[tree]; + if (splits_equal(splits[tree].state, which_split[new_split], splits[roster_tree[roster_pos]].state, roster_split[roster_pos])) { ++roster_hits[roster_pos]; } else { ++roster_pos; - roster_tree[roster_pos] = new_tree; + roster_tree[roster_pos] = tree; roster_split[roster_pos] = new_split; - roster_size[roster_pos] = splits[which_tree[new_tree]].in_split; + roster_size[roster_pos] = splits[tree].in_split; roster_hits[roster_pos] = 1; } - index[new_tree][new_split] = roster_pos; + index[tree][new_split] = roster_pos; } @@ -127,7 +125,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { // Initial games for (int32 i = tournament_games; i--; ) { - play_game(&i, which_split, winners, losers); + play_game(&i, winners, losers, which_split); } roster_pos = 0; @@ -142,12 +140,12 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { int32 i = winner; do { i /= 2; - play_game(&i, which_tree, which_split, winners, losers); + play_game(&i, winners, losers, which_split); } while (i); if (which_split[winners[0]] < 0) break; - push(winners[0], which_tree, which_split); + push(winners[0], which_split); } diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 11a94c76b..c06d9d8c0 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -12,12 +12,14 @@ using namespace Rcpp; class SplitRoster { bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); - void SplitRoster::play_game( + void play_game( const int32 *node, std::unique_ptr &winners, std::unique_ptr &losers, - std::unique_ptr &which_split) - void push(); + std::unique_ptr &which_split); + void push( + const int32 tree, + std::unique_ptr &which_split); std::unique_ptr splits; std::unique_ptr roster_tree; std::unique_ptr roster_split; From f1228cdd3c2335b4b8f12e34206e43f82c03b2d9 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:39:04 +0100 Subject: [PATCH 16/80] .hpp --- src/tree_distances.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree_distances.hpp b/src/tree_distances.hpp index dc56a925e..7aba769ee 100644 --- a/src/tree_distances.hpp +++ b/src/tree_distances.hpp @@ -3,7 +3,7 @@ #include #include -#include "ints.h" +#include "ints.hpp" using namespace Rcpp; From e9061b8d711d5c37c303fc57774e13ffe687dd2b Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:43:05 +0100 Subject: [PATCH 17/80] Declarations --- src/SplitRoster.cpp | 11 +++-------- src/SplitRoster.hpp | 29 +++++++++++++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 3ab09ba28..f8620a087 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -48,7 +48,7 @@ inline void SplitRoster::play_game( } else { child1_greater = game_result( splits[tree1].state, which_split[tree1], - splits[tree2].state, which_split[tree2])); + splits[tree2].state, which_split[tree2]); } if (child1_greater) { @@ -79,7 +79,6 @@ inline void SplitRoster::push( index[tree][new_split] = roster_pos; } - SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_tips = nTip[0]; n_trees = x.length(); @@ -105,8 +104,8 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { // Populate roster using k-way merge with tournament tree const int32 tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; + tournament_nodes = n_trees + tournament_games + ; auto which_split = std::make_unique(n_trees); auto winners = std::make_unique(tournament_nodes); @@ -147,8 +146,4 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { push(winners[0], which_split); } - - - - } diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index c06d9d8c0..f0257af23 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -10,8 +10,22 @@ using namespace Rcpp; class SplitRoster { - bool game_result(const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, - const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); + int16 n_bins; + int32 roster_pos; + + std::unique_ptr splits; + std::unique_ptr roster_tree; + std::unique_ptr roster_split; + std::unique_ptr roster_size; + std::unique_ptr roster_hits; + std::unique_ptr[]> index; + + bool splits_equal( + const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); + bool game_result( + const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b); void play_game( const int32 *node, std::unique_ptr &winners, @@ -20,17 +34,12 @@ class SplitRoster { void push( const int32 tree, std::unique_ptr &which_split); - std::unique_ptr splits; - std::unique_ptr roster_tree; - std::unique_ptr roster_split; - std::unique_ptr roster_size; - std::unique_ptr roster_hits; - std::unique_ptr[]> index; - int16 n_bins; - int32 roster_pos; + public: int16 n_tips, n_splits; int32 n_trees; + + SplitRoster(const List x, const IntegerVector nTip); }; #endif From 2c1d60d9543ac1e82f41db13dd0b6aecf79cf97a Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:51:46 +0100 Subject: [PATCH 18/80] vector of arrays --- src/SplitRoster.cpp | 30 +++++++++++++----------------- src/SplitRoster.hpp | 3 ++- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index f8620a087..6b288f342 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -5,17 +5,6 @@ #include "SplitRoster.hpp" using namespace Rcpp; -inline bool SplitRoster::game_result( - const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, - const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { - for (int16 bin = 0; bin != n_bins; ++bin) { - if (a[split_a][bin] > b[split_b][bin]) { - return true; - } - } - return false; -} - inline bool SplitRoster::splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { @@ -27,6 +16,16 @@ inline bool SplitRoster::splits_equal( return true; } +inline bool SplitRoster::game_result( + const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, + const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { + for (int16 bin = 0; bin != n_bins; ++bin) { + if (a[split_a][bin] > b[split_b][bin]) { + return true; + } + } + return false; +} inline void SplitRoster::play_game( const int32 *node, @@ -96,10 +95,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { roster_split = std::make_unique(n_trees * max_splits); roster_size = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); - index = std::make_unique[]>(n_trees); - for (int32 i = n_trees; i--; ) { - index[i] = std::make_unique(max_splits); - } + index = std::vector[]>(n_trees); // Populate roster using k-way merge with tournament tree const int32 @@ -107,9 +103,9 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { tournament_nodes = n_trees + tournament_games ; + auto winners = std::make_unique(tournament_nodes); + auto losers = std::make_unique(tournament_nodes); auto which_split = std::make_unique(n_trees); - auto winners = std::make_unique(tournament_nodes); - auto losers = std::make_unique(tournament_nodes); // Populate children of tree diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index f0257af23..3f7c623b0 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -1,6 +1,7 @@ #ifndef _TREEDIST_SPLITROSTER_HPP #define _TREEDIST_SPLITROSTER_HPP +#include #include #include #include "ints.hpp" @@ -18,7 +19,7 @@ class SplitRoster { std::unique_ptr roster_split; std::unique_ptr roster_size; std::unique_ptr roster_hits; - std::unique_ptr[]> index; + std::vector[]> index; bool splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, From 36a0ccfeba8766b357335dc70fbecb13581f2ea5 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:56:27 +0100 Subject: [PATCH 19/80] in_split is int16 --- src/SplitList.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitList.hpp b/src/SplitList.hpp index 2173bd560..99f4f822a 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.hpp @@ -17,8 +17,8 @@ class SplitList { bool greater_than(int16 a, int16 b); public: int16 n_splits, n_bins; + int16 in_split[MAX_SPLITS]; splitbit state[MAX_SPLITS][MAX_BINS]; - splitbit in_split[MAX_SPLITS]; SplitList(RawMatrix); void quicksort(); void quicksort(int16 lo, int16 hi); From 33ded8fdd3e8fef13accbe4d645fdddbdef2ebc4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 08:56:33 +0100 Subject: [PATCH 20/80] types --- src/SplitRoster.cpp | 6 +++--- src/SplitRoster.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 6b288f342..e3e345dc3 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -95,13 +95,13 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { roster_split = std::make_unique(n_trees * max_splits); roster_size = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); - index = std::vector[]>(n_trees); + index = std::vector[]>(n_trees); // Populate roster using k-way merge with tournament tree const int32 tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; + tournament_nodes = n_trees + tournament_games + ; auto winners = std::make_unique(tournament_nodes); auto losers = std::make_unique(tournament_nodes); diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 3f7c623b0..c2b8da1c5 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -19,7 +19,7 @@ class SplitRoster { std::unique_ptr roster_split; std::unique_ptr roster_size; std::unique_ptr roster_hits; - std::vector[]> index; + std::vector[]> index; bool splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, From 8724fff028a285c88edc0fb0efa66881781ff7b9 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 10:29:32 +0100 Subject: [PATCH 21/80] index format --- src/SplitRoster.cpp | 4 ++-- src/SplitRoster.hpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index e3e345dc3..ff0a9c7b9 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -72,7 +72,7 @@ inline void SplitRoster::push( ++roster_pos; roster_tree[roster_pos] = tree; roster_split[roster_pos] = new_split; - roster_size[roster_pos] = splits[tree].in_split; + roster_size[roster_pos] = splits[tree].in_split[new_split]; roster_hits[roster_pos] = 1; } index[tree][new_split] = roster_pos; @@ -95,7 +95,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { roster_split = std::make_unique(n_trees * max_splits); roster_size = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); - index = std::vector[]>(n_trees); + index.reserve(n_trees); // Populate roster using k-way merge with tournament tree const int32 diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index c2b8da1c5..290c6436e 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -2,6 +2,7 @@ #define _TREEDIST_SPLITROSTER_HPP #include +#include #include #include #include "ints.hpp" @@ -19,7 +20,7 @@ class SplitRoster { std::unique_ptr roster_split; std::unique_ptr roster_size; std::unique_ptr roster_hits; - std::vector[]> index; + std::vector > index; bool splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, From 76593497041be2c4a27872b2b9e2a42a197aa65a Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 11:37:16 +0100 Subject: [PATCH 22/80] hpp --- src/lap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lap.cpp b/src/lap.cpp index 65a3ab970..f292a1b36 100644 --- a/src/lap.cpp +++ b/src/lap.cpp @@ -24,7 +24,7 @@ https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0: * *************************************************************************/ -#include "tree_distances.h" +#include "tree_distances.hpp" using namespace std; From 97d4c407d7932952073bcf84df18e78b9c77af4b Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 11:37:38 +0100 Subject: [PATCH 23/80] std::vector splits --- src/SplitRoster.cpp | 6 +++--- src/SplitRoster.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index ff0a9c7b9..d076880e3 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -81,11 +81,11 @@ inline void SplitRoster::push( SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_tips = nTip[0]; n_trees = x.length(); - splits = std::make_unique(n_trees); + splits.reserve(n_trees); - for (int32 i = n_trees; i--; ) { + for (int32 i = 0; i != n_trees; ++i) { const RawMatrix a = x[i]; - splits[i] = SplitList(a); + splits.emplace_back(SplitList(a)); splits[i].quicksort(); } n_bins = splits[0].n_bins; diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 290c6436e..2dca6763d 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -15,7 +15,7 @@ class SplitRoster { int16 n_bins; int32 roster_pos; - std::unique_ptr splits; + std::vector splits; std::unique_ptr roster_tree; std::unique_ptr roster_split; std::unique_ptr roster_size; From 7de40cd881aecf29d9686d5ee95555789c459fc4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 11:38:08 +0100 Subject: [PATCH 24/80] hpp --- src/mast.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mast.cpp b/src/mast.cpp index 15bb6e938..1cba125f2 100644 --- a/src/mast.cpp +++ b/src/mast.cpp @@ -1,6 +1,6 @@ #include #include -#include "ints.h" +#include "ints.hpp" using namespace Rcpp; const int16 MAST_MAX_NODE = 4095, /* Increasing --> overflow size of stack frame */ From 18cd5d35a7b1ca7643df556371508ca169b09a2e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 11:51:28 +0100 Subject: [PATCH 25/80] Correct assertions --- src/tree_distance_functions.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp index df59a870d..d123de02e 100644 --- a/src/tree_distance_functions.cpp +++ b/src/tree_distance_functions.cpp @@ -34,20 +34,21 @@ double *lg2_rooted = &lg2_unrooted[0] + 1; __attribute__((constructor)) void initialize_ldf() { lg2[0] = 0; - for (int32 i = 1; i != int32(MAX_TIPS - 1) * (MAX_TIPS - 1) + 1; i++) { + for (int32 i = 1; i != int32(MAX_TIPS - 1) * (MAX_TIPS - 1) + 1; ++i) { lg2[i] = log2(i); } - for (int16 i = 0; i != 3; i++) { + for (int16 i = 0; i != 3; ++i) { lg2_double_factorial[i] = 0; lg2_unrooted[i] = 0; - assert(lg2_rooted[i] = 0); } - for (int16 i = 2; i != MAX_TIPS + MAX_TIPS - 2; i++) { + assert(lg2_rooted[0] == 0); + assert(lg2_rooted[1] == 0); + for (int16 i = 2; i != MAX_TIPS + MAX_TIPS - 2; ++i) { lg2_double_factorial[i] = lg2_double_factorial[i - 2] + lg2[i]; } - for (int16 i = 3; i != MAX_TIPS + 2; i++) { + for (int16 i = 3; i != MAX_TIPS + 2; ++i) { lg2_unrooted[i] = lg2_double_factorial[i + i - 5]; - assert(lg2_rooted[i] == lg2_double_factorial[i + i - 3]); + assert(lg2_rooted[i - 1] == lg2_double_factorial[i + i - 5]); } } From fa24471f9f4bf4980cbac0c3ec850e8b15141967 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 11:56:00 +0100 Subject: [PATCH 26/80] emplace_back takes params only --- src/SplitRoster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index d076880e3..5cc470a7b 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -84,8 +84,8 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { splits.reserve(n_trees); for (int32 i = 0; i != n_trees; ++i) { - const RawMatrix a = x[i]; - splits.emplace_back(SplitList(a)); + const RawMatrix xi = x[i]; + splits.emplace_back(xi); splits[i].quicksort(); } n_bins = splits[0].n_bins; From 0eff372c6bda5b834305169037ce512eceb3f9f1 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 12:02:00 +0100 Subject: [PATCH 27/80] mvoe up --- src/SplitRoster.cpp | 95 +++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 29 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 5cc470a7b..e6fedd961 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -5,6 +5,72 @@ #include "SplitRoster.hpp" using namespace Rcpp; +SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { + n_tips = nTip[0]; + n_trees = x.length(); + splits.reserve(n_trees); + + for (int32 i = 0; i != n_trees; ++i) { + const RawMatrix xi = x[i]; + splits.emplace_back(xi); + splits[i].quicksort(); + } + n_bins = splits[0].n_bins; + + const int16 max_splits = n_tips - 3; + roster_tree = std::make_unique(n_trees * max_splits); + roster_split = std::make_unique(n_trees * max_splits); + roster_size = std::make_unique(n_trees * max_splits); + roster_hits = std::make_unique(n_trees * max_splits); + index.reserve(n_trees); + + // Populate roster using k-way merge with tournament tree + const int32 + tournament_games = n_trees - 1, + tournament_nodes = n_trees + tournament_games + ; + + auto winners = std::make_unique(tournament_nodes); + auto losers = std::make_unique(tournament_nodes); + auto which_split = std::make_unique(n_trees); + + + // Populate children of tree + for (int32 i = 0; i != n_trees; ++i) { + which_split[i] = splits[i].n_splits - 1; + winners[i + tournament_games] = i; + } + for (int32 i = n_trees + tournament_games; i != tournament_nodes; ++i) { + which_split[i] = -1; + winners[i] = -1; + } + + // Initial games + for (int32 i = tournament_games; i--; ) { + play_game(&i, winners, losers, which_split); + } + + roster_pos = 0; + roster_tree[0] = winners[0]; + roster_split[0] = which_split[winners[0]]; + roster_hits[0] = 1; + + for (; ; ) { + const int32 winner = winners[0]; + --which_split[winner]; + + int32 i = winner; + do { + i /= 2; + play_game(&i, winners, losers, which_split); + } while (i); + + if (which_split[winners[0]] < 0) break; + + push(winners[0], which_split); + } +} + inline bool SplitRoster::splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, const splitbit (&b)[MAX_SPLITS][MAX_BINS], const int16 split_b) { @@ -78,35 +144,6 @@ inline void SplitRoster::push( index[tree][new_split] = roster_pos; } -SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { - n_tips = nTip[0]; - n_trees = x.length(); - splits.reserve(n_trees); - - for (int32 i = 0; i != n_trees; ++i) { - const RawMatrix xi = x[i]; - splits.emplace_back(xi); - splits[i].quicksort(); - } - n_bins = splits[0].n_bins; - - const int16 max_splits = n_tips - 3; - roster_tree = std::make_unique(n_trees * max_splits); - roster_split = std::make_unique(n_trees * max_splits); - roster_size = std::make_unique(n_trees * max_splits); - roster_hits = std::make_unique(n_trees * max_splits); - index.reserve(n_trees); - - // Populate roster using k-way merge with tournament tree - const int32 - tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; - - auto winners = std::make_unique(tournament_nodes); - auto losers = std::make_unique(tournament_nodes); - auto which_split = std::make_unique(n_trees); - // Populate children of tree for (int32 i = 0; i != n_trees; ++i) { From 193f19e20d53dccdfdbb9dff39f09ae93bbd4077 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Tue, 20 Jul 2021 12:03:41 +0100 Subject: [PATCH 28/80] begin cpp_all_pairs_mci --- R/RcppExports.R | 4 + R/tree_distance_info.R | 2 +- src/RcppExports.cpp | 11 ++- src/SplitRoster.cpp | 195 +++++++++++++++++++++++++++++++++++------ src/tree_distances.cpp | 7 ++ 5 files changed, 186 insertions(+), 33 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 1abe44b60..1adeb2a11 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -57,6 +57,10 @@ cpp_msi_distance <- function(x, y, nTip) { .Call(`_TreeDist_cpp_msi_distance`, x, y, nTip) } +cpp_all_pairs_mci <- function(x, nTip) { + .Call(`_TreeDist_cpp_all_pairs_mci`, x, nTip) +} + cpp_mutual_clustering <- function(x, y, nTip) { .Call(`_TreeDist_cpp_mutual_clustering`, x, y, nTip) } diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index 7f3a2fa49..0bf500742 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -299,7 +299,7 @@ MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, reportMatching = FALSE, diag = TRUE) { unnormalized <- if (is.null(tree2) && is.list(tree1) && !inherits(tree1, 'phylo')) { - cpp_all_pairs_msi(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) + cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) } else { CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, reportMatching) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 852f0f7e0..2afb43d76 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -178,15 +178,14 @@ BEGIN_RCPP END_RCPP } // cpp_all_pairs_mci -List cpp_all_pairs_mci(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_all_pairs_mci(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +NumericVector cpp_all_pairs_mci(const List x, const IntegerVector nTip); +RcppExport SEXP _TreeDist_cpp_all_pairs_mci(SEXP xSEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); - Rcpp::traits::input_parameter< const RawMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< const List >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type nTip(nTipSEXP); - rcpp_result_gen = Rcpp::wrap(cpp_all_pairs_mci(x, y, nTip)); + rcpp_result_gen = Rcpp::wrap(cpp_all_pairs_mci(x, nTip)); return rcpp_result_gen; END_RCPP } @@ -232,7 +231,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3}, {"_TreeDist_cpp_jaccard_similarity", (DL_FUNC) &_TreeDist_cpp_jaccard_similarity, 5}, {"_TreeDist_cpp_msi_distance", (DL_FUNC) &_TreeDist_cpp_msi_distance, 3}, - {"_TreeDist_cpp_all_pairs_mci", (DL_FUNC) &_TreeDist_cpp_all_pairs_mci, 3}, + {"_TreeDist_cpp_all_pairs_mci", (DL_FUNC) &_TreeDist_cpp_all_pairs_mci, 2}, {"_TreeDist_cpp_mutual_clustering", (DL_FUNC) &_TreeDist_cpp_mutual_clustering, 3}, {"_TreeDist_cpp_shared_phylo", (DL_FUNC) &_TreeDist_cpp_shared_phylo, 3}, {NULL, NULL, 0} diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index e6fedd961..e86bb576d 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -144,39 +144,182 @@ inline void SplitRoster::push( index[tree][new_split] = roster_pos; } +NumericVector SplitRoster::mutual_clustering() { + + const SplitList a(x), b(y); + const bool a_has_more_splits = (a.n_splits > b.n_splits); + const int16 + most_splits = a_has_more_splits ? a.n_splits : b.n_splits, + a_extra_splits = a_has_more_splits ? most_splits - b.n_splits : 0, + b_extra_splits = a_has_more_splits ? 0 : most_splits - a.n_splits, + n_tips = nTip[0] + ; + const cost max_score = BIG; - // Populate children of tree - for (int32 i = 0; i != n_trees; ++i) { - which_split[i] = splits[i].n_splits - 1; - winners[i + tournament_games] = i; + cost** score = new cost*[most_splits]; + for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; + double exact_match_score = 0; + int16 exact_matches = 0; + // NumericVector zero-initializes [so does make_unique] + // match will have one added to it so numbering follows R; hence 0 = UNMATCHED + NumericVector a_match(a.n_splits); + std::unique_ptr b_match = std::make_unique(b.n_splits); + + for (int16 ai = 0; ai != a.n_splits; ++ai) { + if (a_match[ai]) continue; + const int16 + na = a.in_split[ai], + nA = n_tips - na + ; + + for (int16 bi = 0; bi != b.n_splits; ++bi) { + + // x divides tips into a|A; y divides tips into b|B + int16 a_and_b = 0; + for (int16 bin = 0; bin != a.n_bins; ++bin) { + a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); + } + + const int16 + nb = b.in_split[bi], + nB = n_tips - nb, + a_and_B = na - a_and_b, + A_and_b = nb - a_and_b, + A_and_B = nA - A_and_b + ; + + if ((!a_and_B && !A_and_b) || + (!a_and_b && !A_and_B)) { + exact_match_score += ic_matching(na, nA, n_tips); + exact_matches++; + a_match[ai] = bi + 1; + b_match[bi] = ai + 1; + break; + } else if (a_and_b == A_and_b && + a_and_b == a_and_B && + a_and_b == A_and_B) { + score[ai][bi] = max_score; // Don't risk rounding error + } else { + score[ai][bi] = max_score - + // Division by n_tips converts n(A&B) to P(A&B) for each ic_element + cost(max_score * (( + // 0 < Sum of IC_elements <= n_tips + ic_element(a_and_b, na, nb, n_tips) + + ic_element(a_and_B, na, nB, n_tips) + + ic_element(A_and_b, nA, nb, n_tips) + + ic_element(A_and_B, nA, nB, n_tips) + ) / n_tips) + ); + } + } + for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + score[ai][bi] = max_score; + } } - for (int32 i = n_trees + tournament_games; i != tournament_nodes; ++i) { - which_split[i] = -1; - winners[i] = -1; + if (exact_matches == b.n_splits || exact_matches == a.n_splits) { + for (int16 i = most_splits; i--; ) delete[] score[i]; + delete[] score; + + return List::create( + Named("score") = NumericVector::create(exact_match_score / n_tips), + _["matching"] = a_match); } - // Initial games - for (int32 i = tournament_games; i--; ) { - play_game(&i, winners, losers, which_split); - } - roster_pos = 0; - roster_tree[0] = winners[0]; - roster_split[0] = which_split[winners[0]]; - roster_hits[0] = 1; + const int16 lap_dim = most_splits - exact_matches; + lap_col *rowsol = new lap_col[lap_dim]; + lap_row *colsol = new lap_row[lap_dim]; + cost *u = new cost[lap_dim], *v = new cost[lap_dim]; - for (; ; ) { - const int32 winner = winners[0]; - --which_split[winner]; + if (exact_matches) { + int16 a_pos = 0; + for (int16 ai = 0; ai != a.n_splits; ++ai) { + if (a_match[ai]) continue; + int16 b_pos = 0; + for (int16 bi = 0; bi != b.n_splits; ++bi) { + if (b_match[bi]) continue; + score[a_pos][b_pos] = score[ai][bi]; + b_pos++; + } + for (int16 bi = lap_dim - a_extra_splits; bi < lap_dim; ++bi) { + score[a_pos][bi] = max_score; + } + a_pos++; + } + for (int16 ai = lap_dim - b_extra_splits; ai < lap_dim; ++ai) { + for (int16 bi = 0; bi != lap_dim; ++bi) { + score[ai][bi] = max_score; + } + } - int32 i = winner; - do { - i /= 2; - play_game(&i, winners, losers, which_split); - } while (i); + const double lap_score = + double((max_score * lap_dim) - lap(lap_dim, score, rowsol, colsol, u, v)) + / max_score; + NumericVector final_score = + NumericVector::create(lap_score + (exact_match_score / n_tips)); - if (which_split[winners[0]] < 0) break; + for (int16 i = most_splits; i--; ) delete[] score[i]; + delete[] colsol; delete[] u; delete[] v; delete[] score; - push(winners[0], which_split); + std::unique_ptr lap_decode = std::make_unique(lap_dim); + int16 fuzzy_match = 0; + for (int16 bi = 0; bi != b.n_splits; ++bi) { + if (!b_match[bi]) { + lap_decode[fuzzy_match++] = bi + 1; + } else { + } + } + + fuzzy_match = 0; + IntegerVector final_matching(a.n_splits); + for (int16 i = 0; i != a.n_splits; i++) { + if (a_match[i]) { + // Rcout << "a" << (1+i) << " exactly matches b" << a_match[i]<< "\n"; + final_matching[i] = a_match[i]; + } else { + const int16 this_sol = rowsol[fuzzy_match++]; + // Rcout << "a"<<(1+i) << " fuzzily matches rowsol[" << this_sol <<"] == " + // << rowsol[this_sol] << "; "; + if (rowsol[this_sol] >= lap_dim - a_extra_splits) { + // Rcout << " unmatched (NA)\n"; + final_matching[i] = NA_INTEGER; + } else { + // Rcout << " matched with b" << lap_decode[rowsol[this_sol]] <<".\n"; + final_matching[i] = lap_decode[rowsol[this_sol]]; + } + } + // Rcout << " "; + // if (final_matching[i] > 0) Rcout << final_matching[i]; else Rcout << "NA"; + } + + delete[] rowsol; + + return List::create(Named("score") = final_score, + _["matching"] = final_matching); + } else { + for (int16 ai = a.n_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi != most_splits; ++bi) { + score[ai][bi] = max_score; + } + } + + const double lap_score = double( + (max_score * lap_dim) - + lap(most_splits, score, rowsol, colsol, u, v) + ) / max_score; + NumericVector final_score = NumericVector::create(lap_score); + + for (int16 i = most_splits; i--; ) delete[] score[i]; + delete[] colsol; delete[] u; delete[] v; delete[] score; + + IntegerVector final_matching (a.n_splits); + for (int16 i = a.n_splits; i--; ) { + final_matching[i] = (rowsol[i] < b.n_splits) ? rowsol[i] + 1 : NA_INTEGER; + } + + delete[] rowsol; + + return List::create(Named("score") = final_score, + _["matching"] = final_matching); } -} +} \ No newline at end of file diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index bfc9efc0c..3d76f29fc 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -3,6 +3,7 @@ #include #include "tree_distances.hpp" #include "SplitList.hpp" +#include "SplitRoster.hpp" using namespace Rcpp; @@ -380,6 +381,12 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, } +// [[Rcpp::export]] +NumericVector cpp_all_pairs_mci(const List x, const IntegerVector nTip) { + SplitRoster roster(x, nTip); + +} + // [[Rcpp::export]] List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, const IntegerVector nTip) { From 35248fdf7a018bf9ef89fdda592e42c273ff2166 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 08:35:28 +0100 Subject: [PATCH 29/80] Framework for self-comparison --- src/SplitRoster.cpp | 95 ++++++++++++++++++++++++++++++++++++++------- src/SplitRoster.hpp | 9 +++-- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index e86bb576d..27a7ba104 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -20,7 +20,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { const int16 max_splits = n_tips - 3; roster_tree = std::make_unique(n_trees * max_splits); roster_split = std::make_unique(n_trees * max_splits); - roster_size = std::make_unique(n_trees * max_splits); + in_split = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); index.reserve(n_trees); @@ -50,7 +50,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { play_game(&i, winners, losers, which_split); } - roster_pos = 0; + roster_len = 0; roster_tree[0] = winners[0]; roster_split[0] = which_split[winners[0]]; roster_hits[0] = 1; @@ -69,6 +69,9 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { push(winners[0], which_split); } + + ++roster_len; // Point to first position after array + score.reserve(roster_len * roster_len); } inline bool SplitRoster::splits_equal( @@ -131,21 +134,87 @@ inline void SplitRoster::push( const int16 new_split = which_split[tree]; if (splits_equal(splits[tree].state, which_split[new_split], - splits[roster_tree[roster_pos]].state, - roster_split[roster_pos])) { - ++roster_hits[roster_pos]; + splits[roster_tree[roster_len]].state, + roster_split[roster_len])) { + ++roster_hits[roster_len]; } else { - ++roster_pos; - roster_tree[roster_pos] = tree; - roster_split[roster_pos] = new_split; - roster_size[roster_pos] = splits[tree].in_split[new_split]; - roster_hits[roster_pos] = 1; + ++roster_len; + roster_tree[roster_len] = tree; + roster_split[roster_len] = new_split; + in_split[roster_len] = splits[tree].in_split[new_split]; + roster_hits[roster_len] = 1; + } + index[tree][new_split] = roster_len; +} + +#define SCORE(a, b) score[(a) * roster_len + (b)] + +#define SPLIT(i) splits[roster_tree[(i)]].state[roster_split[(i)]] + +void SplitRoster::mutual_clustering() { + const cost max_score = BIG; + double exact_match_score = 0; + int16 exact_matches = 0; + + for (int16 ai = 0; ai != roster_len - 1; ++ai) { + const int16 + na = in_split[ai], + nA = n_tips - na + ; + + SCORE(ai, ai) = ic_matching(na, nA, n_tips); + + for (int16 bi = ai + 1; bi != roster_len; ++bi) { + + // x divides tips into a|A; y divides tips into b|B + int16 a_and_b = 0; + for (int16 bin = n_bins; --bin; ) { + a_and_b += count_bits(SPLIT(ai)[bin] & SPLIT(bi)[bin]); + } + + const int16 + nb = in_split[bi], + nB = n_tips - nb, + a_and_B = na - a_and_b, + A_and_b = nb - a_and_b, + A_and_B = nA - A_and_b + ; + + if (a_and_b == A_and_b && + a_and_b == a_and_B && + a_and_b == A_and_B) { + SCORE(ai, bi) = max_score; // Don't risk rounding error + } else { + SCORE(ai, bi) = max_score - + // Division by n_tips converts n(A&B) to P(A&B) for each ic_element + cost(max_score * (( + // 0 < Sum of IC_elements <= n_tips + ic_element(a_and_b, na, nb, n_tips) + + ic_element(a_and_B, na, nB, n_tips) + + ic_element(A_and_b, nA, nb, n_tips) + + ic_element(A_and_B, nA, nB, n_tips) + ) / n_tips) + ); + } + } } - index[tree][new_split] = roster_pos; } -NumericVector SplitRoster::mutual_clustering() { +// [[Rcpp::export]] +NumericVector SplitRoster::score_pairs() { + int32 i = 0, entry = 0; + for (i = 0; ; ++i) { + // Calculate tree's similarity to self + + + if (i == n_trees) break; + entry++; + for (int32 j = i + 1; j != n_trees; ++j) { + + entry++; + } + } const SplitList a(x), b(y); const bool a_has_more_splits = (a.n_splits > b.n_splits); const int16 @@ -322,4 +391,4 @@ NumericVector SplitRoster::mutual_clustering() { return List::create(Named("score") = final_score, _["matching"] = final_matching); } -} \ No newline at end of file +} diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 2dca6763d..528b04ce6 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -1,8 +1,8 @@ #ifndef _TREEDIST_SPLITROSTER_HPP #define _TREEDIST_SPLITROSTER_HPP -#include #include +#include #include #include #include "ints.hpp" @@ -13,12 +13,13 @@ using namespace Rcpp; class SplitRoster { int16 n_bins; - int32 roster_pos; + int32 roster_len; std::vector splits; + std::vector score; std::unique_ptr roster_tree; std::unique_ptr roster_split; - std::unique_ptr roster_size; + std::unique_ptr in_split; std::unique_ptr roster_hits; std::vector > index; @@ -42,6 +43,8 @@ class SplitRoster { int32 n_trees; SplitRoster(const List x, const IntegerVector nTip); + void mutual_clustering(); + NumericVector score_pairs(); }; #endif From caa176119c6112833e96de8bd3a41ed67599c529 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 09:06:01 +0100 Subject: [PATCH 30/80] Draft of score_pairs() --- src/SplitRoster.cpp | 225 ++++++++++---------------------------------- src/SplitRoster.hpp | 1 + 2 files changed, 49 insertions(+), 177 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 27a7ba104..fe65b79de 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -21,14 +21,15 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { roster_tree = std::make_unique(n_trees * max_splits); roster_split = std::make_unique(n_trees * max_splits); in_split = std::make_unique(n_trees * max_splits); + out_split = std::make_unique(n_trees * max_splits); roster_hits = std::make_unique(n_trees * max_splits); index.reserve(n_trees); // Populate roster using k-way merge with tournament tree const int32 tournament_games = n_trees - 1, - tournament_nodes = n_trees + tournament_games - ; + tournament_nodes = n_trees + tournament_games + ; auto winners = std::make_unique(tournament_nodes); auto losers = std::make_unique(tournament_nodes); @@ -142,6 +143,7 @@ inline void SplitRoster::push( roster_tree[roster_len] = tree; roster_split[roster_len] = new_split; in_split[roster_len] = splits[tree].in_split[new_split]; + out_split[roster_len] = n_tips - in_split[roster_len]; roster_hits[roster_len] = 1; } index[tree][new_split] = roster_len; @@ -159,7 +161,7 @@ void SplitRoster::mutual_clustering() { for (int16 ai = 0; ai != roster_len - 1; ++ai) { const int16 na = in_split[ai], - nA = n_tips - na + nA = out_split[ai] ; SCORE(ai, ai) = ic_matching(na, nA, n_tips); @@ -174,7 +176,7 @@ void SplitRoster::mutual_clustering() { const int16 nb = in_split[bi], - nB = n_tips - nb, + nB = out_split[bi], a_and_B = na - a_and_b, A_and_b = nb - a_and_b, A_and_B = nA - A_and_b @@ -203,192 +205,61 @@ void SplitRoster::mutual_clustering() { // [[Rcpp::export]] NumericVector SplitRoster::score_pairs() { - int32 i = 0, entry = 0; + int32 + i = 0, + entry = 0, + n_scores = n_trees * (n_trees + 1) / 2; + ; + const cost max_score = BIG; + score = std::vector (n_scores, 0.0); for (i = 0; ; ++i) { + const int16 i_splits = splits[i].n_splits; // Calculate tree's similarity to self - + for (int16 sp = 0; sp != i_splits; ++sp) { + const int32 sp_i = index[i][sp]; + score[entry] += SCORE(sp_i, sp_i); + } if (i == n_trees) break; entry++; for (int32 j = i + 1; j != n_trees; ++j) { - - entry++; - } - } - const SplitList a(x), b(y); - const bool a_has_more_splits = (a.n_splits > b.n_splits); - const int16 - most_splits = a_has_more_splits ? a.n_splits : b.n_splits, - a_extra_splits = a_has_more_splits ? most_splits - b.n_splits : 0, - b_extra_splits = a_has_more_splits ? 0 : most_splits - a.n_splits, - n_tips = nTip[0] - ; - const cost max_score = BIG; - - cost** score = new cost*[most_splits]; - for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - double exact_match_score = 0; - int16 exact_matches = 0; - // NumericVector zero-initializes [so does make_unique] - // match will have one added to it so numbering follows R; hence 0 = UNMATCHED - NumericVector a_match(a.n_splits); - std::unique_ptr b_match = std::make_unique(b.n_splits); - - for (int16 ai = 0; ai != a.n_splits; ++ai) { - if (a_match[ai]) continue; - const int16 - na = a.in_split[ai], - nA = n_tips - na + const int16 + j_splits = splits[j].n_splits, + most_splits = i_splits > j_splits ? i_splits : j_splits ; - - for (int16 bi = 0; bi != b.n_splits; ++bi) { - // x divides tips into a|A; y divides tips into b|B - int16 a_and_b = 0; - for (int16 bin = 0; bin != a.n_bins; ++bin) { - a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); - } - const int16 - nb = b.in_split[bi], - nB = n_tips - nb, - a_and_B = na - a_and_b, - A_and_b = nb - a_and_b, - A_and_B = nA - A_and_b - ; + // TODO declare lap_score once at maximum size n_tips - 3 and retain. + cost** lap_score = new cost*[most_splits]; + for (int16 i = most_splits; i--; ) lap_score[i] = new cost[most_splits]; + lap_col *rowsol = new lap_col[most_splits]; + lap_row *colsol = new lap_row[most_splits]; + cost *u = new cost[most_splits], *v = new cost[most_splits]; - if ((!a_and_B && !A_and_b) || - (!a_and_b && !A_and_B)) { - exact_match_score += ic_matching(na, nA, n_tips); - exact_matches++; - a_match[ai] = bi + 1; - b_match[bi] = ai + 1; - break; - } else if (a_and_b == A_and_b && - a_and_b == a_and_B && - a_and_b == A_and_B) { - score[ai][bi] = max_score; // Don't risk rounding error - } else { - score[ai][bi] = max_score - - // Division by n_tips converts n(A&B) to P(A&B) for each ic_element - cost(max_score * (( - // 0 < Sum of IC_elements <= n_tips - ic_element(a_and_b, na, nb, n_tips) + - ic_element(a_and_B, na, nB, n_tips) + - ic_element(A_and_b, nA, nb, n_tips) + - ic_element(A_and_B, nA, nB, n_tips) - ) / n_tips) - ); - } - } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { - score[ai][bi] = max_score; - } - } - if (exact_matches == b.n_splits || exact_matches == a.n_splits) { - for (int16 i = most_splits; i--; ) delete[] score[i]; - delete[] score; - - return List::create( - Named("score") = NumericVector::create(exact_match_score / n_tips), - _["matching"] = a_match); - } - - - const int16 lap_dim = most_splits - exact_matches; - lap_col *rowsol = new lap_col[lap_dim]; - lap_row *colsol = new lap_row[lap_dim]; - cost *u = new cost[lap_dim], *v = new cost[lap_dim]; - - if (exact_matches) { - int16 a_pos = 0; - for (int16 ai = 0; ai != a.n_splits; ++ai) { - if (a_match[ai]) continue; - int16 b_pos = 0; - for (int16 bi = 0; bi != b.n_splits; ++bi) { - if (b_match[bi]) continue; - score[a_pos][b_pos] = score[ai][bi]; - b_pos++; - } - for (int16 bi = lap_dim - a_extra_splits; bi < lap_dim; ++bi) { - score[a_pos][bi] = max_score; - } - a_pos++; - } - for (int16 ai = lap_dim - b_extra_splits; ai < lap_dim; ++ai) { - for (int16 bi = 0; bi != lap_dim; ++bi) { - score[ai][bi] = max_score; - } - } - - const double lap_score = - double((max_score * lap_dim) - lap(lap_dim, score, rowsol, colsol, u, v)) - / max_score; - NumericVector final_score = - NumericVector::create(lap_score + (exact_match_score / n_tips)); - - for (int16 i = most_splits; i--; ) delete[] score[i]; - delete[] colsol; delete[] u; delete[] v; delete[] score; - - std::unique_ptr lap_decode = std::make_unique(lap_dim); - int16 fuzzy_match = 0; - for (int16 bi = 0; bi != b.n_splits; ++bi) { - if (!b_match[bi]) { - lap_decode[fuzzy_match++] = bi + 1; - } else { - } - } - - fuzzy_match = 0; - IntegerVector final_matching(a.n_splits); - for (int16 i = 0; i != a.n_splits; i++) { - if (a_match[i]) { - // Rcout << "a" << (1+i) << " exactly matches b" << a_match[i]<< "\n"; - final_matching[i] = a_match[i]; - } else { - const int16 this_sol = rowsol[fuzzy_match++]; - // Rcout << "a"<<(1+i) << " fuzzily matches rowsol[" << this_sol <<"] == " - // << rowsol[this_sol] << "; "; - if (rowsol[this_sol] >= lap_dim - a_extra_splits) { - // Rcout << " unmatched (NA)\n"; - final_matching[i] = NA_INTEGER; - } else { - // Rcout << " matched with b" << lap_decode[rowsol[this_sol]] <<".\n"; - final_matching[i] = lap_decode[rowsol[this_sol]]; + + + for (int16 ai = i_splits; ai--; ) { + for (int16 bi = j_splits; bi--; ) { + lap_score[ai][bi] = SCORE(ai, bi); } } - // Rcout << " "; - // if (final_matching[i] > 0) Rcout << final_matching[i]; else Rcout << "NA"; - } - - delete[] rowsol; - - return List::create(Named("score") = final_score, - _["matching"] = final_matching); - } else { - for (int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { - score[ai][bi] = max_score; + + for (int16 ai = i_splits; ai < most_splits; ++ai) { + for (int16 bi = 0; bi != most_splits; ++bi) { + lap_score[ai][bi] = max_score; + } } + + score[entry] = double( + (max_score * most_splits) - + lap(most_splits, lap_score, rowsol, colsol, u, v) + ) / max_score; + + for (int16 i = most_splits; i--; ) delete[] lap_score[i]; + delete[] colsol; delete[] u; delete[] v; delete[] lap_score; + delete[] rowsol; + + entry++; } - - const double lap_score = double( - (max_score * lap_dim) - - lap(most_splits, score, rowsol, colsol, u, v) - ) / max_score; - NumericVector final_score = NumericVector::create(lap_score); - - for (int16 i = most_splits; i--; ) delete[] score[i]; - delete[] colsol; delete[] u; delete[] v; delete[] score; - - IntegerVector final_matching (a.n_splits); - for (int16 i = a.n_splits; i--; ) { - final_matching[i] = (rowsol[i] < b.n_splits) ? rowsol[i] + 1 : NA_INTEGER; - } - - delete[] rowsol; - - return List::create(Named("score") = final_score, - _["matching"] = final_matching); } } diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 528b04ce6..e1401c17d 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -20,6 +20,7 @@ class SplitRoster { std::unique_ptr roster_tree; std::unique_ptr roster_split; std::unique_ptr in_split; + std::unique_ptr out_split; std::unique_ptr roster_hits; std::vector > index; From 7bda05c71b0d4c1c9149160fe02add32b339f43e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 09:09:40 +0100 Subject: [PATCH 31/80] Draft ready to debug? --- src/SplitRoster.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index fe65b79de..5af6bc04b 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -155,8 +155,6 @@ inline void SplitRoster::push( void SplitRoster::mutual_clustering() { const cost max_score = BIG; - double exact_match_score = 0; - int16 exact_matches = 0; for (int16 ai = 0; ai != roster_len - 1; ++ai) { const int16 @@ -202,8 +200,6 @@ void SplitRoster::mutual_clustering() { } } - -// [[Rcpp::export]] NumericVector SplitRoster::score_pairs() { int32 i = 0, @@ -211,13 +207,14 @@ NumericVector SplitRoster::score_pairs() { n_scores = n_trees * (n_trees + 1) / 2; ; const cost max_score = BIG; - score = std::vector (n_scores, 0.0); + //score = std::vector (n_scores, 0.0); + NumericVector ret(n_scores); for (i = 0; ; ++i) { const int16 i_splits = splits[i].n_splits; // Calculate tree's similarity to self for (int16 sp = 0; sp != i_splits; ++sp) { const int32 sp_i = index[i][sp]; - score[entry] += SCORE(sp_i, sp_i); + ret[entry] += SCORE(sp_i, sp_i); } if (i == n_trees) break; @@ -250,7 +247,7 @@ NumericVector SplitRoster::score_pairs() { } } - score[entry] = double( + ret[entry] = double( (max_score * most_splits) - lap(most_splits, lap_score, rowsol, colsol, u, v) ) / max_score; @@ -262,4 +259,5 @@ NumericVector SplitRoster::score_pairs() { entry++; } } + return ret; } From 0b23a3345c1965ebd008bdd81d923da1569d2e24 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 09:10:20 +0100 Subject: [PATCH 32/80] Update tree_distances.cpp --- src/tree_distances.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 3d76f29fc..9553fdb0f 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -384,7 +384,8 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, // [[Rcpp::export]] NumericVector cpp_all_pairs_mci(const List x, const IntegerVector nTip) { SplitRoster roster(x, nTip); - + roster.mutual_clustering(); + return roster.score_pairs(); } // [[Rcpp::export]] From 6cf0c201abe11fa2155c0c3652beb1e3538fabd4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 09:22:05 +0100 Subject: [PATCH 33/80] return --- R/tree_distance_info.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index 0bf500742..e13019494 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -299,7 +299,8 @@ MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, reportMatching = FALSE, diag = TRUE) { unnormalized <- if (is.null(tree2) && is.list(tree1) && !inherits(tree1, 'phylo')) { - cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) + structure(cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])), + Size = length(tree1), Diag = FALSE, Upper = FALSE, class = 'dist') } else { CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, reportMatching) From 007167281ce07d709e13821cbc7402794cb317b4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 12:42:07 +0100 Subject: [PATCH 34/80] parent/child offset --- src/SplitRoster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 5af6bc04b..ac715566e 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -62,7 +62,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { int32 i = winner; do { - i /= 2; + i = (i - 1) / 2; play_game(&i, winners, losers, which_split); } while (i); @@ -103,7 +103,7 @@ inline void SplitRoster::play_game( std::unique_ptr &losers, std::unique_ptr &which_split) { const int32 - child1 = *node * 2, + child1 = *node * 2 + 1, child2 = child1 + 1, tree1 = winners[child1], tree2 = winners[child2] From cb030ae93893acebda60a212894b8b91a85a015c Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 13:11:50 +0100 Subject: [PATCH 35/80] Debug --- R/tree_distance_info.R | 6 ++++-- src/SplitRoster.cpp | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index e13019494..b25cefffa 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -299,8 +299,10 @@ MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, reportMatching = FALSE, diag = TRUE) { unnormalized <- if (is.null(tree2) && is.list(tree1) && !inherits(tree1, 'phylo')) { - structure(cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])), - Size = length(tree1), Diag = FALSE, Upper = FALSE, class = 'dist') + mtx <- matrix(0, length(tree1), length(tree1)) + mtx[!upper.tri(mtx)] <- cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) + mtx[upper.tri(mtx)] <- t(mtx)[upper.tri(mtx)] + mtx } else { CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, reportMatching) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index ac715566e..874c269fb 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -36,7 +36,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { auto which_split = std::make_unique(n_trees); - // Populate children of tree + // Populate leaves of tournament tree for (int32 i = 0; i != n_trees; ++i) { which_split[i] = splits[i].n_splits - 1; winners[i + tournament_games] = i; @@ -54,11 +54,13 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { roster_len = 0; roster_tree[0] = winners[0]; roster_split[0] = which_split[winners[0]]; + in_split[0] = splits[winners[0]].in_split[roster_split[0]]; + out_split[0] = n_tips - in_split[0]; roster_hits[0] = 1; for (; ; ) { const int32 winner = winners[0]; - --which_split[winner]; + --(which_split[winner]); int32 i = winner; do { @@ -168,7 +170,7 @@ void SplitRoster::mutual_clustering() { // x divides tips into a|A; y divides tips into b|B int16 a_and_b = 0; - for (int16 bin = n_bins; --bin; ) { + for (int16 bin = n_bins; bin--; ) { a_and_b += count_bits(SPLIT(ai)[bin] & SPLIT(bi)[bin]); } From 6badf49cd0370acaa7e5b270d04f69039c4df216 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 13:22:37 +0100 Subject: [PATCH 36/80] new_split --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 874c269fb..da5b097b3 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -136,7 +136,7 @@ inline void SplitRoster::push( std::unique_ptr &which_split) { const int16 new_split = which_split[tree]; if (splits_equal(splits[tree].state, - which_split[new_split], + new_split, splits[roster_tree[roster_len]].state, roster_split[roster_len])) { ++roster_hits[roster_len]; From 7b045874efdb814a72504339bc26d0cb32e48000 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:33:15 +0100 Subject: [PATCH 37/80] SplitList(RawMatrix x, int16 n_tip); --- src/SplitList.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++++++ src/SplitList.hpp | 3 ++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 0f9545e6d..46132b831 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -50,6 +50,66 @@ SplitList::SplitList(RawMatrix x) { } } +SplitList::SplitList(RawMatrix x, int16 n_tip) { + n_splits = x.rows(); + const int16 + n_input_bins = x.cols(), + input_bins_per_bin = BIN_SIZE / R_BIN_SIZE + ; + + n_bins = (n_input_bins + R_BIN_SIZE - 1) / input_bins_per_bin; + + if (n_bins < 1) throw std::invalid_argument("No leaves present."); + if (n_splits < 0) throw std::invalid_argument("Negative number of splits!?"); + if (n_bins > MAX_BINS) { + throw std::length_error("This many leaves cannot be supported. " // # nocov + "Please contact the TreeDist maintainer if you " // # nocov + "need to use more!"); // # nocov + } + Rcout << "\n=== SplitList::SplitList(x, n_tip) ===\n"; + const int16 last_bin_tips = INLASTBIN(n_tip, R_BIN_SIZE); + Rcout << "Last bt: " << last_bin_tips << ".\n"; + Rcout << "Last 2^bt: " << (2^last_bin_tips) << ".\n"; + const splitbit last_mask = std::pow(2, last_bin_tips) - 1; + Rcout << "Last mask: " << last_mask << ".\n"; + + for (int16 split = 0; split != n_splits; split++) { + int16 last_bin = n_bins - 1; + const int16 raggedy_bins = INLASTBIN(n_input_bins, R_BIN_SIZE); + + splitbit last = INSUBBIN(last_bin, 0); + bool invert = last & splitbit(1); + Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; + Rcout << "Last bin: " << last << "; intersection with " << splitbit(1) + << " = " << invert << "; " << (invert ? "inverting" : "Retaining") + << ".\n"; + state[split][last_bin] = invert ? ~last & last_mask : last; + Rcout << " State[" << split << "][" << last_bin << "] = " << state[split][last_bin] << ".\n"; + for (int16 input_bin = 1; input_bin != raggedy_bins; input_bin++) { + Rcout << "Adding " << (splitbit (x(split, (last_bin * 2) + input_bin))) << " << " + << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << last_bin + << "], was " << state[split][last_bin] << "\n"; + state[split][last_bin] += invert ? + ~INBIN(input_bin, last_bin): + INBIN(input_bin, last_bin); + } + in_split[split] = count_bits(state[split][last_bin]); + + for (int16 bin = 0; bin != n_bins - 1; bin++) { + Rcout << "Split " << split << ", bin << " << bin << ".\n"; + state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); + for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { + Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " + << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" + << bin << "], was " << state[split][bin] << "\n"; + state[split][bin] += invert ? + ~INSUBBIN(bin, input_bin): + INSUBBIN(bin, input_bin); + } + in_split[split] += count_bits(state[split][bin]); + } + } +} void SplitList::swap(int16 a, int16 b) { for (int16 bin = n_bins; bin--; ) { const splitbit tmp = state[a][bin]; diff --git a/src/SplitList.hpp b/src/SplitList.hpp index 99f4f822a..e5331b09c 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.hpp @@ -19,7 +19,8 @@ class SplitList { int16 n_splits, n_bins; int16 in_split[MAX_SPLITS]; splitbit state[MAX_SPLITS][MAX_BINS]; - SplitList(RawMatrix); + SplitList(RawMatrix x); + SplitList(RawMatrix x, int16 n_tip); void quicksort(); void quicksort(int16 lo, int16 hi); }; From 3a375b2c56305c5eb83e6cd83c8fa0c1ea611cfb Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:34:40 +0100 Subject: [PATCH 38/80] // --- src/SplitList.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 46132b831..259f12b28 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -66,12 +66,12 @@ SplitList::SplitList(RawMatrix x, int16 n_tip) { "Please contact the TreeDist maintainer if you " // # nocov "need to use more!"); // # nocov } - Rcout << "\n=== SplitList::SplitList(x, n_tip) ===\n"; + // Rcout << "\n=== SplitList::SplitList(x, n_tip) ===\n"; const int16 last_bin_tips = INLASTBIN(n_tip, R_BIN_SIZE); - Rcout << "Last bt: " << last_bin_tips << ".\n"; - Rcout << "Last 2^bt: " << (2^last_bin_tips) << ".\n"; + // Rcout << "Last bt: " << last_bin_tips << ".\n"; + // Rcout << "Last 2^bt: " << (2^last_bin_tips) << ".\n"; const splitbit last_mask = std::pow(2, last_bin_tips) - 1; - Rcout << "Last mask: " << last_mask << ".\n"; + // Rcout << "Last mask: " << last_mask << ".\n"; for (int16 split = 0; split != n_splits; split++) { int16 last_bin = n_bins - 1; @@ -79,16 +79,16 @@ SplitList::SplitList(RawMatrix x, int16 n_tip) { splitbit last = INSUBBIN(last_bin, 0); bool invert = last & splitbit(1); - Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; - Rcout << "Last bin: " << last << "; intersection with " << splitbit(1) - << " = " << invert << "; " << (invert ? "inverting" : "Retaining") - << ".\n"; + // Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; + // Rcout << "Last bin: " << last << "; intersection with " << splitbit(1) + // << " = " << invert << "; " << (invert ? "inverting" : "Retaining") + // << ".\n"; state[split][last_bin] = invert ? ~last & last_mask : last; - Rcout << " State[" << split << "][" << last_bin << "] = " << state[split][last_bin] << ".\n"; + // Rcout << " State[" << split << "][" << last_bin << "] = " << state[split][last_bin] << ".\n"; for (int16 input_bin = 1; input_bin != raggedy_bins; input_bin++) { - Rcout << "Adding " << (splitbit (x(split, (last_bin * 2) + input_bin))) << " << " - << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << last_bin - << "], was " << state[split][last_bin] << "\n"; + // Rcout << "Adding " << (splitbit (x(split, (last_bin * 2) + input_bin))) << " << " + // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << last_bin + // << "], was " << state[split][last_bin] << "\n"; state[split][last_bin] += invert ? ~INBIN(input_bin, last_bin): INBIN(input_bin, last_bin); @@ -96,12 +96,12 @@ SplitList::SplitList(RawMatrix x, int16 n_tip) { in_split[split] = count_bits(state[split][last_bin]); for (int16 bin = 0; bin != n_bins - 1; bin++) { - Rcout << "Split " << split << ", bin << " << bin << ".\n"; + // Rcout << "Split " << split << ", bin << " << bin << ".\n"; state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { - Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " - << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" - << bin << "], was " << state[split][bin] << "\n"; + // Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " + // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" + // << bin << "], was " << state[split][bin] << "\n"; state[split][bin] += invert ? ~INSUBBIN(bin, input_bin): INSUBBIN(bin, input_bin); From 31942d4617bc98e438bb2e494f17587a7a97ab7f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:34:46 +0100 Subject: [PATCH 39/80] #define --- src/SplitList.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 259f12b28..4ea6e8483 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -4,6 +4,12 @@ using namespace Rcpp; #include "tree_distances.hpp" // Before SplitList.h, for int16 #include "SplitList.hpp" +#define INLASTBIN(n, size) ((size) - ((size) - ((n) % (size))) % (size)) + +#define INSUBBIN(bin, offset) splitbit(x(split, ((bin) * input_bins_per_bin) + (offset))) + +#define INBIN(in_bin, out_bin) ((INSUBBIN((out_bin), (in_bin))) << (R_BIN_SIZE * (in_bin))) + SplitList::SplitList(RawMatrix x) { n_splits = x.rows(); const int16 n_input_bins = x.cols(), From 43037226724e211f52ea98fe9e061707cbb9f8da Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:34:54 +0100 Subject: [PATCH 40/80] leaves --- src/SplitList.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 4ea6e8483..cda38d20c 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -17,10 +17,10 @@ SplitList::SplitList(RawMatrix x) { n_bins = (n_input_bins + R_BIN_SIZE - 1) / input_bins_per_bin; - if (n_bins < 1) throw std::invalid_argument("No tips present."); + if (n_bins < 1) throw std::invalid_argument("No leaves present."); if (n_splits < 0) throw std::invalid_argument("Negative number of splits!?"); if (n_bins > MAX_BINS) { - throw std::length_error("This many tips cannot be supported. " // # nocov + throw std::length_error("This many leaves cannot be supported. " // # nocov "Please contact the TreeDist maintainer if you " // # nocov "need to use more!"); // # nocov } From 8db011d45019d3cd286d57947316f1f54b0c1530 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:35:13 +0100 Subject: [PATCH 41/80] , n_tips --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index da5b097b3..7c60c5251 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -12,7 +12,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { for (int32 i = 0; i != n_trees; ++i) { const RawMatrix xi = x[i]; - splits.emplace_back(xi); + splits.emplace_back(xi, n_tips); splits[i].quicksort(); } n_bins = splits[0].n_bins; From 4284859a748cbd166ab801bf51d71fda690a0fb4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 14:55:49 +0100 Subject: [PATCH 42/80] na/nA switch --- src/SplitRoster.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 7c60c5251..41621c5d1 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -160,9 +160,10 @@ void SplitRoster::mutual_clustering() { for (int16 ai = 0; ai != roster_len - 1; ++ai) { const int16 - na = in_split[ai], - nA = out_split[ai] + nA = in_split[ai], + na = out_split[ai] ; + assert(na + nA == n_tips); SCORE(ai, ai) = ic_matching(na, nA, n_tips); @@ -175,12 +176,17 @@ void SplitRoster::mutual_clustering() { } const int16 - nb = in_split[bi], - nB = out_split[bi], + nB = in_split[bi], + nb = out_split[bi], a_and_B = na - a_and_b, A_and_b = nb - a_and_b, A_and_B = nA - A_and_b ; + assert(nb + nB == n_tips); + assert(a_and_B >= 0); + assert(a_and_b >= 0); + assert(A_and_B >= 0); + assert(A_and_b >= 0); if (a_and_b == A_and_b && a_and_b == a_and_B && From 6e0979d77571512233fbe202a43e14b4d01b2949 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 15:16:21 +0100 Subject: [PATCH 43/80] li --- src/SplitRoster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 41621c5d1..9b320fe94 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -236,7 +236,7 @@ NumericVector SplitRoster::score_pairs() { // TODO declare lap_score once at maximum size n_tips - 3 and retain. cost** lap_score = new cost*[most_splits]; - for (int16 i = most_splits; i--; ) lap_score[i] = new cost[most_splits]; + for (int16 li = most_splits; li--; ) lap_score[li] = new cost[most_splits]; lap_col *rowsol = new lap_col[most_splits]; lap_row *colsol = new lap_row[most_splits]; cost *u = new cost[most_splits], *v = new cost[most_splits]; @@ -260,7 +260,7 @@ NumericVector SplitRoster::score_pairs() { lap(most_splits, lap_score, rowsol, colsol, u, v) ) / max_score; - for (int16 i = most_splits; i--; ) delete[] lap_score[i]; + for (int16 li = most_splits; li--; ) delete[] lap_score[li]; delete[] colsol; delete[] u; delete[] v; delete[] lap_score; delete[] rowsol; From 94d2e465dd56a6afec1faf37415508de55e79b64 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 15:16:37 +0100 Subject: [PATCH 44/80] sp_i/j --- src/SplitRoster.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 9b320fe94..8bb26c330 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -243,15 +243,15 @@ NumericVector SplitRoster::score_pairs() { - for (int16 ai = i_splits; ai--; ) { - for (int16 bi = j_splits; bi--; ) { - lap_score[ai][bi] = SCORE(ai, bi); + for (int16 sp_i = i_splits; sp_i--; ) { + for (int16 sp_j = j_splits; sp_j--; ) { + lap_score[sp_i][sp_j] = SCORE(index[i][sp_i], index[j][sp_j]); } } - for (int16 ai = i_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { - lap_score[ai][bi] = max_score; + for (int16 sp_i = i_splits; sp_i < most_splits; ++sp_i) { + for (int16 sp_j = 0; sp_j != most_splits; ++sp_j) { + lap_score[sp_i][sp_j] = max_score; } } From 9b578b6897ecabd0538350d267c82f96c88e5158 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 15:16:53 +0100 Subject: [PATCH 45/80] max-/ --- src/SplitRoster.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 8bb26c330..67e3e1e2f 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -165,7 +165,8 @@ void SplitRoster::mutual_clustering() { ; assert(na + nA == n_tips); - SCORE(ai, ai) = ic_matching(na, nA, n_tips); + SCORE(ai, ai) = max_score - + cost(max_score * (ic_matching(na, nA, n_tips) / n_tips)); for (int16 bi = ai + 1; bi != roster_len; ++bi) { From 6a77b283ed7ab2f7f2e0373d7b2025f648deba6e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 15:17:01 +0100 Subject: [PATCH 46/80] max lookip --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 67e3e1e2f..175ddad23 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -151,7 +151,7 @@ inline void SplitRoster::push( index[tree][new_split] = roster_len; } -#define SCORE(a, b) score[(a) * roster_len + (b)] +#define SCORE(a, b) ((a) > (b) ? score[(b) * roster_len + (a)] : score[(a) * roster_len + (b)]) #define SPLIT(i) splits[roster_tree[(i)]].state[roster_split[(i)]] From ab9287a1a53b15f08503116273a7078708101699 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 15:19:29 +0100 Subject: [PATCH 47/80] n_trees - 1 --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 175ddad23..893823858 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -226,7 +226,7 @@ NumericVector SplitRoster::score_pairs() { ret[entry] += SCORE(sp_i, sp_i); } - if (i == n_trees) break; + if (i == n_trees - 1) break; entry++; for (int32 j = i + 1; j != n_trees; ++j) { const int16 From 5f79c9a2c43b08171a30d6518d6892a3b7256f41 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 16:54:51 +0100 Subject: [PATCH 48/80] Swap in_split --- src/SplitList.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index cda38d20c..bae1855be 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -122,6 +122,9 @@ void SplitList::swap(int16 a, int16 b) { state[a][bin] = state[b][bin]; state[b][bin] = tmp; } + const int16 tmp = in_split[a]; + in_split[a] = in_split[b]; + in_split[b] = tmp; } bool SplitList::less_than(int16 a, int16 b) { From f435292183ab7697bb4ffda61560655fb4210b76 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 16:57:19 +0100 Subject: [PATCH 49/80] nA, na --- src/SplitRoster.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 893823858..1c71239cd 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -160,8 +160,8 @@ void SplitRoster::mutual_clustering() { for (int16 ai = 0; ai != roster_len - 1; ++ai) { const int16 - nA = in_split[ai], - na = out_split[ai] + na = in_split[ai], + nA = out_split[ai] ; assert(na + nA == n_tips); @@ -177,8 +177,8 @@ void SplitRoster::mutual_clustering() { } const int16 - nB = in_split[bi], - nb = out_split[bi], + nb = in_split[bi], + nB = out_split[bi], a_and_B = na - a_and_b, A_and_b = nb - a_and_b, A_and_B = nA - A_and_b From d6106eb9348b907915f8cf845dc2d8ab1416866e Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 19:39:16 +0100 Subject: [PATCH 50/80] n_tips --- src/SplitList.cpp | 3 ++- src/SplitList.hpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index bae1855be..8b8a84113 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -56,7 +56,7 @@ SplitList::SplitList(RawMatrix x) { } } -SplitList::SplitList(RawMatrix x, int16 n_tip) { +SplitList::SplitList(RawMatrix x, int16 n_tips) { n_splits = x.rows(); const int16 n_input_bins = x.cols(), @@ -78,6 +78,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tip) { // Rcout << "Last 2^bt: " << (2^last_bin_tips) << ".\n"; const splitbit last_mask = std::pow(2, last_bin_tips) - 1; // Rcout << "Last mask: " << last_mask << ".\n"; + Rcout << "\n=== SplitList::SplitList(x, n_tips) ===\n"; for (int16 split = 0; split != n_splits; split++) { int16 last_bin = n_bins - 1; diff --git a/src/SplitList.hpp b/src/SplitList.hpp index e5331b09c..248c275d9 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.hpp @@ -20,7 +20,7 @@ class SplitList { int16 in_split[MAX_SPLITS]; splitbit state[MAX_SPLITS][MAX_BINS]; SplitList(RawMatrix x); - SplitList(RawMatrix x, int16 n_tip); + SplitList(RawMatrix x, int16 n_tips); void quicksort(); void quicksort(int16 lo, int16 hi); }; From 9f4e42922e3da7fcb56a425921a4dfd68825b05a Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 19:40:35 +0100 Subject: [PATCH 51/80] Working for 9 leaves --- src/SplitList.cpp | 48 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 8b8a84113..c146d0eac 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -72,33 +72,32 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { "Please contact the TreeDist maintainer if you " // # nocov "need to use more!"); // # nocov } - // Rcout << "\n=== SplitList::SplitList(x, n_tip) ===\n"; - const int16 last_bin_tips = INLASTBIN(n_tip, R_BIN_SIZE); - // Rcout << "Last bt: " << last_bin_tips << ".\n"; - // Rcout << "Last 2^bt: " << (2^last_bin_tips) << ".\n"; - const splitbit last_mask = std::pow(2, last_bin_tips) - 1; - // Rcout << "Last mask: " << last_mask << ".\n"; - Rcout << "\n=== SplitList::SplitList(x, n_tips) ===\n"; + // Rcout << "\n=== SplitList::SplitList(x, n_tips) ===\n"; for (int16 split = 0; split != n_splits; split++) { int16 last_bin = n_bins - 1; const int16 raggedy_bins = INLASTBIN(n_input_bins, R_BIN_SIZE); - splitbit last = INSUBBIN(last_bin, 0); - bool invert = last & splitbit(1); - // Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; - // Rcout << "Last bin: " << last << "; intersection with " << splitbit(1) - // << " = " << invert << "; " << (invert ? "inverting" : "Retaining") - // << ".\n"; - state[split][last_bin] = invert ? ~last & last_mask : last; + Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; + assert(raggedy_bins <= BIN_SIZE / R_BIN_SIZE); + assert(raggedy_bins > 0); + state[split][last_bin] = INSUBBIN(last_bin, 0); // Rcout << " State[" << split << "][" << last_bin << "] = " << state[split][last_bin] << ".\n"; for (int16 input_bin = 1; input_bin != raggedy_bins; input_bin++) { - // Rcout << "Adding " << (splitbit (x(split, (last_bin * 2) + input_bin))) << " << " - // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << last_bin - // << "], was " << state[split][last_bin] << "\n"; - state[split][last_bin] += invert ? - ~INBIN(input_bin, last_bin): - INBIN(input_bin, last_bin); + // Rcout << " state [" << split << "][" << last_bin + // << "] increased from " << state[split][last_bin]; + state[split][last_bin] += INBIN(input_bin, last_bin); + // Rcout << " by " << INBIN(input_bin, last_bin) << "\n"; + } + const bool invert = state[split][last_bin] & splitbit(1); + if (invert) { + const int16 last_bin_tips = INLASTBIN(n_tips, BIN_SIZE); + // Rcout << "Last bin tips: " << last_bin_tips << ".\n"; + + const splitbit last_mask = std::pow(2, INLASTBIN(n_tips, BIN_SIZE)) - 1; + // Rcout << "Last mask: " << last_mask << ".\n"; + + state[split][last_bin] = ~state[split][last_bin] & last_mask; } in_split[split] = count_bits(state[split][last_bin]); @@ -107,11 +106,12 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { // Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " - // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" + // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" // << bin << "], was " << state[split][bin] << "\n"; - state[split][bin] += invert ? - ~INSUBBIN(bin, input_bin): - INSUBBIN(bin, input_bin); + state[split][bin] += INSUBBIN(bin, input_bin); + } + if (invert) { + state[split][bin] = ~state[split][bin]; } in_split[split] += count_bits(state[split][bin]); } From 38af2d739ecf02fbae250921c67b1523db7c7576 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Wed, 21 Jul 2021 19:43:32 +0100 Subject: [PATCH 52/80] // --- src/SplitList.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index c146d0eac..989477d82 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -78,7 +78,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { int16 last_bin = n_bins - 1; const int16 raggedy_bins = INLASTBIN(n_input_bins, R_BIN_SIZE); - Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; + // Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n"; assert(raggedy_bins <= BIN_SIZE / R_BIN_SIZE); assert(raggedy_bins > 0); state[split][last_bin] = INSUBBIN(last_bin, 0); @@ -91,9 +91,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { } const bool invert = state[split][last_bin] & splitbit(1); if (invert) { - const int16 last_bin_tips = INLASTBIN(n_tips, BIN_SIZE); - // Rcout << "Last bin tips: " << last_bin_tips << ".\n"; - + // Rcout << "Last bin tips: " << INLASTBIN(n_tips, BIN_SIZE) << ".\n"; const splitbit last_mask = std::pow(2, INLASTBIN(n_tips, BIN_SIZE)) - 1; // Rcout << "Last mask: " << last_mask << ".\n"; From 4a324f953daba611bf549f7a870b8bdcd8624edc Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 05:32:23 +0100 Subject: [PATCH 53/80] * Use MACROS for SplitList(x) --- src/SplitList.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 989477d82..deaa05ca9 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -27,29 +27,26 @@ SplitList::SplitList(RawMatrix x) { for (int16 split = 0; split != n_splits; split++) { int16 last_bin = n_bins - 1; - const int16 raggedy_bins = R_BIN_SIZE - - ((R_BIN_SIZE - (n_input_bins % R_BIN_SIZE)) % R_BIN_SIZE); + const int16 raggedy_bins = INLASTBIN(n_input_bins, R_BIN_SIZE); /*Rcout << n_input_bins << " bins in; " << raggedy_bins << " raggedy bins\n";*/ - state[split][last_bin] = x(split, last_bin * input_bins_per_bin); + state[split][last_bin] = INSUBBIN(last_bin, 0); /*Rcout << " State[" << split << "][" << bin << "] = " << state[split][bin] << ".\n";*/ for (int16 input_bin = 1; input_bin != raggedy_bins; input_bin++) { /*Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << bin << "], was " << state[split][bin] << "\n";*/ - state[split][last_bin] += ((splitbit (x(split, (last_bin * input_bins_per_bin) + input_bin))) - << (R_BIN_SIZE * input_bin)); + state[split][last_bin] += INBIN(input_bin, last_bin); } in_split[split] = count_bits(state[split][last_bin]); for (int16 bin = 0; bin != n_bins - 1; bin++) { /*Rcout << "Split " << split << ", bin << " << bin << ".\n";*/ - state[split][bin] = (splitbit) x(split, (bin * input_bins_per_bin)); + state[split][bin] = INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { /*Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << bin << "], was " << state[split][bin] << "\n";*/ - state[split][bin] += ((splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) - << (R_BIN_SIZE * input_bin)); + state[split][bin] += INSUBBIN(bin, input_bin); } in_split[split] += count_bits(state[split][bin]); } From dd41e88ad3629baebdc84f562270484a9b89a166 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 05:33:43 +0100 Subject: [PATCH 54/80] i = winner + tournament_games --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 1c71239cd..2a30222c0 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -62,7 +62,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { const int32 winner = winners[0]; --(which_split[winner]); - int32 i = winner; + int32 i = winner + tournament_games; do { i = (i - 1) / 2; play_game(&i, winners, losers, which_split); From b4bbe22d8540d3d053d2d4088bc24e04fa83f814 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 05:41:40 +0100 Subject: [PATCH 55/80] Update SplitRoster.cpp --- src/SplitRoster.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 2a30222c0..5fc07550c 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -216,7 +216,6 @@ NumericVector SplitRoster::score_pairs() { n_scores = n_trees * (n_trees + 1) / 2; ; const cost max_score = BIG; - //score = std::vector (n_scores, 0.0); NumericVector ret(n_scores); for (i = 0; ; ++i) { const int16 i_splits = splits[i].n_splits; From 20b4e2174ef2d7d7218d855e084ef3a68df7867c Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 05:54:33 +0100 Subject: [PATCH 56/80] =?UTF-8?q?pow=20=E2=86=92=20< Date: Thu, 22 Jul 2021 06:29:32 +0100 Subject: [PATCH 57/80] Full bin --- src/SplitList.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 5f2de54f4..9c8a47454 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -89,7 +89,10 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { const bool invert = state[split][last_bin] & splitbit(1); if (invert) { // Rcout << "Last bin tips: " << INLASTBIN(n_tips, BIN_SIZE) << ".\n"; - const splitbit last_mask = (splitbit(1) << INLASTBIN(n_tips, BIN_SIZE)) - 1; + const int16 last_bin_tips = INLASTBIN(n_tips, BIN_SIZE); + const splitbit last_mask = last_bin_tips == sizeof(splitbit) * 8 ? + splitbit(0) - 1 : + (splitbit(1) << INLASTBIN(n_tips, BIN_SIZE)) - 1; // Rcout << "Last mask: " << last_mask << ".\n"; state[split][last_bin] = ~state[split][last_bin] & last_mask; From 8ba32e1227b34762edd9da01411429d8da3c8b4f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 06:29:40 +0100 Subject: [PATCH 58/80] Update SplitList.cpp --- src/SplitList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 9c8a47454..9fe8ab91b 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -88,8 +88,8 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { } const bool invert = state[split][last_bin] & splitbit(1); if (invert) { - // Rcout << "Last bin tips: " << INLASTBIN(n_tips, BIN_SIZE) << ".\n"; const int16 last_bin_tips = INLASTBIN(n_tips, BIN_SIZE); + // Rcout << "Last bin tips: " << last_bin_tips << ".\n"; const splitbit last_mask = last_bin_tips == sizeof(splitbit) * 8 ? splitbit(0) - 1 : (splitbit(1) << INLASTBIN(n_tips, BIN_SIZE)) - 1; From 91536f97afc1a0535b1366d826b2addd8c3f2dcf Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 06:44:13 +0100 Subject: [PATCH 59/80] index for 0! --- src/SplitRoster.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 5fc07550c..443335fe2 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -57,6 +57,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { in_split[0] = splits[winners[0]].in_split[roster_split[0]]; out_split[0] = n_tips - in_split[0]; roster_hits[0] = 1; + index[roster_tree[0]][roster_split[0]] = 0; for (; ; ) { const int32 winner = winners[0]; From d3482ceaf0ad3cd492b9c99372fb84a8e8bf19ab Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 06:46:07 +0100 Subject: [PATCH 60/80] Debugging comments --- src/SplitRoster.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 443335fe2..fa8e77cd5 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -205,6 +205,13 @@ void SplitRoster::mutual_clustering() { ic_element(A_and_B, nA, nB, n_tips) ) / n_tips) ); + // Rcout << " Splits: \n " + // << SPLIT(ai)[0] <<", " << SPLIT(bi)[0]<<"\n"; + // Rcout << " Elements: \n" + // << " ___|_" << nb << "_|_" << nB << "_|_\n" + // << " " << na << " | " << a_and_b << " | " << a_and_B << " | \n" + // << " " << nA << " | " << A_and_b << " | " << A_and_B << " | " + // << " = " << n_tips << ";\n\n"; } } } From 0ff8fc827d71d0aca987ac2558919f717fbca2a7 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 08:42:09 +0100 Subject: [PATCH 61/80] INBIN not INSUBBIN --- src/SplitList.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 9fe8ab91b..377f23e8e 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -8,7 +8,7 @@ using namespace Rcpp; #define INSUBBIN(bin, offset) splitbit(x(split, ((bin) * input_bins_per_bin) + (offset))) -#define INBIN(in_bin, out_bin) ((INSUBBIN((out_bin), (in_bin))) << (R_BIN_SIZE * (in_bin))) +#define INBIN(r_bin, bin) ((INSUBBIN((bin), (r_bin))) << (R_BIN_SIZE * (r_bin))) SplitList::SplitList(RawMatrix x) { n_splits = x.rows(); @@ -46,7 +46,7 @@ SplitList::SplitList(RawMatrix x) { /*Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" << bin << "], was " << state[split][bin] << "\n";*/ - state[split][bin] += INSUBBIN(bin, input_bin); + state[split][bin] += INBIN(bin, input_bin); } in_split[split] += count_bits(state[split][bin]); } @@ -106,7 +106,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { // Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" // << bin << "], was " << state[split][bin] << "\n"; - state[split][bin] += INSUBBIN(bin, input_bin); + state[split][bin] += INBIN(input_bin, bin); } if (invert) { state[split][bin] = ~state[split][bin]; From 49125ae5c817f797bd22a6d475bed2833d979d8f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 09:50:57 +0100 Subject: [PATCH 62/80] assert(fuzzy_match < lap_dim); --- src/tree_distances.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 9553fdb0f..31e830827 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -513,8 +513,8 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, int16 fuzzy_match = 0; for (int16 bi = 0; bi != b.n_splits; ++bi) { if (!b_match[bi]) { + assert(fuzzy_match < lap_dim); lap_decode[fuzzy_match++] = bi + 1; - } else { } } @@ -525,6 +525,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, // Rcout << "a" << (1+i) << " exactly matches b" << a_match[i]<< "\n"; final_matching[i] = a_match[i]; } else { + assert(fuzzy_match < lap_dim); const int16 this_sol = rowsol[fuzzy_match++]; // Rcout << "a"<<(1+i) << " fuzzily matches rowsol[" << this_sol <<"] == " // << rowsol[this_sol] << "; "; From 4d1e955bdb1fbc8681e2576c51ab76853a46e174 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 12:00:16 +0100 Subject: [PATCH 63/80] Update SplitList.cpp --- src/SplitList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index c90bce314..0053d01d7 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -103,7 +103,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { // Rcout << "Split " << split << ", bin << " << bin << ".\n"; state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { - // Rcout << "Adding " << (splitbit (x(split, (bin * 2) + input_bin))) << " << " + // Rcout << "Adding " << (splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) << " << " // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" // << bin << "], was " << state[split][bin] << "\n"; state[split][bin] += INBIN(input_bin, bin); From af3a9c9ab8ac1f409b597f8643d83fe8459cf8cc Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 12:01:25 +0100 Subject: [PATCH 64/80] Improve comment --- src/SplitList.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 0053d01d7..aaeb60143 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -103,7 +103,8 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { // Rcout << "Split " << split << ", bin << " << bin << ".\n"; state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { - // Rcout << "Adding " << (splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) << " << " + // Rcout << " Adding " << INBIN(input_bin, bin) << " = " + // << (splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) << " << " // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" // << bin << "], was " << state[split][bin] << "\n"; state[split][bin] += INBIN(input_bin, bin); From 5d5c04fe7730db3851d1246e43cacc2e1af2463b Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 12:06:25 +0100 Subject: [PATCH 65/80] ws --- src/SplitRoster.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index fa8e77cd5..dc5d61353 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -135,6 +135,7 @@ inline void SplitRoster::play_game( inline void SplitRoster::push( const int32 tree, std::unique_ptr &which_split) { + const int16 new_split = which_split[tree]; if (splits_equal(splits[tree].state, new_split, @@ -176,6 +177,7 @@ void SplitRoster::mutual_clustering() { for (int16 bin = n_bins; bin--; ) { a_and_b += count_bits(SPLIT(ai)[bin] & SPLIT(bi)[bin]); } + assert(a_and_b < n_tips - 1); const int16 nb = in_split[bi], @@ -225,6 +227,7 @@ NumericVector SplitRoster::score_pairs() { ; const cost max_score = BIG; NumericVector ret(n_scores); + for (i = 0; ; ++i) { const int16 i_splits = splits[i].n_splits; // Calculate tree's similarity to self From 5adb89a0a6b8cdabb481492b1e54ee06461d2c9f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 14:10:18 +0100 Subject: [PATCH 66/80] =?UTF-8?q?ai:=E2=86=92=20roster=5Flen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SplitRoster.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index dc5d61353..4f1cc8995 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -18,6 +18,7 @@ SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { n_bins = splits[0].n_bins; const int16 max_splits = n_tips - 3; + roster_tree = std::make_unique(n_trees * max_splits); roster_split = std::make_unique(n_trees * max_splits); in_split = std::make_unique(n_trees * max_splits); @@ -118,6 +119,8 @@ inline void SplitRoster::play_game( } else if (tree2 < 0 || which_split[tree2] < 0) { child1_greater = true; } else { + assert(tree1 >= 0); + assert(tree2 >= 0); child1_greater = game_result( splits[tree1].state, which_split[tree1], splits[tree2].state, which_split[tree2]); @@ -160,7 +163,7 @@ inline void SplitRoster::push( void SplitRoster::mutual_clustering() { const cost max_score = BIG; - for (int16 ai = 0; ai != roster_len - 1; ++ai) { + for (int16 ai = 0; ai != roster_len; ++ai) { const int16 na = in_split[ai], nA = out_split[ai] @@ -169,6 +172,8 @@ void SplitRoster::mutual_clustering() { SCORE(ai, ai) = max_score - cost(max_score * (ic_matching(na, nA, n_tips) / n_tips)); + assert(SCORE(ai, ai) >= 0); + assert(SCORE(ai, ai) <= max_score); for (int16 bi = ai + 1; bi != roster_len; ++bi) { @@ -215,6 +220,8 @@ void SplitRoster::mutual_clustering() { // << " " << nA << " | " << A_and_b << " | " << A_and_B << " | " // << " = " << n_tips << ";\n\n"; } + assert(SCORE(ai, bi) >= 0); + assert(SCORE(ai, bi) <= max_score); } } } From ec0d493549693e63fe4339b194bb91bfbd8d7ca4 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 14:18:37 +0100 Subject: [PATCH 67/80] Drop TR1 reference --- src/SplitRoster.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index e1401c17d..1645be2ee 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -1,7 +1,7 @@ #ifndef _TREEDIST_SPLITROSTER_HPP #define _TREEDIST_SPLITROSTER_HPP -#include +#include #include #include #include @@ -22,7 +22,7 @@ class SplitRoster { std::unique_ptr in_split; std::unique_ptr out_split; std::unique_ptr roster_hits; - std::vector > index; + std::vector > index; bool splits_equal( const splitbit (&a)[MAX_SPLITS][MAX_BINS], const int16 split_a, From f38abdcec282269b7fda8471391b75a0a3b27436 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 14:56:34 +0100 Subject: [PATCH 68/80] =?UTF-8?q?.hpp=E2=86=92.h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Using .hpp is not guaranteed to be portable." --- src/{SplitList.hpp => SplitList.h} | 4 +- src/SplitRoster.hpp | 6 +- src/day_1985.cpp | 6 +- src/{information.hpp => information.h} | 4 +- src/{ints.hpp => ints.h} | 0 src/lap.cpp | 2 +- src/mast.cpp | 2 +- src/nni_distance.cpp | 4 +- src/timsort.h | 811 +++++++++++++++++++ src/tree_distance_functions.cpp | 4 +- src/tree_distances.cpp | 8 +- src/{tree_distances.hpp => tree_distances.h} | 4 +- 12 files changed, 833 insertions(+), 22 deletions(-) rename src/{SplitList.hpp => SplitList.h} (91%) rename src/{information.hpp => information.h} (97%) rename src/{ints.hpp => ints.h} (100%) create mode 100644 src/timsort.h rename src/{tree_distances.hpp => tree_distances.h} (99%) diff --git a/src/SplitList.hpp b/src/SplitList.h similarity index 91% rename from src/SplitList.hpp rename to src/SplitList.h index 248c275d9..42fb7a6a9 100644 --- a/src/SplitList.hpp +++ b/src/SplitList.h @@ -3,8 +3,8 @@ #include #include -#include "ints.hpp" -#include "tree_distances.hpp" +#include "ints.h" +#include "tree_distances.h" using namespace Rcpp; diff --git a/src/SplitRoster.hpp b/src/SplitRoster.hpp index 1645be2ee..1bf3106e8 100644 --- a/src/SplitRoster.hpp +++ b/src/SplitRoster.hpp @@ -5,9 +5,9 @@ #include #include #include -#include "ints.hpp" -#include "tree_distances.hpp" -#include "SplitList.hpp" +#include "ints.h" +#include "tree_distances.h" +#include "SplitList.h" using namespace Rcpp; diff --git a/src/day_1985.cpp b/src/day_1985.cpp index a2f5bd299..90999bd03 100644 --- a/src/day_1985.cpp +++ b/src/day_1985.cpp @@ -6,9 +6,9 @@ using namespace Rcpp; #include /* for bitset */ #include /* for log2() */ #include /* for unique_ptr, make_unique */ -#include "tree_distances.hpp" -#include "SplitList.hpp" -#include "information.hpp" +#include "tree_distances.h" +#include "SplitList.h" +#include "information.h" const int16 INF = INTX_MAX, diff --git a/src/information.hpp b/src/information.h similarity index 97% rename from src/information.hpp rename to src/information.h index 5993e4a7b..4bda1bdb6 100644 --- a/src/information.hpp +++ b/src/information.h @@ -2,7 +2,7 @@ #define _TREEDIST_INFO_HPP #include /* for log2() */ -#include "ints.hpp" /* for int16 */ +#include "ints.h" /* for int16 */ const int_fast32_t DAY_MAX_LEAVES = 16383, @@ -72,4 +72,4 @@ inline double split_clust_info (const int16 n_in, const int16 *n_tip, ); } -#endif \ No newline at end of file +#endif diff --git a/src/ints.hpp b/src/ints.h similarity index 100% rename from src/ints.hpp rename to src/ints.h diff --git a/src/lap.cpp b/src/lap.cpp index f292a1b36..65a3ab970 100644 --- a/src/lap.cpp +++ b/src/lap.cpp @@ -24,7 +24,7 @@ https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0: * *************************************************************************/ -#include "tree_distances.hpp" +#include "tree_distances.h" using namespace std; diff --git a/src/mast.cpp b/src/mast.cpp index 1cba125f2..15bb6e938 100644 --- a/src/mast.cpp +++ b/src/mast.cpp @@ -1,6 +1,6 @@ #include #include -#include "ints.hpp" +#include "ints.h" using namespace Rcpp; const int16 MAST_MAX_NODE = 4095, /* Increasing --> overflow size of stack frame */ diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp index f94ab25a4..05fcd4a8a 100644 --- a/src/nni_distance.cpp +++ b/src/nni_distance.cpp @@ -1,7 +1,7 @@ #include #include -#include "tree_distances.hpp" -#include "SplitList.hpp" +#include "tree_distances.h" +#include "SplitList.h" using namespace Rcpp; diff --git a/src/timsort.h b/src/timsort.h new file mode 100644 index 000000000..c8cf438dc --- /dev/null +++ b/src/timsort.h @@ -0,0 +1,811 @@ +/* + * C++ implementation of timsort + * + * ported from Python's and OpenJDK's: + * - http://svn.python.org/projects/python/trunk/Objects/listobject.c + * - http://cr.openjdk.java.net/~martin/webrevs/openjdk7/timsort/raw_files/new/src/share/classes/java/util/TimSort.java + * + * Copyright (c) 2011 Fuji, Goro (gfx) . + * Copyright (c) 2019-2021 Morwenn. + * Copyright (c) 2021 Igor Kushnir . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef GFX_TIMSORT_HPP +#define GFX_TIMSORT_HPP + +#include +#include +#include +#include +#include + +// Semantic versioning macros + +#define GFX_TIMSORT_VERSION_MAJOR 2 +#define GFX_TIMSORT_VERSION_MINOR 1 +#define GFX_TIMSORT_VERSION_PATCH 0 + +// Diagnostic selection macros + +#if defined(GFX_TIMSORT_ENABLE_ASSERT) || defined(GFX_TIMSORT_ENABLE_AUDIT) +# include +#endif + +#ifdef GFX_TIMSORT_ENABLE_ASSERT +# define GFX_TIMSORT_ASSERT(expr) assert(expr) +#else +# define GFX_TIMSORT_ASSERT(expr) ((void)0) +#endif + +#ifdef GFX_TIMSORT_ENABLE_AUDIT +# define GFX_TIMSORT_AUDIT(expr) assert(expr) +#else +# define GFX_TIMSORT_AUDIT(expr) ((void)0) +#endif + +#ifdef GFX_TIMSORT_ENABLE_LOG +# include +# define GFX_TIMSORT_LOG(expr) (std::clog << "# " << __func__ << ": " << expr << std::endl) +#else +# define GFX_TIMSORT_LOG(expr) ((void)0) +#endif + + +namespace gfx { + +// --------------------------------------- +// Implementation details +// --------------------------------------- + +namespace detail { + +// Equivalent to C++20 std::identity +struct identity { + template + constexpr T&& operator()(T&& value) const noexcept + { + return std::forward(value); + } +}; + +// Merge a predicate and a projection function +template +struct projection_compare { + projection_compare(Compare comp, Projection proj) : compare(comp), projection(proj) { + } + + template + bool operator()(T &&lhs, U &&rhs) { +#ifdef __cpp_lib_invoke + return static_cast(std::invoke(compare, + std::invoke(projection, std::forward(lhs)), + std::invoke(projection, std::forward(rhs)) + )); +#else + return static_cast(compare( + projection(std::forward(lhs)), + projection(std::forward(rhs)) + )); +#endif + } + + Compare compare; + Projection projection; +}; + +template struct run { + typedef typename std::iterator_traits::difference_type diff_t; + + Iterator base; + diff_t len; + + run(Iterator b, diff_t l) : base(b), len(l) { + } +}; + +template class TimSort { + typedef RandomAccessIterator iter_t; + typedef typename std::iterator_traits::value_type value_t; + typedef typename std::iterator_traits::reference ref_t; + typedef typename std::iterator_traits::difference_type diff_t; + + static const int MIN_MERGE = 32; + static const int MIN_GALLOP = 7; + + int minGallop_; // default to MIN_GALLOP + + std::vector tmp_; // temp storage for merges + typedef typename std::vector::iterator tmp_iter_t; + + std::vector > pending_; + + static void binarySort(iter_t const lo, iter_t const hi, iter_t start, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= start); + GFX_TIMSORT_ASSERT(start <= hi); + if (start == lo) { + ++start; + } + for (; start < hi; ++start) { + GFX_TIMSORT_ASSERT(lo <= start); + value_t pivot = std::move(*start); + + iter_t const pos = std::upper_bound(lo, start, pivot, compare); + for (iter_t p = start; p > pos; --p) { + *p = std::move(*(p - 1)); + } + *pos = std::move(pivot); + } + } + + static diff_t countRunAndMakeAscending(iter_t const lo, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo < hi); + + iter_t runHi = lo + 1; + if (runHi == hi) { + return 1; + } + + if (compare(*runHi, *lo)) { // decreasing + do { + ++runHi; + } while (runHi < hi && compare(*runHi, *(runHi - 1))); + std::reverse(lo, runHi); + } else { // non-decreasing + do { + ++runHi; + } while (runHi < hi && !compare(*runHi, *(runHi - 1))); + } + + return runHi - lo; + } + + static diff_t minRunLength(diff_t n) { + GFX_TIMSORT_ASSERT(n >= 0); + + diff_t r = 0; + while (n >= 2 * MIN_MERGE) { + r |= (n & 1); + n >>= 1; + } + return n + r; + } + + TimSort() : minGallop_(MIN_GALLOP) { + } + + // Silence GCC -Winline warning + ~TimSort() {} + + void pushRun(iter_t const runBase, diff_t const runLen) { + pending_.push_back(run(runBase, runLen)); + } + + void mergeCollapse(Compare compare) { + while (pending_.size() > 1) { + diff_t n = pending_.size() - 2; + + if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) || + (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) { + if (pending_[n - 1].len < pending_[n + 1].len) { + --n; + } + mergeAt(n, compare); + } else if (pending_[n].len <= pending_[n + 1].len) { + mergeAt(n, compare); + } else { + break; + } + } + } + + void mergeForceCollapse(Compare compare) { + while (pending_.size() > 1) { + diff_t n = pending_.size() - 2; + + if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) { + --n; + } + mergeAt(n, compare); + } + } + + void mergeAt(diff_t const i, Compare compare) { + diff_t const stackSize = pending_.size(); + GFX_TIMSORT_ASSERT(stackSize >= 2); + GFX_TIMSORT_ASSERT(i >= 0); + GFX_TIMSORT_ASSERT(i == stackSize - 2 || i == stackSize - 3); + + iter_t base1 = pending_[i].base; + diff_t len1 = pending_[i].len; + iter_t base2 = pending_[i + 1].base; + diff_t len2 = pending_[i + 1].len; + + pending_[i].len = len1 + len2; + + if (i == stackSize - 3) { + pending_[i + 1] = pending_[i + 2]; + } + + pending_.pop_back(); + + mergeConsecutiveRuns(base1, len1, base2, len2, std::move(compare)); + } + + void mergeConsecutiveRuns(iter_t base1, diff_t len1, iter_t base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + diff_t const k = gallopRight(*base2, base1, len1, 0, compare); + GFX_TIMSORT_ASSERT(k >= 0); + + base1 += k; + len1 -= k; + + if (len1 == 0) { + return; + } + + len2 = gallopLeft(*(base1 + (len1 - 1)), base2, len2, len2 - 1, compare); + GFX_TIMSORT_ASSERT(len2 >= 0); + if (len2 == 0) { + return; + } + + if (len1 <= len2) { + mergeLo(base1, len1, base2, len2, compare); + } else { + mergeHi(base1, len1, base2, len2, compare); + } + } + + template + static diff_t gallopLeft(ref_t key, Iter const base, diff_t const len, + diff_t const hint, Compare compare) { + GFX_TIMSORT_ASSERT(len > 0); + GFX_TIMSORT_ASSERT(hint >= 0); + GFX_TIMSORT_ASSERT(hint < len); + + diff_t lastOfs = 0; + diff_t ofs = 1; + + if (compare(*(base + hint), key)) { + diff_t const maxOfs = len - hint; + while (ofs < maxOfs && compare(*(base + (hint + ofs)), key)) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { // int overflow + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + lastOfs += hint; + ofs += hint; + } else { + diff_t const maxOfs = hint + 1; + while (ofs < maxOfs && !compare(*(base + (hint - ofs)), key)) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + diff_t const tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } + GFX_TIMSORT_ASSERT(-1 <= lastOfs); + GFX_TIMSORT_ASSERT(lastOfs < ofs); + GFX_TIMSORT_ASSERT(ofs <= len); + + return std::lower_bound(base + (lastOfs + 1), base + ofs, key, compare) - base; + } + + template + static diff_t gallopRight(ref_t key, Iter const base, diff_t const len, + diff_t const hint, Compare compare) { + GFX_TIMSORT_ASSERT(len > 0); + GFX_TIMSORT_ASSERT(hint >= 0); + GFX_TIMSORT_ASSERT(hint < len); + + diff_t ofs = 1; + diff_t lastOfs = 0; + + if (compare(key, *(base + hint))) { + diff_t const maxOfs = hint + 1; + while (ofs < maxOfs && compare(key, *(base + (hint - ofs)))) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + diff_t const tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } else { + diff_t const maxOfs = len - hint; + while (ofs < maxOfs && !compare(key, *(base + (hint + ofs)))) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { // int overflow + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + lastOfs += hint; + ofs += hint; + } + GFX_TIMSORT_ASSERT(-1 <= lastOfs); + GFX_TIMSORT_ASSERT(lastOfs < ofs); + GFX_TIMSORT_ASSERT(ofs <= len); + + return std::upper_bound(base + (lastOfs + 1), base + ofs, key, compare) - base; + } + + static void rotateLeft(iter_t first, iter_t last) + { + value_t tmp = std::move(*first); + iter_t last_1 = std::move(first + 1, last, first); + *last_1 = std::move(tmp); + } + + static void rotateRight(iter_t first, iter_t last) + { + iter_t last_1 = last - 1; + value_t tmp = std::move(*last_1); + std::move_backward(first, last_1, last); + *first = std::move(tmp); + } + + + void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + if (len1 == 1) { + return rotateLeft(base1, base2 + len2); + } + if (len2 == 1) { + return rotateRight(base1, base2 + len2); + } + + copy_to_tmp(base1, len1); + + tmp_iter_t cursor1 = tmp_.begin(); + iter_t cursor2 = base2; + iter_t dest = base1; + + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + --len2; + + int minGallop(minGallop_); + + // outer: + while (true) { + diff_t count1 = 0; + diff_t count2 = 0; + + do { + GFX_TIMSORT_ASSERT(len1 > 1); + GFX_TIMSORT_ASSERT(len2 > 0); + + if (compare(*cursor2, *cursor1)) { + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + ++count2; + count1 = 0; + if (--len2 == 0) { + goto epilogue; + } + } else { + *dest = std::move(*cursor1); + ++cursor1; + ++dest; + ++count1; + count2 = 0; + if (--len1 == 1) { + goto epilogue; + } + } + } while ((count1 | count2) < minGallop); + + do { + GFX_TIMSORT_ASSERT(len1 > 1); + GFX_TIMSORT_ASSERT(len2 > 0); + + count1 = gallopRight(*cursor2, cursor1, len1, 0, compare); + if (count1 != 0) { + std::move_backward(cursor1, cursor1 + count1, dest + count1); + dest += count1; + cursor1 += count1; + len1 -= count1; + + if (len1 <= 1) { + goto epilogue; + } + } + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + if (--len2 == 0) { + goto epilogue; + } + + count2 = gallopLeft(*cursor1, cursor2, len2, 0, compare); + if (count2 != 0) { + std::move(cursor2, cursor2 + count2, dest); + dest += count2; + cursor2 += count2; + len2 -= count2; + if (len2 == 0) { + goto epilogue; + } + } + *dest = std::move(*cursor1); + ++cursor1; + ++dest; + if (--len1 == 1) { + goto epilogue; + } + + --minGallop; + } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP)); + + if (minGallop < 0) { + minGallop = 0; + } + minGallop += 2; + } // end of "outer" loop + + epilogue: // merge what is left from either cursor1 or cursor2 + + minGallop_ = (std::min)(minGallop, 1); + + if (len1 == 1) { + GFX_TIMSORT_ASSERT(len2 > 0); + std::move(cursor2, cursor2 + len2, dest); + *(dest + len2) = std::move(*cursor1); + } else { + GFX_TIMSORT_ASSERT(len1 != 0 && "Comparison function violates its general contract"); + GFX_TIMSORT_ASSERT(len2 == 0); + GFX_TIMSORT_ASSERT(len1 > 1); + std::move(cursor1, cursor1 + len1, dest); + } + } + + void mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + if (len1 == 1) { + return rotateLeft(base1, base2 + len2); + } + if (len2 == 1) { + return rotateRight(base1, base2 + len2); + } + + copy_to_tmp(base2, len2); + + iter_t cursor1 = base1 + len1; + tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1); + iter_t dest = base2 + (len2 - 1); + + *dest = std::move(*(--cursor1)); + --dest; + --len1; + + int minGallop(minGallop_); + + // outer: + while (true) { + diff_t count1 = 0; + diff_t count2 = 0; + + // The next loop is a hot path of the algorithm, so we decrement + // eagerly the cursor so that it always points directly to the value + // to compare, but we have to implement some trickier logic to make + // sure that it points to the next value again by the end of said loop + --cursor1; + + do { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 1); + + if (compare(*cursor2, *cursor1)) { + *dest = std::move(*cursor1); + --dest; + ++count1; + count2 = 0; + if (--len1 == 0) { + goto epilogue; + } + --cursor1; + } else { + *dest = std::move(*cursor2); + --cursor2; + --dest; + ++count2; + count1 = 0; + if (--len2 == 1) { + ++cursor1; // See comment before the loop + goto epilogue; + } + } + } while ((count1 | count2) < minGallop); + ++cursor1; // See comment before the loop + + do { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 1); + + count1 = len1 - gallopRight(*cursor2, base1, len1, len1 - 1, compare); + if (count1 != 0) { + dest -= count1; + cursor1 -= count1; + len1 -= count1; + std::move_backward(cursor1, cursor1 + count1, dest + (1 + count1)); + + if (len1 == 0) { + goto epilogue; + } + } + *dest = std::move(*cursor2); + --cursor2; + --dest; + if (--len2 == 1) { + goto epilogue; + } + + count2 = len2 - gallopLeft(*(cursor1 - 1), tmp_.begin(), len2, len2 - 1, compare); + if (count2 != 0) { + dest -= count2; + cursor2 -= count2; + len2 -= count2; + std::move(cursor2 + 1, cursor2 + (1 + count2), dest + 1); + if (len2 <= 1) { + goto epilogue; + } + } + *dest = std::move(*(--cursor1)); + --dest; + if (--len1 == 0) { + goto epilogue; + } + + --minGallop; + } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP)); + + if (minGallop < 0) { + minGallop = 0; + } + minGallop += 2; + } // end of "outer" loop + + epilogue: // merge what is left from either cursor1 or cursor2 + + minGallop_ = (std::min)(minGallop, 1); + + if (len2 == 1) { + GFX_TIMSORT_ASSERT(len1 > 0); + dest -= len1; + std::move_backward(cursor1 - len1, cursor1, dest + (1 + len1)); + *dest = std::move(*cursor2); + } else { + GFX_TIMSORT_ASSERT(len2 != 0 && "Comparison function violates its general contract"); + GFX_TIMSORT_ASSERT(len1 == 0); + GFX_TIMSORT_ASSERT(len2 > 1); + std::move(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + } + } + + void copy_to_tmp(iter_t const begin, diff_t len) { + tmp_.assign(std::make_move_iterator(begin), + std::make_move_iterator(begin + len)); + } + +public: + + static void merge(iter_t const lo, iter_t const mid, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= mid); + GFX_TIMSORT_ASSERT(mid <= hi); + + if (lo == mid || mid == hi) { + return; // nothing to do + } + + TimSort ts; + ts.mergeConsecutiveRuns(lo, mid - lo, mid, hi - mid, std::move(compare)); + + GFX_TIMSORT_LOG("1st size: " << (mid - lo) << "; 2nd size: " << (hi - mid) + << "; tmp_.size(): " << ts.tmp_.size()); + } + + static void sort(iter_t const lo, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= hi); + + diff_t nRemaining = (hi - lo); + if (nRemaining < 2) { + return; // nothing to do + } + + if (nRemaining < MIN_MERGE) { + diff_t const initRunLen = countRunAndMakeAscending(lo, hi, compare); + GFX_TIMSORT_LOG("initRunLen: " << initRunLen); + binarySort(lo, hi, lo + initRunLen, compare); + return; + } + + TimSort ts; + diff_t const minRun = minRunLength(nRemaining); + iter_t cur = lo; + do { + diff_t runLen = countRunAndMakeAscending(cur, hi, compare); + + if (runLen < minRun) { + diff_t const force = (std::min)(nRemaining, minRun); + binarySort(cur, cur + force, cur + runLen, compare); + runLen = force; + } + + ts.pushRun(cur, runLen); + ts.mergeCollapse(compare); + + cur += runLen; + nRemaining -= runLen; + } while (nRemaining != 0); + + GFX_TIMSORT_ASSERT(cur == hi); + ts.mergeForceCollapse(compare); + GFX_TIMSORT_ASSERT(ts.pending_.size() == 1); + + GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size() + << " pending_.size(): " << ts.pending_.size()); + } +}; + +} // namespace detail + + +// --------------------------------------- +// Public interface implementation +// --------------------------------------- + +/** + * Stably merges two consecutive sorted ranges [first, middle) and [middle, last) into one + * sorted range [first, last) with a comparison function and a projection function. + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last, Compare compare, Projection projection) { + typedef detail::projection_compare compare_t; + compare_t comp(std::move(compare), std::move(projection)); + GFX_TIMSORT_AUDIT(std::is_sorted(first, middle, comp) && "Precondition"); + GFX_TIMSORT_AUDIT(std::is_sorted(middle, last, comp) && "Precondition"); + detail::TimSort::merge(first, middle, last, comp); + GFX_TIMSORT_AUDIT(std::is_sorted(first, last, comp) && "Postcondition"); +} + +/** + * Same as std::inplace_merge(first, middle, last, compare). + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last, Compare compare) { + gfx::timmerge(first, middle, last, compare, detail::identity()); +} + +/** + * Same as std::inplace_merge(first, middle, last). + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last) { + typedef typename std::iterator_traits::value_type value_type; + gfx::timmerge(first, middle, last, std::less(), detail::identity()); +} + +/** + * Stably sorts a range with a comparison function and a projection function. + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last, + Compare compare, Projection projection) { + typedef detail::projection_compare compare_t; + compare_t comp(std::move(compare), std::move(projection)); + detail::TimSort::sort(first, last, comp); + GFX_TIMSORT_AUDIT(std::is_sorted(first, last, comp) && "Postcondition"); +} + +/** + * Same as std::stable_sort(first, last, compare). + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last, Compare compare) { + gfx::timsort(first, last, compare, detail::identity()); +} + +/** + * Same as std::stable_sort(first, last). + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last) { + typedef typename std::iterator_traits::value_type value_type; + gfx::timsort(first, last, std::less(), detail::identity()); +} + +/** + * Stably sorts a range with a comparison function and a projection function. + */ +template +void timsort(RandomAccessRange &range, Compare compare, Projection projection) { + gfx::timsort(std::begin(range), std::end(range), compare, projection); +} + +/** + * Same as std::stable_sort(std::begin(range), std::end(range), compare). + */ +template +void timsort(RandomAccessRange &range, Compare compare) { + gfx::timsort(std::begin(range), std::end(range), compare); +} + +/** + * Same as std::stable_sort(std::begin(range), std::end(range)). + */ +template +void timsort(RandomAccessRange &range) { + gfx::timsort(std::begin(range), std::end(range)); +} + +} // namespace gfx + +#undef GFX_TIMSORT_ENABLE_ASSERT +#undef GFX_TIMSORT_ASSERT +#undef GFX_TIMSORT_ENABLE_AUDIT +#undef GFX_TIMSORT_AUDIT +#undef GFX_TIMSORT_ENABLE_LOG +#undef GFX_TIMSORT_LOG + +#endif // GFX_TIMSORT_HPP \ No newline at end of file diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp index d123de02e..edc05f1db 100644 --- a/src/tree_distance_functions.cpp +++ b/src/tree_distance_functions.cpp @@ -1,8 +1,8 @@ #include using namespace Rcpp; #include /* for log2() */ -#include "tree_distances.hpp" -#include "SplitList.hpp" +#include "tree_distances.h" +#include "SplitList.h" uint_fast32_t bitcounts[65536]; // the bytes representing bit count of each number 0-65535 diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 31e830827..83be425d5 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -1,9 +1,9 @@ #include #include /* for unique_ptr, make_unique */ #include -#include "tree_distances.hpp" -#include "SplitList.hpp" -#include "SplitRoster.hpp" +#include "tree_distances.h" +#include "SplitList.h" +#include "SplitRoster.h" using namespace Rcpp; @@ -643,4 +643,4 @@ List cpp_shared_phylo (const RawMatrix x, const RawMatrix y, _["matching"] = final_matching); } - \ No newline at end of file + diff --git a/src/tree_distances.hpp b/src/tree_distances.h similarity index 99% rename from src/tree_distances.hpp rename to src/tree_distances.h index 7aba769ee..0f575c734 100644 --- a/src/tree_distances.hpp +++ b/src/tree_distances.h @@ -3,7 +3,7 @@ #include #include -#include "ints.hpp" +#include "ints.h" using namespace Rcpp; @@ -81,4 +81,4 @@ extern double extern List cpp_robinson_foulds_distance (RawMatrix x, RawMatrix y, IntegerVector nTip); -#endif \ No newline at end of file +#endif From 9649ec25d25eac7e2f025a1ba83e221c8d02a65f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:04:12 +0100 Subject: [PATCH 69/80] =?UTF-8?q?.hpp=E2=86=92.h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/{SplitRoster.hpp => SplitRoster.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{SplitRoster.hpp => SplitRoster.h} (100%) diff --git a/src/SplitRoster.hpp b/src/SplitRoster.h similarity index 100% rename from src/SplitRoster.hpp rename to src/SplitRoster.h From a44e2059698989e07645a4136daa2a0f278d2c89 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:05:23 +0100 Subject: [PATCH 70/80] =?UTF-8?q?.hpp=E2=86=92.h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SplitRoster.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 4f1cc8995..7fe84e5fa 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -1,8 +1,8 @@ #include -#include "ints.hpp" -#include "tree_distances.hpp" -#include "SplitList.hpp" -#include "SplitRoster.hpp" +#include "ints.h" +#include "tree_distances.h" +#include "SplitList.h" +#include "SplitRoster.h" using namespace Rcpp; SplitRoster::SplitRoster(const List x, const IntegerVector nTip) { From 9b5f61ff664777e73076859bedad92bd6b287967 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:05:34 +0100 Subject: [PATCH 71/80] =?UTF-8?q?.hpp=E2=86=92.h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SplitList.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index aaeb60143..00760a120 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -1,8 +1,8 @@ #include using namespace Rcpp; #include -#include "tree_distances.hpp" // Before SplitList.h, for int16 -#include "SplitList.hpp" +#include "tree_distances.h" // Before SplitList.h, for int16 +#include "SplitList.h" #define INLASTBIN(n, size) ((size) - ((size) - ((n) % (size))) % (size)) From a6edeb41d6fc3732ae48148032a7578dafbac4b0 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:06:29 +0100 Subject: [PATCH 72/80] Don't calculate self-similarity --- R/tree_distance_info.R | 7 +++---- src/SplitRoster.cpp | 14 ++++++-------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index b25cefffa..de9b25013 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -299,10 +299,9 @@ MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, reportMatching = FALSE, diag = TRUE) { unnormalized <- if (is.null(tree2) && is.list(tree1) && !inherits(tree1, 'phylo')) { - mtx <- matrix(0, length(tree1), length(tree1)) - mtx[!upper.tri(mtx)] <- cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])) - mtx[upper.tri(mtx)] <- t(mtx)[upper.tri(mtx)] - mtx + structure(cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])), + class = "Dist", + Size = length(tree1), diag = FALSE, upper = FALSE) } else { CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, reportMatching) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 7fe84e5fa..a5c665acd 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -230,20 +230,18 @@ NumericVector SplitRoster::score_pairs() { int32 i = 0, entry = 0, - n_scores = n_trees * (n_trees + 1) / 2; + n_scores = n_trees * (n_trees - 1) / 2; ; const cost max_score = BIG; NumericVector ret(n_scores); - for (i = 0; ; ++i) { + for (i = 0; i != n_trees - 1; ++i) { const int16 i_splits = splits[i].n_splits; // Calculate tree's similarity to self - for (int16 sp = 0; sp != i_splits; ++sp) { - const int32 sp_i = index[i][sp]; - ret[entry] += SCORE(sp_i, sp_i); - } - - if (i == n_trees - 1) break; + // for (int16 sp = 0; sp != i_splits; ++sp) { + // const int32 sp_i = index[i][sp]; + // ret[entry] += SCORE(sp_i, sp_i); + // } entry++; for (int32 j = i + 1; j != n_trees; ++j) { const int16 From 0ba89d782ed3cd926be09cbca70637d526a2b75b Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:07:54 +0100 Subject: [PATCH 73/80] spell 'dist' --- R/tree_distance_info.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index de9b25013..5ca371ab5 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -300,7 +300,7 @@ MutualClusteringInfo <- function (tree1, tree2 = NULL, normalize = FALSE, unnormalized <- if (is.null(tree2) && is.list(tree1) && !inherits(tree1, 'phylo')) { structure(cpp_all_pairs_mci(as.Splits(tree1, tree1[[1]]), NTip(tree1[[1]])), - class = "Dist", + class = "dist", Size = length(tree1), diag = FALSE, upper = FALSE) } else { CalculateTreeDistance(MutualClusteringInfoSplits, tree1, tree2, From c1ee96400ea55d039183589c29e38395f43c2bac Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:10:55 +0100 Subject: [PATCH 74/80] // entry++ --- src/SplitRoster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index a5c665acd..9dc894ce8 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -242,7 +242,7 @@ NumericVector SplitRoster::score_pairs() { // const int32 sp_i = index[i][sp]; // ret[entry] += SCORE(sp_i, sp_i); // } - entry++; + // entry++; for (int32 j = i + 1; j != n_trees; ++j) { const int16 j_splits = splits[j].n_splits, From 2437e8d0d55e4040e122867f467fdd7a2a95ad2f Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:11:29 +0100 Subject: [PATCH 75/80] Populate spare j-splits --- src/SplitRoster.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index 9dc894ce8..a17e63e20 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -270,6 +270,11 @@ NumericVector SplitRoster::score_pairs() { lap_score[sp_i][sp_j] = max_score; } } + for (int16 sp_j = j_splits; sp_j < most_splits; ++sp_j) { + for (int16 sp_i = 0; sp_i != most_splits; ++sp_i) { + lap_score[sp_i][sp_j] = max_score; + } + } ret[entry] = double( (max_score * most_splits) - From aed65a4434e8d6aa39026d9425a6aa166b6f17ce Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:38:07 +0100 Subject: [PATCH 76/80] spacing --- src/SplitList.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index 00760a120..a6ecdd3a2 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -86,6 +86,7 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { state[split][last_bin] += INBIN(input_bin, last_bin); // Rcout << " by " << INBIN(input_bin, last_bin) << "\n"; } + const bool invert = state[split][last_bin] & splitbit(1); if (invert) { const int16 last_bin_tips = INLASTBIN(n_tips, BIN_SIZE); @@ -97,9 +98,11 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { state[split][last_bin] = ~state[split][last_bin] & last_mask; } + in_split[split] = count_bits(state[split][last_bin]); for (int16 bin = 0; bin != n_bins - 1; bin++) { + // Rcout << "Split " << split << ", bin << " << bin << ".\n"; state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { @@ -107,15 +110,19 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { // << (splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) << " << " // << (R_BIN_SIZE * input_bin) << " to state [" << split << "][" // << bin << "], was " << state[split][bin] << "\n"; + state[split][bin] += INBIN(input_bin, bin); } + if (invert) { state[split][bin] = ~state[split][bin]; } + in_split[split] += count_bits(state[split][bin]); } } } + void SplitList::swap(int16 a, int16 b) { for (int16 bin = n_bins; bin--; ) { const splitbit tmp = state[a][bin]; From 201dc8e37a89ddba1ec4aeebfa37d6552fa2e592 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 15:38:15 +0100 Subject: [PATCH 77/80] don't invert --- src/SplitList.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SplitList.cpp b/src/SplitList.cpp index a6ecdd3a2..f672723cc 100644 --- a/src/SplitList.cpp +++ b/src/SplitList.cpp @@ -104,7 +104,8 @@ SplitList::SplitList(RawMatrix x, int16 n_tips) { for (int16 bin = 0; bin != n_bins - 1; bin++) { // Rcout << "Split " << split << ", bin << " << bin << ".\n"; - state[split][bin] = invert ? ~INSUBBIN(bin, 0) : INSUBBIN(bin, 0); + state[split][bin] = INSUBBIN(bin, 0); + for (int16 input_bin = 1; input_bin != input_bins_per_bin; input_bin++) { // Rcout << " Adding " << INBIN(input_bin, bin) << " = " // << (splitbit (x(split, (bin * input_bins_per_bin) + input_bin))) << " << " From c23662d0790929a3229659b772a9438b851c4218 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 16:01:23 +0100 Subject: [PATCH 78/80] =?UTF-8?q?consts=20=E2=86=92=20#defines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tree_distances.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tree_distances.h b/src/tree_distances.h index 0f575c734..4210d7a97 100644 --- a/src/tree_distances.h +++ b/src/tree_distances.h @@ -17,18 +17,18 @@ typedef int16 lap_col; /*************** CONSTANTS *******************/ -const int16 BIN_SIZE = 64, - MAX_BINS = 32, - MAX_TIPS = BIN_SIZE * MAX_BINS, - MAX_SPLITS = MAX_TIPS; /* -3, but quicker if a power of two? */ +#define BIN_SIZE 64 +#define MAX_BINS 32 +#define MAX_TIPS (BIN_SIZE * MAX_BINS) +#define MAX_SPLITS MAX_TIPS /* -3, but quicker if a power of two? */ -const splitbit ALL_ONES = (std::numeric_limits::max)(); +#define ALL_ONES splitbit((std::numeric_limits::max)()) /* For a reason I've not estabilshed, shrinking BIG is necessary to avoid * an infinite loop in lap. */ -const cost BIG = ((std::numeric_limits::max)() / MAX_SPLITS); +#define BIG cost((std::numeric_limits::max)() / MAX_SPLITS) -const splitbit right16bits = 65535U; +#define right16bits splitbit(65535U) const splitbit powers_of_two[64] = { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, @@ -48,7 +48,7 @@ const splitbit powers_of_two[64] = { 0x1000000000000000, 0x2000000000000000, 0x4000000000000000, 0x8000000000000000 }; -const cost ROUND_PRECISION = 2048 * 2048; +#define ROUND_PRECISION cost(2048 * 2048) /***** Constants requiring initialization *****/ From 823bf06f88182dfb06254174d1093225bb0ad49d Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 16:26:06 +0100 Subject: [PATCH 79/80] use memory once --- src/SplitRoster.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index a17e63e20..be3a18063 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -232,8 +232,15 @@ NumericVector SplitRoster::score_pairs() { entry = 0, n_scores = n_trees * (n_trees - 1) / 2; ; - const cost max_score = BIG; NumericVector ret(n_scores); + const cost max_score = BIG; + const int16 max_splits = n_tips - 3; + cost** lap_score = new cost*[max_splits]; + for (int16 li = max_splits; li--; ) lap_score[li] = new cost[max_splits]; + lap_col *rowsol = new lap_col[max_splits]; + lap_row *colsol = new lap_row[max_splits]; + cost *u = new cost[max_splits], *v = new cost[max_splits]; + for (i = 0; i != n_trees - 1; ++i) { const int16 i_splits = splits[i].n_splits; @@ -281,12 +288,12 @@ NumericVector SplitRoster::score_pairs() { lap(most_splits, lap_score, rowsol, colsol, u, v) ) / max_score; - for (int16 li = most_splits; li--; ) delete[] lap_score[li]; - delete[] colsol; delete[] u; delete[] v; delete[] lap_score; - delete[] rowsol; entry++; } } + for (int16 li = max_splits; li--; ) delete[] lap_score[li]; + delete[] colsol; delete[] u; delete[] v; delete[] lap_score; + delete[] rowsol; return ret; } From 67c90ad6e67d889c0033558f144ce6389301b3b7 Mon Sep 17 00:00:00 2001 From: Martin Smith Date: Thu, 22 Jul 2021 16:26:21 +0100 Subject: [PATCH 80/80] ws --- src/SplitRoster.cpp | 1 + src/tree_distances.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SplitRoster.cpp b/src/SplitRoster.cpp index be3a18063..671b4ebbb 100644 --- a/src/SplitRoster.cpp +++ b/src/SplitRoster.cpp @@ -227,6 +227,7 @@ void SplitRoster::mutual_clustering() { } NumericVector SplitRoster::score_pairs() { + int32 i = 0, entry = 0, diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 83be425d5..f9da15e4a 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -500,7 +500,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, } } - const double lap_score = + const double lap_score = double((max_score * lap_dim) - lap(lap_dim, score, rowsol, colsol, u, v)) / max_score; NumericVector final_score =