From 77ebc1c2ea4ff380cbbe5ba244c81d8ce2b20aa5 Mon Sep 17 00:00:00 2001 From: GregJohnson <31873199+YoungKrug@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:48:55 -0400 Subject: [PATCH] Release polish (#6) * Added to build ignore and properly documented the new R code. * Unstable, have to fix the mismatch header situation, but was able to return a vector with all the metrics! * Cluster now returns test metrics and added test to support it. --- .Rbuildignore | 2 ++ R/Cluster.R | 25 +++++++++++++++++++----- man/opti_cluster.Rd | 4 +++- src/ClusterCommand.cpp | 22 +++++++++++---------- src/MothurDependencies/ClusterCommand.h | 4 ++-- src/RcppExports.cpp | 2 +- src/main.cpp | 2 +- tests/.DS_Store | Bin 0 -> 6148 bytes tests/testthat/.DS_Store | Bin 0 -> 6148 bytes tests/testthat/extdata/randomdata.txt | 5 +++++ tests/testthat/extdata/test_file.txt | 4 ++++ tests/testthat/test-test-opticluster.R | 6 ++++-- 12 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 tests/.DS_Store create mode 100644 tests/testthat/.DS_Store create mode 100644 tests/testthat/extdata/randomdata.txt create mode 100644 tests/testthat/extdata/test_file.txt diff --git a/.Rbuildignore b/.Rbuildignore index c503c4f..442f8bf 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,3 @@ ^\.github$ +^src/CMakeLists\.txt$ +^src/cmake-build-debug$ diff --git a/R/Cluster.R b/R/Cluster.R index c9417c0..4cf6d84 100644 --- a/R/Cluster.R +++ b/R/Cluster.R @@ -12,16 +12,31 @@ #' @param sparse_matrix A Sparse Matrix. #' @param cutoff A cutoff value #' @param iterations The number of iterations +#' @param shuffle a boolean to determine whether or not you want to shuffle the data before you cluster #' @return A data.frame of the clusters. opti_cluster <- function(sparse_matrix, cutoff, iterations, shuffle = TRUE) { index_one_list <- sparse_matrix@i index_two_list <- sparse_matrix@j value_list <- sparse_matrix@x - clustering_output_string <- MatrixToOpiMatrixCluster(index_one_list, index_two_list, value_list, cutoff, + clustering_output_string_list <- MatrixToOpiMatrixCluster(index_one_list, index_two_list, value_list, cutoff, iterations, shuffle) - df <- t(read.table(text = clustering_output_string, + clustering_output_string <- clustering_output_string_list[1] + clustering_metric <- clustering_output_string_list[2] + clustering_metric_2 <- clustering_output_string_list[3] + df_cluster_metrics <- (read.table(text = clustering_metric, sep = "\t", header = TRUE)) - df <- data.frame(df[-1, ]) - colnames(df)[1] <- "cluster" - return(df) + df_other_cluster_metrics <- (read.table(text = clustering_metric_2, + sep = "\t", header = TRUE)) + + df_cluster <- t(read.table(text = clustering_output_string, + sep = "\t", header = TRUE)) + df_cluster <- data.frame(df_cluster[-1, ]) + + colnames(df_cluster)[1] <- "cluster" + + opticluster_data <- list(cluster = df_cluster, + cluster_metrics = df_cluster_metrics, + other_cluster_metrics = df_other_cluster_metrics) + + return(opticluster_data) } diff --git a/man/opti_cluster.Rd b/man/opti_cluster.Rd index a4443ef..fc3e767 100644 --- a/man/opti_cluster.Rd +++ b/man/opti_cluster.Rd @@ -4,7 +4,7 @@ \alias{opti_cluster} \title{Opticluster Description} \usage{ -opti_cluster(sparse_matrix, cutoff, iterations) +opti_cluster(sparse_matrix, cutoff, iterations, shuffle = TRUE) } \arguments{ \item{sparse_matrix}{A Sparse Matrix.} @@ -12,6 +12,8 @@ opti_cluster(sparse_matrix, cutoff, iterations) \item{cutoff}{A cutoff value} \item{iterations}{The number of iterations} + +\item{shuffle}{a boolean to determine whether or not you want to shuffle the data before you cluster} } \value{ A data.frame of the clusters. diff --git a/src/ClusterCommand.cpp b/src/ClusterCommand.cpp index 4417969..7d53fd8 100644 --- a/src/ClusterCommand.cpp +++ b/src/ClusterCommand.cpp @@ -16,17 +16,17 @@ ClusterCommand::~ClusterCommand() { /// Bad allocations, returns basic_string, returns empty string, returns non-utf8 characters, etc /// @param optiMatrix /// @return -std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { +std::vector ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { std::string clusterMetrics; std::string sensFile; std::string outStep; std::string clusterMatrixOutput; if (!cutOffSet) { - clusterMetrics += ("\nYou did not set a cutoff, using 0.03.\n"); + // clusterMetrics += ("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.05; } - clusterMetrics += ("\nClustering " + distfile + "\n"); + // clusterMetrics += ("\nClustering " + distfile + "\n"); ClusterMetric *metric = nullptr; metricName = "mcc"; @@ -39,7 +39,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { metricName == "accuracy") { metric = new Accuracy(); } else if ( metricName == "ppv") { metric = new PPV(); } else if (metricName == "npv") { metric = new NPV(); } else if ( metricName == "fdr") { metric = new FDR(); } else if (metricName == "fpfn") { metric = new FPFN(); } else { - return 0; + return {}; } // string nameOrCount = ""; @@ -59,7 +59,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { // sensFile += "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; clusterMetrics += ( - "\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); + "iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); outStep += "iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; @@ -91,7 +91,8 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { outStep += std::to_string(result) + "\t"; } //m->mothurOutEndLine(); - // outStep += "\n"; + clusterMetrics += "\n"; + outStep += "\n"; // Stable Metric -> Keep the data stable, to prevent errors (rounding errors) // The difference between what the current and last metric (delta) // MaxIters -> is an exit condition @@ -105,6 +106,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { iters++; stats = cluster.getStats(tp, tn, fp, fn); + numBins = cluster.getNumBins(); clusterMetrics += (std::to_string(iters) + "\t" + std::to_string(time(nullptr) - start) + "\t" + @@ -120,11 +122,12 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { clusterMetrics += (std::to_string(result) + "\t"); outStep += std::to_string(result) + "\t"; } + clusterMetrics += "\n"; outStep += "\n"; } ListVector *list = nullptr; - clusterMetrics += "\n\n"; + // clusterMetrics += "\n\n"; list = cluster.getList(); // if (printHeaders) { @@ -138,9 +141,8 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) { sensFile += std::to_string(cutoff) + '\t' + std::to_string(cutoff) + '\t' + std::to_string(tp) + '\t' + std::to_string(tn) + '\t' + std::to_string(fp) + '\t' + std::to_string(fn) + '\t'; - for (double result : stats) { sensFile + std::to_string(result) + '\t'; } - Rcpp::Rcout << "Metrics for the current cluster:\n\n " << sensFile << "\n\n" << clusterMetrics << "\n\n"; + for (double result : stats) { sensFile += std::to_string(result) + '\t'; } } delete matrix; - return clusterMatrixOutput; + return {clusterMatrixOutput, sensFile, clusterMetrics}; } diff --git a/src/MothurDependencies/ClusterCommand.h b/src/MothurDependencies/ClusterCommand.h index 70892c5..2d6199c 100644 --- a/src/MothurDependencies/ClusterCommand.h +++ b/src/MothurDependencies/ClusterCommand.h @@ -48,14 +48,14 @@ using namespace std; class ClusterCommand { - public: //ClusterCommand(string); ClusterCommand() {} ~ClusterCommand(); bool SetMaxIterations(const int iterations) {maxIters = iterations; return maxIters == iterations;} bool SetOpticlusterRandomShuffle(const bool shuffle) {canShuffle = shuffle; return canShuffle;} - std::string runOptiCluster(OptiMatrix*); + bool SetMetricType(const string& newMetric) {metric = newMetric; return metric == newMetric;} + std::vector runOptiCluster(OptiMatrix*); diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 1d31b3f..8287fed 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,7 +11,7 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // MatrixToOpiMatrixCluster -std::string MatrixToOpiMatrixCluster(const std::vector& xPosition, const std::vector& yPosition, const std::vector& data, const double cutoff, const int iterations, const bool shuffle); +std::vector MatrixToOpiMatrixCluster(const std::vector& xPosition, const std::vector& yPosition, const std::vector& data, const double cutoff, const int iterations, const bool shuffle); RcppExport SEXP _Opticluster_MatrixToOpiMatrixCluster(SEXP xPositionSEXP, SEXP yPositionSEXP, SEXP dataSEXP, SEXP cutoffSEXP, SEXP iterationsSEXP, SEXP shuffleSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; diff --git a/src/main.cpp b/src/main.cpp index 0bd7b3e..089672d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -12,7 +12,7 @@ #include //[[Rcpp::export]] -std::string MatrixToOpiMatrixCluster(const std::vector &xPosition, +std::vector MatrixToOpiMatrixCluster(const std::vector &xPosition, const std::vector &yPosition, const std::vector &data, const double cutoff, const int iterations = 2, const bool shuffle = true) { diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..50263db50192ad10b7de85110f640b3ac7fe9709 GIT binary patch literal 6148 zcmeHK%}T>S5Z<-bZYe?z3OxqA7OX!N#Y>3w1&ruHr6wfUV9b^#HHT8jSzpK}@p+ut z-GHSzcoMNQu=~x<&u->}>56$kiM<8#ABfL6J>r zCi;se{C2@|7O@QaZ{HszG63I4FiGMp@Ap6XMx(j8)ev&{9UcR0G1 zG6^c(4{qY97+E_PGR^%sjixFg4kHM;zm3yS7OtG7VWx6D?GSCz9$DS_e9-Ur?BmnH zqG!*~4pBckSuEOOcW?jvYV@4Er1DLZ$$@tz+XgFm2W6$MS8tjoGJOJHl~=_R5(C5l zF+dEg4+G{55S{fYnJOg)h=Ct6fct|D4bd@JYE)YXba;J6e-jY}bbL!7+6EnirAF|8 zaGeUMQ@MF!aGefz+r&8rON~06akVncV^*#nFI=q-c3Xur?r5Z*7$63g8EELCh3Eez z{4%wV{N)rH5d*})KVyJ5hu+YIqRiR4QXZbQHnc}*C>U3w0s{KdB>)Dvk93t&`)$-A Z&M{bO#97d;(gEorpa`Lk82AMSz5w+-O+Wwu literal 0 HcmV?d00001 diff --git a/tests/testthat/.DS_Store b/tests/testthat/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3877875b374a75ab0c438f6cbcc3e6bf172d55e9 GIT binary patch literal 6148 zcmeHK%TB{E5S)cdRB-7HE=c|XD)9%VDhICc12m5VQluc-N*wb?d?L(xO-V?^5h1i2 z*|WBHC(cOm1Awgd)itmHFlSK|btX)^Cx?!5@Qf&lV}l(wcTHOs%>>b3?9#cPpvEg_ zUZFStw%~}$a>KHGPeyMPU2V&zXxoMY;FKgawydvE;+fA9_a?`I?u~JMy7OMnS9Z(X z9iGV2V4oOkyo}l8@*5kQZmxhU;0m|`uE5U<=$1vBpBZ}X3b+EUz(E0B9|DVF%-9*G zrGr(v0uc2Wos7O7nl&e6j2Syaj?lzXiIyt)iXoQH{*dEh#?H{vA^Gwl`OTk~7fEj? ze@NkwXy~;o;0g>CIJV|O_y1e|GLx75VT!k00axIkDIl};!+OP!%6seR^K`EbERQT| r8rP{qqdzzVu%YY7MH;m6pg!Yb#?DY#v_GX2{UDG8@yZqW1qD6;#)m;G literal 0 HcmV?d00001 diff --git a/tests/testthat/extdata/randomdata.txt b/tests/testthat/extdata/randomdata.txt new file mode 100644 index 0000000..a06cdf7 --- /dev/null +++ b/tests/testthat/extdata/randomdata.txt @@ -0,0 +1,5 @@ +iter time label num_otus cutoff tp tn fp fn sensitivity specificity ppv npv fdr accuracy mcc f1score +0 0 0.000000 351 0.000000 4199.000000 256025.000000 150647.000000 0.000000 1.000000 0.629561 0.027117 1.000000 0.027117 0.633347 0.130660 0.052803 +1 1718226744 0.000000 604 0.000000 3171.000000 391832.000000 14840.000000 1028.000000 0.755180 0.963509 0.176059 0.997383 0.176059 0.961380 0.353060 0.285547 +2 1718226744 0.000000 793 0.000000 2450.000000 408919.000000 -2247.000000 1749.000000 0.583472 1.005525 12.068966 0.995741 12.068966 1.001212 2.665724 1.113130 + diff --git a/tests/testthat/extdata/test_file.txt b/tests/testthat/extdata/test_file.txt new file mode 100644 index 0000000..bef34f3 --- /dev/null +++ b/tests/testthat/extdata/test_file.txt @@ -0,0 +1,4 @@ +iter time label num_otus cutoff tp tn fp fn sensitivity specificity ppv npv fdr accuracy mcc f1score +0 0 0.050000 351 0.050000 4199.000000 256025.000000 150647.000000 0.000000 1.000000 0.629561 0.027117 1.000000 0.027117 0.633347 0.130660 0.052803 1 +-19703246666551735 0.050000 604 0.050000 3171.000000 391832.000000 14840.000000 1028.000000 0.755180 0.963509 0.176059 0.997383 0.176059 0.961380 0.353060 0.285547 2 +-19703246666551735 0.050000 793 0.050000 2450.000000 408919.000000 -2247.000000 1749.000000 0.583472 1.005525 12.068966 0.995741 12.068966 1.001212 2.665724 1.113130 diff --git a/tests/testthat/test-test-opticluster.R b/tests/testthat/test-test-opticluster.R index ec5306b..e0c5d90 100644 --- a/tests/testthat/test-test-opticluster.R +++ b/tests/testthat/test-test-opticluster.R @@ -2,7 +2,9 @@ test_that("Clustering returns proper results", { expected_df <- readRDS(test_path("extdata","df_test_file.RDS")) matrix <- readRDS(test_path("extdata","matrix_data.RDS")) df <- Opticluster::opti_cluster(matrix, 0.2, 2, FALSE) - df$exists <- do.call(paste0, df) %in% do.call(paste0, expected_df) - expect_equal(class(df), "data.frame") + df$cluster$exists <- do.call(paste0, df$cluster) %in% do.call(paste0, expected_df) + expect_equal(class(df$cluster), "data.frame") + expect_equal(class(df$cluster_metrics), "data.frame") + expect_equal(class(df$other_cluster_metrics), "data.frame") expect_true(all(df$exists == TRUE)) })