Skip to content

Commit

Permalink
Release polish (#6)
Browse files Browse the repository at this point in the history
* Added entries to .Rbuildignore and properly documented the new R code.

* Unstable: the mismatched-header issue still needs to be fixed, but the function can now return a vector with all the metrics!

* Cluster now returns test metrics; added a test to support it.
  • Loading branch information
GregJohnsonJr authored Jun 14, 2024
1 parent b717404 commit 77ebc1c
Show file tree
Hide file tree
Showing 12 changed files with 54 additions and 22 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
^\.github$
^src/CMakeLists\.txt$
^src/cmake-build-debug$
25 changes: 20 additions & 5 deletions R/Cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,31 @@
#' @param sparse_matrix A Sparse Matrix.
#' @param cutoff A cutoff value
#' @param iterations The number of iterations
#' @param shuffle a boolean to determine whether or not you want to shuffle the data before you cluster

Check notice

Code scanning / lintr

Lines should not be more than 80 characters. This line is 103 characters. Note

Lines should not be more than 80 characters. This line is 103 characters.
#' @return A data.frame of the clusters.
opti_cluster <- function(sparse_matrix, cutoff, iterations, shuffle = TRUE) {
index_one_list <- sparse_matrix@i
index_two_list <- sparse_matrix@j
value_list <- sparse_matrix@x
clustering_output_string <- MatrixToOpiMatrixCluster(index_one_list, index_two_list, value_list, cutoff,
clustering_output_string_list <- MatrixToOpiMatrixCluster(index_one_list, index_two_list, value_list, cutoff,

Check warning

Code scanning / lintr

no visible global function definition for 'MatrixToOpiMatrixCluster' Warning

no visible global function definition for 'MatrixToOpiMatrixCluster'

Check notice

Code scanning / lintr

Lines should not be more than 80 characters. This line is 111 characters. Note

Lines should not be more than 80 characters. This line is 111 characters.
iterations, shuffle)

Check notice

Code scanning / lintr

Hanging indent should be 60 spaces but is 55 spaces. Note

Hanging indent should be 60 spaces but is 55 spaces.
df <- t(read.table(text = clustering_output_string,
clustering_output_string <- clustering_output_string_list[1]
clustering_metric <- clustering_output_string_list[2]
clustering_metric_2 <- clustering_output_string_list[3]
df_cluster_metrics <- (read.table(text = clustering_metric,
sep = "\t", header = TRUE))

Check notice

Code scanning / lintr

Hanging indent should be 36 spaces but is 21 spaces. Note

Hanging indent should be 36 spaces but is 21 spaces.
df <- data.frame(df[-1, ])
colnames(df)[1] <- "cluster"
return(df)
df_other_cluster_metrics <- (read.table(text = clustering_metric_2,
sep = "\t", header = TRUE))

Check notice

Code scanning / lintr

Hanging indent should be 43 spaces but is 21 spaces. Note

Hanging indent should be 43 spaces but is 21 spaces.

df_cluster <- t(read.table(text = clustering_output_string,
sep = "\t", header = TRUE))

Check notice

Code scanning / lintr

Hanging indent should be 29 spaces but is 21 spaces. Note

Hanging indent should be 29 spaces but is 21 spaces.
df_cluster <- data.frame(df_cluster[-1, ])

colnames(df_cluster)[1] <- "cluster"

opticluster_data <- list(cluster = df_cluster,
cluster_metrics = df_cluster_metrics,
other_cluster_metrics = df_other_cluster_metrics)

return(opticluster_data)
}
4 changes: 3 additions & 1 deletion man/opti_cluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 12 additions & 10 deletions src/ClusterCommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ ClusterCommand::~ClusterCommand() {
/// Bad allocations, returns basic_string, returns empty string, returns non-utf8 characters, etc
/// @param optiMatrix
/// @return
std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
std::vector<std::string> ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
std::string clusterMetrics;
std::string sensFile;
std::string outStep;
std::string clusterMatrixOutput;
if (!cutOffSet) {
clusterMetrics += ("\nYou did not set a cutoff, using 0.03.\n");
// clusterMetrics += ("\nYou did not set a cutoff, using 0.03.\n");
cutoff = 0.05;
}

clusterMetrics += ("\nClustering " + distfile + "\n");
// clusterMetrics += ("\nClustering " + distfile + "\n");

ClusterMetric *metric = nullptr;
metricName = "mcc";
Expand All @@ -39,7 +39,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
metricName == "accuracy") { metric = new Accuracy(); } else if (
metricName == "ppv") { metric = new PPV(); } else if (metricName == "npv") { metric = new NPV(); } else if (
metricName == "fdr") { metric = new FDR(); } else if (metricName == "fpfn") { metric = new FPFN(); } else {
return 0;
return {};
}

// string nameOrCount = "";
Expand All @@ -59,7 +59,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
//
sensFile += "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
clusterMetrics += (
"\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n");
"iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n");
outStep +=
"iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";

Expand Down Expand Up @@ -91,7 +91,8 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
outStep += std::to_string(result) + "\t";
}
//m->mothurOutEndLine();
// outStep += "\n";
clusterMetrics += "\n";
outStep += "\n";
// Stable Metric -> Keep the data stable, to prevent errors (rounding errors)
// The difference between what the current and last metric (delta)
// MaxIters -> is an exit condition
Expand All @@ -105,6 +106,7 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
iters++;

stats = cluster.getStats(tp, tn, fp, fn);

numBins = cluster.getNumBins();

clusterMetrics += (std::to_string(iters) + "\t" + std::to_string(time(nullptr) - start) + "\t" +
Expand All @@ -120,11 +122,12 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
clusterMetrics += (std::to_string(result) + "\t");
outStep += std::to_string(result) + "\t";
}
clusterMetrics += "\n";
outStep += "\n";
}
ListVector *list = nullptr;

clusterMetrics += "\n\n";
// clusterMetrics += "\n\n";
list = cluster.getList();
//
if (printHeaders) {
Expand All @@ -138,9 +141,8 @@ std::string ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix) {
sensFile += std::to_string(cutoff) + '\t' + std::to_string(cutoff) + '\t' + std::to_string(tp) + '\t' +
std::to_string(tn) + '\t' +
std::to_string(fp) + '\t' + std::to_string(fn) + '\t';
for (double result : stats) { sensFile + std::to_string(result) + '\t'; }
Rcpp::Rcout << "Metrics for the current cluster:\n\n " << sensFile << "\n\n" << clusterMetrics << "\n\n";
for (double result : stats) { sensFile += std::to_string(result) + '\t'; }
}
delete matrix;
return clusterMatrixOutput;
return {clusterMatrixOutput, sensFile, clusterMetrics};
}
4 changes: 2 additions & 2 deletions src/MothurDependencies/ClusterCommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ using namespace std;


class ClusterCommand {

public:
//ClusterCommand(string);
ClusterCommand() {}
~ClusterCommand();
bool SetMaxIterations(const int iterations) {maxIters = iterations; return maxIters == iterations;}
bool SetOpticlusterRandomShuffle(const bool shuffle) {canShuffle = shuffle; return canShuffle;}
std::string runOptiCluster(OptiMatrix*);
bool SetMetricType(const string& newMetric) {metric = newMetric; return metric == newMetric;}
std::vector<std::string> runOptiCluster(OptiMatrix*);



Expand Down
2 changes: 1 addition & 1 deletion src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// MatrixToOpiMatrixCluster
std::string MatrixToOpiMatrixCluster(const std::vector<int>& xPosition, const std::vector<int>& yPosition, const std::vector<double>& data, const double cutoff, const int iterations, const bool shuffle);
std::vector<std::string> MatrixToOpiMatrixCluster(const std::vector<int>& xPosition, const std::vector<int>& yPosition, const std::vector<double>& data, const double cutoff, const int iterations, const bool shuffle);
RcppExport SEXP _Opticluster_MatrixToOpiMatrixCluster(SEXP xPositionSEXP, SEXP yPositionSEXP, SEXP dataSEXP, SEXP cutoffSEXP, SEXP iterationsSEXP, SEXP shuffleSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <Rcpp.h>
//[[Rcpp::export]]
std::string MatrixToOpiMatrixCluster(const std::vector<int> &xPosition,
std::vector<std::string> MatrixToOpiMatrixCluster(const std::vector<int> &xPosition,
const std::vector<int> &yPosition, const std::vector<double> &data, const double cutoff,
const int iterations = 2, const bool shuffle = true)
{
Expand Down
Binary file added tests/.DS_Store
Binary file not shown.
Binary file added tests/testthat/.DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/testthat/extdata/randomdata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
iter time label num_otus cutoff tp tn fp fn sensitivity specificity ppv npv fdr accuracy mcc f1score
0 0 0.000000 351 0.000000 4199.000000 256025.000000 150647.000000 0.000000 1.000000 0.629561 0.027117 1.000000 0.027117 0.633347 0.130660 0.052803
1 1718226744 0.000000 604 0.000000 3171.000000 391832.000000 14840.000000 1028.000000 0.755180 0.963509 0.176059 0.997383 0.176059 0.961380 0.353060 0.285547
2 1718226744 0.000000 793 0.000000 2450.000000 408919.000000 -2247.000000 1749.000000 0.583472 1.005525 12.068966 0.995741 12.068966 1.001212 2.665724 1.113130

4 changes: 4 additions & 0 deletions tests/testthat/extdata/test_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
iter time label num_otus cutoff tp tn fp fn sensitivity specificity ppv npv fdr accuracy mcc f1score
0 0 0.050000 351 0.050000 4199.000000 256025.000000 150647.000000 0.000000 1.000000 0.629561 0.027117 1.000000 0.027117 0.633347 0.130660 0.052803 1
-19703246666551735 0.050000 604 0.050000 3171.000000 391832.000000 14840.000000 1028.000000 0.755180 0.963509 0.176059 0.997383 0.176059 0.961380 0.353060 0.285547 2
-19703246666551735 0.050000 793 0.050000 2450.000000 408919.000000 -2247.000000 1749.000000 0.583472 1.005525 12.068966 0.995741 12.068966 1.001212 2.665724 1.113130
6 changes: 4 additions & 2 deletions tests/testthat/test-test-opticluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ test_that("Clustering returns proper results", {
expected_df <- readRDS(test_path("extdata","df_test_file.RDS"))

Check notice

Code scanning / lintr

Commas should always have a space after. Note test

Commas should always have a space after.
matrix <- readRDS(test_path("extdata","matrix_data.RDS"))

Check notice

Code scanning / lintr

Commas should always have a space after. Note test

Commas should always have a space after.
df <- Opticluster::opti_cluster(matrix, 0.2, 2, FALSE)
df$exists <- do.call(paste0, df) %in% do.call(paste0, expected_df)
expect_equal(class(df), "data.frame")
df$cluster$exists <- do.call(paste0, df$cluster) %in% do.call(paste0, expected_df)

Check notice

Code scanning / lintr

Lines should not be more than 80 characters. This line is 84 characters. Note test

Lines should not be more than 80 characters. This line is 84 characters.
expect_equal(class(df$cluster), "data.frame")
expect_equal(class(df$cluster_metrics), "data.frame")
expect_equal(class(df$other_cluster_metrics), "data.frame")
expect_true(all(df$exists == TRUE))
})

0 comments on commit 77ebc1c

Please sign in to comment.