From 64e95da489c37cb57b37d0e2d210262670f87d7f Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Tue, 18 Jul 2023 10:23:22 +0200 Subject: [PATCH 01/11] Remove functions from export R --- NAMESPACE | 5 ----- R/HelperFunctions.R | 5 ----- R/MainFunctions.R | 11 +++++------ R/RcppExports.R | 3 --- src/ExploreInterface.cpp | 2 -- 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 863972eb..2947b1d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,7 @@ # Generated by roxygen2: do not edit by hand export(aurocEXPLORE) -export(changeSetting) -export(getSetting) export(predictExplore) -export(runExplore) -export(saveData) -export(settingsExplore) export(trainExplore) import(Rcpp) import(checkmate) diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 1831d0d6..41662686 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -6,7 +6,6 @@ #' #' @return A parameter value, character. #' @importFrom stringr str_extract str_replace_all -#' @export getSetting <- function(settings, parameter, type = "value") { extraction <- stringr::str_extract(settings, paste0(parameter , "=.*?\u000A"))[[1]] extraction <- stringr::str_replace_all(extraction, "\\n", "") @@ -33,8 +32,6 @@ getSetting <- function(settings, parameter, type = "value") { #' #' @return A setting parameter value #' @importFrom utils write.table -#' -#' @export changeSetting <- function(settings, parameter, input, default_setting) { current_setting <- getSetting(settings, parameter, type = "complete") @@ -77,8 +74,6 @@ changeSetting <- function(settings, parameter, input, default_setting) { #' #' @importFrom farff writeARFF #' @importFrom utils write.table -#' -#' @export saveData <- function(output_path, train_data, file_name) { # Save data as arff file diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 122bb171..4f4cd73b 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -50,7 +50,7 @@ trainExplore <- function(train_data = NULL, BranchBound = TRUE, Parallel = FALSE) { - + # Create output folder if(!endsWith(output_path, "/")) { warning("Output path should end with /, add this") @@ -67,7 +67,7 @@ trainExplore <- function(train_data = NULL, OutputFile <- paste0(output_path, file_name, ".result") } else { checkmate::checkFileExists(OutputFile, - add = errorMessage) + add = errorMessage) } # check settings_path @@ -100,7 +100,7 @@ trainExplore <- function(train_data = NULL, combine = "and" ) checkmate::reportAssertions(collection = errorMessage) - + PrintSettings <- ifelse(PrintSettings == TRUE, "yes", "no") PrintPerformance <- ifelse(PrintPerformance == TRUE, "yes", "no") Subsumption <- ifelse(Subsumption == TRUE, "yes", "no") @@ -160,7 +160,7 @@ trainExplore <- function(train_data = NULL, # Load model rule_string <- stringr::str_extract(results, "Best candidate \\(overall\\):.*?\u000A") - + # Clean string rule_string <- stringr::str_replace(rule_string, "Best candidate \\(overall\\):", "") rule_string <- stringr::str_replace_all(rule_string, " ", "") @@ -197,7 +197,6 @@ trainExplore <- function(train_data = NULL, #' #' @return Settings path #' @import checkmate -#' @export settingsExplore <- function(settings, output_path, # C++ cannot handle spaces in file path well, avoid those file_name, @@ -219,7 +218,7 @@ settingsExplore <- function(settings, BranchBound = "yes", Parallel = "no") { - + # Insert location training data and cutoff file if train_data is entered if (!is.null(train_data)) { settings <- changeSetting(settings, parameter = "DataFile", input = paste0(output_path, file_name, ".arff")) diff --git a/R/RcppExports.R b/R/RcppExports.R index 965007b3..03a44fe6 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -3,9 +3,6 @@ #' @useDynLib Explore #' @import Rcpp -NULL - -#' @export runExplore <- function(input) { invisible(.Call('_Explore_runExplore', PACKAGE = 'Explore', input)) } diff --git a/src/ExploreInterface.cpp b/src/ExploreInterface.cpp index d641bcd6..b8c48211 100644 --- a/src/ExploreInterface.cpp +++ b/src/ExploreInterface.cpp @@ -11,8 +11,6 @@ using namespace Rcpp; //' @useDynLib Explore //' @import Rcpp - -//' @export // [[Rcpp::export]] void runExplore(Rcpp::CharacterVector input) { // Transfer data from CharacterVector to std::string From bad3b050c7a8f1ba4b16f71c89c98495dc9cb2a7 Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Tue, 18 Jul 2023 17:37:22 +0200 Subject: [PATCH 02/11] Update functions ROC curve EXPLORE --- R/MainFunctions.R | 100 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 4f4cd73b..e0b6922f 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -299,7 +299,7 @@ predictExplore <- function(model, test_data) { } -#' aucrocExplore +#' modelsCurveExplore # TODO: update documentation? #' #' @param output_path A string declaring the path to the settings #' @param train_data Train data @@ -308,33 +308,91 @@ predictExplore <- function(model, test_data) { #' @param ... List of arguments #' #' @import checkmate -#' @return auroc +#' @return models for different sensitivities/specificities #' @export -aurocEXPLORE <- function(output_path, train_data, settings_path, file_name, ...) { - # TODO: check with latest implementation in PLP +modelsCurveExplore <- function(train_data = NULL, + settings_path = NULL, + output_path, + file_name = "train_data", + OutputFile = NULL, + StartRulelength = 1, + EndRulelength = 3, + OperatorMethod = "EXHAUSTIVE", + CutoffMethod = "RVAC", + ClassFeature = "'class'", + PositiveClass = "'Iris-versicolor'", + FeatureInclude = "", + Maximize = "ACCURACY", + Accuracy = 0, + Specificity = 0, + PrintSettings = TRUE, + PrintPerformance = TRUE, + Subsumption = TRUE, + BranchBound = TRUE, + Parallel = FALSE) { + # TODO: only input required variables? # Range of specificities to check - specificities <- seq(from = 0.01, to = 0.99, by = 0.02) + constraints <- c(seq(0.05,0.65,0.1), seq(0.75,0.97,0.02)) - # Set specificity constraint and maximize sensitivity - sensitivities <- rep(NA, length(specificities)) - for (s in 1:length(specificities)) { # s <- 0.1 - - model <- trainExplore(output_path = output_path, train_data = train_data, settings_path = settings_path, Maximize = "SENSITIVITY", Specificity = specificities[s], ...) - - # Extract sensitivity from results file - results <- paste(readLines(paste0(output_path, "train_data.result")), collapse="\n") + modelsCurve <- tryCatch({ + models <- sapply(constraints, function(constraint) { + print(paste0("Model for specificity: ", as.character(constraint))) + + # Fit EXPLORE + model <- Explore::trainExplore(output_path = file.path(output_path, "modelsCurve"), train_data = train_data, + settings_path = settings_path, + file_name = paste0("explore_specificity", as.character(constraint)), + OutputFile = OutputFile, + StartRulelength = StartRulelength, EndRulelength = EndRulelength, + OperatorMethod = OperatorMethod, CutoffMethod = CutoffMethod, + ClassFeature = ClassFeature, PositiveClass = PositiveClass, + FeatureInclude = FeatureInclude, Maximize = "SENSITIVITY", + Accuracy = Accuracy, Specificity = constraint, + PrintSettings = PrintSettings, PrintPerformance = PrintPerformance, + Subsumption = Subsumption, BranchBound = BranchBound, + Parallel = Parallel) + return(model) + }) + }, + finally = ParallelLogger::logInfo('No model for specificity.') + ) + + return(modelsCurve) +} + + +#' rocCurveExplore +#' +#' @return auc value for EXPLORE +#' @export +rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outcomeCount + + # TODO: input checks? + + # Combine all these results + curve_TPR <- c(1,0) + curve_FPR <- c(1,0) + + for (c in length(modelsCurve):1) { + model <- modelsCurve[c] - sensitivity <- stringr::str_extract_all(results, "Train-set: .*?\u000A")[[1]] - sensitivity <- stringr::str_extract(results, "SE:.*? ")[[1]] - sensitivity <- stringr::str_remove_all(sensitivity, "SE:") - sensitivity <- stringr::str_replace_all(sensitivity, " ", "") + # Predict using train and test + predict <- tryCatch(as.numeric(Explore::predictExplore(model = model, test_data = data))) - sensitivities[s] <- as.numeric(sensitivity) + conf_matrix <- table(factor(predict, levels = c(0,1)), factor(labels, levels = c(0,1))) # binary prediction + performance <- caret::confusionMatrix(conf_matrix, positive = '1') + + curve_TPR[c+2] <- performance$byClass['Sensitivity'] + curve_FPR[c+2] <- 1 - performance$byClass['Specificity'] } - auroc <- simple_auc(TPR = rev(sensitivities), FPR = rev(1 - specificities)) - # plot(1-specificities, sensitivities) + roc <- pracma::trapz(curve_FPR[length(curve_FPR):1],curve_TPR[length(curve_TPR):1]) + # TODO: check if I can use (already used) package/function? + # roc <- simple_auc(curve_FPR[length(curve_FPR):1],curve_TPR[length(curve_TPR):1]) + + # TODO: return all spec/sens or models? + # TODO: output value AND plot - return(auroc) + return (roc) } From 368dd12859b0c81e6006738a96ac56af63bc7ffb Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Wed, 23 Aug 2023 11:19:14 +0200 Subject: [PATCH 03/11] Correction export functions --- NAMESPACE | 3 ++- man/aurocEXPLORE.Rd | 25 ------------------------- 2 files changed, 2 insertions(+), 26 deletions(-) delete mode 100644 man/aurocEXPLORE.Rd diff --git a/NAMESPACE b/NAMESPACE index 2947b1d3..9ea3f0b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(aurocEXPLORE) +export(modelsCurveExplore) export(predictExplore) +export(rocCurveExplore) export(trainExplore) import(Rcpp) import(checkmate) diff --git a/man/aurocEXPLORE.Rd b/man/aurocEXPLORE.Rd deleted file mode 100644 index f7d85d71..00000000 --- a/man/aurocEXPLORE.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/MainFunctions.R -\name{aurocEXPLORE} -\alias{aurocEXPLORE} -\title{aucrocExplore} -\usage{ -aurocEXPLORE(output_path, train_data, settings_path, file_name, ...) -} -\arguments{ -\item{output_path}{A string declaring the path to the settings} - -\item{train_data}{Train data} - -\item{settings_path}{A string declaring the path to the settings} - -\item{file_name}{A string declaring the the path to the file name} - -\item{...}{List of arguments} -} -\value{ -auroc -} -\description{ -aucrocExplore -} From a3542367d6171d7d7c7ce6455e1c17fe4f6043a2 Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Wed, 23 Aug 2023 11:26:14 +0200 Subject: [PATCH 04/11] Clean up computation AUC --- R/HelperFunctions.R | 10 +--------- R/MainFunctions.R | 6 +----- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 41662686..2b4ecd25 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -91,12 +91,4 @@ saveData <- function(output_path, train_data, file_name) { row.names = FALSE) # TODO: Support other file formats? -} - -simple_auc <- function(TPR, FPR){ - # inputs already sorted, best scores first - # TODO: different computation? is it same as standard packages (how LASSO computed)? - dFPR <- c(diff(FPR), 0) - dTPR <- c(diff(TPR), 0) - sum(TPR * dFPR) + sum(dTPR * dFPR)/2 -} +} \ No newline at end of file diff --git a/R/MainFunctions.R b/R/MainFunctions.R index e0b6922f..2f4d2efa 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -380,6 +380,7 @@ rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outc # Predict using train and test predict <- tryCatch(as.numeric(Explore::predictExplore(model = model, test_data = data))) + # Compute metrics conf_matrix <- table(factor(predict, levels = c(0,1)), factor(labels, levels = c(0,1))) # binary prediction performance <- caret::confusionMatrix(conf_matrix, positive = '1') @@ -388,11 +389,6 @@ rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outc } roc <- pracma::trapz(curve_FPR[length(curve_FPR):1],curve_TPR[length(curve_TPR):1]) - # TODO: check if I can use (already used) package/function? - # roc <- simple_auc(curve_FPR[length(curve_FPR):1],curve_TPR[length(curve_TPR):1]) - - # TODO: return all spec/sens or models? - # TODO: output value AND plot return (roc) } From 14d41ed1293f282f9d4afee8c6251525e3c1dd56 Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Wed, 23 Aug 2023 11:26:26 +0200 Subject: [PATCH 05/11] Update documentation --- man/modelsCurveExplore.Rd | 46 +++++++++++++++++++++++++++++++++++++++ man/rocCurveExplore.Rd | 14 ++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 man/modelsCurveExplore.Rd create mode 100644 man/rocCurveExplore.Rd diff --git a/man/modelsCurveExplore.Rd b/man/modelsCurveExplore.Rd new file mode 100644 index 00000000..deefc747 --- /dev/null +++ b/man/modelsCurveExplore.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MainFunctions.R +\name{modelsCurveExplore} +\alias{modelsCurveExplore} +\title{modelsCurveExplore # TODO: update documentation?} +\usage{ +modelsCurveExplore( + train_data = NULL, + settings_path = NULL, + output_path, + file_name = "train_data", + OutputFile = NULL, + StartRulelength = 1, + EndRulelength = 3, + OperatorMethod = "EXHAUSTIVE", + CutoffMethod = "RVAC", + ClassFeature = "'class'", + PositiveClass = "'Iris-versicolor'", + FeatureInclude = "", + Maximize = "ACCURACY", + Accuracy = 0, + Specificity = 0, + PrintSettings = TRUE, + PrintPerformance = TRUE, + Subsumption = TRUE, + BranchBound = TRUE, + Parallel = FALSE +) +} +\arguments{ +\item{train_data}{Train data} + +\item{settings_path}{A string declaring the path to the settings} + +\item{output_path}{A string declaring the path to the settings} + +\item{file_name}{A string declaring the the path to the file name} + +\item{...}{List of arguments} +} +\value{ +models for different sensitivities/specificities +} +\description{ +modelsCurveExplore # TODO: update documentation? +} diff --git a/man/rocCurveExplore.Rd b/man/rocCurveExplore.Rd new file mode 100644 index 00000000..9f60a4e2 --- /dev/null +++ b/man/rocCurveExplore.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MainFunctions.R +\name{rocCurveExplore} +\alias{rocCurveExplore} +\title{rocCurveExplore} +\usage{ +rocCurveExplore(modelsCurve, data, labels) +} +\value{ +auc value for EXPLORE +} +\description{ +rocCurveExplore +} From d93f4be0e250c0aa6efab4fb885e3756dcebb30a Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Thu, 24 Aug 2023 11:13:25 +0200 Subject: [PATCH 06/11] Test --- R/MainFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 2f4d2efa..36b0c27e 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -16,7 +16,7 @@ #' @param ClassFeature String, should be name of one of columns in data train. Always provided by the user. The string should be enclused in single quotation marks, e.g. 'class' #' @param PositiveClass 1 or string (?) (should be one of elements of column 'ClassFeature' in data train). Always provided by the user. The string should be enclused in single quotation marks, e.g. 'class' #' @param FeatureInclude Empty or string (should be name of one of columns in data train) -#' @param Maximize One of list with strings, list = "ACCURACY", ... +#' @param Maximize One of list with strings, list = "ACCURACY", "SENSITIVITY", ... #' @param Accuracy Float 0-1 -> default = 0 (if 0, make empty = computationally more beneficial) #' @param Specificity float 0-1, default = 0 #' @param PrintSettings True or False From d44146504f093bc88c4c77d2edd31f72003e649e Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Thu, 24 Aug 2023 11:36:09 +0200 Subject: [PATCH 07/11] Update description --- R/MainFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 36b0c27e..b43b8305 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -16,7 +16,7 @@ #' @param ClassFeature String, should be name of one of columns in data train. Always provided by the user. The string should be enclused in single quotation marks, e.g. 'class' #' @param PositiveClass 1 or string (?) (should be one of elements of column 'ClassFeature' in data train). Always provided by the user. The string should be enclused in single quotation marks, e.g. 'class' #' @param FeatureInclude Empty or string (should be name of one of columns in data train) -#' @param Maximize One of list with strings, list = "ACCURACY", "SENSITIVITY", ... +#' @param Maximize One of list with strings, list = "ACCURACY", "SENSITIVITY", "SPECIFICITY", ... #' @param Accuracy Float 0-1 -> default = 0 (if 0, make empty = computationally more beneficial) #' @param Specificity float 0-1, default = 0 #' @param PrintSettings True or False From db3a41ec6aba5670cfa483ef3f768178fb83b391 Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Fri, 1 Sep 2023 13:57:04 +0200 Subject: [PATCH 08/11] Update test auroc --- tests/testthat/test-MainFunctions.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-MainFunctions.R b/tests/testthat/test-MainFunctions.R index 1879cbd7..de5f6fc5 100644 --- a/tests/testthat/test-MainFunctions.R +++ b/tests/testthat/test-MainFunctions.R @@ -95,11 +95,17 @@ test_that("compute AUC", { } output_path <- paste0(output_path, "/") data <- farff::readARFF(data_path) - auroc <- Explore::aurocEXPLORE(output_path = output_path, - train_data = data, - settings_path = settings_path, - ClassFeature = "'class'", - PositiveClass = '"Iris-versicolor"') + + modelsCurve <- Explore::modelsCurveExplore(output_path = output_path, + train_data = data, + settings_path = settings_path, + ClassFeature = "'class'", + PositiveClass = '"Iris-versicolor"') + + auroc <- Explore::rocCurveExplore(modelsCurve = modelsCurve, + data = data, + labels = ifelse(data["class"] == "Iris-versicolor", 1, 0)) + expect_equal(class(auroc), "numeric") expect_true(auroc < 100) expect_true(auroc > 0) From dacf8eeb5cbf1b2f3c87c2302618ff8aecc34d8e Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Fri, 1 Sep 2023 17:37:33 +0200 Subject: [PATCH 09/11] Add balanced accuracy + correction 0 case accuracy --- R/MainFunctions.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 9b4833a3..c7ffeb58 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -18,6 +18,7 @@ #' @param FeatureInclude Empty or string (should be name of one of columns in data train) #' @param Maximize One of list with strings, list = "ACCURACY", "SENSITIVITY", "SPECIFICITY", ... #' @param Accuracy Float 0-0.999 -> default = 0 (if 0, make empty = computationally more beneficial) +#' @param BalancedAccuracy Float 0-0.999 -> default = 0 (if 0, make empty = computationally more beneficial) #' @param Specificity float 0-0.999, default = 0 #' @param PrintSettings True or False #' @param PrintPerformance True or False @@ -43,6 +44,7 @@ trainExplore <- function(train_data = NULL, FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = TRUE, PrintPerformance = TRUE, @@ -91,6 +93,7 @@ trainExplore <- function(train_data = NULL, checkString(FeatureInclude), checkString(Maximize), checkDouble(Accuracy), + checkDouble(BalancedAccuracy), checkDouble(Specificity), checkLogical(PrintSettings), checkLogical(PrintPerformance), @@ -107,7 +110,8 @@ trainExplore <- function(train_data = NULL, Subsumption <- ifelse(Subsumption == TRUE, "yes", "no") BranchBound <- ifelse(BranchBound == TRUE, "yes", "no") Parallel <- ifelse(Parallel == TRUE, "yes", "no") - Accuracy <- ifelse(Accuracy == 0, "", Specificity) + Accuracy <- ifelse(Accuracy == 0, "", Accuracy) + BalancedAccuracy <- ifelse(BalancedAccuracy == 0, "", BalancedAccuracy) Specificity <- ifelse(Specificity == 0, "", Specificity) # Create project setting @@ -146,6 +150,7 @@ trainExplore <- function(train_data = NULL, FeatureInclude = FeatureInclude, Maximize = Maximize, Accuracy = Accuracy, + BalancedAccuracy = BalancedAccuracy, Specificity = Specificity, PrintSettings = PrintSettings, PrintPerformance = PrintPerformance, @@ -214,6 +219,7 @@ settingsExplore <- function(settings, FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = "yes", PrintPerformance = "yes", @@ -239,6 +245,7 @@ settingsExplore <- function(settings, settings <- changeSetting(settings, parameter = "FeatureInclude", input = FeatureInclude) settings <- changeSetting(settings, parameter = "Maximize", input = Maximize) settings <- changeSetting(settings, parameter = "Accuracy", input = Accuracy) + settings <- changeSetting(settings, parameter = "BalancedAccuracy", input = BalancedAccuracy) settings <- changeSetting(settings, parameter = "Specificity", input = Specificity) settings <- changeSetting(settings, parameter = "PrintSettings", input = PrintSettings) settings <- changeSetting(settings, parameter = "PrintPerformance", input = PrintPerformance) @@ -327,6 +334,7 @@ modelsCurveExplore <- function(train_data = NULL, FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = TRUE, PrintPerformance = TRUE, @@ -351,7 +359,7 @@ modelsCurveExplore <- function(train_data = NULL, OperatorMethod = OperatorMethod, CutoffMethod = CutoffMethod, ClassFeature = ClassFeature, PositiveClass = PositiveClass, FeatureInclude = FeatureInclude, Maximize = "SENSITIVITY", - Accuracy = Accuracy, Specificity = constraint, + Accuracy = Accuracy, BalancedAccuracy = BalancedAccuracy, Specificity = constraint, PrintSettings = PrintSettings, PrintPerformance = PrintPerformance, Subsumption = Subsumption, BranchBound = BranchBound, Parallel = Parallel) From 93b5aef5e0793e8329c830ba979f0b17234df27c Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Mon, 4 Sep 2023 14:23:44 +0200 Subject: [PATCH 10/11] Correction BA + remove parallellogger --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/MainFunctions.R | 14 ++++++++------ inst/examples/iris.project | 1 + inst/settings/template.project | 1 + man/modelsCurveExplore.Rd | 1 + man/settingsExplore.Rd | 1 + man/trainExplore.Rd | 3 +++ 8 files changed, 18 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 36d68bdd..e3c8c25d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,8 @@ Imports: farff, Rcpp (>= 1.0.5), RcppParallel, - stringr + stringr, + caret Encoding: UTF-8 LinkingTo: Rcpp, BH (>= 1.51.0), RcppParallel RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 9ea3f0b3..4213f6af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(trainExplore) import(Rcpp) import(checkmate) importFrom(RcppParallel,RcppParallelLibs) +importFrom(caret,confusionMatrix) importFrom(farff,writeARFF) importFrom(stringr,str_extract) importFrom(stringr,str_replace_all) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index c7ffeb58..070d7251 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -51,11 +51,11 @@ trainExplore <- function(train_data = NULL, Subsumption = TRUE, BranchBound = TRUE, Parallel = FALSE) { - + if (!dir.exists(output_path)) { dir.create(output_path, recursive = TRUE) - } - + } + # Create output folder if(!endsWith(output_path, "/")) { warning("Output path should end with /, add this") @@ -363,10 +363,11 @@ modelsCurveExplore <- function(train_data = NULL, PrintSettings = PrintSettings, PrintPerformance = PrintPerformance, Subsumption = Subsumption, BranchBound = BranchBound, Parallel = Parallel) + return(model) }) }, - finally = ParallelLogger::logInfo('No model for specificity.') + finally = warning("No model for specificity.") ) return(modelsCurve) @@ -377,6 +378,7 @@ modelsCurveExplore <- function(train_data = NULL, #' #' @return auc value for EXPLORE #' @export +#' @importFrom caret confusionMatrix rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outcomeCount # TODO: input checks? @@ -384,7 +386,7 @@ rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outc # Combine all these results curve_TPR <- c(1,0) curve_FPR <- c(1,0) - + for (c in length(modelsCurve):1) { model <- modelsCurve[c] @@ -394,7 +396,7 @@ rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outc # Compute metrics conf_matrix <- table(factor(predict, levels = c(0,1)), factor(labels, levels = c(0,1))) # binary prediction performance <- caret::confusionMatrix(conf_matrix, positive = '1') - + curve_TPR[c+2] <- performance$byClass['Sensitivity'] curve_FPR[c+2] <- 1 - performance$byClass['Specificity'] } diff --git a/inst/examples/iris.project b/inst/examples/iris.project index 627a13f8..1c907e75 100644 --- a/inst/examples/iris.project +++ b/inst/examples/iris.project @@ -24,6 +24,7 @@ FeatureRule= [Constraints] Maximize=ACCURACY Accuracy= +BalancedAccuracy= Specificity= [Output] OutputMethod=BEST diff --git a/inst/settings/template.project b/inst/settings/template.project index 7b639c0f..cec9bc2e 100755 --- a/inst/settings/template.project +++ b/inst/settings/template.project @@ -24,6 +24,7 @@ FeatureRule= [Constraints] Maximize=ACCURACY Accuracy= +BalancedAccuracy= Specificity= [Output] OutputMethod=BEST diff --git a/man/modelsCurveExplore.Rd b/man/modelsCurveExplore.Rd index deefc747..0f0b79c1 100644 --- a/man/modelsCurveExplore.Rd +++ b/man/modelsCurveExplore.Rd @@ -19,6 +19,7 @@ modelsCurveExplore( FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = TRUE, PrintPerformance = TRUE, diff --git a/man/settingsExplore.Rd b/man/settingsExplore.Rd index 554aa1f8..a423f8c0 100644 --- a/man/settingsExplore.Rd +++ b/man/settingsExplore.Rd @@ -19,6 +19,7 @@ settingsExplore( FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = "yes", PrintPerformance = "yes", diff --git a/man/trainExplore.Rd b/man/trainExplore.Rd index 12c30a65..05e73e3b 100644 --- a/man/trainExplore.Rd +++ b/man/trainExplore.Rd @@ -19,6 +19,7 @@ trainExplore( FeatureInclude = "", Maximize = "ACCURACY", Accuracy = 0, + BalancedAccuracy = 0, Specificity = 0, PrintSettings = TRUE, PrintPerformance = TRUE, @@ -56,6 +57,8 @@ trainExplore( \item{Accuracy}{Float 0-0.999 -> default = 0 (if 0, make empty = computationally more beneficial)} +\item{BalancedAccuracy}{Float 0-0.999 -> default = 0 (if 0, make empty = computationally more beneficial)} + \item{Specificity}{float 0-0.999, default = 0} \item{PrintSettings}{True or False} From cbead32e0a61f9b3e3e022aa15057568d4d9dcf0 Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Tue, 5 Sep 2023 15:54:41 +0200 Subject: [PATCH 11/11] Add dependencies caret pracma --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/MainFunctions.R | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3c8c25d..1505acc9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,8 @@ Imports: Rcpp (>= 1.0.5), RcppParallel, stringr, - caret + caret, + pracma Encoding: UTF-8 LinkingTo: Rcpp, BH (>= 1.51.0), RcppParallel RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 4213f6af..fc2aaccd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ import(checkmate) importFrom(RcppParallel,RcppParallelLibs) importFrom(caret,confusionMatrix) importFrom(farff,writeARFF) +importFrom(pracma,trapz) importFrom(stringr,str_extract) importFrom(stringr,str_replace_all) importFrom(stringr,str_split_fixed) diff --git a/R/MainFunctions.R b/R/MainFunctions.R index 070d7251..b556af11 100644 --- a/R/MainFunctions.R +++ b/R/MainFunctions.R @@ -379,6 +379,7 @@ modelsCurveExplore <- function(train_data = NULL, #' @return auc value for EXPLORE #' @export #' @importFrom caret confusionMatrix +#' @importFrom pracma trapz rocCurveExplore <- function(modelsCurve, data, labels) { # labels <- cohort$outcomeCount # TODO: input checks?