From 4969782be2f0c647e1429f3cb052811e532214ef Mon Sep 17 00:00:00 2001 From: aniekmarkus Date: Tue, 3 Sep 2024 14:07:37 +0200 Subject: [PATCH] Updates parameters in .project file + added test mandatory feature and BA constraint --- DESCRIPTION | 2 +- inst/examples/complexity/binary_10.project | 9 ++- inst/examples/complexity/binary_3.project | 7 +-- .../examples/complexity/categorical_4.project | 9 ++- inst/examples/complexity/continuous_4.project | 9 ++- inst/examples/complexity/mix_4.project | 11 ++-- inst/examples/plp/test_plp.project | 1 - inst/examples/test.project | 1 - inst/examples/tests/iris.project | 1 - inst/examples/train_data.project | 1 - inst/settings/template.project | 5 +- man/modelsCurveExplore.Rd | 14 +++-- man/settingsExplore.Rd | 13 +++-- man/trainExplore.Rd | 18 ++++-- tests/testthat/test-MainFunctions.R | 57 +++++++++++++++++++ 15 files changed, 111 insertions(+), 47 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b033341..5bafd3d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ Imports: pracma Encoding: UTF-8 LinkingTo: Rcpp, BH (>= 1.51.0), RcppParallel -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: testthat (>= 3.0.0), knitr, diff --git a/inst/examples/complexity/binary_10.project b/inst/examples/complexity/binary_10.project index 6119d60..551c342 100644 --- a/inst/examples/complexity/binary_10.project +++ b/inst/examples/complexity/binary_10.project @@ -8,7 +8,7 @@ IncrementalOutputFile=false [Setup] PartitionMethod=RESUBSTITUTION Randomize=no -StartRulelength=3 +StartRulelength=1 EndRulelength=3 LearnRatio=0.8 NumberofPartitions=1 @@ -34,10 +34,9 @@ PrintCutoffMethod=no PrintCutoffValues=no PrintOperatorMethod=no PrintOperatorValues=no -PrintCombinations=yes -PrintFeatureSets=yes +PrintCombinations=no +PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no @@ -45,6 +44,6 @@ SavePartitions=no Subsumption=no BranchBound=no Parallel=no -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/inst/examples/complexity/binary_3.project b/inst/examples/complexity/binary_3.project index 4719a91..65d2bfd 100644 --- a/inst/examples/complexity/binary_3.project +++ b/inst/examples/complexity/binary_3.project @@ -34,10 +34,9 @@ PrintCutoffMethod=no PrintCutoffValues=no PrintOperatorMethod=no PrintOperatorValues=no -PrintCombinations=yes -PrintFeatureSets=yes +PrintCombinations=no +PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no @@ -45,6 +44,6 @@ SavePartitions=no Subsumption=no BranchBound=no Parallel=no -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/inst/examples/complexity/categorical_4.project b/inst/examples/complexity/categorical_4.project index 725de6b..b4416cf 100644 --- a/inst/examples/complexity/categorical_4.project +++ b/inst/examples/complexity/categorical_4.project @@ -34,10 +34,9 @@ PrintCutoffMethod=no PrintCutoffValues=no PrintOperatorMethod=no PrintOperatorValues=no -PrintCombinations=yes -PrintFeatureSets=yes -PrintCutoffSets=yes -PrintCutOffsetsBestLength=no +PrintCombinations=no +PrintFeatureSets=no +PrintCutoffSets=no PrintPerformance=yes PrintSets=no SavePartitions=no @@ -45,6 +44,6 @@ SavePartitions=no Subsumption=no BranchBound=no Parallel=no -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/inst/examples/complexity/continuous_4.project b/inst/examples/complexity/continuous_4.project index d6b6618..b70e721 100644 --- a/inst/examples/complexity/continuous_4.project +++ b/inst/examples/complexity/continuous_4.project @@ -34,10 +34,9 @@ PrintCutoffMethod=no PrintCutoffValues=no PrintOperatorMethod=no PrintOperatorValues=no -PrintCombinations=yes -PrintFeatureSets=yes -PrintCutoffSets=yes -PrintCutOffsetsBestLength=no +PrintCombinations=no +PrintFeatureSets=no +PrintCutoffSets=no PrintPerformance=yes PrintSets=no SavePartitions=no @@ -45,6 +44,6 @@ SavePartitions=no Subsumption=no BranchBound=no Parallel=no -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/inst/examples/complexity/mix_4.project b/inst/examples/complexity/mix_4.project index 15cde8a..3ec382c 100644 --- a/inst/examples/complexity/mix_4.project +++ b/inst/examples/complexity/mix_4.project @@ -25,8 +25,9 @@ FeatureRule= Maximize=BALANCEDACCURACY Accuracy= Specificity= +BalancedAccuracy=0.6268 [Output] -OutputMethod=BEST +OutputMethod=EVERY PrintSettings=yes PrintPartitions=no PrintFeatureOperators=no @@ -34,17 +35,17 @@ PrintCutoffMethod=no PrintCutoffValues=no PrintOperatorMethod=no PrintOperatorValues=no -PrintCombinations=yes -PrintFeatureSets=yes +PrintCombinations=no +PrintFeatureSets=no PrintCutoffSets=no PrintCutOffsetsBestLength=no -PrintPerformance=yes +PrintPerformance=no PrintSets=no SavePartitions=no [Run] Subsumption=no BranchBound=no Parallel=no -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/inst/examples/plp/test_plp.project b/inst/examples/plp/test_plp.project index bb3a586..cb7a0a6 100644 --- a/inst/examples/plp/test_plp.project +++ b/inst/examples/plp/test_plp.project @@ -38,7 +38,6 @@ PrintOperatorValues=no PrintCombinations=no PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no diff --git a/inst/examples/test.project b/inst/examples/test.project index 03f0fbb..94832ed 100755 --- a/inst/examples/test.project +++ b/inst/examples/test.project @@ -39,7 +39,6 @@ PrintOperatorValues=no PrintCombinations=no PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no diff --git a/inst/examples/tests/iris.project b/inst/examples/tests/iris.project index 5dc4d38..fd2b6aa 100644 --- a/inst/examples/tests/iris.project +++ b/inst/examples/tests/iris.project @@ -38,7 +38,6 @@ PrintOperatorValues=no PrintCombinations=no PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no diff --git a/inst/examples/train_data.project b/inst/examples/train_data.project index e1ce687..1d1f2b4 100644 --- a/inst/examples/train_data.project +++ b/inst/examples/train_data.project @@ -37,7 +37,6 @@ PrintOperatorValues=no PrintCombinations=no PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no PrintPerformance=yes PrintSets=no SavePartitions=no diff --git a/inst/settings/template.project b/inst/settings/template.project index f31805f..fa612fe 100755 --- a/inst/settings/template.project +++ b/inst/settings/template.project @@ -38,13 +38,12 @@ PrintOperatorValues=no PrintCombinations=no PrintFeatureSets=no PrintCutoffSets=no -PrintCutOffsetsBestLength=no -PrintPerformance=yes +PrintPerformance=no PrintSets=no SavePartitions=no [Run] Subsumption=no BranchBound=yes Parallel=yes -ParallelMethod=ONE +ParallelMethod=TWO BinaryReduction=no diff --git a/man/modelsCurveExplore.Rd b/man/modelsCurveExplore.Rd index 0f0b79c..767ec77 100644 --- a/man/modelsCurveExplore.Rd +++ b/man/modelsCurveExplore.Rd @@ -13,19 +13,23 @@ modelsCurveExplore( StartRulelength = 1, EndRulelength = 3, OperatorMethod = "EXHAUSTIVE", - CutoffMethod = "RVAC", + CutoffMethod = "ALL", ClassFeature = "'class'", PositiveClass = "'Iris-versicolor'", FeatureInclude = "", - Maximize = "ACCURACY", + Maximize = "BALANCEDACCURACY", Accuracy = 0, BalancedAccuracy = 0, Specificity = 0, + OutputMethod = "BEST", PrintSettings = TRUE, - PrintPerformance = TRUE, - Subsumption = TRUE, + PrintPerformance = FALSE, + Subsumption = FALSE, BranchBound = TRUE, - Parallel = FALSE + Sorted = "none", + Parallel = TRUE, + ParallelMethod = "TWO", + BinaryReduction = FALSE ) } \arguments{ diff --git a/man/settingsExplore.Rd b/man/settingsExplore.Rd index a423f8c..886a4c0 100644 --- a/man/settingsExplore.Rd +++ b/man/settingsExplore.Rd @@ -17,15 +17,18 @@ settingsExplore( ClassFeature, PositiveClass, FeatureInclude = "", - Maximize = "ACCURACY", + Maximize = "BALANCEDACCURACY", Accuracy = 0, BalancedAccuracy = 0, Specificity = 0, + OutputMethod = "BEST", PrintSettings = "yes", - PrintPerformance = "yes", - Subsumption = "yes", + PrintPerformance = "no", + Subsumption = "no", BranchBound = "yes", - Parallel = "no" + Parallel = "yes", + ParallelMethod = "TWO", + BinaryReduction = "no" ) } \arguments{ @@ -59,6 +62,8 @@ settingsExplore( \item{Specificity}{float 0-1, default = 0} +\item{OutputMethod}{string EVERY, BEST, INCREMENT} + \item{PrintSettings}{True or False} \item{PrintPerformance}{True or False} diff --git a/man/trainExplore.Rd b/man/trainExplore.Rd index 05e73e3..5ace43e 100644 --- a/man/trainExplore.Rd +++ b/man/trainExplore.Rd @@ -13,19 +13,23 @@ trainExplore( StartRulelength = 1, EndRulelength = 3, OperatorMethod = "EXHAUSTIVE", - CutoffMethod = "RVAC", + CutoffMethod = "ALL", ClassFeature = "'class'", PositiveClass = "'Iris-versicolor'", FeatureInclude = "", - Maximize = "ACCURACY", + Maximize = "BALANCEDACCURACY", Accuracy = 0, BalancedAccuracy = 0, Specificity = 0, + OutputMethod = "BEST", PrintSettings = TRUE, - PrintPerformance = TRUE, - Subsumption = TRUE, + PrintPerformance = FALSE, + Subsumption = FALSE, BranchBound = TRUE, - Parallel = FALSE + Sorted = "none", + Parallel = TRUE, + ParallelMethod = "TWO", + BinaryReduction = FALSE ) } \arguments{ @@ -51,7 +55,7 @@ trainExplore( \item{PositiveClass}{1 or string (?) (should be one of elements of column 'ClassFeature' in data train). Always provided by the user. The string should be enclused in single quotation marks, e.g. 'class'} -\item{FeatureInclude}{Empty or string (should be name of one of columns in data train)} +\item{FeatureInclude}{Empty or string (should be name of one or more columns in data train separated by ;)} \item{Maximize}{One of list with strings, list = "ACCURACY", "SENSITIVITY", "SPECIFICITY", ...} @@ -69,6 +73,8 @@ trainExplore( \item{BranchBound}{True or False} +\item{Sorted}{One of list with strings, e.g. "none", "jaccard", ... Sort features based on correlation with outcome variable, NOTE: only when train_data is entered} + \item{Parallel}{True or False} } \value{ diff --git a/tests/testthat/test-MainFunctions.R b/tests/testthat/test-MainFunctions.R index 8dc85a2..02dce8b 100644 --- a/tests/testthat/test-MainFunctions.R +++ b/tests/testthat/test-MainFunctions.R @@ -110,3 +110,60 @@ test_that("compute AUC", { expect_true(auroc < 100) expect_true(auroc > 0) }) + +test_that("mandatory features", { + ### Tests for EXPLORE using iris dataset + data_path <- system.file("examples", "tests", "iris.arff", package = "Explore") + settings_path <- system.file("examples", "tests", "iris.project", package = "Explore") + output_path <- paste0(tempdir(), "/", "Test1") + dir.create(output_path) + if (.Platform$OS.type == "windows") { + output_path <- gsub("\\\\", "/", output_path) + } + output_path <- paste0(output_path, "/") + data <- farff::readARFF(data_path) + model <- Explore::trainExplore(output_path = output_path, + file_name = "iris", + train_data = data, + ClassFeature = "'class'", + PositiveClass = '"Iris-versicolor"', + FeatureInclude = "'sepalwidth';'sepallength'") + expect_equal(class(model), "character") + # expect_true(is.na(model), info = "Test failed because model is NA") + expect_equal(model, "'sepallength'>4.9AND'sepalwidth'<=3.2AND'petalwidth'<=1.7") +}) + +test_that("balanced accuracy constraint ", { + data_path <- system.file("examples", "complexity", "mix_4.arff", package = "Explore") + output_path <- paste0(getwd(), "/", "Test1") + dir.create(output_path) + if (.Platform$OS.type == "windows") { + output_path <- gsub("\\\\", "/", output_path) + } + output_path <- paste0(output_path, "/") + + data <- farff::readARFF(data_path) + data <-as.data.frame(apply(data,2,as.numeric)) + + model_without <- Explore::trainExplore(output_path = output_path, + file_name = "mix_4", + train_data = data, + StartRulelength = 3, + ClassFeature = "'outcomeCount'", + PositiveClass = '"1"') + num_without <- Explore::candidatesExplore(paste0(output_path, "mix_4", ".result")) + + model_with <- Explore::trainExplore(output_path = output_path, + file_name = "mix_4", + train_data = data, + StartRulelength = 3, + ClassFeature = "'outcomeCount'", + PositiveClass = '"1"', + BalancedAccuracy = 0.6, + OutputMethod = "EVERY", + Parallel = FALSE) + num_with <- Explore::candidatesExplore(paste0(output_path, "mix_4", ".result")) + + expect_equal(num_without, 1940) + expect_equal(num_with, 36) +}) \ No newline at end of file