Skip to content

Commit

Permalink
Merge pull request #35 from mi-erasmusmc/develop
Browse files Browse the repository at this point in the history
Develop 0.4.0
  • Loading branch information
cebarboza authored Oct 18, 2024
2 parents a0a00d6 + 8854a7d commit 1f72250
Show file tree
Hide file tree
Showing 48 changed files with 11,662 additions and 836 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^renv$
^renv\.lock$
^.*\.Rproj$
^\.Rproj\.user$
^vignettes$
Expand Down
21 changes: 6 additions & 15 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,55 +1,46 @@
# History files
.Rhistory

# Session Data files
.RData
.RDataTmp

# User-specific files
.Ruserdata

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# Produced vignettes
vignettes/*.html
vignettes/*.pdf

# R Environment Variables
.Renviron

# Documents and results
docs/
output/

# Mac files
.DS_STORE
.idea/

# Compiled Object files
*.o
*.obj

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Compiled Static libraries
*.a
*.lib

# Other C++ files
**/__history/

# EXPLORE result files
*.result
inst/doc
inst/cmake-build-debug
inst/cmake-build-debug-coverage
docs

# PaRe Report
Report.html
Report_files/
renv.lock
renv
.Rprofile
@OutputFile
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Explore
Type: Package
Title: Exhaustive Procedure for LOgic-Rule Extraction (EXPLORE)
Version: 0.3.0
Version: 0.4.0
Date: 2020-12-14
Authors@R: c(
person("Aniek", "Markus", , "[email protected]", role = c("aut"), comment = c(ORCID = "0000-0001-5779-4794")),
Expand All @@ -18,10 +18,12 @@ Imports:
RcppParallel,
stringr,
caret,
pracma
pracma,
glue,
magrittr
Encoding: UTF-8
LinkingTo: Rcpp, BH (>= 1.51.0), RcppParallel
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Suggests:
testthat (>= 3.0.0),
knitr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# Generated by roxygen2: do not edit by hand

export(candidateNumberExplore)
export(modelsCurveExplore)
export(predictExplore)
export(resultsExplore)
export(rocCurveExplore)
export(trainExplore)
import(Rcpp)
import(checkmate)
importFrom(RcppParallel,RcppParallelLibs)
importFrom(caret,confusionMatrix)
importFrom(farff,writeARFF)
importFrom(magrittr,"%>%")
importFrom(pracma,trapz)
importFrom(stringr,str_extract)
importFrom(stringr,str_replace_all)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# Explore 0.4.0

* Improved testing for different datasets.

* trainExplore has a parameter to select the result type.

* Added testExplore for test configuration.

* Added resultsExplore to access results.

# Explore 0.3.0

* Package tested on Windows, macOS and Linux. R CMD check passes without errors.
Expand Down
43 changes: 41 additions & 2 deletions R/HelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,13 @@ saveData <- function(output_path, train_data, file_name) {

# Fix col type for binary data
binary_cols <- sapply(1:ncol(train_data), function(c) all(train_data[[c]] %in% 0:1))
train_data[binary_cols] <- lapply(colnames(train_data[binary_cols]), function(c) factor(train_data[[c]], labels=c(0,1)))

# Convert TRUE/FALSE to 1/0
train_data <- convert_logical(train_data)

# Order data (first binary then continuous features)
train_data <- cbind(train_data[binary_cols],train_data[!binary_cols]) # Order needed for correct functioning of main algorithm in C++

# Save data as arff file
if (file.exists(paste0(output_path, file_name, ".arff"))) {file.remove(paste0(output_path, file_name, ".arff"))}
farff::writeARFF(train_data, paste0(output_path, file_name, ".arff"))
Expand All @@ -97,9 +102,43 @@ saveData <- function(output_path, train_data, file_name) {
# TODO: Support other file formats?
}

# Convert every binary column of a data frame into a factor with levels
# "0" and "1".
#
# A column counts as binary when all of its values are 0/1 or TRUE/FALSE.
# Because %in% compares by value after coercion, this also matches character
# columns holding "0"/"1" and factors whose observed values are "0"/"1".
#
# train_data: a data frame (or list of equal-length columns).
# Returns train_data with the binary columns replaced by 0/1 factors; all other
# columns are left untouched.
convert_logical <- function(train_data) {
  # vapply guarantees a logical mask even when train_data has no columns
  # (sapply would return an empty list, which cannot be used as an index).
  binary_cols <- vapply(
    train_data,
    function(col) all(col %in% c(0, 1, TRUE, FALSE)),
    logical(1)
  )

  to_binary_factor <- function(col) {
    # as.logical() maps the strings "0"/"1" (and factors with those levels)
    # to NA, so go through numeric first to keep the actual values.
    if (is.factor(col)) {
      col <- as.character(col)
    }
    if (is.character(col)) {
      col <- as.numeric(col)
    }
    col <- as.numeric(as.logical(col)) # Convert TRUE/FALSE to 1/0
    factor(col, levels = c(0, 1), labels = c(0, 1)) # Convert to factors
  }

  if (any(binary_cols)) {
    train_data[binary_cols] <- lapply(train_data[binary_cols], to_binary_factor)
  }

  train_data
}

# Jaccard-style similarity between two vectors: the number of distinct values
# they share divided by (length(a) + length(b) - number shared).
# NOTE(review): the denominator uses raw lengths, so this equals the classic
# Jaccard index only when a and b contain no duplicates — confirm with callers.
jaccard <- function(a, b) {
  n_shared <- length(intersect(a, b))
  n_shared / (length(a) + length(b) - n_shared)
}
}

# Phi coefficient of association between two binary vectors, computed from
# their contingency table as
#   (p11 - p1. * p.1) / sqrt(product of all row and column marginal proportions)
# where p11 is the proportion of observations in the first cell of the table.
# For two binary variables this equals their Pearson correlation.
#
# a, b: vectors of equal length (table() requires this).
# NOTE(review): the formula assumes each vector takes two distinct values, so
# that the table is 2x2 — confirm callers never pass non-binary data.
# Returns an unnamed numeric scalar.
phi <- function(a, b) {
  contingency_tb <- table(a, b)

  total <- sum(contingency_tb)
  row_prop <- rowSums(contingency_tb) / total
  col_prop <- colSums(contingency_tb) / total

  # Product of all marginal proportions, i.e. the squared denominator.
  v <- prod(row_prop, col_prop)

  # The whole difference must be divided by sqrt(v); previously only the
  # second term was, because `/` binds tighter than `-`.
  observed <- contingency_tb[1, 1] / total
  expected <- col_prop[1] * row_prop[1]
  coefficient <- (observed - expected) / sqrt(v)

  # Drop the level name inherited from the marginal vectors.
  unname(coefficient)
}



Loading

0 comments on commit 1f72250

Please sign in to comment.