From 2a6016cc0d0aa04fc28149db962b5508c236931e Mon Sep 17 00:00:00 2001
From: Chris Knoll
Date: Thu, 8 Aug 2024 09:31:41 -0400
Subject: [PATCH] Prepare for version 2.0.0
Updated to version 2.0.0.
Ran package maintenance.
Rebuilt site.
Updated NEWS.md.
---
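Note on the minCellCount censoring touched throughout exportAndromedaToCsv below: every
export path replaces a count that falls under the threshold with minus the threshold and
blanks the columns derived from it. A minimal sketch of that convention (censorCounts is
a hypothetical helper for illustration, not part of the package):

    # censor counts below minCellCount: flagged values become -minCellCount
    # and statistics derived from them become NA
    censorCounts <- function(data, countColumn, derivedColumns, minCellCount) {
      removeInd <- data[[countColumn]] < minCellCount
      if (sum(removeInd) > 0) {
        data[[countColumn]][removeInd] <- -1 * minCellCount
        for (column in derivedColumns) {
          data[[column]][removeInd] <- NA
        }
      }
      data
    }

    df <- data.frame(sum_value = c(3, 50), average_value = c(0.003, 0.05))
    censorCounts(df, "sum_value", "average_value", minCellCount = 5)
    #   sum_value average_value
    # 1        -5            NA
    # 2        50          0.05
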
DESCRIPTION | 2 +-
NEWS.md | 5 +-
R/AggregateCovariates.R | 569 ++++++++-------
R/CustomCovariates.R | 127 ++--
R/Database.R | 29 +-
R/DechallengeRechallenge.R | 126 ++--
R/HelperFunctions.R | 17 +-
R/Incremental.R | 109 ++-
R/RunCharacterization.R | 195 +++--
R/SaveLoad.R | 8 +-
R/TimeToEvent.R | 89 +--
R/ViewShiny.R | 37 +-
docs/404.html | 6 +-
docs/articles/InstallationGuide.html | 6 +-
docs/articles/UsingPackage.html | 110 ++-
docs/articles/index.html | 6 +-
docs/authors.html | 8 +-
docs/index.html | 6 +-
docs/news/index.html | 13 +-
docs/pkgdown.yml | 6 +-
docs/reference/Characterization-package.html | 6 +-
docs/reference/cleanIncremental.html | 11 +-
docs/reference/cleanNonIncremental.html | 11 +-
...computeDechallengeRechallengeAnalyses.html | 6 +-
...puteRechallengeFailCaseSeriesAnalyses.html | 6 +-
.../reference/computeTimeToEventAnalyses.html | 6 +-
.../createAggregateCovariateSettings.html | 6 +-
.../createCharacterizationSettings.html | 6 +-
.../createCharacterizationTables.html | 6 +-
.../createDechallengeRechallengeSettings.html | 6 +-
.../createDuringCovariateSettings.html | 6 +-
docs/reference/createSqliteDatabase.html | 6 +-
docs/reference/createTimeToEventSettings.html | 6 +-
.../exportDechallengeRechallengeToCsv.html | 6 +-
.../exportRechallengeFailCaseSeriesToCsv.html | 6 +-
docs/reference/exportTimeToEventToCsv.html | 6 +-
docs/reference/getDbDuringCovariateData.html | 6 +-
docs/reference/index.html | 12 +-
docs/reference/insertResultsToDatabase.html | 6 +-
.../loadCharacterizationSettings.html | 6 +-
.../runCharacterizationAnalyses.html | 6 +-
.../saveCharacterizationSettings.html | 6 +-
docs/reference/viewCharacterization.html | 6 +-
docs/sitemap.xml | 6 +-
tests/testthat/test-Incremental.R | 151 ++--
tests/testthat/test-aggregateCovariate.R | 166 +++--
tests/testthat/test-dbs.R | 51 +-
tests/testthat/test-dechallengeRechallenge.R | 175 +++--
tests/testthat/test-manualData.R | 685 +++++++++---------
tests/testthat/test-runCharacterization.R | 124 ++--
tests/testthat/test-timeToEvent.R | 15 +-
tests/testthat/test-viewShiny.R | 10 +-
vignettes/UsingPackage.Rmd | 40 +-
53 files changed, 1500 insertions(+), 1551 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 2c507b1..7790036 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: Characterization
Type: Package
Title: Characterizations of Cohorts
-Version: 1.1.1
+Version: 2.0.0
Date: 2024-08-07
Authors@R: c(
person("Jenna", "Reps", , "reps@ohdsi.org", role = c("aut", "cre")),
diff --git a/NEWS.md b/NEWS.md
index b3953d1..076b428 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,10 +1,7 @@
-Characterization 1.1.1
+Characterization 2.0.0
======================
- added tests for all HADES supported dbms
- updated minCellCount censoring
-
-Characterization 1.1.0
-======================
- fixed issues with incremental
- made the code more modular to enable new characterizations to be added
- added job optimization code to optimize the distribution of jobs
diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R
index bca3e9d..8a3c220 100644
--- a/R/AggregateCovariates.R
+++ b/R/AggregateCovariates.R
@@ -67,7 +67,7 @@ createAggregateCovariateSettings <- function(
useDeviceExposureShortTerm = T,
useVisitConceptCountShortTerm = T,
endDays = 0,
- longTermStartDays = -365,
+ longTermStartDays = -365,
shortTermStartDays = -30
),
caseCovariateSettings = createDuringCovariateSettings(
@@ -78,11 +78,10 @@ createAggregateCovariateSettings <- function(
useMeasurementDuring = T,
useObservationDuring = T,
useVisitConceptCountDuring = T
- ),
+ ),
casePreTargetDuration = 365,
casePostOutcomeDuration = 365,
- extractNonCaseCovariates = T
- ) {
+ extractNonCaseCovariates = T) {
errorMessages <- checkmate::makeAssertCollection()
# check targetIds is a vector of int/double
.checkCohortIds(
@@ -98,16 +97,16 @@ createAggregateCovariateSettings <- function(
)
# check TAR - EFF edit
- if(length(riskWindowStart)>1){
- stop('Please add one time-at-risk per setting')
+ if (length(riskWindowStart) > 1) {
+ stop("Please add one time-at-risk per setting")
}
.checkTimeAtRisk(
- riskWindowStart = riskWindowStart,
- startAnchor = startAnchor,
- riskWindowEnd = riskWindowEnd,
- endAnchor = endAnchor,
- errorMessages = errorMessages
- )
+ riskWindowStart = riskWindowStart,
+ startAnchor = startAnchor,
+ riskWindowEnd = riskWindowEnd,
+ endAnchor = endAnchor,
+ errorMessages = errorMessages
+ )
# check covariateSettings
.checkCovariateSettings(
@@ -116,11 +115,13 @@ createAggregateCovariateSettings <- function(
)
# check temporal is false
- if(inherits(covariateSettings, 'covariateSettings')){
+ if (inherits(covariateSettings, "covariateSettings")) {
covariateSettings <- list(covariateSettings)
}
- if(sum(unlist(lapply(covariateSettings, function(x){x$temporal})))>0){
- stop('Temporal covariateSettings not supported by createAggregateCovariateSettings()')
+ if (sum(unlist(lapply(covariateSettings, function(x) {
+ x$temporal
+ }))) > 0) {
+ stop("Temporal covariateSettings not supported by createAggregateCovariateSettings()")
}
# check minPriorObservation
@@ -134,12 +135,12 @@ createAggregateCovariateSettings <- function(
checkmate::reportAssertions(errorMessages)
# check unique Ts and Os
- if(length(targetIds) != length(unique(targetIds))){
- message('targetIds have duplicates - making unique')
+ if (length(targetIds) != length(unique(targetIds))) {
+ message("targetIds have duplicates - making unique")
targetIds <- unique(targetIds)
}
- if(length(outcomeIds) != length(unique(outcomeIds))){
- message('outcomeIds have duplicates - making unique')
+ if (length(outcomeIds) != length(unique(outcomeIds))) {
+ message("outcomeIds have duplicates - making unique")
outcomeIds <- unique(outcomeIds)
}
@@ -148,15 +149,12 @@ createAggregateCovariateSettings <- function(
result <- list(
targetIds = targetIds,
minPriorObservation = minPriorObservation,
-
outcomeIds = outcomeIds,
outcomeWashoutDays = outcomeWashoutDays,
-
riskWindowStart = riskWindowStart,
startAnchor = startAnchor,
riskWindowEnd = riskWindowEnd,
endAnchor = endAnchor,
-
covariateSettings = covariateSettings, # risk factors
caseCovariateSettings = caseCovariateSettings, # case series
casePreTargetDuration = casePreTargetDuration, # case series
@@ -169,8 +167,8 @@ createAggregateCovariateSettings <- function(
return(result)
}
-createExecutionIds <- function(size){
- executionIds <- gsub(" ", "", gsub("[[:punct:]]", "",paste(Sys.time(), sample(1000000,size), sep = '')))
+createExecutionIds <- function(size) {
+ executionIds <- gsub(" ", "", gsub("[[:punct:]]", "", paste(Sys.time(), sample(1000000, size), sep = "")))
return(executionIds)
}
@@ -186,12 +184,10 @@ computeTargetAggregateCovariateAnalyses <- function(
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
settings,
databaseId = "database 1",
- outputFolder = file.path(getwd(),'characterization_results'),
+ outputFolder = file.path(getwd(), "characterization_results"),
minCharacterizationMean = 0,
minCellCount = 0,
- ...
-) {
-
+ ...) {
# get settings
settingId <- unique(settings$settingId)
targetIds <- unique(settings$targetId)
@@ -201,10 +197,10 @@ computeTargetAggregateCovariateAnalyses <- function(
# create cohortDetails - all Ts, minPriorObservation, twice (type = Tall, Target)
cohortDetails <- data.frame(
settingId = settingId,
- targetCohortId = rep(targetIds,2),
+ targetCohortId = rep(targetIds, 2),
outcomeCohortId = 0, # cannot be NA due to pk/index
- cohortType = c(rep('Target',length(targetIds)),rep('Tall',length(targetIds))),
- cohortDefinitionId = 1:(length(targetIds)*2),
+ cohortType = c(rep("Target", length(targetIds)), rep("Tall", length(targetIds))),
+ cohortDefinitionId = 1:(length(targetIds) * 2),
minPriorObservation = minPriorObservation,
outcomeWashoutDays = NA,
casePreTargetDuration = NA,
@@ -226,10 +222,10 @@ computeTargetAggregateCovariateAnalyses <- function(
# create the temp table with cohort_details
DatabaseConnector::insertTable(
- data = cohortDetails[,c('settingId','targetCohortId','outcomeCohortId','cohortType','cohortDefinitionId')],
+ data = cohortDetails[, c("settingId", "targetCohortId", "outcomeCohortId", "cohortType", "cohortDefinitionId")],
camelCaseToSnakeCase = T,
connection = connection,
- tableName = '#cohort_details',
+ tableName = "#cohort_details",
tempTable = T,
dropTableIfExists = T,
createTable = T,
@@ -259,7 +255,7 @@ computeTargetAggregateCovariateAnalyses <- function(
)
completionTime <- Sys.time() - start
- message(paste0('Computing target cohorts took ',round(completionTime,digits = 1), ' ', units(completionTime)))
+ message(paste0("Computing target cohorts took ", round(completionTime, digits = 1), " ", units(completionTime)))
## get counts
message("Extracting target cohort counts")
sql <- "select
@@ -337,11 +333,10 @@ computeCaseAggregateCovariateAnalyses <- function(
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
settings,
databaseId = "database 1",
- outputFolder = file.path(getwd(),'characterization_results'),
+ outputFolder = file.path(getwd(), "characterization_results"),
minCharacterizationMean = 0,
minCellCount = 0,
- ...
- ) {
+ ...) {
# check inputs
# create cohortDetails - all Ts, minPriorObservation, twice (type = Tall, Target)
@@ -361,7 +356,7 @@ computeCaseAggregateCovariateAnalyses <- function(
cohortDetails <- expand.grid(
targetCohortId = unique(settings$targetId),
outcomeCohortId = unique(settings$outcomeId),
- cohortType = c('Cases', 'CasesBefore', 'CasesAfter', 'CasesBetween')
+ cohortType = c("Cases", "CasesBefore", "CasesAfter", "CasesBetween")
)
cohortDetails$minPriorObservation <- settings$minPriorObservation
@@ -383,7 +378,7 @@ computeCaseAggregateCovariateAnalyses <- function(
cohortDetailsExtra <- expand.grid(
targetCohortId = unique(settings$targetId),
outcomeCohortId = unique(settings$outcomeId),
- cohortType = 'Exclude',
+ cohortType = "Exclude",
minPriorObservation = settings$minPriorObservation,
outcomeWashoutDays = settings$outcomeWashoutDays,
casePreTargetDuration = settings$casePreTargetDuration,
@@ -408,10 +403,10 @@ computeCaseAggregateCovariateAnalyses <- function(
# create the temp table with cohort_details
DatabaseConnector::insertTable(
- data = cohortDetails[,c('targetCohortId','outcomeCohortId','cohortType','cohortDefinitionId', 'settingId')],
+ data = cohortDetails[, c("targetCohortId", "outcomeCohortId", "cohortType", "cohortDefinitionId", "settingId")],
camelCaseToSnakeCase = T,
connection = connection,
- tableName = '#cohort_details',
+ tableName = "#cohort_details",
tempTable = T,
dropTableIfExists = T,
createTable = T,
@@ -448,20 +443,20 @@ computeCaseAggregateCovariateAnalyses <- function(
# excluded_analysis_ref, excluded_covariate_ref
# loop over settingId which contains tars:
- for(i in 1:nrow(tars)){
+ for (i in 1:nrow(tars)) {
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = "CaseCohortsPart2.sql",
packageName = "Characterization",
dbms = connectionDetails$dbms,
tempEmulationSchema = tempEmulationSchema,
- first = i==1,
+ first = i == 1,
case_pre_target_duration = casePreTargetDuration,
case_post_outcome_duration = casePostOutcomeDuration,
setting_id = tars$settingId[i],
tar_start = tars$riskWindowStart[i],
- tar_start_anchor = ifelse(tars$startAnchor[i] == 'cohort start','cohort_start_date','cohort_end_date'), ##TODO change?
+ tar_start_anchor = ifelse(tars$startAnchor[i] == "cohort start", "cohort_start_date", "cohort_end_date"), ## TODO change?
tar_end = tars$riskWindowEnd[i],
- tar_end_anchor = ifelse(tars$endAnchor[i] == 'cohort start','cohort_start_date','cohort_end_date') ##TODO change?
+ tar_end_anchor = ifelse(tars$endAnchor[i] == "cohort start", "cohort_start_date", "cohort_end_date") ## TODO change?
)
DatabaseConnector::executeSql(
connection = connection,
@@ -472,7 +467,7 @@ computeCaseAggregateCovariateAnalyses <- function(
}
completionTime <- Sys.time() - start
- message(paste0('Computing case cohorts took ',round(completionTime,digits = 1), ' ', units(completionTime)))
+ message(paste0("Computing case cohorts took ", round(completionTime, digits = 1), " ", units(completionTime)))
## get counts
message("Extracting case cohort counts")
@@ -512,23 +507,28 @@ computeCaseAggregateCovariateAnalyses <- function(
message("Computing aggregate during case covariate results")
- result2 <- tryCatch({FeatureExtraction::getDbCovariateData(
- connection = connection,
- oracleTempSchema = tempEmulationSchema,
- cdmDatabaseSchema = cdmDatabaseSchema,
- cohortTable = "#case_series",
- cohortTableIsTemp = T,
- cohortIds = -1,
- covariateSettings = ParallelLogger::convertJsonToSettings(caseCovariateSettings),
- cdmVersion = cdmVersion,
- aggregated = T,
- minCharacterizationMean = minCharacterizationMean
- )}, error = function(e){
- message(e);
- return(NULL)
- })
- if(is.null(result2)){
- stop('Issue with case series data extraction')
+ result2 <- tryCatch(
+ {
+ FeatureExtraction::getDbCovariateData(
+ connection = connection,
+ oracleTempSchema = tempEmulationSchema,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ cohortTable = "#case_series",
+ cohortTableIsTemp = T,
+ cohortIds = -1,
+ covariateSettings = ParallelLogger::convertJsonToSettings(caseCovariateSettings),
+ cdmVersion = cdmVersion,
+ aggregated = T,
+ minCharacterizationMean = minCharacterizationMean
+ )
+ },
+ error = function(e) {
+ message(e)
+ return(NULL)
+ }
+ )
+ if (is.null(result2)) {
+ stop("Issue with case series data extraction")
}
# drop temp tables
@@ -580,11 +580,9 @@ exportAndromedaToCsv <- function(
minCharacterizationMean,
batchSize = 100000,
minCellCount = 0,
- includeSettings = T
-){
-
+ includeSettings = T) {
saveLocation <- outputFolder
- if(!dir.exists(saveLocation)){
+ if (!dir.exists(saveLocation)) {
dir.create(saveLocation, recursive = T)
}
@@ -595,21 +593,21 @@ exportAndromedaToCsv <- function(
# analysis_ref and covariate_ref
# add database_id and setting_id
- if(!is.null(andromeda$analysisRef)){
+ if (!is.null(andromeda$analysisRef)) {
Andromeda::batchApply(
- tbl = andromeda$analysisRef,
- fun = function(x){
+ tbl = andromeda$analysisRef,
+ fun = function(x) {
data <- merge(x, ids)
colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
- if(file.exists(file.path(saveLocation, 'analysis_ref.csv'))){
+ if (file.exists(file.path(saveLocation, "analysis_ref.csv"))) {
append <- T
- } else{
- append = F
+ } else {
+ append <- F
}
readr::write_csv(
x = formatDouble(data),
- file = file.path(saveLocation, 'analysis_ref.csv'),
+ file = file.path(saveLocation, "analysis_ref.csv"),
append = append
)
},
@@ -617,21 +615,21 @@ exportAndromedaToCsv <- function(
)
}
- if(!is.null(andromeda$covariateRef)){
+ if (!is.null(andromeda$covariateRef)) {
Andromeda::batchApply(
- tbl = andromeda$covariateRef,
- fun = function(x){
+ tbl = andromeda$covariateRef,
+ fun = function(x) {
data <- merge(x, ids)
colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
- if(file.exists(file.path(saveLocation, 'covariate_ref.csv'))){
+ if (file.exists(file.path(saveLocation, "covariate_ref.csv"))) {
append <- T
- } else{
- append = F
+ } else {
+ append <- F
}
readr::write_csv(
x = formatDouble(data),
- file = file.path(saveLocation, 'covariate_ref.csv'),
+ file = file.path(saveLocation, "covariate_ref.csv"),
append = append
)
},
@@ -640,16 +638,16 @@ exportAndromedaToCsv <- function(
}
# covariates and covariate_continuous
- extras <- cohortDetails[, c('cohortDefinitionId','settingId', 'targetCohortId', 'outcomeCohortId', 'cohortType')]
- extras$databaseId <- databaseId
+ extras <- cohortDetails[, c("cohortDefinitionId", "settingId", "targetCohortId", "outcomeCohortId", "cohortType")]
+ extras$databaseId <- databaseId
extras$minCharacterizationMean <- minCharacterizationMean
# add database_id, setting_id, target_cohort_id, outcome_cohort_id and cohort_type
- if(!is.null(andromeda$covariates)){
+ if (!is.null(andromeda$covariates)) {
Andromeda::batchApply(
- tbl = andromeda$covariates,
- fun = function(x){
- data <- merge(x, extras, by = 'cohortDefinitionId')
+ tbl = andromeda$covariates,
+ fun = function(x) {
+ data <- merge(x, extras, by = "cohortDefinitionId")
data <- data %>% dplyr::select(-"cohortDefinitionId")
colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
@@ -658,20 +656,20 @@ exportAndromedaToCsv <- function(
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing sum_value counts less than ", minCellCount))
if (sum(removeInd) > 0) {
- data$sum_value[removeInd] <- -1*minCellCount
+ data$sum_value[removeInd] <- -1 * minCellCount
# adding other calculated columns
data$average_value[removeInd] <- NA
}
}
- if(file.exists(file.path(saveLocation, 'covariates.csv'))){
+ if (file.exists(file.path(saveLocation, "covariates.csv"))) {
append <- T
- } else{
- append = F
+ } else {
+ append <- F
}
readr::write_csv(
x = formatDouble(data),
- file = file.path(saveLocation, 'covariates.csv'),
+ file = file.path(saveLocation, "covariates.csv"),
append = append
)
},
@@ -679,11 +677,11 @@ exportAndromedaToCsv <- function(
)
}
- if(!is.null(andromeda$covariatesContinuous)){
+ if (!is.null(andromeda$covariatesContinuous)) {
Andromeda::batchApply(
- tbl = andromeda$covariatesContinuous,
- fun = function(x){
- data <- merge(x, extras %>% dplyr::select(-"minCharacterizationMean"), by = 'cohortDefinitionId')
+ tbl = andromeda$covariatesContinuous,
+ fun = function(x) {
+ data <- merge(x, extras %>% dplyr::select(-"minCharacterizationMean"), by = "cohortDefinitionId")
data <- data %>% dplyr::select(-"cohortDefinitionId")
colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
@@ -692,7 +690,7 @@ exportAndromedaToCsv <- function(
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing count_value counts less than ", minCellCount))
if (sum(removeInd) > 0) {
- data$count_value[removeInd] <- -1*minCellCount
+ data$count_value[removeInd] <- -1 * minCellCount
# adding columns calculated from count
data$min_value[removeInd] <- NA
data$max_value[removeInd] <- NA
@@ -706,14 +704,14 @@ exportAndromedaToCsv <- function(
}
}
- if(file.exists(file.path(saveLocation, 'covariates_continuous.csv'))){
+ if (file.exists(file.path(saveLocation, "covariates_continuous.csv"))) {
append <- T
- } else{
- append = F
+ } else {
+ append <- F
}
readr::write_csv(
x = formatDouble(data),
- file = file.path(saveLocation, 'covariates_continuous.csv'),
+ file = file.path(saveLocation, "covariates_continuous.csv"),
append = append
)
},
@@ -722,23 +720,24 @@ exportAndromedaToCsv <- function(
}
# cohort_counts:
- if(!is.null(counts)){
- cohortCounts <- cohortDetails %>% dplyr::select(
- 'targetCohortId',
- 'outcomeCohortId',
- 'cohortType',
- 'cohortDefinitionId',
- 'riskWindowStart',
- 'riskWindowEnd',
- 'startAnchor',
- 'endAnchor',
- 'minPriorObservation',
- 'outcomeWashoutDays'
- ) %>%
+ if (!is.null(counts)) {
+ cohortCounts <- cohortDetails %>%
+ dplyr::select(
+ "targetCohortId",
+ "outcomeCohortId",
+ "cohortType",
+ "cohortDefinitionId",
+ "riskWindowStart",
+ "riskWindowEnd",
+ "startAnchor",
+ "endAnchor",
+ "minPriorObservation",
+ "outcomeWashoutDays"
+ ) %>%
dplyr::mutate(
databaseId = !!databaseId
) %>%
- dplyr::inner_join(counts, by = 'cohortDefinitionId') %>%
+ dplyr::inner_join(counts, by = "cohortDefinitionId") %>%
dplyr::select(-"cohortDefinitionId")
cohortCounts <- unique(cohortCounts)
colnames(cohortCounts) <- SqlRender::camelCaseToSnakeCase(colnames(cohortCounts))
@@ -748,35 +747,37 @@ exportAndromedaToCsv <- function(
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing row_count counts less than ", minCellCount))
if (sum(removeInd) > 0) {
- cohortCounts$row_count[removeInd] <- -1*minCellCount
+ cohortCounts$row_count[removeInd] <- -1 * minCellCount
}
}
removeInd <- cohortCounts$person_count < minCellCount
if (sum(removeInd) > 0) {
ParallelLogger::logInfo(paste0("Removing person_count counts less than ", minCellCount))
if (sum(removeInd) > 0) {
- cohortCounts$person_count[removeInd] <- -1*minCellCount
+ cohortCounts$person_count[removeInd] <- -1 * minCellCount
}
}
- if(file.exists(file.path(saveLocation, 'cohort_counts.csv'))){
+ if (file.exists(file.path(saveLocation, "cohort_counts.csv"))) {
append <- T
- } else{
- append = F
+ } else {
+ append <- F
}
readr::write_csv(
x = formatDouble(cohortCounts),
- file = file.path(saveLocation, 'cohort_counts.csv'),
+ file = file.path(saveLocation, "cohort_counts.csv"),
append = append
)
}
- if(includeSettings){
- settings <- cohortDetails %>%
- dplyr::select('settingId', 'minPriorObservation', 'outcomeWashoutDays',
- 'riskWindowStart', 'riskWindowEnd', 'startAnchor', 'endAnchor',
- 'casePreTargetDuration', 'casePostOutcomeDuration',
- 'covariateSettingJson', 'caseCovariateSettingJson') %>%
+ if (includeSettings) {
+ settings <- cohortDetails %>%
+ dplyr::select(
+ "settingId", "minPriorObservation", "outcomeWashoutDays",
+ "riskWindowStart", "riskWindowEnd", "startAnchor", "endAnchor",
+ "casePreTargetDuration", "casePostOutcomeDuration",
+ "covariateSettingJson", "caseCovariateSettingJson"
+ ) %>%
dplyr::mutate(databaseId = !!databaseId) %>%
dplyr::distinct()
colnames(settings) <- SqlRender::camelCaseToSnakeCase(colnames(settings))
@@ -784,13 +785,15 @@ exportAndromedaToCsv <- function(
# add setting.csv with cohortDetails plus database
readr::write_csv(
x = settings,
- file = file.path(saveLocation, 'settings.csv'),
+ file = file.path(saveLocation, "settings.csv"),
append = F
)
cohortDetails <- cohortDetails %>%
- dplyr::select('settingId', 'targetCohortId',
- 'outcomeCohortId','cohortType') %>%
+ dplyr::select(
+ "settingId", "targetCohortId",
+ "outcomeCohortId", "cohortType"
+ ) %>%
dplyr::mutate(databaseId = !!databaseId) %>%
dplyr::distinct()
colnames(cohortDetails) <- SqlRender::camelCaseToSnakeCase(colnames(cohortDetails))
@@ -798,7 +801,7 @@ exportAndromedaToCsv <- function(
# add cohort_details.csv with cohortDetails plus database
readr::write_csv(
x = cohortDetails,
- file = file.path(saveLocation, 'cohort_details.csv'),
+ file = file.path(saveLocation, "cohort_details.csv"),
append = F
)
}
@@ -809,56 +812,65 @@ exportAndromedaToCsv <- function(
-combineCovariateSettingsJsons <- function(covariateSettingsJsonList){
-
+combineCovariateSettingsJsons <- function(covariateSettingsJsonList) {
# get unique
covariateSettingsJsonList <- unique(covariateSettingsJsonList)
# first convert from json
covariateSettings <- lapply(
X = covariateSettingsJsonList,
- FUN = function(x){ParallelLogger::convertJsonToSettings(x)}
+ FUN = function(x) {
+ ParallelLogger::convertJsonToSettings(x)
+ }
)
# then combine the covariates
- singleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, 'covariateSettings'))))
- multipleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, 'list'))))
+ singleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, "covariateSettings"))))
+ multipleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, "list"))))
covariateSettingList <- list()
- if(length(singleSettings)>0){
- for(i in singleSettings){
+ if (length(singleSettings) > 0) {
+ for (i in singleSettings) {
covariateSettingList[[length(covariateSettingList) + 1]] <- covariateSettings[[i]]
}
}
- if(length(multipleSettings) > 0){
- for(i in multipleSettings){
+ if (length(multipleSettings) > 0) {
+ for (i in multipleSettings) {
settingList <- covariateSettings[[i]]
- for(j in 1:length(settingList)){
- if(inherits(settingList[[j]], 'covariateSettings')){
+ for (j in 1:length(settingList)) {
+ if (inherits(settingList[[j]], "covariateSettings")) {
covariateSettingList[[length(covariateSettingList) + 1]] <- settingList[[j]]
- } else{
- message('Incorrect covariate settings found') # stop?
+ } else {
+ message("Incorrect covariate settings found") # stop?
}
}
}
}
# check for covariates with same id but different
- endDays <- unique(unlist(lapply(covariateSettingList, function(x){x$endDays})))
- if(length(endDays) > 1){
- stop('Covariate settings for aggregate covariates using different end days')
+ endDays <- unique(unlist(lapply(covariateSettingList, function(x) {
+ x$endDays
+ })))
+ if (length(endDays) > 1) {
+ stop("Covariate settings for aggregate covariates using different end days")
}
- longTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$longTermStartDays})))
- if(length(longTermStartDays) > 1){
- stop('Covariate settings for aggregate covariates using different longTermStartDays')
+ longTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) {
+ x$longTermStartDays
+ })))
+ if (length(longTermStartDays) > 1) {
+ stop("Covariate settings for aggregate covariates using different longTermStartDays")
}
- mediumTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$mediumTermStartDays})))
- if(length(mediumTermStartDays) > 1){
- stop('Covariate settings for aggregate covariates using different mediumTermStartDays')
+ mediumTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) {
+ x$mediumTermStartDays
+ })))
+ if (length(mediumTermStartDays) > 1) {
+ stop("Covariate settings for aggregate covariates using different mediumTermStartDays")
}
- shortTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$shortTermStartDays})))
- if(length(shortTermStartDays) > 1){
- stop('Covariate settings for aggregate covariates using different shortTermStartDays')
+ shortTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) {
+ x$shortTermStartDays
+ })))
+ if (length(shortTermStartDays) > 1) {
+ stop("Covariate settings for aggregate covariates using different shortTermStartDays")
}
# convert to json
@@ -868,41 +880,41 @@ combineCovariateSettingsJsons <- function(covariateSettingsJsonList){
getAggregateCovariatesJobs <- function(
characterizationSettings,
- threads
-){
-
+ threads) {
characterizationSettings <- characterizationSettings$aggregateCovariateSettings
- if(length(characterizationSettings) == 0){
+ if (length(characterizationSettings) == 0) {
return(NULL)
}
ind <- 1:length(characterizationSettings)
# target combinations
- targetCombinations <- do.call(what = 'rbind',
- args =
- lapply(
- 1:length(characterizationSettings),
- function(i){
-
- if(characterizationSettings[[i]]$extractNonCaseCovariates){
- result <- data.frame(
- targetIds = c(characterizationSettings[[i]]$targetIds,
- characterizationSettings[[i]]$outcomeIds),
- minPriorObservation = characterizationSettings[[i]]$minPriorObservation,
- covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings))
- )
- return(result)
- } else{
- return(
- data.frame(targetIds = 1, minPriorObservation = 1, covariateSettingsJson = 1)[-1,]
- )
- }
- }
- )
+ targetCombinations <- do.call(
+ what = "rbind",
+ args =
+ lapply(
+ 1:length(characterizationSettings),
+ function(i) {
+ if (characterizationSettings[[i]]$extractNonCaseCovariates) {
+ result <- data.frame(
+ targetIds = c(
+ characterizationSettings[[i]]$targetIds,
+ characterizationSettings[[i]]$outcomeIds
+ ),
+ minPriorObservation = characterizationSettings[[i]]$minPriorObservation,
+ covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings))
+ )
+ return(result)
+ } else {
+ return(
+ data.frame(targetIds = 1, minPriorObservation = 1, covariateSettingsJson = 1)[-1, ]
+ )
+ }
+ }
+ )
)
- if(nrow(targetCombinations) > 0){
+ if (nrow(targetCombinations) > 0) {
threadCols <- c("targetIds")
settingCols <- c("minPriorObservation")
@@ -910,18 +922,18 @@ getAggregateCovariatesJobs <- function(
threadSettings <- targetCombinations %>%
dplyr::select(dplyr::all_of(threadCols)) %>%
dplyr::distinct()
- threadSettings$thread <- rep(1:threads, ceiling(nrow(threadSettings)/threads))[1:nrow(threadSettings)]
- targetCombinations <- merge(targetCombinations, threadSettings, by = threadCols )
+ threadSettings$thread <- rep(1:threads, ceiling(nrow(threadSettings) / threads))[1:nrow(threadSettings)]
+ targetCombinations <- merge(targetCombinations, threadSettings, by = threadCols)
executionSettings <- data.frame(
minPriorObservation = unique(targetCombinations$minPriorObservation)
)
executionSettings$settingId <- createExecutionIds(nrow(executionSettings))
- targetCombinations <- merge(targetCombinations, executionSettings, by = settingCols )
+ targetCombinations <- merge(targetCombinations, executionSettings, by = settingCols)
# recreate settings
settings <- c()
- for(settingId in unique(executionSettings$settingId)){
+ for (settingId in unique(executionSettings$settingId)) {
settingVal <- executionSettings %>%
dplyr::filter(.data$settingId == !!settingId) %>%
dplyr::select(dplyr::all_of(settingCols))
@@ -929,26 +941,27 @@ getAggregateCovariatesJobs <- function(
restrictedData <- targetCombinations %>%
dplyr::inner_join(settingVal, by = settingCols)
- for(i in unique(restrictedData$thread)){
+ for (i in unique(restrictedData$thread)) {
ind <- restrictedData$thread == i
- settings <- rbind(settings,
- data.frame(
- functionName = 'computeTargetAggregateCovariateAnalyses',
- settings = as.character(ParallelLogger::convertSettingsToJson(
- list(
- targetIds = unique(restrictedData$targetId[ind]),
- minPriorObservation = unique(restrictedData$minPriorObservation[ind]),
- covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])),
- settingId = settingId
- )
- )),
- executionFolder = paste('tac', i ,paste(settingVal, collapse = '_'), sep = '_'),
- jobId = paste('tac', i ,paste(settingVal, collapse = '_'), sep = '_')
- )
+ settings <- rbind(
+ settings,
+ data.frame(
+ functionName = "computeTargetAggregateCovariateAnalyses",
+ settings = as.character(ParallelLogger::convertSettingsToJson(
+ list(
+ targetIds = unique(restrictedData$targetId[ind]),
+ minPriorObservation = unique(restrictedData$minPriorObservation[ind]),
+ covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])),
+ settingId = settingId
+ )
+ )),
+ executionFolder = paste("tac", i, paste(settingVal, collapse = "_"), sep = "_"),
+ jobId = paste("tac", i, paste(settingVal, collapse = "_"), sep = "_")
+ )
)
}
}
- } else{
+ } else {
settings <- c()
}
@@ -956,97 +969,105 @@ getAggregateCovariatesJobs <- function(
Sys.sleep(time = 2)
# get all combinations of TnOs, then split by threads
- caseCombinations <- do.call(what = 'rbind',
- args =
- lapply(
- 1:length(characterizationSettings),
- function(i){
- result <- expand.grid(
- targetId = characterizationSettings[[i]]$targetIds,
- outcomeId = characterizationSettings[[i]]$outcomeIds
- )
- result$minPriorObservation = characterizationSettings[[i]]$minPriorObservation
- result$outcomeWashoutDays = characterizationSettings[[i]]$outcomeWashoutDays
-
- result$riskWindowStart = characterizationSettings[[i]]$riskWindowStart
- result$startAnchor = characterizationSettings[[i]]$startAnchor
- result$riskWindowEnd = characterizationSettings[[i]]$riskWindowEnd
- result$endAnchor = characterizationSettings[[i]]$endAnchor
-
- result$casePreTargetDuration = characterizationSettings[[i]]$casePreTargetDuration
- result$casePostOutcomeDuration = characterizationSettings[[i]]$casePostOutcomeDuration
-
- result$covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings))
- result$caseCovariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$caseCovariateSettings))
- return(result)
- }
- )
+ caseCombinations <- do.call(
+ what = "rbind",
+ args =
+ lapply(
+ 1:length(characterizationSettings),
+ function(i) {
+ result <- expand.grid(
+ targetId = characterizationSettings[[i]]$targetIds,
+ outcomeId = characterizationSettings[[i]]$outcomeIds
+ )
+ result$minPriorObservation <- characterizationSettings[[i]]$minPriorObservation
+ result$outcomeWashoutDays <- characterizationSettings[[i]]$outcomeWashoutDays
+
+ result$riskWindowStart <- characterizationSettings[[i]]$riskWindowStart
+ result$startAnchor <- characterizationSettings[[i]]$startAnchor
+ result$riskWindowEnd <- characterizationSettings[[i]]$riskWindowEnd
+ result$endAnchor <- characterizationSettings[[i]]$endAnchor
+
+ result$casePreTargetDuration <- characterizationSettings[[i]]$casePreTargetDuration
+ result$casePostOutcomeDuration <- characterizationSettings[[i]]$casePostOutcomeDuration
+
+ result$covariateSettingsJson <- as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings))
+ result$caseCovariateSettingsJson <- as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$caseCovariateSettings))
+ return(result)
+ }
+ )
)
# create executionIds
- settingCols <- c('minPriorObservation', 'outcomeWashoutDays',
- 'casePreTargetDuration', 'casePostOutcomeDuration',
- 'riskWindowStart', 'startAnchor',
- 'riskWindowEnd', 'endAnchor')
- executionSettings <- unique(caseCombinations[,settingCols])
+ settingCols <- c(
+ "minPriorObservation", "outcomeWashoutDays",
+ "casePreTargetDuration", "casePostOutcomeDuration",
+ "riskWindowStart", "startAnchor",
+ "riskWindowEnd", "endAnchor"
+ )
+ executionSettings <- unique(caseCombinations[, settingCols])
executionSettings$settingId <- createExecutionIds(nrow(executionSettings))
caseCombinations <- merge(caseCombinations, executionSettings, by = settingCols)
# create thread split
threadCombinations <- caseCombinations %>%
- dplyr::select("targetId",
- "minPriorObservation",
- "outcomeWashoutDays",
- "casePreTargetDuration",
- "casePostOutcomeDuration"
+ dplyr::select(
+ "targetId",
+ "minPriorObservation",
+ "outcomeWashoutDays",
+ "casePreTargetDuration",
+ "casePostOutcomeDuration"
) %>%
dplyr::distinct()
- threadCombinations$thread <- rep(1:threads, ceiling(nrow(threadCombinations)/threads))[1:nrow(threadCombinations)]
- caseCombinations <- merge(caseCombinations, threadCombinations, by = c("targetId",
- "minPriorObservation",
- "outcomeWashoutDays",
- "casePreTargetDuration",
- "casePostOutcomeDuration"
+ threadCombinations$thread <- rep(1:threads, ceiling(nrow(threadCombinations) / threads))[1:nrow(threadCombinations)]
+ caseCombinations <- merge(caseCombinations, threadCombinations, by = c(
+ "targetId",
+ "minPriorObservation",
+ "outcomeWashoutDays",
+ "casePreTargetDuration",
+ "casePostOutcomeDuration"
))
- executionCols <- c('minPriorObservation', 'outcomeWashoutDays',
- 'casePreTargetDuration', 'casePostOutcomeDuration')
- executions <- unique(caseCombinations[,executionCols])
+ executionCols <- c(
+ "minPriorObservation", "outcomeWashoutDays",
+ "casePreTargetDuration", "casePostOutcomeDuration"
+ )
+ executions <- unique(caseCombinations[, executionCols])
# now create the settings
- for(j in 1:nrow(executions)){
- settingVal <- executions[j,]
+ for (j in 1:nrow(executions)) {
+ settingVal <- executions[j, ]
restrictedData <- caseCombinations %>%
dplyr::inner_join(settingVal, by = executionCols)
- for(i in unique(restrictedData$thread)){
+ for (i in unique(restrictedData$thread)) {
ind <- restrictedData$thread == i
- settings <- rbind(settings,
- data.frame(
- functionName = 'computeCaseAggregateCovariateAnalyses',
- settings = as.character(ParallelLogger::convertSettingsToJson(
- list(
- targetIds = unique(restrictedData$targetId[ind]),
- outcomeIds = unique(restrictedData$outcomeId[ind]),
- minPriorObservation = unique(restrictedData$minPriorObservation[ind]),
- outcomeWashoutDays = unique(restrictedData$outcomeWashoutDays[ind]),
- tar = unique(data.frame(
- riskWindowStart = restrictedData$riskWindowStart[ind],
- startAnchor = restrictedData$startAnchor[ind],
- riskWindowEnd = restrictedData$riskWindowEnd[ind],
- endAnchor = restrictedData$endAnchor[ind]
- )),
- casePreTargetDuration = unique(restrictedData$casePreTargetDuration[ind]),
- casePostOutcomeDuration = unique(restrictedData$casePostOutcomeDuration[ind]),
- covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])),
- caseCovariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$caseCovariateSettingsJson[ind])),
- settingIds = unique(restrictedData$settingId[ind])
- )
- )),
- executionFolder = paste('cac', i, paste0(settingVal, collapse = '_'), sep = '_'),
- jobId = paste('cac', i, paste0(settingVal, collapse = '_'), sep = '_')
- )
+ settings <- rbind(
+ settings,
+ data.frame(
+ functionName = "computeCaseAggregateCovariateAnalyses",
+ settings = as.character(ParallelLogger::convertSettingsToJson(
+ list(
+ targetIds = unique(restrictedData$targetId[ind]),
+ outcomeIds = unique(restrictedData$outcomeId[ind]),
+ minPriorObservation = unique(restrictedData$minPriorObservation[ind]),
+ outcomeWashoutDays = unique(restrictedData$outcomeWashoutDays[ind]),
+ tar = unique(data.frame(
+ riskWindowStart = restrictedData$riskWindowStart[ind],
+ startAnchor = restrictedData$startAnchor[ind],
+ riskWindowEnd = restrictedData$riskWindowEnd[ind],
+ endAnchor = restrictedData$endAnchor[ind]
+ )),
+ casePreTargetDuration = unique(restrictedData$casePreTargetDuration[ind]),
+ casePostOutcomeDuration = unique(restrictedData$casePostOutcomeDuration[ind]),
+ covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])),
+ caseCovariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$caseCovariateSettingsJson[ind])),
+ settingIds = unique(restrictedData$settingId[ind])
+ )
+ )),
+ executionFolder = paste("cac", i, paste0(settingVal, collapse = "_"), sep = "_"),
+ jobId = paste("cac", i, paste0(settingVal, collapse = "_"), sep = "_")
+ )
)
}
}
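
Note on the time-at-risk check added above: createAggregateCovariateSettings() now stops
when riskWindowStart has length greater than one, so each settings object carries exactly
one TAR. A sketch of the resulting usage, with hypothetical cohort ids and the remaining
arguments assumed to keep their package defaults:

    library(Characterization)

    # one settings object per time-at-risk window
    tarOneYear <- createAggregateCovariateSettings(
      targetIds = c(1, 2),
      outcomeIds = 3,
      riskWindowStart = 1, startAnchor = "cohort start",
      riskWindowEnd = 365, endAnchor = "cohort start"
    )
    tarOneMonth <- createAggregateCovariateSettings(
      targetIds = c(1, 2),
      outcomeIds = 3,
      riskWindowStart = 1, startAnchor = "cohort start",
      riskWindowEnd = 30, endAnchor = "cohort start"
    )
    # both objects can then be combined via createCharacterizationSettings()
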
diff --git a/R/CustomCovariates.R b/R/CustomCovariates.R
index dddc434..df9b8e4 100644
--- a/R/CustomCovariates.R
+++ b/R/CustomCovariates.R
@@ -52,10 +52,10 @@
#'
#' @examples
#' settings <- createDuringCovariateSettings(
-#' useConditionOccurrenceDuring = TRUE,
-#' useConditionOccurrencePrimaryInpatientDuring = FALSE,
-#' useConditionEraDuring = FALSE,
-#' useConditionGroupEraDuring = FALSE
+#' useConditionOccurrenceDuring = TRUE,
+#' useConditionOccurrencePrimaryInpatientDuring = FALSE,
+#' useConditionEraDuring = FALSE,
+#' useConditionGroupEraDuring = FALSE
#' )
#'
#' @export
@@ -64,7 +64,6 @@ createDuringCovariateSettings <- function(
useConditionOccurrencePrimaryInpatientDuring = F,
useConditionEraDuring = F,
useConditionGroupEraDuring = F,
-
useDrugExposureDuring = F,
useDrugEraDuring = F,
useDrugGroupEraDuring = F,
@@ -74,14 +73,11 @@ createDuringCovariateSettings <- function(
useObservationDuring = F,
useVisitCountDuring = F,
useVisitConceptCountDuring = F,
-
includedCovariateConceptIds = c(),
addDescendantsToInclude = F,
excludedCovariateConceptIds = c(),
addDescendantsToExclude = F,
- includedCovariateIds = c()
-){
-
+ includedCovariateIds = c()) {
covariateSettings <- list(
temporal = FALSE, # needed?
temporalSequence = FALSE
@@ -143,14 +139,13 @@ getDbDuringCovariateData <- function(
cohortIds = c(-1),
covariateSettings,
minCharacterizationMean = 0,
- ...
-) {
+ ...) {
writeLines("Constructing during cohort covariates")
if (!aggregated) {
stop("Only aggregation supported")
}
- getDomainSettings <- utils::read.csv(system.file('csv/PrespecAnalyses.csv', package = 'Characterization'))
+ getDomainSettings <- utils::read.csv(system.file("csv/PrespecAnalyses.csv", package = "Characterization"))
# create Tables
@@ -166,10 +161,10 @@ getDbDuringCovariateData <- function(
sql <- SqlRender::translate(
sql = sql,
targetDialect = DatabaseConnector::dbms(connection)
- )
+ )
DatabaseConnector::executeSql(connection, sql = sql)
- sql <- "DROP TABLE IF EXISTS #analysis_ref;
+ sql <- "DROP TABLE IF EXISTS #analysis_ref;
CREATE TABLE #analysis_ref(
analysis_id int,
analysis_name varchar(100),
@@ -183,16 +178,16 @@ getDbDuringCovariateData <- function(
sql = sql,
targetDialect = DatabaseConnector::dbms(connection)
)
- DatabaseConnector::executeSql(connection,sql)
+ DatabaseConnector::executeSql(connection, sql)
# included covariates
- includedCovTable <- ''
- if(length(covariateSettings$includedCovariateIds) > 0 ){
+ includedCovTable <- ""
+ if (length(covariateSettings$includedCovariateIds) > 0) {
# create-
- includedCovTable <- '#included_cov'
+ includedCovTable <- "#included_cov"
DatabaseConnector::insertTable(
connection = connection,
- tableName = includedCovTable,
+ tableName = includedCovTable,
dropTableIfExists = T,
createTable = T,
tempTable = T,
@@ -202,23 +197,23 @@ getDbDuringCovariateData <- function(
}
# including concept ids
- includedConceptTable <- ''
- if(length(covariateSettings$includedCovariateConceptIds) > 0 ){
- includedConceptTable <- '#include_concepts'
+ includedConceptTable <- ""
+ if (length(covariateSettings$includedCovariateConceptIds) > 0) {
+ includedConceptTable <- "#include_concepts"
DatabaseConnector::insertTable(
connection = connection,
- tableName = includedConceptTable,
+ tableName = includedConceptTable,
dropTableIfExists = T,
createTable = T,
tempTable = T,
data = data.frame(id = covariateSettings$includedCovariateConceptIds),
camelCaseToSnakeCase = T
- )
+ )
- if(covariateSettings$addDescendantsToInclude){
+ if (covariateSettings$addDescendantsToInclude) {
SqlRender::loadRenderTranslateSql(
- sqlFilename = 'IncludeDescendants.sql',
- packageName = 'Characterization',
+ sqlFilename = "IncludeDescendants.sql",
+ packageName = "Characterization",
dbms = DatabaseConnector::dbms(connection),
table_name = includedConceptTable,
cdm_database_schema = cdmDatabaseSchema
@@ -227,12 +222,12 @@ getDbDuringCovariateData <- function(
}
# excluding concept ids
- excludedConceptTable <- ''
- if(length(covariateSettings$excludedCovariateConceptIds) > 0 ){
- excludedConceptTable <- '#exclude_concepts'
+ excludedConceptTable <- ""
+ if (length(covariateSettings$excludedCovariateConceptIds) > 0) {
+ excludedConceptTable <- "#exclude_concepts"
DatabaseConnector::insertTable(
connection = connection,
- tableName = excludedConceptTable,
+ tableName = excludedConceptTable,
dropTableIfExists = T,
createTable = T,
tempTable = T,
@@ -240,10 +235,10 @@ getDbDuringCovariateData <- function(
camelCaseToSnakeCase = T
)
- if(covariateSettings$addDescendantsToInclude){
+ if (covariateSettings$addDescendantsToExclude) {
SqlRender::loadRenderTranslateSql(
- sqlFilename = 'IncludeDescendants.sql',
- packageName = 'Characterization',
+ sqlFilename = "IncludeDescendants.sql",
+ packageName = "Characterization",
dbms = DatabaseConnector::dbms(connection),
table_name = excludedConceptTable,
cdm_database_schema = cdmDatabaseSchema
@@ -259,23 +254,23 @@ getDbDuringCovariateData <- function(
useContinuous <- F
result <- Andromeda::andromeda()
- for(domainSettingsIndex in domainSettingsIndexes){
+ for (domainSettingsIndex in domainSettingsIndexes) {
i <- i + 1
- if(getDomainSettings$isBinary[domainSettingsIndex] == 'Y'){
+ if (getDomainSettings$isBinary[domainSettingsIndex] == "Y") {
binaryInd <- c(i, binaryInd)
useBinary <- T
- } else{
+ } else {
continuousInd <- c(i, continuousInd)
useContinuous <- T
}
# Load template sql and fill
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = getDomainSettings$sqlFileName[domainSettingsIndex],
- packageName = 'Characterization',
- dbms = attr(connection, "dbms"),
+ packageName = "Characterization",
+ dbms = attr(connection, "dbms"),
cohort_table = cohortTable,
- #cohort_ids = cohortIds,
+ # cohort_ids = cohortIds,
cohort_definition_id = cohortIds, # added?
row_id_field = rowIdField,
cdm_database_schema = cdmDatabaseSchema,
@@ -291,62 +286,60 @@ getDbDuringCovariateData <- function(
included_cov_table = includedCovTable,
included_concept_table = includedConceptTable,
excluded_concept_table = excludedConceptTable,
- covariate_table = paste0('#cov_',i)
+ covariate_table = paste0("#cov_", i)
)
- message(paste0('Executing during sql code for ', getDomainSettings$analysisName[domainSettingsIndex]))
+ message(paste0("Executing during sql code for ", getDomainSettings$analysisName[domainSettingsIndex]))
start <- Sys.time()
DatabaseConnector::executeSql(
connection = connection,
sql = sql,
progressBar = T
- )
+ )
time <- Sys.time() - start
- message(paste0('Execution took ', round(time, digits = 2), ' ', units(time)))
-
+ message(paste0("Execution took ", round(time, digits = 2), " ", units(time)))
}
# all_covariates.cohort_definition_id,\n all_covariates.covariate_id,\n all_covariates.sum_value,\n CAST(all_covariates.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value
- message(paste0('Extracting covariates'))
+ message(paste0("Extracting covariates"))
start <- Sys.time()
# Retrieve the covariate:
- if(useBinary){
-
+ if (useBinary) {
sql <- paste0(
- 'select temp.*, CAST(temp.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value from (',
- paste0(paste0('select cohort_definition_id, covariate_id, sum_value from #cov_', binaryInd), collapse = ' union '),
- ') temp inner join
+ "select temp.*, CAST(temp.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value from (",
+ paste0(paste0("select cohort_definition_id, covariate_id, sum_value from #cov_", binaryInd), collapse = " union "),
+ ") temp inner join
(SELECT cohort_definition_id, COUNT(*) AS total_count
FROM @cohort_table {@cohort_definition_id != -1} ? {\nWHERE cohort_definition_id IN (@cohort_definition_id)}
GROUP BY cohort_definition_id ) total
- on temp.cohort_definition_id = total.cohort_definition_id;'
+ on temp.cohort_definition_id = total.cohort_definition_id;"
)
sql <- SqlRender::render(
sql = sql,
cohort_table = cohortTable,
- cohort_definition_id = paste0(c(-1), collapse = ',')
- )
+ cohort_definition_id = paste0(c(-1), collapse = ",")
+ )
sql <- SqlRender::translate(
sql = sql,
targetDialect = DatabaseConnector::dbms(connection)
- )
+ )
DatabaseConnector::querySqlToAndromeda(
connection = connection,
sql = sql,
andromeda = result,
- andromedaTableName = 'covariates',
+ andromedaTableName = "covariates",
appendToTable = F,
snakeCaseToCamelCase = TRUE
)
- if(minCharacterizationMean != 0 && "averageValue" %in% colnames(result$covariates)){
+ if (minCharacterizationMean != 0 && "averageValue" %in% colnames(result$covariates)) {
result$covariates <- result$covariates %>%
dplyr::filter(.data$averageValue >= minCharacterizationMean)
}
}
- if(useContinuous){
- sql <- paste0(paste0('select * from #cov_', continuousInd), collapse = ' union ')
+ if (useContinuous) {
+ sql <- paste0(paste0("select * from #cov_", continuousInd), collapse = " union ")
sql <- SqlRender::translate(
sql = sql,
targetDialect = DatabaseConnector::dbms(connection)
@@ -355,7 +348,7 @@ getDbDuringCovariateData <- function(
connection = connection,
sql = sql,
andromeda = result,
- andromedaTableName = 'covariatesContinuous',
+ andromedaTableName = "covariatesContinuous",
appendToTable = F,
snakeCaseToCamelCase = TRUE
)
@@ -364,11 +357,11 @@ getDbDuringCovariateData <- function(
DatabaseConnector::querySqlToAndromeda(
connection = connection,
sql = SqlRender::translate(
- sql = 'select * from #cov_ref;',
+ sql = "select * from #cov_ref;",
targetDialect = DatabaseConnector::dbms(connection)
),
andromeda = result,
- andromedaTableName = 'covariateRef',
+ andromedaTableName = "covariateRef",
appendToTable = F,
snakeCaseToCamelCase = TRUE
)
@@ -377,26 +370,26 @@ getDbDuringCovariateData <- function(
DatabaseConnector::querySqlToAndromeda(
connection = connection,
sql = SqlRender::translate(
- sql = 'select * from #analysis_ref;',
+ sql = "select * from #analysis_ref;",
targetDialect = DatabaseConnector::dbms(connection)
),
andromeda = result,
- andromedaTableName = 'analysisRef',
+ andromedaTableName = "analysisRef",
appendToTable = F,
snakeCaseToCamelCase = TRUE
)
time <- Sys.time() - start
- message(paste0('Extracting covariates took ', round(time, digits = 2), ' ', units(time)))
+ message(paste0("Extracting covariates took ", round(time, digits = 2), " ", units(time)))
# clean up: drop tables...
if (length(c(binaryInd, continuousInd)) != 0) {
- message(paste0('Removing temp covariate tables'))
+ message(paste0("Removing temp covariate tables"))
for (i in c(binaryInd, continuousInd)) {
sql <- "TRUNCATE TABLE #cov_@id;\nDROP TABLE #cov_@id;\n"
sql <- SqlRender::render(
sql,
id = i
- )
+ )
sql <- SqlRender::translate(
sql = sql,
targetDialect = attr(connection, "dbms"),
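
Note on the binary covariate query above: average_value is the covariate prevalence,
sum_value divided by the cohort size, and rows under minCharacterizationMean are then
filtered out. A small sketch of that filtering step (dplyr assumed, values hypothetical):

    library(dplyr)

    covariates <- data.frame(
      covariateId = c(1001, 1002),
      sumValue = c(120, 4),
      averageValue = c(120 / 1000, 4 / 1000) # sum_value / total_count
    )
    minCharacterizationMean <- 0.01

    # keep only covariates at or above the minimum mean, as in the hunk above
    covariates %>% filter(averageValue >= minCharacterizationMean)
    #   covariateId sumValue averageValue
    # 1        1001      120         0.12
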
diff --git a/R/Database.R b/R/Database.R
index 62b3daf..1fbe141 100644
--- a/R/Database.R
+++ b/R/Database.R
@@ -72,11 +72,11 @@ insertResultsToDatabase <- function(
connectionDetails,
schema,
resultsFolder,
- tablePrefix = '',
- csvTablePrefix = 'c_'
- ){
- specLoc <- system.file('settings', 'resultsDataModelSpecification.csv',
- package = 'Characterization')
+ tablePrefix = "",
+ csvTablePrefix = "c_") {
+ specLoc <- system.file("settings", "resultsDataModelSpecification.csv",
+ package = "Characterization"
+ )
specs <- utils::read.csv(specLoc)
colnames(specs) <- SqlRender::snakeCaseToCamelCase(colnames(specs))
specs$tableName <- paste0(csvTablePrefix, specs$tableName)
@@ -148,8 +148,7 @@ createCharacterizationTables <- function(
deleteExistingTables = T,
createTables = T,
tablePrefix = "c_",
- tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
- ) {
+ tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) {
errorMessages <- checkmate::makeAssertCollection()
.checkTablePrefix(
tablePrefix = tablePrefix,
@@ -170,8 +169,8 @@ createCharacterizationTables <- function(
tables <- paste0(tablePrefix, tables)
# adding this to not create tables if all tables exist
- if(sum(tables %in% alltables) == length(tables) & !deleteExistingTables){
- message('All tables exist so no need to recreate')
+ if (sum(tables %in% alltables) == length(tables) & !deleteExistingTables) {
+ message("All tables exist so no need to recreate")
createTables <- FALSE
}
@@ -247,16 +246,16 @@ migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = ""
connectionDetails = connectionDetails,
databaseSchema = databaseSchema,
tablePrefix = tablePrefix
- )
+ )
migrator$executeMigrations()
migrator$finalize()
ParallelLogger::logInfo("Updating version number")
updateVersionSql <- SqlRender::loadRenderTranslateSql("UpdateVersionNumber.sql",
- packageName = utils::packageName(),
- database_schema = databaseSchema,
- table_prefix = tablePrefix,
- dbms = connectionDetails$dbms
+ packageName = utils::packageName(),
+ database_schema = databaseSchema,
+ table_prefix = tablePrefix,
+ dbms = connectionDetails$dbms
)
connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
@@ -287,7 +286,7 @@ getResultTables <- function() {
),
show_col_types = FALSE
)$table_name,
- 'migration', 'package_version'
+ "migration", "package_version"
)
)
)
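
Note on insertResultsToDatabase(): its full signature is visible in the hunk above. A
hedged usage sketch, assuming a throwaway SQLite results database (paths and schema are
hypothetical):

    # build connection details for a local SQLite file
    connectionDetails <- DatabaseConnector::createConnectionDetails(
      dbms = "sqlite",
      server = file.path(tempdir(), "results.sqlite")
    )

    # load the csv results produced by runCharacterizationAnalyses()
    Characterization::insertResultsToDatabase(
      connectionDetails = connectionDetails,
      schema = "main",
      resultsFolder = file.path(tempdir(), "characterization_results"),
      tablePrefix = "",
      csvTablePrefix = "c_"
    )
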
diff --git a/R/DechallengeRechallenge.R b/R/DechallengeRechallenge.R
index 949ebf5..985fdef 100644
--- a/R/DechallengeRechallenge.R
+++ b/R/DechallengeRechallenge.R
@@ -104,10 +104,9 @@ computeDechallengeRechallengeAnalyses <- function(
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
settings,
databaseId = "database 1",
- outputFolder = file.path(getwd(),'results'),
+ outputFolder = file.path(getwd(), "results"),
minCellCount = 0,
- ...
- ) {
+ ...) {
# check inputs
errorMessages <- checkmate::makeAssertCollection()
.checkConnectionDetails(connectionDetails, errorMessages)
@@ -210,7 +209,7 @@ computeDechallengeRechallengeAnalyses <- function(
)
# export results to csv
- message('exporting to csv file')
+ message("exporting to csv file")
exportDechallengeRechallengeToCsv(
result = result,
saveDirectory = outputFolder,
@@ -248,10 +247,9 @@ computeRechallengeFailCaseSeriesAnalyses <- function(
settings,
databaseId = "database 1",
showSubjectId = F,
- outputFolder = file.path(getwd(),'results'),
+ outputFolder = file.path(getwd(), "results"),
minCellCount = 0,
- ...
- ){
+ ...) {
# check inputs
errorMessages <- checkmate::makeAssertCollection()
.checkConnectionDetails(connectionDetails, errorMessages)
@@ -353,7 +351,7 @@ computeRechallengeFailCaseSeriesAnalyses <- function(
)
# add the csv export here
- message('exporting to csv file')
+ message("exporting to csv file")
exportRechallengeFailCaseSeriesToCsv(
result = result,
saveDirectory = outputFolder
@@ -365,35 +363,42 @@ computeRechallengeFailCaseSeriesAnalyses <- function(
getDechallengeRechallengeJobs <- function(
characterizationSettings,
- threads
-){
-
+ threads) {
characterizationSettings <- characterizationSettings$dechallengeRechallengeSettings
- if(length(characterizationSettings) == 0){
+ if (length(characterizationSettings) == 0) {
return(NULL)
}
ind <- 1:length(characterizationSettings)
- targetIds <- lapply(ind, function(i){characterizationSettings[[i]]$targetCohortDefinitionIds})
- outcomeIds <- lapply(ind, function(i){characterizationSettings[[i]]$outcomeCohortDefinitionIds})
- dechallengeStopIntervals <- lapply(ind, function(i){characterizationSettings[[i]]$dechallengeStopInterval})
- dechallengeEvaluationWindows <- lapply(ind, function(i){characterizationSettings[[i]]$dechallengeEvaluationWindow})
+ targetIds <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$targetCohortDefinitionIds
+ })
+ outcomeIds <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$outcomeCohortDefinitionIds
+ })
+ dechallengeStopIntervals <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$dechallengeStopInterval
+ })
+ dechallengeEvaluationWindows <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$dechallengeEvaluationWindow
+ })
# get all combinations of TnOs, then split by threads
- combinations <- do.call(what = 'rbind',
- args =
- lapply(
- 1:length(targetIds),
- function(i){
- result <- expand.grid(
- targetId = targetIds[[i]],
- outcomeId = outcomeIds[[i]]
- )
- result$dechallengeStopInterval <- dechallengeStopIntervals[[i]]
- result$dechallengeEvaluationWindow <- dechallengeEvaluationWindows[[i]]
- return(result)
- }
- )
+ combinations <- do.call(
+ what = "rbind",
+ args =
+ lapply(
+ 1:length(targetIds),
+ function(i) {
+ result <- expand.grid(
+ targetId = targetIds[[i]],
+ outcomeId = outcomeIds[[i]]
+ )
+ result$dechallengeStopInterval <- dechallengeStopIntervals[[i]]
+ result$dechallengeEvaluationWindow <- dechallengeEvaluationWindows[[i]]
+ return(result)
+ }
+ )
)
# find out whether more Ts or more Os
tcount <- nrow(
@@ -414,35 +419,35 @@ getDechallengeRechallengeJobs <- function(
)
)
- if(threads > max(tcount, ocount)){
- message('Tnput parameter threads greater than number of targets and outcomes')
- message(paste0('Only using ', max(tcount, ocount) ,' threads for TimeToEvent'))
+ if (threads > max(tcount, ocount)) {
+ message("Input parameter threads greater than number of targets and outcomes")
+ message(paste0("Only using ", max(tcount, ocount), " threads for DechallengeRechallenge"))
}
- if(tcount >= ocount){
+ if (tcount >= ocount) {
threadDf <- combinations %>%
dplyr::count(
.data$targetId,
.data$dechallengeStopInterval,
.data$dechallengeEvaluationWindow
)
- threadDf$thread = rep(1:threads, ceiling(tcount/threads))[1:tcount]
- mergeColumn <- c('targetId','dechallengeStopInterval', 'dechallengeEvaluationWindow')
- } else{
+ threadDf$thread <- rep(1:threads, ceiling(tcount / threads))[1:tcount]
+ mergeColumn <- c("targetId", "dechallengeStopInterval", "dechallengeEvaluationWindow")
+ } else {
threadDf <- combinations %>%
dplyr::count(
.data$outcomeId,
.data$dechallengeStopInterval,
.data$dechallengeEvaluationWindow
)
- threadDf$thread = rep(1:threads, ceiling(ocount/threads))[1:ocount]
- mergeColumn <- c('outcomeId','dechallengeStopInterval', 'dechallengeEvaluationWindow')
+ threadDf$thread <- rep(1:threads, ceiling(ocount / threads))[1:ocount]
+ mergeColumn <- c("outcomeId", "dechallengeStopInterval", "dechallengeEvaluationWindow")
}
combinations <- merge(combinations, threadDf, by = mergeColumn)
sets <- lapply(
X = 1:max(threadDf$thread),
- FUN = function(i){
+ FUN = function(i) {
createDechallengeRechallengeSettings(
targetIds = unique(combinations$targetId[combinations$thread == i]),
outcomeIds = unique(combinations$outcomeId[combinations$thread == i]),
@@ -454,28 +459,29 @@ getDechallengeRechallengeJobs <- function(
# recreate settings
settings <- c()
- for(i in 1:length(sets)){
- settings <- rbind(settings,
- data.frame(
- functionName = 'computeDechallengeRechallengeAnalyses',
- settings = as.character(ParallelLogger::convertSettingsToJson(
- sets[[i]]
- )),
- executionFolder = paste0('dr_', i),
- jobId = paste0('dr_', i)
- )
+ for (i in 1:length(sets)) {
+ settings <- rbind(
+ settings,
+ data.frame(
+ functionName = "computeDechallengeRechallengeAnalyses",
+ settings = as.character(ParallelLogger::convertSettingsToJson(
+ sets[[i]]
+ )),
+ executionFolder = paste0("dr_", i),
+ jobId = paste0("dr_", i)
+ )
)
- settings <- rbind(settings,
- data.frame(
- functionName = 'computeRechallengeFailCaseSeriesAnalyses',
- settings = as.character(ParallelLogger::convertSettingsToJson(
- sets[[i]]
- )),
- executionFolder = paste0('rfcs_', i),
- jobId = paste0('rfcs_', i)
- )
+ settings <- rbind(
+ settings,
+ data.frame(
+ functionName = "computeRechallengeFailCaseSeriesAnalyses",
+ settings = as.character(ParallelLogger::convertSettingsToJson(
+ sets[[i]]
+ )),
+ executionFolder = paste0("rfcs_", i),
+ jobId = paste0("rfcs_", i)
+ )
)
-
}
return(settings)
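The job builder above expands every target/outcome pair and then balances the pairs across threads using whichever dimension (targets or outcomes) has more distinct values. A minimal standalone sketch of that assignment, with made-up ids rather than the package API:

# Sketch only: hypothetical ids, threads = 2; mirrors the splitting logic above.
targetIds <- c(1, 2, 3)
outcomeIds <- c(10, 11)
threads <- 2

# all target/outcome combinations
combinations <- expand.grid(targetId = targetIds, outcomeId = outcomeIds)

tcount <- length(unique(combinations$targetId))  # 3 distinct targets
ocount <- length(unique(combinations$outcomeId)) # 2 distinct outcomes
n <- max(tcount, ocount)                         # split on the larger dimension

# round-robin assignment, truncated to n entries: here c(1, 2, 1)
thread <- rep(1:threads, ceiling(n / threads))[1:n]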
diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R
index c784a97..0432995 100644
--- a/R/HelperFunctions.R
+++ b/R/HelperFunctions.R
@@ -212,7 +212,7 @@
.checkCovariateSettings <- function(covariateSettings,
errorMessages) {
- if (inherits(covariateSettings,"covariateSettings")) {
+ if (inherits(covariateSettings, "covariateSettings")) {
checkmate::assertClass(
x = covariateSettings,
classes = "covariateSettings",
@@ -244,12 +244,12 @@
checkNoCsv <- function(
csvFiles,
- errorMessage
- ){
-
- csvExists <- sapply(csvFiles, function(x){file.exists(x)})
+ errorMessage) {
+ csvExists <- sapply(csvFiles, function(x) {
+ file.exists(x)
+ })
- if(sum(csvExists) > 0){
+ if (sum(csvExists) > 0) {
stop(errorMessage)
}
@@ -258,7 +258,6 @@ checkNoCsv <- function(
cleanCsv <- function(
resultFolder,
- fileName = 'time_to_event.csv'
- ){
- file.remove(file.path(resultFolder, fileName ))
+ fileName = "time_to_event.csv") {
+ file.remove(file.path(resultFolder, fileName))
}
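checkNoCsv() above is the guard that refuses to overwrite prior results: it errors as soon as any of the listed csv files already exists. A small usage sketch (internal helper, hypothetical paths):

# Sketch only: checkNoCsv() is internal, so ::: is needed outside the package.
resultFolder <- tempdir()
Characterization:::checkNoCsv(
  csvFiles = file.path(resultFolder, c("time_to_event.csv", "covariates.csv")),
  errorMessage = "csv files already exist - delete them or run incrementally"
)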
diff --git a/R/Incremental.R b/R/Incremental.R
index 5598367..f05d9d2 100644
--- a/R/Incremental.R
+++ b/R/Incremental.R
@@ -1,13 +1,11 @@
createIncrementalLog <- function(
executionFolder,
- logname = 'execution.csv'
- ){
-
- if(!dir.exists(executionFolder)){
+ logname = "execution.csv") {
+ if (!dir.exists(executionFolder)) {
dir.create(executionFolder, recursive = T)
}
- if(!file.exists(file.path(executionFolder, logname))){
+ if (!file.exists(file.path(executionFolder, logname))) {
x <- data.frame(
run_date_time = Sys.time(),
job_id = 0,
@@ -16,23 +14,22 @@ createIncrementalLog <- function(
)
readr::write_csv(
x = x,
- file = file.path(executionFolder, logname)
- )
+ file = file.path(executionFolder, logname)
+ )
}
-
}
-loadIncrementalFiles <- function(executionFolder){
- if(file.exists(file.path(executionFolder, 'execution.csv'))){
- executed <- utils::read.csv(file.path(executionFolder, 'execution.csv'))
- } else{
- stop('execution.csv missing')
+loadIncrementalFiles <- function(executionFolder) {
+ if (file.exists(file.path(executionFolder, "execution.csv"))) {
+ executed <- utils::read.csv(file.path(executionFolder, "execution.csv"))
+ } else {
+ stop("execution.csv missing")
}
- if(file.exists(file.path(executionFolder, 'completed.csv'))){
- completed <- utils::read.csv(file.path(executionFolder, 'completed.csv'))
- } else{
- stop('completed.csv missing')
+ if (file.exists(file.path(executionFolder, "completed.csv"))) {
+ completed <- utils::read.csv(file.path(executionFolder, "completed.csv"))
+ } else {
+ stop("completed.csv missing")
}
return(list(
executed = executed,
@@ -42,9 +39,7 @@ loadIncrementalFiles <- function(executionFolder){
getExecutionJobIssues <- function(
executed,
- completed
-){
-
+ completed) {
executedJobs <- unique(executed$job_id)
completedJobs <- unique(completed$job_id)
@@ -62,9 +57,7 @@ getExecutionJobIssues <- function(
#'
#' @export
cleanIncremental <- function(
- executionFolder
-){
-
+ executionFolder) {
incrementalFiles <- loadIncrementalFiles(
executionFolder
)
@@ -74,12 +67,12 @@ cleanIncremental <- function(
completed = incrementalFiles$completed
)
- if(length(issues) > 0 ){
+ if (length(issues) > 0) {
# delete contents inside folder
- for(i in 1:length(issues)){
- files <- dir(file.path(executionFolder,issues[i]), full.names = T)
- for(file in files){
- message(paste0('Deleting incomplete result file ', file))
+ for (i in 1:length(issues)) {
+ files <- dir(file.path(executionFolder, issues[i]), full.names = T)
+ for (file in files) {
+ message(paste0("Deleting incomplete result file ", file))
file.remove(file)
}
}
@@ -87,21 +80,19 @@ cleanIncremental <- function(
# now update the execution to remove the issue rows
executionFile <- utils::read.csv(
- file = file.path(executionFolder, 'execution.csv')
+ file = file.path(executionFolder, "execution.csv")
)
- fixedExecution <- executionFile[!executionFile$job_id %in% issues,]
+ fixedExecution <- executionFile[!executionFile$job_id %in% issues, ]
utils::write.csv(
x = fixedExecution,
- file = file.path(executionFolder, 'execution.csv')
- )
+ file = file.path(executionFolder, "execution.csv")
+ )
-return(invisible(NULL))
+ return(invisible(NULL))
}
checkResultFilesIncremental <- function(
- executionFolder
- ){
-
+ executionFolder) {
incrementalFiles <- loadIncrementalFiles(
executionFolder
)
@@ -111,14 +102,14 @@ checkResultFilesIncremental <- function(
completed = incrementalFiles$completed
)
- if(length(issues) > 0 ){
- stop(paste0('jobIds: ', paste0(issues, collapse = ','), 'executed but not completed. Please run cleanIncremental() to remove incomplete results.'))
- }
+ if (length(issues) > 0) {
+ stop(paste0("jobIds: ", paste0(issues, collapse = ","), "executed but not completed. Please run cleanIncremental() to remove incomplete results."))
+ }
return(invisible(NULL))
}
-findCompletedJobs <- function(executionFolder){
+findCompletedJobs <- function(executionFolder) {
incrementalFiles <- loadIncrementalFiles(executionFolder)
return(unique(incrementalFiles$completed$job_id))
}
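A job counts as an issue when it was logged in execution.csv but never reached completed.csv, so getExecutionJobIssues() above reduces to a set difference over the job ids. With illustrative data:

# Sketch only: illustrative frames mirroring the execution/completed logs above.
executed  <- data.frame(job_id = c("tte_1", "dr_1", "dr_2"))
completed <- data.frame(job_id = c("tte_1", "dr_1"))

# "dr_2" was started but never marked complete, so its results are suspect
issues <- setdiff(unique(executed$job_id), unique(completed$job_id))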
@@ -130,9 +121,8 @@ recordIncremental <- function(
jobId,
startTime,
endTime,
- logname = 'execution.csv'
-){
- if(file.exists(file.path(executionFolder, logname))){
+ logname = "execution.csv") {
+ if (file.exists(file.path(executionFolder, logname))) {
x <- data.frame(
run_date_time = runDateTime,
job_id = jobId,
@@ -141,13 +131,12 @@ recordIncremental <- function(
)
readr::write_csv(
x = x,
- file = file.path(executionFolder, logname),
+ file = file.path(executionFolder, logname),
append = T
)
- } else{
- warning(paste0(logname, ' file missing so no logging possible'))
+ } else {
+ warning(paste0(logname, " file missing so no logging possible"))
}
-
}
#' Removes csv files from the execution folder as there should be no csv files
@@ -160,35 +149,35 @@ recordIncremental <- function(
#'
#' @export
cleanNonIncremental <- function(
- executionFolder
- ){
+ executionFolder) {
# remove all files from the executionFolder
files <- dir(
path = executionFolder,
recursive = T,
full.names = T,
- pattern = '.csv'
- )
- if(length(files) > 0 ){
- for(file in files){
- message(paste0('Deleting file ', file))
+ pattern = ".csv"
+ )
+ if (length(files) > 0) {
+ for (file in files) {
+ message(paste0("Deleting file ", file))
file.remove(file)
}
}
}
checkResultFilesNonIncremental <- function(
- executionFolder
-){
+ executionFolder) {
files <- dir(
path = executionFolder,
recursive = T,
full.names = T,
- pattern = '.csv'
+ pattern = ".csv"
)
- if(length(files) > 0 ){
- errorMessage <- paste0('Running in non-incremental but csv files exist in execution folder.',
- ' please delete manually or using cleanNonIncremental()')
+ if (length(files) > 0) {
+ errorMessage <- paste0(
+ "Running in non-incremental but csv files exist in execution folder.",
+ " please delete manually or using cleanNonIncremental()"
+ )
stop(errorMessage)
}
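Together these two functions enforce a clean folder when incremental mode is off: the check errors if any csv from a previous run survives, and the clean wipes them. A usage sketch with a hypothetical folder (both functions are internal):

# Sketch only: hypothetical folder; both functions are internal to the package.
executionFolder <- file.path(tempdir(), "execution")
dir.create(executionFolder, recursive = TRUE, showWarnings = FALSE)

tryCatch(
  Characterization:::checkResultFilesNonIncremental(executionFolder),
  error = function(e) {
    # stale csv files were found - remove them so the next run starts clean
    Characterization:::cleanNonIncremental(executionFolder)
  }
)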
diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R
index 28fbf3a..4d8a0fa 100644
--- a/R/RunCharacterization.R
+++ b/R/RunCharacterization.R
@@ -16,8 +16,7 @@
createCharacterizationSettings <- function(
timeToEventSettings = NULL,
dechallengeRechallengeSettings = NULL,
- aggregateCovariateSettings = NULL
-) {
+ aggregateCovariateSettings = NULL) {
errorMessages <- checkmate::makeAssertCollection()
.checkTimeToEventSettingsList(
settings = timeToEventSettings,
@@ -141,15 +140,14 @@ runCharacterizationAnalyses <- function(
cdmDatabaseSchema,
characterizationSettings,
outputDirectory,
- executionPath = file.path(outputDirectory, 'execution'),
+ executionPath = file.path(outputDirectory, "execution"),
csvFilePrefix = "c_",
databaseId = "1",
showSubjectId = F,
minCellCount = 0,
incremental = T,
threads = 1,
- minCharacterizationMean = 0.01
-) {
+ minCharacterizationMean = 0.01) {
# inputs checks
errorMessages <- checkmate::makeAssertCollection()
.checkCharacterizationSettings(
@@ -171,7 +169,7 @@ runCharacterizationAnalyses <- function(
logger <- createLogger(
logPath = file.path(executionPath),
- logName = 'log.txt'
+ logName = "log.txt"
)
ParallelLogger::registerLogger(logger)
on.exit(ParallelLogger::unregisterLogger(logger))
@@ -179,60 +177,59 @@ runCharacterizationAnalyses <- function(
jobs <- createJobs(
characterizationSettings = characterizationSettings,
threads = threads
- )
+ )
# save settings
- if(!file.exists(file.path(executionPath, 'settings.rds'))){
+ if (!file.exists(file.path(executionPath, "settings.rds"))) {
saveRDS(
object = list(
characterizationSettings = characterizationSettings,
threads = threads
- ),
- file = file.path(executionPath, 'settings.rds')
+ ),
+ file = file.path(executionPath, "settings.rds")
)
}
- if(incremental){
+ if (incremental) {
# check for any issues with current incremental
oldSettings <- readRDS(
- file = file.path(executionPath, 'settings.rds')
- )
- if(!identical(characterizationSettings,oldSettings$characterizationSettings)){
- stop('Settings have changed - please turn off incremental')
+ file = file.path(executionPath, "settings.rds")
+ )
+ if (!identical(characterizationSettings, oldSettings$characterizationSettings)) {
+ stop("Settings have changed - please turn off incremental")
}
- if(!identical(threads,oldSettings$threads)){
- stop('Cannot change number of threads in incremental model')
+ if (!identical(threads, oldSettings$threads)) {
+ stop("Cannot change number of threads in incremental model")
}
# create logs if not exists
createIncrementalLog(
executionFolder = executionPath,
- logname = 'execution.csv'
+ logname = "execution.csv"
)
createIncrementalLog(
executionFolder = executionPath,
- logname = 'completed.csv'
+ logname = "completed.csv"
)
checkResultFilesIncremental(
- executionFolder = executionPath
+ executionFolder = executionPath
)
# remove any previously completed jobs
completedJobIds <- findCompletedJobs(executionFolder = executionPath)
completedJobIndex <- jobs$jobId %in% completedJobIds
- if(sum(completedJobIndex) > 0){
- message(paste0('Removing ', sum(completedJobIndex), ' previously completed jobs'))
- jobs <- jobs[!completedJobIndex,]
+ if (sum(completedJobIndex) > 0) {
+ message(paste0("Removing ", sum(completedJobIndex), " previously completed jobs"))
+ jobs <- jobs[!completedJobIndex, ]
}
- if(nrow(jobs) == 0){
- message('No jobs left')
+ if (nrow(jobs) == 0) {
+ message("No jobs left")
return(invisible(T))
}
-
- } else{
+ } else {
# check for any csv files in folder
checkResultFilesNonIncremental(
executionFolder = executionPath
@@ -261,17 +258,18 @@ runCharacterizationAnalyses <- function(
# convert jobList to list with extra inputs
jobList <- lapply(
X = 1:nrow(jobs),
- FUN = function(ind){
+ FUN = function(ind) {
inputs <- inputSettings
inputs$settings <- jobs$settings[ind]
inputs$functionName <- jobs$functionName[ind]
- inputs$executionFolder<- jobs$executionFolder[ind]
- inputs$jobId <-jobs$jobId[ind]
+ inputs$executionFolder <- jobs$executionFolder[ind]
+ inputs$jobId <- jobs$jobId[ind]
inputs$runDateTime <- runDateTime
return(inputs)
- })
+ }
+ )
- message('Creating new cluster')
+ message("Creating new cluster")
cluster <- ParallelLogger::makeCluster(
numberOfThreads = threads,
singleThreadToMain = T,
@@ -295,30 +293,29 @@ runCharacterizationAnalyses <- function(
invisible(outputDirectory)
}
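The incremental guard above refuses to resume when either the settings or the thread count differ from what was saved in settings.rds, and previously completed jobIds are dropped before the cluster starts. A minimal call sketch; connectionDetails is a placeholder and the cohort schema/table arguments are assumptions based on the package's usual signature:

# Sketch only: placeholder connection; schema/table arguments are assumptions.
settings <- Characterization::createCharacterizationSettings(
  timeToEventSettings = Characterization::createTimeToEventSettings(
    targetIds = 1,
    outcomeIds = 3
  )
)
Characterization::runCharacterizationAnalyses(
  connectionDetails = connectionDetails,
  targetDatabaseSchema = "main",
  targetTable = "cohort",
  outcomeDatabaseSchema = "main",
  outcomeTable = "cohort",
  cdmDatabaseSchema = "main",
  characterizationSettings = settings,
  outputDirectory = file.path(tempdir(), "results"),
  incremental = TRUE, # reruns skip any jobId already logged in completed.csv
  threads = 1
)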
-createDirectory <- function(x){
- if(!dir.exists(x)){
- message(paste0('Creating directory ', x))
+createDirectory <- function(x) {
+ if (!dir.exists(x)) {
+ message(paste0("Creating directory ", x))
dir.create(x, recursive = T)
}
}
-createLogger <- function(logPath, logName){
+createLogger <- function(logPath, logName) {
createDirectory(logPath)
ParallelLogger::createLogger(
- name = 'Characterization',
+ name = "Characterization",
threshold = "INFO",
appenders = list(
ParallelLogger::createFileAppender(
- fileName = file.path(logPath, logName),
- layout = ParallelLogger::layoutParallel,
- expirationTime = 60*60*48
- )
+ fileName = file.path(logPath, logName),
+ layout = ParallelLogger::layoutParallel,
+ expirationTime = 60 * 60 * 48
+ )
)
)
}
-runCharacterizationsInParallel <- function(x){
-
+runCharacterizationsInParallel <- function(x) {
startTime <- Sys.time()
functionName <- x$functionName
@@ -327,16 +324,16 @@ runCharacterizationsInParallel <- function(x){
inputSettings$settings <- ParallelLogger::convertJsonToSettings(inputSettings$settings)
inputSettings$outputFolder <- file.path(x$executionPath, x$executionFolder)
- if(x$incremental){
- recordIncremental(
- executionFolder = x$executionPath,
- runDateTime = x$runDateTime,
- jobId = x$jobId,
- startTime = startTime,
- endTime = startTime,
- logname = 'execution.csv'
- )
- }
+ if (x$incremental) {
+ recordIncremental(
+ executionFolder = x$executionPath,
+ runDateTime = x$runDateTime,
+ jobId = x$jobId,
+ startTime = startTime,
+ endTime = startTime,
+ logname = "execution.csv"
+ )
+ }
completed <- tryCatch(
{
@@ -344,30 +341,31 @@ runCharacterizationsInParallel <- function(x){
what = eval(parse(text = functionName)),
args = inputSettings
)
- }, error = function(e){print(e); return(FALSE)}
-
- )
+ },
+ error = function(e) {
+ print(e)
+ return(FALSE)
+ }
+ )
endTime <- Sys.time()
- # if it completed without issues save it
- if(x$incremental & completed){
- recordIncremental(
- executionFolder = x$executionPath,
- runDateTime = x$runDateTime,
- jobId = x$jobId,
- startTime = startTime,
- endTime = endTime,
- logname = 'completed.csv'
- )
- }
+ # if it completed without issues save it
+ if (x$incremental & completed) {
+ recordIncremental(
+ executionFolder = x$executionPath,
+ runDateTime = x$runDateTime,
+ jobId = x$jobId,
+ startTime = startTime,
+ endTime = endTime,
+ logname = "completed.csv"
+ )
+ }
}
createJobs <- function(
- characterizationSettings,
- threads
-){
-
+ characterizationSettings,
+ threads) {
jobDf <- rbind(
getTimeToEventJobs(
characterizationSettings,
@@ -378,17 +376,17 @@ createJobs <- function(
threads
),
getAggregateCovariatesJobs(
- characterizationSettings,
- threads
+ characterizationSettings,
+ threads
)
)
- #data.frame(
+ # data.frame(
# functionName,
# settings # json,
# executionFolder,
# jobId
- #)
+ # )
return(jobDf)
}
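Every job builder returns rows with the four columns named in the comment above; the scheduler needs nothing else to dispatch work. An illustrative row (values made up):

# Sketch only: made-up values showing the shape of one scheduled job.
data.frame(
  functionName = "computeTimeToEventAnalyses",     # worker function to call
  settings = '{"targetIds":[1],"outcomeIds":[3]}', # settings serialized as json
  executionFolder = "tte_1",                       # subfolder receiving this job's csvs
  jobId = "tte_1"                                  # id written to the execution/completed logs
)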
@@ -399,14 +397,14 @@ aggregateCsvs <- function(
executionPath,
outputFolder,
executionFolders, # needed?
- csvFilePrefix
-){
-
- tables <- c('cohort_details.csv', 'settings.csv','covariates.csv',
- 'covariates_continuous.csv','covariate_ref.csv',
- 'analysis_ref.csv','cohort_counts.csv',
- 'time_to_event.csv',
- 'rechallenge_fail_case_series.csv', 'dechallenge_rechallenge.csv')
+ csvFilePrefix) {
+ tables <- c(
+ "cohort_details.csv", "settings.csv", "covariates.csv",
+ "covariates_continuous.csv", "covariate_ref.csv",
+ "analysis_ref.csv", "cohort_counts.csv",
+ "time_to_event.csv",
+ "rechallenge_fail_case_series.csv", "dechallenge_rechallenge.csv"
+ )
# this makes sure results are recreated
firstTracker <- data.frame(
@@ -424,44 +422,42 @@ aggregateCsvs <- function(
# for each folder load covariates, covariates_continuous,
# covariate_ref and analysis_ref
- for(folderName in folderNames){
- for(csvType in tables){
-
+ for (folderName in folderNames) {
+ for (csvType in tables) {
loadPath <- file.path(executionPath, folderName, csvType)
- savePath <- file.path(outputFolder, paste0(csvFilePrefix,csvType))
- if(file.exists(loadPath)){
-
- #TODO do this in batches
+ savePath <- file.path(outputFolder, paste0(csvFilePrefix, csvType))
+ if (file.exists(loadPath)) {
+ # TODO do this in batches
data <- readr::read_csv(
file = loadPath,
show_col_types = F
)
- if(csvType == 'analysis_ref.csv'){
+ if (csvType == "analysis_ref.csv") {
data <- data %>%
dplyr::mutate(
- unique_id = paste0(.data$setting_id, '-', .data$analysis_id)
+ unique_id = paste0(.data$setting_id, "-", .data$analysis_id)
) %>%
dplyr::filter( # need to filter analysis_id and setting_id
!.data$unique_id %in% analysisRefTracker
) %>%
dplyr::select(-"unique_id")
- analysisRefTracker <- unique(c(analysisRefTracker, paste0(data$setting_id,'-',data$analysis_id)))
+ analysisRefTracker <- unique(c(analysisRefTracker, paste0(data$setting_id, "-", data$analysis_id)))
}
- if(csvType == 'covariate_ref.csv'){ # this could be problematic as may have differnet covariate_ids
+ if (csvType == "covariate_ref.csv") { # this could be problematic as may have differnet covariate_ids
data <- data %>%
dplyr::mutate(
- unique_id = paste0(.data$setting_id, '-', .data$covariate_id)
+ unique_id = paste0(.data$setting_id, "-", .data$covariate_id)
) %>%
dplyr::filter( # need to filter covariate_id and setting_id
!.data$unique_id %in% covariateRefTracker
- )%>%
+ ) %>%
dplyr::select(-"unique_id")
- covariateRefTracker <- unique(c(covariateRefTracker, paste0(data$setting_id,'-',data$covariate_id)))
+ covariateRefTracker <- unique(c(covariateRefTracker, paste0(data$setting_id, "-", data$covariate_id)))
}
- if(csvType == 'settings.csv'){
+ if (csvType == "settings.csv") {
data <- data %>%
dplyr::filter(
!.data$setting_id %in% settingsTracker
@@ -472,12 +468,11 @@ aggregateCsvs <- function(
append <- file.exists(savePath)
readr::write_csv(
x = data,
- file = savePath, quote = 'all',
+ file = savePath, quote = "all",
append = append & !firstTracker$first[firstTracker$table == csvType]
)
firstTracker$first[firstTracker$table == csvType] <- F
}
-
}
}
}
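The reference tables are deduplicated across execution folders by tracking a composite key of ids already written; the same pattern is applied to analysis_ref, covariate_ref, and settings. The core of it, on illustrative data:

# Sketch only: the analysis_ref dedup pattern from above, on illustrative data.
library(dplyr)

analysisRefTracker <- c()
data <- data.frame(setting_id = c(1, 1, 2), analysis_id = c(101, 102, 101))

data <- data %>%
  dplyr::mutate(unique_id = paste0(.data$setting_id, "-", .data$analysis_id)) %>%
  dplyr::filter(!.data$unique_id %in% analysisRefTracker) %>% # skip rows already written
  dplyr::select(-"unique_id")

analysisRefTracker <- unique(
  c(analysisRefTracker, paste0(data$setting_id, "-", data$analysis_id))
)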
diff --git a/R/SaveLoad.R b/R/SaveLoad.R
index e0e3bd0..61583bd 100644
--- a/R/SaveLoad.R
+++ b/R/SaveLoad.R
@@ -27,8 +27,7 @@
exportTimeToEventToCsv <- function(
result,
saveDirectory,
- minCellCount = 0
- ) {
+ minCellCount = 0) {
if (!dir.exists(saveDirectory)) {
dir.create(
path = saveDirectory,
@@ -99,7 +98,7 @@ exportDechallengeRechallengeToCsv <- function(
)
message("Writing ", countN, " rows to csv")
- if(!dir.exists(saveDirectory)){
+ if (!dir.exists(saveDirectory)) {
dir.create(saveDirectory, recursive = T)
}
@@ -215,8 +214,7 @@ exportDechallengeRechallengeToCsv <- function(
#' @export
exportRechallengeFailCaseSeriesToCsv <- function(
result,
- saveDirectory
- ) {
+ saveDirectory) {
if (!dir.exists(saveDirectory)) {
dir.create(
path = saveDirectory,
diff --git a/R/TimeToEvent.R b/R/TimeToEvent.R
index b316902..18c9d9c 100644
--- a/R/TimeToEvent.R
+++ b/R/TimeToEvent.R
@@ -25,8 +25,7 @@
#' @export
createTimeToEventSettings <- function(
targetIds,
- outcomeIds
- ) {
+ outcomeIds) {
# check indicationIds
errorMessages <- checkmate::makeAssertCollection()
# check targetIds is a vector of int/double
@@ -80,10 +79,9 @@ computeTimeToEventAnalyses <- function(
cdmDatabaseSchema,
settings,
databaseId = "database 1",
- outputFolder = file.path(getwd(),'results'),
+ outputFolder = file.path(getwd(), "results"),
minCellCount = 0,
- ...
- ) {
+ ...) {
# check inputs
errorMessages <- checkmate::makeAssertCollection()
.checkConnectionDetails(connectionDetails, errorMessages)
@@ -105,7 +103,7 @@ computeTimeToEventAnalyses <- function(
)
.checkTimeToEventSettings(
settings = settings,
- errorMessages = errorMessages
+ errorMessages = errorMessages
)
valid <- checkmate::reportAssertions(errorMessages)
@@ -200,7 +198,7 @@ computeTimeToEventAnalyses <- function(
)
# add the csv export here
- message('exporting to csv file')
+ message("exporting to csv file")
exportTimeToEventToCsv(
result = result,
saveDirectory = outputFolder,
@@ -216,57 +214,61 @@ computeTimeToEventAnalyses <- function(
# based on the number of threads
getTimeToEventJobs <- function(
characterizationSettings,
- threads
-){
-
-
+ threads) {
characterizationSettings <- characterizationSettings$timeToEventSettings
- if(length(characterizationSettings) == 0){
+ if (length(characterizationSettings) == 0) {
return(NULL)
}
ind <- 1:length(characterizationSettings)
- targetIds <- lapply(ind, function(i){characterizationSettings[[i]]$targetIds})
- outcomeIds <- lapply(ind, function(i){characterizationSettings[[i]]$outcomeIds})
+ targetIds <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$targetIds
+ })
+ outcomeIds <- lapply(ind, function(i) {
+ characterizationSettings[[i]]$outcomeIds
+ })
# get all combinations of TnOs, then split by threads
- tnos <- do.call(what = 'rbind',
- args =
- lapply(
- 1:length(targetIds),
- function(i){expand.grid(
- targetId = targetIds[[i]],
- outcomeId = outcomeIds[[i]]
- )}
- )
+ tnos <- do.call(
+ what = "rbind",
+ args =
+ lapply(
+ 1:length(targetIds),
+ function(i) {
+ expand.grid(
+ targetId = targetIds[[i]],
+ outcomeId = outcomeIds[[i]]
+ )
+ }
+ )
)
# find out whether more Ts or more Os
tcount <- length(unique(tnos$targetId))
ocount <- length(unique(tnos$outcomeId))
- if(threads > max(tcount, ocount)){
- message('Tnput parameter threads greater than number of targets and outcomes')
- message(paste0('Only using ', max(tcount, ocount) ,' threads for TimeToEvent'))
+ if (threads > max(tcount, ocount)) {
+ message("Tnput parameter threads greater than number of targets and outcomes")
+ message(paste0("Only using ", max(tcount, ocount), " threads for TimeToEvent"))
}
- if(tcount >= ocount){
+ if (tcount >= ocount) {
threadDf <- data.frame(
targetId = unique(tnos$targetId),
- thread = rep(1:threads, ceiling(tcount/threads))[1:tcount]
+ thread = rep(1:threads, ceiling(tcount / threads))[1:tcount]
)
- mergeColumn <- 'targetId'
- } else{
+ mergeColumn <- "targetId"
+ } else {
threadDf <- data.frame(
outcomeId = unique(tnos$outcomeId),
- thread = rep(1:threads, ceiling(ocount/threads))[1:ocount]
+ thread = rep(1:threads, ceiling(ocount / threads))[1:ocount]
)
- mergeColumn <- 'outcomeId'
+ mergeColumn <- "outcomeId"
}
tnos <- merge(tnos, threadDf, by = mergeColumn)
sets <- lapply(
X = 1:max(threadDf$thread),
- FUN = function(i){
+ FUN = function(i) {
createTimeToEventSettings(
targetIds = unique(tnos$targetId[tnos$thread == i]),
outcomeIds = unique(tnos$outcomeId[tnos$thread == i])
@@ -276,16 +278,17 @@ getTimeToEventJobs <- function(
# recreate settings
settings <- c()
- for(i in 1:length(sets)){
- settings <- rbind(settings,
- data.frame(
- functionName = 'computeTimeToEventAnalyses',
- settings = as.character(ParallelLogger::convertSettingsToJson(
- sets[[i]]
- )),
- executionFolder = paste0('tte_', i),
- jobId = paste0('tte_', i)
- )
+ for (i in 1:length(sets)) {
+ settings <- rbind(
+ settings,
+ data.frame(
+ functionName = "computeTimeToEventAnalyses",
+ settings = as.character(ParallelLogger::convertSettingsToJson(
+ sets[[i]]
+ )),
+ executionFolder = paste0("tte_", i),
+ jobId = paste0("tte_", i)
+ )
)
}
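computeTimeToEventAnalyses() now exports its csv directly into outputFolder as part of the run. A minimal end-to-end sketch against the Eunomia demo database; the target/outcome schema and table argument names are assumptions based on the test usage elsewhere in this patch:

# Sketch only: Eunomia demo data; schema/table argument names are assumptions.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails = connectionDetails)

Characterization::computeTimeToEventAnalyses(
  connectionDetails = connectionDetails,
  targetDatabaseSchema = "main",
  targetTable = "cohort",
  outcomeDatabaseSchema = "main",
  outcomeTable = "cohort",
  cdmDatabaseSchema = "main",
  settings = Characterization::createTimeToEventSettings(
    targetIds = 1,
    outcomeIds = 3
  ),
  databaseId = "Eunomia",
  outputFolder = file.path(tempdir(), "results"),
  minCellCount = 0
)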
diff --git a/R/ViewShiny.R b/R/ViewShiny.R
index 841b755..bcee5c3 100644
--- a/R/ViewShiny.R
+++ b/R/ViewShiny.R
@@ -13,8 +13,7 @@
#' @export
viewCharacterization <- function(
resultFolder,
- cohortDefinitionSet = NULL
- ) {
+ cohortDefinitionSet = NULL) {
databaseSettings <- prepareCharacterizationShiny(
resultFolder = resultFolder,
cohortDefinitionSet = cohortDefinitionSet
@@ -26,12 +25,10 @@ viewCharacterization <- function(
prepareCharacterizationShiny <- function(
resultFolder,
cohortDefinitionSet,
- sqliteLocation = file.path(tempdir(), 'results.sqlite'),
- tablePrefix = '',
- csvTablePrefix = 'c_'
- ) {
-
- if(!dir.exists(dirname(sqliteLocation))){
+ sqliteLocation = file.path(tempdir(), "results.sqlite"),
+ tablePrefix = "",
+ csvTablePrefix = "c_") {
+ if (!dir.exists(dirname(sqliteLocation))) {
dir.create(dirname(sqliteLocation), recursive = T)
}
@@ -55,7 +52,7 @@ prepareCharacterizationShiny <- function(
# upload the results
insertResultsToDatabase(
connectionDetails = connectionDetails,
- schema = 'main',
+ schema = "main",
resultsFolder = resultFolder,
tablePrefix = tablePrefix,
csvTablePrefix = csvTablePrefix
@@ -71,12 +68,12 @@ prepareCharacterizationShiny <- function(
if (!"cg_cohort_definition" %in% tables) {
cohortIds <- unique(
c(
- DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_ID from ",tablePrefix,csvTablePrefix,"cohort_details where COHORT_TYPE = 'Target';"))$TARGET_COHORT_ID,
- DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_ID from ",tablePrefix,csvTablePrefix,"cohort_details where COHORT_TYPE = 'TnO';"))$OUTCOME_COHORT_ID,
- DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$TARGET_COHORT_DEFINITION_ID,
- DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$OUTCOME_COHORT_DEFINITION_ID,
- DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"rechallenge_fail_case_series;"))$TARGET_COHORT_DEFINITION_ID,
- DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"rechallenge_fail_case_series;"))$OUTCOME_COHORT_DEFINITION_ID
+ DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_ID from ", tablePrefix, csvTablePrefix, "cohort_details where COHORT_TYPE = 'Target';"))$TARGET_COHORT_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_ID from ", tablePrefix, csvTablePrefix, "cohort_details where COHORT_TYPE = 'TnO';"))$OUTCOME_COHORT_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$TARGET_COHORT_DEFINITION_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$OUTCOME_COHORT_DEFINITION_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "rechallenge_fail_case_series;"))$TARGET_COHORT_DEFINITION_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "rechallenge_fail_case_series;"))$OUTCOME_COHORT_DEFINITION_ID
)
)
@@ -95,9 +92,9 @@ prepareCharacterizationShiny <- function(
if (!"database_meta_data" %in% tables) {
dbIds <- unique(
c(
- DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"analysis_ref;"))$DATABASE_ID,
- DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"dechallenge_rechallenge;"))$DATABASE_ID,
- DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$DATABASE_ID
+ DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "analysis_ref;"))$DATABASE_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "dechallenge_rechallenge;"))$DATABASE_ID,
+ DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$DATABASE_ID
)
)
@@ -120,7 +117,7 @@ prepareCharacterizationShiny <- function(
server = server
),
schema = "main",
- tablePrefix = paste0(tablePrefix,csvTablePrefix),
+ tablePrefix = paste0(tablePrefix, csvTablePrefix),
cohortTablePrefix = "cg_",
databaseTable = "DATABASE_META_DATA"
)
@@ -174,7 +171,7 @@ viewChars <- function(
databaseSettings$cgTablePrefix <- databaseSettings$cohortTablePrefix
databaseSettings$databaseTable <- "DATABASE_META_DATA"
databaseSettings$databaseTablePrefix <- ""
- #databaseSettings$iTablePrefix <- databaseSettings$incidenceTablePrefix
+ # databaseSettings$iTablePrefix <- databaseSettings$incidenceTablePrefix
databaseSettings$cgTable <- "cohort_definition"
if (!testApp) {
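prepareCharacterizationShiny() above stages the exported csvs into a temporary sqlite file (via insertResultsToDatabase) before the app launches, so viewing needs nothing beyond the result folder. A minimal sketch with a hypothetical folder:

# Sketch only: resultFolder is hypothetical; it should hold the c_-prefixed csvs.
Characterization::viewCharacterization(
  resultFolder = file.path(tempdir(), "results"),
  cohortDefinitionSet = NULL # optional; used to populate the cohort definition table
)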
diff --git a/docs/404.html b/docs/404.html
index a5cac53..7ee4ee6 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -6,7 +6,7 @@
Page not found (404) • Characterization
-
+
@@ -32,7 +32,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -115,7 +115,7 @@ Page not found (404)
diff --git a/docs/articles/InstallationGuide.html b/docs/articles/InstallationGuide.html
index aebd848..bb566a2 100644
--- a/docs/articles/InstallationGuide.html
+++ b/docs/articles/InstallationGuide.html
@@ -6,7 +6,7 @@
Characterization Installation Guide • Characterization
-
+
@@ -33,7 +33,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -230,7 +230,7 @@
-## Connecting using SQLite driver
-## Creating cohort: Celecoxib
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00776 secs
-## Creating cohort: Diclofenac
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00674 secs
-## Creating cohort: GiBleed
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.00919 secs
-## Creating cohort: NSAIDs
-##
- |
- | | 0%
- |
- |======================================================================| 100%
-## Executing SQL took 0.0486 secs
+connectionDetails <- Eunomia::getEunomiaConnectionDetails()
+## attempting to download GiBleed
+## attempting to extract and load: C:\Users\cknoll1\AppData\Local\Temp\5\RtmpoFseQQ/GiBleed_5.3.zip to: C:\Users\cknoll1\AppData\Local\Temp\5\RtmpoFseQQ/GiBleed_5.3.sqlite
+
## Cohorts created in table main.cohort
## cohortId name
## 1 1 Celecoxib
@@ -204,10 +178,10 @@ Setup
## 4 2694
We also need to have the Characterization package installed and
loaded
-
@@ -104,7 +104,7 @@ Author
diff --git a/docs/reference/cleanIncremental.html b/docs/reference/cleanIncremental.html
index 3411df0..2d8fe3e 100644
--- a/docs/reference/cleanIncremental.html
+++ b/docs/reference/cleanIncremental.html
@@ -1,7 +1,5 @@
-Removes csv files from folders that have not been marked as completed
-and removes the record of the execution file — cleanIncremental • Characterization
+Removes csv files from folders that have not been marked as completed and removes the record of the execution file — cleanIncremental • Characterization
diff --git a/docs/reference/cleanNonIncremental.html b/docs/reference/cleanNonIncremental.html
-Removes csv files from the execution folder as there should be no csv files
-when running in non-incremental model — cleanNonIncremental • Characterization
+Removes csv files from the execution folder as there should be no csv files when running in non-incremental model — cleanNonIncremental • Characterization
diff --git a/docs/reference/computeDechallengeRechallengeAnalyses.html b/docs/reference/computeDechallengeRechallengeAnalyses.html
-Compute dechallenge rechallenge study — computeDechallengeRechallengeAnalyses • Characterization
+Compute dechallenge rechallenge study — computeDechallengeRechallengeAnalyses • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -171,7 +171,7 @@ See also
diff --git a/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html b/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html
index 6a66550..1e9a2fc 100644
--- a/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html
+++ b/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html
@@ -1,5 +1,5 @@
-Compute fine the subjects that fail the dechallenge rechallenge study — computeRechallengeFailCaseSeriesAnalyses • Characterization
+Compute fine the subjects that fail the dechallenge rechallenge study — computeRechallengeFailCaseSeriesAnalyses • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -176,7 +176,7 @@ See also
diff --git a/docs/reference/computeTimeToEventAnalyses.html b/docs/reference/computeTimeToEventAnalyses.html
index f26506b..fc73f52 100644
--- a/docs/reference/computeTimeToEventAnalyses.html
+++ b/docs/reference/computeTimeToEventAnalyses.html
@@ -1,5 +1,5 @@
-Compute time to event study — computeTimeToEventAnalyses • Characterization
+Compute time to event study — computeTimeToEventAnalyses • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -175,7 +175,7 @@ See also
diff --git a/docs/reference/createAggregateCovariateSettings.html b/docs/reference/createAggregateCovariateSettings.html
index 2ff42ec..deff011 100644
--- a/docs/reference/createAggregateCovariateSettings.html
+++ b/docs/reference/createAggregateCovariateSettings.html
@@ -1,5 +1,5 @@
-Create aggregate covariate study settings — createAggregateCovariateSettings • Characterization
+Create aggregate covariate study settings — createAggregateCovariateSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -186,7 +186,7 @@ Value
diff --git a/docs/reference/createCharacterizationSettings.html b/docs/reference/createCharacterizationSettings.html
index d9e942b..a5bb1f3 100644
--- a/docs/reference/createCharacterizationSettings.html
+++ b/docs/reference/createCharacterizationSettings.html
@@ -1,5 +1,5 @@
-Create the settings for a large scale characterization study — createCharacterizationSettings • Characterization
+Create the settings for a large scale characterization study — createCharacterizationSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -129,7 +129,7 @@ See also
diff --git a/docs/reference/createCharacterizationTables.html b/docs/reference/createCharacterizationTables.html
index 01e7755..05cb16a 100644
--- a/docs/reference/createCharacterizationTables.html
+++ b/docs/reference/createCharacterizationTables.html
@@ -1,5 +1,5 @@
-Create the results tables to store characterization results into a database — createCharacterizationTables • Characterization
+Create the results tables to store characterization results into a database — createCharacterizationTables • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -150,7 +150,7 @@ See also
diff --git a/docs/reference/createDechallengeRechallengeSettings.html b/docs/reference/createDechallengeRechallengeSettings.html
index a787f5f..c044efd 100644
--- a/docs/reference/createDechallengeRechallengeSettings.html
+++ b/docs/reference/createDechallengeRechallengeSettings.html
@@ -1,5 +1,5 @@
-Create dechallenge rechallenge study settings — createDechallengeRechallengeSettings • Characterization
+Create dechallenge rechallenge study settings — createDechallengeRechallengeSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -129,7 +129,7 @@ See also
diff --git a/docs/reference/createDuringCovariateSettings.html b/docs/reference/createDuringCovariateSettings.html
index df4b1c3..52adf2b 100644
--- a/docs/reference/createDuringCovariateSettings.html
+++ b/docs/reference/createDuringCovariateSettings.html
@@ -1,5 +1,5 @@
-Create during covariate settings — createDuringCovariateSettings • Characterization
+Create during covariate settings — createDuringCovariateSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -237,7 +237,7 @@ Examples
diff --git a/docs/reference/createSqliteDatabase.html b/docs/reference/createSqliteDatabase.html
index d683532..66ed0f0 100644
--- a/docs/reference/createSqliteDatabase.html
+++ b/docs/reference/createSqliteDatabase.html
@@ -1,5 +1,5 @@
-Create an sqlite database connection — createSqliteDatabase • Characterization
+Create an sqlite database connection — createSqliteDatabase • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -116,7 +116,7 @@ See also
diff --git a/docs/reference/createTimeToEventSettings.html b/docs/reference/createTimeToEventSettings.html
index 63d552b..a6d29ce 100644
--- a/docs/reference/createTimeToEventSettings.html
+++ b/docs/reference/createTimeToEventSettings.html
@@ -1,5 +1,5 @@
-Create time to event study settings — createTimeToEventSettings • Characterization
+Create time to event study settings — createTimeToEventSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -115,7 +115,7 @@ See also
diff --git a/docs/reference/exportDechallengeRechallengeToCsv.html b/docs/reference/exportDechallengeRechallengeToCsv.html
index 033782d..1f736ab 100644
--- a/docs/reference/exportDechallengeRechallengeToCsv.html
+++ b/docs/reference/exportDechallengeRechallengeToCsv.html
@@ -1,5 +1,5 @@
-export the DechallengeRechallenge results as csv — exportDechallengeRechallengeToCsv • Characterization
+export the DechallengeRechallenge results as csv — exportDechallengeRechallengeToCsv • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -120,7 +120,7 @@ See also
diff --git a/docs/reference/exportRechallengeFailCaseSeriesToCsv.html b/docs/reference/exportRechallengeFailCaseSeriesToCsv.html
index 456d5bd..f5c025a 100644
--- a/docs/reference/exportRechallengeFailCaseSeriesToCsv.html
+++ b/docs/reference/exportRechallengeFailCaseSeriesToCsv.html
@@ -1,5 +1,5 @@
-export the RechallengeFailCaseSeries results as csv — exportRechallengeFailCaseSeriesToCsv • Characterization
+export the RechallengeFailCaseSeries results as csv — exportRechallengeFailCaseSeriesToCsv • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -116,7 +116,7 @@ See also
diff --git a/docs/reference/exportTimeToEventToCsv.html b/docs/reference/exportTimeToEventToCsv.html
index c474100..d094231 100644
--- a/docs/reference/exportTimeToEventToCsv.html
+++ b/docs/reference/exportTimeToEventToCsv.html
@@ -1,5 +1,5 @@
-export the TimeToEvent results as csv — exportTimeToEventToCsv • Characterization
+export the TimeToEvent results as csv — exportTimeToEventToCsv • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -120,7 +120,7 @@ See also
diff --git a/docs/reference/getDbDuringCovariateData.html b/docs/reference/getDbDuringCovariateData.html
index bbaefc5..350d393 100644
--- a/docs/reference/getDbDuringCovariateData.html
+++ b/docs/reference/getDbDuringCovariateData.html
@@ -1,5 +1,5 @@
-Extracts covariates that occur during a cohort — getDbDuringCovariateData • Characterization
+Extracts covariates that occur during a cohort — getDbDuringCovariateData • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -167,7 +167,7 @@ See also
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 892d17a..1c85a9d 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • Characterization
+Function reference • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -186,13 +186,11 @@ Incremental
cleanIncremental()
- Removes csv files from folders that have not been marked as completed
-and removes the record of the execution file
+ Removes csv files from folders that have not been marked as completed and removes the record of the execution file
cleanNonIncremental()
- Removes csv files from the execution folder as there should be no csv files
-when running in non-incremental model
+ Removes csv files from the execution folder as there should be no csv files when running in non-incremental model
diff --git a/docs/reference/insertResultsToDatabase.html b/docs/reference/insertResultsToDatabase.html
index 28618e3..7135588 100644
--- a/docs/reference/insertResultsToDatabase.html
+++ b/docs/reference/insertResultsToDatabase.html
@@ -1,5 +1,5 @@
-Upload the results into a result database — insertResultsToDatabase • Characterization
+Upload the results into a result database — insertResultsToDatabase • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -138,7 +138,7 @@ See also
diff --git a/docs/reference/loadCharacterizationSettings.html b/docs/reference/loadCharacterizationSettings.html
index 156ce2a..8ae5921 100644
--- a/docs/reference/loadCharacterizationSettings.html
+++ b/docs/reference/loadCharacterizationSettings.html
@@ -1,5 +1,5 @@
-Load the characterization settings previously saved as a json file — loadCharacterizationSettings • Characterization
+Load the characterization settings previously saved as a json file — loadCharacterizationSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -117,7 +117,7 @@ See also
diff --git a/docs/reference/runCharacterizationAnalyses.html b/docs/reference/runCharacterizationAnalyses.html
index d5c9ae8..0d64da5 100644
--- a/docs/reference/runCharacterizationAnalyses.html
+++ b/docs/reference/runCharacterizationAnalyses.html
@@ -1,5 +1,5 @@
-execute a large-scale characterization study — runCharacterizationAnalyses • Characterization execute a large-scale characterization study — runCharacterizationAnalyses • Characterization Save the characterization settings as a json — saveCharacterizationSettings • Characterization Save the characterization settings as a json — saveCharacterizationSettings • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -121,7 +121,7 @@ See also
diff --git a/docs/reference/viewCharacterization.html b/docs/reference/viewCharacterization.html
index 4718fb4..203c343 100644
--- a/docs/reference/viewCharacterization.html
+++ b/docs/reference/viewCharacterization.html
@@ -1,5 +1,5 @@
-viewCharacterization - Interactively view the characterization results — viewCharacterization • Characterization
+viewCharacterization - Interactively view the characterization results — viewCharacterization • Characterization
@@ -17,7 +17,7 @@
Characterization
- 1.1.1
+ 2.0.0
@@ -114,7 +114,7 @@ Details
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 24c82df..70efbad 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,6 +3,9 @@
/404.html
+
+ /articles/index.html
+
/articles/InstallationGuide.html
@@ -12,9 +15,6 @@
/articles/UsingPackage.html
-
- /articles/index.html
-
/authors.html
diff --git a/tests/testthat/test-Incremental.R b/tests/testthat/test-Incremental.R
index 428af80..40d4e12 100644
--- a/tests/testthat/test-Incremental.R
+++ b/tests/testthat/test-Incremental.R
@@ -1,31 +1,32 @@
context("Incremental")
-logFolder <- file.path(tempdir(),'log1')
+logFolder <- file.path(tempdir(), "log1")
on.exit(unlink(logFolder))
-logFolder2 <- file.path(tempdir(),'log2')
+logFolder2 <- file.path(tempdir(), "log2")
on.exit(unlink(logFolder2))
-logFolder3 <- file.path(tempdir(),'log3')
+logFolder3 <- file.path(tempdir(), "log3")
on.exit(unlink(logFolder3))
-logFolder4 <- file.path(tempdir(),'log4')
+logFolder4 <- file.path(tempdir(), "log4")
on.exit(unlink(logFolder4))
-logFolder5 <- file.path(tempdir(),'log5')
+logFolder5 <- file.path(tempdir(), "log5")
on.exit(unlink(logFolder5))
-logFolder6 <- file.path(tempdir(),'log6')
+logFolder6 <- file.path(tempdir(), "log6")
on.exit(unlink(logFolder6))
-for(folder in c(logFolder, logFolder2, logFolder3,
- logFolder4, logFolder5, logFolder6)){
- if(!dir.exists(folder)){
+for (folder in c(
+ logFolder, logFolder2, logFolder3,
+ logFolder4, logFolder5, logFolder6
+)) {
+ if (!dir.exists(folder)) {
dir.create(folder)
}
}
test_that("createIncrementalLog", {
-
Characterization:::createIncrementalLog(
executionFolder = logFolder,
- logname = 'execution.csv'
- )
+ logname = "execution.csv"
+ )
testthat::expect_true("execution.csv" %in% dir(logFolder))
executionLog <- read.csv(file.path(logFolder, "execution.csv"))
testthat::expect_true(nrow(executionLog) == 1)
@@ -33,39 +34,35 @@ test_that("createIncrementalLog", {
Characterization:::createIncrementalLog(
executionFolder = logFolder,
- logname = 'madeup.csv'
+ logname = "madeup.csv"
)
testthat::expect_true("madeup.csv" %in% dir(logFolder))
-
})
test_that("loadIncrementalFiles", {
-
# should error as not completed.csv
testthat::expect_error(
Characterization:::loadIncrementalFiles(
- executionFolder = logFolder
- )
+ executionFolder = logFolder
+ )
)
# now create the completed.csv
Characterization:::createIncrementalLog(
executionFolder = logFolder,
- logname = 'completed.csv'
+ logname = "completed.csv"
)
result <- Characterization:::loadIncrementalFiles(
executionFolder = logFolder
)
- testthat::expect_true(sum(c('executed','completed') %in% names(result)) == 2)
+ testthat::expect_true(sum(c("executed", "completed") %in% names(result)) == 2)
testthat::expect_true(nrow(result$executed) == 1)
testthat::expect_true(nrow(result$completed) == 1)
-
})
test_that("getExecutionJobIssues", {
-
result <- Characterization:::loadIncrementalFiles(
executionFolder = logFolder
)
@@ -73,16 +70,16 @@ test_that("getExecutionJobIssues", {
issues <- Characterization:::getExecutionJobIssues(
executed = result$executed,
completed = result$completed
- )
+ )
testthat::expect_true(length(issues) == 0)
# now add some executed but not completed results
issues <- Characterization:::getExecutionJobIssues(
executed = data.frame(
- run_date_time = c(1,1),
- job_id = c(1,2),
- start_time = c(1,2),
- end_time = c(1,2)
+ run_date_time = c(1, 1),
+ job_id = c(1, 2),
+ start_time = c(1, 2),
+ end_time = c(1, 2)
),
completed = data.frame(
run_date_time = c(1),
@@ -95,10 +92,10 @@ test_that("getExecutionJobIssues", {
issues <- Characterization:::getExecutionJobIssues(
executed = data.frame(
- run_date_time = c(1,1),
- job_id = c(1,20),
- start_time = c(1,2),
- end_time = c(1,2)
+ run_date_time = c(1, 1),
+ job_id = c(1, 20),
+ start_time = c(1, 2),
+ end_time = c(1, 2)
),
completed = data.frame(
run_date_time = c(1),
@@ -108,19 +105,17 @@ test_that("getExecutionJobIssues", {
)
)
testthat::expect_true(issues == 20)
-
})
test_that("cleanIncremental", {
-
# create folder with issues
Characterization:::createIncrementalLog(
executionFolder = logFolder2,
- logname = 'execution.csv'
+ logname = "execution.csv"
)
Characterization:::createIncrementalLog(
executionFolder = logFolder2,
- logname = 'completed.csv'
+ logname = "completed.csv"
)
# add a job into executed that is not in completed
@@ -131,9 +126,9 @@ test_that("cleanIncremental", {
start_time = c(1),
end_time = c(1)
),
- file = file.path(logFolder2, 'execution.csv'),
+ file = file.path(logFolder2, "execution.csv"),
append = T
- )
+ )
incrementalFiles <- Characterization:::loadIncrementalFiles(
executionFolder = logFolder2
@@ -141,26 +136,26 @@ test_that("cleanIncremental", {
issues <- Characterization:::getExecutionJobIssues(
executed = incrementalFiles$executed,
completed = incrementalFiles$completed
- )
+ )
testthat::expect_true(nrow(incrementalFiles$executed) == 2)
testthat::expect_true(nrow(incrementalFiles$completed) == 1)
testthat::expect_true(length(issues) == 1)
- dir.create(file.path(logFolder2, '1'))
+ dir.create(file.path(logFolder2, "1"))
write.csv(
- x = data.frame(a=1),
- file = file.path(logFolder2, '1', 'madeup.csv')
- )
- testthat::expect_true(file.exists(file.path(logFolder2, '1', 'madeup.csv')))
+ x = data.frame(a = 1),
+ file = file.path(logFolder2, "1", "madeup.csv")
+ )
+ testthat::expect_true(file.exists(file.path(logFolder2, "1", "madeup.csv")))
# run clean to fix issues
Characterization:::cleanIncremental(
executionFolder = logFolder2
- )
+ )
# check issues are fixed
- testthat::expect_true(!file.exists(file.path(logFolder2, '1', 'madeup.csv')))
+ testthat::expect_true(!file.exists(file.path(logFolder2, "1", "madeup.csv")))
incrementalFiles <- Characterization:::loadIncrementalFiles(
executionFolder = logFolder2
)
@@ -172,24 +167,22 @@ test_that("cleanIncremental", {
testthat::expect_true(nrow(incrementalFiles$executed) == 1)
testthat::expect_true(nrow(incrementalFiles$completed) == 1)
testthat::expect_true(length(issues) == 0)
-
})
test_that("checkResultFilesIncremental ", {
-
# create folder with issues
Characterization:::createIncrementalLog(
executionFolder = logFolder3,
- logname = 'execution.csv'
+ logname = "execution.csv"
)
Characterization:::createIncrementalLog(
executionFolder = logFolder3,
- logname = 'completed.csv'
+ logname = "completed.csv"
)
result <- Characterization:::checkResultFilesIncremental(
executionFolder = logFolder3
- )
+ )
testthat::expect_true(is.null(result))
# add a job into executed that is not in completed
@@ -200,26 +193,24 @@ test_that("checkResultFilesIncremental ", {
start_time = c(1),
end_time = c(1)
),
- file = file.path(logFolder3, 'execution.csv'),
+ file = file.path(logFolder3, "execution.csv"),
append = T
)
testthat::expect_error(Characterization:::checkResultFilesIncremental(
executionFolder = logFolder3
))
-
})
test_that("checkResultFilesIncremental ", {
-
# create folder with issues
Characterization:::createIncrementalLog(
executionFolder = logFolder4,
- logname = 'execution.csv'
+ logname = "execution.csv"
)
Characterization:::createIncrementalLog(
executionFolder = logFolder4,
- logname = 'completed.csv'
+ logname = "completed.csv"
)
# add a job into executed and completed
@@ -230,7 +221,7 @@ test_that("checkResultFilesIncremental ", {
start_time = c(1),
end_time = c(1)
),
- file = file.path(logFolder4, 'execution.csv'),
+ file = file.path(logFolder4, "execution.csv"),
append = T
)
readr::write_csv(
@@ -240,69 +231,66 @@ test_that("checkResultFilesIncremental ", {
start_time = c(1),
end_time = c(1)
),
- file = file.path(logFolder4, 'completed.csv'),
+ file = file.path(logFolder4, "completed.csv"),
append = T
)
-jobs <- Characterization:::findCompletedJobs(logFolder4)
-testthat::expect_true(1 %in% jobs)
-
+ jobs <- Characterization:::findCompletedJobs(logFolder4)
+ testthat::expect_true(1 %in% jobs)
})
test_that("recordIncremental ", {
Characterization:::createIncrementalLog(
executionFolder = logFolder6,
- logname = 'execution.csv'
+ logname = "execution.csv"
)
execution <- read.csv(
- file = file.path(logFolder6, 'execution.csv')
+ file = file.path(logFolder6, "execution.csv")
)
- testthat::expect_true(!'example100' %in% execution$job_id)
+ testthat::expect_true(!"example100" %in% execution$job_id)
-Characterization:::recordIncremental(
+ Characterization:::recordIncremental(
executionFolder = logFolder6,
runDateTime = Sys.time(),
- jobId = 'example100',
+ jobId = "example100",
startTime = Sys.time(),
endTime = Sys.time(),
- logname = 'execution.csv'
-)
+ logname = "execution.csv"
+ )
executionJobs <- read.csv(
- file = file.path(logFolder6, 'execution.csv')
- )
- testthat::expect_true('example100' %in% executionJobs$job_id)
+ file = file.path(logFolder6, "execution.csv")
+ )
+ testthat::expect_true("example100" %in% executionJobs$job_id)
# test warning if no file
testthat::expect_warning(
Characterization:::recordIncremental(
executionFolder = logFolder6,
runDateTime = 1,
- jobId = 'example100',
+ jobId = "example100",
startTime = 1,
endTime = 1,
- logname = 'execution2.csv'
+ logname = "execution2.csv"
)
)
-
})
test_that("No Incremental works", {
-
result <- Characterization:::checkResultFilesNonIncremental(
executionFolder = logFolder5
)
testthat::expect_true(is.null(result))
dir.create(
- path = file.path(logFolder5, 'job_1'),
+ path = file.path(logFolder5, "job_1"),
recursive = T
)
- on.exit(unlink(file.path(logFolder5, 'job_1')))
+ on.exit(unlink(file.path(logFolder5, "job_1")))
write.csv(
- x = data.frame(a=1),
- file = file.path(logFolder5, 'job_1', 'anyCsvFile.csv')
+ x = data.frame(a = 1),
+ file = file.path(logFolder5, "job_1", "anyCsvFile.csv")
)
# now there is a csv file it should error
@@ -313,10 +301,9 @@ test_that("No Incremental works", {
)
# this should clean the folder of any csv files
-Characterization:::cleanNonIncremental(
- executionFolder = logFolder5
+ Characterization:::cleanNonIncremental(
+ executionFolder = logFolder5
)
-# previously created csv should have been deleted
-testthat::expect_true(length(dir(file.path(logFolder5, 'job_1'))) == 0)
-
+ # previously created csv should have been deleted
+ testthat::expect_true(length(dir(file.path(logFolder5, "job_1"))) == 0)
})
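These tests hinge on the append-only log format: the first write creates the header row, and every later job appends without rewriting it. The pattern in isolation:

# Sketch only: the append-only csv logging pattern the tests exercise.
logFile <- file.path(tempdir(), "execution.csv")
readr::write_csv(
  x = data.frame(run_date_time = Sys.time(), job_id = "0",
                 start_time = Sys.time(), end_time = Sys.time()),
  file = logFile
)
# append = TRUE keeps earlier rows and skips writing a second header
readr::write_csv(
  x = data.frame(run_date_time = Sys.time(), job_id = "tte_1",
                 start_time = Sys.time(), end_time = Sys.time()),
  file = logFile,
  append = TRUE
)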
diff --git a/tests/testthat/test-aggregateCovariate.R b/tests/testthat/test-aggregateCovariate.R
index 335ce2d..441981c 100644
--- a/tests/testthat/test-aggregateCovariate.R
+++ b/tests/testthat/test-aggregateCovariate.R
@@ -55,16 +55,16 @@ test_that("createAggregateCovariateSettings", {
)
testthat::expect_equal(
- res$riskWindowStart,2
+ res$riskWindowStart, 2
)
testthat::expect_equal(
res$startAnchor, "cohort end"
)
testthat::expect_equal(
- res$riskWindowEnd,363
+ res$riskWindowEnd, 363
)
testthat::expect_equal(
- res$endAnchor,"cohort end"
+ res$endAnchor, "cohort end"
)
testthat::expect_equal(
@@ -81,8 +81,6 @@ test_that("createAggregateCovariateSettings", {
res$casePostOutcomeDuration,
120
)
-
-
})
test_that("error when using temporal features", {
@@ -120,7 +118,6 @@ test_that("error when using temporal features", {
minCharacterizationMean = 0.01
)
)
-
})
test_that("createAggregateCovariateSettingsList", {
@@ -159,8 +156,8 @@ test_that("createExecutionIds", {
testthat::expect_true(length(testIds) == 10)
testthat::expect_true(length(unique(testIds)) == 10)
- testId1 <- createExecutionIds(1)
- testId2 <- createExecutionIds(1)
+ testId1 <- createExecutionIds(1)
+ testId2 <- createExecutionIds(1)
testthat::expect_true(testId1 != testId2)
})
@@ -177,7 +174,7 @@ test_that("getAggregateCovariatesJobs", {
useConditionOccurrenceDuring = T
)
- minPriorObservation <- sample(30,1)
+ minPriorObservation <- sample(30, 1)
res <- createAggregateCovariateSettings(
targetIds = targetIds,
@@ -193,22 +190,24 @@ test_that("getAggregateCovariatesJobs", {
jobDf <- getAggregateCovariatesJobs(
characterizationSettings = Characterization::createCharacterizationSettings(
aggregateCovariateSettings = res
- ),
+ ),
threads = 1
)
testthat::expect_true(
- sum(c('computeTargetAggregateCovariateAnalyses',
- 'computeCaseAggregateCovariateAnalyses') %in%
+ sum(c(
+ "computeTargetAggregateCovariateAnalyses",
+ "computeCaseAggregateCovariateAnalyses"
+ ) %in%
jobDf$functionName) == 2
- )
+ )
testthat::expect_true(nrow(jobDf) == 2)
testthat::expect_true(
- paste0('tac_1_',minPriorObservation) %in% jobDf$executionFolder
- )
+ paste0("tac_1_", minPriorObservation) %in% jobDf$executionFolder
+ )
testthat::expect_true(
- paste0('cac_1_',minPriorObservation, '_1_365_365') %in% jobDf$executionFolder
+ paste0("cac_1_", minPriorObservation, "_1_365_365") %in% jobDf$executionFolder
)
settings <- ParallelLogger::convertJsonToSettings(jobDf$settings[1])
@@ -234,14 +233,18 @@ test_that("getAggregateCovariatesJobs", {
testthat::expect_true(nrow(jobDf) == 4)
testthat::expect_true(
- sum(c(paste0('tac_1_',minPriorObservation),
- paste0('tac_2_',minPriorObservation))
- %in% jobDf$executionFolder ) == 2
+ sum(c(
+ paste0("tac_1_", minPriorObservation),
+ paste0("tac_2_", minPriorObservation)
+ )
+ %in% jobDf$executionFolder) == 2
)
testthat::expect_true(
- sum(c(paste0('cac_1_',minPriorObservation, '_1_365_365'),
- paste0('cac_2_',minPriorObservation, '_1_365_365'))
- %in% jobDf$executionFolder) == 2
+ sum(c(
+ paste0("cac_1_", minPriorObservation, "_1_365_365"),
+ paste0("cac_2_", minPriorObservation, "_1_365_365")
+ )
+ %in% jobDf$executionFolder) == 2
)
# now check threads = 3
@@ -251,7 +254,7 @@ test_that("getAggregateCovariatesJobs", {
),
threads = 3
)
- testthat::expect_true(nrow(jobDf) == 2*3)
+ testthat::expect_true(nrow(jobDf) == 2 * 3)
# now check threads = 4
jobDf <- getAggregateCovariatesJobs(
@@ -272,11 +275,13 @@ test_that("getAggregateCovariatesJobs", {
testthat::expect_true(nrow(jobDf) == 7)
testthat::expect_true(
- length(unique(unlist(lapply(1:nrow(jobDf),
- function(i){
- ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
- }
- )))) == 2)
+ length(unique(unlist(lapply(
+ 1:nrow(jobDf),
+ function(i) {
+ ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
+ }
+ )))) == 2
+ )
# add more settings
@@ -291,7 +296,7 @@ test_that("getAggregateCovariatesJobs", {
caseCovariateSettings = caseCovariateSettings
)
- jobDf <- getAggregateCovariatesJobs(
+ jobDf <- getAggregateCovariatesJobs(
characterizationSettings = createCharacterizationSettings(
aggregateCovariateSettings = list(res, res2)
),
@@ -299,25 +304,29 @@ test_that("getAggregateCovariatesJobs", {
)
testthat::expect_true(nrow(jobDf) == 4)
testthat::expect_true(
- length(unique(unlist(lapply(1:nrow(jobDf),
- function(i){
- ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
- }
- )))) == 4)
+ length(unique(unlist(lapply(
+ 1:nrow(jobDf),
+ function(i) {
+ ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
+ }
+ )))) == 4
+ )
jobDf <- getAggregateCovariatesJobs(
- characterizationSettings = createCharacterizationSettings(
- aggregateCovariateSettings = list(res, res2)
- ),
- threads = 3
- )
+ characterizationSettings = createCharacterizationSettings(
+ aggregateCovariateSettings = list(res, res2)
+ ),
+ threads = 3
+ )
testthat::expect_true(nrow(jobDf) == 12)
testthat::expect_true(
- length(unique(unlist(lapply(1:nrow(jobDf),
- function(i){
- ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
- }
- )))) == 4)
+ length(unique(unlist(lapply(
+ 1:nrow(jobDf),
+ function(i) {
+ ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
+ }
+ )))) == 4
+ )
# test when extractNonCaseCovariates = F
@@ -325,7 +334,7 @@ test_that("getAggregateCovariatesJobs", {
targetIds = 1,
outcomeIds = 3,
extractNonCaseCovariates = F
- )
+ )
jobDf <- Characterization:::getAggregateCovariatesJobs(
characterizationSettings = createCharacterizationSettings(
aggregateCovariateSettings = list(res3)
@@ -347,7 +356,6 @@ test_that("getAggregateCovariatesJobs", {
)
# add checks
-
})
test_that("computeTargetAggregateCovariateAnalyses", {
@@ -373,10 +381,10 @@ test_that("computeTargetAggregateCovariateAnalyses", {
caseCovariateSettings = caseCovariateSettings
)
- jobDf <- getAggregateCovariatesJobs(
+ jobDf <- getAggregateCovariatesJobs(
characterizationSettings = createCharacterizationSettings(
aggregateCovariateSettings = res
- ),
+ ),
threads = 1
)
@@ -388,25 +396,25 @@ test_that("computeTargetAggregateCovariateAnalyses", {
targetTable = "cohort",
settings = ParallelLogger::convertJsonToSettings(jobDf$settings[1]),
minCharacterizationMean = 0.01,
- databaseId = 'madeup',
+ databaseId = "madeup",
outputFolder = tempFolder1
)
# check incremental does not run
testthat::expect_true(
- sum(c('cohort_details.csv',
- 'settings.csv',
- 'covariates.csv',
- 'covariates_continuous.csv',
- 'cohort_counts.csv',
- 'covariate_ref.csv',
- 'analysis_ref.csv'
- ) %in% dir(tempFolder1)
- ) == length(dir(tempFolder1))
+ sum(c(
+ "cohort_details.csv",
+ "settings.csv",
+ "covariates.csv",
+ "covariates_continuous.csv",
+ "cohort_counts.csv",
+ "covariate_ref.csv",
+ "analysis_ref.csv"
+ ) %in% dir(tempFolder1)) == length(dir(tempFolder1))
)
# check cohortCounts is done for all
cohortDetails <- readr::read_csv(
- file.path(tempFolder1,'cohort_details.csv'),
+ file.path(tempFolder1, "cohort_details.csv"),
show_col_types = F
)
testthat::expect_true(
@@ -416,8 +424,8 @@ test_that("computeTargetAggregateCovariateAnalyses", {
nrow(cohortDetails) == 8
)
- aggCovs <- readr::read_csv(
- file = file.path(tempFolder1, 'covariates.csv'),
+ aggCovs <- readr::read_csv(
+ file = file.path(tempFolder1, "covariates.csv"),
show_col_types = F
)
# check covariates is unique
@@ -427,9 +435,8 @@ test_that("computeTargetAggregateCovariateAnalyses", {
# check databaseId is added
testthat::expect_true(
- aggCovs$database_id[1] == 'madeup'
+ aggCovs$database_id[1] == "madeup"
)
-
})
@@ -459,7 +466,7 @@ test_that("computeCaseAggregateCovariateAnalyses", {
jobDf <- getAggregateCovariatesJobs(
characterizationSettings = createCharacterizationSettings(
aggregateCovariateSettings = res
- ),
+ ),
threads = 1
)
@@ -471,36 +478,36 @@ test_that("computeCaseAggregateCovariateAnalyses", {
targetTable = "cohort",
settings = ParallelLogger::convertJsonToSettings(jobDf$settings[2]),
minCharacterizationMean = 0.01,
- databaseId = 'madeup',
+ databaseId = "madeup",
outputFolder = tempFolder2
)
# check incremental does not run
testthat::expect_true(
- sum(c('cohort_details.csv',
- 'settings.csv',
- 'covariates.csv',
- 'covariates_continuous.csv',
- 'cohort_counts.csv',
- 'covariate_ref.csv',
- 'analysis_ref.csv'
- ) %in% dir(tempFolder2)
- ) == length(dir(tempFolder2))
+ sum(c(
+ "cohort_details.csv",
+ "settings.csv",
+ "covariates.csv",
+ "covariates_continuous.csv",
+ "cohort_counts.csv",
+ "covariate_ref.csv",
+ "analysis_ref.csv"
+ ) %in% dir(tempFolder2)) == length(dir(tempFolder2))
)
# check cohortCounts is done for all
cohortDetails <- readr::read_csv(
- file.path(tempFolder2,'cohort_details.csv'),
+ file.path(tempFolder2, "cohort_details.csv"),
show_col_types = F
)
testthat::expect_true(
nrow(unique(cohortDetails)) == nrow(cohortDetails)
)
testthat::expect_true(
- nrow(cohortDetails) == 3*5
+ nrow(cohortDetails) == 3 * 5
)
- aggCovs <- readr::read_csv(
- file = file.path(tempFolder2, 'covariates.csv'),
+ aggCovs <- readr::read_csv(
+ file = file.path(tempFolder2, "covariates.csv"),
show_col_types = F
)
# check covariates is unique
@@ -510,7 +517,6 @@ test_that("computeCaseAggregateCovariateAnalyses", {
# check databaseId is added
testthat::expect_true(
- aggCovs$database_id[1] == 'madeup'
+ aggCovs$database_id[1] == "madeup"
)
-
})
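
The assertions above repeatedly round-trip each job's settings through JSON. A minimal sketch of that pattern, assuming a `jobDf` returned by `getAggregateCovariatesJobs()` as in the tests (illustration only, not part of the patch):

```r
# Recover the distinct settingIds across all jobs, as the tests do:
# each row of jobDf stores its settings as a JSON string.
settingIds <- unlist(lapply(seq_len(nrow(jobDf)), function(i) {
  ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId
}))
length(unique(settingIds)) # one per distinct analysis setting
```
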
diff --git a/tests/testthat/test-dbs.R b/tests/testthat/test-dbs.R
index 721735c..703121a 100644
--- a/tests/testthat/test-dbs.R
+++ b/tests/testthat/test-dbs.R
@@ -8,7 +8,7 @@ dbmsPlatforms <- c(
"snowflake",
"spark",
"sql server"
- )
+)
getPlatformConnectionDetails <- function(dbmsPlatform) {
# Get drivers for test platform
@@ -52,7 +52,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
vocabularyDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_CDM_SCHEMA")
cohortDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA")
options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA"))
- } else {
+ } else {
return(NULL)
}
} else if (dbmsPlatform == "oracle") {
@@ -103,19 +103,18 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA"))
} else if (dbmsPlatform == "spark") {
if (.Platform$OS.type == "windows") { # skipping Mac for GHA due to JAVA issue
- connectionDetails <- DatabaseConnector::createConnectionDetails(
- dbms = dbmsPlatform,
- user = Sys.getenv("CDM5_SPARK_USER"),
- password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")),
- connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING"),
- pathToDriver = jdbcDriverFolder
- )
- cdmDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
- vocabularyDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
- cohortDatabaseSchema <- Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA")
- options(sqlRenderTempEmulationSchema = Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA"))
- }
- else{
+ connectionDetails <- DatabaseConnector::createConnectionDetails(
+ dbms = dbmsPlatform,
+ user = Sys.getenv("CDM5_SPARK_USER"),
+ password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")),
+ connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING"),
+ pathToDriver = jdbcDriverFolder
+ )
+ cdmDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
+ vocabularyDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
+ cohortDatabaseSchema <- Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA")
+ options(sqlRenderTempEmulationSchema = Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA"))
+ } else {
return(NULL)
}
} else if (dbmsPlatform == "sql server") {
@@ -147,13 +146,13 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
))
}
-for(dbmsPlatform in dbmsPlatforms){
- if(Sys.getenv('CI') == 'true' & .Platform$OS.type == "windows"){
+for (dbmsPlatform in dbmsPlatforms) {
+ if (Sys.getenv("CI") == "true" & .Platform$OS.type == "windows") {
tempFolder <- tempfile(paste0("Characterization_", dbmsPlatform))
on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)
dbmsDetails <- getPlatformConnectionDetails(dbmsPlatform)
- if(!is.null(dbmsDetails)){
+ if (!is.null(dbmsDetails)) {
con <- DatabaseConnector::connect(dbmsDetails$connectionDetails)
on.exit(DatabaseConnector::disconnect(con))
}
@@ -161,11 +160,10 @@ for(dbmsPlatform in dbmsPlatforms){
# This file contains platform specific tests
test_that(paste0("platform specific test ", dbmsPlatform), {
- skip_if(Sys.getenv('CI') != 'true' | .Platform$OS.type != "windows", 'not run locally')
+ skip_if(Sys.getenv("CI") != "true" | .Platform$OS.type != "windows", "not run locally")
if (is.null(dbmsDetails)) {
print(paste("No platform details available for", dbmsPlatform))
} else {
-
# create a cohort table
DatabaseConnector::insertTable(
bulkLoad = F,
@@ -175,9 +173,10 @@ for(dbmsPlatform in dbmsPlatforms){
data = data.frame(
subject_id = 1:10,
cohort_definition_id = sample(4, 10, replace = T),
- cohort_start_date = rep(as.Date('2010-01-01'), 10),
- cohort_end_date = rep(as.Date('2010-01-01'), 10)
- ))
+ cohort_start_date = rep(as.Date("2010-01-01"), 10),
+ cohort_end_date = rep(as.Date("2010-01-01"), 10)
+ )
+ )
targetIds <- c(1, 2, 4)
outcomeIds <- c(3)
@@ -246,9 +245,9 @@ for(dbmsPlatform in dbmsPlatforms){
outcomeDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
outcomeTable = dbmsDetails$cohortTable,
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempFolder, 'csv'),
- executionPath = file.path(tempFolder, 'execution'),
- csvFilePrefix = "c_",
+ outputDirectory = file.path(tempFolder, "csv"),
+ executionPath = file.path(tempFolder, "execution"),
+ csvFilePrefix = "c_",
threads = 1,
databaseId = dbmsDetails$connectionDetails$dbms
)
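
For reference, the CI loop above consumes `getPlatformConnectionDetails()` as sketched below; the function returns NULL when a platform's environment variables are not configured, so the connection step is guarded (a usage sketch mirroring the test file, not new functionality):

```r
# Only connect when credentials for the platform are available.
dbmsDetails <- getPlatformConnectionDetails("spark")
if (!is.null(dbmsDetails)) {
  con <- DatabaseConnector::connect(dbmsDetails$connectionDetails)
  on.exit(DatabaseConnector::disconnect(con), add = TRUE)
}
```
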
diff --git a/tests/testthat/test-dechallengeRechallenge.R b/tests/testthat/test-dechallengeRechallenge.R
index 7718302..cd01401 100644
--- a/tests/testthat/test-dechallengeRechallenge.R
+++ b/tests/testthat/test-dechallengeRechallenge.R
@@ -139,16 +139,14 @@ test_that("computeDechallengeRechallengeAnalyses", {
outputFolder = dcLoc,
minCellCount = 0
)
- dc <- readr::read_csv(file.path(dcLoc,'dechallenge_rechallenge.csv'), show_col_types = F)
+ dc <- readr::read_csv(file.path(dcLoc, "dechallenge_rechallenge.csv"), show_col_types = F)
# one T and 2 Os, but only one combination produces results, so expect 1 row
testthat::expect_true(nrow(dc) == 1)
testthat::expect_true(dc$num_persons_exposed == 4)
testthat::expect_true(dc$num_exposure_eras == 10)
-
})
test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
-
# check with made-up data
# subject 1 has 1 exposure for 30 days
# subject 2 has 4 exposures for ~30 days with ~30 day gaps
@@ -223,7 +221,7 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
)
# person 2 should be in results (subject id not shown here)
- dc <- readr::read_csv(file.path(dcLoc,'rechallenge_fail_case_series.csv'), show_col_types = F)
+ dc <- readr::read_csv(file.path(dcLoc, "rechallenge_fail_case_series.csv"), show_col_types = F)
testthat::expect_equal(nrow(dc), 1)
testthat::expect_true(is.na(dc$subject_id))
@@ -243,7 +241,7 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
)
# person 2 should be in results
- dc <- readr::read_csv(file.path(dcLoc,'rechallenge_fail_case_series.csv'), show_col_types = F)
+ dc <- readr::read_csv(file.path(dcLoc, "rechallenge_fail_case_series.csv"), show_col_types = F)
testthat::expect_equal(nrow(dc), 1)
testthat::expect_equal(dc$subject_id, 2)
@@ -265,8 +263,8 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
# checking minCellCount
# person 2 should be in results but all min cell count
# values should be censored
- dr <- readr::read_csv(file.path(dcLoc,'dechallenge_rechallenge.csv'), show_col_types = F)
- testthat::expect_true(nrow(dr) > 0 )
+ dr <- readr::read_csv(file.path(dcLoc, "dechallenge_rechallenge.csv"), show_col_types = F)
+ testthat::expect_true(nrow(dr) > 0)
testthat::expect_equal(max(dr$num_persons_exposed), -9999)
testthat::expect_equal(max(dr$num_cases), -9999)
testthat::expect_equal(max(dr$dechallenge_attempt), -9999)
@@ -275,13 +273,12 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
testthat::expect_equal(max(dr$rechallenge_attempt), -9999)
testthat::expect_equal(max(dr$rechallenge_fail), -9999)
testthat::expect_equal(max(dr$rechallenge_success), -9999)
-
})
# add test for job creation code
test_that("computeDechallengeRechallengeAnalyses", {
- targetIds <- c(2,5,6,7,8)
+ targetIds <- c(2, 5, 6, 7, 8)
outcomeIds <- c(3, 4, 9, 10)
res <- createDechallengeRechallengeSettings(
@@ -290,100 +287,98 @@ test_that("computeDechallengeRechallengeAnalyses", {
dechallengeStopInterval = 30,
dechallengeEvaluationWindow = 30
)
-jobs <- Characterization:::getDechallengeRechallengeJobs(
- characterizationSettings = createCharacterizationSettings(
- dechallengeRechallengeSettings = res
+ jobs <- Characterization:::getDechallengeRechallengeJobs(
+ characterizationSettings = createCharacterizationSettings(
+ dechallengeRechallengeSettings = res
),
- threads = 1
- )
-
-# as 1 thread should be 2 rows for two analyses
-testthat::expect_true(nrow(jobs) == 2)
-
-# check all target ids are in there
-targetIdFromSettings <- do.call(
- what = unique,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds}
- )
-)
-testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
- length(targetIds))
-
-# check all outcome ids are in there
-outcomeIdFromSettings <- do.call(
- what = unique,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds}
+ threads = 1
)
-)
-testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
- length(outcomeIds))
+ # with 1 thread there should be 2 rows, one per analysis
+ testthat::expect_true(nrow(jobs) == 2)
-# checking more threads 3
-jobs <- Characterization:::getDechallengeRechallengeJobs(
- characterizationSettings = createCharacterizationSettings(
- dechallengeRechallengeSettings = res
- ),
- threads = 3
-)
+ # check all target ids are in there
+ targetIdFromSettings <- do.call(
+ what = unique,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds
+ })
+ )
+ testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
+ length(targetIds))
+
+ # check all outcome ids are in there
+ outcomeIdFromSettings <- do.call(
+ what = unique,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds
+ })
+ )
+ testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
+ length(outcomeIds))
-# as 3 thread should be 2*3 rows for two analyses
-testthat::expect_true(nrow(jobs) == 2*3)
-# check all target ids are in there
-targetIdFromSettings <- do.call(
- what = c,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds}
+ # checking with more threads: 3
+ jobs <- Characterization:::getDechallengeRechallengeJobs(
+ characterizationSettings = createCharacterizationSettings(
+ dechallengeRechallengeSettings = res
+ ),
+ threads = 3
)
-)
-testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
- length(targetIds))
-
-# check all outcome ids are in there
-outcomeIdFromSettings <- do.call(
- what =c,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds}
+
+ # as 3 thread should be 2*3 rows for two analyses
+ testthat::expect_true(nrow(jobs) == 2 * 3)
+
+ # check all target ids are in there
+ targetIdFromSettings <- do.call(
+ what = c,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds
+ })
)
-)
-testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
- length(outcomeIds))
+ testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
+ length(targetIds))
+
+ # check all outcome ids are in there
+ outcomeIdFromSettings <- do.call(
+ what = c,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds
+ })
+ )
+ testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
+ length(outcomeIds))
-# checking more threads than needed 20
-jobs <- Characterization:::getDechallengeRechallengeJobs(
- characterizationSettings = createCharacterizationSettings(
- dechallengeRechallengeSettings = res
- ),
- threads = 20
-)
+ # checking with more threads than needed: 20
+ jobs <- Characterization:::getDechallengeRechallengeJobs(
+ characterizationSettings = createCharacterizationSettings(
+ dechallengeRechallengeSettings = res
+ ),
+ threads = 20
+ )
-# as 3 thread should be 2*5 rows for two analyses
-testthat::expect_true(nrow(jobs) == 2*5)
+ # as 3 thread should be 2*5 rows for two analyses
+ testthat::expect_true(nrow(jobs) == 2 * 5)
-# check all target ids are in there
-targetIdFromSettings <- do.call(
- what = c,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds}
+ # check all target ids are in there
+ targetIdFromSettings <- do.call(
+ what = c,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds
+ })
)
-)
-testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
- length(targetIds))
-
-# check all outcome ids are in there
-outcomeIdFromSettings <- do.call(
- what =c,
- args = lapply(1:nrow(jobs), function(i){
- ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds}
+ testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
+ length(targetIds))
+
+ # check all outcome ids are in there
+ outcomeIdFromSettings <- do.call(
+ what = c,
+ args = lapply(1:nrow(jobs), function(i) {
+ ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds
+ })
)
-)
-testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
- length(outcomeIds))
-
+ testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
+ length(outcomeIds))
})
-
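
The three thread settings exercised above (1, 3, and 20) assert job counts of 2, 6, and 10. A hypothetical helper (not in the package) that makes the arithmetic explicit, assuming jobs scale as two analyses times the thread count capped by the number of target cohorts — an inference from the assertions, not from the package source:

```r
# Hypothetical illustration of the job-count arithmetic the tests assert.
expectedJobs <- function(threads, targetIds) {
  2 * min(threads, length(targetIds))
}
expectedJobs(1, c(2, 5, 6, 7, 8))  # 2
expectedJobs(3, c(2, 5, 6, 7, 8))  # 6
expectedJobs(20, c(2, 5, 6, 7, 8)) # 10
```
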
diff --git a/tests/testthat/test-manualData.R b/tests/testthat/test-manualData.R
index 772bd3a..a8728d8 100644
--- a/tests/testthat/test-manualData.R
+++ b/tests/testthat/test-manualData.R
@@ -1,318 +1,318 @@
context("manual data")
-manualData <- file.path(tempdir(), 'manual.sqlite')
+manualData <- file.path(tempdir(), "manual.sqlite")
on.exit(file.remove(manualData), add = TRUE)
-manualData2 <- file.path(tempdir(), 'manual2.sqlite')
+manualData2 <- file.path(tempdir(), "manual2.sqlite")
on.exit(file.remove(manualData2), add = TRUE)
test_that("manual data runCharacterizationAnalyses", {
-
# this test creates made-up OMOP CDM data
# and runs runCharacterizationAnalyses on the data
# to check whether the results are as expected
-connectionDetails <- DatabaseConnector::createConnectionDetails(
- dbms = 'sqlite',
- server = manualData
+ connectionDetails <- DatabaseConnector::createConnectionDetails(
+ dbms = "sqlite",
+ server = manualData
)
-con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
-schema <- 'main'
-
-# add persons - aggregate covs (age)
-persons <- data.frame(
- person_id = 1:10,
- gender_concept_id = rep(8532, 10),
- year_of_birth = rep(2000, 10),
- race_concept_id = rep(1, 10),
- ethnicity_concept_id = rep(1, 10),
- location_id = rep(1,10),
- provider_id = rep(1,10),
- care_site_id = rep(1,10),
- person_source_value = 1:10,
- gender_source_value = rep('female', 10),
- race_source_value = rep('na', 10),
- ethnicity_source_value = rep('na', 10)
-)
-DatabaseConnector::insertTable(
- connection = con,
- databaseSchema = schema,
- tableName = 'person',
- data = persons
- )
+ con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
+ schema <- "main"
-# observation period
-obs_period <- data.frame(
- observation_period_id = 1:10,
- person_id = 1:10,
- observation_period_start_date = rep('2000-12-31', 10),
- observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)),
- period_type_concept_id = rep(1,10)
-)
-obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
-obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
-DatabaseConnector::insertTable(
- connection = con,
- databaseSchema = schema,
- tableName = 'observation_period',
- data = obs_period
-)
-# person 1 has 1 day obs
-# person 2-6 has no events
-# person 7 has diabetes at 10, headache at 12
-# person 8 has diabetes at 13
-# person 9 has headache multiple times
-# person 10 has diabetes at 14
-# add conditions - aggregate covs (conditions)
-
-condition_era <- data.frame(
- condition_era_id = 1:7,
- person_id = c(7,7,8, 9,9,9,10),
- condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820),
- condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
- condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
- condition_occurrence_count = rep(1, 7)
-)
-condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
-condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)
-
-DatabaseConnector::insertTable(
- connection = con,
- databaseSchema = schema,
- tableName = 'condition_era',
- data = condition_era
-)
-
-# add concept
-concept <- data.frame(
- concept_id = c(201820,378253),
- concept_name = c('diabetes', 'hypertension'),
- domain_id = rep(1,2),
- vocabulary_id = rep(1,2),
- concept_class_id = c('Condition', 'Condition'),
- standard_concept = rep('S',2),
- concept_code = rep('Snowmed',2)
- #,valid_start_date = NULL,
- #valid_end_date = NULL,
- #invalid_reason = NULL
-)
-DatabaseConnector::insertTable(
- connection = con,
- databaseSchema = schema,
- tableName = 'concept',
- data = concept
-)
-
-# add cohort - tte/dechal/rechal
-cohort <- data.frame(
- subject_id = c(
- 1:10,
- 7,8,10,
- c(3,6,7,8,10),
- c(7)
- ),
- cohort_definition_id = c(
- rep(1,10),
- rep(1,3),
- rep(2, 5),
- 2
- ),
- cohort_start_date = c(
- rep('2018-01-01', 10),
- rep('2018-05-01',3),
- '2018-01-13','2018-01-03',rep('2018-01-06',3),
- '2018-05-24'
- ),
- cohort_end_date = c(
- rep('2018-02-01', 10),
- rep('2018-06-01',3),
- '2018-02-02','2018-02-04',rep('2018-02-08',3),
- '2018-06-05'
+ # add persons - aggregate covs (age)
+ persons <- data.frame(
+ person_id = 1:10,
+ gender_concept_id = rep(8532, 10),
+ year_of_birth = rep(2000, 10),
+ race_concept_id = rep(1, 10),
+ ethnicity_concept_id = rep(1, 10),
+ location_id = rep(1, 10),
+ provider_id = rep(1, 10),
+ care_site_id = rep(1, 10),
+ person_source_value = 1:10,
+ gender_source_value = rep("female", 10),
+ race_source_value = rep("na", 10),
+ ethnicity_source_value = rep("na", 10)
)
-)
-cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
-cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
-DatabaseConnector::insertTable(
- connection = con,
- databaseSchema = schema,
- tableName = 'cohort',
- data = cohort
-)
-
-# create settings and run
-characterizationSettings <- Characterization::createCharacterizationSettings(
- timeToEventSettings = Characterization::createTimeToEventSettings(
- targetIds = 1,
- outcomeIds = 2
+ DatabaseConnector::insertTable(
+ connection = con,
+ databaseSchema = schema,
+ tableName = "person",
+ data = persons
+ )
+
+ # observation period
+ obs_period <- data.frame(
+ observation_period_id = 1:10,
+ person_id = 1:10,
+ observation_period_start_date = rep("2000-12-31", 10),
+ observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
+ period_type_concept_id = rep(1, 10)
+ )
+ obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
+ obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
+ DatabaseConnector::insertTable(
+ connection = con,
+ databaseSchema = schema,
+ tableName = "observation_period",
+ data = obs_period
+ )
+ # person 1 has 1 day obs
+ # persons 2-6 have no events
+ # person 7 has diabetes at 10, headache at 12
+ # person 8 has diabetes at 13
+ # person 9 has headache multiple times
+ # person 10 has diabetes at 14
+ # add conditions - aggregate covs (conditions)
+
+ condition_era <- data.frame(
+ condition_era_id = 1:7,
+ person_id = c(7, 7, 8, 9, 9, 9, 10),
+ condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
+ condition_era_start_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
),
- dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
- targetIds = 1,
- outcomeIds = 2
+ condition_era_end_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
),
- aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
- targetIds = 1,
- outcomeIds = 2,
- minPriorObservation = 365,
- outcomeWashoutDays = 30,
- riskWindowStart = 1,
- riskWindowEnd = 90,
- covariateSettings = FeatureExtraction::createCovariateSettings(
- useDemographicsAge = T,
- useDemographicsGender = T,
- useConditionEraAnyTimePrior = T
- ),
- caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
- casePreTargetDuration = 365*5
- )
-)
-Characterization::runCharacterizationAnalyses(
- connectionDetails = connectionDetails,
- targetDatabaseSchema = schema,
- targetTable = 'cohort',
- outcomeDatabaseSchema = schema,
- outcomeTable = 'cohort',
- cdmDatabaseSchema = schema,
- characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempdir(), 'result'),
- executionPath = file.path(tempdir(), 'execution'),
- csvFilePrefix = 'c_',
- databaseId = '1',
- incremental = T,
- threads = 1,
- minCharacterizationMean = 0.0001,
- minCellCount = NULL,
- showSubjectId = T
+ condition_occurrence_count = rep(1, 7)
)
+ condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
+ condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)
-# check csv results are as expected
-
-tte <- read.csv(file.path(tempdir(), 'result','c_time_to_event.csv'))
-
-# check counts - 1-day subsequent missing?
-testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 1-day']))
-#subsequent is > 100 days after first drug so not in the 1-day count
-testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 1-day']))
-testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 30-day']))
-testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 30-day']))
-testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 365-day']))
-testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 365-day']))
-
-# check times
-testthat::expect_true(sum(c(2,5,12) %in% tte$time_to_event[tte$outcome_type == 'first' & tte$time_scale == 'per 1-day']) == 3)
-
-# TODO: check in code whether minCellCount < or <=
-
-dechal <- read.csv(file.path(tempdir(), 'result','c_dechallenge_rechallenge.csv'))
-testthat::expect_true(dechal$num_exposure_eras == 13)
-testthat::expect_true(dechal$num_persons_exposed == 10)
-testthat::expect_true(dechal$num_cases == 6)
-testthat::expect_true(dechal$dechallenge_attempt == 5)
-testthat::expect_true(dechal$dechallenge_success == 5)
-testthat::expect_true(dechal$rechallenge_attempt == 3)
-
-# one person has a rechal and event stops when second drug exposure stops
-testthat::expect_true(dechal$rechallenge_fail == 1)
-testthat::expect_true(dechal$rechallenge_success == 2)
-testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333)
-
-failed <- read.csv(file.path(tempdir(), 'result','c_rechallenge_fail_case_series.csv'))
-testthat::expect_true(nrow(failed) == 1)
-testthat::expect_true(failed$subject_id == 7)
-testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31)
-testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5)
-testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120)
-testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143)
-
-# Aggregate covs
-#=======
-counts <- read.csv(file.path(tempdir(), 'result','c_cohort_counts.csv'))
-# when restricted to first exposure 5 people have outcome
-testthat::expect_true(counts$row_count[counts$cohort_type == 'Cases'] == 5)
-# target is 9 because 1 has insufficient min prior obs
-testthat::expect_true(counts$row_count[counts$cohort_type == 'Target' &
- counts$target_cohort_id == 1] == 9)
-# make sure outcome is there a has count of 5
-testthat::expect_true(counts$row_count[counts$cohort_type == 'Target' &
- counts$target_cohort_id == 2] == 5)
-
-# Tall should not have first restriction
-testthat::expect_true(counts$row_count[counts$cohort_type == 'Tall' &
- counts$target_cohort_id == 1] == 13)
-testthat::expect_true(counts$person_count[counts$cohort_type == 'Tall' &
- counts$target_cohort_id == 1] == 10)
-# make sure outcome is there a has count of 6 and 5
-testthat::expect_true(counts$row_count[counts$cohort_type == 'Tall' &
- counts$target_cohort_id == 2] == 6)
-testthat::expect_true(counts$person_count[counts$cohort_type == 'Tall' &
- counts$target_cohort_id == 2] == 5)
-
-covs <- read.csv(file.path(tempdir(), 'result','c_covariates.csv'))
-
-# checks all females
-testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Cases'] == 1)
-testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Target' &
- covs$target_cohort_id == 1] == 1)
-testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Target' &
- covs$target_cohort_id == 2] == 1)
-
-## TODO: check diabetes and hypertensions
-#covs$covariate_id
-# 201820 7,8 and 10 have in history and all are cases
-ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
- covs$cohort_type == 'Cases'
-testthat::expect_true(covs$sum_value[ind] == 3)
-testthat::expect_true(covs$average_value[ind] == 3/5)
-
-ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
- covs$cohort_type == 'Target'
-testthat::expect_true(covs$sum_value[ind] == 3)
-testthat::expect_equal(covs$average_value[ind], 3/9, tolerance = 0.01)
-
-# 378253 7,9 (9 multiple times) but 9 not a case
-ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
- covs$cohort_type == 'Cases'
-testthat::expect_true(covs$sum_value[ind] == 1)
-testthat::expect_true(covs$average_value[ind] == 1/5)
-
-ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
- covs$cohort_type == 'Target'
-testthat::expect_true(covs$sum_value[ind] == 2)
-testthat::expect_equal(covs$average_value[ind], 2/9, tolerance = 0.01)
-
-
-covs_cont <- read.csv(file.path(tempdir(), 'result','c_covariates_continuous.csv'))
-
-# checks age in years
-testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Cases'] == 18)
-testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Cases'] == 5)
-testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' &
- covs_cont$target_cohort_id == 1] == 18)
-testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' &
- covs_cont$target_cohort_id == 1] == 9)
-testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' &
- covs_cont$target_cohort_id == 2] == 18)
-testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' &
- covs_cont$target_cohort_id == 2] == 5)
+ DatabaseConnector::insertTable(
+ connection = con,
+ databaseSchema = schema,
+ tableName = "condition_era",
+ data = condition_era
+ )
+ # add concept
+ concept <- data.frame(
+ concept_id = c(201820, 378253),
+ concept_name = c("diabetes", "hypertension"),
+ domain_id = rep(1, 2),
+ vocabulary_id = rep(1, 2),
+ concept_class_id = c("Condition", "Condition"),
+ standard_concept = rep("S", 2),
+ concept_code = rep("SNOMED", 2)
+ # ,valid_start_date = NULL,
+ # valid_end_date = NULL,
+ # invalid_reason = NULL
+ )
+ DatabaseConnector::insertTable(
+ connection = con,
+ databaseSchema = schema,
+ tableName = "concept",
+ data = concept
+ )
+ # add cohort - tte/dechal/rechal
+ cohort <- data.frame(
+ subject_id = c(
+ 1:10,
+ 7, 8, 10,
+ c(3, 6, 7, 8, 10),
+ c(7)
+ ),
+ cohort_definition_id = c(
+ rep(1, 10),
+ rep(1, 3),
+ rep(2, 5),
+ 2
+ ),
+ cohort_start_date = c(
+ rep("2018-01-01", 10),
+ rep("2018-05-01", 3),
+ "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
+ "2018-05-24"
+ ),
+ cohort_end_date = c(
+ rep("2018-02-01", 10),
+ rep("2018-06-01", 3),
+ "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
+ "2018-06-05"
+ )
+ )
+ cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
+ cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
+ DatabaseConnector::insertTable(
+ connection = con,
+ databaseSchema = schema,
+ tableName = "cohort",
+ data = cohort
+ )
+
+ # create settings and run
+ characterizationSettings <- Characterization::createCharacterizationSettings(
+ timeToEventSettings = Characterization::createTimeToEventSettings(
+ targetIds = 1,
+ outcomeIds = 2
+ ),
+ dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
+ targetIds = 1,
+ outcomeIds = 2
+ ),
+ aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
+ targetIds = 1,
+ outcomeIds = 2,
+ minPriorObservation = 365,
+ outcomeWashoutDays = 30,
+ riskWindowStart = 1,
+ riskWindowEnd = 90,
+ covariateSettings = FeatureExtraction::createCovariateSettings(
+ useDemographicsAge = T,
+ useDemographicsGender = T,
+ useConditionEraAnyTimePrior = T
+ ),
+ caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
+ casePreTargetDuration = 365 * 5
+ )
+ )
+ Characterization::runCharacterizationAnalyses(
+ connectionDetails = connectionDetails,
+ targetDatabaseSchema = schema,
+ targetTable = "cohort",
+ outcomeDatabaseSchema = schema,
+ outcomeTable = "cohort",
+ cdmDatabaseSchema = schema,
+ characterizationSettings = characterizationSettings,
+ outputDirectory = file.path(tempdir(), "result"),
+ executionPath = file.path(tempdir(), "execution"),
+ csvFilePrefix = "c_",
+ databaseId = "1",
+ incremental = T,
+ threads = 1,
+ minCharacterizationMean = 0.0001,
+ minCellCount = NULL,
+ showSubjectId = T
+ )
+
+ # check csv results are as expected
+
+ tte <- read.csv(file.path(tempdir(), "result", "c_time_to_event.csv"))
+
+ # check counts; the subsequent event is absent at the per 1-day scale
+ testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]))
+ # subsequent is > 100 days after first drug so not in the 1-day count
+ testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 1-day"]))
+ testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 30-day"]))
+ testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 30-day"]))
+ testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 365-day"]))
+ testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 365-day"]))
+
+ # check times
+ testthat::expect_true(sum(c(2, 5, 12) %in% tte$time_to_event[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]) == 3)
+
+ # TODO: check in code whether minCellCount < or <=
+
+ dechal <- read.csv(file.path(tempdir(), "result", "c_dechallenge_rechallenge.csv"))
+ testthat::expect_true(dechal$num_exposure_eras == 13)
+ testthat::expect_true(dechal$num_persons_exposed == 10)
+ testthat::expect_true(dechal$num_cases == 6)
+ testthat::expect_true(dechal$dechallenge_attempt == 5)
+ testthat::expect_true(dechal$dechallenge_success == 5)
+ testthat::expect_true(dechal$rechallenge_attempt == 3)
+
+ # one person has a rechallenge where the event stops when the second drug exposure stops
+ testthat::expect_true(dechal$rechallenge_fail == 1)
+ testthat::expect_true(dechal$rechallenge_success == 2)
+ testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333)
+
+ failed <- read.csv(file.path(tempdir(), "result", "c_rechallenge_fail_case_series.csv"))
+ testthat::expect_true(nrow(failed) == 1)
+ testthat::expect_true(failed$subject_id == 7)
+ testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31)
+ testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5)
+ testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120)
+ testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143)
+
+ # Aggregate covs
+ # =======
+ counts <- read.csv(file.path(tempdir(), "result", "c_cohort_counts.csv"))
+ # when restricted to first exposure, 5 people have the outcome
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 5)
+ # target is 9 because person 1 has insufficient minimum prior observation
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
+ counts$target_cohort_id == 1] == 9)
+ # make sure the outcome is there and has a count of 5
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
+ counts$target_cohort_id == 2] == 5)
+
+ # Tall should not have the first-exposure restriction
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
+ counts$target_cohort_id == 1] == 13)
+ testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
+ counts$target_cohort_id == 1] == 10)
+ # make sure the outcome is there and has counts of 6 and 5
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
+ counts$target_cohort_id == 2] == 6)
+ testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
+ counts$target_cohort_id == 2] == 5)
+
+ covs <- read.csv(file.path(tempdir(), "result", "c_covariates.csv"))
+
+ # checks all females
+ testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Cases"] == 1)
+ testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
+ covs$target_cohort_id == 1] == 1)
+ testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" &
+ covs$target_cohort_id == 2] == 1)
+
+ ## TODO: check diabetes and hypertension
+ # covs$covariate_id
+ # 201820: persons 7, 8 and 10 have it in history and all are cases
+ ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
+ covs$cohort_type == "Cases"
+ testthat::expect_true(covs$sum_value[ind] == 3)
+ testthat::expect_true(covs$average_value[ind] == 3 / 5)
+
+ ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 &
+ covs$cohort_type == "Target"
+ testthat::expect_true(covs$sum_value[ind] == 3)
+ testthat::expect_equal(covs$average_value[ind], 3 / 9, tolerance = 0.01)
+
+ # 378253: persons 7 and 9 have it (9 multiple times), but 9 is not a case
+ ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
+ covs$cohort_type == "Cases"
+ testthat::expect_true(covs$sum_value[ind] == 1)
+ testthat::expect_true(covs$average_value[ind] == 1 / 5)
+
+ ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 &
+ covs$cohort_type == "Target"
+ testthat::expect_true(covs$sum_value[ind] == 2)
+ testthat::expect_equal(covs$average_value[ind], 2 / 9, tolerance = 0.01)
+
+
+ covs_cont <- read.csv(file.path(tempdir(), "result", "c_covariates_continuous.csv"))
+
+ # checks age in years
+ testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 18)
+ testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 5)
+ testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
+ covs_cont$target_cohort_id == 1] == 18)
+ testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
+ covs_cont$target_cohort_id == 1] == 9)
+ testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
+ covs_cont$target_cohort_id == 2] == 18)
+ testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" &
+ covs_cont$target_cohort_id == 2] == 5)
})
test_that("manual data checking exclude count works", {
-
# this test creates made-up OMOP CDM data
# and runs runCharacterizationAnalyses on the data
# to check whether the results are as expected
connectionDetails <- DatabaseConnector::createConnectionDetails(
- dbms = 'sqlite',
+ dbms = "sqlite",
server = manualData2
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
- schema <- 'main'
+ schema <- "main"
# add persons - aggregate covs (age)
persons <- data.frame(
@@ -321,18 +321,18 @@ test_that("manual data checking exclude count works", {
year_of_birth = rep(2000, 10),
race_concept_id = rep(1, 10),
ethnicity_concept_id = rep(1, 10),
- location_id = rep(1,10),
- provider_id = rep(1,10),
- care_site_id = rep(1,10),
+ location_id = rep(1, 10),
+ provider_id = rep(1, 10),
+ care_site_id = rep(1, 10),
person_source_value = 1:10,
- gender_source_value = rep('female', 10),
- race_source_value = rep('na', 10),
- ethnicity_source_value = rep('na', 10)
+ gender_source_value = rep("female", 10),
+ race_source_value = rep("na", 10),
+ ethnicity_source_value = rep("na", 10)
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'person',
+ tableName = "person",
data = persons
)
@@ -340,16 +340,16 @@ test_that("manual data checking exclude count works", {
obs_period <- data.frame(
observation_period_id = 1:10,
person_id = 1:10,
- observation_period_start_date = rep('2000-12-31', 10),
- observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)),
- period_type_concept_id = rep(1,10)
+ observation_period_start_date = rep("2000-12-31", 10),
+ observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
+ period_type_concept_id = rep(1, 10)
)
obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'observation_period',
+ tableName = "observation_period",
data = obs_period
)
# person 1 has 1 day obs
@@ -362,14 +362,18 @@ test_that("manual data checking exclude count works", {
condition_era <- data.frame(
condition_era_id = 1:7,
- person_id = c(7,7,8, 9,9,9,10),
- condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820),
- condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
- condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
+ person_id = c(7, 7, 8, 9, 9, 9, 10),
+ condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
+ condition_era_start_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
+ ),
+ condition_era_end_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
+ ),
condition_occurrence_count = rep(1, 7)
)
condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
@@ -378,27 +382,27 @@ test_that("manual data checking exclude count works", {
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'condition_era',
+ tableName = "condition_era",
data = condition_era
)
# add concept
concept <- data.frame(
- concept_id = c(201820,378253),
- concept_name = c('diabetes', 'hypertension'),
- domain_id = rep(1,2),
- vocabulary_id = rep(1,2),
- concept_class_id = c('Condition', 'Condition'),
- standard_concept = rep('S',2),
- concept_code = rep('Snowmed',2)
- #,valid_start_date = NULL,
- #valid_end_date = NULL,
- #invalid_reason = NULL
+ concept_id = c(201820, 378253),
+ concept_name = c("diabetes", "hypertension"),
+ domain_id = rep(1, 2),
+ vocabulary_id = rep(1, 2),
+ concept_class_id = c("Condition", "Condition"),
+ standard_concept = rep("S", 2),
+ concept_code = rep("SNOMED", 2)
+ # ,valid_start_date = NULL,
+ # valid_end_date = NULL,
+ # invalid_reason = NULL
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'concept',
+ tableName = "concept",
data = concept
)
@@ -407,31 +411,31 @@ test_that("manual data checking exclude count works", {
cohort <- data.frame(
subject_id = c(
1:10,
- 7,8,10,
- c(3,6,7,8,10),
+ 7, 8, 10,
+ c(3, 6, 7, 8, 10),
c(7),
6
),
cohort_definition_id = c(
- rep(1,10),
- rep(1,3),
+ rep(1, 10),
+ rep(1, 3),
rep(2, 5),
2,
2
),
cohort_start_date = c(
- rep('2018-01-01', 10),
- rep('2018-05-01',3),
- '2018-01-13','2018-01-03',rep('2018-01-06',3),
- '2018-05-24',
- '2017-12-29'
+ rep("2018-01-01", 10),
+ rep("2018-05-01", 3),
+ "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
+ "2018-05-24",
+ "2017-12-29"
),
cohort_end_date = c(
- rep('2018-02-01', 10),
- rep('2018-06-01',3),
- '2018-02-02','2018-02-04',rep('2018-02-08',3),
- '2018-06-05',
- '2017-12-29'
+ rep("2018-02-01", 10),
+ rep("2018-06-01", 3),
+ "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
+ "2018-06-05",
+ "2017-12-29"
)
)
cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
@@ -439,7 +443,7 @@ test_that("manual data checking exclude count works", {
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'cohort',
+ tableName = "cohort",
data = cohort
)
@@ -466,21 +470,21 @@ test_that("manual data checking exclude count works", {
useConditionEraAnyTimePrior = T
),
caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
- casePreTargetDuration = 365*5
+ casePreTargetDuration = 365 * 5
)
)
Characterization::runCharacterizationAnalyses(
connectionDetails = connectionDetails,
targetDatabaseSchema = schema,
- targetTable = 'cohort',
+ targetTable = "cohort",
outcomeDatabaseSchema = schema,
- outcomeTable = 'cohort',
+ outcomeTable = "cohort",
cdmDatabaseSchema = schema,
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempdir(), 'result2'),
- executionPath = file.path(tempdir(), 'execution2'),
- csvFilePrefix = 'c_',
- databaseId = '1',
+ outputDirectory = file.path(tempdir(), "result2"),
+ executionPath = file.path(tempdir(), "execution2"),
+ csvFilePrefix = "c_",
+ databaseId = "1",
incremental = T,
threads = 1,
minCharacterizationMean = 0.0001,
@@ -489,9 +493,8 @@ test_that("manual data checking exclude count works", {
)
# load the cohort counts to make sure the Exclude cohort type is there
- counts <- read.csv(file.path(tempdir(), 'result2','c_cohort_counts.csv'))
+ counts <- read.csv(file.path(tempdir(), "result2", "c_cohort_counts.csv"))
# when restricted to first exposure, 5 people have the outcome but one is excluded, leaving 4 cases
- testthat::expect_true(counts$row_count[counts$cohort_type == 'Cases'] == 4)
- testthat::expect_true(counts$row_count[counts$cohort_type == 'Exclude'] == 1)
-
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 4)
+ testthat::expect_true(counts$row_count[counts$cohort_type == "Exclude"] == 1)
})
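
Several tests in this patch (see test-timeToEvent.R and test-dechallengeRechallenge.R) expect censored counts of -9999 when minCellCount = 9999. A sketch of that censoring convention, assuming the sentinel is the negated minCellCount — inferred from the assertions, not from the package source:

```r
# Assumed censoring rule: counts below the threshold become -minCellCount.
censorCount <- function(x, minCellCount) {
  ifelse(x < minCellCount, -minCellCount, x)
}
censorCount(c(3, 12000), minCellCount = 9999) # -9999 12000
```
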
diff --git a/tests/testthat/test-runCharacterization.R b/tests/testthat/test-runCharacterization.R
index 27b88da..3e45925 100644
--- a/tests/testthat/test-runCharacterization.R
+++ b/tests/testthat/test-runCharacterization.R
@@ -119,8 +119,8 @@ test_that("runCharacterizationAnalyses", {
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempFolder,'result'),
- executionPath = file.path(tempFolder,'execution'),
+ outputDirectory = file.path(tempFolder, "result"),
+ executionPath = file.path(tempFolder, "execution"),
csvFilePrefix = "c_",
databaseId = "1",
incremental = T,
@@ -161,43 +161,40 @@ test_that("runCharacterizationAnalyses", {
testthat::expect_false(
file.exists(file.path(tempFolder, "result", "c_dechallenge_rechallenge.csv"))
)
- #testthat::expect_true(
+ # testthat::expect_true(
# file.exists(file.path(tempFolder, "result", "rechallenge_fail_case_series.csv"))
- #)
+ # )
testthat::expect_true(
file.exists(file.path(tempFolder, "result", "c_time_to_event.csv"))
)
# make sure both tte runs are in the csv
tte <- readr::read_csv(
- file = file.path(tempFolder,'result' ,"c_time_to_event.csv"),
- show_col_types = FALSE
- )
+ file = file.path(tempFolder, "result", "c_time_to_event.csv"),
+ show_col_types = FALSE
+ )
testthat::expect_equivalent(
unique(tte$target_cohort_definition_id),
- c(1,2)
+ c(1, 2)
)
-
-
})
-manualDataMin <- file.path(tempdir(), 'manual_min.sqlite')
+manualDataMin <- file.path(tempdir(), "manual_min.sqlite")
on.exit(file.remove(manualDataMin), add = TRUE)
test_that("min cell count works", {
-
tempFolder <- tempfile("CharacterizationMin")
on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)
connectionDetails <- DatabaseConnector::createConnectionDetails(
- dbms = 'sqlite',
+ dbms = "sqlite",
server = manualDataMin
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
on.exit(DatabaseConnector::disconnect(con))
- schema <- 'main'
+ schema <- "main"
# add persons - aggregate covs (age)
persons <- data.frame(
@@ -206,18 +203,18 @@ test_that("min cell count works", {
year_of_birth = rep(2000, 10),
race_concept_id = rep(1, 10),
ethnicity_concept_id = rep(1, 10),
- location_id = rep(1,10),
- provider_id = rep(1,10),
- care_site_id = rep(1,10),
+ location_id = rep(1, 10),
+ provider_id = rep(1, 10),
+ care_site_id = rep(1, 10),
person_source_value = 1:10,
- gender_source_value = rep('female', 10),
- race_source_value = rep('na', 10),
- ethnicity_source_value = rep('na', 10)
+ gender_source_value = rep("female", 10),
+ race_source_value = rep("na", 10),
+ ethnicity_source_value = rep("na", 10)
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'person',
+ tableName = "person",
data = persons
)
@@ -225,16 +222,16 @@ test_that("min cell count works", {
obs_period <- data.frame(
observation_period_id = 1:10,
person_id = 1:10,
- observation_period_start_date = rep('2000-12-31', 10),
- observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)),
- period_type_concept_id = rep(1,10)
+ observation_period_start_date = rep("2000-12-31", 10),
+ observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)),
+ period_type_concept_id = rep(1, 10)
)
obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'observation_period',
+ tableName = "observation_period",
data = obs_period
)
# person 1 has 1 day obs
@@ -247,14 +244,18 @@ test_that("min cell count works", {
condition_era <- data.frame(
condition_era_id = 1:7,
- person_id = c(7,7,8, 9,9,9,10),
- condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820),
- condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
- condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01',
- '2006-01-04', '2014-08-02', '2014-08-04',
- '2013-01-04'),
+ person_id = c(7, 7, 8, 9, 9, 9, 10),
+ condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820),
+ condition_era_start_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
+ ),
+ condition_era_end_date = c(
+ "2011-01-01", "2013-04-03", "2016-01-01",
+ "2006-01-04", "2014-08-02", "2014-08-04",
+ "2013-01-04"
+ ),
condition_occurrence_count = rep(1, 7)
)
condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
@@ -263,27 +264,27 @@ test_that("min cell count works", {
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'condition_era',
+ tableName = "condition_era",
data = condition_era
)
# add concept
concept <- data.frame(
- concept_id = c(201820,378253),
- concept_name = c('diabetes', 'hypertension'),
- domain_id = rep(1,2),
- vocabulary_id = rep(1,2),
- concept_class_id = c('Condition', 'Condition'),
- standard_concept = rep('S',2),
- concept_code = rep('Snowmed',2)
- #,valid_start_date = NULL,
- #valid_end_date = NULL,
- #invalid_reason = NULL
+ concept_id = c(201820, 378253),
+ concept_name = c("diabetes", "hypertension"),
+ domain_id = rep(1, 2),
+ vocabulary_id = rep(1, 2),
+ concept_class_id = c("Condition", "Condition"),
+ standard_concept = rep("S", 2),
+ concept_code = rep("SNOMED", 2)
+ # ,valid_start_date = NULL,
+ # valid_end_date = NULL,
+ # invalid_reason = NULL
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'concept',
+ tableName = "concept",
data = concept
)
@@ -291,27 +292,27 @@ test_that("min cell count works", {
cohort <- data.frame(
subject_id = c(
1:10,
- 7,8,10,
- c(3,6,7,8,10),
+ 7, 8, 10,
+ c(3, 6, 7, 8, 10),
c(7)
),
cohort_definition_id = c(
- rep(1,10),
- rep(1,3),
+ rep(1, 10),
+ rep(1, 3),
rep(2, 5),
2
),
cohort_start_date = c(
- rep('2018-01-01', 10),
- rep('2018-05-01',3),
- '2018-01-13','2018-01-03',rep('2018-01-06',3),
- '2018-05-24'
+ rep("2018-01-01", 10),
+ rep("2018-05-01", 3),
+ "2018-01-13", "2018-01-03", rep("2018-01-06", 3),
+ "2018-05-24"
),
cohort_end_date = c(
- rep('2018-02-01', 10),
- rep('2018-06-01',3),
- '2018-02-02','2018-02-04',rep('2018-02-08',3),
- '2018-06-05'
+ rep("2018-02-01", 10),
+ rep("2018-06-01", 3),
+ "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
+ "2018-06-05"
)
)
cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
@@ -319,7 +320,7 @@ test_that("min cell count works", {
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
- tableName = 'cohort',
+ tableName = "cohort",
data = cohort
)
@@ -346,7 +347,7 @@ test_that("min cell count works", {
useConditionEraAnyTimePrior = T
),
caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
- casePreTargetDuration = 365*5
+ casePreTargetDuration = 365 * 5
)
)
@@ -358,8 +359,8 @@ test_that("min cell count works", {
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempFolder,'result_mincell'),
- executionPath = file.path(tempFolder,'execution_mincell'),
+ outputDirectory = file.path(tempFolder, "result_mincell"),
+ executionPath = file.path(tempFolder, "execution_mincell"),
csvFilePrefix = "c_",
databaseId = "1",
incremental = F,
@@ -426,5 +427,4 @@ test_that("min cell count works", {
testthat::expect_true(sum(is.na(res$p_75_value)) == length(res$p_75_value))
testthat::expect_true(sum(is.na(res$min_value)) == length(res$min_value))
testthat::expect_true(sum(is.na(res$max_value)) == length(res$max_value))
-
})
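
The manual-data tests in this patch all build a disposable OMOP-style database the same way. A condensed, self-contained sketch of that fixture pattern (table contents illustrative; only calls already used in these tests):

```r
# Throwaway SQLite "CDM" built with DatabaseConnector, as in the tests.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "sqlite",
  server = file.path(tempdir(), "fixture.sqlite")
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
DatabaseConnector::insertTable(
  connection = con,
  databaseSchema = "main",
  tableName = "person",
  data = data.frame(
    person_id = 1:2,
    gender_concept_id = rep(8532, 2), # female
    year_of_birth = rep(2000, 2)
  )
)
DatabaseConnector::disconnect(con)
```
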
diff --git a/tests/testthat/test-timeToEvent.R b/tests/testthat/test-timeToEvent.R
index 4a82c5b..f0d7e3e 100644
--- a/tests/testthat/test-timeToEvent.R
+++ b/tests/testthat/test-timeToEvent.R
@@ -39,19 +39,19 @@ test_that("computeTimeToEventSettings", {
targetTable = "cohort",
settings = res,
outputFolder = tteFolder,
- databaseId = 'tte_test'
+ databaseId = "tte_test"
)
testthat::expect_true(file.exists(file.path(tteFolder, "time_to_event.csv")))
tte <- readr::read_csv(
- file = file.path(tteFolder,'time_to_event.csv'),
+ file = file.path(tteFolder, "time_to_event.csv"),
show_col_types = F
- )
+ )
testthat::expect_true(nrow(tte) == 160)
testthat::expect_true("database_id" %in% colnames(tte))
- testthat::expect_true(tte$database_id[1] == 'tte_test')
+ testthat::expect_true(tte$database_id[1] == "tte_test")
testthat::expect_true(
length(
@@ -79,7 +79,7 @@ test_that("computeTimeToEventSettings", {
sum(
unique(tte$outcome_cohort_definition_id)
%in% outcomeIds
- ) ==
+ ) ==
length(unique(tte$outcome_cohort_definition_id))
)
@@ -93,15 +93,14 @@ test_that("computeTimeToEventSettings", {
targetTable = "cohort",
settings = res,
outputFolder = tteFolder,
- databaseId = 'tte_test',
+ databaseId = "tte_test",
minCellCount = 9999
)
tte <- readr::read_csv(
- file = file.path(tteFolder,'time_to_event.csv'),
+ file = file.path(tteFolder, "time_to_event.csv"),
show_col_types = F
)
testthat::expect_true(max(tte$num_events) == -9999)
-
})
diff --git a/tests/testthat/test-viewShiny.R b/tests/testthat/test-viewShiny.R
index 5ab1e23..10fba8e 100644
--- a/tests/testthat/test-viewShiny.R
+++ b/tests/testthat/test-viewShiny.R
@@ -1,7 +1,7 @@
context("ViewShiny")
# create a folder with results for the shiny app
-resultLocation <- file.path(tempdir(),paste0('d_', paste0(sample(100,3), collapse = '_'), sep = ''), "shinyResults")
+resultLocation <- file.path(tempdir(), paste0("d_", paste0(sample(100, 3), collapse = "_")), "shinyResults")
if (!dir.exists(resultLocation)) {
dir.create(resultLocation, recursive = T)
}
@@ -85,8 +85,8 @@ test_that("prepareCharacterizationShiny works", {
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(resultLocation, 'result'),
- executionPath = file.path(resultLocation, 'execution'),
+ outputDirectory = file.path(resultLocation, "result"),
+ executionPath = file.path(resultLocation, "execution"),
csvFilePrefix = "c_",
databaseId = "1",
threads = 1,
@@ -96,7 +96,7 @@ test_that("prepareCharacterizationShiny works", {
)
settings <- Characterization:::prepareCharacterizationShiny(
- resultFolder = file.path(resultLocation,'result'),
+ resultFolder = file.path(resultLocation, "result"),
cohortDefinitionSet = NULL,
sqliteLocation = file.path(resultLocation, "sqliteCharacterization", "sqlite.sqlite")
)
@@ -128,7 +128,7 @@ test_that("prepareCharacterizationShiny works", {
test_that("shiny app works", {
settings <- prepareCharacterizationShiny(
- resultFolder = file.path(resultLocation,'result'),
+ resultFolder = file.path(resultLocation, "result"),
cohortDefinitionSet = NULL,
sqliteLocation = file.path(resultLocation, "sqliteCharacterization", "sqlite.sqlite")
)
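Editor's note: these tests exercise the internal prepareCharacterizationShiny via :::; end users would instead call the exported viewer, as the vignette change below does. A minimal call, assuming resultLocation/result was first populated by runCharacterizationAnalyses:

```r
# Launch the Shiny viewer on a folder of characterization results
# (assumes the result folder was created by runCharacterizationAnalyses).
Characterization::viewCharacterization(
  resultFolder = file.path(resultLocation, "result"),
  cohortDefinitionSet = NULL
)
```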
diff --git a/vignettes/UsingPackage.Rmd b/vignettes/UsingPackage.Rmd
index f76d56f..809f6bc 100644
--- a/vignettes/UsingPackage.Rmd
+++ b/vignettes/UsingPackage.Rmd
@@ -54,7 +54,7 @@ Eunomia::createCohorts(connectionDetails = connectionDetails)
We also need to have the Characterization package installed and loaded
```{r tidy=TRUE,eval=FALSE}
remotes::install_github("ohdsi/FeatureExtraction")
-remotes::install_github("ohdsi/Characterization", ref = 'new_approach')
+remotes::install_github("ohdsi/Characterization", ref = "new_approach")
```
```{r tidy=TRUE,eval=TRUE}
@@ -109,12 +109,12 @@ exampleAggregateCovariateSettings <- createAggregateCovariateSettings(
targetIds = exampleTargetIds,
outcomeIds = exampleOutcomeIds,
riskWindowStart = 1, startAnchor = "cohort start",
- riskWindowEnd = 365, endAnchor = "cohort start",
- outcomeWashoutDays = 9999,
+ riskWindowEnd = 365, endAnchor = "cohort start",
+ outcomeWashoutDays = 9999,
minPriorObservation = 365,
- covariateSettings = exampleCovariateSettings,
- caseCovariateSettings = caseCovariateSettings,
- casePreTargetDuration = 90,
+ covariateSettings = exampleCovariateSettings,
+ caseCovariateSettings = caseCovariateSettings,
+ casePreTargetDuration = 90,
casePostOutcomeDuration = 90
)
```
@@ -130,12 +130,13 @@ runCharacterizationAnalyses(
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
characterizationSettings = createCharacterizationSettings(
- aggregateCovariateSettings = exampleAggregateCovariateSettings
- ),
+ aggregateCovariateSettings = exampleAggregateCovariateSettings
+ ),
databaseId = "Eunomia",
- runId = 1,
- minCharacterizationMean = 0.01,
- outputDirectory = file.path(getwd(), 'example_char', 'results'), executionPath = file.path(getwd(), 'example_char', 'execution'),
+ runId = 1,
+ minCharacterizationMean = 0.01,
+ outputDirectory = file.path(getwd(), "example_char", "results"),
+ executionPath = file.path(getwd(), "example_char", "execution"),
minCellCount = 10,
incremental = F,
threads = 1
@@ -179,8 +179,8 @@ dc <- computeDechallengeRechallengeAnalyses(
targetDatabaseSchema = "main",
targetTable = "cohort",
settings = exampleDechallengeRechallengeSettings,
- databaseId = "Eunomia",
- outcomeTable = file.path(getwd(), 'example_char', 'results'),
+ databaseId = "Eunomia",
+ outputFolder = file.path(getwd(), "example_char", "results"),
minCellCount = 5
)
```
@@ -196,7 +196,7 @@ failed <- computeRechallengeFailCaseSeriesAnalyses(
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
databaseId = "Eunomia",
- outcomeTable = file.path(getwd(), 'example_char', 'results'),
+ outputFolder = file.path(getwd(), "example_char", "results"),
minCellCount = 5
)
```
@@ -225,7 +225,7 @@ tte <- computeTimeToEventAnalyses(
targetTable = "cohort",
settings = exampleTimeToEventSettings,
databaseId = "Eunomia",
- outcomeTable = file.path(getwd(), 'example_char', 'results'),
+ outputFolder = file.path(getwd(), "example_char", "results"),
minCellCount = 5
)
```
@@ -264,10 +264,10 @@ runCharacterizationAnalyses(
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
characterizationSettings = characterizationSettings,
- outputDirectory = file.path(tempdir(), "example", 'results'),
+ outputDirectory = file.path(tempdir(), "example", "results"),
executionPath = file.path(tempdir(), "example", "execution"),
- csvFilePrefix = "c_",
- databaseId = "1",
+ csvFilePrefix = "c_",
+ databaseId = "1",
incremental = F,
minCharacterizationMean = 0.01,
minCellCount = 5
@@ -278,7 +278,7 @@ This will create csv files with the results in the saveDirectory. You can run t
```{r eval=FALSE}
viewCharacterization(
- resultFolder = file.path(tempdir(), "example", "results"),
+ resultFolder = file.path(tempdir(), "example", "results"),
cohortDefinitionSet = NULL
- )
+)
```
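Editor's note: because runCharacterizationAnalyses writes its results as prefixed CSV files (csvFilePrefix = "c_") under outputDirectory, they can also be inspected without the Shiny app. A hedged sketch, assuming the directory layout from the example above and that every result file carries the "c_" prefix:

```r
# Read the exported result CSVs back into R; file names start with the
# csvFilePrefix ("c_") passed to runCharacterizationAnalyses above.
resultDir <- file.path(tempdir(), "example", "results")
csvFiles <- list.files(resultDir, pattern = "^c_.*\\.csv$", full.names = TRUE)
results <- lapply(csvFiles, readr::read_csv, show_col_types = FALSE)
names(results) <- basename(csvFiles)
```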