From 2a6016cc0d0aa04fc28149db962b5508c236931e Mon Sep 17 00:00:00 2001
From: Chris Knoll
Date: Thu, 8 Aug 2024 09:31:41 -0400
Subject: [PATCH] Prepare for version 2.0.0

Updated to version 2.0.0.
Ran package maintenance.
Rebuilt site.
Updated NEWS.md
---
 DESCRIPTION | 2 +-
 NEWS.md | 5 +-
 R/AggregateCovariates.R | 569 ++++++++-------
 R/CustomCovariates.R | 127 ++--
 R/Database.R | 29 +-
 R/DechallengeRechallenge.R | 126 ++--
 R/HelperFunctions.R | 17 +-
 R/Incremental.R | 109 ++-
 R/RunCharacterization.R | 195 +++--
 R/SaveLoad.R | 8 +-
 R/TimeToEvent.R | 89 +--
 R/ViewShiny.R | 37 +-
 docs/404.html | 6 +-
 docs/articles/InstallationGuide.html | 6 +-
 docs/articles/UsingPackage.html | 110 ++-
 docs/articles/index.html | 6 +-
 docs/authors.html | 8 +-
 docs/index.html | 6 +-
 docs/news/index.html | 13 +-
 docs/pkgdown.yml | 6 +-
 docs/reference/Characterization-package.html | 6 +-
 docs/reference/cleanIncremental.html | 11 +-
 docs/reference/cleanNonIncremental.html | 11 +-
 ...computeDechallengeRechallengeAnalyses.html | 6 +-
 ...puteRechallengeFailCaseSeriesAnalyses.html | 6 +-
 .../reference/computeTimeToEventAnalyses.html | 6 +-
 .../createAggregateCovariateSettings.html | 6 +-
 .../createCharacterizationSettings.html | 6 +-
 .../createCharacterizationTables.html | 6 +-
 .../createDechallengeRechallengeSettings.html | 6 +-
 .../createDuringCovariateSettings.html | 6 +-
 docs/reference/createSqliteDatabase.html | 6 +-
 docs/reference/createTimeToEventSettings.html | 6 +-
 .../exportDechallengeRechallengeToCsv.html | 6 +-
 .../exportRechallengeFailCaseSeriesToCsv.html | 6 +-
 docs/reference/exportTimeToEventToCsv.html | 6 +-
 docs/reference/getDbDuringCovariateData.html | 6 +-
 docs/reference/index.html | 12 +-
 docs/reference/insertResultsToDatabase.html | 6 +-
 .../loadCharacterizationSettings.html | 6 +-
 .../runCharacterizationAnalyses.html | 6 +-
 .../saveCharacterizationSettings.html | 6 +-
 docs/reference/viewCharacterization.html | 6 +-
 docs/sitemap.xml | 6 +-
 tests/testthat/test-Incremental.R | 151 ++--
 tests/testthat/test-aggregateCovariate.R | 166 +++--
 tests/testthat/test-dbs.R | 51 +-
 tests/testthat/test-dechallengeRechallenge.R | 175 ++--
 tests/testthat/test-manualData.R | 685 +++++++++---------
 tests/testthat/test-runCharacterization.R | 124 ++--
 tests/testthat/test-timeToEvent.R | 15 +-
 tests/testthat/test-viewShiny.R | 10 +-
 vignettes/UsingPackage.Rmd | 40 +-
 53 files changed, 1500 insertions(+), 1551 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2c507b1..7790036 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: Characterization
 Type: Package
 Title: Characterizations of Cohorts
-Version: 1.1.1
+Version: 2.0.0
 Date: 2024-08-07
 Authors@R: c(
     person("Jenna", "Reps", , "reps@ohdsi.org", role = c("aut", "cre")),
diff --git a/NEWS.md b/NEWS.md
index b3953d1..076b428 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,10 +1,7 @@
-Characterization 1.1.1
+Characterization 2.0.0
 ======================
 - added tests for all HADES supported dbms
 - updated minCellCount censoring
-
-Characterization 1.1.0
-======================
 - fixed issues with incremental
 - made the code more modular to enable new characterizations to be added
 - added job optimization code to optimize the distribution of jobs
diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R
index bca3e9d..8a3c220 100644
--- a/R/AggregateCovariates.R
+++ b/R/AggregateCovariates.R
@@ -67,7 +67,7 @@ createAggregateCovariateSettings <- function(
     useDeviceExposureShortTerm = T,
     useVisitConceptCountShortTerm = T,
     endDays = 0,
-    
longTermStartDays = -365, + longTermStartDays = -365, shortTermStartDays = -30 ), caseCovariateSettings = createDuringCovariateSettings( @@ -78,11 +78,10 @@ createAggregateCovariateSettings <- function( useMeasurementDuring = T, useObservationDuring = T, useVisitConceptCountDuring = T - ), + ), casePreTargetDuration = 365, casePostOutcomeDuration = 365, - extractNonCaseCovariates = T - ) { + extractNonCaseCovariates = T) { errorMessages <- checkmate::makeAssertCollection() # check targetIds is a vector of int/double .checkCohortIds( @@ -98,16 +97,16 @@ createAggregateCovariateSettings <- function( ) # check TAR - EFF edit - if(length(riskWindowStart)>1){ - stop('Please add one time-at-risk per setting') + if (length(riskWindowStart) > 1) { + stop("Please add one time-at-risk per setting") } .checkTimeAtRisk( - riskWindowStart = riskWindowStart, - startAnchor = startAnchor, - riskWindowEnd = riskWindowEnd, - endAnchor = endAnchor, - errorMessages = errorMessages - ) + riskWindowStart = riskWindowStart, + startAnchor = startAnchor, + riskWindowEnd = riskWindowEnd, + endAnchor = endAnchor, + errorMessages = errorMessages + ) # check covariateSettings .checkCovariateSettings( @@ -116,11 +115,13 @@ createAggregateCovariateSettings <- function( ) # check temporal is false - if(inherits(covariateSettings, 'covariateSettings')){ + if (inherits(covariateSettings, "covariateSettings")) { covariateSettings <- list(covariateSettings) } - if(sum(unlist(lapply(covariateSettings, function(x){x$temporal})))>0){ - stop('Temporal covariateSettings not supported by createAggregateCovariateSettings()') + if (sum(unlist(lapply(covariateSettings, function(x) { + x$temporal + }))) > 0) { + stop("Temporal covariateSettings not supported by createAggregateCovariateSettings()") } # check minPriorObservation @@ -134,12 +135,12 @@ createAggregateCovariateSettings <- function( checkmate::reportAssertions(errorMessages) # check unique Ts and Os - if(length(targetIds) != length(unique(targetIds))){ - message('targetIds have duplicates - making unique') + if (length(targetIds) != length(unique(targetIds))) { + message("targetIds have duplicates - making unique") targetIds <- unique(targetIds) } - if(length(outcomeIds) != length(unique(outcomeIds))){ - message('outcomeIds have duplicates - making unique') + if (length(outcomeIds) != length(unique(outcomeIds))) { + message("outcomeIds have duplicates - making unique") outcomeIds <- unique(outcomeIds) } @@ -148,15 +149,12 @@ createAggregateCovariateSettings <- function( result <- list( targetIds = targetIds, minPriorObservation = minPriorObservation, - outcomeIds = outcomeIds, outcomeWashoutDays = outcomeWashoutDays, - riskWindowStart = riskWindowStart, startAnchor = startAnchor, riskWindowEnd = riskWindowEnd, endAnchor = endAnchor, - covariateSettings = covariateSettings, # risk factors caseCovariateSettings = caseCovariateSettings, # case series casePreTargetDuration = casePreTargetDuration, # case series @@ -169,8 +167,8 @@ createAggregateCovariateSettings <- function( return(result) } -createExecutionIds <- function(size){ - executionIds <- gsub(" ", "", gsub("[[:punct:]]", "",paste(Sys.time(), sample(1000000,size), sep = ''))) +createExecutionIds <- function(size) { + executionIds <- gsub(" ", "", gsub("[[:punct:]]", "", paste(Sys.time(), sample(1000000, size), sep = ""))) return(executionIds) } @@ -186,12 +184,10 @@ computeTargetAggregateCovariateAnalyses <- function( tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), settings, databaseId = "database 1", 
- outputFolder = file.path(getwd(),'characterization_results'), + outputFolder = file.path(getwd(), "characterization_results"), minCharacterizationMean = 0, minCellCount = 0, - ... -) { - + ...) { # get settings settingId <- unique(settings$settingId) targetIds <- unique(settings$targetId) @@ -201,10 +197,10 @@ computeTargetAggregateCovariateAnalyses <- function( # create cohortDetails - all Ts, minPriorObservation, twice (type = Tall, Target) cohortDetails <- data.frame( settingId = settingId, - targetCohortId = rep(targetIds,2), + targetCohortId = rep(targetIds, 2), outcomeCohortId = 0, # cannot be NA due to pk/index - cohortType = c(rep('Target',length(targetIds)),rep('Tall',length(targetIds))), - cohortDefinitionId = 1:(length(targetIds)*2), + cohortType = c(rep("Target", length(targetIds)), rep("Tall", length(targetIds))), + cohortDefinitionId = 1:(length(targetIds) * 2), minPriorObservation = minPriorObservation, outcomeWashoutDays = NA, casePreTargetDuration = NA, @@ -226,10 +222,10 @@ computeTargetAggregateCovariateAnalyses <- function( # create the temp table with cohort_details DatabaseConnector::insertTable( - data = cohortDetails[,c('settingId','targetCohortId','outcomeCohortId','cohortType','cohortDefinitionId')], + data = cohortDetails[, c("settingId", "targetCohortId", "outcomeCohortId", "cohortType", "cohortDefinitionId")], camelCaseToSnakeCase = T, connection = connection, - tableName = '#cohort_details', + tableName = "#cohort_details", tempTable = T, dropTableIfExists = T, createTable = T, @@ -259,7 +255,7 @@ computeTargetAggregateCovariateAnalyses <- function( ) completionTime <- Sys.time() - start - message(paste0('Computing target cohorts took ',round(completionTime,digits = 1), ' ', units(completionTime))) + message(paste0("Computing target cohorts took ", round(completionTime, digits = 1), " ", units(completionTime))) ## get counts message("Extracting target cohort counts") sql <- "select @@ -337,11 +333,10 @@ computeCaseAggregateCovariateAnalyses <- function( tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), settings, databaseId = "database 1", - outputFolder = file.path(getwd(),'characterization_results'), + outputFolder = file.path(getwd(), "characterization_results"), minCharacterizationMean = 0, minCellCount = 0, - ... - ) { + ...) 
{ # check inputs # create cohortDetails - all Ts, minPriorObservation, twice (type = Tall, Target) @@ -361,7 +356,7 @@ computeCaseAggregateCovariateAnalyses <- function( cohortDetails <- expand.grid( targetCohortId = unique(settings$targetId), outcomeCohortId = unique(settings$outcomeId), - cohortType = c('Cases', 'CasesBefore', 'CasesAfter', 'CasesBetween') + cohortType = c("Cases", "CasesBefore", "CasesAfter", "CasesBetween") ) cohortDetails$minPriorObservation <- settings$minPriorObservation @@ -383,7 +378,7 @@ computeCaseAggregateCovariateAnalyses <- function( cohortDetailsExtra <- expand.grid( targetCohortId = unique(settings$targetId), outcomeCohortId = unique(settings$outcomeId), - cohortType = 'Exclude', + cohortType = "Exclude", minPriorObservation = settings$minPriorObservation, outcomeWashoutDays = settings$outcomeWashoutDays, casePreTargetDuration = settings$casePreTargetDuration, @@ -408,10 +403,10 @@ computeCaseAggregateCovariateAnalyses <- function( # create the temp table with cohort_details DatabaseConnector::insertTable( - data = cohortDetails[,c('targetCohortId','outcomeCohortId','cohortType','cohortDefinitionId', 'settingId')], + data = cohortDetails[, c("targetCohortId", "outcomeCohortId", "cohortType", "cohortDefinitionId", "settingId")], camelCaseToSnakeCase = T, connection = connection, - tableName = '#cohort_details', + tableName = "#cohort_details", tempTable = T, dropTableIfExists = T, createTable = T, @@ -448,20 +443,20 @@ computeCaseAggregateCovariateAnalyses <- function( # excluded_analysis_ref, excluded_covariate_ref # loop over settingId which contains tars: - for(i in 1:nrow(tars)){ + for (i in 1:nrow(tars)) { sql <- SqlRender::loadRenderTranslateSql( sqlFilename = "CaseCohortsPart2.sql", packageName = "Characterization", dbms = connectionDetails$dbms, tempEmulationSchema = tempEmulationSchema, - first = i==1, + first = i == 1, case_pre_target_duration = casePreTargetDuration, case_post_outcome_duration = casePostOutcomeDuration, setting_id = tars$settingId[i], tar_start = tars$riskWindowStart[i], - tar_start_anchor = ifelse(tars$startAnchor[i] == 'cohort start','cohort_start_date','cohort_end_date'), ##TODO change? + tar_start_anchor = ifelse(tars$startAnchor[i] == "cohort start", "cohort_start_date", "cohort_end_date"), ## TODO change? tar_end = tars$riskWindowEnd[i], - tar_end_anchor = ifelse(tars$endAnchor[i] == 'cohort start','cohort_start_date','cohort_end_date') ##TODO change? + tar_end_anchor = ifelse(tars$endAnchor[i] == "cohort start", "cohort_start_date", "cohort_end_date") ## TODO change? 
) DatabaseConnector::executeSql( connection = connection, @@ -472,7 +467,7 @@ computeCaseAggregateCovariateAnalyses <- function( } completionTime <- Sys.time() - start - message(paste0('Computing case cohorts took ',round(completionTime,digits = 1), ' ', units(completionTime))) + message(paste0("Computing case cohorts took ", round(completionTime, digits = 1), " ", units(completionTime))) ## get counts message("Extracting case cohort counts") @@ -512,23 +507,28 @@ computeCaseAggregateCovariateAnalyses <- function( message("Computing aggregate during case covariate results") - result2 <- tryCatch({FeatureExtraction::getDbCovariateData( - connection = connection, - oracleTempSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortTable = "#case_series", - cohortTableIsTemp = T, - cohortIds = -1, - covariateSettings = ParallelLogger::convertJsonToSettings(caseCovariateSettings), - cdmVersion = cdmVersion, - aggregated = T, - minCharacterizationMean = minCharacterizationMean - )}, error = function(e){ - message(e); - return(NULL) - }) - if(is.null(result2)){ - stop('Issue with case series data extraction') + result2 <- tryCatch( + { + FeatureExtraction::getDbCovariateData( + connection = connection, + oracleTempSchema = tempEmulationSchema, + cdmDatabaseSchema = cdmDatabaseSchema, + cohortTable = "#case_series", + cohortTableIsTemp = T, + cohortIds = -1, + covariateSettings = ParallelLogger::convertJsonToSettings(caseCovariateSettings), + cdmVersion = cdmVersion, + aggregated = T, + minCharacterizationMean = minCharacterizationMean + ) + }, + error = function(e) { + message(e) + return(NULL) + } + ) + if (is.null(result2)) { + stop("Issue with case series data extraction") } # drop temp tables @@ -580,11 +580,9 @@ exportAndromedaToCsv <- function( minCharacterizationMean, batchSize = 100000, minCellCount = 0, - includeSettings = T -){ - + includeSettings = T) { saveLocation <- outputFolder - if(!dir.exists(saveLocation)){ + if (!dir.exists(saveLocation)) { dir.create(saveLocation, recursive = T) } @@ -595,21 +593,21 @@ exportAndromedaToCsv <- function( # analysis_ref and covariate_ref # add database_id and setting_id - if(!is.null(andromeda$analysisRef)){ + if (!is.null(andromeda$analysisRef)) { Andromeda::batchApply( - tbl = andromeda$analysisRef, - fun = function(x){ + tbl = andromeda$analysisRef, + fun = function(x) { data <- merge(x, ids) colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data)) - if(file.exists(file.path(saveLocation, 'analysis_ref.csv'))){ + if (file.exists(file.path(saveLocation, "analysis_ref.csv"))) { append <- T - } else{ - append = F + } else { + append <- F } readr::write_csv( x = formatDouble(data), - file = file.path(saveLocation, 'analysis_ref.csv'), + file = file.path(saveLocation, "analysis_ref.csv"), append = append ) }, @@ -617,21 +615,21 @@ exportAndromedaToCsv <- function( ) } - if(!is.null(andromeda$covariateRef)){ + if (!is.null(andromeda$covariateRef)) { Andromeda::batchApply( - tbl = andromeda$covariateRef, - fun = function(x){ + tbl = andromeda$covariateRef, + fun = function(x) { data <- merge(x, ids) colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data)) - if(file.exists(file.path(saveLocation, 'covariate_ref.csv'))){ + if (file.exists(file.path(saveLocation, "covariate_ref.csv"))) { append <- T - } else{ - append = F + } else { + append <- F } readr::write_csv( x = formatDouble(data), - file = file.path(saveLocation, 'covariate_ref.csv'), + file = file.path(saveLocation, "covariate_ref.csv"), append = append 
) }, @@ -640,16 +638,16 @@ exportAndromedaToCsv <- function( } # covariates and covariate_continuous - extras <- cohortDetails[, c('cohortDefinitionId','settingId', 'targetCohortId', 'outcomeCohortId', 'cohortType')] - extras$databaseId <- databaseId + extras <- cohortDetails[, c("cohortDefinitionId", "settingId", "targetCohortId", "outcomeCohortId", "cohortType")] + extras$databaseId <- databaseId extras$minCharacterizationMean <- minCharacterizationMean # add database_id, setting_id, target_cohort_id, outcome_cohort_id and cohort_type - if(!is.null(andromeda$covariates)){ + if (!is.null(andromeda$covariates)) { Andromeda::batchApply( - tbl = andromeda$covariates, - fun = function(x){ - data <- merge(x, extras, by = 'cohortDefinitionId') + tbl = andromeda$covariates, + fun = function(x) { + data <- merge(x, extras, by = "cohortDefinitionId") data <- data %>% dplyr::select(-"cohortDefinitionId") colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data)) @@ -658,20 +656,20 @@ exportAndromedaToCsv <- function( if (sum(removeInd) > 0) { ParallelLogger::logInfo(paste0("Removing sum_value counts less than ", minCellCount)) if (sum(removeInd) > 0) { - data$sum_value[removeInd] <- -1*minCellCount + data$sum_value[removeInd] <- -1 * minCellCount # adding other calculated columns data$average_value[removeInd] <- NA } } - if(file.exists(file.path(saveLocation, 'covariates.csv'))){ + if (file.exists(file.path(saveLocation, "covariates.csv"))) { append <- T - } else{ - append = F + } else { + append <- F } readr::write_csv( x = formatDouble(data), - file = file.path(saveLocation, 'covariates.csv'), + file = file.path(saveLocation, "covariates.csv"), append = append ) }, @@ -679,11 +677,11 @@ exportAndromedaToCsv <- function( ) } - if(!is.null(andromeda$covariatesContinuous)){ + if (!is.null(andromeda$covariatesContinuous)) { Andromeda::batchApply( - tbl = andromeda$covariatesContinuous, - fun = function(x){ - data <- merge(x, extras %>% dplyr::select(-"minCharacterizationMean"), by = 'cohortDefinitionId') + tbl = andromeda$covariatesContinuous, + fun = function(x) { + data <- merge(x, extras %>% dplyr::select(-"minCharacterizationMean"), by = "cohortDefinitionId") data <- data %>% dplyr::select(-"cohortDefinitionId") colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data)) @@ -692,7 +690,7 @@ exportAndromedaToCsv <- function( if (sum(removeInd) > 0) { ParallelLogger::logInfo(paste0("Removing count_value counts less than ", minCellCount)) if (sum(removeInd) > 0) { - data$count_value[removeInd] <- -1*minCellCount + data$count_value[removeInd] <- -1 * minCellCount # adding columns calculated from count data$min_value[removeInd] <- NA data$max_value[removeInd] <- NA @@ -706,14 +704,14 @@ exportAndromedaToCsv <- function( } } - if(file.exists(file.path(saveLocation, 'covariates_continuous.csv'))){ + if (file.exists(file.path(saveLocation, "covariates_continuous.csv"))) { append <- T - } else{ - append = F + } else { + append <- F } readr::write_csv( x = formatDouble(data), - file = file.path(saveLocation, 'covariates_continuous.csv'), + file = file.path(saveLocation, "covariates_continuous.csv"), append = append ) }, @@ -722,23 +720,24 @@ exportAndromedaToCsv <- function( } # cohort_counts: - if(!is.null(counts)){ - cohortCounts <- cohortDetails %>% dplyr::select( - 'targetCohortId', - 'outcomeCohortId', - 'cohortType', - 'cohortDefinitionId', - 'riskWindowStart', - 'riskWindowEnd', - 'startAnchor', - 'endAnchor', - 'minPriorObservation', - 'outcomeWashoutDays' - ) %>% + if 
(!is.null(counts)) { + cohortCounts <- cohortDetails %>% + dplyr::select( + "targetCohortId", + "outcomeCohortId", + "cohortType", + "cohortDefinitionId", + "riskWindowStart", + "riskWindowEnd", + "startAnchor", + "endAnchor", + "minPriorObservation", + "outcomeWashoutDays" + ) %>% dplyr::mutate( databaseId = !!databaseId ) %>% - dplyr::inner_join(counts, by = 'cohortDefinitionId') %>% + dplyr::inner_join(counts, by = "cohortDefinitionId") %>% dplyr::select(-"cohortDefinitionId") cohortCounts <- unique(cohortCounts) colnames(cohortCounts) <- SqlRender::camelCaseToSnakeCase(colnames(cohortCounts)) @@ -748,35 +747,37 @@ exportAndromedaToCsv <- function( if (sum(removeInd) > 0) { ParallelLogger::logInfo(paste0("Removing row_count counts less than ", minCellCount)) if (sum(removeInd) > 0) { - cohortCounts$row_count[removeInd] <- -1*minCellCount + cohortCounts$row_count[removeInd] <- -1 * minCellCount } } removeInd <- cohortCounts$person_count < minCellCount if (sum(removeInd) > 0) { ParallelLogger::logInfo(paste0("Removing person_count counts less than ", minCellCount)) if (sum(removeInd) > 0) { - cohortCounts$person_count[removeInd] <- -1*minCellCount + cohortCounts$person_count[removeInd] <- -1 * minCellCount } } - if(file.exists(file.path(saveLocation, 'cohort_counts.csv'))){ + if (file.exists(file.path(saveLocation, "cohort_counts.csv"))) { append <- T - } else{ - append = F + } else { + append <- F } readr::write_csv( x = formatDouble(cohortCounts), - file = file.path(saveLocation, 'cohort_counts.csv'), + file = file.path(saveLocation, "cohort_counts.csv"), append = append ) } - if(includeSettings){ - settings <- cohortDetails %>% - dplyr::select('settingId', 'minPriorObservation', 'outcomeWashoutDays', - 'riskWindowStart', 'riskWindowEnd', 'startAnchor', 'endAnchor', - 'casePreTargetDuration', 'casePostOutcomeDuration', - 'covariateSettingJson', 'caseCovariateSettingJson') %>% + if (includeSettings) { + settings <- cohortDetails %>% + dplyr::select( + "settingId", "minPriorObservation", "outcomeWashoutDays", + "riskWindowStart", "riskWindowEnd", "startAnchor", "endAnchor", + "casePreTargetDuration", "casePostOutcomeDuration", + "covariateSettingJson", "caseCovariateSettingJson" + ) %>% dplyr::mutate(databaseId = !!databaseId) %>% dplyr::distinct() colnames(settings) <- SqlRender::camelCaseToSnakeCase(colnames(settings)) @@ -784,13 +785,15 @@ exportAndromedaToCsv <- function( # add setting.csv with cohortDetails plus database readr::write_csv( x = settings, - file = file.path(saveLocation, 'settings.csv'), + file = file.path(saveLocation, "settings.csv"), append = F ) cohortDetails <- cohortDetails %>% - dplyr::select('settingId', 'targetCohortId', - 'outcomeCohortId','cohortType') %>% + dplyr::select( + "settingId", "targetCohortId", + "outcomeCohortId", "cohortType" + ) %>% dplyr::mutate(databaseId = !!databaseId) %>% dplyr::distinct() colnames(cohortDetails) <- SqlRender::camelCaseToSnakeCase(colnames(cohortDetails)) @@ -798,7 +801,7 @@ exportAndromedaToCsv <- function( # add cohort_details.csv with cohortDetails plus database readr::write_csv( x = cohortDetails, - file = file.path(saveLocation, 'cohort_details.csv'), + file = file.path(saveLocation, "cohort_details.csv"), append = F ) } @@ -809,56 +812,65 @@ exportAndromedaToCsv <- function( -combineCovariateSettingsJsons <- function(covariateSettingsJsonList){ - +combineCovariateSettingsJsons <- function(covariateSettingsJsonList) { # get unique covariateSettingsJsonList <- unique(covariateSettingsJsonList) # first convert from json 
covariateSettings <- lapply( X = covariateSettingsJsonList, - FUN = function(x){ParallelLogger::convertJsonToSettings(x)} + FUN = function(x) { + ParallelLogger::convertJsonToSettings(x) + } ) # then combine the covariates - singleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, 'covariateSettings')))) - multipleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, 'list')))) + singleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, "covariateSettings")))) + multipleSettings <- which(unlist(lapply(covariateSettings, function(x) inherits(x, "list")))) covariateSettingList <- list() - if(length(singleSettings)>0){ - for(i in singleSettings){ + if (length(singleSettings) > 0) { + for (i in singleSettings) { covariateSettingList[[length(covariateSettingList) + 1]] <- covariateSettings[[i]] } } - if(length(multipleSettings) > 0){ - for(i in multipleSettings){ + if (length(multipleSettings) > 0) { + for (i in multipleSettings) { settingList <- covariateSettings[[i]] - for(j in 1:length(settingList)){ - if(inherits(settingList[[j]], 'covariateSettings')){ + for (j in 1:length(settingList)) { + if (inherits(settingList[[j]], "covariateSettings")) { covariateSettingList[[length(covariateSettingList) + 1]] <- settingList[[j]] - } else{ - message('Incorrect covariate settings found') # stop? + } else { + message("Incorrect covariate settings found") # stop? } } } } # check for covariates with same id but different - endDays <- unique(unlist(lapply(covariateSettingList, function(x){x$endDays}))) - if(length(endDays) > 1){ - stop('Covariate settings for aggregate covariates using different end days') + endDays <- unique(unlist(lapply(covariateSettingList, function(x) { + x$endDays + }))) + if (length(endDays) > 1) { + stop("Covariate settings for aggregate covariates using different end days") } - longTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$longTermStartDays}))) - if(length(longTermStartDays) > 1){ - stop('Covariate settings for aggregate covariates using different longTermStartDays') + longTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) { + x$longTermStartDays + }))) + if (length(longTermStartDays) > 1) { + stop("Covariate settings for aggregate covariates using different longTermStartDays") } - mediumTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$mediumTermStartDays}))) - if(length(mediumTermStartDays) > 1){ - stop('Covariate settings for aggregate covariates using different mediumTermStartDays') + mediumTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) { + x$mediumTermStartDays + }))) + if (length(mediumTermStartDays) > 1) { + stop("Covariate settings for aggregate covariates using different mediumTermStartDays") } - shortTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x){x$shortTermStartDays}))) - if(length(shortTermStartDays) > 1){ - stop('Covariate settings for aggregate covariates using different shortTermStartDays') + shortTermStartDays <- unique(unlist(lapply(covariateSettingList, function(x) { + x$shortTermStartDays + }))) + if (length(shortTermStartDays) > 1) { + stop("Covariate settings for aggregate covariates using different shortTermStartDays") } # convert to json @@ -868,41 +880,41 @@ combineCovariateSettingsJsons <- function(covariateSettingsJsonList){ getAggregateCovariatesJobs <- function( characterizationSettings, - threads -){ - + threads) { characterizationSettings <- 
characterizationSettings$aggregateCovariateSettings - if(length(characterizationSettings) == 0){ + if (length(characterizationSettings) == 0) { return(NULL) } ind <- 1:length(characterizationSettings) # target combinations - targetCombinations <- do.call(what = 'rbind', - args = - lapply( - 1:length(characterizationSettings), - function(i){ - - if(characterizationSettings[[i]]$extractNonCaseCovariates){ - result <- data.frame( - targetIds = c(characterizationSettings[[i]]$targetIds, - characterizationSettings[[i]]$outcomeIds), - minPriorObservation = characterizationSettings[[i]]$minPriorObservation, - covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings)) - ) - return(result) - } else{ - return( - data.frame(targetIds = 1, minPriorObservation = 1, covariateSettingsJson = 1)[-1,] - ) - } - } - ) + targetCombinations <- do.call( + what = "rbind", + args = + lapply( + 1:length(characterizationSettings), + function(i) { + if (characterizationSettings[[i]]$extractNonCaseCovariates) { + result <- data.frame( + targetIds = c( + characterizationSettings[[i]]$targetIds, + characterizationSettings[[i]]$outcomeIds + ), + minPriorObservation = characterizationSettings[[i]]$minPriorObservation, + covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings)) + ) + return(result) + } else { + return( + data.frame(targetIds = 1, minPriorObservation = 1, covariateSettingsJson = 1)[-1, ] + ) + } + } + ) ) - if(nrow(targetCombinations) > 0){ + if (nrow(targetCombinations) > 0) { threadCols <- c("targetIds") settingCols <- c("minPriorObservation") @@ -910,18 +922,18 @@ getAggregateCovariatesJobs <- function( threadSettings <- targetCombinations %>% dplyr::select(dplyr::all_of(threadCols)) %>% dplyr::distinct() - threadSettings$thread <- rep(1:threads, ceiling(nrow(threadSettings)/threads))[1:nrow(threadSettings)] - targetCombinations <- merge(targetCombinations, threadSettings, by = threadCols ) + threadSettings$thread <- rep(1:threads, ceiling(nrow(threadSettings) / threads))[1:nrow(threadSettings)] + targetCombinations <- merge(targetCombinations, threadSettings, by = threadCols) executionSettings <- data.frame( minPriorObservation = unique(targetCombinations$minPriorObservation) ) executionSettings$settingId <- createExecutionIds(nrow(executionSettings)) - targetCombinations <- merge(targetCombinations, executionSettings, by = settingCols ) + targetCombinations <- merge(targetCombinations, executionSettings, by = settingCols) # recreate settings settings <- c() - for(settingId in unique(executionSettings$settingId)){ + for (settingId in unique(executionSettings$settingId)) { settingVal <- executionSettings %>% dplyr::filter(.data$settingId == !!settingId) %>% dplyr::select(dplyr::all_of(settingCols)) @@ -929,26 +941,27 @@ getAggregateCovariatesJobs <- function( restrictedData <- targetCombinations %>% dplyr::inner_join(settingVal, by = settingCols) - for(i in unique(restrictedData$thread)){ + for (i in unique(restrictedData$thread)) { ind <- restrictedData$thread == i - settings <- rbind(settings, - data.frame( - functionName = 'computeTargetAggregateCovariateAnalyses', - settings = as.character(ParallelLogger::convertSettingsToJson( - list( - targetIds = unique(restrictedData$targetId[ind]), - minPriorObservation = unique(restrictedData$minPriorObservation[ind]), - covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])), - 
settingId = settingId - ) - )), - executionFolder = paste('tac', i ,paste(settingVal, collapse = '_'), sep = '_'), - jobId = paste('tac', i ,paste(settingVal, collapse = '_'), sep = '_') - ) + settings <- rbind( + settings, + data.frame( + functionName = "computeTargetAggregateCovariateAnalyses", + settings = as.character(ParallelLogger::convertSettingsToJson( + list( + targetIds = unique(restrictedData$targetId[ind]), + minPriorObservation = unique(restrictedData$minPriorObservation[ind]), + covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])), + settingId = settingId + ) + )), + executionFolder = paste("tac", i, paste(settingVal, collapse = "_"), sep = "_"), + jobId = paste("tac", i, paste(settingVal, collapse = "_"), sep = "_") + ) ) } } - } else{ + } else { settings <- c() } @@ -956,97 +969,105 @@ getAggregateCovariatesJobs <- function( Sys.sleep(time = 2) # get all combinations of TnOs, then split by treads - caseCombinations <- do.call(what = 'rbind', - args = - lapply( - 1:length(characterizationSettings), - function(i){ - result <- expand.grid( - targetId = characterizationSettings[[i]]$targetIds, - outcomeId = characterizationSettings[[i]]$outcomeIds - ) - result$minPriorObservation = characterizationSettings[[i]]$minPriorObservation - result$outcomeWashoutDays = characterizationSettings[[i]]$outcomeWashoutDays - - result$riskWindowStart = characterizationSettings[[i]]$riskWindowStart - result$startAnchor = characterizationSettings[[i]]$startAnchor - result$riskWindowEnd = characterizationSettings[[i]]$riskWindowEnd - result$endAnchor = characterizationSettings[[i]]$endAnchor - - result$casePreTargetDuration = characterizationSettings[[i]]$casePreTargetDuration - result$casePostOutcomeDuration = characterizationSettings[[i]]$casePostOutcomeDuration - - result$covariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings)) - result$caseCovariateSettingsJson = as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$caseCovariateSettings)) - return(result) - } - ) + caseCombinations <- do.call( + what = "rbind", + args = + lapply( + 1:length(characterizationSettings), + function(i) { + result <- expand.grid( + targetId = characterizationSettings[[i]]$targetIds, + outcomeId = characterizationSettings[[i]]$outcomeIds + ) + result$minPriorObservation <- characterizationSettings[[i]]$minPriorObservation + result$outcomeWashoutDays <- characterizationSettings[[i]]$outcomeWashoutDays + + result$riskWindowStart <- characterizationSettings[[i]]$riskWindowStart + result$startAnchor <- characterizationSettings[[i]]$startAnchor + result$riskWindowEnd <- characterizationSettings[[i]]$riskWindowEnd + result$endAnchor <- characterizationSettings[[i]]$endAnchor + + result$casePreTargetDuration <- characterizationSettings[[i]]$casePreTargetDuration + result$casePostOutcomeDuration <- characterizationSettings[[i]]$casePostOutcomeDuration + + result$covariateSettingsJson <- as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$covariateSettings)) + result$caseCovariateSettingsJson <- as.character(ParallelLogger::convertSettingsToJson(characterizationSettings[[i]]$caseCovariateSettings)) + return(result) + } + ) ) # create executionIds - settingCols <- c('minPriorObservation', 'outcomeWashoutDays', - 'casePreTargetDuration', 'casePostOutcomeDuration', - 'riskWindowStart', 'startAnchor', - 'riskWindowEnd', 'endAnchor') - executionSettings 
<- unique(caseCombinations[,settingCols]) + settingCols <- c( + "minPriorObservation", "outcomeWashoutDays", + "casePreTargetDuration", "casePostOutcomeDuration", + "riskWindowStart", "startAnchor", + "riskWindowEnd", "endAnchor" + ) + executionSettings <- unique(caseCombinations[, settingCols]) executionSettings$settingId <- createExecutionIds(nrow(executionSettings)) caseCombinations <- merge(caseCombinations, executionSettings, by = settingCols) # create thread split threadCombinations <- caseCombinations %>% - dplyr::select("targetId", - "minPriorObservation", - "outcomeWashoutDays", - "casePreTargetDuration", - "casePostOutcomeDuration" + dplyr::select( + "targetId", + "minPriorObservation", + "outcomeWashoutDays", + "casePreTargetDuration", + "casePostOutcomeDuration" ) %>% dplyr::distinct() - threadCombinations$thread <- rep(1:threads, ceiling(nrow(threadCombinations)/threads))[1:nrow(threadCombinations)] - caseCombinations <- merge(caseCombinations, threadCombinations, by = c("targetId", - "minPriorObservation", - "outcomeWashoutDays", - "casePreTargetDuration", - "casePostOutcomeDuration" + threadCombinations$thread <- rep(1:threads, ceiling(nrow(threadCombinations) / threads))[1:nrow(threadCombinations)] + caseCombinations <- merge(caseCombinations, threadCombinations, by = c( + "targetId", + "minPriorObservation", + "outcomeWashoutDays", + "casePreTargetDuration", + "casePostOutcomeDuration" )) - executionCols <- c('minPriorObservation', 'outcomeWashoutDays', - 'casePreTargetDuration', 'casePostOutcomeDuration') - executions <- unique(caseCombinations[,executionCols]) + executionCols <- c( + "minPriorObservation", "outcomeWashoutDays", + "casePreTargetDuration", "casePostOutcomeDuration" + ) + executions <- unique(caseCombinations[, executionCols]) # now create the settings - for(j in 1:nrow(executions)){ - settingVal <- executions[j,] + for (j in 1:nrow(executions)) { + settingVal <- executions[j, ] restrictedData <- caseCombinations %>% dplyr::inner_join(settingVal, by = executionCols) - for(i in unique(restrictedData$thread)){ + for (i in unique(restrictedData$thread)) { ind <- restrictedData$thread == i - settings <- rbind(settings, - data.frame( - functionName = 'computeCaseAggregateCovariateAnalyses', - settings = as.character(ParallelLogger::convertSettingsToJson( - list( - targetIds = unique(restrictedData$targetId[ind]), - outcomeIds = unique(restrictedData$outcomeId[ind]), - minPriorObservation = unique(restrictedData$minPriorObservation[ind]), - outcomeWashoutDays = unique(restrictedData$outcomeWashoutDays[ind]), - tar = unique(data.frame( - riskWindowStart = restrictedData$riskWindowStart[ind], - startAnchor = restrictedData$startAnchor[ind], - riskWindowEnd = restrictedData$riskWindowEnd[ind], - endAnchor = restrictedData$endAnchor[ind] - )), - casePreTargetDuration = unique(restrictedData$casePreTargetDuration[ind]), - casePostOutcomeDuration = unique(restrictedData$casePostOutcomeDuration[ind]), - covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])), - caseCovariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$caseCovariateSettingsJson[ind])), - settingIds = unique(restrictedData$settingId[ind]) - ) - )), - executionFolder = paste('cac', i, paste0(settingVal, collapse = '_'), sep = '_'), - jobId = paste('cac', i, paste0(settingVal, collapse = '_'), sep = '_') - ) + settings <- rbind( + settings, + data.frame( + functionName = "computeCaseAggregateCovariateAnalyses", + settings = 
as.character(ParallelLogger::convertSettingsToJson( + list( + targetIds = unique(restrictedData$targetId[ind]), + outcomeIds = unique(restrictedData$outcomeId[ind]), + minPriorObservation = unique(restrictedData$minPriorObservation[ind]), + outcomeWashoutDays = unique(restrictedData$outcomeWashoutDays[ind]), + tar = unique(data.frame( + riskWindowStart = restrictedData$riskWindowStart[ind], + startAnchor = restrictedData$startAnchor[ind], + riskWindowEnd = restrictedData$riskWindowEnd[ind], + endAnchor = restrictedData$endAnchor[ind] + )), + casePreTargetDuration = unique(restrictedData$casePreTargetDuration[ind]), + casePostOutcomeDuration = unique(restrictedData$casePostOutcomeDuration[ind]), + covariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$covariateSettingsJson[ind])), + caseCovariateSettingsJson = combineCovariateSettingsJsons(as.list(restrictedData$caseCovariateSettingsJson[ind])), + settingIds = unique(restrictedData$settingId[ind]) + ) + )), + executionFolder = paste("cac", i, paste0(settingVal, collapse = "_"), sep = "_"), + jobId = paste("cac", i, paste0(settingVal, collapse = "_"), sep = "_") + ) ) } } diff --git a/R/CustomCovariates.R b/R/CustomCovariates.R index dddc434..df9b8e4 100644 --- a/R/CustomCovariates.R +++ b/R/CustomCovariates.R @@ -52,10 +52,10 @@ #' #' @examples #' settings <- createDuringCovariateSettings( -#' useConditionOccurrenceDuring = TRUE, -#' useConditionOccurrencePrimaryInpatientDuring = FALSE, -#' useConditionEraDuring = FALSE, -#' useConditionGroupEraDuring = FALSE +#' useConditionOccurrenceDuring = TRUE, +#' useConditionOccurrencePrimaryInpatientDuring = FALSE, +#' useConditionEraDuring = FALSE, +#' useConditionGroupEraDuring = FALSE #' ) #' #' @export @@ -64,7 +64,6 @@ createDuringCovariateSettings <- function( useConditionOccurrencePrimaryInpatientDuring = F, useConditionEraDuring = F, useConditionGroupEraDuring = F, - useDrugExposureDuring = F, useDrugEraDuring = F, useDrugGroupEraDuring = F, @@ -74,14 +73,11 @@ createDuringCovariateSettings <- function( useObservationDuring = F, useVisitCountDuring = F, useVisitConceptCountDuring = F, - includedCovariateConceptIds = c(), addDescendantsToInclude = F, excludedCovariateConceptIds = c(), addDescendantsToExclude = F, - includedCovariateIds = c() -){ - + includedCovariateIds = c()) { covariateSettings <- list( temporal = FALSE, # needed? temporalSequence = FALSE @@ -143,14 +139,13 @@ getDbDuringCovariateData <- function( cohortIds = c(-1), covariateSettings, minCharacterizationMean = 0, - ... -) { + ...) 
{ writeLines("Constructing during cohort covariates") if (!aggregated) { stop("Only aggregation supported") } - getDomainSettings <- utils::read.csv(system.file('csv/PrespecAnalyses.csv', package = 'Characterization')) + getDomainSettings <- utils::read.csv(system.file("csv/PrespecAnalyses.csv", package = "Characterization")) # create Tables @@ -166,10 +161,10 @@ getDbDuringCovariateData <- function( sql <- SqlRender::translate( sql = sql, targetDialect = DatabaseConnector::dbms(connection) - ) + ) DatabaseConnector::executeSql(connection, sql = sql) - sql <- "DROP TABLE IF EXISTS #analysis_ref; + sql <- "DROP TABLE IF EXISTS #analysis_ref; CREATE TABLE #analysis_ref( analysis_id int, analysis_name varchar(100), @@ -183,16 +178,16 @@ getDbDuringCovariateData <- function( sql = sql, targetDialect = DatabaseConnector::dbms(connection) ) - DatabaseConnector::executeSql(connection,sql) + DatabaseConnector::executeSql(connection, sql) # included covariates - includedCovTable <- '' - if(length(covariateSettings$includedCovariateIds) > 0 ){ + includedCovTable <- "" + if (length(covariateSettings$includedCovariateIds) > 0) { # create- - includedCovTable <- '#included_cov' + includedCovTable <- "#included_cov" DatabaseConnector::insertTable( connection = connection, - tableName = includedCovTable, + tableName = includedCovTable, dropTableIfExists = T, createTable = T, tempTable = T, @@ -202,23 +197,23 @@ getDbDuringCovariateData <- function( } # including concept ids - includedConceptTable <- '' - if(length(covariateSettings$includedCovariateConceptIds) > 0 ){ - includedConceptTable <- '#include_concepts' + includedConceptTable <- "" + if (length(covariateSettings$includedCovariateConceptIds) > 0) { + includedConceptTable <- "#include_concepts" DatabaseConnector::insertTable( connection = connection, - tableName = includedConceptTable, + tableName = includedConceptTable, dropTableIfExists = T, createTable = T, tempTable = T, data = data.frame(id = covariateSettings$includedCovariateConceptIds), camelCaseToSnakeCase = T - ) + ) - if(covariateSettings$addDescendantsToInclude){ + if (covariateSettings$addDescendantsToInclude) { SqlRender::loadRenderTranslateSql( - sqlFilename = 'IncludeDescendants.sql', - packageName = 'Characterization', + sqlFilename = "IncludeDescendants.sql", + packageName = "Characterization", dbms = DatabaseConnector::dbms(connection), table_name = includedConceptTable, cdm_database_schema = cdmDatabaseSchema @@ -227,12 +222,12 @@ getDbDuringCovariateData <- function( } # exlcuding concept ids - excludedConceptTable <- '' - if(length(covariateSettings$excludedCovariateConceptIds) > 0 ){ - excludedConceptTable <- '#exclude_concepts' + excludedConceptTable <- "" + if (length(covariateSettings$excludedCovariateConceptIds) > 0) { + excludedConceptTable <- "#exclude_concepts" DatabaseConnector::insertTable( connection = connection, - tableName = excludedConceptTable, + tableName = excludedConceptTable, dropTableIfExists = T, createTable = T, tempTable = T, @@ -240,10 +235,10 @@ getDbDuringCovariateData <- function( camelCaseToSnakeCase = T ) - if(covariateSettings$addDescendantsToInclude){ + if (covariateSettings$addDescendantsToInclude) { SqlRender::loadRenderTranslateSql( - sqlFilename = 'IncludeDescendants.sql', - packageName = 'Characterization', + sqlFilename = "IncludeDescendants.sql", + packageName = "Characterization", dbms = DatabaseConnector::dbms(connection), table_name = excludedConceptTable, cdm_database_schema = cdmDatabaseSchema @@ -259,23 +254,23 @@ 
getDbDuringCovariateData <- function( useContinuous <- F result <- Andromeda::andromeda() - for(domainSettingsIndex in domainSettingsIndexes){ + for (domainSettingsIndex in domainSettingsIndexes) { i <- i + 1 - if(getDomainSettings$isBinary[domainSettingsIndex] == 'Y'){ + if (getDomainSettings$isBinary[domainSettingsIndex] == "Y") { binaryInd <- c(i, binaryInd) useBinary <- T - } else{ + } else { continuousInd <- c(i, continuousInd) useContinuous <- T } # Load template sql and fill sql <- SqlRender::loadRenderTranslateSql( sqlFilename = getDomainSettings$sqlFileName[domainSettingsIndex], - packageName = 'Characterization', - dbms = attr(connection, "dbms"), + packageName = "Characterization", + dbms = attr(connection, "dbms"), cohort_table = cohortTable, - #cohort_ids = cohortIds, + # cohort_ids = cohortIds, cohort_definition_id = cohortIds, # added? row_id_field = rowIdField, cdm_database_schema = cdmDatabaseSchema, @@ -291,62 +286,60 @@ getDbDuringCovariateData <- function( included_cov_table = includedCovTable, included_concept_table = includedConceptTable, excluded_concept_table = excludedConceptTable, - covariate_table = paste0('#cov_',i) + covariate_table = paste0("#cov_", i) ) - message(paste0('Executing during sql code for ', getDomainSettings$analysisName[domainSettingsIndex])) + message(paste0("Executing during sql code for ", getDomainSettings$analysisName[domainSettingsIndex])) start <- Sys.time() DatabaseConnector::executeSql( connection = connection, sql = sql, progressBar = T - ) + ) time <- Sys.time() - start - message(paste0('Execution took ', round(time, digits = 2), ' ', units(time))) - + message(paste0("Execution took ", round(time, digits = 2), " ", units(time))) } # all_covariates.cohort_definition_id,\n all_covariates.covariate_id,\n all_covariates.sum_value,\n CAST(all_covariates.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value - message(paste0('Extracting covariates')) + message(paste0("Extracting covariates")) start <- Sys.time() # Retrieve the covariate: - if(useBinary){ - + if (useBinary) { sql <- paste0( - 'select temp.*, CAST(temp.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value from (', - paste0(paste0('select cohort_definition_id, covariate_id, sum_value from #cov_', binaryInd), collapse = ' union '), - ') temp inner join + "select temp.*, CAST(temp.sum_value / (1.0 * total.total_count) AS FLOAT) AS average_value from (", + paste0(paste0("select cohort_definition_id, covariate_id, sum_value from #cov_", binaryInd), collapse = " union "), + ") temp inner join (SELECT cohort_definition_id, COUNT(*) AS total_count FROM @cohort_table {@cohort_definition_id != -1} ? 
{\nWHERE cohort_definition_id IN (@cohort_definition_id)} GROUP BY cohort_definition_id ) total - on temp.cohort_definition_id = total.cohort_definition_id;' + on temp.cohort_definition_id = total.cohort_definition_id;" ) sql <- SqlRender::render( sql = sql, cohort_table = cohortTable, - cohort_definition_id = paste0(c(-1), collapse = ',') - ) + cohort_definition_id = paste0(c(-1), collapse = ",") + ) sql <- SqlRender::translate( sql = sql, targetDialect = DatabaseConnector::dbms(connection) - ) + ) DatabaseConnector::querySqlToAndromeda( connection = connection, sql = sql, andromeda = result, - andromedaTableName = 'covariates', + andromedaTableName = "covariates", appendToTable = F, snakeCaseToCamelCase = TRUE ) - if(minCharacterizationMean != 0 && "averageValue" %in% colnames(result$covariates)){ + if (minCharacterizationMean != 0 && "averageValue" %in% colnames(result$covariates)) { result$covariates <- result$covariates %>% dplyr::filter(.data$averageValue >= minCharacterizationMean) } } - if(useContinuous){ - sql <- paste0(paste0('select * from #cov_', continuousInd), collapse = ' union ') + if (useContinuous) { + sql <- paste0(paste0("select * from #cov_", continuousInd), collapse = " union ") sql <- SqlRender::translate( sql = sql, targetDialect = DatabaseConnector::dbms(connection) @@ -355,7 +348,7 @@ getDbDuringCovariateData <- function( connection = connection, sql = sql, andromeda = result, - andromedaTableName = 'covariatesContinuous', + andromedaTableName = "covariatesContinuous", appendToTable = F, snakeCaseToCamelCase = TRUE ) @@ -364,11 +357,11 @@ getDbDuringCovariateData <- function( DatabaseConnector::querySqlToAndromeda( connection = connection, sql = SqlRender::translate( - sql = 'select * from #cov_ref;', + sql = "select * from #cov_ref;", targetDialect = DatabaseConnector::dbms(connection) ), andromeda = result, - andromedaTableName = 'covariateRef', + andromedaTableName = "covariateRef", appendToTable = F, snakeCaseToCamelCase = TRUE ) @@ -377,26 +370,26 @@ getDbDuringCovariateData <- function( DatabaseConnector::querySqlToAndromeda( connection = connection, sql = SqlRender::translate( - sql = 'select * from #analysis_ref;', + sql = "select * from #analysis_ref;", targetDialect = DatabaseConnector::dbms(connection) ), andromeda = result, - andromedaTableName = 'analysisRef', + andromedaTableName = "analysisRef", appendToTable = F, snakeCaseToCamelCase = TRUE ) time <- Sys.time() - start - message(paste0('Extracting covariates took ', round(time, digits = 2), ' ', units(time))) + message(paste0("Extracting covariates took ", round(time, digits = 2), " ", units(time))) # clean up: drop tables... 
if (length(c(binaryInd, continuousInd)) != 0) { - message(paste0('Removing temp covariate tables')) + message(paste0("Removing temp covariate tables")) for (i in c(binaryInd, continuousInd)) { sql <- "TRUNCATE TABLE #cov_@id;\nDROP TABLE #cov_@id;\n" sql <- SqlRender::render( sql, id = i - ) + ) sql <- SqlRender::translate( sql = sql, targetDialect = attr(connection, "dbms"), diff --git a/R/Database.R b/R/Database.R index 62b3daf..1fbe141 100644 --- a/R/Database.R +++ b/R/Database.R @@ -72,11 +72,11 @@ insertResultsToDatabase <- function( connectionDetails, schema, resultsFolder, - tablePrefix = '', - csvTablePrefix = 'c_' - ){ - specLoc <- system.file('settings', 'resultsDataModelSpecification.csv', - package = 'Characterization') + tablePrefix = "", + csvTablePrefix = "c_") { + specLoc <- system.file("settings", "resultsDataModelSpecification.csv", + package = "Characterization" + ) specs <- utils::read.csv(specLoc) colnames(specs) <- SqlRender::snakeCaseToCamelCase(colnames(specs)) specs$tableName <- paste0(csvTablePrefix, specs$tableName) @@ -148,8 +148,7 @@ createCharacterizationTables <- function( deleteExistingTables = T, createTables = T, tablePrefix = "c_", - tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") - ) { + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")) { errorMessages <- checkmate::makeAssertCollection() .checkTablePrefix( tablePrefix = tablePrefix, @@ -170,8 +169,8 @@ createCharacterizationTables <- function( tables <- paste0(tablePrefix, tables) # adding this to not create tables if all tables esist - if(sum(tables %in% alltables) == length(tables) & !deleteExistingTables){ - message('All tables exist so no need to recreate') + if (sum(tables %in% alltables) == length(tables) & !deleteExistingTables) { + message("All tables exist so no need to recreate") createTables <- FALSE } @@ -247,16 +246,16 @@ migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "" connectionDetails = connectionDetails, databaseSchema = databaseSchema, tablePrefix = tablePrefix - ) + ) migrator$executeMigrations() migrator$finalize() ParallelLogger::logInfo("Updating version number") updateVersionSql <- SqlRender::loadRenderTranslateSql("UpdateVersionNumber.sql", - packageName = utils::packageName(), - database_schema = databaseSchema, - table_prefix = tablePrefix, - dbms = connectionDetails$dbms + packageName = utils::packageName(), + database_schema = databaseSchema, + table_prefix = tablePrefix, + dbms = connectionDetails$dbms ) connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) @@ -287,7 +286,7 @@ getResultTables <- function() { ), show_col_types = FALSE )$table_name, - 'migration', 'package_version' + "migration", "package_version" ) ) ) diff --git a/R/DechallengeRechallenge.R b/R/DechallengeRechallenge.R index 949ebf5..985fdef 100644 --- a/R/DechallengeRechallenge.R +++ b/R/DechallengeRechallenge.R @@ -104,10 +104,9 @@ computeDechallengeRechallengeAnalyses <- function( tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), settings, databaseId = "database 1", - outputFolder = file.path(getwd(),'results'), + outputFolder = file.path(getwd(), "results"), minCellCount = 0, - ... - ) { + ...) 
{
   # check inputs
   errorMessages <- checkmate::makeAssertCollection()
   .checkConnectionDetails(connectionDetails, errorMessages)
@@ -210,7 +209,7 @@ computeDechallengeRechallengeAnalyses <- function(
   )
 
   # export results to csv
-  message('exporting to csv file')
+  message("exporting to csv file")
   exportDechallengeRechallengeToCsv(
     result = result,
     saveDirectory = outputFolder,
@@ -248,10 +247,9 @@ computeRechallengeFailCaseSeriesAnalyses <- function(
     settings,
     databaseId = "database 1",
     showSubjectId = F,
-    outputFolder = file.path(getwd(),'results'),
+    outputFolder = file.path(getwd(), "results"),
     minCellCount = 0,
-    ...
-    ){
+    ...) {
   # check inputs
   errorMessages <- checkmate::makeAssertCollection()
   .checkConnectionDetails(connectionDetails, errorMessages)
@@ -353,7 +351,7 @@ computeRechallengeFailCaseSeriesAnalyses <- function(
   )
 
   # add the csv export here
-  message('exporting to csv file')
+  message("exporting to csv file")
   exportRechallengeFailCaseSeriesToCsv(
     result = result,
     saveDirectory = outputFolder
@@ -365,35 +363,42 @@
 
 getDechallengeRechallengeJobs <- function(
     characterizationSettings,
-    threads
-){
-
+    threads) {
   characterizationSettings <- characterizationSettings$dechallengeRechallengeSettings
-  if(length(characterizationSettings) == 0){
+  if (length(characterizationSettings) == 0) {
     return(NULL)
   }
   ind <- 1:length(characterizationSettings)
-  targetIds <- lapply(ind, function(i){characterizationSettings[[i]]$targetCohortDefinitionIds})
-  outcomeIds <- lapply(ind, function(i){characterizationSettings[[i]]$outcomeCohortDefinitionIds})
-  dechallengeStopIntervals <- lapply(ind, function(i){characterizationSettings[[i]]$dechallengeStopInterval})
-  dechallengeEvaluationWindows <- lapply(ind, function(i){characterizationSettings[[i]]$dechallengeEvaluationWindow})
+  targetIds <- lapply(ind, function(i) {
+    characterizationSettings[[i]]$targetCohortDefinitionIds
+  })
+  outcomeIds <- lapply(ind, function(i) {
+    characterizationSettings[[i]]$outcomeCohortDefinitionIds
+  })
+  dechallengeStopIntervals <- lapply(ind, function(i) {
+    characterizationSettings[[i]]$dechallengeStopInterval
+  })
+  dechallengeEvaluationWindows <- lapply(ind, function(i) {
+    characterizationSettings[[i]]$dechallengeEvaluationWindow
+  })
 
   # get all combinations of TnOs, then split by threads
-  combinations <- do.call(what = 'rbind',
-                          args =
-                            lapply(
-                              1:length(targetIds),
-                              function(i){
-                                result <- expand.grid(
-                                  targetId = targetIds[[i]],
-                                  outcomeId = outcomeIds[[i]]
-                                )
-                                result$dechallengeStopInterval <- dechallengeStopIntervals[[i]]
-                                result$dechallengeEvaluationWindow <- dechallengeEvaluationWindows[[i]]
-                                return(result)
-                              }
-                            )
+  combinations <- do.call(
+    what = "rbind",
+    args =
+      lapply(
+        1:length(targetIds),
+        function(i) {
+          result <- expand.grid(
+            targetId = targetIds[[i]],
+            outcomeId = outcomeIds[[i]]
+          )
+          result$dechallengeStopInterval <- dechallengeStopIntervals[[i]]
+          result$dechallengeEvaluationWindow <- dechallengeEvaluationWindows[[i]]
+          return(result)
+        }
+      )
   )
   # find out whether more Ts or more Os
   tcount <- nrow(
@@ -414,35 +419,35 @@ getDechallengeRechallengeJobs <- function(
     )
   )
 
-  if(threads > max(tcount, ocount)){
-    message('Input parameter threads greater than number of targets and outcomes')
-    message(paste0('Only using ', max(tcount, ocount) ,' threads for TimeToEvent'))
+  if (threads > max(tcount, ocount)) {
+    message("Input parameter threads greater than number of targets and outcomes")
+    message(paste0("Only using ", max(tcount, 
ocount), " threads for TimeToEvent")) } - if(tcount >= ocount){ + if (tcount >= ocount) { threadDf <- combinations %>% dplyr::count( .data$targetId, .data$dechallengeStopInterval, .data$dechallengeEvaluationWindow ) - threadDf$thread = rep(1:threads, ceiling(tcount/threads))[1:tcount] - mergeColumn <- c('targetId','dechallengeStopInterval', 'dechallengeEvaluationWindow') - } else{ + threadDf$thread <- rep(1:threads, ceiling(tcount / threads))[1:tcount] + mergeColumn <- c("targetId", "dechallengeStopInterval", "dechallengeEvaluationWindow") + } else { threadDf <- combinations %>% dplyr::count( .data$outcomeId, .data$dechallengeStopInterval, .data$dechallengeEvaluationWindow ) - threadDf$thread = rep(1:threads, ceiling(ocount/threads))[1:ocount] - mergeColumn <- c('outcomeId','dechallengeStopInterval', 'dechallengeEvaluationWindow') + threadDf$thread <- rep(1:threads, ceiling(ocount / threads))[1:ocount] + mergeColumn <- c("outcomeId", "dechallengeStopInterval", "dechallengeEvaluationWindow") } combinations <- merge(combinations, threadDf, by = mergeColumn) sets <- lapply( X = 1:max(threadDf$thread), - FUN = function(i){ + FUN = function(i) { createDechallengeRechallengeSettings( targetIds = unique(combinations$targetId[combinations$thread == i]), outcomeIds = unique(combinations$outcomeId[combinations$thread == i]), @@ -454,28 +459,29 @@ getDechallengeRechallengeJobs <- function( # recreate settings settings <- c() - for(i in 1:length(sets)){ - settings <- rbind(settings, - data.frame( - functionName = 'computeDechallengeRechallengeAnalyses', - settings = as.character(ParallelLogger::convertSettingsToJson( - sets[[i]] - )), - executionFolder = paste0('dr_', i), - jobId = paste0('dr_', i) - ) + for (i in 1:length(sets)) { + settings <- rbind( + settings, + data.frame( + functionName = "computeDechallengeRechallengeAnalyses", + settings = as.character(ParallelLogger::convertSettingsToJson( + sets[[i]] + )), + executionFolder = paste0("dr_", i), + jobId = paste0("dr_", i) + ) ) - settings <- rbind(settings, - data.frame( - functionName = 'computeRechallengeFailCaseSeriesAnalyses', - settings = as.character(ParallelLogger::convertSettingsToJson( - sets[[i]] - )), - executionFolder = paste0('rfcs_', i), - jobId = paste0('rfcs_', i) - ) + settings <- rbind( + settings, + data.frame( + functionName = "computeRechallengeFailCaseSeriesAnalyses", + settings = as.character(ParallelLogger::convertSettingsToJson( + sets[[i]] + )), + executionFolder = paste0("rfcs_", i), + jobId = paste0("rfcs_", i) + ) ) - } return(settings) diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index c784a97..0432995 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -212,7 +212,7 @@ .checkCovariateSettings <- function(covariateSettings, errorMessages) { - if (inherits(covariateSettings,"covariateSettings")) { + if (inherits(covariateSettings, "covariateSettings")) { checkmate::assertClass( x = covariateSettings, classes = "covariateSettings", @@ -244,12 +244,12 @@ checkNoCsv <- function( csvFiles, - errorMessage - ){ - - csvExists <- sapply(csvFiles, function(x){file.exists(x)}) + errorMessage) { + csvExists <- sapply(csvFiles, function(x) { + file.exists(x) + }) - if(sum(csvExists) > 0){ + if (sum(csvExists) > 0) { stop(errorMessage) } @@ -258,7 +258,6 @@ checkNoCsv <- function( cleanCsv <- function( resultFolder, - fileName = 'time_to_event.csv' - ){ - file.remove(file.path(resultFolder, fileName )) + fileName = "time_to_event.csv") { + file.remove(file.path(resultFolder, fileName)) } diff --git 
a/R/Incremental.R b/R/Incremental.R index 5598367..f05d9d2 100644 --- a/R/Incremental.R +++ b/R/Incremental.R @@ -1,13 +1,11 @@ createIncrementalLog <- function( executionFolder, - logname = 'execution.csv' - ){ - - if(!dir.exists(executionFolder)){ + logname = "execution.csv") { + if (!dir.exists(executionFolder)) { dir.create(executionFolder, recursive = T) } - if(!file.exists(file.path(executionFolder, logname))){ + if (!file.exists(file.path(executionFolder, logname))) { x <- data.frame( run_date_time = Sys.time(), job_id = 0, @@ -16,23 +14,22 @@ createIncrementalLog <- function( ) readr::write_csv( x = x, - file = file.path(executionFolder, logname) - ) + file = file.path(executionFolder, logname) + ) } - } -loadIncrementalFiles <- function(executionFolder){ - if(file.exists(file.path(executionFolder, 'execution.csv'))){ - executed <- utils::read.csv(file.path(executionFolder, 'execution.csv')) - } else{ - stop('execution.csv missing') +loadIncrementalFiles <- function(executionFolder) { + if (file.exists(file.path(executionFolder, "execution.csv"))) { + executed <- utils::read.csv(file.path(executionFolder, "execution.csv")) + } else { + stop("execution.csv missing") } - if(file.exists(file.path(executionFolder, 'completed.csv'))){ - completed <- utils::read.csv(file.path(executionFolder, 'completed.csv')) - } else{ - stop('completed.csv missing') + if (file.exists(file.path(executionFolder, "completed.csv"))) { + completed <- utils::read.csv(file.path(executionFolder, "completed.csv")) + } else { + stop("completed.csv missing") } return(list( executed = executed, @@ -42,9 +39,7 @@ loadIncrementalFiles <- function(executionFolder){ getExecutionJobIssues <- function( executed, - completed -){ - + completed) { executedJobs <- unique(executed$job_id) completedJobs <- unique(completed$job_id) @@ -62,9 +57,7 @@ getExecutionJobIssues <- function( #' #' @export cleanIncremental <- function( - executionFolder -){ - + executionFolder) { incrementalFiles <- loadIncrementalFiles( executionFolder ) @@ -74,12 +67,12 @@ cleanIncremental <- function( completed = incrementalFiles$completed ) - if(length(issues) > 0 ){ + if (length(issues) > 0) { # delete contents inside folder - for(i in 1:length(issues)){ - files <- dir(file.path(executionFolder,issues[i]), full.names = T) - for(file in files){ - message(paste0('Deleting incomplete result file ', file)) + for (i in 1:length(issues)) { + files <- dir(file.path(executionFolder, issues[i]), full.names = T) + for (file in files) { + message(paste0("Deleting incomplete result file ", file)) file.remove(file) } } @@ -87,21 +80,19 @@ cleanIncremental <- function( # now update the execution to remove the issue rows executionFile <- utils::read.csv( - file = file.path(executionFolder, 'execution.csv') + file = file.path(executionFolder, "execution.csv") ) - fixedExecution <- executionFile[!executionFile$job_id %in% issues,] + fixedExecution <- executionFile[!executionFile$job_id %in% issues, ] utils::write.csv( x = fixedExecution, - file = file.path(executionFolder, 'execution.csv') - ) + file = file.path(executionFolder, "execution.csv") + ) -return(invisible(NULL)) + return(invisible(NULL)) } checkResultFilesIncremental <- function( - executionFolder - ){ - + executionFolder) { incrementalFiles <- loadIncrementalFiles( executionFolder ) @@ -111,14 +102,14 @@ checkResultFilesIncremental <- function( completed = incrementalFiles$completed ) - if(length(issues) > 0 ){ - stop(paste0('jobIds: ', paste0(issues, collapse = ','), 'executed but not completed. 
Please run cleanIncremental() to remove incomplete results.')) - } + if (length(issues) > 0) { + stop(paste0("jobIds: ", paste0(issues, collapse = ","), "executed but not completed. Please run cleanIncremental() to remove incomplete results.")) + } return(invisible(NULL)) } -findCompletedJobs <- function(executionFolder){ +findCompletedJobs <- function(executionFolder) { incrementalFiles <- loadIncrementalFiles(executionFolder) return(unique(incrementalFiles$completed$job_id)) } @@ -130,9 +121,8 @@ recordIncremental <- function( jobId, startTime, endTime, - logname = 'execution.csv' -){ - if(file.exists(file.path(executionFolder, logname))){ + logname = "execution.csv") { + if (file.exists(file.path(executionFolder, logname))) { x <- data.frame( run_date_time = runDateTime, job_id = jobId, @@ -141,13 +131,12 @@ recordIncremental <- function( ) readr::write_csv( x = x, - file = file.path(executionFolder, logname), + file = file.path(executionFolder, logname), append = T ) - } else{ - warning(paste0(logname, ' file missing so no logging possible')) + } else { + warning(paste0(logname, " file missing so no logging possible")) } - } #' Removes csv files from the execution folder as there should be no csv files @@ -160,35 +149,35 @@ recordIncremental <- function( #' #' @export cleanNonIncremental <- function( - executionFolder - ){ + executionFolder) { # remove all files from the executionFolder files <- dir( path = executionFolder, recursive = T, full.names = T, - pattern = '.csv' - ) - if(length(files) > 0 ){ - for(file in files){ - message(paste0('Deleting file ', file)) + pattern = ".csv" + ) + if (length(files) > 0) { + for (file in files) { + message(paste0("Deleting file ", file)) file.remove(file) } } } checkResultFilesNonIncremental <- function( - executionFolder -){ + executionFolder) { files <- dir( path = executionFolder, recursive = T, full.names = T, - pattern = '.csv' + pattern = ".csv" ) - if(length(files) > 0 ){ - errorMessage <- paste0('Running in non-incremental but csv files exist in execution folder.', - ' please delete manually or using cleanNonIncremental()') + if (length(files) > 0) { + errorMessage <- paste0( + "Running in non-incremental but csv files exist in execution folder.", + " please delete manually or using cleanNonIncremental()" + ) stop(errorMessage) } diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R index 28fbf3a..4d8a0fa 100644 --- a/R/RunCharacterization.R +++ b/R/RunCharacterization.R @@ -16,8 +16,7 @@ createCharacterizationSettings <- function( timeToEventSettings = NULL, dechallengeRechallengeSettings = NULL, - aggregateCovariateSettings = NULL -) { + aggregateCovariateSettings = NULL) { errorMessages <- checkmate::makeAssertCollection() .checkTimeToEventSettingsList( settings = timeToEventSettings, @@ -141,15 +140,14 @@ runCharacterizationAnalyses <- function( cdmDatabaseSchema, characterizationSettings, outputDirectory, - executionPath = file.path(outputDirectory, 'execution'), + executionPath = file.path(outputDirectory, "execution"), csvFilePrefix = "c_", databaseId = "1", showSubjectId = F, minCellCount = 0, incremental = T, threads = 1, - minCharacterizationMean = 0.01 -) { + minCharacterizationMean = 0.01) { # inputs checks errorMessages <- checkmate::makeAssertCollection() .checkCharacterizationSettings( @@ -171,7 +169,7 @@ runCharacterizationAnalyses <- function( logger <- createLogger( logPath = file.path(executionPath), - logName = 'log.txt' + logName = "log.txt" ) ParallelLogger::registerLogger(logger) 
on.exit(ParallelLogger::unregisterLogger(logger)) @@ -179,60 +177,59 @@ runCharacterizationAnalyses <- function( jobs <- createJobs( characterizationSettings = characterizationSettings, threads = threads - ) + ) # save settings - if(!file.exists(file.path(executionPath, 'settings.rds'))){ + if (!file.exists(file.path(executionPath, "settings.rds"))) { saveRDS( object = list( characterizationSettings = characterizationSettings, threads = threads - ), - file = file.path(executionPath, 'settings.rds') + ), + file = file.path(executionPath, "settings.rds") ) } - if(incremental){ + if (incremental) { # check for any issues with current incremental oldSettings <- readRDS( - file = file.path(executionPath, 'settings.rds') - ) - if(!identical(characterizationSettings,oldSettings$characterizationSettings)){ - stop('Settings have changed - please turn off incremental') + file = file.path(executionPath, "settings.rds") + ) + if (!identical(characterizationSettings, oldSettings$characterizationSettings)) { + stop("Settings have changed - please turn off incremental") } - if(!identical(threads,oldSettings$threads)){ - stop('Cannot change number of threads in incremental model') + if (!identical(threads, oldSettings$threads)) { + stop("Cannot change number of threads in incremental model") } # create logs if not exists createIncrementalLog( executionFolder = executionPath, - logname = 'execution.csv' + logname = "execution.csv" ) createIncrementalLog( executionFolder = executionPath, - logname = 'completed.csv' + logname = "completed.csv" ) checkResultFilesIncremental( - executionFolder = executionPath + executionFolder = executionPath ) # remove any previously completed jobs completedJobIds <- findCompletedJobs(executionFolder = executionPath) completedJobIndex <- jobs$jobId %in% completedJobIds - if(sum(completedJobIndex) > 0){ - message(paste0('Removing ', sum(completedJobIndex), ' previously completed jobs')) - jobs <- jobs[!completedJobIndex,] + if (sum(completedJobIndex) > 0) { + message(paste0("Removing ", sum(completedJobIndex), " previously completed jobs")) + jobs <- jobs[!completedJobIndex, ] } - if(nrow(jobs) == 0){ - message('No jobs left') + if (nrow(jobs) == 0) { + message("No jobs left") return(invisible(T)) } - - } else{ + } else { # check for any csv files in folder checkResultFilesNonIncremental( executionFolder = executionPath @@ -261,17 +258,18 @@ runCharacterizationAnalyses <- function( # convert jobList to list with extra inputs jobList <- lapply( X = 1:nrow(jobs), - FUN = function(ind){ + FUN = function(ind) { inputs <- inputSettings inputs$settings <- jobs$settings[ind] inputs$functionName <- jobs$functionName[ind] - inputs$executionFolder<- jobs$executionFolder[ind] - inputs$jobId <-jobs$jobId[ind] + inputs$executionFolder <- jobs$executionFolder[ind] + inputs$jobId <- jobs$jobId[ind] inputs$runDateTime <- runDateTime return(inputs) - }) + } + ) - message('Creating new cluster') + message("Creating new cluster") cluster <- ParallelLogger::makeCluster( numberOfThreads = threads, singleThreadToMain = T, @@ -295,30 +293,29 @@ runCharacterizationAnalyses <- function( invisible(outputDirectory) } -createDirectory <- function(x){ - if(!dir.exists(x)){ - message(paste0('Creating directory ', x)) +createDirectory <- function(x) { + if (!dir.exists(x)) { + message(paste0("Creating directory ", x)) dir.create(x, recursive = T) } } -createLogger <- function(logPath, logName){ +createLogger <- function(logPath, logName) { createDirectory(logPath) ParallelLogger::createLogger( - name = 
'Characterization', + name = "Characterization", threshold = "INFO", appenders = list( ParallelLogger::createFileAppender( - fileName = file.path(logPath, logName), - layout = ParallelLogger::layoutParallel, - expirationTime = 60*60*48 - ) + fileName = file.path(logPath, logName), + layout = ParallelLogger::layoutParallel, + expirationTime = 60 * 60 * 48 + ) ) ) } -runCharacterizationsInParallel <- function(x){ - +runCharacterizationsInParallel <- function(x) { startTime <- Sys.time() functionName <- x$functionName @@ -327,16 +324,16 @@ runCharacterizationsInParallel <- function(x){ inputSettings$settings <- ParallelLogger::convertJsonToSettings(inputSettings$settings) inputSettings$outputFolder <- file.path(x$executionPath, x$executionFolder) - if(x$incremental){ - recordIncremental( - executionFolder = x$executionPath, - runDateTime = x$runDateTime, - jobId = x$jobId, - startTime = startTime, - endTime = startTime, - logname = 'execution.csv' - ) - } + if (x$incremental) { + recordIncremental( + executionFolder = x$executionPath, + runDateTime = x$runDateTime, + jobId = x$jobId, + startTime = startTime, + endTime = startTime, + logname = "execution.csv" + ) + } completed <- tryCatch( { @@ -344,30 +341,31 @@ runCharacterizationsInParallel <- function(x){ what = eval(parse(text = functionName)), args = inputSettings ) - }, error = function(e){print(e); return(FALSE)} - - ) + }, + error = function(e) { + print(e) + return(FALSE) + } + ) endTime <- Sys.time() - # if it completed without issues save it - if(x$incremental & completed){ - recordIncremental( - executionFolder = x$executionPath, - runDateTime = x$runDateTime, - jobId = x$jobId, - startTime = startTime, - endTime = endTime, - logname = 'completed.csv' - ) - } + # if it completed without issues save it + if (x$incremental & completed) { + recordIncremental( + executionFolder = x$executionPath, + runDateTime = x$runDateTime, + jobId = x$jobId, + startTime = startTime, + endTime = endTime, + logname = "completed.csv" + ) + } } createJobs <- function( - characterizationSettings, - threads -){ - + characterizationSettings, + threads) { jobDf <- rbind( getTimeToEventJobs( characterizationSettings, @@ -378,17 +376,17 @@ createJobs <- function( threads ), getAggregateCovariatesJobs( - characterizationSettings, - threads + characterizationSettings, + threads ) ) - #data.frame( + # data.frame( # functionName, # settings # json, # executionFolder, # jobId - #) + # ) return(jobDf) } @@ -399,14 +397,14 @@ aggregateCsvs <- function( executionPath, outputFolder, executionFolders, # needed? 
- csvFilePrefix -){ - - tables <- c('cohort_details.csv', 'settings.csv','covariates.csv', - 'covariates_continuous.csv','covariate_ref.csv', - 'analysis_ref.csv','cohort_counts.csv', - 'time_to_event.csv', - 'rechallenge_fail_case_series.csv', 'dechallenge_rechallenge.csv') + csvFilePrefix) { + tables <- c( + "cohort_details.csv", "settings.csv", "covariates.csv", + "covariates_continuous.csv", "covariate_ref.csv", + "analysis_ref.csv", "cohort_counts.csv", + "time_to_event.csv", + "rechallenge_fail_case_series.csv", "dechallenge_rechallenge.csv" + ) # this makes sure results are recreated firstTracker <- data.frame( @@ -424,44 +422,42 @@ aggregateCsvs <- function( # for each folder load covariates, covariates_continuous, # covariate_ref and analysis_ref - for(folderName in folderNames){ - for(csvType in tables){ - + for (folderName in folderNames) { + for (csvType in tables) { loadPath <- file.path(executionPath, folderName, csvType) - savePath <- file.path(outputFolder, paste0(csvFilePrefix,csvType)) - if(file.exists(loadPath)){ - - #TODO do this in batches + savePath <- file.path(outputFolder, paste0(csvFilePrefix, csvType)) + if (file.exists(loadPath)) { + # TODO do this in batches data <- readr::read_csv( file = loadPath, show_col_types = F ) - if(csvType == 'analysis_ref.csv'){ + if (csvType == "analysis_ref.csv") { data <- data %>% dplyr::mutate( - unique_id = paste0(.data$setting_id, '-', .data$analysis_id) + unique_id = paste0(.data$setting_id, "-", .data$analysis_id) ) %>% dplyr::filter( # need to filter analysis_id and setting_id !.data$unique_id %in% analysisRefTracker ) %>% dplyr::select(-"unique_id") - analysisRefTracker <- unique(c(analysisRefTracker, paste0(data$setting_id,'-',data$analysis_id))) + analysisRefTracker <- unique(c(analysisRefTracker, paste0(data$setting_id, "-", data$analysis_id))) } - if(csvType == 'covariate_ref.csv'){ # this could be problematic as may have differnet covariate_ids + if (csvType == "covariate_ref.csv") { # this could be problematic as may have differnet covariate_ids data <- data %>% dplyr::mutate( - unique_id = paste0(.data$setting_id, '-', .data$covariate_id) + unique_id = paste0(.data$setting_id, "-", .data$covariate_id) ) %>% dplyr::filter( # need to filter covariate_id and setting_id !.data$unique_id %in% covariateRefTracker - )%>% + ) %>% dplyr::select(-"unique_id") - covariateRefTracker <- unique(c(covariateRefTracker, paste0(data$setting_id,'-',data$covariate_id))) + covariateRefTracker <- unique(c(covariateRefTracker, paste0(data$setting_id, "-", data$covariate_id))) } - if(csvType == 'settings.csv'){ + if (csvType == "settings.csv") { data <- data %>% dplyr::filter( !.data$setting_id %in% settingsTracker @@ -472,12 +468,11 @@ aggregateCsvs <- function( append <- file.exists(savePath) readr::write_csv( x = data, - file = savePath, quote = 'all', + file = savePath, quote = "all", append = append & !firstTracker$first[firstTracker$table == csvType] ) firstTracker$first[firstTracker$table == csvType] <- F } - } } } diff --git a/R/SaveLoad.R b/R/SaveLoad.R index e0e3bd0..61583bd 100644 --- a/R/SaveLoad.R +++ b/R/SaveLoad.R @@ -27,8 +27,7 @@ exportTimeToEventToCsv <- function( result, saveDirectory, - minCellCount = 0 - ) { + minCellCount = 0) { if (!dir.exists(saveDirectory)) { dir.create( path = saveDirectory, @@ -99,7 +98,7 @@ exportDechallengeRechallengeToCsv <- function( ) message("Writing ", countN, " rows to csv") - if(!dir.exists(saveDirectory)){ + if (!dir.exists(saveDirectory)) { dir.create(saveDirectory, recursive = T) } @@ 
-215,8 +214,7 @@ exportDechallengeRechallengeToCsv <- function( #' @export exportRechallengeFailCaseSeriesToCsv <- function( result, - saveDirectory - ) { + saveDirectory) { if (!dir.exists(saveDirectory)) { dir.create( path = saveDirectory, diff --git a/R/TimeToEvent.R b/R/TimeToEvent.R index b316902..18c9d9c 100644 --- a/R/TimeToEvent.R +++ b/R/TimeToEvent.R @@ -25,8 +25,7 @@ #' @export createTimeToEventSettings <- function( targetIds, - outcomeIds - ) { + outcomeIds) { # check indicationIds errorMessages <- checkmate::makeAssertCollection() # check targetIds is a vector of int/double @@ -80,10 +79,9 @@ computeTimeToEventAnalyses <- function( cdmDatabaseSchema, settings, databaseId = "database 1", - outputFolder = file.path(getwd(),'results'), + outputFolder = file.path(getwd(), "results"), minCellCount = 0, - ... - ) { + ...) { # check inputs errorMessages <- checkmate::makeAssertCollection() .checkConnectionDetails(connectionDetails, errorMessages) @@ -105,7 +103,7 @@ computeTimeToEventAnalyses <- function( ) .checkTimeToEventSettings( settings = settings, - errorMessages = errorMessages + errorMessages = errorMessages ) valid <- checkmate::reportAssertions(errorMessages) @@ -200,7 +198,7 @@ computeTimeToEventAnalyses <- function( ) # add the csv export here - message('exporting to csv file') + message("exporting to csv file") exportTimeToEventToCsv( result = result, saveDirectory = outputFolder, @@ -216,57 +214,61 @@ computeTimeToEventAnalyses <- function( # based on the number of threads getTimeToEventJobs <- function( characterizationSettings, - threads -){ - - + threads) { characterizationSettings <- characterizationSettings$timeToEventSettings - if(length(characterizationSettings) == 0){ + if (length(characterizationSettings) == 0) { return(NULL) } ind <- 1:length(characterizationSettings) - targetIds <- lapply(ind, function(i){characterizationSettings[[i]]$targetIds}) - outcomeIds <- lapply(ind, function(i){characterizationSettings[[i]]$outcomeIds}) + targetIds <- lapply(ind, function(i) { + characterizationSettings[[i]]$targetIds + }) + outcomeIds <- lapply(ind, function(i) { + characterizationSettings[[i]]$outcomeIds + }) # get all combinations of TnOs, then split by treads - tnos <- do.call(what = 'rbind', - args = - lapply( - 1:length(targetIds), - function(i){expand.grid( - targetId = targetIds[[i]], - outcomeId = outcomeIds[[i]] - )} - ) + tnos <- do.call( + what = "rbind", + args = + lapply( + 1:length(targetIds), + function(i) { + expand.grid( + targetId = targetIds[[i]], + outcomeId = outcomeIds[[i]] + ) + } + ) ) # find out whether more Ts or more Os tcount <- length(unique(tnos$targetId)) ocount <- length(unique(tnos$outcomeId)) - if(threads > max(tcount, ocount)){ - message('Tnput parameter threads greater than number of targets and outcomes') - message(paste0('Only using ', max(tcount, ocount) ,' threads for TimeToEvent')) + if (threads > max(tcount, ocount)) { + message("Tnput parameter threads greater than number of targets and outcomes") + message(paste0("Only using ", max(tcount, ocount), " threads for TimeToEvent")) } - if(tcount >= ocount){ + if (tcount >= ocount) { threadDf <- data.frame( targetId = unique(tnos$targetId), - thread = rep(1:threads, ceiling(tcount/threads))[1:tcount] + thread = rep(1:threads, ceiling(tcount / threads))[1:tcount] ) - mergeColumn <- 'targetId' - } else{ + mergeColumn <- "targetId" + } else { threadDf <- data.frame( outcomeId = unique(tnos$outcomeId), - thread = rep(1:threads, ceiling(ocount/threads))[1:ocount] + thread = 
rep(1:threads, ceiling(ocount / threads))[1:ocount] ) - mergeColumn <- 'outcomeId' + mergeColumn <- "outcomeId" } tnos <- merge(tnos, threadDf, by = mergeColumn) sets <- lapply( X = 1:max(threadDf$thread), - FUN = function(i){ + FUN = function(i) { createTimeToEventSettings( targetIds = unique(tnos$targetId[tnos$thread == i]), outcomeIds = unique(tnos$outcomeId[tnos$thread == i]) @@ -276,16 +278,17 @@ getTimeToEventJobs <- function( # recreate settings settings <- c() - for(i in 1:length(sets)){ - settings <- rbind(settings, - data.frame( - functionName = 'computeTimeToEventAnalyses', - settings = as.character(ParallelLogger::convertSettingsToJson( - sets[[i]] - )), - executionFolder = paste0('tte_', i), - jobId = paste0('tte_', i) - ) + for (i in 1:length(sets)) { + settings <- rbind( + settings, + data.frame( + functionName = "computeTimeToEventAnalyses", + settings = as.character(ParallelLogger::convertSettingsToJson( + sets[[i]] + )), + executionFolder = paste0("tte_", i), + jobId = paste0("tte_", i) + ) ) } diff --git a/R/ViewShiny.R b/R/ViewShiny.R index 841b755..bcee5c3 100644 --- a/R/ViewShiny.R +++ b/R/ViewShiny.R @@ -13,8 +13,7 @@ #' @export viewCharacterization <- function( resultFolder, - cohortDefinitionSet = NULL - ) { + cohortDefinitionSet = NULL) { databaseSettings <- prepareCharacterizationShiny( resultFolder = resultFolder, cohortDefinitionSet = cohortDefinitionSet @@ -26,12 +25,10 @@ viewCharacterization <- function( prepareCharacterizationShiny <- function( resultFolder, cohortDefinitionSet, - sqliteLocation = file.path(tempdir(), 'results.sqlite'), - tablePrefix = '', - csvTablePrefix = 'c_' - ) { - - if(!dir.exists(dirname(sqliteLocation))){ + sqliteLocation = file.path(tempdir(), "results.sqlite"), + tablePrefix = "", + csvTablePrefix = "c_") { + if (!dir.exists(dirname(sqliteLocation))) { dir.create(dirname(sqliteLocation), recursive = T) } @@ -55,7 +52,7 @@ prepareCharacterizationShiny <- function( # upload the results insertResultsToDatabase( connectionDetails = connectionDetails, - schema = 'main', + schema = "main", resultsFolder = resultFolder, tablePrefix = tablePrefix, csvTablePrefix = csvTablePrefix @@ -71,12 +68,12 @@ prepareCharacterizationShiny <- function( if (!"cg_cohort_definition" %in% tables) { cohortIds <- unique( c( - DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_ID from ",tablePrefix,csvTablePrefix,"cohort_details where COHORT_TYPE = 'Target';"))$TARGET_COHORT_ID, - DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_ID from ",tablePrefix,csvTablePrefix,"cohort_details where COHORT_TYPE = 'TnO';"))$OUTCOME_COHORT_ID, - DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$TARGET_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$OUTCOME_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"rechallenge_fail_case_series;"))$TARGET_COHORT_DEFINITION_ID, - DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ",tablePrefix,csvTablePrefix,"rechallenge_fail_case_series;"))$OUTCOME_COHORT_DEFINITION_ID + DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_ID from ", tablePrefix, csvTablePrefix, "cohort_details where COHORT_TYPE = 'Target';"))$TARGET_COHORT_ID, + 
DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_ID from ", tablePrefix, csvTablePrefix, "cohort_details where COHORT_TYPE = 'TnO';"))$OUTCOME_COHORT_ID, + DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$TARGET_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$OUTCOME_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, paste0("select distinct TARGET_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "rechallenge_fail_case_series;"))$TARGET_COHORT_DEFINITION_ID, + DatabaseConnector::querySql(con, paste0("select distinct OUTCOME_COHORT_DEFINITION_ID from ", tablePrefix, csvTablePrefix, "rechallenge_fail_case_series;"))$OUTCOME_COHORT_DEFINITION_ID ) ) @@ -95,9 +92,9 @@ prepareCharacterizationShiny <- function( if (!"database_meta_data" %in% tables) { dbIds <- unique( c( - DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"analysis_ref;"))$DATABASE_ID, - DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"dechallenge_rechallenge;"))$DATABASE_ID, - DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ",tablePrefix,csvTablePrefix,"time_to_event;"))$DATABASE_ID + DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "analysis_ref;"))$DATABASE_ID, + DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "dechallenge_rechallenge;"))$DATABASE_ID, + DatabaseConnector::querySql(con, paste0("select distinct DATABASE_ID from ", tablePrefix, csvTablePrefix, "time_to_event;"))$DATABASE_ID ) ) @@ -120,7 +117,7 @@ prepareCharacterizationShiny <- function( server = server ), schema = "main", - tablePrefix = paste0(tablePrefix,csvTablePrefix), + tablePrefix = paste0(tablePrefix, csvTablePrefix), cohortTablePrefix = "cg_", databaseTable = "DATABASE_META_DATA" ) @@ -174,7 +171,7 @@ viewChars <- function( databaseSettings$cgTablePrefix <- databaseSettings$cohortTablePrefix databaseSettings$databaseTable <- "DATABASE_META_DATA" databaseSettings$databaseTablePrefix <- "" - #databaseSettings$iTablePrefix <- databaseSettings$incidenceTablePrefix + # databaseSettings$iTablePrefix <- databaseSettings$incidenceTablePrefix databaseSettings$cgTable <- "cohort_definition" if (!testApp) { diff --git a/docs/404.html b/docs/404.html index a5cac53..7ee4ee6 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,7 +6,7 @@ Page not found (404) • Characterization - + @@ -32,7 +32,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -115,7 +115,7 @@

Page not found (404)

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/articles/InstallationGuide.html b/docs/articles/InstallationGuide.html index aebd848..bb566a2 100644 --- a/docs/articles/InstallationGuide.html +++ b/docs/articles/InstallationGuide.html @@ -6,7 +6,7 @@ Characterization Installation Guide • Characterization - + @@ -33,7 +33,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -230,7 +230,7 @@

Acknowledgments

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/articles/UsingPackage.html b/docs/articles/UsingPackage.html index e86aab6..c62e6dd 100644 --- a/docs/articles/UsingPackage.html +++ b/docs/articles/UsingPackage.html @@ -6,7 +6,7 @@ Using Characterization Package • Characterization - + @@ -33,7 +33,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -155,37 +155,11 @@

Setup
createCohorts then populates the temporary SQLite database with the simulated data, ready to be used.

-connectionDetails <- Eunomia::getEunomiaConnectionDetails()
-Eunomia::createCohorts(connectionDetails = connectionDetails)
-
## Connecting using SQLite driver
-
## Creating cohort: Celecoxib
-
## Executing SQL took 0.00776 secs
-
## Creating cohort: Diclofenac
-
## Executing SQL took 0.00674 secs
-
## Creating cohort: GiBleed
-
## Executing SQL took 0.00919 secs
-
## Creating cohort: NSAIDs
-
## Executing SQL took 0.0486 secs
+connectionDetails <- Eunomia::getEunomiaConnectionDetails()
+
## attempting to download GiBleed
+
## attempting to extract and load: C:\Users\cknoll1\AppData\Local\Temp\5\RtmpoFseQQ/GiBleed_5.3.zip to: C:\Users\cknoll1\AppData\Local\Temp\5\RtmpoFseQQ/GiBleed_5.3.sqlite
+
+Eunomia::createCohorts(connectionDetails = connectionDetails)
## Cohorts created in table main.cohort
##   cohortId       name
 ## 1        1  Celecoxib
@@ -204,10 +178,10 @@ 

Setup ## 4 2694

We also need to have the Characterization package installed and loaded:

-
+
 remotes::install_github("ohdsi/FeatureExtraction")
 remotes::install_github("ohdsi/Characterization", ref = "new_approach")
-
+
 
## 
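The chunk above only installs the two packages; loading them afterwards is the usual library call. A trivial sketch (not part of the original vignette output):

 # load the packages after installation
 library(FeatureExtraction)
 library(Characterization)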
@@ -245,13 +219,13 @@ 

Aggregate Covariates
Using the Eunomia data, where we previously generated four cohorts, we can use cohort ids 1, 2 and 4 as the targetIds and cohort id 3 as the outcomeIds:

-
+
 exampleTargetIds <- c(1, 2, 4)
 exampleOutcomeIds <- 3

If we want to get information on the sex, age at index and Charlson Comorbidity index we can create the settings using FeatureExtraction::createCovariateSettings:

-
+
 exampleCovariateSettings <- FeatureExtraction::createCovariateSettings(
   useDemographicsGender = T,
   useDemographicsAge = T,
@@ -272,7 +246,7 @@ 

Aggregate Covariates -
+
@@ -283,17 +257,17 @@ 

Aggregate Covariates -
+
 exampleAggregateCovariateSettings <- createAggregateCovariateSettings(
   targetIds = exampleTargetIds,
   outcomeIds = exampleOutcomeIds,
   riskWindowStart = 1, startAnchor = "cohort start",
-  riskWindowEnd = 365, endAnchor = "cohort start", 
-  outcomeWashoutDays = 9999, 
+  riskWindowEnd = 365, endAnchor = "cohort start",
+  outcomeWashoutDays = 9999,
   minPriorObservation = 365,
-  covariateSettings = exampleCovariateSettings, 
-  caseCovariateSettings = caseCovariateSettings, 
-  casePreTargetDuration = 90, 
+  covariateSettings = exampleCovariateSettings,
+  caseCovariateSettings = caseCovariateSettings,
+  casePreTargetDuration = 90,
   casePostOutcomeDuration = 90
 )

Next we need to use the
@@ -306,7 +280,7 @@

Aggregate Covariates
minCharacterizationMean to exclude covariates with mean values below 0.01, and we must specify the outputFolder where the csv results will be written.

-
+
 runCharacterizationAnalyses(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
@@ -315,12 +289,12 @@ 

Aggregate Covariates outcomeDatabaseSchema = "main", outcomeTable = "cohort", characterizationSettings = createCharacterizationSettings( - aggregateCovariateSettings = exampleAggregateCovariateSettings - ), + aggregateCovariateSettings = exampleAggregateCovariateSettings + ), databaseId = "Eunomia", - runId = 1, - minCharacterizationMean = 0.01, - outputDirectory = file.path(getwd(), 'example_char', 'results'), executionPath = file.path(getwd(), 'example_char', 'execution'), + runId = 1, + minCharacterizationMean = 0.01, + outputDirectory = file.path(getwd(), "example_char", "results"), executionPath = file.path(getwd(), "example_char", "execution"), minCellCount = 10, incremental = F, threads = 1 @@ -346,13 +320,13 @@
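The call above uses incremental = F, so every execution starts from scratch. With incremental = T, a run that was interrupted leaves jobs marked as executed but not completed, and these must be cleared before re-running. A minimal sketch using the exported helper and the executionPath from the example above:

 # sketch: clear partially written results from an interrupted incremental run,
 # after which runCharacterizationAnalyses() can be called again with incremental = T
 cleanIncremental(
   executionFolder = file.path(getwd(), "example_char", "execution")
 )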

Dechallenge Rechallenge
Using the Eunomia data, where we previously generated four cohorts, we can use cohort ids 1, 2 and 4 as the targetIds and cohort id 3 as the outcomeIds:

-
+
 exampleTargetIds <- c(1, 2, 4)
 exampleOutcomeIds <- 3

If we want to create the dechallenge rechallenge analysis for all our target cohorts and our outcome cohort with a 30-day dechallengeStopInterval and a 31-day dechallengeEvaluationWindow:

-
+
 exampleDechallengeRechallengeSettings <- createDechallengeRechallengeSettings(
   targetIds = exampleTargetIds,
   outcomeIds = exampleOutcomeIds,
@@ -363,18 +337,18 @@ 
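The hunk above truncates the settings call; a minimal sketch of the complete call implied by the text, with the dechallengeStopInterval and dechallengeEvaluationWindow argument names taken from createDechallengeRechallengeSettings:

 # sketch: the full settings call implied by the text (30 day stop interval,
 # 31 day evaluation window)
 exampleDechallengeRechallengeSettings <- createDechallengeRechallengeSettings(
   targetIds = exampleTargetIds,
   outcomeIds = exampleOutcomeIds,
   dechallengeStopInterval = 30,
   dechallengeEvaluationWindow = 31
 )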

Dechallenge Rechallenge
computeDechallengeRechallengeAnalyses and the settings previously specified, with minCellCount removing values less than the specified value:

-
+
 dc <- computeDechallengeRechallengeAnalyses(
   connectionDetails = connectionDetails,
   targetDatabaseSchema = "main",
   targetTable = "cohort",
   settings = exampleDechallengeRechallengeSettings,
-  databaseId = "Eunomia", 
-  outcomeTable = file.path(getwd(), 'example_char', 'results'), 
+  databaseId = "Eunomia",
+  outputFolder = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )
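As the R changes in this patch show, computeDechallengeRechallengeAnalyses also writes its result to csv via exportDechallengeRechallengeToCsv. A sketch of calling the exporter directly, assuming dc holds the result from above and that the exporter accepts minCellCount as the time-to-event exporter does:

 # sketch: re-export the dechallenge rechallenge result to csv by hand
 exportDechallengeRechallengeToCsv(
   result = dc,
   saveDirectory = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )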

Next, it is possible to compute the failed rechallenge cases:

-
+
 failed <- computeRechallengeFailCaseSeriesAnalyses(
   connectionDetails = connectionDetails,
   targetDatabaseSchema = "main",
@@ -383,7 +357,7 @@ 

Dechallenge Rechallenge outcomeDatabaseSchema = "main", outcomeTable = "cohort", databaseId = "Eunomia", - outcomeTable = file.path(getwd(), 'example_char', 'results'), + outputFolder = file.path(getwd(), "example_char", "results"), minCellCount = 5 )
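The failed case series can likewise be written out with its dedicated exporter, shown in R/SaveLoad.R above. A sketch assuming failed holds the result from the previous call:

 # sketch: export the failed rechallenge case series to csv
 exportRechallengeFailCaseSeriesToCsv(
   result = failed,
   saveDirectory = file.path(getwd(), "example_char", "results")
 )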

@@ -399,7 +373,7 @@

Time to Event
+
 exampleTimeToEventSettings <- createTimeToEventSettings(
   targetIds = exampleTargetIds,
   outcomeIds = exampleOutcomeIds
@@ -407,7 +381,7 @@ 

Time to Event
+
 tte <- computeTimeToEventAnalyses(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
@@ -415,7 +389,7 @@ 

Time to Event= "cohort", settings = exampleTimeToEventSettings, databaseId = "Eunomia", - outcomeTable = file.path(getwd(), 'example_char', 'results'), + outputFolder = file.path(getwd(), "example_char", "results"), minCellCount = 5 )
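As with the other analyses, the time-to-event result can be re-exported by hand using exportTimeToEventToCsv from R/SaveLoad.R. A sketch assuming tte holds the result from above:

 # sketch: export the time-to-event result to csv
 exportTimeToEventToCsv(
   result = tte,
   saveDirectory = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )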

@@ -427,7 +401,7 @@

Run Multiple
+
 characterizationSettings <- createCharacterizationSettings(
   timeToEventSettings = list(
     exampleTimeToEventSettings
@@ -457,21 +431,21 @@ 

Run Multiple= "main", outcomeTable = "cohort", characterizationSettings = characterizationSettings, - outputDirectory = file.path(tempdir(), "example", 'results'), + outputDirectory = file.path(tempdir(), "example", "results"), executionPath = file.path(tempdir(), "example", "execution"), - csvFilePrefix = "c_", - databaseId = "1", + csvFilePrefix = "c_", + databaseId = "1", incremental = F, minCharacterizationMean = 0.01, minCellCount = 5 )

This will create csv files with the results in the outputDirectory. You can run the following code to view the results in a shiny app:

-
+
 viewCharacterization(
-  resultFolder = file.path(tempdir(), "example", "results"), 
+  resultFolder = file.path(tempdir(), "example", "results"),
   cohortDefinitionSet = NULL
-    )
+)
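viewCharacterization works by loading the csv results into a temporary SQLite database (see prepareCharacterizationShiny in R/ViewShiny.R). The upload step can also be done by hand, roughly as below; the connection details here are an assumed example rather than vignette code:

 # sketch: manually load the csv results into a SQLite results database,
 # mirroring what prepareCharacterizationShiny() does internally
 connectionDetails <- DatabaseConnector::createConnectionDetails(
   dbms = "sqlite",
   server = file.path(tempdir(), "results.sqlite")
 )
 insertResultsToDatabase(
   connectionDetails = connectionDetails,
   schema = "main",
   resultsFolder = file.path(tempdir(), "example", "results"),
   tablePrefix = "",
   csvTablePrefix = "c_"
 )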

@@ -493,7 +467,7 @@

Run Multiple

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/articles/index.html b/docs/articles/index.html index 108ff9f..d961e98 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,5 +1,5 @@ -Articles • CharacterizationArticles • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0

@@ -89,7 +89,7 @@

All vignettes

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/authors.html b/docs/authors.html index b8e830f..2641c42 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,5 +1,5 @@ -Authors and Citation • CharacterizationAuthors and Citation • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0

@@ -69,7 +69,7 @@
@@ -116,7 +116,7 @@

Citation

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/index.html b/docs/index.html index 4af4955..4014dd0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -6,7 +6,7 @@ Characterizations of Cohorts • Characterization - + @@ -33,7 +33,7 @@ Characterization - 1.1.1 + 2.0.0

@@ -300,7 +300,7 @@

Developers

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/news/index.html b/docs/news/index.html index 9ad48e5..cacd0fc 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,5 +1,5 @@ -Changelog • CharacterizationChangelog • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0
@@ -73,13 +73,10 @@

Changelog

- +
  • added tests for all HADES supported dbms
  • updated minCellCount censoring
  • -
-
- -
  • fixed issues with incremental
  • +
  • fixed issues with incremental
  • made the code more modular to enable new characterizations to be added
  • added job optimization code to optimize the distribution of jobs
  • fixed tests and made minor bug updates
  • @@ -152,7 +149,7 @@
-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 3889dd5..ffef8f3 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,8 +1,8 @@ -pandoc: 3.1.11 -pkgdown: 2.0.7 +pandoc: 3.1.1 +pkgdown: 2.0.9 pkgdown_sha: ~ articles: InstallationGuide: InstallationGuide.html UsingPackage: UsingPackage.html -last_built: 2024-08-07T16:15Z +last_built: 2024-08-07T18:15Z diff --git a/docs/reference/Characterization-package.html b/docs/reference/Characterization-package.html index 6cc22cf..782e85c 100644 --- a/docs/reference/Characterization-package.html +++ b/docs/reference/Characterization-package.html @@ -1,5 +1,5 @@ -Characterization: Characterizations of Cohorts — Characterization-package • CharacterizationCharacterization: Characterizations of Cohorts — Characterization-package • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0
@@ -104,7 +104,7 @@

Author

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/cleanIncremental.html b/docs/reference/cleanIncremental.html index 3411df0..2d8fe3e 100644 --- a/docs/reference/cleanIncremental.html +++ b/docs/reference/cleanIncremental.html @@ -1,7 +1,5 @@ -Removes csv files from folders that have not been marked as completed -and removes the record of the execution file — cleanIncremental • CharacterizationRemoves csv files from folders that have not been marked as completed and removes the record of the execution file — cleanIncremental • CharacterizationRemoves csv files from the execution folder as there should be no csv files -when running in non-incremental model — cleanNonIncremental • CharacterizationRemoves csv files from the execution folder as there should be no csv files when running in non-incremental model — cleanNonIncremental • CharacterizationCompute dechallenge rechallenge study — computeDechallengeRechallengeAnalyses • CharacterizationCompute dechallenge rechallenge study — computeDechallengeRechallengeAnalyses • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -171,7 +171,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html b/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html index 6a66550..1e9a2fc 100644 --- a/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html +++ b/docs/reference/computeRechallengeFailCaseSeriesAnalyses.html @@ -1,5 +1,5 @@ -Compute fine the subjects that fail the dechallenge rechallenge study — computeRechallengeFailCaseSeriesAnalyses • CharacterizationCompute fine the subjects that fail the dechallenge rechallenge study — computeRechallengeFailCaseSeriesAnalyses • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -176,7 +176,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/computeTimeToEventAnalyses.html b/docs/reference/computeTimeToEventAnalyses.html index f26506b..fc73f52 100644 --- a/docs/reference/computeTimeToEventAnalyses.html +++ b/docs/reference/computeTimeToEventAnalyses.html @@ -1,5 +1,5 @@ -Compute time to event study — computeTimeToEventAnalyses • CharacterizationCompute time to event study — computeTimeToEventAnalyses • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -175,7 +175,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createAggregateCovariateSettings.html b/docs/reference/createAggregateCovariateSettings.html index 2ff42ec..deff011 100644 --- a/docs/reference/createAggregateCovariateSettings.html +++ b/docs/reference/createAggregateCovariateSettings.html @@ -1,5 +1,5 @@ -Create aggregate covariate study settings — createAggregateCovariateSettings • CharacterizationCreate aggregate covariate study settings — createAggregateCovariateSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -186,7 +186,7 @@

Value

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createCharacterizationSettings.html b/docs/reference/createCharacterizationSettings.html index d9e942b..a5bb1f3 100644 --- a/docs/reference/createCharacterizationSettings.html +++ b/docs/reference/createCharacterizationSettings.html @@ -1,5 +1,5 @@ -Create the settings for a large scale characterization study — createCharacterizationSettings • CharacterizationCreate the settings for a large scale characterization study — createCharacterizationSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -129,7 +129,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createCharacterizationTables.html b/docs/reference/createCharacterizationTables.html index 01e7755..05cb16a 100644 --- a/docs/reference/createCharacterizationTables.html +++ b/docs/reference/createCharacterizationTables.html @@ -1,5 +1,5 @@ -Create the results tables to store characterization results into a database — createCharacterizationTables • CharacterizationCreate the results tables to store characterization results into a database — createCharacterizationTables • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -150,7 +150,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createDechallengeRechallengeSettings.html b/docs/reference/createDechallengeRechallengeSettings.html index a787f5f..c044efd 100644 --- a/docs/reference/createDechallengeRechallengeSettings.html +++ b/docs/reference/createDechallengeRechallengeSettings.html @@ -1,5 +1,5 @@ -Create dechallenge rechallenge study settings — createDechallengeRechallengeSettings • CharacterizationCreate dechallenge rechallenge study settings — createDechallengeRechallengeSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -129,7 +129,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createDuringCovariateSettings.html b/docs/reference/createDuringCovariateSettings.html index df4b1c3..52adf2b 100644 --- a/docs/reference/createDuringCovariateSettings.html +++ b/docs/reference/createDuringCovariateSettings.html @@ -1,5 +1,5 @@ -Create during covariate settings — createDuringCovariateSettings • CharacterizationCreate during covariate settings — createDuringCovariateSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -237,7 +237,7 @@

Examples

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createSqliteDatabase.html b/docs/reference/createSqliteDatabase.html index d683532..66ed0f0 100644 --- a/docs/reference/createSqliteDatabase.html +++ b/docs/reference/createSqliteDatabase.html @@ -1,5 +1,5 @@ -Create an sqlite database connection — createSqliteDatabase • CharacterizationCreate an sqlite database connection — createSqliteDatabase • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -116,7 +116,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/createTimeToEventSettings.html b/docs/reference/createTimeToEventSettings.html index 63d552b..a6d29ce 100644 --- a/docs/reference/createTimeToEventSettings.html +++ b/docs/reference/createTimeToEventSettings.html @@ -1,5 +1,5 @@ -Create time to event study settings — createTimeToEventSettings • CharacterizationCreate time to event study settings — createTimeToEventSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -115,7 +115,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/exportDechallengeRechallengeToCsv.html b/docs/reference/exportDechallengeRechallengeToCsv.html index 033782d..1f736ab 100644 --- a/docs/reference/exportDechallengeRechallengeToCsv.html +++ b/docs/reference/exportDechallengeRechallengeToCsv.html @@ -1,5 +1,5 @@ -export the DechallengeRechallenge results as csv — exportDechallengeRechallengeToCsv • Characterizationexport the DechallengeRechallenge results as csv — exportDechallengeRechallengeToCsv • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -120,7 +120,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/exportRechallengeFailCaseSeriesToCsv.html b/docs/reference/exportRechallengeFailCaseSeriesToCsv.html index 456d5bd..f5c025a 100644 --- a/docs/reference/exportRechallengeFailCaseSeriesToCsv.html +++ b/docs/reference/exportRechallengeFailCaseSeriesToCsv.html @@ -1,5 +1,5 @@ -export the RechallengeFailCaseSeries results as csv — exportRechallengeFailCaseSeriesToCsv • Characterizationexport the RechallengeFailCaseSeries results as csv — exportRechallengeFailCaseSeriesToCsv • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -116,7 +116,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/exportTimeToEventToCsv.html b/docs/reference/exportTimeToEventToCsv.html index c474100..d094231 100644 --- a/docs/reference/exportTimeToEventToCsv.html +++ b/docs/reference/exportTimeToEventToCsv.html @@ -1,5 +1,5 @@ -export the TimeToEvent results as csv — exportTimeToEventToCsv • Characterizationexport the TimeToEvent results as csv — exportTimeToEventToCsv • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -120,7 +120,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/getDbDuringCovariateData.html b/docs/reference/getDbDuringCovariateData.html index bbaefc5..350d393 100644 --- a/docs/reference/getDbDuringCovariateData.html +++ b/docs/reference/getDbDuringCovariateData.html @@ -1,5 +1,5 @@ -Extracts covariates that occur during a cohort — getDbDuringCovariateData • CharacterizationExtracts covariates that occur during a cohort — getDbDuringCovariateData • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -167,7 +167,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/index.html b/docs/reference/index.html index 892d17a..1c85a9d 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,5 +1,5 @@ -Function reference • CharacterizationFunction reference • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -186,13 +186,11 @@

Incremental

cleanIncremental()

-

Removes csv files from folders that have not been marked as completed -and removes the record of the execution file

+

Removes csv files from folders that have not been marked as completed and removes the record of the execution file

cleanNonIncremental()

-

Removes csv files from the execution folder as there should be no csv files -when running in non-incremental model

+

Removes csv files from the execution folder as there should be no csv files when running in non-incremental mode

diff --git a/docs/reference/insertResultsToDatabase.html b/docs/reference/insertResultsToDatabase.html index 28618e3..7135588 100644 --- a/docs/reference/insertResultsToDatabase.html +++ b/docs/reference/insertResultsToDatabase.html @@ -1,5 +1,5 @@ -Upload the results into a result database — insertResultsToDatabase • CharacterizationUpload the results into a result database — insertResultsToDatabase • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -138,7 +138,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/loadCharacterizationSettings.html b/docs/reference/loadCharacterizationSettings.html index 156ce2a..8ae5921 100644 --- a/docs/reference/loadCharacterizationSettings.html +++ b/docs/reference/loadCharacterizationSettings.html @@ -1,5 +1,5 @@ -Load the characterization settings previously saved as a json file — loadCharacterizationSettings • CharacterizationLoad the characterization settings previously saved as a json file — loadCharacterizationSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -117,7 +117,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/runCharacterizationAnalyses.html b/docs/reference/runCharacterizationAnalyses.html index d5c9ae8..0d64da5 100644 --- a/docs/reference/runCharacterizationAnalyses.html +++ b/docs/reference/runCharacterizationAnalyses.html @@ -1,5 +1,5 @@ -execute a large-scale characterization study — runCharacterizationAnalyses • Characterizationexecute a large-scale characterization study — runCharacterizationAnalyses • CharacterizationSave the characterization settings as a json — saveCharacterizationSettings • CharacterizationSave the characterization settings as a json — saveCharacterizationSettings • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -121,7 +121,7 @@

See also

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/reference/viewCharacterization.html b/docs/reference/viewCharacterization.html index 4718fb4..203c343 100644 --- a/docs/reference/viewCharacterization.html +++ b/docs/reference/viewCharacterization.html @@ -1,5 +1,5 @@ -viewCharacterization - Interactively view the characterization results — viewCharacterization • CharacterizationviewCharacterization - Interactively view the characterization results — viewCharacterization • Characterization @@ -17,7 +17,7 @@ Characterization - 1.1.1 + 2.0.0 @@ -114,7 +114,7 @@

Details

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 24c82df..70efbad 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,6 +3,9 @@ /404.html + + /articles/index.html + /articles/InstallationGuide.html @@ -12,9 +15,6 @@ /articles/UsingPackage.html - - /articles/index.html - /authors.html diff --git a/tests/testthat/test-Incremental.R b/tests/testthat/test-Incremental.R index 428af80..40d4e12 100644 --- a/tests/testthat/test-Incremental.R +++ b/tests/testthat/test-Incremental.R @@ -1,31 +1,32 @@ context("Incremental") -logFolder <- file.path(tempdir(),'log1') +logFolder <- file.path(tempdir(), "log1") on.exit(unlink(logFolder)) -logFolder2 <- file.path(tempdir(),'log2') +logFolder2 <- file.path(tempdir(), "log2") on.exit(unlink(logFolder2)) -logFolder3 <- file.path(tempdir(),'log3') +logFolder3 <- file.path(tempdir(), "log3") on.exit(unlink(logFolder3)) -logFolder4 <- file.path(tempdir(),'log4') +logFolder4 <- file.path(tempdir(), "log4") on.exit(unlink(logFolder4)) -logFolder5 <- file.path(tempdir(),'log5') +logFolder5 <- file.path(tempdir(), "log5") on.exit(unlink(logFolder5)) -logFolder6 <- file.path(tempdir(),'log6') +logFolder6 <- file.path(tempdir(), "log6") on.exit(unlink(logFolder6)) -for(folder in c(logFolder, logFolder2, logFolder3, - logFolder4, logFolder5, logFolder6)){ - if(!dir.exists(folder)){ +for (folder in c( + logFolder, logFolder2, logFolder3, + logFolder4, logFolder5, logFolder6 +)) { + if (!dir.exists(folder)) { dir.create(folder) } } test_that("createIncrementalLog", { - Characterization:::createIncrementalLog( executionFolder = logFolder, - logname = 'execution.csv' - ) + logname = "execution.csv" + ) testthat::expect_true("execution.csv" %in% dir(logFolder)) executionLog <- read.csv(file.path(logFolder, "execution.csv")) testthat::expect_true(nrow(executionLog) == 1) @@ -33,39 +34,35 @@ test_that("createIncrementalLog", { Characterization:::createIncrementalLog( executionFolder = logFolder, - logname = 'madeup.csv' + logname = "madeup.csv" ) testthat::expect_true("madeup.csv" %in% dir(logFolder)) - }) test_that("loadIncrementalFiles", { - # should error as not completed.csv testthat::expect_error( Characterization:::loadIncrementalFiles( - executionFolder = logFolder - ) + executionFolder = logFolder + ) ) # now create the completed.csv Characterization:::createIncrementalLog( executionFolder = logFolder, - logname = 'completed.csv' + logname = "completed.csv" ) result <- Characterization:::loadIncrementalFiles( executionFolder = logFolder ) - testthat::expect_true(sum(c('executed','completed') %in% names(result)) == 2) + testthat::expect_true(sum(c("executed", "completed") %in% names(result)) == 2) testthat::expect_true(nrow(result$executed) == 1) testthat::expect_true(nrow(result$completed) == 1) - }) test_that("getExecutionJobIssues", { - result <- Characterization:::loadIncrementalFiles( executionFolder = logFolder ) @@ -73,16 +70,16 @@ test_that("getExecutionJobIssues", { issues <- Characterization:::getExecutionJobIssues( executed = result$executed, completed = result$completed - ) + ) testthat::expect_true(length(issues) == 0) # now add some executed but not completed results issues <- Characterization:::getExecutionJobIssues( executed = data.frame( - run_date_time = c(1,1), - job_id = c(1,2), - start_time = c(1,2), - end_time = c(1,2) + run_date_time = c(1, 1), + job_id = c(1, 2), + start_time = c(1, 2), + end_time = c(1, 2) ), completed = data.frame( run_date_time = c(1), @@ -95,10 +92,10 @@ test_that("getExecutionJobIssues", { issues <- 
Characterization:::getExecutionJobIssues( executed = data.frame( - run_date_time = c(1,1), - job_id = c(1,20), - start_time = c(1,2), - end_time = c(1,2) + run_date_time = c(1, 1), + job_id = c(1, 20), + start_time = c(1, 2), + end_time = c(1, 2) ), completed = data.frame( run_date_time = c(1), @@ -108,19 +105,17 @@ test_that("getExecutionJobIssues", { ) ) testthat::expect_true(issues == 20) - }) test_that("cleanIncremental", { - # create folder with issues Characterization:::createIncrementalLog( executionFolder = logFolder2, - logname = 'execution.csv' + logname = "execution.csv" ) Characterization:::createIncrementalLog( executionFolder = logFolder2, - logname = 'completed.csv' + logname = "completed.csv" ) # add a job into executed that is not in completed @@ -131,9 +126,9 @@ test_that("cleanIncremental", { start_time = c(1), end_time = c(1) ), - file = file.path(logFolder2, 'execution.csv'), + file = file.path(logFolder2, "execution.csv"), append = T - ) + ) incrementalFiles <- Characterization:::loadIncrementalFiles( executionFolder = logFolder2 @@ -141,26 +136,26 @@ test_that("cleanIncremental", { issues <- Characterization:::getExecutionJobIssues( executed = incrementalFiles$executed, completed = incrementalFiles$completed - ) + ) testthat::expect_true(nrow(incrementalFiles$executed) == 2) testthat::expect_true(nrow(incrementalFiles$completed) == 1) testthat::expect_true(length(issues) == 1) - dir.create(file.path(logFolder2, '1')) + dir.create(file.path(logFolder2, "1")) write.csv( - x = data.frame(a=1), - file = file.path(logFolder2, '1', 'madeup.csv') - ) - testthat::expect_true(file.exists(file.path(logFolder2, '1', 'madeup.csv'))) + x = data.frame(a = 1), + file = file.path(logFolder2, "1", "madeup.csv") + ) + testthat::expect_true(file.exists(file.path(logFolder2, "1", "madeup.csv"))) # run clean to fix issues Characterization:::cleanIncremental( executionFolder = logFolder2 - ) + ) # check issues are fixed - testthat::expect_true(!file.exists(file.path(logFolder2, '1', 'madeup.csv'))) + testthat::expect_true(!file.exists(file.path(logFolder2, "1", "madeup.csv"))) incrementalFiles <- Characterization:::loadIncrementalFiles( executionFolder = logFolder2 ) @@ -172,24 +167,22 @@ test_that("cleanIncremental", { testthat::expect_true(nrow(incrementalFiles$executed) == 1) testthat::expect_true(nrow(incrementalFiles$completed) == 1) testthat::expect_true(length(issues) == 0) - }) test_that("checkResultFilesIncremental ", { - # create folder with issues Characterization:::createIncrementalLog( executionFolder = logFolder3, - logname = 'execution.csv' + logname = "execution.csv" ) Characterization:::createIncrementalLog( executionFolder = logFolder3, - logname = 'completed.csv' + logname = "completed.csv" ) result <- Characterization:::checkResultFilesIncremental( executionFolder = logFolder3 - ) + ) testthat::expect_true(is.null(result)) # add a job into executed that is not in completed @@ -200,26 +193,24 @@ test_that("checkResultFilesIncremental ", { start_time = c(1), end_time = c(1) ), - file = file.path(logFolder3, 'execution.csv'), + file = file.path(logFolder3, "execution.csv"), append = T ) testthat::expect_error(Characterization:::checkResultFilesIncremental( executionFolder = logFolder3 )) - }) test_that("checkResultFilesIncremental ", { - # create folder with issues Characterization:::createIncrementalLog( executionFolder = logFolder4, - logname = 'execution.csv' + logname = "execution.csv" ) Characterization:::createIncrementalLog( executionFolder = logFolder4, - logname 
= 'completed.csv' + logname = "completed.csv" ) # add a job into executed and completed @@ -230,7 +221,7 @@ test_that("checkResultFilesIncremental ", { start_time = c(1), end_time = c(1) ), - file = file.path(logFolder4, 'execution.csv'), + file = file.path(logFolder4, "execution.csv"), append = T ) readr::write_csv( @@ -240,69 +231,66 @@ test_that("checkResultFilesIncremental ", { start_time = c(1), end_time = c(1) ), - file = file.path(logFolder4, 'completed.csv'), + file = file.path(logFolder4, "completed.csv"), append = T ) -jobs <- Characterization:::findCompletedJobs(logFolder4) -testthat::expect_true(1 %in% jobs) - + jobs <- Characterization:::findCompletedJobs(logFolder4) + testthat::expect_true(1 %in% jobs) }) test_that("recordIncremental ", { Characterization:::createIncrementalLog( executionFolder = logFolder6, - logname = 'execution.csv' + logname = "execution.csv" ) execution <- read.csv( - file = file.path(logFolder6, 'execution.csv') + file = file.path(logFolder6, "execution.csv") ) - testthat::expect_true(!'example100' %in% execution$job_id) + testthat::expect_true(!"example100" %in% execution$job_id) -Characterization:::recordIncremental( + Characterization:::recordIncremental( executionFolder = logFolder6, runDateTime = Sys.time(), - jobId = 'example100', + jobId = "example100", startTime = Sys.time(), endTime = Sys.time(), - logname = 'execution.csv' -) + logname = "execution.csv" + ) executionJobs <- read.csv( - file = file.path(logFolder6, 'execution.csv') - ) - testthat::expect_true('example100' %in% executionJobs$job_id) + file = file.path(logFolder6, "execution.csv") + ) + testthat::expect_true("example100" %in% executionJobs$job_id) # test warning if no file testthat::expect_warning( Characterization:::recordIncremental( executionFolder = logFolder6, runDateTime = 1, - jobId = 'example100', + jobId = "example100", startTime = 1, endTime = 1, - logname = 'execution2.csv' + logname = "execution2.csv" ) ) - }) test_that("No Incremental works", { - result <- Characterization:::checkResultFilesNonIncremental( executionFolder = logFolder5 ) testthat::expect_true(is.null(result)) dir.create( - path = file.path(logFolder5, 'job_1'), + path = file.path(logFolder5, "job_1"), recursive = T ) - on.exit(unlink(file.path(logFolder5, 'job_1'))) + on.exit(unlink(file.path(logFolder5, "job_1"))) write.csv( - x = data.frame(a=1), - file = file.path(logFolder5, 'job_1', 'anyCsvFile.csv') + x = data.frame(a = 1), + file = file.path(logFolder5, "job_1", "anyCsvFile.csv") ) # now there is a csv file it should error @@ -313,10 +301,9 @@ test_that("No Incremental works", { ) # this should clean the folder of any csv files -Characterization:::cleanNonIncremental( - executionFolder = logFolder5 + Characterization:::cleanNonIncremental( + executionFolder = logFolder5 ) -# previously created csv should have been deleted -testthat::expect_true(length(dir(file.path(logFolder5, 'job_1'))) == 0) - + # previously created csv should have been deleted + testthat::expect_true(length(dir(file.path(logFolder5, "job_1"))) == 0) }) diff --git a/tests/testthat/test-aggregateCovariate.R b/tests/testthat/test-aggregateCovariate.R index 335ce2d..441981c 100644 --- a/tests/testthat/test-aggregateCovariate.R +++ b/tests/testthat/test-aggregateCovariate.R @@ -55,16 +55,16 @@ test_that("createAggregateCovariateSettings", { ) testthat::expect_equal( - res$riskWindowStart,2 + res$riskWindowStart, 2 ) testthat::expect_equal( res$startAnchor, "cohort end" ) testthat::expect_equal( - res$riskWindowEnd,363 + 
res$riskWindowEnd, 363 ) testthat::expect_equal( - res$endAnchor,"cohort end" + res$endAnchor, "cohort end" ) testthat::expect_equal( @@ -81,8 +81,6 @@ test_that("createAggregateCovariateSettings", { res$casePostOutcomeDuration, 120 ) - - }) test_that("error when using temporal features", { @@ -120,7 +118,6 @@ test_that("error when using temporal features", { minCharacterizationMean = 0.01 ) ) - }) test_that("createAggregateCovariateSettingsList", { @@ -159,8 +156,8 @@ test_that("createExecutionIds", { testthat::expect_true(length(testIds) == 10) testthat::expect_true(length(unique(testIds)) == 10) - testId1 <- createExecutionIds(1) - testId2 <- createExecutionIds(1) + testId1 <- createExecutionIds(1) + testId2 <- createExecutionIds(1) testthat::expect_true(testId1 != testId2) }) @@ -177,7 +174,7 @@ test_that("getAggregateCovariatesJobs", { useConditionOccurrenceDuring = T ) - minPriorObservation <- sample(30,1) + minPriorObservation <- sample(30, 1) res <- createAggregateCovariateSettings( targetIds = targetIds, @@ -193,22 +190,24 @@ test_that("getAggregateCovariatesJobs", { jobDf <- getAggregateCovariatesJobs( characterizationSettings = Characterization::createCharacterizationSettings( aggregateCovariateSettings = res - ), + ), threads = 1 ) testthat::expect_true( - sum(c('computeTargetAggregateCovariateAnalyses', - 'computeCaseAggregateCovariateAnalyses') %in% + sum(c( + "computeTargetAggregateCovariateAnalyses", + "computeCaseAggregateCovariateAnalyses" + ) %in% jobDf$functionName) == 2 - ) + ) testthat::expect_true(nrow(jobDf) == 2) testthat::expect_true( - paste0('tac_1_',minPriorObservation) %in% jobDf$executionFolder - ) + paste0("tac_1_", minPriorObservation) %in% jobDf$executionFolder + ) testthat::expect_true( - paste0('cac_1_',minPriorObservation, '_1_365_365') %in% jobDf$executionFolder + paste0("cac_1_", minPriorObservation, "_1_365_365") %in% jobDf$executionFolder ) settings <- ParallelLogger::convertJsonToSettings(jobDf$settings[1]) @@ -234,14 +233,18 @@ test_that("getAggregateCovariatesJobs", { testthat::expect_true(nrow(jobDf) == 4) testthat::expect_true( - sum(c(paste0('tac_1_',minPriorObservation), - paste0('tac_2_',minPriorObservation)) - %in% jobDf$executionFolder ) == 2 + sum(c( + paste0("tac_1_", minPriorObservation), + paste0("tac_2_", minPriorObservation) + ) + %in% jobDf$executionFolder) == 2 ) testthat::expect_true( - sum(c(paste0('cac_1_',minPriorObservation, '_1_365_365'), - paste0('cac_2_',minPriorObservation, '_1_365_365')) - %in% jobDf$executionFolder) == 2 + sum(c( + paste0("cac_1_", minPriorObservation, "_1_365_365"), + paste0("cac_2_", minPriorObservation, "_1_365_365") + ) + %in% jobDf$executionFolder) == 2 ) # now check threads = 3 @@ -251,7 +254,7 @@ test_that("getAggregateCovariatesJobs", { ), threads = 3 ) - testthat::expect_true(nrow(jobDf) == 2*3) + testthat::expect_true(nrow(jobDf) == 2 * 3) # now check threads = 4 jobDf <- getAggregateCovariatesJobs( @@ -272,11 +275,13 @@ test_that("getAggregateCovariatesJobs", { testthat::expect_true(nrow(jobDf) == 7) testthat::expect_true( - length(unique(unlist(lapply(1:nrow(jobDf), - function(i){ - ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId - } - )))) == 2) + length(unique(unlist(lapply( + 1:nrow(jobDf), + function(i) { + ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId + } + )))) == 2 + ) # add more settings @@ -291,7 +296,7 @@ test_that("getAggregateCovariatesJobs", { caseCovariateSettings = caseCovariateSettings ) - jobDf <- getAggregateCovariatesJobs( + jobDf <- 
getAggregateCovariatesJobs( characterizationSettings = createCharacterizationSettings( aggregateCovariateSettings = list(res, res2) ), @@ -299,25 +304,29 @@ test_that("getAggregateCovariatesJobs", { ) testthat::expect_true(nrow(jobDf) == 4) testthat::expect_true( - length(unique(unlist(lapply(1:nrow(jobDf), - function(i){ - ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId - } - )))) == 4) + length(unique(unlist(lapply( + 1:nrow(jobDf), + function(i) { + ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId + } + )))) == 4 + ) jobDf <- getAggregateCovariatesJobs( - characterizationSettings = createCharacterizationSettings( - aggregateCovariateSettings = list(res, res2) - ), - threads = 3 - ) + characterizationSettings = createCharacterizationSettings( + aggregateCovariateSettings = list(res, res2) + ), + threads = 3 + ) testthat::expect_true(nrow(jobDf) == 12) testthat::expect_true( - length(unique(unlist(lapply(1:nrow(jobDf), - function(i){ - ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId - } - )))) == 4) + length(unique(unlist(lapply( + 1:nrow(jobDf), + function(i) { + ParallelLogger::convertJsonToSettings(jobDf$settings[i])$settingId + } + )))) == 4 + ) # test when extractNonCaseCovariates = F @@ -325,7 +334,7 @@ test_that("getAggregateCovariatesJobs", { targetIds = 1, outcomeIds = 3, extractNonCaseCovariates = F - ) + ) jobDf <- Characterization:::getAggregateCovariatesJobs( characterizationSettings = createCharacterizationSettings( aggregateCovariateSettings = list(res3) @@ -347,7 +356,6 @@ test_that("getAggregateCovariatesJobs", { ) # add checks - }) test_that("computeTargetAggregateCovariateAnalyses", { @@ -373,10 +381,10 @@ test_that("computeTargetAggregateCovariateAnalyses", { caseCovariateSettings = caseCovariateSettings ) - jobDf <- getAggregateCovariatesJobs( + jobDf <- getAggregateCovariatesJobs( characterizationSettings = createCharacterizationSettings( aggregateCovariateSettings = res - ), + ), threads = 1 ) @@ -388,25 +396,25 @@ test_that("computeTargetAggregateCovariateAnalyses", { targetTable = "cohort", settings = ParallelLogger::convertJsonToSettings(jobDf$settings[1]), minCharacterizationMean = 0.01, - databaseId = 'madeup', + databaseId = "madeup", outputFolder = tempFolder1 ) # check incremental does not run testthat::expect_true( - sum(c('cohort_details.csv', - 'settings.csv', - 'covariates.csv', - 'covariates_continuous.csv', - 'cohort_counts.csv', - 'covariate_ref.csv', - 'analysis_ref.csv' - ) %in% dir(tempFolder1) - ) == length(dir(tempFolder1)) + sum(c( + "cohort_details.csv", + "settings.csv", + "covariates.csv", + "covariates_continuous.csv", + "cohort_counts.csv", + "covariate_ref.csv", + "analysis_ref.csv" + ) %in% dir(tempFolder1)) == length(dir(tempFolder1)) ) # check cohortCounts is done for all cohortDetails <- readr::read_csv( - file.path(tempFolder1,'cohort_details.csv'), + file.path(tempFolder1, "cohort_details.csv"), show_col_types = F ) testthat::expect_true( @@ -416,8 +424,8 @@ test_that("computeTargetAggregateCovariateAnalyses", { nrow(cohortDetails) == 8 ) - aggCovs <- readr::read_csv( - file = file.path(tempFolder1, 'covariates.csv'), + aggCovs <- readr::read_csv( + file = file.path(tempFolder1, "covariates.csv"), show_col_types = F ) # check covariates is unique @@ -427,9 +435,8 @@ test_that("computeTargetAggregateCovariateAnalyses", { # check databaseId is added testthat::expect_true( - aggCovs$database_id[1] == 'madeup' + aggCovs$database_id[1] == "madeup" ) - }) @@ -459,7 +466,7 @@ 
test_that("computeCaseAggregateCovariateAnalyses", { jobDf <- getAggregateCovariatesJobs( characterizationSettings = createCharacterizationSettings( aggregateCovariateSettings = res - ), + ), threads = 1 ) @@ -471,36 +478,36 @@ test_that("computeCaseAggregateCovariateAnalyses", { targetTable = "cohort", settings = ParallelLogger::convertJsonToSettings(jobDf$settings[2]), minCharacterizationMean = 0.01, - databaseId = 'madeup', + databaseId = "madeup", outputFolder = tempFolder2 ) # check incremental does not run testthat::expect_true( - sum(c('cohort_details.csv', - 'settings.csv', - 'covariates.csv', - 'covariates_continuous.csv', - 'cohort_counts.csv', - 'covariate_ref.csv', - 'analysis_ref.csv' - ) %in% dir(tempFolder2) - ) == length(dir(tempFolder2)) + sum(c( + "cohort_details.csv", + "settings.csv", + "covariates.csv", + "covariates_continuous.csv", + "cohort_counts.csv", + "covariate_ref.csv", + "analysis_ref.csv" + ) %in% dir(tempFolder2)) == length(dir(tempFolder2)) ) # check cohortCounts is done for all cohortDetails <- readr::read_csv( - file.path(tempFolder2,'cohort_details.csv'), + file.path(tempFolder2, "cohort_details.csv"), show_col_types = F ) testthat::expect_true( nrow(unique(cohortDetails)) == nrow(cohortDetails) ) testthat::expect_true( - nrow(cohortDetails) == 3*5 + nrow(cohortDetails) == 3 * 5 ) - aggCovs <- readr::read_csv( - file = file.path(tempFolder2, 'covariates.csv'), + aggCovs <- readr::read_csv( + file = file.path(tempFolder2, "covariates.csv"), show_col_types = F ) # check covariates is unique @@ -510,7 +517,6 @@ test_that("computeCaseAggregateCovariateAnalyses", { # check databaseId is added testthat::expect_true( - aggCovs$database_id[1] == 'madeup' + aggCovs$database_id[1] == "madeup" ) - }) diff --git a/tests/testthat/test-dbs.R b/tests/testthat/test-dbs.R index 721735c..703121a 100644 --- a/tests/testthat/test-dbs.R +++ b/tests/testthat/test-dbs.R @@ -8,7 +8,7 @@ dbmsPlatforms <- c( "snowflake", "spark", "sql server" - ) +) getPlatformConnectionDetails <- function(dbmsPlatform) { # Get drivers for test platform @@ -52,7 +52,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { vocabularyDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_CDM_SCHEMA") cohortDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA") options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA")) - } else { + } else { return(NULL) } } else if (dbmsPlatform == "oracle") { @@ -103,19 +103,18 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA")) } else if (dbmsPlatform == "spark") { if (.Platform$OS.type == "windows") { # skipping Mac for GHA due to JAVA issue - connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = dbmsPlatform, - user = Sys.getenv("CDM5_SPARK_USER"), - password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")), - connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING"), - pathToDriver = jdbcDriverFolder - ) - cdmDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA") - vocabularyDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA") - cohortDatabaseSchema <- Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA") - options(sqlRenderTempEmulationSchema = Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA")) - } - else{ + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = dbmsPlatform, + user = Sys.getenv("CDM5_SPARK_USER"), + password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")), + connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING"), + 
pathToDriver = jdbcDriverFolder
+      )
+      cdmDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
+      vocabularyDatabaseSchema <- Sys.getenv("CDM5_SPARK_CDM_SCHEMA")
+      cohortDatabaseSchema <- Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA")
+      options(sqlRenderTempEmulationSchema = Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA"))
+    } else {
       return(NULL)
     }
   } else if (dbmsPlatform == "sql server") {
@@ -147,13 +146,13 @@ getPlatformConnectionDetails <- function(dbmsPlatform) {
   ))
 }
 
-for(dbmsPlatform in dbmsPlatforms){
-  if(Sys.getenv('CI') == 'true' & .Platform$OS.type == "windows"){
+for (dbmsPlatform in dbmsPlatforms) {
+  if (Sys.getenv("CI") == "true" & .Platform$OS.type == "windows") {
     tempFolder <- tempfile(paste0("Characterization_", dbmsPlatform))
     on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)
 
     dbmsDetails <- getPlatformConnectionDetails(dbmsPlatform)
-    if(!is.null(dbmsDetails)){
+    if (!is.null(dbmsDetails)) {
       con <- DatabaseConnector::connect(dbmsDetails$connectionDetails)
       on.exit(DatabaseConnector::disconnect(con))
     }
@@ -161,11 +160,10 @@ for(dbmsPlatform in dbmsPlatforms){
 
   # This file contains platform specific tests
   test_that(paste0("platform specific test ", dbmsPlatform), {
-    skip_if(Sys.getenv('CI') != 'true' | .Platform$OS.type != "windows", 'not run locally')
+    skip_if(Sys.getenv("CI") != "true" | .Platform$OS.type != "windows", "not run locally")
     if (is.null(dbmsDetails)) {
       print(paste("No platform details available for", dbmsPlatform))
     } else {
-
       # create a cohort table
       DatabaseConnector::insertTable(
         bulkLoad = F,
@@ -175,9 +173,10 @@ for(dbmsPlatform in dbmsPlatforms){
         data = data.frame(
           subject_id = 1:10,
           cohort_definition_id = sample(4, 10, replace = T),
-          cohort_start_date = rep(as.Date('2010-01-01'), 10),
-          cohort_end_date = rep(as.Date('2010-01-01'), 10)
-        ))
+          cohort_start_date = rep(as.Date("2010-01-01"), 10),
+          cohort_end_date = rep(as.Date("2010-01-01"), 10)
+        )
+      )
 
       targetIds <- c(1, 2, 4)
       outcomeIds <- c(3)
@@ -246,9 +245,9 @@ for(dbmsPlatform in dbmsPlatforms){
         outcomeDatabaseSchema = dbmsDetails$cohortDatabaseSchema,
         outcomeTable = dbmsDetails$cohortTable,
         characterizationSettings = characterizationSettings,
-        outputDirectory = file.path(tempFolder, 'csv'),
-        executionPath = file.path(tempFolder, 'execution'),
-        csvFilePrefix = "c_",
+        outputDirectory = file.path(tempFolder, "csv"),
+        executionPath = file.path(tempFolder, "execution"),
+        csvFilePrefix = "c_",
         threads = 1,
         databaseId = dbmsDetails$connectionDetails$dbms
       )
diff --git a/tests/testthat/test-dechallengeRechallenge.R b/tests/testthat/test-dechallengeRechallenge.R
index 7718302..cd01401 100644
--- a/tests/testthat/test-dechallengeRechallenge.R
+++ b/tests/testthat/test-dechallengeRechallenge.R
@@ -139,16 +139,14 @@ test_that("computeDechallengeRechallengeAnalyses", {
     outputFolder = dcLoc,
     minCellCount = 0
   )
-  dc <- readr::read_csv(file.path(dcLoc,'dechallenge_rechallenge.csv'), show_col_types = F)
+  dc <- readr::read_csv(file.path(dcLoc, "dechallenge_rechallenge.csv"), show_col_types = F)
 
   # one T and 2 Os, but only one T-O combination produces results, so expect 1 row
   testthat::expect_true(nrow(dc) == 1)
   testthat::expect_true(dc$num_persons_exposed == 4)
   testthat::expect_true(dc$num_exposure_eras == 10)
-
 })
 
 test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
-
   # check with made up data
   # subject 1 has 1 exposure for 30 days
   # subject 2 has 4 exposures for ~30 days with ~30 day gaps
@@ -223,7 +221,7 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", {
   )
 
   # person 2 should be in results
-  dc <- 
readr::read_csv(file.path(dcLoc,'rechallenge_fail_case_series.csv'), show_col_types = F) + dc <- readr::read_csv(file.path(dcLoc, "rechallenge_fail_case_series.csv"), show_col_types = F) testthat::expect_equal(nrow(dc), 1) testthat::expect_true(is.na(dc$subject_id)) @@ -243,7 +241,7 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", { ) # person 2 should be in results - dc <- readr::read_csv(file.path(dcLoc,'rechallenge_fail_case_series.csv'), show_col_types = F) + dc <- readr::read_csv(file.path(dcLoc, "rechallenge_fail_case_series.csv"), show_col_types = F) testthat::expect_equal(nrow(dc), 1) testthat::expect_equal(dc$subject_id, 2) @@ -265,8 +263,8 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", { # checking minCellCount # person 2 should be in results but all min cell count # values should be censored - dr <- readr::read_csv(file.path(dcLoc,'dechallenge_rechallenge.csv'), show_col_types = F) - testthat::expect_true(nrow(dr) > 0 ) + dr <- readr::read_csv(file.path(dcLoc, "dechallenge_rechallenge.csv"), show_col_types = F) + testthat::expect_true(nrow(dr) > 0) testthat::expect_equal(max(dr$num_persons_exposed), -9999) testthat::expect_equal(max(dr$num_cases), -9999) testthat::expect_equal(max(dr$dechallenge_attempt), -9999) @@ -275,13 +273,12 @@ test_that("computeRechallengeFailCaseSeriesAnalyses with known data", { testthat::expect_equal(max(dr$rechallenge_attempt), -9999) testthat::expect_equal(max(dr$rechallenge_fail), -9999) testthat::expect_equal(max(dr$rechallenge_success), -9999) - }) # add test for job creation code test_that("computeDechallengeRechallengeAnalyses", { - targetIds <- c(2,5,6,7,8) + targetIds <- c(2, 5, 6, 7, 8) outcomeIds <- c(3, 4, 9, 10) res <- createDechallengeRechallengeSettings( @@ -290,100 +287,98 @@ test_that("computeDechallengeRechallengeAnalyses", { dechallengeStopInterval = 30, dechallengeEvaluationWindow = 30 ) -jobs <- Characterization:::getDechallengeRechallengeJobs( - characterizationSettings = createCharacterizationSettings( - dechallengeRechallengeSettings = res + jobs <- Characterization:::getDechallengeRechallengeJobs( + characterizationSettings = createCharacterizationSettings( + dechallengeRechallengeSettings = res ), - threads = 1 - ) - -# as 1 thread should be 2 rows for two analyses -testthat::expect_true(nrow(jobs) == 2) - -# check all target ids are in there -targetIdFromSettings <- do.call( - what = unique, - args = lapply(1:nrow(jobs), function(i){ - ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds} - ) -) -testthat::expect_true(sum(targetIds %in% targetIdFromSettings) == - length(targetIds)) - -# check all outcome ids are in there -outcomeIdFromSettings <- do.call( - what = unique, - args = lapply(1:nrow(jobs), function(i){ - ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds} + threads = 1 ) -) -testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) == - length(outcomeIds)) + # as 1 thread should be 2 rows for two analyses + testthat::expect_true(nrow(jobs) == 2) -# checking more threads 3 -jobs <- Characterization:::getDechallengeRechallengeJobs( - characterizationSettings = createCharacterizationSettings( - dechallengeRechallengeSettings = res - ), - threads = 3 -) + # check all target ids are in there + targetIdFromSettings <- do.call( + what = unique, + args = lapply(1:nrow(jobs), function(i) { + ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds + }) + ) + 
testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
+    length(targetIds))
+
+  # check all outcome ids are in there
+  outcomeIdFromSettings <- do.call(
+    what = unique,
+    args = lapply(1:nrow(jobs), function(i) {
+      ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds
+    })
+  )
+  testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
+    length(outcomeIds))
 
-# checking more threads 3
-jobs <- Characterization:::getDechallengeRechallengeJobs(
-  characterizationSettings = createCharacterizationSettings(
-    dechallengeRechallengeSettings = res
-  ),
-  threads = 3
-)
+
+  # checking more threads 3
+  jobs <- Characterization:::getDechallengeRechallengeJobs(
+    characterizationSettings = createCharacterizationSettings(
+      dechallengeRechallengeSettings = res
+    ),
+    threads = 3
+  )
 
-# as 3 thread should be 2*3 rows for two analyses
-testthat::expect_true(nrow(jobs) == 2*3)
+
+  # with 3 threads, should be 2*3 rows for the two analyses
+  testthat::expect_true(nrow(jobs) == 2 * 3)
 
-# check all target ids are in there
-targetIdFromSettings <- do.call(
-  what = c,
-  args = lapply(1:nrow(jobs), function(i){
-    ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds}
+
+  # check all target ids are in there
+  targetIdFromSettings <- do.call(
+    what = c,
+    args = lapply(1:nrow(jobs), function(i) {
+      ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds
+    })
   )
-)
-testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
-  length(targetIds))
-
-# check all outcome ids are in there
-outcomeIdFromSettings <- do.call(
-  what =c,
-  args = lapply(1:nrow(jobs), function(i){
-    ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds}
+  testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
+    length(targetIds))
+
+  # check all outcome ids are in there
+  outcomeIdFromSettings <- do.call(
+    what = c,
+    args = lapply(1:nrow(jobs), function(i) {
+      ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds
+    })
   )
-)
-testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
-  length(outcomeIds))
+  testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) ==
+    length(outcomeIds))
 
-# checking more threads than needed 20
-jobs <- Characterization:::getDechallengeRechallengeJobs(
-  characterizationSettings = createCharacterizationSettings(
-    dechallengeRechallengeSettings = res
-  ),
-  threads = 20
-)
+
+  # checking more threads than needed 20
+  jobs <- Characterization:::getDechallengeRechallengeJobs(
+    characterizationSettings = createCharacterizationSettings(
+      dechallengeRechallengeSettings = res
+    ),
+    threads = 20
+  )
 
-# as 3 thread should be 2*5 rows for two analyses
-testthat::expect_true(nrow(jobs) == 2*5)
+
+  # with 20 threads but only 5 target ids, capped at 2*5 rows for the two analyses
+  testthat::expect_true(nrow(jobs) == 2 * 5)
 
-# check all target ids are in there
-targetIdFromSettings <- do.call(
-  what = c,
-  args = lapply(1:nrow(jobs), function(i){
-    ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds}
+
+  # check all target ids are in there
+  targetIdFromSettings <- do.call(
+    what = c,
+    args = lapply(1:nrow(jobs), function(i) {
+      ParallelLogger::convertJsonToSettings(jobs$settings[i])$targetCohortDefinitionIds
+    })
   )
-)
-testthat::expect_true(sum(targetIds %in% targetIdFromSettings) ==
-  length(targetIds))
-
-# check all outcome ids are in 
there + outcomeIdFromSettings <- do.call( + what = c, + args = lapply(1:nrow(jobs), function(i) { + ParallelLogger::convertJsonToSettings(jobs$settings[i])$outcomeCohortDefinitionIds + }) ) -) -testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) == - length(outcomeIds)) - + testthat::expect_true(sum(outcomeIds %in% outcomeIdFromSettings) == + length(outcomeIds)) }) - diff --git a/tests/testthat/test-manualData.R b/tests/testthat/test-manualData.R index 772bd3a..a8728d8 100644 --- a/tests/testthat/test-manualData.R +++ b/tests/testthat/test-manualData.R @@ -1,318 +1,318 @@ context("manual data") -manualData <- file.path(tempdir(), 'manual.sqlite') +manualData <- file.path(tempdir(), "manual.sqlite") on.exit(file.remove(manualData), add = TRUE) -manualData2 <- file.path(tempdir(), 'manual2.sqlite') +manualData2 <- file.path(tempdir(), "manual2.sqlite") on.exit(file.remove(manualData2), add = TRUE) test_that("manual data runCharacterizationAnalyses", { - # this test creates made-up OMOP CDM data # and runs runCharacterizationAnalyses on the data # to check whether the results are as expected -connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = 'sqlite', - server = manualData + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "sqlite", + server = manualData ) -con <- DatabaseConnector::connect(connectionDetails = connectionDetails) -schema <- 'main' - -# add persons - aggregate covs (age) -persons <- data.frame( - person_id = 1:10, - gender_concept_id = rep(8532, 10), - year_of_birth = rep(2000, 10), - race_concept_id = rep(1, 10), - ethnicity_concept_id = rep(1, 10), - location_id = rep(1,10), - provider_id = rep(1,10), - care_site_id = rep(1,10), - person_source_value = 1:10, - gender_source_value = rep('female', 10), - race_source_value = rep('na', 10), - ethnicity_source_value = rep('na', 10) -) -DatabaseConnector::insertTable( - connection = con, - databaseSchema = schema, - tableName = 'person', - data = persons - ) + con <- DatabaseConnector::connect(connectionDetails = connectionDetails) + schema <- "main" -# observation period -obs_period <- data.frame( - observation_period_id = 1:10, - person_id = 1:10, - observation_period_start_date = rep('2000-12-31', 10), - observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)), - period_type_concept_id = rep(1,10) -) -obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date) -obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date) -DatabaseConnector::insertTable( - connection = con, - databaseSchema = schema, - tableName = 'observation_period', - data = obs_period -) -# person 1 has 1 day obs -# person 2-6 has no events -# person 7 has diabetes at 10, headache at 12 -# person 8 has diabetes at 13 -# person 9 has headache multiple times -# person 10 has diabetes at 14 -# add conditions - aggregate covs (conditions) - -condition_era <- data.frame( - condition_era_id = 1:7, - person_id = c(7,7,8, 9,9,9,10), - condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820), - condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), - condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), - condition_occurrence_count = rep(1, 7) -) -condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date) 
-condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date) - -DatabaseConnector::insertTable( - connection = con, - databaseSchema = schema, - tableName = 'condition_era', - data = condition_era -) - -# add concept -concept <- data.frame( - concept_id = c(201820,378253), - concept_name = c('diabetes', 'hypertension'), - domain_id = rep(1,2), - vocabulary_id = rep(1,2), - concept_class_id = c('Condition', 'Condition'), - standard_concept = rep('S',2), - concept_code = rep('Snowmed',2) - #,valid_start_date = NULL, - #valid_end_date = NULL, - #invalid_reason = NULL -) -DatabaseConnector::insertTable( - connection = con, - databaseSchema = schema, - tableName = 'concept', - data = concept -) - -# add cohort - tte/dechal/rechal -cohort <- data.frame( - subject_id = c( - 1:10, - 7,8,10, - c(3,6,7,8,10), - c(7) - ), - cohort_definition_id = c( - rep(1,10), - rep(1,3), - rep(2, 5), - 2 - ), - cohort_start_date = c( - rep('2018-01-01', 10), - rep('2018-05-01',3), - '2018-01-13','2018-01-03',rep('2018-01-06',3), - '2018-05-24' - ), - cohort_end_date = c( - rep('2018-02-01', 10), - rep('2018-06-01',3), - '2018-02-02','2018-02-04',rep('2018-02-08',3), - '2018-06-05' + # add persons - aggregate covs (age) + persons <- data.frame( + person_id = 1:10, + gender_concept_id = rep(8532, 10), + year_of_birth = rep(2000, 10), + race_concept_id = rep(1, 10), + ethnicity_concept_id = rep(1, 10), + location_id = rep(1, 10), + provider_id = rep(1, 10), + care_site_id = rep(1, 10), + person_source_value = 1:10, + gender_source_value = rep("female", 10), + race_source_value = rep("na", 10), + ethnicity_source_value = rep("na", 10) ) -) -cohort$cohort_start_date <- as.Date(cohort$cohort_start_date) -cohort$cohort_end_date <- as.Date(cohort$cohort_end_date) -DatabaseConnector::insertTable( - connection = con, - databaseSchema = schema, - tableName = 'cohort', - data = cohort -) - -# create settings and run -characterizationSettings <- Characterization::createCharacterizationSettings( - timeToEventSettings = Characterization::createTimeToEventSettings( - targetIds = 1, - outcomeIds = 2 + DatabaseConnector::insertTable( + connection = con, + databaseSchema = schema, + tableName = "person", + data = persons + ) + + # observation period + obs_period <- data.frame( + observation_period_id = 1:10, + person_id = 1:10, + observation_period_start_date = rep("2000-12-31", 10), + observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)), + period_type_concept_id = rep(1, 10) + ) + obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date) + obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date) + DatabaseConnector::insertTable( + connection = con, + databaseSchema = schema, + tableName = "observation_period", + data = obs_period + ) + # person 1 has 1 day obs + # person 2-6 has no events + # person 7 has diabetes at 10, headache at 12 + # person 8 has diabetes at 13 + # person 9 has headache multiple times + # person 10 has diabetes at 14 + # add conditions - aggregate covs (conditions) + + condition_era <- data.frame( + condition_era_id = 1:7, + person_id = c(7, 7, 8, 9, 9, 9, 10), + condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820), + condition_era_start_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" ), - dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings( - targetIds = 1, - outcomeIds = 2 
+ condition_era_end_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" ), - aggregateCovariateSettings = Characterization::createAggregateCovariateSettings( - targetIds = 1, - outcomeIds = 2, - minPriorObservation = 365, - outcomeWashoutDays = 30, - riskWindowStart = 1, - riskWindowEnd = 90, - covariateSettings = FeatureExtraction::createCovariateSettings( - useDemographicsAge = T, - useDemographicsGender = T, - useConditionEraAnyTimePrior = T - ), - caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T), - casePreTargetDuration = 365*5 - ) -) -Characterization::runCharacterizationAnalyses( - connectionDetails = connectionDetails, - targetDatabaseSchema = schema, - targetTable = 'cohort', - outcomeDatabaseSchema = schema, - outcomeTable = 'cohort', - cdmDatabaseSchema = schema, - characterizationSettings = characterizationSettings, - outputDirectory = file.path(tempdir(), 'result'), - executionPath = file.path(tempdir(), 'execution'), - csvFilePrefix = 'c_', - databaseId = '1', - incremental = T, - threads = 1, - minCharacterizationMean = 0.0001, - minCellCount = NULL, - showSubjectId = T + condition_occurrence_count = rep(1, 7) ) + condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date) + condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date) -# check csv results are as expected - -tte <- read.csv(file.path(tempdir(), 'result','c_time_to_event.csv')) - -# check counts - 1-day subsequent missing? -testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 1-day'])) -#subsequent is > 100 days after first drug so not in the 1-day count -testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 1-day'])) -testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 30-day'])) -testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 30-day'])) -testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == 'first' & tte$time_scale == 'per 365-day'])) -testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == 'subsequent' & tte$time_scale == 'per 365-day'])) - -# check times -testthat::expect_true(sum(c(2,5,12) %in% tte$time_to_event[tte$outcome_type == 'first' & tte$time_scale == 'per 1-day']) == 3) - -# TODO: check in code whether minCellCount < or <= - -dechal <- read.csv(file.path(tempdir(), 'result','c_dechallenge_rechallenge.csv')) -testthat::expect_true(dechal$num_exposure_eras == 13) -testthat::expect_true(dechal$num_persons_exposed == 10) -testthat::expect_true(dechal$num_cases == 6) -testthat::expect_true(dechal$dechallenge_attempt == 5) -testthat::expect_true(dechal$dechallenge_success == 5) -testthat::expect_true(dechal$rechallenge_attempt == 3) - -# one person has a rechal and event stops when second drug exposure stops -testthat::expect_true(dechal$rechallenge_fail == 1) -testthat::expect_true(dechal$rechallenge_success == 2) -testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333) - -failed <- read.csv(file.path(tempdir(), 'result','c_rechallenge_fail_case_series.csv')) -testthat::expect_true(nrow(failed) == 1) -testthat::expect_true(failed$subject_id == 7) -testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31) -testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5) 
-testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120) -testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143) - -# Aggregate covs -#======= -counts <- read.csv(file.path(tempdir(), 'result','c_cohort_counts.csv')) -# when restricted to first exposure 5 people have outcome -testthat::expect_true(counts$row_count[counts$cohort_type == 'Cases'] == 5) -# target is 9 because 1 has insufficient min prior obs -testthat::expect_true(counts$row_count[counts$cohort_type == 'Target' & - counts$target_cohort_id == 1] == 9) -# make sure outcome is there a has count of 5 -testthat::expect_true(counts$row_count[counts$cohort_type == 'Target' & - counts$target_cohort_id == 2] == 5) - -# Tall should not have first restriction -testthat::expect_true(counts$row_count[counts$cohort_type == 'Tall' & - counts$target_cohort_id == 1] == 13) -testthat::expect_true(counts$person_count[counts$cohort_type == 'Tall' & - counts$target_cohort_id == 1] == 10) -# make sure outcome is there a has count of 6 and 5 -testthat::expect_true(counts$row_count[counts$cohort_type == 'Tall' & - counts$target_cohort_id == 2] == 6) -testthat::expect_true(counts$person_count[counts$cohort_type == 'Tall' & - counts$target_cohort_id == 2] == 5) - -covs <- read.csv(file.path(tempdir(), 'result','c_covariates.csv')) - -# checks all females -testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Cases'] == 1) -testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Target' & - covs$target_cohort_id == 1] == 1) -testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 'Target' & - covs$target_cohort_id == 2] == 1) - -## TODO: check diabetes and hypertensions -#covs$covariate_id -# 201820 7,8 and 10 have in history and all are cases -ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 & - covs$cohort_type == 'Cases' -testthat::expect_true(covs$sum_value[ind] == 3) -testthat::expect_true(covs$average_value[ind] == 3/5) - -ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 & - covs$cohort_type == 'Target' -testthat::expect_true(covs$sum_value[ind] == 3) -testthat::expect_equal(covs$average_value[ind], 3/9, tolerance = 0.01) - -# 378253 7,9 (9 multiple times) but 9 not a case -ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 & - covs$cohort_type == 'Cases' -testthat::expect_true(covs$sum_value[ind] == 1) -testthat::expect_true(covs$average_value[ind] == 1/5) - -ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 & - covs$cohort_type == 'Target' -testthat::expect_true(covs$sum_value[ind] == 2) -testthat::expect_equal(covs$average_value[ind], 2/9, tolerance = 0.01) - - -covs_cont <- read.csv(file.path(tempdir(), 'result','c_covariates_continuous.csv')) - -# checks age in years -testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Cases'] == 18) -testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Cases'] == 5) -testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' & - covs_cont$target_cohort_id == 1] == 18) -testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' & - covs_cont$target_cohort_id == 1] == 9) -testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' & - 
covs_cont$target_cohort_id == 2] == 18) -testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == 'Target' & - covs_cont$target_cohort_id == 2] == 5) + DatabaseConnector::insertTable( + connection = con, + databaseSchema = schema, + tableName = "condition_era", + data = condition_era + ) + # add concept + concept <- data.frame( + concept_id = c(201820, 378253), + concept_name = c("diabetes", "hypertension"), + domain_id = rep(1, 2), + vocabulary_id = rep(1, 2), + concept_class_id = c("Condition", "Condition"), + standard_concept = rep("S", 2), + concept_code = rep("Snowmed", 2) + # ,valid_start_date = NULL, + # valid_end_date = NULL, + # invalid_reason = NULL + ) + DatabaseConnector::insertTable( + connection = con, + databaseSchema = schema, + tableName = "concept", + data = concept + ) + # add cohort - tte/dechal/rechal + cohort <- data.frame( + subject_id = c( + 1:10, + 7, 8, 10, + c(3, 6, 7, 8, 10), + c(7) + ), + cohort_definition_id = c( + rep(1, 10), + rep(1, 3), + rep(2, 5), + 2 + ), + cohort_start_date = c( + rep("2018-01-01", 10), + rep("2018-05-01", 3), + "2018-01-13", "2018-01-03", rep("2018-01-06", 3), + "2018-05-24" + ), + cohort_end_date = c( + rep("2018-02-01", 10), + rep("2018-06-01", 3), + "2018-02-02", "2018-02-04", rep("2018-02-08", 3), + "2018-06-05" + ) + ) + cohort$cohort_start_date <- as.Date(cohort$cohort_start_date) + cohort$cohort_end_date <- as.Date(cohort$cohort_end_date) + DatabaseConnector::insertTable( + connection = con, + databaseSchema = schema, + tableName = "cohort", + data = cohort + ) + + # create settings and run + characterizationSettings <- Characterization::createCharacterizationSettings( + timeToEventSettings = Characterization::createTimeToEventSettings( + targetIds = 1, + outcomeIds = 2 + ), + dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings( + targetIds = 1, + outcomeIds = 2 + ), + aggregateCovariateSettings = Characterization::createAggregateCovariateSettings( + targetIds = 1, + outcomeIds = 2, + minPriorObservation = 365, + outcomeWashoutDays = 30, + riskWindowStart = 1, + riskWindowEnd = 90, + covariateSettings = FeatureExtraction::createCovariateSettings( + useDemographicsAge = T, + useDemographicsGender = T, + useConditionEraAnyTimePrior = T + ), + caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T), + casePreTargetDuration = 365 * 5 + ) + ) + Characterization::runCharacterizationAnalyses( + connectionDetails = connectionDetails, + targetDatabaseSchema = schema, + targetTable = "cohort", + outcomeDatabaseSchema = schema, + outcomeTable = "cohort", + cdmDatabaseSchema = schema, + characterizationSettings = characterizationSettings, + outputDirectory = file.path(tempdir(), "result"), + executionPath = file.path(tempdir(), "execution"), + csvFilePrefix = "c_", + databaseId = "1", + incremental = T, + threads = 1, + minCharacterizationMean = 0.0001, + minCellCount = NULL, + showSubjectId = T + ) + + # check csv results are as expected + + tte <- read.csv(file.path(tempdir(), "result", "c_time_to_event.csv")) + + # check counts - 1-day subsequent missing? 
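+  # (per the synthetic cohorts above: subjects 3, 6, 7, 8 and 10 have a first
+  # outcome; subject 7's second outcome is ~143 days after first exposure, so
+  # it is only counted at the 30-day and 365-day scales)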
+  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]))
+  # subsequent is > 100 days after first drug so not in the 1-day count
+  testthat::expect_true(0 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 1-day"]))
+  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 30-day"]))
+  testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 30-day"]))
+  testthat::expect_true(5 == sum(tte$num_events[tte$outcome_type == "first" & tte$time_scale == "per 365-day"]))
+  testthat::expect_true(1 == sum(tte$num_events[tte$outcome_type == "subsequent" & tte$time_scale == "per 365-day"]))
+
+  # check times
+  testthat::expect_true(sum(c(2, 5, 12) %in% tte$time_to_event[tte$outcome_type == "first" & tte$time_scale == "per 1-day"]) == 3)
+
+  # TODO: check in code whether minCellCount < or <=
+
+  dechal <- read.csv(file.path(tempdir(), "result", "c_dechallenge_rechallenge.csv"))
+  testthat::expect_true(dechal$num_exposure_eras == 13)
+  testthat::expect_true(dechal$num_persons_exposed == 10)
+  testthat::expect_true(dechal$num_cases == 6)
+  testthat::expect_true(dechal$dechallenge_attempt == 5)
+  testthat::expect_true(dechal$dechallenge_success == 5)
+  testthat::expect_true(dechal$rechallenge_attempt == 3)
+
+  # one person has a rechallenge and the event stops when the second drug exposure stops
+  testthat::expect_true(dechal$rechallenge_fail == 1)
+  testthat::expect_true(dechal$rechallenge_success == 2)
+  testthat::expect_true(dechal$pct_rechallenge_fail == 0.3333333)
+
+  failed <- read.csv(file.path(tempdir(), "result", "c_rechallenge_fail_case_series.csv"))
+  testthat::expect_true(nrow(failed) == 1)
+  testthat::expect_true(failed$subject_id == 7)
+  testthat::expect_true(failed$dechallenge_exposure_end_date_offset == 31)
+  testthat::expect_true(failed$dechallenge_outcome_start_date_offset == 5)
+  testthat::expect_true(failed$rechallenge_exposure_start_date_offset == 120)
+  testthat::expect_true(failed$rechallenge_outcome_start_date_offset == 143)
+
+  # Aggregate covs
+  # =======
+  counts <- read.csv(file.path(tempdir(), "result", "c_cohort_counts.csv"))
+  # when restricted to first exposure, 5 people have the outcome
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 5)
+  # target is 9 because 1 has insufficient min prior obs
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
+    counts$target_cohort_id == 1] == 9)
+  # make sure the outcome is there and has a count of 5
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Target" &
+    counts$target_cohort_id == 2] == 5)
+
+  # Tall should not have first restriction
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
+    counts$target_cohort_id == 1] == 13)
+  testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
+    counts$target_cohort_id == 1] == 10)
+  # make sure the outcome is there and has counts of 6 and 5
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Tall" &
+    counts$target_cohort_id == 2] == 6)
+  testthat::expect_true(counts$person_count[counts$cohort_type == "Tall" &
+    counts$target_cohort_id == 2] == 5)
+
+  covs <- read.csv(file.path(tempdir(), "result", "c_covariates.csv"))
+
+  # checks all females
+  testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Cases"] == 1)
+  testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == 
"Target" & + covs$target_cohort_id == 1] == 1) + testthat::expect_true(covs$average_value[covs$covariate_id == 8532001 & covs$cohort_type == "Target" & + covs$target_cohort_id == 2] == 1) + + ## TODO: check diabetes and hypertensions + # covs$covariate_id + # 201820 7,8 and 10 have in history and all are cases + ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 & + covs$cohort_type == "Cases" + testthat::expect_true(covs$sum_value[ind] == 3) + testthat::expect_true(covs$average_value[ind] == 3 / 5) + + ind <- covs$covariate_id == 201820201 & covs$target_cohort_id == 1 & + covs$cohort_type == "Target" + testthat::expect_true(covs$sum_value[ind] == 3) + testthat::expect_equal(covs$average_value[ind], 3 / 9, tolerance = 0.01) + + # 378253 7,9 (9 multiple times) but 9 not a case + ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 & + covs$cohort_type == "Cases" + testthat::expect_true(covs$sum_value[ind] == 1) + testthat::expect_true(covs$average_value[ind] == 1 / 5) + + ind <- covs$covariate_id == 378253201 & covs$target_cohort_id == 1 & + covs$cohort_type == "Target" + testthat::expect_true(covs$sum_value[ind] == 2) + testthat::expect_equal(covs$average_value[ind], 2 / 9, tolerance = 0.01) + + + covs_cont <- read.csv(file.path(tempdir(), "result", "c_covariates_continuous.csv")) + + # checks age in years + testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 18) + testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Cases"] == 5) + testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" & + covs_cont$target_cohort_id == 1] == 18) + testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" & + covs_cont$target_cohort_id == 1] == 9) + testthat::expect_true(covs_cont$average_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" & + covs_cont$target_cohort_id == 2] == 18) + testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & covs_cont$cohort_type == "Target" & + covs_cont$target_cohort_id == 2] == 5) }) test_that("manual data checking exclude count works", { - # this test creates made-up OMOP CDM data # and runs runCharacterizationAnalyses on the data # to check whether the results are as expected connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = 'sqlite', + dbms = "sqlite", server = manualData2 ) con <- DatabaseConnector::connect(connectionDetails = connectionDetails) - schema <- 'main' + schema <- "main" # add persons - aggregate covs (age) persons <- data.frame( @@ -321,18 +321,18 @@ test_that("manual data checking exclude count works", { year_of_birth = rep(2000, 10), race_concept_id = rep(1, 10), ethnicity_concept_id = rep(1, 10), - location_id = rep(1,10), - provider_id = rep(1,10), - care_site_id = rep(1,10), + location_id = rep(1, 10), + provider_id = rep(1, 10), + care_site_id = rep(1, 10), person_source_value = 1:10, - gender_source_value = rep('female', 10), - race_source_value = rep('na', 10), - ethnicity_source_value = rep('na', 10) + gender_source_value = rep("female", 10), + race_source_value = rep("na", 10), + ethnicity_source_value = rep("na", 10) ) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'person', + tableName = "person", data = persons ) @@ -340,16 +340,16 @@ test_that("manual data checking exclude count 
works", { obs_period <- data.frame( observation_period_id = 1:10, person_id = 1:10, - observation_period_start_date = rep('2000-12-31', 10), - observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)), - period_type_concept_id = rep(1,10) + observation_period_start_date = rep("2000-12-31", 10), + observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)), + period_type_concept_id = rep(1, 10) ) obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date) obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'observation_period', + tableName = "observation_period", data = obs_period ) # person 1 has 1 day obs @@ -362,14 +362,18 @@ test_that("manual data checking exclude count works", { condition_era <- data.frame( condition_era_id = 1:7, - person_id = c(7,7,8, 9,9,9,10), - condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820), - condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), - condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), + person_id = c(7, 7, 8, 9, 9, 9, 10), + condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820), + condition_era_start_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" + ), + condition_era_end_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" + ), condition_occurrence_count = rep(1, 7) ) condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date) @@ -378,27 +382,27 @@ test_that("manual data checking exclude count works", { DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'condition_era', + tableName = "condition_era", data = condition_era ) # add concept concept <- data.frame( - concept_id = c(201820,378253), - concept_name = c('diabetes', 'hypertension'), - domain_id = rep(1,2), - vocabulary_id = rep(1,2), - concept_class_id = c('Condition', 'Condition'), - standard_concept = rep('S',2), - concept_code = rep('Snowmed',2) - #,valid_start_date = NULL, - #valid_end_date = NULL, - #invalid_reason = NULL + concept_id = c(201820, 378253), + concept_name = c("diabetes", "hypertension"), + domain_id = rep(1, 2), + vocabulary_id = rep(1, 2), + concept_class_id = c("Condition", "Condition"), + standard_concept = rep("S", 2), + concept_code = rep("Snowmed", 2) + # ,valid_start_date = NULL, + # valid_end_date = NULL, + # invalid_reason = NULL ) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'concept', + tableName = "concept", data = concept ) @@ -407,31 +411,31 @@ test_that("manual data checking exclude count works", { cohort <- data.frame( subject_id = c( 1:10, - 7,8,10, - c(3,6,7,8,10), + 7, 8, 10, + c(3, 6, 7, 8, 10), c(7), 6 ), cohort_definition_id = c( - rep(1,10), - rep(1,3), + rep(1, 10), + rep(1, 3), rep(2, 5), 2, 2 ), cohort_start_date = c( - rep('2018-01-01', 10), - rep('2018-05-01',3), - '2018-01-13','2018-01-03',rep('2018-01-06',3), - '2018-05-24', - '2017-12-29' + rep("2018-01-01", 10), + rep("2018-05-01", 3), + "2018-01-13", "2018-01-03", rep("2018-01-06", 3), + "2018-05-24", + "2017-12-29" ), cohort_end_date = c( - rep('2018-02-01', 10), - 
rep('2018-06-01',3),
-    '2018-02-02','2018-02-04',rep('2018-02-08',3),
-    '2018-06-05',
-    '2017-12-29'
+      rep("2018-02-01", 10),
+      rep("2018-06-01", 3),
+      "2018-02-02", "2018-02-04", rep("2018-02-08", 3),
+      "2018-06-05",
+      "2017-12-29"
     )
   )
   cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
@@ -439,7 +443,7 @@ test_that("manual data checking exclude count works", {
   DatabaseConnector::insertTable(
     connection = con,
     databaseSchema = schema,
-    tableName = 'cohort',
+    tableName = "cohort",
     data = cohort
   )
 
@@ -466,21 +470,21 @@ test_that("manual data checking exclude count works", {
       useConditionEraAnyTimePrior = T
     ),
     caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
-      casePreTargetDuration = 365*5
+      casePreTargetDuration = 365 * 5
     )
   )
 
   Characterization::runCharacterizationAnalyses(
     connectionDetails = connectionDetails,
     targetDatabaseSchema = schema,
-    targetTable = 'cohort',
+    targetTable = "cohort",
     outcomeDatabaseSchema = schema,
-    outcomeTable = 'cohort',
+    outcomeTable = "cohort",
     cdmDatabaseSchema = schema,
     characterizationSettings = characterizationSettings,
-    outputDirectory = file.path(tempdir(), 'result2'),
-    executionPath = file.path(tempdir(), 'execution2'),
-    csvFilePrefix = 'c_',
-    databaseId = '1',
+    outputDirectory = file.path(tempdir(), "result2"),
+    executionPath = file.path(tempdir(), "execution2"),
+    csvFilePrefix = "c_",
+    databaseId = "1",
     incremental = T,
     threads = 1,
     minCharacterizationMean = 0.0001,
@@ -489,9 +493,8 @@ test_that("manual data checking exclude count works", {
   )
 
   # load the cohort counts to make sure the exclude is there
-  counts <- read.csv(file.path(tempdir(), 'result2','c_cohort_counts.csv'))
+  counts <- read.csv(file.path(tempdir(), "result2", "c_cohort_counts.csv"))
   # when restricted to first exposure, 4 people remain as cases and 1 is excluded
-  testthat::expect_true(counts$row_count[counts$cohort_type == 'Cases'] == 4)
-  testthat::expect_true(counts$row_count[counts$cohort_type == 'Exclude'] == 1)
-
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Cases"] == 4)
+  testthat::expect_true(counts$row_count[counts$cohort_type == "Exclude"] == 1)
 })
diff --git a/tests/testthat/test-runCharacterization.R b/tests/testthat/test-runCharacterization.R
index 27b88da..3e45925 100644
--- a/tests/testthat/test-runCharacterization.R
+++ b/tests/testthat/test-runCharacterization.R
@@ -119,8 +119,8 @@ test_that("runCharacterizationAnalyses", {
     outcomeDatabaseSchema = "main",
     outcomeTable = "cohort",
     characterizationSettings = characterizationSettings,
-    outputDirectory = file.path(tempFolder,'result'),
-    executionPath = file.path(tempFolder,'execution'),
+    outputDirectory = file.path(tempFolder, "result"),
+    executionPath = file.path(tempFolder, "execution"),
     csvFilePrefix = "c_",
     databaseId = "1",
     incremental = T,
@@ -161,43 +161,40 @@ test_that("runCharacterizationAnalyses", {
   testthat::expect_false(
     file.exists(file.path(tempFolder, "result", "c_dechallenge_rechallenge.csv"))
   )
-  #testthat::expect_true(
+  # testthat::expect_true(
   #  file.exists(file.path(tempFolder, "result", "rechallenge_fail_case_series.csv"))
-  #)
+  # )
   testthat::expect_true(
     file.exists(file.path(tempFolder, "result", "c_time_to_event.csv"))
   )
 
   # make sure both tte runs are in the csv
   tte <- readr::read_csv(
-    file = file.path(tempFolder,'result' ,"c_time_to_event.csv"),
-    show_col_types = FALSE
-  )
+    file = file.path(tempFolder, "result", "c_time_to_event.csv"),
+    show_col_types = FALSE
+  )
 
  testthat::expect_equivalent(
unique(tte$target_cohort_definition_id), - c(1,2) + c(1, 2) ) - - }) -manualDataMin <- file.path(tempdir(), 'manual_min.sqlite') +manualDataMin <- file.path(tempdir(), "manual_min.sqlite") on.exit(file.remove(manualDataMin), add = TRUE) test_that("min cell count works", { - tempFolder <- tempfile("CharacterizationMin") on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE) connectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = 'sqlite', + dbms = "sqlite", server = manualDataMin ) con <- DatabaseConnector::connect(connectionDetails = connectionDetails) on.exit(DatabaseConnector::disconnect(con)) - schema <- 'main' + schema <- "main" # add persons - aggregate covs (age) persons <- data.frame( @@ -206,18 +203,18 @@ test_that("min cell count works", { year_of_birth = rep(2000, 10), race_concept_id = rep(1, 10), ethnicity_concept_id = rep(1, 10), - location_id = rep(1,10), - provider_id = rep(1,10), - care_site_id = rep(1,10), + location_id = rep(1, 10), + provider_id = rep(1, 10), + care_site_id = rep(1, 10), person_source_value = 1:10, - gender_source_value = rep('female', 10), - race_source_value = rep('na', 10), - ethnicity_source_value = rep('na', 10) + gender_source_value = rep("female", 10), + race_source_value = rep("na", 10), + ethnicity_source_value = rep("na", 10) ) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'person', + tableName = "person", data = persons ) @@ -225,16 +222,16 @@ test_that("min cell count works", { obs_period <- data.frame( observation_period_id = 1:10, person_id = 1:10, - observation_period_start_date = rep('2000-12-31', 10), - observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)), - period_type_concept_id = rep(1,10) + observation_period_start_date = rep("2000-12-31", 10), + observation_period_end_date = c("2000-12-31", rep("2020-12-31", 9)), + period_type_concept_id = rep(1, 10) ) obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date) obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'observation_period', + tableName = "observation_period", data = obs_period ) # person 1 has 1 day obs @@ -247,14 +244,18 @@ test_that("min cell count works", { condition_era <- data.frame( condition_era_id = 1:7, - person_id = c(7,7,8, 9,9,9,10), - condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820), - condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), - condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01', - '2006-01-04', '2014-08-02', '2014-08-04', - '2013-01-04'), + person_id = c(7, 7, 8, 9, 9, 9, 10), + condition_concept_id = c(201820, 378253, 201820, 378253, 378253, 378253, 201820), + condition_era_start_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" + ), + condition_era_end_date = c( + "2011-01-01", "2013-04-03", "2016-01-01", + "2006-01-04", "2014-08-02", "2014-08-04", + "2013-01-04" + ), condition_occurrence_count = rep(1, 7) ) condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date) @@ -263,27 +264,27 @@ test_that("min cell count works", { DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'condition_era', + tableName = "condition_era", data = condition_era ) # add concept 
concept <- data.frame( - concept_id = c(201820,378253), - concept_name = c('diabetes', 'hypertension'), - domain_id = rep(1,2), - vocabulary_id = rep(1,2), - concept_class_id = c('Condition', 'Condition'), - standard_concept = rep('S',2), - concept_code = rep('Snowmed',2) - #,valid_start_date = NULL, - #valid_end_date = NULL, - #invalid_reason = NULL + concept_id = c(201820, 378253), + concept_name = c("diabetes", "hypertension"), + domain_id = rep(1, 2), + vocabulary_id = rep(1, 2), + concept_class_id = c("Condition", "Condition"), + standard_concept = rep("S", 2), + concept_code = rep("Snowmed", 2) + # ,valid_start_date = NULL, + # valid_end_date = NULL, + # invalid_reason = NULL ) DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'concept', + tableName = "concept", data = concept ) @@ -291,27 +292,27 @@ test_that("min cell count works", { cohort <- data.frame( subject_id = c( 1:10, - 7,8,10, - c(3,6,7,8,10), + 7, 8, 10, + c(3, 6, 7, 8, 10), c(7) ), cohort_definition_id = c( - rep(1,10), - rep(1,3), + rep(1, 10), + rep(1, 3), rep(2, 5), 2 ), cohort_start_date = c( - rep('2018-01-01', 10), - rep('2018-05-01',3), - '2018-01-13','2018-01-03',rep('2018-01-06',3), - '2018-05-24' + rep("2018-01-01", 10), + rep("2018-05-01", 3), + "2018-01-13", "2018-01-03", rep("2018-01-06", 3), + "2018-05-24" ), cohort_end_date = c( - rep('2018-02-01', 10), - rep('2018-06-01',3), - '2018-02-02','2018-02-04',rep('2018-02-08',3), - '2018-06-05' + rep("2018-02-01", 10), + rep("2018-06-01", 3), + "2018-02-02", "2018-02-04", rep("2018-02-08", 3), + "2018-06-05" ) ) cohort$cohort_start_date <- as.Date(cohort$cohort_start_date) @@ -319,7 +320,7 @@ test_that("min cell count works", { DatabaseConnector::insertTable( connection = con, databaseSchema = schema, - tableName = 'cohort', + tableName = "cohort", data = cohort ) @@ -346,7 +347,7 @@ test_that("min cell count works", { useConditionEraAnyTimePrior = T ), caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T), - casePreTargetDuration = 365*5 + casePreTargetDuration = 365 * 5 ) ) @@ -358,8 +359,8 @@ test_that("min cell count works", { outcomeDatabaseSchema = "main", outcomeTable = "cohort", characterizationSettings = characterizationSettings, - outputDirectory = file.path(tempFolder,'result_mincell'), - executionPath = file.path(tempFolder,'execution_mincell'), + outputDirectory = file.path(tempFolder, "result_mincell"), + executionPath = file.path(tempFolder, "execution_mincell"), csvFilePrefix = "c_", databaseId = "1", incremental = F, @@ -426,5 +427,4 @@ test_that("min cell count works", { testthat::expect_true(sum(is.na(res$p_75_value)) == length(res$p_75_value)) testthat::expect_true(sum(is.na(res$min_value)) == length(res$min_value)) testthat::expect_true(sum(is.na(res$max_value)) == length(res$max_value)) - }) diff --git a/tests/testthat/test-timeToEvent.R b/tests/testthat/test-timeToEvent.R index 4a82c5b..f0d7e3e 100644 --- a/tests/testthat/test-timeToEvent.R +++ b/tests/testthat/test-timeToEvent.R @@ -39,19 +39,19 @@ test_that("computeTimeToEventSettings", { targetTable = "cohort", settings = res, outputFolder = tteFolder, - databaseId = 'tte_test' + databaseId = "tte_test" ) testthat::expect_true(file.exists(file.path(tteFolder, "time_to_event.csv"))) tte <- readr::read_csv( - file = file.path(tteFolder,'time_to_event.csv'), + file = file.path(tteFolder, "time_to_event.csv"), show_col_types = F - ) + ) testthat::expect_true(nrow(tte) == 160) 
testthat::expect_true("database_id" %in% colnames(tte)) - testthat::expect_true(tte$database_id[1] == 'tte_test') + testthat::expect_true(tte$database_id[1] == "tte_test") testthat::expect_true( length( @@ -79,7 +79,7 @@ test_that("computeTimeToEventSettings", { sum( unique(tte$outcome_cohort_definition_id) %in% outcomeIds - ) == + ) == length(unique(tte$outcome_cohort_definition_id)) ) @@ -93,15 +93,14 @@ test_that("computeTimeToEventSettings", { targetTable = "cohort", settings = res, outputFolder = tteFolder, - databaseId = 'tte_test', + databaseId = "tte_test", minCellCount = 9999 ) tte <- readr::read_csv( - file = file.path(tteFolder,'time_to_event.csv'), + file = file.path(tteFolder, "time_to_event.csv"), show_col_types = F ) testthat::expect_true(max(tte$num_events) == -9999) - }) diff --git a/tests/testthat/test-viewShiny.R b/tests/testthat/test-viewShiny.R index 5ab1e23..10fba8e 100644 --- a/tests/testthat/test-viewShiny.R +++ b/tests/testthat/test-viewShiny.R @@ -1,7 +1,7 @@ context("ViewShiny") # create a folder with results for the shiny app -resultLocation <- file.path(tempdir(),paste0('d_', paste0(sample(100,3), collapse = '_'), sep = ''), "shinyResults") +resultLocation <- file.path(tempdir(), paste0("d_", paste0(sample(100, 3), collapse = "_"), sep = ""), "shinyResults") if (!dir.exists(resultLocation)) { dir.create(resultLocation, recursive = T) } @@ -85,8 +85,8 @@ test_that("prepareCharacterizationShiny works", { outcomeDatabaseSchema = "main", outcomeTable = "cohort", characterizationSettings = characterizationSettings, - outputDirectory = file.path(resultLocation, 'result'), - executionPath = file.path(resultLocation, 'execution'), + outputDirectory = file.path(resultLocation, "result"), + executionPath = file.path(resultLocation, "execution"), csvFilePrefix = "c_", databaseId = "1", threads = 1, @@ -96,7 +96,7 @@ test_that("prepareCharacterizationShiny works", { ) settings <- Characterization:::prepareCharacterizationShiny( - resultFolder = file.path(resultLocation,'result'), + resultFolder = file.path(resultLocation, "result"), cohortDefinitionSet = NULL, sqliteLocation = file.path(resultLocation, "sqliteCharacterization", "sqlite.sqlite") ) @@ -128,7 +128,7 @@ test_that("prepareCharacterizationShiny works", { test_that("shiny app works", { settings <- prepareCharacterizationShiny( - resultFolder = file.path(resultLocation,'result'), + resultFolder = file.path(resultLocation, "result"), cohortDefinitionSet = NULL, sqliteLocation = file.path(resultLocation, "sqliteCharacterization", "sqlite.sqlite") ) diff --git a/vignettes/UsingPackage.Rmd b/vignettes/UsingPackage.Rmd index f76d56f..809f6bc 100644 --- a/vignettes/UsingPackage.Rmd +++ b/vignettes/UsingPackage.Rmd @@ -54,7 +54,7 @@ Eunomia::createCohorts(connectionDetails = connectionDetails) We also need to have the Characterization package installed and loaded ```{r tidy=TRUE,eval=FALSE} remotes::install_github("ohdsi/FeatureExtraction") -remotes::install_github("ohdsi/Characterization", ref = 'new_approach') +remotes::install_github("ohdsi/Characterization", ref = "new_approach") ``` ```{r tidy=TRUE,eval=TRUE} @@ -109,12 +109,12 @@ exampleAggregateCovariateSettings <- createAggregateCovariateSettings( targetIds = exampleTargetIds, outcomeIds = exampleOutcomeIds, riskWindowStart = 1, startAnchor = "cohort start", - riskWindowEnd = 365, endAnchor = "cohort start", - outcomeWashoutDays = 9999, + riskWindowEnd = 365, endAnchor = "cohort start", + outcomeWashoutDays = 9999, minPriorObservation = 365, - covariateSettings = 
exampleCovariateSettings,
-  caseCovariateSettings = caseCovariateSettings, 
-  casePreTargetDuration = 90, 
+  covariateSettings = exampleCovariateSettings,
+  caseCovariateSettings = caseCovariateSettings,
+  casePreTargetDuration = 90,
   casePostOutcomeDuration = 90
 )
 ```
@@ -130,12 +130,12 @@ runCharacterizationAnalyses(
   outcomeDatabaseSchema = "main",
   outcomeTable = "cohort",
   characterizationSettings = createCharacterizationSettings(
-    aggregateCovariateSettings = exampleAggregateCovariateSettings
-  ),
+    aggregateCovariateSettings = exampleAggregateCovariateSettings
+  ),
   databaseId = "Eunomia",
-  runId = 1,
-  minCharacterizationMean = 0.01,
-  outputDirectory = file.path(getwd(), 'example_char', 'results'), executionPath = file.path(getwd(), 'example_char', 'execution'),
+  runId = 1,
+  minCharacterizationMean = 0.01,
+  outputDirectory = file.path(getwd(), "example_char", "results"), executionPath = file.path(getwd(), "example_char", "execution"),
   minCellCount = 10,
   incremental = F,
   threads = 1
@@ -179,8 +179,8 @@ dc <- computeDechallengeRechallengeAnalyses(
   targetDatabaseSchema = "main",
   targetTable = "cohort",
   settings = exampleDechallengeRechallengeSettings,
-  databaseId = "Eunomia", 
-  outcomeTable = file.path(getwd(), 'example_char', 'results'),
+  databaseId = "Eunomia",
+  outputFolder = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )
 ```
@@ -196,7 +196,7 @@ failed <- computeRechallengeFailCaseSeriesAnalyses(
   outcomeDatabaseSchema = "main",
   outcomeTable = "cohort",
   databaseId = "Eunomia",
-  outcomeTable = file.path(getwd(), 'example_char', 'results'),
+  outputFolder = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )
 ```
@@ -225,7 +225,7 @@ tte <- computeTimeToEventAnalyses(
   targetTable = "cohort",
   settings = exampleTimeToEventSettings,
   databaseId = "Eunomia",
-  outcomeTable = file.path(getwd(), 'example_char', 'results'),
+  outputFolder = file.path(getwd(), "example_char", "results"),
   minCellCount = 5
 )
 ```
@@ -264,10 +264,10 @@ runCharacterizationAnalyses(
   outcomeDatabaseSchema = "main",
   outcomeTable = "cohort",
   characterizationSettings = characterizationSettings,
-  outputDirectory = file.path(tempdir(), "example", 'results'),
+  outputDirectory = file.path(tempdir(), "example", "results"),
   executionPath = file.path(tempdir(), "example", "execution"),
-  csvFilePrefix = "c_", 
-  databaseId = "1", 
+  csvFilePrefix = "c_",
+  databaseId = "1",
   incremental = F,
   minCharacterizationMean = 0.01,
   minCellCount = 5
@@ -278,7 +278,7 @@ This will create csv files with the results in the saveDirectory. You can run t
 
 ```{r eval=FALSE}
 viewCharacterization(
-  resultFolder = file.path(tempdir(), "example", "results"), 
+  resultFolder = file.path(tempdir(), "example", "results"),
   cohortDefinitionSet = NULL
-  )
+)
 ```
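
Taken together, the vignette hunks above describe one complete workflow: build aggregate covariate settings, run the characterization, and view the results. The sketch below assembles those calls into a single runnable example against the Eunomia demo database. It is a minimal illustration only: the cohort ids (1 and 2 as targets, 3 as the outcome), the default FeatureExtraction covariate settings, and the output paths are assumptions for the example, not values the package requires.

```r
# Minimal end-to-end sketch of the Characterization 2.0.0 workflow on the
# Eunomia demo database. Cohort ids, covariate settings, and output paths
# are illustrative assumptions, not package requirements.
library(Characterization)

connectionDetails <- Eunomia::getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails = connectionDetails)

characterizationSettings <- createCharacterizationSettings(
  aggregateCovariateSettings = createAggregateCovariateSettings(
    targetIds = c(1, 2), # assumed Eunomia target cohorts
    outcomeIds = 3, # assumed Eunomia outcome cohort
    riskWindowStart = 1, startAnchor = "cohort start",
    riskWindowEnd = 365, endAnchor = "cohort start",
    outcomeWashoutDays = 9999,
    minPriorObservation = 365,
    covariateSettings = FeatureExtraction::createDefaultCovariateSettings(),
    caseCovariateSettings = createDuringCovariateSettings(
      useConditionEraDuring = TRUE
    ),
    casePreTargetDuration = 90,
    casePostOutcomeDuration = 90
  )
)

resultDirectory <- file.path(tempdir(), "char_demo", "results")
runCharacterizationAnalyses(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "main",
  targetDatabaseSchema = "main",
  targetTable = "cohort",
  outcomeDatabaseSchema = "main",
  outcomeTable = "cohort",
  characterizationSettings = characterizationSettings,
  outputDirectory = resultDirectory,
  executionPath = file.path(tempdir(), "char_demo", "execution"),
  csvFilePrefix = "c_",
  databaseId = "Eunomia",
  incremental = FALSE,
  threads = 1,
  minCharacterizationMean = 0.01,
  minCellCount = 5
)

# The run writes censored csv files (prefixed "c_") into resultDirectory;
# counts below minCellCount are censored and covariates with means below
# minCharacterizationMean are dropped.
counts <- read.csv(file.path(resultDirectory, "c_cohort_counts.csv"))
head(counts)

# Explore the full results interactively.
viewCharacterization(
  resultFolder = resultDirectory,
  cohortDefinitionSet = NULL
)
```

Pointing `connectionDetails`, `cdmDatabaseSchema`, and the cohort schema/table arguments at another OMOP CDM database should be the only change needed to run the same sketch elsewhere.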