diff --git a/DESCRIPTION b/DESCRIPTION index ebd6ab0..e12ffac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: MethodEvaluation Type: Package Title: Package for Evaluation of Estimation Methods Version: 2.3.0 -Date: 2032-03-31 +Date: 2023-04-13 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")) ) @@ -21,7 +21,7 @@ URL: https://github.com/OHDSI/MethodEvaluation BugReports: https://github.com/OHDSI/MethodEvaluation/issues Depends: R (>= 3.5.0), - DatabaseConnector (>= 4.0.2), + DatabaseConnector (>= 6.0.0), FeatureExtraction (>= 3.0.0), Cyclops (>= 3.0.0) Imports: diff --git a/NEWS.md b/NEWS.md index a70e2b6..8ddeeab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,13 +7,7 @@ Changes: 2. Adding `packageCustomBenchmarkResults()` to support custom methods benchmarks. - -MethodEvaluation 2.2.1 -====================== - -Changes: - -1. Fixing seeds and setting `resetCoefficients = TRUE` to ensure reproducibility of positive control synthesis. +3. Fixing seeds and setting `resetCoefficients = TRUE` to ensure reproducibility of positive control synthesis. MethodEvaluation 2.2.0 diff --git a/R/CreateReferenceSetCohorts.R b/R/CreateReferenceSetCohorts.R index 3bee9bf..1e178dc 100644 --- a/R/CreateReferenceSetCohorts.R +++ b/R/CreateReferenceSetCohorts.R @@ -1,13 +1,13 @@ # Copyright 2023 Observational Health Data Sciences and Informatics # # This file is part of MethodEvaluation -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,9 +20,9 @@ #' This function will create the outcomes of interest and nesting cohorts referenced in the various #' reference sets. The outcomes of interest are derives using information like diagnoses, procedures, #' and drug prescriptions. The outcomes are stored in a table on the database server. -#' +#' #' For the 'ohdsiMethodsBenchmark' reference set, the exposures are taken from the drug_era table, and -#' are therefore not generated as separate cohorts, and an exposure cohort table therefore needn't be supplied. +#' are therefore not generated as separate cohorts, and an exposure cohort table therefore needn't be supplied. #' For the 'ohdsiDevelopment' reference set, exposure cohorts will be generated. #' #' @param connectionDetails An R object of type \code{ConnectionDetails} created using the @@ -35,24 +35,24 @@ #' @param cdmDatabaseSchema A database schema containing health care data in the OMOP Commond #' Data Model. Note that for SQL Server, botth the database and schema #' should be specified, e.g. 'cdm_schema.dbo'. -#' @param exposureDatabaseSchema The name of the database schema where the exposure cohorts will be +#' @param exposureDatabaseSchema The name of the database schema where the exposure cohorts will be #' created. Only needed if \code{referenceSet = 'ohdsiDevelopment'}. Note -#' that for SQL Server, both the database and schema should be specified, +#' that for SQL Server, both the database and schema should be specified, #' e.g. 'cdm_schema.dbo'. -#' @param exposureTable The name of the table that will be created to store the exposure +#' @param exposureTable The name of the table that will be created to store the exposure #' cohorts. Only needed if \code{referenceSet = 'ohdsiDevelopment'}. #' @param outcomeDatabaseSchema The database schema where the target outcome table is located. Note #' that for SQL Server, both the database and schema should be #' specified, e.g. 'cdm_schema.dbo' #' @param outcomeTable The name of the table where the outcomes will be stored. #' @param nestingDatabaseSchema (For the OHDSI Methods Benchmark and OHDSI Development Set only) The -#' database schema where the nesting outcome table is located. Note that +#' database schema where the nesting outcome table is located. Note that #' for SQL Server, both the database and schema should be specified, e.g. #' 'cdm_schema.dbo'. -#' @param nestingTable (For the OHDSI Methods Benchmark and OHDSI Development Set only) The +#' @param nestingTable (For the OHDSI Methods Benchmark and OHDSI Development Set only) The #' name of the table where the nesting cohorts will be stored. #' @param referenceSet The name of the reference set for which outcomes need to be created. -#' Currently supported are "omopReferenceSet", "euadrReferenceSet", +#' Currently supported are "omopReferenceSet", "euadrReferenceSet", #' "ohdsiMethodsBenchmark", and "ohdsiDevelopment". #' @param workFolder Name of local folder to place intermediary results; make sure to use #' forward slashes (/). Do not use a folder on a network drive since @@ -76,14 +76,29 @@ createReferenceSetCohorts <- function(connectionDetails, warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema } + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureTable, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeTable, len = 1, add = errorMessages) + checkmate::assertCharacter(nestingDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(nestingTable, len = 1, add = errorMessages) + checkmate::assertChoice(referenceSet, c("omopReferenceSet", "euadrReferenceSet", "ohdsiMethodsBenchmark", "ohdsiDevelopment"), add = errorMessages) + checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + if (referenceSet == "omopReferenceSet") { ParallelLogger::logInfo("Generating HOIs for the OMOP reference set") renderedSql <- SqlRender::loadRenderTranslateSql("CreateOmopHois.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - cdm_database_schema = cdmDatabaseSchema, - outcome_database_schema = outcomeDatabaseSchema, - outcome_table = outcomeTable) + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + cdm_database_schema = cdmDatabaseSchema, + outcome_database_schema = outcomeDatabaseSchema, + outcome_table = outcomeTable + ) conn <- DatabaseConnector::connect(connectionDetails) DatabaseConnector::executeSql(conn, renderedSql) ParallelLogger::logInfo("Done") @@ -93,28 +108,33 @@ createReferenceSetCohorts <- function(connectionDetails, # TODO: add code for creating the EU-ADR HOIs } else if (referenceSet == "ohdsiMethodsBenchmark") { ParallelLogger::logInfo("Generating HOIs and nesting cohorts for the OHDSI Methods Benchmark") - if (outcomeDatabaseSchema == nestingDatabaseSchema && outcomeTable == nestingTable) + if (outcomeDatabaseSchema == nestingDatabaseSchema && outcomeTable == nestingTable) { stop("Outcome and nesting cohorts cannot be created in the same table") - createOhdsiNegativeControlCohorts(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - nestingDatabaseSchema = nestingDatabaseSchema, - nestingTable = nestingTable, - tempEmulationSchema = tempEmulationSchema, - workFolder = workFolder) + } + createOhdsiNegativeControlCohorts( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + nestingDatabaseSchema = nestingDatabaseSchema, + nestingTable = nestingTable, + tempEmulationSchema = tempEmulationSchema, + workFolder = workFolder + ) } else if (referenceSet == "ohdsiDevelopment") { ParallelLogger::logInfo("Generating HOIs and nesting cohorts for the OHDSI Development set") - createOhdsiDevelopmentNegativeControlCohorts(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - nestingDatabaseSchema = nestingDatabaseSchema, - nestingTable = nestingTable, - tempEmulationSchema = tempEmulationSchema, - workFolder = workFolder) + createOhdsiDevelopmentNegativeControlCohorts( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + nestingDatabaseSchema = nestingDatabaseSchema, + nestingTable = nestingTable, + tempEmulationSchema = tempEmulationSchema, + workFolder = workFolder + ) } else { stop(paste("Unknow reference set:", referenceSet)) } @@ -134,104 +154,129 @@ createOhdsiDevelopmentNegativeControlCohorts <- function(connectionDetails, dir.create(workFolder, recursive = TRUE) } ohdsiDevelopmentNegativeControls <- readRDS(system.file("ohdsiDevelopmentNegativeControls.rds", - package = "MethodEvaluation")) - + package = "MethodEvaluation" + )) + connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) - - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cohort_database_schema = outcomeDatabaseSchema, - cohort_table = outcomeTable) + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateCohortTable.sql", + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cohort_database_schema = outcomeDatabaseSchema, + cohort_table = outcomeTable + ) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) - + if (outcomeDatabaseSchema != nestingDatabaseSchema | outcomeTable != nestingTable) { - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cohort_database_schema = nestingDatabaseSchema, - cohort_table = nestingTable) + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateCohortTable.sql", + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cohort_database_schema = nestingDatabaseSchema, + cohort_table = nestingTable + ) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } if (outcomeDatabaseSchema != exposureDatabaseSchema | outcomeTable != exposureTable) { - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cohort_database_schema = exposureDatabaseSchema, - cohort_table = exposureTable) + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateCohortTable.sql", + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cohort_database_schema = exposureDatabaseSchema, + cohort_table = exposureTable + ) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } - exposureCohorts <- data.frame(cohortName = c("ace_inhibitors", "thiazides_diuretics"), - cohortId = c(1, 2)) + exposureCohorts <- data.frame( + cohortName = c("ace_inhibitors", "thiazides_diuretics"), + cohortId = c(1, 2) + ) for (i in 1:nrow(exposureCohorts)) { ParallelLogger::logInfo(paste("Creating exposure cohort:", exposureCohorts$cohortName[i])) - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(exposureCohorts$cohortName[i], ".sql"), - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - vocabulary_database_schema = cdmDatabaseSchema, - target_database_schema = exposureDatabaseSchema, - target_cohort_table = exposureTable, - target_cohort_id = exposureCohorts$cohortId[i]) + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = paste0(exposureCohorts$cohortName[i], ".sql"), + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + vocabulary_database_schema = cdmDatabaseSchema, + target_database_schema = exposureDatabaseSchema, + target_cohort_table = exposureTable, + target_cohort_id = exposureCohorts$cohortId[i] + ) DatabaseConnector::executeSql(connection, sql) } ParallelLogger::logInfo("Creating negative control outcomes") outcomeCohorts <- ohdsiDevelopmentNegativeControls %>% distinct(cohortId = .data$outcomeId, cohortName = .data$outcomeName) sql <- SqlRender::loadRenderTranslateSql("NegativeControls.sql", - "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - target_database_schema = outcomeDatabaseSchema, - target_cohort_table = outcomeTable, - outcome_ids = outcomeCohorts$cohortId) + "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + target_database_schema = outcomeDatabaseSchema, + target_cohort_table = outcomeTable, + outcome_ids = outcomeCohorts$cohortId + ) DatabaseConnector::executeSql(connection, sql) - + ParallelLogger::logInfo("Creating nesting cohorts") nestingCohorts <- ohdsiDevelopmentNegativeControls %>% distinct(cohortId = .data$nestingId, cohortName = .data$nestingName) sql <- SqlRender::loadRenderTranslateSql("NestingCohorts.sql", - "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - target_database_schema = nestingDatabaseSchema, - target_cohort_table = nestingTable, - nesting_ids = nestingCohorts$cohortId) + "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + target_database_schema = nestingDatabaseSchema, + target_cohort_table = nestingTable, + nesting_ids = nestingCohorts$cohortId + ) DatabaseConnector::executeSql(connection, sql) - + ParallelLogger::logInfo("Counting cohorts") - exposureCohortCounts <- countCohorts(connection = connection, - cohortDatabaseSchema = exposureDatabaseSchema, - cohortTable = exposureTable, - cohortIds = exposureCohorts$cohortId) %>% + exposureCohortCounts <- countCohorts( + connection = connection, + cohortDatabaseSchema = exposureDatabaseSchema, + cohortTable = exposureTable, + cohortIds = exposureCohorts$cohortId + ) %>% right_join(exposureCohorts, by = "cohortId") %>% mutate(type = "Exposure") - outcomeCohortCounts <- countCohorts(connection = connection, - cohortDatabaseSchema = outcomeDatabaseSchema, - cohortTable = outcomeTable, - cohortIds = outcomeCohorts$cohortId) %>% + outcomeCohortCounts <- countCohorts( + connection = connection, + cohortDatabaseSchema = outcomeDatabaseSchema, + cohortTable = outcomeTable, + cohortIds = outcomeCohorts$cohortId + ) %>% right_join(outcomeCohorts, by = "cohortId") %>% mutate(type = "Outcome") - nestingCohortCounts <- countCohorts(connection = connection, - cohortDatabaseSchema = nestingDatabaseSchema, - cohortTable = nestingTable, - cohortIds = nestingCohorts$cohortId) %>% + nestingCohortCounts <- countCohorts( + connection = connection, + cohortDatabaseSchema = nestingDatabaseSchema, + cohortTable = nestingTable, + cohortIds = nestingCohorts$cohortId + ) %>% right_join(nestingCohorts, by = "cohortId") %>% mutate(type = "Nesting") cohortCounts <- bind_rows(exposureCohortCounts, outcomeCohortCounts, nestingCohortCounts) %>% - mutate(cohortEntries = case_when(is.na(.data$cohortEntries) ~ as.integer(0), - TRUE ~ as.integer(.data$cohortEntries)), - cohortSubjects = case_when(is.na(.data$cohortSubjects) ~ as.integer(0), - TRUE ~ as.integer(.data$cohortSubjects))) - readr::write_csv(cohortCounts, file.path(workFolder, "cohortCounts.csv")) - ParallelLogger::logInfo("Cohort counts written to ", file.path(workFolder, "cohortCounts.csv")) + mutate( + cohortEntries = case_when( + is.na(.data$cohortEntries) ~ as.integer(0), + TRUE ~ as.integer(.data$cohortEntries) + ), + cohortSubjects = case_when( + is.na(.data$cohortSubjects) ~ as.integer(0), + TRUE ~ as.integer(.data$cohortSubjects) + ) + ) + readr::write_csv(cohortCounts, file.path(workFolder, "cohortCounts.csv")) + ParallelLogger::logInfo("Cohort counts written to ", file.path(workFolder, "cohortCounts.csv")) } createOhdsiNegativeControlCohorts <- function(connectionDetails, @@ -243,40 +288,49 @@ createOhdsiNegativeControlCohorts <- function(connectionDetails, tempEmulationSchema, workFolder) { ohdsiNegativeControls <- readRDS(system.file("ohdsiNegativeControls.rds", - package = "MethodEvaluation")) - + package = "MethodEvaluation" + )) + connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) - - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cohort_database_schema = outcomeDatabaseSchema, - cohort_table = outcomeTable) + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateCohortTable.sql", + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cohort_database_schema = outcomeDatabaseSchema, + cohort_table = outcomeTable + ) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) - - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql", - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cohort_database_schema = nestingDatabaseSchema, - cohort_table = nestingTable) + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "CreateCohortTable.sql", + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cohort_database_schema = nestingDatabaseSchema, + cohort_table = nestingTable + ) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) - - complexOutcomeCohorts <- data.frame(sqlName = c("acute_pancreatitis", "gi_bleed", "stroke", "ibd"), - cohortId = c(1, 2, 3, 4)) + + complexOutcomeCohorts <- data.frame( + sqlName = c("acute_pancreatitis", "gi_bleed", "stroke", "ibd"), + cohortId = c(1, 2, 3, 4) + ) for (i in 1:nrow(complexOutcomeCohorts)) { ParallelLogger::logInfo(paste("Creating outcome:", complexOutcomeCohorts$sqlName[i])) - sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(complexOutcomeCohorts$sqlName[i], ".sql"), - packageName = "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - vocabulary_database_schema = cdmDatabaseSchema, - target_database_schema = outcomeDatabaseSchema, - target_cohort_table = outcomeTable, - target_cohort_id = complexOutcomeCohorts$cohortId[i]) + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = paste0(complexOutcomeCohorts$sqlName[i], ".sql"), + packageName = "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + vocabulary_database_schema = cdmDatabaseSchema, + target_database_schema = outcomeDatabaseSchema, + target_cohort_table = outcomeTable, + target_cohort_id = complexOutcomeCohorts$cohortId[i] + ) DatabaseConnector::executeSql(connection, sql) } ParallelLogger::logInfo("Creating other negative control outcomes") @@ -285,76 +339,94 @@ createOhdsiNegativeControlCohorts <- function(connectionDetails, distinct(cohortId = .data$outcomeId) %>% pull() sql <- SqlRender::loadRenderTranslateSql("NegativeControls.sql", - "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - target_database_schema = outcomeDatabaseSchema, - target_cohort_table = outcomeTable, - outcome_ids = otherOutcomeCohortIds) + "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + target_database_schema = outcomeDatabaseSchema, + target_cohort_table = outcomeTable, + outcome_ids = otherOutcomeCohortIds + ) DatabaseConnector::executeSql(connection, sql) outcomeCohorts <- ohdsiNegativeControls %>% - distinct(cohortId = .data$outcomeId, cohortName = .data$outcomeName) - + distinct(cohortId = .data$outcomeId, cohortName = .data$outcomeName) + ParallelLogger::logInfo("Creating nesting cohorts") nestingCohorts <- ohdsiNegativeControls %>% distinct(cohortId = .data$nestingId, cohortName = .data$nestingName) sql <- SqlRender::loadRenderTranslateSql("NestingCohorts.sql", - "MethodEvaluation", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - target_database_schema = nestingDatabaseSchema, - target_cohort_table = nestingTable, - nesting_ids = nestingCohorts$cohortId) + "MethodEvaluation", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + target_database_schema = nestingDatabaseSchema, + target_cohort_table = nestingTable, + nesting_ids = nestingCohorts$cohortId + ) DatabaseConnector::executeSql(connection, sql) - - exposureCohorts <- bind_rows(ohdsiNegativeControls %>% - distinct(cohortId = .data$targetId, cohortName = .data$targetName) , - ohdsiNegativeControls %>% - distinct(cohortId = .data$comparatorId, cohortName = .data$comparatorName)) %>% + + exposureCohorts <- bind_rows( + ohdsiNegativeControls %>% + distinct(cohortId = .data$targetId, cohortName = .data$targetName), + ohdsiNegativeControls %>% + distinct(cohortId = .data$comparatorId, cohortName = .data$comparatorName) + ) %>% distinct() - + ParallelLogger::logInfo("Counting cohorts") - exposureCohortCounts <- countDrugEras(connection = connection, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortIds = exposureCohorts$cohortId) %>% + exposureCohortCounts <- countDrugEras( + connection = connection, + cdmDatabaseSchema = cdmDatabaseSchema, + cohortIds = exposureCohorts$cohortId + ) %>% right_join(exposureCohorts, by = "cohortId") %>% mutate(type = "Exposure") - outcomeCohortCounts <- countCohorts(connection = connection, - cohortDatabaseSchema = outcomeDatabaseSchema, - cohortTable = outcomeTable, - cohortIds = outcomeCohorts$cohortId) %>% + outcomeCohortCounts <- countCohorts( + connection = connection, + cohortDatabaseSchema = outcomeDatabaseSchema, + cohortTable = outcomeTable, + cohortIds = outcomeCohorts$cohortId + ) %>% right_join(outcomeCohorts, by = "cohortId") %>% mutate(type = "Outcome") - nestingCohortCounts <- countCohorts(connection = connection, - cohortDatabaseSchema = nestingDatabaseSchema, - cohortTable = nestingTable, - cohortIds = nestingCohorts$cohortId) %>% + nestingCohortCounts <- countCohorts( + connection = connection, + cohortDatabaseSchema = nestingDatabaseSchema, + cohortTable = nestingTable, + cohortIds = nestingCohorts$cohortId + ) %>% right_join(nestingCohorts, by = "cohortId") %>% mutate(type = "Nesting") cohortCounts <- bind_rows(exposureCohortCounts, outcomeCohortCounts, nestingCohortCounts) %>% - mutate(cohortEntries = case_when(is.na(.data$cohortEntries) ~ as.integer(0), - TRUE ~ as.integer(.data$cohortEntries)), - cohortSubjects = case_when(is.na(.data$cohortSubjects) ~ as.integer(0), - TRUE ~ as.integer(.data$cohortSubjects))) - readr::write_csv(cohortCounts, file.path(workFolder, "cohortCounts.csv")) - ParallelLogger::logInfo("Cohort counts written to ", file.path(workFolder, "cohortCounts.csv")) + mutate( + cohortEntries = case_when( + is.na(.data$cohortEntries) ~ as.integer(0), + TRUE ~ as.integer(.data$cohortEntries) + ), + cohortSubjects = case_when( + is.na(.data$cohortSubjects) ~ as.integer(0), + TRUE ~ as.integer(.data$cohortSubjects) + ) + ) + readr::write_csv(cohortCounts, file.path(workFolder, "cohortCounts.csv")) + ParallelLogger::logInfo("Cohort counts written to ", file.path(workFolder, "cohortCounts.csv")) } countCohorts <- function(connection, cohortDatabaseSchema, cohortTable, cohortIds) { sql <- "SELECT cohort_definition_id AS cohort_id, COUNT(*) AS cohort_entries, COUNT(DISTINCT subject_id) AS cohort_subjects - FROM @cohort_database_schema.@cohort_table + FROM @cohort_database_schema.@cohort_table WHERE cohort_definition_id IN (@cohort_ids) GROUP BY cohort_definition_id;" - cohortCounts <- DatabaseConnector::renderTranslateQuerySql(connection = connection, - sql = sql, - cohort_database_schema = cohortDatabaseSchema, - cohort_table = cohortTable, - cohort_ids = cohortIds, - snakeCaseToCamelCase = TRUE) + cohortCounts <- DatabaseConnector::renderTranslateQuerySql( + connection = connection, + sql = sql, + cohort_database_schema = cohortDatabaseSchema, + cohort_table = cohortTable, + cohort_ids = cohortIds, + snakeCaseToCamelCase = TRUE + ) return(cohortCounts) } @@ -365,11 +437,13 @@ countDrugEras <- function(connection, cdmDatabaseSchema, cohortIds) { FROM @cdm_database_schema.drug_era WHERE drug_concept_id IN (@cohort_ids) GROUP BY drug_concept_id;" - cohortCounts <- DatabaseConnector::renderTranslateQuerySql(connection = connection, - sql = sql, - cdm_database_schema = cdmDatabaseSchema, - cohort_ids = cohortIds, - snakeCaseToCamelCase = TRUE) + cohortCounts <- DatabaseConnector::renderTranslateQuerySql( + connection = connection, + sql = sql, + cdm_database_schema = cdmDatabaseSchema, + cohort_ids = cohortIds, + snakeCaseToCamelCase = TRUE + ) return(cohortCounts) } @@ -420,13 +494,13 @@ countDrugEras <- function(connection, cdmDatabaseSchema, cohortIds) { #' #' @export synthesizeReferenceSetPositiveControls <- function(connectionDetails, - cdmDatabaseSchema, oracleTempSchema = NULL, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), - outcomeDatabaseSchema = cdmDatabaseSchema, - outcomeTable = "cohort", + cdmDatabaseSchema, exposureDatabaseSchema = cdmDatabaseSchema, exposureTable = "drug_era", + outcomeDatabaseSchema = cdmDatabaseSchema, + outcomeTable = "cohort", referenceSet = "ohdsiMethodsBenchmark", maxCores = 1, workFolder, @@ -435,89 +509,111 @@ synthesizeReferenceSetPositiveControls <- function(connectionDetails, warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema } - if (!referenceSet %in% c("ohdsiMethodsBenchmark", "ohdsiDevelopment")) { - stop("Currently only supporting positive control synthesis for the ohdsiMethodsBenchmark and ohdsiDevelopment reference sets") - } + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureTable, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeTable, len = 1, add = errorMessages) + checkmate::assertChoice(referenceSet, c("ohdsiMethodsBenchmark", "ohdsiDevelopment"), add = errorMessages) + checkmate::assertInt(maxCores, lower = 1, add = errorMessages) + checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) + checkmate::assertCharacter(summaryFileName, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) injectionFolder <- file.path(workFolder, "SignalInjection") - if (!file.exists(injectionFolder)) + if (!file.exists(injectionFolder)) { dir.create(injectionFolder) - + } + injectionSummaryFile <- file.path(workFolder, "injectionSummary.rds") if (referenceSet == "ohdsiMethodsBenchmark") { negativeControls <- readRDS(system.file("ohdsiNegativeControls.rds", - package = "MethodEvaluation")) + package = "MethodEvaluation" + )) } else { negativeControls <- readRDS(system.file("ohdsiDevelopmentNegativeControls.rds", - package = "MethodEvaluation")) + package = "MethodEvaluation" + )) } if (!file.exists(injectionSummaryFile)) { - exposureOutcomePairs <- data.frame(exposureId = negativeControls$targetId, - outcomeId = negativeControls$outcomeId) + exposureOutcomePairs <- data.frame( + exposureId = negativeControls$targetId, + outcomeId = negativeControls$outcomeId + ) exposureOutcomePairs <- unique(exposureOutcomePairs) - + prior <- Cyclops::createPrior("laplace", exclude = 0, useCrossValidation = TRUE) - - control <- Cyclops::createControl(cvType = "auto", - startingVariance = 0.01, - noiseLevel = "quiet", - cvRepetitions = 1, - threads = min(c(10, maxCores))) - - covariateSettings <- FeatureExtraction::createCovariateSettings(useDemographicsAgeGroup = TRUE, - useDemographicsGender = TRUE, - useDemographicsIndexYear = TRUE, - useDemographicsIndexMonth = TRUE, - useConditionGroupEraLongTerm = TRUE, - useDrugGroupEraLongTerm = TRUE, - useProcedureOccurrenceLongTerm = TRUE, - useMeasurementLongTerm = TRUE, - useObservationLongTerm = TRUE, - useCharlsonIndex = TRUE, - useDcsi = TRUE, - useChads2Vasc = TRUE, - longTermStartDays = 365, - endDays = 0) - + + control <- Cyclops::createControl( + cvType = "auto", + startingVariance = 0.01, + noiseLevel = "quiet", + cvRepetitions = 1, + threads = min(c(10, maxCores)) + ) + + covariateSettings <- FeatureExtraction::createCovariateSettings( + useDemographicsAgeGroup = TRUE, + useDemographicsGender = TRUE, + useDemographicsIndexYear = TRUE, + useDemographicsIndexMonth = TRUE, + useConditionGroupEraLongTerm = TRUE, + useDrugGroupEraLongTerm = TRUE, + useProcedureOccurrenceLongTerm = TRUE, + useMeasurementLongTerm = TRUE, + useObservationLongTerm = TRUE, + useCharlsonIndex = TRUE, + useDcsi = TRUE, + useChads2Vasc = TRUE, + longTermStartDays = 365, + endDays = 0 + ) + result <- synthesizePositiveControls(connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - outputDatabaseSchema = outcomeDatabaseSchema, - outputTable = outcomeTable, - createOutputTable = FALSE, - outputIdOffset = 10000, - exposureOutcomePairs = exposureOutcomePairs, - firstExposureOnly = FALSE, - firstOutcomeOnly = TRUE, - removePeopleWithPriorOutcomes = TRUE, - modelType = "survival", - washoutPeriod = 365, - riskWindowStart = 0, - riskWindowEnd = 0, - endAnchor = "cohort end", - effectSizes = c(1.5, 2, 4), - precision = 0.01, - prior = prior, - control = control, - maxSubjectsForModel = 250000, - minOutcomeCountForModel = 100, - minOutcomeCountForInjection = 25, - workFolder = injectionFolder, - modelThreads = max(1, round(maxCores/8)), - generationThreads = min(6, maxCores), - covariateSettings = covariateSettings) + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + outputDatabaseSchema = outcomeDatabaseSchema, + outputTable = outcomeTable, + createOutputTable = FALSE, + outputIdOffset = 10000, + exposureOutcomePairs = exposureOutcomePairs, + firstExposureOnly = FALSE, + firstOutcomeOnly = TRUE, + removePeopleWithPriorOutcomes = TRUE, + modelType = "survival", + washoutPeriod = 365, + riskWindowStart = 0, + riskWindowEnd = 0, + endAnchor = "cohort end", + effectSizes = c(1.5, 2, 4), + precision = 0.01, + prior = prior, + control = control, + maxSubjectsForModel = 250000, + minOutcomeCountForModel = 100, + minOutcomeCountForInjection = 25, + workFolder = injectionFolder, + modelThreads = max(1, round(maxCores / 8)), + generationThreads = min(6, maxCores), + covariateSettings = covariateSettings + ) saveRDS(result, injectionSummaryFile) } injectedSignals <- readRDS(injectionSummaryFile) injectedSignals$targetId <- injectedSignals$exposureId injectedSignals <- merge(injectedSignals, negativeControls) injectedSignals <- injectedSignals[injectedSignals$trueEffectSize != 0, ] - injectedSignals$outcomeName <- paste0(injectedSignals$outcomeName, - ", RR=", - injectedSignals$targetEffectSize) + injectedSignals$outcomeName <- paste0( + injectedSignals$outcomeName, + ", RR=", + injectedSignals$targetEffectSize + ) injectedSignals$oldOutcomeId <- injectedSignals$outcomeId injectedSignals$outcomeId <- injectedSignals$newOutcomeId negativeControls$targetEffectSize <- 1 @@ -526,26 +622,36 @@ synthesizeReferenceSetPositiveControls <- function(connectionDetails, negativeControls$oldOutcomeId <- negativeControls$outcomeId allControls <- rbind(negativeControls, injectedSignals[, names(negativeControls)]) exposureOutcomes <- data.frame() - exposureOutcomes <- rbind(exposureOutcomes, data.frame(exposureId = allControls$targetId, - outcomeId = allControls$outcomeId)) - exposureOutcomes <- rbind(exposureOutcomes, data.frame(exposureId = allControls$comparatorId, - outcomeId = allControls$outcomeId)) + exposureOutcomes <- rbind(exposureOutcomes, data.frame( + exposureId = allControls$targetId, + outcomeId = allControls$outcomeId + )) + exposureOutcomes <- rbind(exposureOutcomes, data.frame( + exposureId = allControls$comparatorId, + outcomeId = allControls$outcomeId + )) exposureOutcomes <- unique(exposureOutcomes) - mdrr <- computeMdrr(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - exposureOutcomePairs = exposureOutcomes, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - cdmVersion = "5") - allControls <- merge(allControls, data.frame(targetId = mdrr$exposureId, - outcomeId = mdrr$outcomeId, - mdrrTarget = mdrr$mdrr)) - allControls <- merge(allControls, data.frame(comparatorId = mdrr$exposureId, - outcomeId = mdrr$outcomeId, - mdrrComparator = mdrr$mdrr), all.x = TRUE) + mdrr <- computeMdrr( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + exposureOutcomePairs = exposureOutcomes, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + cdmVersion = "5" + ) + allControls <- merge(allControls, data.frame( + targetId = mdrr$exposureId, + outcomeId = mdrr$outcomeId, + mdrrTarget = mdrr$mdrr + )) + allControls <- merge(allControls, data.frame( + comparatorId = mdrr$exposureId, + outcomeId = mdrr$outcomeId, + mdrrComparator = mdrr$mdrr + ), all.x = TRUE) readr::write_csv(allControls, summaryFileName) - ParallelLogger::logInfo("Positive and negative control summary written to ", summaryFileName) + ParallelLogger::logInfo("Positive and negative control summary written to ", summaryFileName) } diff --git a/R/Mdrr.R b/R/Mdrr.R index e1b3cd5..43989fa 100644 --- a/R/Mdrr.R +++ b/R/Mdrr.R @@ -85,9 +85,9 @@ #' } #' @export computeMdrr <- function(connectionDetails, - cdmDatabaseSchema, oracleTempSchema = NULL, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + cdmDatabaseSchema, exposureOutcomePairs, exposureDatabaseSchema = cdmDatabaseSchema, exposureTable = "drug_era", @@ -101,12 +101,18 @@ computeMdrr <- function(connectionDetails, if (is.null(exposureOutcomePairs$exposureId) && !is.null(exposureOutcomePairs$targetId)) { exposureOutcomePairs$exposureId <- exposureOutcomePairs$targetId } - if (is.null(exposureOutcomePairs$exposureId)) { - stop("exposureOutcomePairs is missing exposureId and targetId column") - } - if (is.null(exposureOutcomePairs$outcomeId)) { - stop("exposureOutcomePairs is missing outcomeId column") - } + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertDataFrame(exposureOutcomePairs, add = errorMessages) + checkmate::assertNames(colnames(exposureOutcomePairs), must.include = c("exposureId", "outcomeId"), add = errorMessages) + checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureTable, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeTable, len = 1, add = errorMessages) + checkmate::assertCharacter(cdmVersion, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) exposureTable <- tolower(exposureTable) outcomeTable <- tolower(outcomeTable) if (exposureTable == "drug_era") { diff --git a/R/MethodEvaluation.R b/R/MethodEvaluation.R index 05ca650..d08dc3d 100644 --- a/R/MethodEvaluation.R +++ b/R/MethodEvaluation.R @@ -3,13 +3,13 @@ # Copyright 2023 Observational Health Data Sciences and Informatics # # This file is part of MethodEvaluation -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -114,7 +114,7 @@ NULL #' The OHDSI Development Set - Negative Controls #' A set of 76 negative control outcomes, all for the exposures of ACE inhibitors (compared to -#' thiazides and thiazide-like diuretics). This set is a much small set than the he OHDSI Method +#' thiazides and thiazide-like diuretics). This set is a much small set than the he OHDSI Method #' Evaluation Benchmark, but follows the same principles. It is intended to be used when developing #' methods, leaving the Methods Benchark untouched until a final evaluation of the method, thus preventing #' 'training' on the evaluation set. The negative controls are borrowed from the LEGEND Hypertension study. diff --git a/R/Metrics.R b/R/Metrics.R index d3de789..86eacdd 100644 --- a/R/Metrics.R +++ b/R/Metrics.R @@ -44,7 +44,15 @@ computeMetrics <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL, p = NULL, trueLogRr) { # data <- EmpiricalCalibration::simulateControls(n = 50 * 3, mean = 0.25, sd = 0.25, trueLogRr = # log(c(1, 2, 4))); logRr <- data$logRr; seLogRr <- data$seLogRr; trueLogRr <- data$trueLogRr - + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertNumeric(logRr, min.len = 1, add = errorMessages) + checkmate::assertNumeric(seLogRr, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(ci95Lb, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(ci95Ub, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(p, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + if (is.null(seLogRr) && is.null(ci95Lb)) { stop("Must specify either standard error or confidence interval") } @@ -70,14 +78,14 @@ computeMetrics <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL, } else { data$p <- p } - + idx <- is.na(data$logRr) | is.infinite(data$logRr) | is.na(data$seLogRr) | is.infinite(data$seLogRr) data$logRr[idx] <- 0 data$seLogRr[idx] <- 999 data$ci95Lb[idx] <- 0 data$ci95Ub[idx] <- 999 data$p[idx] <- 1 - + nonEstimable <- round(mean(data$seLogRr >= 99), 2) roc <- pROC::roc(data$trueLogRr > 0, data$logRr, algorithm = 3) auc <- round(pROC::auc(roc), 2) @@ -198,19 +206,19 @@ packageOhdsiBenchmarkResults <- function(estimates, checkmate::assertCharacter(databaseName, len = 1, add = errorMessages) checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - + if (!file.exists(exportFolder)) { dir.create(exportFolder, recursive = TRUE) } - + # Create full grid of controls (including those that did not make it in the database: if (referenceSet == "ohdsiMethodsBenchmark") { ohdsiNegativeControls <- readRDS(system.file("ohdsiNegativeControls.rds", - package = "MethodEvaluation" + package = "MethodEvaluation" )) } else { ohdsiNegativeControls <- readRDS(system.file("ohdsiDevelopmentNegativeControls.rds", - package = "MethodEvaluation" + package = "MethodEvaluation" )) } ohdsiNegativeControls$oldOutcomeId <- ohdsiNegativeControls$outcomeId @@ -238,7 +246,7 @@ packageOhdsiBenchmarkResults <- function(estimates, fullGrid$targetEffectSize[idx] ) allControls <- merge(controlSummary, fullGrid, all.y = TRUE) - + .packageBenchmarkResults( allControls = allControls, analysisRef = analysisRef, @@ -274,7 +282,7 @@ packageOhdsiBenchmarkResults <- function(estimates, by = join_by("analysisId") ) %>% mutate(database = databaseName) - + # Perform empirical calibration: # subset = subsets[[2]] calibrate <- function(subset) { @@ -304,7 +312,7 @@ packageOhdsiBenchmarkResults <- function(estimates, model = model ) null <- EmpiricalCalibration::fitNull(logRr = subsetMinusOne$logRr[subsetMinusOne$targetEffectSize == - 1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1]) + 1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1]) caliP <- EmpiricalCalibration::calibrateP( null = null, logRr = one$logRr, @@ -433,7 +441,7 @@ packageCustomBenchmarkResults <- function(estimates, checkmate::assertCharacter(databaseName, len = 1, add = errorMessages) checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - + trueEffecSizes <- c(1, unique(synthesisSummary$targetEffectSize)) negativeControls <- negativeControls %>% mutate(stratum = if_else(.data$type == "Outcome control", .data$targetId, .data$outcomeId)) %>% @@ -489,6 +497,15 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder, trueEffectSize = "Overall", calibrated = FALSE, comparative = FALSE) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) + checkmate::assertNumeric(mdrr, len = 1, add = errorMessages) + checkmate::assertCharacter(stratum, len = 1, add = errorMessages) + checkmate::assertAtomic(trueEffectSize, len = 1, add = errorMessages) + checkmate::assertLogical(calibrated, len = 1, add = errorMessages) + checkmate::assertLogical(comparative, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + # Load and prepare estimates of all methods files <- list.files(exportFolder, "estimates.*csv", full.names = TRUE) estimates <- lapply(files, read.csv) @@ -510,12 +527,12 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder, estimates$calCi95Lb[idx] <- 0 estimates$calCi95Ub[idx] <- 999 estimates$calP[is.na(estimates$calP)] <- 1 - + # Load and prepare analysis refs files <- list.files(exportFolder, "analysisRef.*csv", full.names = TRUE) analysisRef <- lapply(files, read.csv) analysisRef <- do.call("rbind", analysisRef) - + # Apply selection criteria subset <- estimates if (mdrr != "All") { @@ -534,7 +551,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder, subset$ci95Ub <- subset$calCi95Ub subset$p <- subset$calP } - + # Compute metrics combis <- unique(subset[, c("database", "method", "analysisId")]) if (trueEffectSize == "Overall") { @@ -566,7 +583,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder, # trueRr <- input$trueRr computeMetrics <- function(i) { forEval <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] & - subset$targetEffectSize == trueEffectSize, ] + subset$targetEffectSize == trueEffectSize, ] mse <- round(mean((forEval$logRr - log(forEval$trueEffectSize))^2), 2) coverage <- round( mean(forEval$ci95Lb < forEval$trueEffectSize & forEval$ci95Ub > forEval$trueEffectSize), @@ -580,7 +597,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder, nonEstimable <- round(mean(forEval$seLogRr == 999), 2) } else { negAndPos <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] & - (subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ] + (subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ] roc <- pROC::roc(negAndPos$targetEffectSize > 1, negAndPos$logRr, algorithm = 3) auc <- round(pROC::auc(roc), 2) type1 <- NA diff --git a/R/Plots.R b/R/Plots.R index 94a31df..2ee562c 100644 --- a/R/Plots.R +++ b/R/Plots.R @@ -3,13 +3,13 @@ # Copyright 2023 Observational Health Data Sciences and Informatics # # This file is part of MethodEvaluation -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,22 +29,32 @@ #' #' @export plotRocsInjectedSignals <- function(logRr, trueLogRr, showAucs, fileName = NULL) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertNumeric(logRr, min.len = 1, add = errorMessages) + checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages) + checkmate::assertLogical(showAucs, len = 1, add = errorMessages) + checkmate::assertCharacter(fileName, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + if (any(is.na(logRr))) { warning("Some estimates are NA, removing prior to computing AUCs") trueLogRr <- trueLogRr[!is.na(logRr)] logRr <- logRr[!is.na(logRr)] } trueLogRrLevels <- unique(trueLogRr) - if (all(trueLogRrLevels != 0)) + if (all(trueLogRrLevels != 0)) { stop("Requiring at least one true relative risk of 1") + } allData <- data.frame() aucs <- c() trueRrs <- c() for (trueLogRrLevel in trueLogRrLevels) { if (trueLogRrLevel != 0) { - data <- data.frame(logRr = logRr[trueLogRr == 0 | trueLogRr == trueLogRrLevel], - trueLogRr = trueLogRr[trueLogRr == 0 | trueLogRr == trueLogRrLevel]) + data <- data.frame( + logRr = logRr[trueLogRr == 0 | trueLogRr == trueLogRrLevel], + trueLogRr = trueLogRr[trueLogRr == 0 | trueLogRr == trueLogRrLevel] + ) data$truth <- data$trueLogRr != 0 roc <- pROC::roc(data$truth, data$logRr, algorithm = 3) @@ -52,19 +62,23 @@ plotRocsInjectedSignals <- function(logRr, trueLogRr, showAucs, fileName = NULL) aucs <- c(aucs, pROC::auc(roc)) trueRrs <- c(trueRrs, exp(trueLogRrLevel)) } - data <- data.frame(sens = roc$sensitivities, - fpRate = 1 - roc$specificities, - trueRr = exp(trueLogRrLevel)) + data <- data.frame( + sens = roc$sensitivities, + fpRate = 1 - roc$specificities, + trueRr = exp(trueLogRrLevel) + ) data <- data[order(data$sens, data$fpRate), ] allData <- rbind(allData, data) } } allData$trueRr <- as.factor(allData$trueRr) - plot <- ggplot2::ggplot(allData, ggplot2::aes(x = .data$fpRate, - y = .data$sens, - group = .data$trueRr, - color = .data$trueRr, - fill = .data$trueRr)) + + plot <- ggplot2::ggplot(allData, ggplot2::aes( + x = .data$fpRate, + y = .data$sens, + group = .data$trueRr, + color = .data$trueRr, + fill = .data$trueRr + )) + ggplot2::geom_abline(intercept = 0, slope = 1) + ggplot2::geom_line(alpha = 0.5, size = 1) + ggplot2::scale_x_continuous("1 - specificity") + @@ -74,19 +88,24 @@ plotRocsInjectedSignals <- function(logRr, trueLogRr, showAucs, fileName = NULL) aucs <- data.frame(auc = aucs, trueRr = trueRrs) aucs <- aucs[order(-aucs$trueRr), ] for (i in 1:nrow(aucs)) { - label <- paste0("True RR = ", - format(round(aucs$trueRr[i], 2), nsmall = 2), - ": AUC = ", - format(round(aucs$auc[i], 2), nsmall = 2)) - plot <- plot + ggplot2::geom_text(label = label, - x = 1, - y = (i - 1) * 0.1, - hjust = 1, - color = "#000000") + label <- paste0( + "True RR = ", + format(round(aucs$trueRr[i], 2), nsmall = 2), + ": AUC = ", + format(round(aucs$auc[i], 2), nsmall = 2) + ) + plot <- plot + ggplot2::geom_text( + label = label, + x = 1, + y = (i - 1) * 0.1, + hjust = 1, + color = "#000000" + ) } } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 5.5, height = 4.5, dpi = 400) + } return(plot) } @@ -107,11 +126,21 @@ plotRocsInjectedSignals <- function(logRr, trueLogRr, showAucs, fileName = NULL) #' #' @export plotCoverageInjectedSignals <- function(logRr, seLogRr, trueLogRr, region = 0.95, fileName = NULL) { - data <- data.frame(logRr = logRr, - logLb95Rr = logRr + qnorm((1 - region)/2) * seLogRr, - logUb95Rr = logRr + qnorm(1 - (1 - region)/2) * seLogRr, - trueLogRr = trueLogRr, - trueRr = round(exp(trueLogRr), 2)) + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertNumeric(logRr, min.len = 1, add = errorMessages) + checkmate::assertNumeric(seLogRr, len = length(logRr), add = errorMessages) + checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages) + checkmate::assertNumeric(region, len = 1, lower = 0, upper = 1, add = errorMessages) + checkmate::assertCharacter(fileName, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + + data <- data.frame( + logRr = logRr, + logLb95Rr = logRr + qnorm((1 - region) / 2) * seLogRr, + logUb95Rr = logRr + qnorm(1 - (1 - region) / 2) * seLogRr, + trueLogRr = trueLogRr, + trueRr = round(exp(trueLogRr), 2) + ) if (any(is.na(data$logRr))) { warning("Some estimates are NA, removing prior to computing coverage") data <- data[!is.na(data$logRr), ] @@ -119,15 +148,17 @@ plotCoverageInjectedSignals <- function(logRr, seLogRr, trueLogRr, region = 0.95 vizD <- data.frame() for (trueRr in unique(data$trueRr)) { subset <- data[data$trueRr == trueRr, ] - d <- data.frame(trueRr = trueRr, group = c("Below CI", - "Within CI", - "Above CI"), fraction = 0, pos = 0) + d <- data.frame(trueRr = trueRr, group = c( + "Below CI", + "Within CI", + "Above CI" + ), fraction = 0, pos = 0) d$fraction[1] <- mean(subset$trueLogRr < subset$logLb95Rr) d$fraction[2] <- mean(subset$trueLogRr >= subset$logLb95Rr & subset$trueLogRr <= subset$logUb95Rr) d$fraction[3] <- mean(subset$trueLogRr > subset$logUb95Rr) - d$pos[1] <- d$fraction[1]/2 - d$pos[2] <- d$fraction[1] + (d$fraction[2]/2) - d$pos[3] <- d$fraction[1] + d$fraction[2] + (d$fraction[3]/2) + d$pos[1] <- d$fraction[1] / 2 + d$pos[2] <- d$fraction[1] + (d$fraction[2] / 2) + d$pos[3] <- d$fraction[1] + d$fraction[2] + (d$fraction[3] / 2) vizD <- rbind(vizD, d) } vizD$pos <- sapply(vizD$pos, function(x) { @@ -138,23 +169,26 @@ plotCoverageInjectedSignals <- function(logRr, seLogRr, trueLogRr, region = 0.95 vizD$group <- factor(vizD$group, levels = c("Below CI", "Within CI", "Above CI")) theme <- ggplot2::element_text(colour = "#000000", size = 10) plot <- with(vizD, { - ggplot2::ggplot(vizD, ggplot2::aes(x = as.factor(trueRr), y = fraction)) + - ggplot2::geom_bar(ggplot2::aes(fill = group), stat = "identity", position = "stack", alpha = 0.8) + - ggplot2::scale_fill_manual(values = c("#174a9f", "#f9dd75", "#f15222")) + - ggplot2::geom_text(ggplot2::aes(label = label, y = pos), size = 3) + - ggplot2::scale_x_discrete("True relative risk") + - ggplot2::scale_y_continuous("Coverage") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = ggplot2::element_blank(), - axis.text.x = theme, - legend.key = ggplot2::element_blank(), - legend.position = "right") + ggplot2::ggplot(vizD, ggplot2::aes(x = as.factor(trueRr), y = fraction)) + + ggplot2::geom_bar(ggplot2::aes(fill = group), stat = "identity", position = "stack", alpha = 0.8) + + ggplot2::scale_fill_manual(values = c("#174a9f", "#f9dd75", "#f15222")) + + ggplot2::geom_text(ggplot2::aes(label = label, y = pos), size = 3) + + ggplot2::scale_x_discrete("True relative risk") + + ggplot2::scale_y_continuous("Coverage") + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = ggplot2::element_blank(), + axis.text.x = theme, + legend.key = ggplot2::element_blank(), + legend.position = "right" + ) }) - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 5, height = 3.5, dpi = 400) + } return(plot) } @@ -163,11 +197,11 @@ plotCoverageInjectedSignals <- function(logRr, seLogRr, trueLogRr, region = 0.95 #' @param logRr A numeric vector of effect estimates on the log scale. #' @param seLogRr The standard error of the log of the effect estimates. Hint: often the standard #' error = (log() - log())/qnorm(0.025). If not provided the standard error will be inferred from +#' estimate>))/qnorm(0.025). If not provided the standard error will be inferred from #' the 95 percent confidence interval. -#' @param ci95Lb The lower bound of the 95 percent confidence interval. IF not provided it will be +#' @param ci95Lb The lower bound of the 95 percent confidence interval. IF not provided it will be #' inferred from the standard error. -#' @param ci95Ub The upper bound of the 95 percent confidence interval. IF not provided it will be +#' @param ci95Ub The upper bound of the 95 percent confidence interval. IF not provided it will be #' inferred from the standard error. #' @param trueLogRr A vector of the true effect sizes #' @param estimateType A character string to denote the effect size estimate type. Used for the x-axis @@ -175,19 +209,38 @@ plotCoverageInjectedSignals <- function(logRr, seLogRr, trueLogRr, region = 0.95 #' @param fileName Name of the file where the plot should be saved, for example 'plot.png'. See the #' function \code{ggsave} in the ggplot2 package for supported file formats. #' @param title An optional title to display above the plot. -#' +#' #' @return #' A Ggplot object. Use the \code{ggsave} function to save to file. -#' +#' #' @export -plotControls <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL, trueLogRr, estimateType = "relative risk", fileName = NULL, title) { +plotControls <- function(logRr, + seLogRr = NULL, + ci95Lb = NULL, + ci95Ub = NULL, + trueLogRr, + estimateType = "relative risk", + fileName = NULL, + title = NULL) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertNumeric(logRr, min.len = 1, add = errorMessages) + checkmate::assertNumeric(seLogRr, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(ci95Lb, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(ci95Ub, len = length(logRr), null.ok = TRUE, add = errorMessages) + checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages) + checkmate::assertCharacter(estimateType, len = 1, add = errorMessages) + checkmate::assertCharacter(fileName, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(title, null.ok = TRUE, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) if (is.null(seLogRr) && is.null(ci95Lb)) { stop("Must specify either standard error or confidence interval") } - data <- data.frame(logRr = logRr, - trueLogRr = trueLogRr) + data <- data.frame( + logRr = logRr, + trueLogRr = trueLogRr + ) if (is.null(seLogRr)) { - data$seLogRr <- (log(ci95Ub) - log(ci95Lb)) / (2*qnorm(0.975)) + data$seLogRr <- (log(ci95Ub) - log(ci95Lb)) / (2 * qnorm(0.975)) } else { data$seLogRr <- seLogRr } @@ -207,74 +260,88 @@ plotControls <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL, tr temp1$nLabel <- paste0(formatC(temp1$Significant, big.mark = ","), " estimates") temp1$Significant <- NULL - temp2$meanLabel <- paste0(formatC(100 * (1 - temp2$Significant), digits = 1, format = "f"), - "% of CIs includes ", - substr(as.character(temp2$Group), - start = 21, - stop = nchar(as.character(temp2$Group)))) + temp2$meanLabel <- paste0( + formatC(100 * (1 - temp2$Significant), digits = 1, format = "f"), + "% of CIs includes ", + substr(as.character(temp2$Group), + start = 21, + stop = nchar(as.character(temp2$Group)) + ) + ) temp2$Significant <- NULL dd <- merge(temp1, temp2) dd$tes <- as.numeric(substr(as.character(dd$Group), start = 21, - stop = nchar(as.character(dd$Group)))) + stop = nchar(as.character(dd$Group)) + )) breaks <- c(0.25, 0.5, 1, 2, 4, 6, 8) theme <- ggplot2::element_text(colour = "#000000", size = 14) themeRA <- ggplot2::element_text(colour = "#000000", size = 14, hjust = 1) - alpha <- 1 - min(0.95 * (nrow(data)/nrow(dd)/50000)^0.1, 0.95) + alpha <- 1 - min(0.95 * (nrow(data) / nrow(dd) / 50000)^0.1, 0.95) plot <- ggplot2::ggplot(data, ggplot2::aes(x = logRr, y = seLogRr)) + ggplot2::geom_vline(xintercept = log(breaks), colour = "#CCCCCC", lty = 1, size = 0.5) + - ggplot2::geom_abline(ggplot2::aes(intercept = (-log(.data$tes))/qnorm(0.025), slope = 1/qnorm(0.025)), - colour = rgb(0.8, 0, 0), - linetype = "dashed", - size = 1, - alpha = 0.5, - data = dd) + - ggplot2::geom_abline(ggplot2::aes(intercept = (-log(.data$tes))/qnorm(0.975), slope = 1/qnorm(0.975)), - colour = rgb(0.8, 0, 0), - linetype = "dashed", - size = 1, - alpha = 0.5, - data = dd) + + ggplot2::geom_abline(ggplot2::aes(intercept = (-log(.data$tes)) / qnorm(0.025), slope = 1 / qnorm(0.025)), + colour = rgb(0.8, 0, 0), + linetype = "dashed", + size = 1, + alpha = 0.5, + data = dd + ) + + ggplot2::geom_abline(ggplot2::aes(intercept = (-log(.data$tes)) / qnorm(0.975), slope = 1 / qnorm(0.975)), + colour = rgb(0.8, 0, 0), + linetype = "dashed", + size = 1, + alpha = 0.5, + data = dd + ) + ggplot2::geom_point(size = 2, color = rgb(0, 0, 0, alpha = 0.05), alpha = alpha, shape = 16) + ggplot2::geom_hline(yintercept = 0) + - ggplot2::geom_label(x = log(0.26), - y = 0.96, - alpha = 1, - hjust = "left", - ggplot2::aes(label = .data$nLabel), - size = 5, - data = dd) + - ggplot2::geom_label(x = log(0.26), - y = 0.8, - alpha = 1, - hjust = "left", - ggplot2::aes(label = .data$meanLabel), - size = 5, - data = dd) + + ggplot2::geom_label( + x = log(0.26), + y = 0.96, + alpha = 1, + hjust = "left", + ggplot2::aes(label = .data$nLabel), + size = 5, + data = dd + ) + + ggplot2::geom_label( + x = log(0.26), + y = 0.8, + alpha = 1, + hjust = "left", + ggplot2::aes(label = .data$meanLabel), + size = 5, + data = dd + ) + ggplot2::scale_x_continuous(paste("Estimated", estimateType), - limits = log(c(0.25, 10)), - breaks = log(breaks), - labels = breaks) + + limits = log(c(0.25, 10)), + breaks = log(breaks), + labels = breaks + ) + ggplot2::scale_y_continuous("Standard Error", limits = c(0, 1)) + ggplot2::facet_grid(. ~ Group) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - axis.title = theme, - legend.key = ggplot2::element_blank(), - strip.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - legend.position = "none") - if (!missing(title)) { + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + axis.title = theme, + legend.key = ggplot2::element_blank(), + strip.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + legend.position = "none" + ) + if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 1.6 + 3 * nrow(dd), height = 2.8, dpi = 400) + } return(plot) } diff --git a/R/PositiveControlSynthesis.R b/R/PositiveControlSynthesis.R index 57411af..9072885 100644 --- a/R/PositiveControlSynthesis.R +++ b/R/PositiveControlSynthesis.R @@ -194,16 +194,38 @@ synthesizePositiveControls <- function(connectionDetails, warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema } - if (min(effectSizes) < 1) { - stop("Effect sizes smaller than 1 are currently not supported") - } - if (modelType != "poisson" && modelType != "survival") { - stop(paste0( - "Unknown modelType '", - modelType, - "', please select either 'poisson' or 'survival'" - )) - } + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(exposureTable, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeDatabaseSchema, len = 1, add = errorMessages) + checkmate::assertCharacter(outcomeTable, len = 1, add = errorMessages) + checkmate::assertLogical(createOutputTable, len = 1, add = errorMessages) + checkmate::assertDataFrame(exposureOutcomePairs, add = errorMessages) + checkmate::assertNames(colnames(exposureOutcomePairs), must.include = c("exposureId", "outcomeId"), add = errorMessages) + checkmate::assertChoice(modelType, c("poisson", "survival"), add = errorMessages) + checkmate::assertInt(minOutcomeCountForModel, lower = 0, add = errorMessages) + checkmate::assertInt(minOutcomeCountForInjection, lower = 0, add = errorMessages) + checkmate::assertInt(minModelCount, lower = 0, add = errorMessages) + checkmate::assertList(covariateSettings, add = errorMessages) + checkmate::assertClass(prior, "cyclopsPrior", add = errorMessages) + checkmate::assertClass(control, "cyclopsControl", add = errorMessages) + checkmate::assertLogical(firstExposureOnly, len = 1, add = errorMessages) + checkmate::assertInt(washoutPeriod, lower = 0, add = errorMessages) + checkmate::assertInt(riskWindowStart, add = errorMessages) + checkmate::assertInt(riskWindowEnd, add = errorMessages) + checkmate::assertLogical(removePeopleWithPriorOutcomes, len = 1, add = errorMessages) + checkmate::assertInt(maxSubjectsForModel, lower = 0, add = errorMessages) + checkmate::assertNumeric(effectSizes, lower = 1, min.len = 1, add = errorMessages) + checkmate::assertNumeric(precision, lower = 0, len = 1, add = errorMessages) + checkmate::assertInt(outputIdOffset, lower = 0, add = errorMessages) + checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) + checkmate::assertCharacter(cdmVersion, len = 1, add = errorMessages) + checkmate::assertInt(modelThreads, lower = 1, add = errorMessages) + checkmate::assertInt(generationThreads, lower = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) if (!firstExposureOnly && addIntentToTreat) { stop("Cannot have addIntentToTreat = TRUE and firstExposureOnly = FALSE at the same time") } diff --git a/R/ShinyApps.R b/R/ShinyApps.R index db8292d..6045914 100644 --- a/R/ShinyApps.R +++ b/R/ShinyApps.R @@ -1,13 +1,13 @@ # Copyright 2023 Observational Health Data Sciences and Informatics # # This file is part of MethodEvaluation -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,6 +27,11 @@ #' #' @export launchMethodEvaluationApp <- function(exportFolder, launch.browser = TRUE) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) + checkmate::assertLogical(launch.browser, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + ensure_installed("DT") appDir <- system.file("shinyApps", "MethodEvalViewer", package = "MethodEvaluation") shinySettings <- list(exportFolder = exportFolder) diff --git a/R/SignalInjection.R b/R/SignalInjection.R index a4d1cd1..0bc35cf 100644 --- a/R/SignalInjection.R +++ b/R/SignalInjection.R @@ -135,25 +135,29 @@ injectSignals <- function(connectionDetails, modelType = "poisson", minOutcomeCountForModel = 100, minOutcomeCountForInjection = 25, - covariateSettings = FeatureExtraction::createCovariateSettings(useDemographicsAgeGroup = TRUE, - useDemographicsGender = TRUE, - useDemographicsIndexYear = TRUE, - useDemographicsIndexMonth = TRUE, - useConditionGroupEraLongTerm = TRUE, - useDrugGroupEraLongTerm = TRUE, - useProcedureOccurrenceLongTerm = TRUE, - useMeasurementLongTerm = TRUE, - useObservationLongTerm = TRUE, - useCharlsonIndex = TRUE, - useDcsi = TRUE, - useChads2Vasc = TRUE, - longTermStartDays = 365, - endDays = 0), + covariateSettings = FeatureExtraction::createCovariateSettings( + useDemographicsAgeGroup = TRUE, + useDemographicsGender = TRUE, + useDemographicsIndexYear = TRUE, + useDemographicsIndexMonth = TRUE, + useConditionGroupEraLongTerm = TRUE, + useDrugGroupEraLongTerm = TRUE, + useProcedureOccurrenceLongTerm = TRUE, + useMeasurementLongTerm = TRUE, + useObservationLongTerm = TRUE, + useCharlsonIndex = TRUE, + useDcsi = TRUE, + useChads2Vasc = TRUE, + longTermStartDays = 365, + endDays = 0 + ), prior = Cyclops::createPrior("laplace", exclude = 0, useCrossValidation = TRUE), - control = Cyclops::createControl(cvType = "auto", - startingVariance = 0.1, - noiseLevel = "quiet", - threads = 10), + control = Cyclops::createControl( + cvType = "auto", + startingVariance = 0.1, + noiseLevel = "quiet", + threads = 10 + ), firstExposureOnly = FALSE, washoutPeriod = 183, riskWindowStart = 0, @@ -171,44 +175,46 @@ injectSignals <- function(connectionDetails, modelThreads = 1, generationThreads = 1) { .Deprecated("synthesizePositiveControls") - + if (addExposureDaysToEnd) { endAnchor <- "cohort end" } else { endAnchor <- "cohort start" } - - synthesizePositiveControls(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - oracleTempSchema = oracleTempSchema, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - outputDatabaseSchema = outputDatabaseSchema, - outputTable = outputTable, - createOutputTable = createOutputTable, - exposureOutcomePairs = exposureOutcomePairs, - modelType = modelType, - minOutcomeCountForModel = minOutcomeCountForModel, - minOutcomeCountForInjection = minOutcomeCountForInjection, - covariateSettings = covariateSettings, - prior = prior, - control = control, - firstExposureOnly = firstExposureOnly, - washoutPeriod = washoutPeriod, - riskWindowStart = riskWindowStart, - riskWindowEnd = riskWindowEnd, - endAnchor = endAnchor, - addIntentToTreat = addIntentToTreat, - firstOutcomeOnly = firstOutcomeOnly, - removePeopleWithPriorOutcomes = removePeopleWithPriorOutcomes, - maxSubjectsForModel = maxSubjectsForModel, - effectSizes = effectSizes, - precision = precision, - outputIdOffset = outputIdOffset, - workFolder = workFolder, - cdmVersion = cdmVersion, - modelThreads = modelThreads, - generationThreads = generationThreads) + + synthesizePositiveControls( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + oracleTempSchema = oracleTempSchema, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + outputDatabaseSchema = outputDatabaseSchema, + outputTable = outputTable, + createOutputTable = createOutputTable, + exposureOutcomePairs = exposureOutcomePairs, + modelType = modelType, + minOutcomeCountForModel = minOutcomeCountForModel, + minOutcomeCountForInjection = minOutcomeCountForInjection, + covariateSettings = covariateSettings, + prior = prior, + control = control, + firstExposureOnly = firstExposureOnly, + washoutPeriod = washoutPeriod, + riskWindowStart = riskWindowStart, + riskWindowEnd = riskWindowEnd, + endAnchor = endAnchor, + addIntentToTreat = addIntentToTreat, + firstOutcomeOnly = firstOutcomeOnly, + removePeopleWithPriorOutcomes = removePeopleWithPriorOutcomes, + maxSubjectsForModel = maxSubjectsForModel, + effectSizes = effectSizes, + precision = precision, + outputIdOffset = outputIdOffset, + workFolder = workFolder, + cdmVersion = cdmVersion, + modelThreads = modelThreads, + generationThreads = generationThreads + ) } diff --git a/docs/404.html b/docs/404.html index 8f6ebf0..b546ad5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ MethodEvaluation - 2.2.0 + 2.3.0 @@ -42,7 +42,7 @@ Reference