diff --git a/NAMESPACE b/NAMESPACE index 883dc22a9..4a2e100cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(createConceptCountsTable) export(createDiagnosticsExplorerZip) export(createMergedResultsFile) export(createResultsDataModel) @@ -7,6 +8,7 @@ export(deployPositConnectApp) export(executeDiagnostics) export(getCdmDataSourceInformation) export(getCohortCounts) +export(getConceptCountsTableName) export(getDataMigrator) export(getDefaultCovariateSettings) export(getDefaultVocabularyTableNames) diff --git a/R/ConceptCountsTable.R b/R/ConceptCountsTable.R new file mode 100644 index 000000000..9d7579fe6 --- /dev/null +++ b/R/ConceptCountsTable.R @@ -0,0 +1,81 @@ +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of CohortDiagnostics +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' createConceptCountsTable +#' +#' @description Create a table containing concept counts. +#' CohortDiagnostics performs this task in every run and takes a significant amount of time. +#' However, with this function, the user can create this table beforehand and +#' save it in the writing schema for further use. +#' +#' @inheritParams executeDiagnostics +#' @param conceptCountsDatabaseSchema schema name for the concept counts table +#' @param conceptCountsTableIsTemp boolean to indicate if it should be a temporary table +#' @param removeCurrentTable if the current table should be removed +#' +#' @export +createConceptCountsTable <- function(connectionDetails = NULL, + connection = NULL, + cdmDatabaseSchema, + tempEmulationSchema = NULL, + conceptCountsTable = "concept_counts", + conceptCountsDatabaseSchema = cdmDatabaseSchema, + conceptCountsTableIsTemp = FALSE, + removeCurrentTable = TRUE) { + ParallelLogger::logInfo("Creating concept counts table") + if (is.null(connection)) { + connection <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) + } + sql <- + SqlRender::loadRenderTranslateSql( + "CreateConceptCountTable.sql", + packageName = "CohortDiagnostics", + dbms = connection@dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + work_database_schema = conceptCountsDatabaseSchema, + concept_counts_table = conceptCountsTable, + table_is_temp = conceptCountsTableIsTemp, + remove_current_table = removeCurrentTable + ) + executeSql(connection, sql) +} + +#' getConceptCountsTableName +#' +#' @description Get a concept counts table name that is unique for the current database version. +#' We need to make sure the table is only used if the counts are for the current database. +#' +#' @param connection database connection +#' @param cdmDatabaseSchema CDM schema +#' +#' @return the concepts count table name +#' @export +getConceptCountsTableName <- function(connection, cdmDatabaseSchema) { + result <- "concept_counts" + sql <- paste("SELECT vocabulary_version as version", + "FROM @cdmDatabaseSchema.VOCABULARY", + "WHERE vocabulary_id = 'None'") + dbVersion <- DatabaseConnector::renderTranslateQuerySql(connection = connection, + sql = sql, + cdmDatabaseSchema = cdmDatabaseSchema) |> + dplyr::pull(1) + if (!identical(dbVersion, character(0))) { + result <- paste(gsub(" |\\.|-", "_", dbVersion), result, sep = "_") + } + return(result) +} \ No newline at end of file diff --git a/R/ConceptSetUtils.R b/R/ConceptSetUtils.R index f3ff74c4b..76242db6a 100644 --- a/R/ConceptSetUtils.R +++ b/R/ConceptSetUtils.R @@ -75,29 +75,3 @@ ) return(orphanConcepts) } - -createConceptCountsTable <- function(connectionDetails = NULL, - connection = NULL, - cdmDatabaseSchema, - tempEmulationSchema = NULL, - conceptCountsDatabaseSchema = cdmDatabaseSchema, - conceptCountsTable = "concept_counts", - conceptCountsTableIsTemp = FALSE) { - ParallelLogger::logInfo("Creating internal concept counts table") - if (is.null(connection)) { - connection <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection)) - } - sql <- - SqlRender::loadRenderTranslateSql( - "CreateConceptCountTable.sql", - packageName = utils::packageName(), - dbms = connection@dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - work_database_schema = conceptCountsDatabaseSchema, - concept_counts_table = conceptCountsTable, - table_is_temp = conceptCountsTableIsTemp - ) - DatabaseConnector::executeSql(connection, sql) -} diff --git a/R/ConceptSets.R b/R/ConceptSets.R index 10ecbb709..1e1f436db 100644 --- a/R/ConceptSets.R +++ b/R/ConceptSets.R @@ -464,22 +464,24 @@ runConceptSetDiagnostics <- function(connection, if ((runIncludedSourceConcepts && nrow(subsetIncluded) > 0) || (runOrphanConcepts && nrow(subsetOrphans) > 0)) { - timeExecution( - exportFolder, - taskName = "createConceptCountsTable", - cohortIds = NULL, - parent = "runConceptSetDiagnostics", - expr = { - createConceptCountsTable( - connection = connection, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - conceptCountsDatabaseSchema = conceptCountsDatabaseSchema, - conceptCountsTable = conceptCountsTable, - conceptCountsTableIsTemp = conceptCountsTableIsTemp + if (!useExternalConceptCountsTable) { + timeExecution( + exportFolder, + taskName = "createConceptCountsTable", + cohortIds = NULL, + parent = "runConceptSetDiagnostics", + expr = { + createConceptCountsTable( + connection = connection, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + conceptCountsDatabaseSchema = conceptCountsDatabaseSchema, + conceptCountsTable = conceptCountsTable, + conceptCountsTableIsTemp = conceptCountsTableIsTemp + ) + } ) } - ) } if (runIncludedSourceConcepts) { timeExecution( @@ -499,9 +501,6 @@ runConceptSetDiagnostics <- function(connection, } if (nrow(subsetIncluded) > 0) { start <- Sys.time() - if (useExternalConceptCountsTable) { - stop("Use of external concept count table is not supported") - } else { sql <- SqlRender::loadRenderTranslateSql( "CohortSourceCodes.sql", packageName = utils::packageName(), @@ -616,7 +615,6 @@ runConceptSetDiagnostics <- function(connection, signif(delta, 3), attr(delta, "units") )) - } } } ) @@ -889,7 +887,7 @@ runConceptSetDiagnostics <- function(connection, if (!useExternalConceptCountsTable) { ParallelLogger::logTrace("Using internal concept count table.") } else { - stop("Use of external concept count table is not supported") + ParallelLogger::logTrace("Using external concept count table.") } # [OPTIMIZATION idea] can we modify the sql to do this for all uniqueConceptSetId in one query using group by? @@ -1081,23 +1079,25 @@ runConceptSetDiagnostics <- function(connection, if ((runIncludedSourceConcepts && nrow(subsetIncluded) > 0) || (runOrphanConcepts && nrow(subsetOrphans) > 0)) { - ParallelLogger::logTrace("Dropping temp concept count table") - if (conceptCountsTableIsTemp) { - countTable <- conceptCountsTable - } else { - countTable <- - paste(conceptCountsDatabaseSchema, conceptCountsTable, sep = ".") - } - - sql <- "TRUNCATE TABLE @count_table; DROP TABLE @count_table;" - DatabaseConnector::renderTranslateExecuteSql( - connection, - sql, - tempEmulationSchema = tempEmulationSchema, - count_table = countTable, - progressBar = FALSE, - reportOverallTime = FALSE - ) + if (!useExternalConceptCountsTable) { + ParallelLogger::logTrace("Dropping temp concept count table") + if (conceptCountsTableIsTemp) { + countTable <- conceptCountsTable + } else { + countTable <- + paste(conceptCountsDatabaseSchema, conceptCountsTable, sep = ".") + } + + sql <- "TRUNCATE TABLE @count_table; DROP TABLE @count_table;" + DatabaseConnector::renderTranslateExecuteSql( + connection, + sql, + tempEmulationSchema = tempEmulationSchema, + count_table = countTable, + progressBar = FALSE, + reportOverallTime = FALSE + ) + } } delta <- Sys.time() - startConceptSetDiagnostics diff --git a/R/RunDiagnostics.R b/R/RunDiagnostics.R index d3e8d26e1..26e70b6de 100644 --- a/R/RunDiagnostics.R +++ b/R/RunDiagnostics.R @@ -110,6 +110,8 @@ getDefaultCovariateSettings <- function() { #' diagnostics to. #' @param cohortDefinitionSet Data.frame of cohorts must include columns cohortId, cohortName, json, sql #' @param cohortTableNames Cohort Table names used by CohortGenerator package +#' @param conceptCountsTable Concepts count table name. The default is "#concept_counts" to create a temporal concept counts table. +#' If an external concept counts table is used, provide the name in character, e.g. "concept_counts" without a hash #' @param databaseId A short string for identifying the database (e.g. 'Synpuf'). #' @param databaseName The full name of the database. If NULL, defaults to value in cdm_source table #' @param databaseDescription A short description (several sentences) of the database. If NULL, defaults to value in cdm_source table @@ -136,6 +138,7 @@ getDefaultCovariateSettings <- function() { #' @param incremental Create only cohort diagnostics that haven't been created before? #' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where records are kept #' of which cohort diagnostics has been executed. +#' @param useExternalConceptCountsTable If TRUE an external table for the cohort concept counts will be used. #' @param runFeatureExtractionOnSample Logical. If TRUE, the function will operate on a sample of the data. #' Default is FALSE, meaning the function will operate on the full data set. #' @@ -205,6 +208,7 @@ executeDiagnostics <- function(cohortDefinitionSet, tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), cohortTable = "cohort", cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable), + conceptCountsTable = "#concept_counts", vocabularyDatabaseSchema = cdmDatabaseSchema, cohortIds = NULL, cdmVersion = 5, @@ -223,6 +227,7 @@ executeDiagnostics <- function(cohortDefinitionSet, irWashoutPeriod = 0, incremental = FALSE, incrementalFolder = file.path(exportFolder, "incremental"), + useExternalConceptCountsTable = FALSE, runFeatureExtractionOnSample = FALSE, sampleN = 1000, seed = 64374, @@ -687,6 +692,37 @@ executeDiagnostics <- function(cohortDefinitionSet, } ) } + + # Defines variables and checks version of external concept counts table ----- + if (!useExternalConceptCountsTable) { + conceptCountsTableIsTemp <- TRUE + if (conceptCountsTable != "#concept_counts") { + conceptCountsTable <- "#concept_counts" + } + } else { + if (conceptCountsTable == "#concept_counts") { + stop("Temporary conceptCountsTable name. Please provide a valid external ConceptCountsTable name") + } + conceptCountsTableIsTemp <- FALSE + conceptCountsTable <- conceptCountsTable + dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) + vocabVersion <- dataSourceInfo$vocabularyVersion + vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( + connection = connection, + sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", + work_database_schema = cohortDatabaseSchema, + concept_counts_table = conceptCountsTable, + snakeCaseToCamelCase = TRUE, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchena") + ) + if (!identical(vocabVersion, vocabVersionExternalConceptCountsTable[1,1])) { + stop(paste0("External concept counts table (", + vocabVersionExternalConceptCountsTable, + ") does not match database (", + vocabVersion, + "). Update concept_counts with createConceptCountsTable()")) + } + } # Always export concept sets to csv exportConceptSets( @@ -719,11 +755,11 @@ executeDiagnostics <- function(cohortDefinitionSet, exportFolder = exportFolder, minCellCount = minCellCount, conceptCountsDatabaseSchema = NULL, - conceptCountsTable = "#concept_counts", + conceptCountsTable = conceptCountsTable, conceptCountsTableIsTemp = TRUE, cohortDatabaseSchema = cohortDatabaseSchema, cohortTable = cohortTable, - useExternalConceptCountsTable = FALSE, + useExternalConceptCountsTable = useExternalConceptCountsTable, incremental = incremental, conceptIdTable = "#concept_ids", recordKeepingFile = recordKeepingFile diff --git a/inst/sql/sql_server/CreateConceptCountTable.sql b/inst/sql/sql_server/CreateConceptCountTable.sql index c92f05852..5806ffc23 100644 --- a/inst/sql/sql_server/CreateConceptCountTable.sql +++ b/inst/sql/sql_server/CreateConceptCountTable.sql @@ -1,11 +1,14 @@ {DEFAULT @table_is_temp = FALSE} +{DEFAULT @remove_current_table = TRUE} -{@table_is_temp} ? { -IF OBJECT_ID('tempdb..@concept_counts_table', 'U') IS NOT NULL - DROP TABLE @concept_counts_table; -} : { -IF OBJECT_ID('@work_database_schema.@concept_counts_table', 'U') IS NOT NULL - DROP TABLE @work_database_schema.@concept_counts_table; +{@remove_current_table} ? { + {@table_is_temp} ? { + IF OBJECT_ID('tempdb..@concept_counts_table', 'U') IS NOT NULL + DROP TABLE @concept_counts_table; + } : { + IF OBJECT_ID('@work_database_schema.@concept_counts_table', 'U') IS NOT NULL + DROP TABLE @work_database_schema.@concept_counts_table; + } } SELECT concept_id, @@ -95,3 +98,9 @@ FROM ( FROM @cdm_database_schema.observation GROUP BY observation_source_concept_id ) tmp; + +{@table_is_temp} ? {} : { +ALTER TABLE @work_database_schema.@concept_counts_table +ADD vocabulary_version VARCHAR(20) NULL; +UPDATE @work_database_schema.@concept_counts_table SET vocabulary_version = (SELECT vocabulary_version FROM @cdm_database_schema.vocabulary WHERE vocabulary_id = 'None'); +} diff --git a/man/createConceptCountsTable.Rd b/man/createConceptCountsTable.Rd new file mode 100644 index 000000000..bc0bbec9c --- /dev/null +++ b/man/createConceptCountsTable.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ConceptCountsTable.R +\name{createConceptCountsTable} +\alias{createConceptCountsTable} +\title{createConceptCountsTable} +\usage{ +createConceptCountsTable( + connectionDetails = NULL, + connection = NULL, + cdmDatabaseSchema, + tempEmulationSchema = NULL, + conceptCountsTable = "concept_counts", + conceptCountsDatabaseSchema = cdmDatabaseSchema, + conceptCountsTableIsTemp = FALSE, + removeCurrentTable = TRUE +) +} +\arguments{ +\item{connectionDetails}{An object of type \code{connectionDetails} as created using the +\code{\link[DatabaseConnector]{createConnectionDetails}} function in the +DatabaseConnector package. Can be left NULL if \code{connection} is +provided.} + +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package. Can be left NULL if \code{connectionDetails} +is provided, in which case a new connection will be opened at the start +of the function, and closed when the function finishes.} + +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} + +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} + +\item{conceptCountsTable}{Concepts count table name. The default is "#concept_counts" to create a temporal concept counts table. +If an external concept counts table is used, provide the name in character, e.g. "concept_counts" without a hash} + +\item{conceptCountsDatabaseSchema}{schema name for the concept counts table} + +\item{conceptCountsTableIsTemp}{boolean to indicate if it should be a temporary table} + +\item{removeCurrentTable}{if the current table should be removed} +} +\description{ +Create a table containing concept counts. +CohortDiagnostics performs this task in every run and takes a significant amount of time. +However, with this function, the user can create this table beforehand and +save it in the writing schema for further use. +} diff --git a/man/executeDiagnostics.Rd b/man/executeDiagnostics.Rd index 8d21bde8f..d14373a91 100644 --- a/man/executeDiagnostics.Rd +++ b/man/executeDiagnostics.Rd @@ -17,6 +17,7 @@ executeDiagnostics( tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), cohortTable = "cohort", cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable), + conceptCountsTable = "#concept_counts", vocabularyDatabaseSchema = cdmDatabaseSchema, cohortIds = NULL, cdmVersion = 5, @@ -35,6 +36,7 @@ executeDiagnostics( irWashoutPeriod = 0, incremental = FALSE, incrementalFolder = file.path(exportFolder, "incremental"), + useExternalConceptCountsTable = FALSE, runFeatureExtractionOnSample = FALSE, sampleN = 1000, seed = 64374, @@ -79,6 +81,9 @@ tables, provide a schema with write privileges where temp tables can be created. \item{cohortTableNames}{Cohort Table names used by CohortGenerator package} +\item{conceptCountsTable}{Concepts count table name. The default is "#concept_counts" to create a temporal concept counts table. +If an external concept counts table is used, provide the name in character, e.g. "concept_counts" without a hash} + \item{vocabularyDatabaseSchema}{Schema name where your OMOP vocabulary data resides. This is commonly the same as cdmDatabaseSchema. Note that for SQL Server, this should include both the database and @@ -126,6 +131,8 @@ on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) \item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept of which cohort diagnostics has been executed.} +\item{useExternalConceptCountsTable}{If TRUE an external table for the cohort concept counts will be used.} + \item{runFeatureExtractionOnSample}{Logical. If TRUE, the function will operate on a sample of the data. Default is FALSE, meaning the function will operate on the full data set.} diff --git a/man/getConceptCountsTableName.Rd b/man/getConceptCountsTableName.Rd new file mode 100644 index 000000000..8ed1b0a11 --- /dev/null +++ b/man/getConceptCountsTableName.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ConceptCountsTable.R +\name{getConceptCountsTableName} +\alias{getConceptCountsTableName} +\title{getConceptCountsTableName} +\usage{ +getConceptCountsTableName(connection, cdmDatabaseSchema) +} +\arguments{ +\item{connection}{database connection} + +\item{cdmDatabaseSchema}{CDM schema} +} +\value{ +the concepts count table name +} +\description{ +Get a concept counts table name that is unique for the current database version. +We need to make sure the table is only used if the counts are for the current database. +} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 6597fe5e0..8dede91ed 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -31,9 +31,8 @@ minCellCountValue <- 5 skipCdmTests <- FALSE if (dbms == "sqlite") { - databaseFile <- paste0(Sys.getpid(), "testEunomia.sqlite") - - connectionDetails <- Eunomia::getEunomiaConnectionDetails(databaseFile = databaseFile) + databaseFile <- Eunomia::getDatabaseFile("GiBleed", overwrite = FALSE) + connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = databaseFile) withr::defer( { unlink(databaseFile, recursive = TRUE, force = TRUE) diff --git a/tests/testthat/test-1-ResultsDataModel.R b/tests/testthat/test-1-ResultsDataModel.R index 1f2dd2182..fcae6b0d3 100644 --- a/tests/testthat/test-1-ResultsDataModel.R +++ b/tests/testthat/test-1-ResultsDataModel.R @@ -1,5 +1,5 @@ skipResultsDm <- FALSE -if (Sys.getenv("CDM5_POSTGRESQL_SERVER") == "") { +if (Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" || Sys.getenv("SKIP_DB_TESTS") == "TRUE") { skipResultsDm <- TRUE } else { postgresConnectionDetails <- DatabaseConnector::createConnectionDetails( @@ -179,6 +179,7 @@ VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient }) test_that("Sqlite results data model", { + skip_if(skipResultsDm) dbFile <- tempfile(fileext = ".sqlite") createMergedResultsFile(dataFolder = file.path(folder, "export"), sqliteDbPath = dbFile, overwrite = TRUE, tablePrefix = "cd_") connectionDetailsSqlite <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = dbFile) diff --git a/tests/testthat/test-other.R b/tests/testthat/test-other.R index 27c9acbdd..e719401d3 100644 --- a/tests/testthat/test-other.R +++ b/tests/testthat/test-other.R @@ -157,3 +157,43 @@ test_that("enforceMinCellValue works with vector of minimum values", { expect_equal(result$a, c(1, 2, 3, 4, 5)) }) + +test_that("Creating and checking externalConceptCounts table", { + + # Creating externalConceptCounts + sql_lite_path <- file.path(test_path(), "4448testEunomia.sqlite") + connectionDetails <- createConnectionDetails(dbms= "sqlite", server = sql_lite_path) + connection <- connect(connectionDetails) + cdmDatabaseSchema <- "main" + CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = NULL, + conceptCountsTable = "concept_counts", + conceptCountsDatabaseSchema = cdmDatabaseSchema, + conceptCountsTableIsTemp = FALSE, + removeCurrentTable = TRUE) + + concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") + expect_equal(concept_counts_info$NAME, c( "concept_id", "concept_count", "concept_subjects", "vocabulary_version")) + + # Checking vocab version matches + useExternalConceptCountsTable <- TRUE + conceptCountsTable <- "concept_counts" + conceptCountsTable <- conceptCountsTable + dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) + vocabVersion <- dataSourceInfo$vocabularyVersion + vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( + connection = connection, + sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", + work_database_schema = cdmDatabaseSchema, + concept_counts_table = conceptCountsTable, + snakeCaseToCamelCase = TRUE, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchena") + ) + + expect_equal(vocabVersion, vocabVersionExternalConceptCountsTable[1,1]) + +}) + + +