diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml index b367ab6a..db1df5be 100644 --- a/.github/workflows/R_CMD_check_Hades.yaml +++ b/.github/workflows/R_CMD_check_Hades.yaml @@ -20,30 +20,33 @@ jobs: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: windows-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"} + - {os: macOS-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"} + - {os: ubuntu-20.04, r: '4.2.3', rtools: '42', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: windows-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"} + #- {os: macOS-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"} + - {os: ubuntu-20.04, r: 'release', rtools: '', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: GITHUB_PAT: ${{ secrets.GH_TOKEN }} R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} - CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }} + CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }} CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }} CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }} CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }} CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }} - CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }} + CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }} CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }} CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }} CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }} CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }} - CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }} + CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }} CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }} CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} - CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }} + CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }} CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} @@ -53,11 +56,12 @@ jobs: CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} + rtools-version: ${{ matrix.config.rtools }} - uses: r-lib/actions/setup-tinytex@v2 @@ -66,7 +70,11 @@ jobs: - name: Install system requirements if: runner.os == 'Linux' run: | - sudo apt-get install -y libssh-dev + sudo apt-get install -y make + sudo apt-get install -y default-jdk + sudo apt-get install -y libcurl4-openssl-dev + sudo apt-get install -y libssl-dev + sudo apt-get install -y libglpk-dev while read -r cmd do eval sudo $cmd @@ -84,20 +92,20 @@ jobs: check-dir: '"check"' - name: Upload source package - if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + if: success() && runner.os == 'Windows' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main' uses: actions/upload-artifact@v2 with: name: package_tarball path: check/*.tar.gz - name: Install covr - if: runner.os == 'macOS' + if: runner.os == 'Linux' run: | install.packages("covr") shell: Rscript {0} - name: Test coverage - if: runner.os == 'macOS' + if: runner.os == 'Linux' run: covr::codecov() shell: Rscript {0} diff --git a/.gitignore b/.gitignore index b7f5e41a..6ed21cce 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,4 @@ src/*.so src/*.dll /Debug standalone/build/* -sql _targets diff --git a/DESCRIPTION b/DESCRIPTION index 9439eb10..e0fc6cf3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: Strategus Type: Package Title: Coordinating and Executing Analytics Using HADES Modules -Version: 0.0.6 -Date: 2023-04-25 +Version: 0.1.0 +Date: 2023-10-04 Authors@R: c( - person("Martijn", "Schuemie", email = "schuemie@ohdsi.org", role = c("aut", "cre")), - person("Anthony", "Sena", email = "sena@ohdsi.org", role = c("aut")), + person("Martijn", "Schuemie", email = "schuemie@ohdsi.org", role = c("aut")), + person("Anthony", "Sena", email = "sena@ohdsi.org", role = c("aut", "cre")), + person("Jamie", "Gilbert", role = c("aut")), person("Observational Health Data Science and Informatics", role = c("cph")) ) Maintainer: Anthony Sena @@ -14,12 +15,12 @@ License: Apache License 2.0 URL: https://ohdsi.github.io/Strategus, https://github.com/OHDSI/Strategus BugReports: https://github.com/OHDSI/Strategus/issues Depends: - R (>= 4.0.0), - CohortGenerator (>= 0.7.0), - DatabaseConnector (>= 5.1.0) + R (>= 4.2.0), + CohortGenerator (>= 0.8.0), + DatabaseConnector (>= 6.2.3) Imports: targets, - renv (>= 0.15.5), + renv (>= 1.0.0), ParallelLogger (>= 3.1.0), dplyr, checkmate, @@ -30,6 +31,7 @@ Imports: digest, methods, tibble, + ResultModelManager (>= 0.3.0), SqlRender (>= 1.11.0) Suggests: testthat (>= 3.0.0), @@ -40,6 +42,7 @@ Suggests: withr Remotes: ohdsi/CohortGenerator, + ohdsi/ResultModelManager, ohdsi/Eunomia VignetteBuilder: knitr NeedsCompilation: no diff --git a/NAMESPACE b/NAMESPACE index 8ea3b576..2671ea51 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(addModuleSpecifications) export(addSharedResources) export(createCdmExecutionSettings) export(createEmptyAnalysisSpecificiations) +export(createResultDataModels) export(createResultsExecutionSettings) export(ensureAllModulesInstantiated) export(execute) diff --git a/NEWS.md b/NEWS.md index 9f6b07b1..1fd7ea44 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,19 @@ +Strategus 0.1.0 +=============== + +- Adds an initial implementation for uploading results to a results database (#72) +- Robust handling of connection details via keyring (#74) +- Ensures uniqueness of all CDM tables when gathering database metadata (#82) +- `tempEmulationSchema` added to execution settings and passed properly to modules (#82) +- Adding logging to module initalization to detect `renv` restore errors (#82) +- Adopt HADES-wide lock file in latest versions of all modules (#83) +- Use renv >= v1.0.0 for all modules and Strategus (#83) +- Add GitHub unit tests for HADES adopted version (currently v4.2.3) and the latest R versions for all modules and Strategus. (#83) +- Ensure all Strategus GitHub unit tests run on all operating systems and available OHDSI test database platforms (#83) +- Use CDM v5.4 schemas for all unit tests (#85) +- Allow for passing `renv`configuration options when running Strategus (#88) +- Adds SQL for test cohorts to package (#1) + Strategus 0.0.6 =============== diff --git a/R/DatabaseMetaData.R b/R/DatabaseMetaData.R index cdd775f3..121ca849 100644 --- a/R/DatabaseMetaData.R +++ b/R/DatabaseMetaData.R @@ -42,7 +42,11 @@ createDatabaseMetaData <- function(executionSettings, keyringName = NULL) { connection = connection, databaseSchema = executionSettings$cdmDatabaseSchema ) - cdmTableList <- tolower(cdmTableList) + cdmTableList <- unique(tolower(cdmTableList)) + + if (length(cdmTableList) == 0) { + stop(sprintf("FATAL ERROR: No tables found in your OMOP CDM. Please confirm you are using the proper connection information, in particular the CDM schema name.")) + } if (!length(cdmTableList[which(x = cdmTableList %in% requiredTables)]) == length(requiredTables)) { missingCdmTables <- requiredTables[!(requiredTables %in% cdmTableList)] diff --git a/R/Execution.R b/R/Execution.R index facc04ce..c4e29680 100644 --- a/R/Execution.R +++ b/R/Execution.R @@ -24,18 +24,13 @@ #' Execute analysis specifications. #' #' @template AnalysisSpecifications -#' @param executionSettings An object of type `ExecutionSettings` as created -#' by [createCdmExecutionSettings()] or [createResultsExecutionSettings()]. +#' @template executionSettings #' @param executionScriptFolder Optional: the path to use for storing the execution script. #' when NULL, this function will use a temporary #' file location to create the script to execute. -#' #' @template keyringName -#' #' @param restart Restart run? Requires `executionScriptFolder` to be specified, and be #' the same as the `executionScriptFolder` used in the run to restart. -#' -#' #' @return #' Does not return anything. Is called for the side-effect of executing the specified #' analyses. @@ -53,6 +48,18 @@ execute <- function(analysisSpecifications, checkmate::assertChoice(x = keyringName, choices = keyringList$keyring, null.ok = TRUE, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) + # Assert that the temp emulation schema is set if required for the dbms + # specified by the executionSettings + if (is(executionSettings, "CdmExecutionSettings")) { + connectionDetails <- retrieveConnectionDetails( + connectionDetailsReference = executionSettings$connectionDetailsReference, + keyringName = keyringName + ) + DatabaseConnector::assertTempEmulationSchemaSet( + dbms = connectionDetails$dbms, + tempEmulationSchema = executionSettings$tempEmulationSchema + ) + } modules <- ensureAllModulesInstantiated(analysisSpecifications) if (is.null(executionScriptFolder)) { @@ -74,6 +81,7 @@ execute <- function(analysisSpecifications, } dependencies <- extractDependencies(modules) + fileName <- generateTargetsScript( analysisSpecifications = analysisSpecifications, executionSettings = executionSettings, @@ -82,7 +90,6 @@ execute <- function(analysisSpecifications, restart = restart, keyringName = keyringName ) - # targets::tar_manifest(script = fileName) # targets::tar_glimpse(script = fileName) targets::tar_make(script = fileName, store = file.path(executionScriptFolder, "_targets")) @@ -93,6 +100,59 @@ generateTargetsScript <- function(analysisSpecifications, executionSettings, dep if (restart) { return(fileName) } + + ### Note anything inisde this block will be scoped inside the targets script file + targets::tar_script( + { + ## + # Generated by Strategus - not advisable to edit by hand + ## + analysisSpecificationsLoad <- readRDS(analysisSpecificationsFileName) + moduleToTargetNames <- readRDS(moduleToTargetNamesFileName) + dependencies <- readRDS(dependenciesFileName) + + targets::tar_option_set(packages = c("Strategus", "keyring"), imports = c("Strategus", "keyring")) + targetList <- list( + targets::tar_target(analysisSpecifications, readRDS(analysisSpecificationsFileName)), + # NOTE Execution settings could be mapped to many different cdms making re-execution across cdms much simpler + targets::tar_target(executionSettings, readRDS(executionSettingsFileName)), + targets::tar_target(keyringSettings, readRDS(keyringSettingsFileName)) + ) + + # factory for producing module targets based on their dependencies + # This could be inside Strategus as an exported function + # it would also be much cleaner to use a targets pattern = cross(analysisSpecifications$moduleSpecifications) + # however, working out how to handle dependencies wasn't obvious + # This approach could be modified to allow multiple executionSettings, but that would require a substantial re-write + for (i in 1:length(analysisSpecificationsLoad$moduleSpecifications)) { + moduleSpecification <- analysisSpecificationsLoad$moduleSpecifications[[i]] + targetName <- sprintf("%s_%d", moduleSpecification$module, i) + dependencyModules <- dependencies[dependencies$module == moduleSpecification$module, ]$dependsOn + dependencyTargetNames <- moduleToTargetNames[moduleToTargetNames$module %in% dependencyModules, ]$targetName + + # Use of tar_target_raw allows dynamic names + targetList[[length(targetList) + 1]] <- targets::tar_target_raw(targetName, + substitute(Strategus:::runModule(analysisSpecifications, keyringSettings, i, executionSettings), + env = list(i = i) + ), + deps = c("analysisSpecifications", "keyringSettings", "executionSettings", dependencyTargetNames) + ) + + if (execResultsUpload) { + resultsTargetName <- paste0(targetName, "_results_upload") + targetList[[length(targetList) + 1]] <- targets::tar_target_raw(resultsTargetName, + substitute(Strategus:::runResultsUpload(analysisSpecifications, keyringSettings, i, executionSettings), + env = list(i = i) + ), + deps = c("analysisSpecifications", "keyringSettings", "executionSettings", targetName) + ) + } + } + targetList + }, + script = fileName + ) + # Store settings objects in the temp folder so they are available in targets analysisSpecificationsFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "analysisSpecifications.rds")) saveRDS(analysisSpecifications, analysisSpecificationsFileName) @@ -101,25 +161,6 @@ generateTargetsScript <- function(analysisSpecifications, executionSettings, dep keyringSettingsFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "keyringSettings.rds")) saveRDS(list(keyringName = keyringName), keyringSettingsFileName) - - # Dynamically generate targets script based on analysis specifications - lines <- c( - "library(targets)", - "tar_option_set(packages = c('Strategus', 'keyring'))", - "list(", - " tar_target(", - " analysisSpecifications,", - sprintf(" readRDS('%s')", analysisSpecificationsFileName), - " ),", - " tar_target(", - " executionSettings,", - sprintf(" readRDS('%s')", executionSettingsFileName), - " ),", - " tar_target(", - " keyringSettings,", - sprintf(" readRDS('%s')", keyringSettingsFileName), - " )," - ) # Generate target names by module type moduleToTargetNames <- list() for (i in 1:length(analysisSpecifications$moduleSpecifications)) { @@ -131,38 +172,28 @@ generateTargetsScript <- function(analysisSpecifications, executionSettings, dep ) } moduleToTargetNames <- bind_rows(moduleToTargetNames) + moduleToTargetNamesFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "moduleTargetNames.rds")) + saveRDS(moduleToTargetNames, moduleToTargetNamesFileName) + + dependenciesFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "dependencies.rds")) + saveRDS(dependencies, dependenciesFileName) + + execResultsUpload <- all(c( + is(executionSettings, "CdmExecutionSettings"), + !is.null(executionSettings$resultsConnectionDetailsReference), + !is.null(executionSettings$resultsDatabaseSchema) + )) + + # Settings required inside script. There is probably a much cleaner way of doing this + writeLines(c( + sprintf("analysisSpecificationsFileName <- '%s'", analysisSpecificationsFileName), + sprintf("executionSettingsFileName <- '%s'", executionSettingsFileName), + sprintf("keyringSettingsFileName <- '%s'", keyringSettingsFileName), + sprintf("moduleToTargetNamesFileName <- '%s'", moduleToTargetNamesFileName), + sprintf("dependenciesFileName <- '%s'", dependenciesFileName), + sprintf("execResultsUpload <- '%s'", execResultsUpload), + readLines(fileName) + ), fileName) - # Generate targets code, inserting dependencies - for (i in 1:length(analysisSpecifications$moduleSpecifications)) { - moduleSpecification <- analysisSpecifications$moduleSpecifications[[i]] - targetName <- sprintf("%s_%d", moduleSpecification$module, i) - dependencyModules <- dependencies %>% - filter(.data$module == moduleSpecification$module) %>% - pull(.data$dependsOn) - dependencyTargetNames <- moduleToTargetNames %>% - filter(.data$module %in% dependencyModules) %>% - pull(.data$targetName) - - command <- sprintf( - "Strategus:::runModule(analysisSpecifications, keyringSettings, %d, executionSettings%s)", - i, - ifelse(length(dependencyTargetNames) == 0, "", sprintf(", %s", paste(dependencyTargetNames, collapse = ", "))) - ) - - - lines <- c( - lines, - " tar_target(", - sprintf(" %s,", targetName), - sprintf(" %s", command), - ifelse(i == length(analysisSpecifications$moduleSpecifications), " )", " ),") - ) - } - - lines <- c(lines, ")") - - sink(fileName) - cat(paste(lines, collapse = "\n")) - sink() return(fileName) } diff --git a/R/ModuleEnv.R b/R/ModuleEnv.R new file mode 100644 index 00000000..b26de8ba --- /dev/null +++ b/R/ModuleEnv.R @@ -0,0 +1,125 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of Strategus +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.handleInjectVar <- function(x) { + hVar <- function(x) { + if (is.character(x)) { + return(sprintf('"%s"', x)) + } else { + return(sprintf("%s", x)) + } + } + + if (length(x) == 1) { + return(hVar(x)) + } else if (is.vector(x)) { + innerVars <- hVar(x) + return(paste0("c(", paste(innerVars, collapse = ", "), ")")) + } else { + stop("cannot handle complex data structures in variable injection") + } +} + +#' Load module execution space inside and renv +#' inspired by targets::tar_script but allowing custom variable execution +#' +#' Designed to allow more human readable code that is executed inside a module as well as simple variable substituion +#' for injecting constants (e.g. simple parameters or file paths used inside and outside of modules) +#' +#' This pattern also allows dependency injection which could be used if you don't want to use and renv and (instead) +#' would like to use docker images or just execution in the base environment for testing/debugging +#' +#' @param code code block to execute +#' @param moduleFolder Instantiated Strategus module folder +#' @param injectVars list of var names list(name=value) to replace (e.g. replace list(foo = "some string") will +#' find the pattern foo and replace it with the string some string - be careful! +#' @param tempScriptFile tempFile to write script to +#' @param useLocalStrategusLibrary Use the locally installed Strategus library? TRUE will use the Strategus +#' installation from the calling R process. +#' @param job run as rstudio job +#' @param processName String name for process +#' @returns NULL invisibly +withModuleRenv <- function(code, + moduleFolder, + injectVars = list(), + tempScriptFile = tempfile(fileext = ".R"), + useLocalStrategusLibrary = TRUE, + job = FALSE, + processName = paste(moduleFolder, "_renv_run")) { + # convert human readable code to a string for writing + script <- as.character(substitute(code))[-1] + # Insert variables + for (name in names(injectVars)) { + rep <- .handleInjectVar(injectVars[[name]]) + script <- gsub(name, rep, script) + } + + # Attach renv options() from the calling environment to the renv::run context + # renv options are prefixed with "renv." as described in + # https://rstudio.github.io/renv/reference/config.html + envOptions <- options() + renvOptions <- envOptions[grepl("renv\\.", names(envOptions))] + if (length(renvOptions) > 0) { + for (i in 1:length(renvOptions)) { + script <- c(.copyOptionForScript( + optionName = names(renvOptions)[[i]], + optionValue = renvOptions[[i]] + ), script) + } + } + + # Enforce attachment of Strategus from calling process - note one inside the renv + if (useLocalStrategusLibrary) { + script <- c(.getLocalLibraryScipt("Strategus"), script) + # Adding Strategus dependencies to the script + script <- c(.getLocalLibraryScipt("ParallelLogger"), script) + script <- c(.getLocalLibraryScipt("CohortGenerator"), script) + script <- c(.getLocalLibraryScipt("DatabaseConnector"), script) + script <- c(.getLocalLibraryScipt("keyring"), script) + script <- c(.getLocalLibraryScipt("openssl"), script) + script <- c(.getLocalLibraryScipt("dplyr"), script) + script <- c(.getLocalLibraryScipt("R6"), script) + } + + # Write file and execute script inside an renv + fileConn <- file(tempScriptFile) + writeLines(script, fileConn) + close(fileConn) + renv::run( + script = tempScriptFile, + job = job, + name = processName, + project = moduleFolder + ) + return(invisible(NULL)) +} + +.getLocalLibraryScipt <- function(x) { + libPath <- file.path(find.package(x), "../") + sprintf("library(%s, lib.loc = '%s')", x, libPath) +} + +.copyOptionForScript <- function(optionName, optionValue) { + if (is.logical(optionValue) || is.numeric(optionValue)) { + sprintf("options(%s = %s)", optionName, optionValue) + } else if (is.character(optionValue) && length(optionValue) == 1) { + sprintf("options(%s = '%s')", optionName, optionValue) + } else if (is.character(optionValue) && length(optionValue) > 1) { + sprintf("options(%s = c('%s'))", optionName, paste(optionValue, collapse = "','")) + } else { + paste0("# option = ", optionName, " - could not be passed to this file, likely because it is a function.") + } +} diff --git a/R/ModuleInstantiation.R b/R/ModuleInstantiation.R index bab8a742..df27152f 100644 --- a/R/ModuleInstantiation.R +++ b/R/ModuleInstantiation.R @@ -38,9 +38,9 @@ ensureAllModulesInstantiated <- function(analysisSpecifications) { # Verify only one version per module: multipleVersionsPerModule <- modules %>% - group_by(.data$module) %>% + group_by(module) %>% summarise(versions = n()) %>% - filter(.data$versions > 1) + filter(versions > 1) if (nrow(multipleVersionsPerModule) > 0) { stop(sprintf( "Only one version per module allowed in a single analyses specification.\nMultiple versions found for module(s) `%s`.", @@ -61,7 +61,7 @@ ensureAllModulesInstantiated <- function(analysisSpecifications) { # Check required dependencies have been installed: dependencies <- extractDependencies(modules) missingDependencies <- dependencies %>% - filter(!.data$dependsOn %in% modules$module) + filter(!dependsOn %in% modules$module) if (nrow(missingDependencies) > 0) { message <- paste( c( @@ -91,7 +91,7 @@ getModuleTable <- function(analysisSpecifications, distinct = FALSE) { bind_rows() if (distinct) { modules <- modules %>% - distinct(.data$module, .data$version, .keep_all = TRUE) + distinct(module, version, .keep_all = TRUE) } return(modules) } @@ -144,33 +144,33 @@ instantiateModule <- function(module, version, remoteRepo, remoteUsername, modul dir.create(moduleFolder) success <- FALSE on.exit(if (!success) unlink(moduleFolder, recursive = TRUE)) + moduleFile <- file.path(moduleFolder, sprintf("%s_%s.zip", module, version)) if (module == "TestModule1") { - # For demo purposes only: get module from extras folder - files <- list.files("extras/TestModules/TestModule1", full.names = TRUE, include.dirs = TRUE, all.files = TRUE) - files <- files[!grepl("renv$", files)] - files <- files[!grepl("\\.$", files)] - files <- files[!grepl(".Rhistory$", files)] - file.copy(files, moduleFolder, recursive = TRUE) - dir.create(file.path(moduleFolder, "renv")) - file.copy("extras/TestModules/TestModule1/renv/activate.R", file.path(moduleFolder, "renv"), recursive = TRUE) + # For unit testing purposes only: get module from inst/testdata folder + file.copy( + from = system.file( + file.path("testdata", basename(moduleFile)), + package = utils::packageName() + ), + to = moduleFolder + ) } else { - moduleFile <- file.path(moduleFolder, sprintf("%s_%s.zip", module, version)) moduleUrl <- sprintf("https://%s/%s/%s/archive/refs/tags/v%s.zip", remoteRepo, remoteUsername, module, version) utils::download.file(url = moduleUrl, destfile = moduleFile) - utils::unzip(zipfile = moduleFile, exdir = moduleFolder) - unlink(moduleFile) - # At this point, the unzipped folders will likely exist in a sub folder. - # Move all files from that sub folder to the main module folder - subFolders <- list.dirs(path = moduleFolder, recursive = FALSE) - if (length(subFolders) > 0) { - for (i in 1:length(subFolders)) { - R.utils::copyDirectory( - from = subFolders[i], - to = moduleFolder, - recursive = TRUE - ) - unlink(subFolders[i], recursive = TRUE) - } + } + utils::unzip(zipfile = moduleFile, exdir = moduleFolder) + unlink(moduleFile) + # At this point, the unzipped folders will likely exist in a sub folder. + # Move all files from that sub folder to the main module folder + subFolders <- list.dirs(path = moduleFolder, recursive = FALSE) + if (length(subFolders) > 0) { + for (i in 1:length(subFolders)) { + R.utils::copyDirectory( + from = subFolders[i], + to = moduleFolder, + recursive = TRUE + ) + unlink(subFolders[i], recursive = TRUE) } } @@ -191,25 +191,18 @@ instantiateModule <- function(module, version, remoteRepo, remoteUsername, modul stop(message) } - script <- " + withModuleRenv( + code = { + ParallelLogger::addDefaultFileLogger( + fileName = file.path(moduleFolder, "moduleInitLog.txt") + ) + ParallelLogger::addDefaultErrorReportLogger( + fileName = file.path(moduleFolder, "moduleInitErrorReport.txt") + ) renv::restore(prompt = FALSE) - if (!require('ParallelLogger', quietly = TRUE)) { - install.packages('ParallelLogger') - } - if (!require('keyring', quietly = TRUE)) { - install.packages('keyring') - } - " - tempScriptFile <- tempfile(fileext = ".R") - fileConn <- file(tempScriptFile) - writeLines(script, fileConn) - close(fileConn) - - renv::run( - script = tempScriptFile, - job = FALSE, - name = "Buidling renv library", - project = moduleFolder + }, + moduleFolder = moduleFolder, + injectVars = list(moduleFolder = moduleFolder) ) success <- TRUE } @@ -223,10 +216,10 @@ getModuleRenvDependencies <- function(moduleFolder) { ) missingFiles <- tibble::enframe(renvRequiredFiles) %>% - mutate(fileExists = file.exists(file.path(moduleFolder, .data$value))) %>% - rename(fileName = .data$value) %>% - select(.data$fileName, .data$fileExists) %>% - filter(.data$fileExists == FALSE) + dplyr::mutate(fileExists = file.exists(file.path(moduleFolder, value))) %>% + dplyr::rename(fileName = value) %>% + dplyr::select("fileName", "fileExists") %>% + dplyr::filter(fileExists == FALSE) invisible(missingFiles) } diff --git a/R/ResultModelCreation.R b/R/ResultModelCreation.R new file mode 100644 index 00000000..2446561a --- /dev/null +++ b/R/ResultModelCreation.R @@ -0,0 +1,276 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of Strategus +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Create Result Data Models +#' @description +#' Use this at the study design stage to create data models for modules +#' This functions loads modules and executes any custom code to create schemas in a results database +#' If recreate is set to TRUE all existing data will be removed, otherwise +#' +#' @inheritParams execute +#' +#' @export +createResultDataModels <- function(analysisSpecifications, + executionSettings, + executionScriptFolder = NULL, + keyringName = NULL, + restart = FALSE) { + errorMessages <- checkmate::makeAssertCollection() + keyringList <- keyring::keyring_list() + checkmate::assertClass(analysisSpecifications, "AnalysisSpecifications", add = errorMessages) + checkmate::assertClass(executionSettings, "ResultsExecutionSettings", add = errorMessages) + checkmate::assertChoice(x = keyringName, choices = keyringList$keyring, null.ok = TRUE, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + + modules <- ensureAllModulesInstantiated(analysisSpecifications) + + + if (is.null(executionScriptFolder)) { + executionScriptFolder <- tempfile("strategusTempSettings") + dir.create(executionScriptFolder) + on.exit(unlink(executionScriptFolder, recursive = TRUE)) + } else if (!restart) { + if (dir.exists(executionScriptFolder)) { + unlink(executionScriptFolder, recursive = TRUE) + } + dir.create(executionScriptFolder, recursive = TRUE) + } + + script <- file.path(executionScriptFolder, "SchemaScript.R") + ## + # Code execution inside targets block + ## + targets::tar_script( + { + ## + # Generated by Strategus - not advisable to edit by hand + ## + analysisSpecificationsLoad <- readRDS(analysisSpecificationsFileName) + + targets::tar_option_set(packages = c("Strategus", "keyring"), imports = c("Strategus", "keyring")) + targetList <- list( + targets::tar_target(analysisSpecifications, readRDS(analysisSpecificationsFileName)), + targets::tar_target(executionSettings, readRDS(executionSettingsFileName)), + targets::tar_target(keyringSettings, readRDS(keyringSettingsFileName)) + ) + + for (i in 1:length(analysisSpecificationsLoad$moduleSpecifications)) { + moduleSpecification <- analysisSpecificationsLoad$moduleSpecifications[[i]] + targetName <- sprintf("%s_%d_schema_creation", moduleSpecification$module, i) + + # Use of tar_target_raw allows dynamic names + targetList[[length(targetList) + 1]] <- targets::tar_target_raw(targetName, + substitute(Strategus:::runSchemaCreation(analysisSpecifications, keyringSettings, i, executionSettings), + env = list(i = i) + ), + deps = c("analysisSpecifications", "keyringSettings", "executionSettings") + ) + } + targetList + }, + script = script + ) + + # Store settings objects in the temp folder so they are available in targets + analysisSpecificationsFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "analysisSpecifications.rds")) + saveRDS(analysisSpecifications, analysisSpecificationsFileName) + executionSettingsFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "executionSettings.rds")) + saveRDS(executionSettings, executionSettingsFileName) + keyringSettingsFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "keyringSettings.rds")) + saveRDS(list(keyringName = keyringName), keyringSettingsFileName) + + # Generate target names by module type + moduleToTargetNames <- list() + for (i in 1:length(analysisSpecifications$moduleSpecifications)) { + moduleSpecification <- analysisSpecifications$moduleSpecifications[[i]] + targetName <- sprintf("%s_%d", moduleSpecification$module, i) + moduleToTargetNames[[length(moduleToTargetNames) + 1]] <- tibble( + module = moduleSpecification$module, + targetName = targetName + ) + } + moduleToTargetNames <- bind_rows(moduleToTargetNames) + moduleToTargetNamesFileName <- gsub("\\\\", "/", file.path(executionScriptFolder, "moduleTargetNames.rds")) + saveRDS(moduleToTargetNames, moduleToTargetNamesFileName) + + # Settings required inside script. There is probably a much cleaner way of doing this + writeLines(c( + sprintf("analysisSpecificationsFileName <- '%s'", analysisSpecificationsFileName), + sprintf("executionSettingsFileName <- '%s'", executionSettingsFileName), + sprintf("keyringSettingsFileName <- '%s'", keyringSettingsFileName), + sprintf("moduleToTargetNamesFileName <- '%s'", moduleToTargetNamesFileName), + readLines(script) + ), script) + + targets::tar_make(script = script) +} + + +#' Create module(s) result data model +#' @description +#' This function will create the results data model for the modules in the +#' `analysisSpecifications`. A module can implement its own results data model +#' creation function by implementing the function `createDataModelSchema` in +#' its Main.R. The default behavior is to use the `ResultsModelManager` to create +#' the results data model based on the `resultsDataModelSpecification.csv` in the +#' module's results folder. +#' +#' @template AnalysisSpecifications +#' @param keyringSettings The keyringSettings from the executionSettings context +#' @param moduleIndex The index of the module in the analysis specification +#' @template executionSettings +#' @param ... For future expansion +runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleIndex, executionSettings, ...) { + checkmate::assert_multi_class(x = executionSettings, classes = c("ResultsExecutionSettings")) + moduleSpecification <- analysisSpecifications$moduleSpecifications[[moduleIndex]] + + module <- moduleSpecification$module + version <- moduleSpecification$version + remoteRepo <- moduleSpecification$remoteRepo + remoteUsername <- moduleSpecification$remoteUsername + moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername) + + # Create job context + moduleExecutionSettings <- executionSettings + moduleExecutionSettings$workSubFolder <- file.path(executionSettings$workFolder, sprintf("%s_%d", module, moduleIndex)) + moduleExecutionSettings$resultsSubFolder <- file.path(executionSettings$resultsFolder, sprintf("%s_%d", module, moduleIndex)) + + if (!dir.exists(moduleExecutionSettings$workSubFolder)) { + dir.create(moduleExecutionSettings$workSubFolder, recursive = TRUE) + } + if (!dir.exists(moduleExecutionSettings$resultsSubFolder)) { + dir.create(moduleExecutionSettings$resultsSubFolder, recursive = TRUE) + } + + jobContext <- list( + sharedResources = analysisSpecifications$sharedResources, + settings = moduleSpecification$settings, + moduleExecutionSettings = moduleExecutionSettings, + keyringSettings = keyringSettings + ) + jobContextFileName <- file.path(moduleExecutionSettings$workSubFolder, "jobContext.rds") # gsub("\\\\", "/", tempfile(fileext = ".rds")) + saveRDS(jobContext, jobContextFileName) + dataModelExportPath <- file.path(moduleExecutionSettings$workSubFolder, "resultsDataModelSpecification.csv") + + + doneFile <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "schema.creation") + if (file.exists(doneFile)) { + unlink(doneFile) + } + ## code executed inside renv only has scoped variables + withModuleRenv( + code = { + createDataModelSchema <- NULL + + getDataModelSpecifications <- function(...) { + if (file.exists("resultsDataModelSpecification.csv")) { + res <- CohortGenerator::readCsv( + file = "resultsDataModelSpecification.csv" + ) + return(res) + } + return(NULL) + } + + source("Main.R") + + jobContext <- readRDS(jobContextFileName) + specifications <- getDataModelSpecifications(jobContext) + + if (Sys.getenv("FORCE_RENV_USE", "") == "TRUE") { + renv::use(lockfile = "renv.lock") + } + + ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) + ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt")) + # Main.R can override default behaviour by implementing this function + if (is.function(createDataModelSchema)) { + # If the keyring is locked, unlock it, set the value and then re-lock it + keyringName <- jobContext$keyringSettings$keyringName + keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName) + + resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) + resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) + resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) + jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails + + createDataModelSchema(jobContext) + + if (keyringLocked) { + keyring::keyring_lock(keyring = keyringName) + } + writeLines("schema.created", doneFile) + } else if (is.data.frame(specifications)) { + # Export schema to readable location + CohortGenerator::writeCsv( + x = specifications, + file = dataModelExportPath, + warnOnCaseMismatch = FALSE + ) + writeLines("specifications.written", doneFile) + } else { + warning("Module does not include data specifications file or createDataModelSchema function") + CohortGenerator::writeCsv( + x = specifications, + file = dataModelExportPath, + warnOnCaseMismatch = FALSE + ) + writeLines("specifications.not.written", doneFile) + } + + ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) + ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) + }, + moduleFolder = moduleFolder, + tempScriptFile = file.path(moduleExecutionSettings$workSubFolder, "SchemaCreation.R"), + injectVars = list( + jobContextFileName = jobContextFileName, + dataModelExportPath = dataModelExportPath, + doneFile = doneFile + ) + ) + + workStatus <- readLines(doneFile) + if (workStatus == "specifications.written") { + moduleInfo <- ParallelLogger::loadSettingsFromJson(file.path(moduleFolder, "MetaData.json")) + keyringName <- jobContext$keyringSettings$keyringName + keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName) + + resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) + resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) + resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) + + connection <- DatabaseConnector::connect(resultsConnectionDetails) + on.exit(DatabaseConnector::disconnect(connection)) + + sql <- ResultModelManager::generateSqlSchema(csvFilepath = dataModelExportPath) + DatabaseConnector::renderTranslateExecuteSql(connection, + sql, + table_prefix = moduleInfo$TablePrefix, + database_schema = jobContext$moduleExecutionSettings$resultsDatabaseSchema + ) + + if (keyringLocked) { + keyring::keyring_lock(keyring = keyringName) + } + } else if (workStatus == "schema.created") { + message("Result schema creation handled inside module execution envrionment") + } else { + message("Schema not created for module") + } + + return(list(dummy = 123)) +} diff --git a/R/ResultsUpload.R b/R/ResultsUpload.R new file mode 100644 index 00000000..0a3ad60a --- /dev/null +++ b/R/ResultsUpload.R @@ -0,0 +1,186 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of Strategus +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Results upload callbacks for inserting results in to a database +runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleIndex, executionSettings, ...) { + checkmate::assert_multi_class(x = executionSettings, classes = c("ExecutionSettings")) + moduleSpecification <- analysisSpecifications$moduleSpecifications[[moduleIndex]] + + module <- moduleSpecification$module + version <- moduleSpecification$version + remoteRepo <- moduleSpecification$remoteRepo + remoteUsername <- moduleSpecification$remoteUsername + moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername) + + # Create job context + moduleExecutionSettings <- executionSettings + moduleExecutionSettings$workSubFolder <- file.path(executionSettings$workFolder, sprintf("%s_%d", module, moduleIndex)) + moduleExecutionSettings$resultsSubFolder <- file.path(executionSettings$resultsFolder, sprintf("%s_%d", module, moduleIndex)) + + if (!is(executionSettings, "CdmExecutionSettings")) { + stop("Unhandled executionSettings class! Must be CdmExecutionSettings instance") + } + + if (!dir.exists(moduleExecutionSettings$resultsSubFolder)) { + stop("results not found") + } + jobContext <- list( + sharedResources = analysisSpecifications$sharedResources, + settings = moduleSpecification$settings, + moduleExecutionSettings = moduleExecutionSettings, + keyringSettings = keyringSettings + ) + jobContextFileName <- file.path(moduleExecutionSettings$workSubFolder, "jobContext.rds") # gsub("\\\\", "/", tempfile(fileext = ".rds")) + saveRDS(jobContext, jobContextFileName) + dataModelExportPath <- file.path(moduleExecutionSettings$workSubFolder, "resultsDataModelSpecification.csv") + + doneFile <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "results.uploaded") + if (file.exists(doneFile)) { + unlink(doneFile) + } + + tempScriptFile <- file.path(moduleExecutionSettings$workSubFolder, "UploadScript.R") + + ## + # Module space executed code + ## + withModuleRenv( + { + uploadResultsCallback <- NULL + + getDataModelSpecifications <- function(...) { + ParallelLogger::logInfo("Getting result model specification") + if (file.exists("resultsDataModelSpecification.csv")) { + res <- CohortGenerator::readCsv( + file = "resultsDataModelSpecification.csv" + ) + return(res) + } + ParallelLogger::logInfo("No result model specification found") + return(NULL) + } + source("Main.R") + moduleInfo <- ParallelLogger::loadSettingsFromJson("MetaData.json") + jobContext <- readRDS(jobContextFileName) + specifications <- getDataModelSpecifications(jobContext) + ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) + ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReportR.txt")) + + if (Sys.getenv("FORCE_RENV_USE", "") == "TRUE") { + renv::use(lockfile = "renv.lock") + } + + # Override default behaviour and do module specific upload inside module context? + if (is.function(uploadResultsCallback)) { + ParallelLogger::logInfo("Calling module result upload functionality") + # If the keyring is locked, unlock it, set the value and then re-lock it + ParallelLogger::logInfo("-- Getting result database credentials") + keyringName <- jobContext$keyringSettings$keyringName + keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName) + resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) + resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) + resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) + jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails + ParallelLogger::logInfo("-- Executing upload callback") + uploadResultsCallback(jobContext) + if (keyringLocked) { + keyring::keyring_lock(keyring = keyringName) + } + ParallelLogger::logInfo("-- Upload completed") + writeLines("results.uploaded", doneFile) + } else if (is.null(specifications)) { + ParallelLogger::logInfo("No result specifications found, assuming module has produced no results") + # NO spect file Status + warning("data model specifications not loaded from module - skipping results upload") + writeLines("no.spec.found", doneFile) + } else { + # Spec file written + ParallelLogger::logInfo("Writing spec for result upload outside of module context") + CohortGenerator::writeCsv( + x = specifications, + file = dataModelExportPath, + warnOnFileNameCaseMismatch = FALSE + ) + writeLines("specifications.written", doneFile) + } + + ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) + ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) + }, + moduleFolder = moduleFolder, + tempScriptFile = tempScriptFile, + injectVars = list( + jobContextFileName = jobContextFileName, + dataModelExportPath = dataModelExportPath, + doneFile = doneFile + ) + ) + ## + # end Module executed code + ## + if (!file.exists(doneFile)) { + message <- paste( + "Module did not complete. To debug:", + sprintf(" rstudioapi::openProject('%s', newSession = TRUE)", moduleFolder), + sprintf(" file.edit('%s')", tempScriptFile), + sep = "\n" + ) + stop(message) + } + + workStatus <- readLines(doneFile) + + if (workStatus == "specifications.written") { + ParallelLogger::logInfo("Uploading results according to module specification") + specifications <- CohortGenerator::readCsv(dataModelExportPath) + moduleInfo <- ParallelLogger::loadSettingsFromJson(file.path(moduleFolder, "MetaData.json")) + + keyringName <- jobContext$keyringSettings$keyringName + keyringLocked <- Strategus::unlockKeyring(keyringName = keyringName) + + ParallelLogger::logInfo("Getting result database credentials") + resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) + resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) + resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) + jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails + + ParallelLogger::logInfo("Calling RMM for upload") + ResultModelManager::uploadResults( + connectionDetails = jobContext$moduleExecutionSettings$resultsConnectionDetails, + schema = jobContext$moduleExecutionSettings$resultsDatabaseSchema, + resultsFolder = jobContext$moduleExecutionSettings$resultsSubFolder, + tablePrefix = moduleInfo$TablePrefix, + forceOverWriteOfSpecifications = FALSE, + purgeSiteDataBeforeUploading = FALSE, + databaseIdentifierFile = "database_meta_data.csv", + runCheckAndFixCommands = FALSE, + warnOnMissingTable = TRUE, + specifications = specifications + ) + + ParallelLogger::logInfo("Upload completed") + if (keyringLocked) { + keyring::keyring_lock(keyring = keyringName) + } + } else if (workStatus == "results.uploaded") { + message("Result upload handled inside module execution envrionment") + } else { + message("Results not uploaded for module") + } + + return(list(dummy = 123)) +} diff --git a/R/RunModule.R b/R/RunModule.R index 3eea9099..d7087afb 100644 --- a/R/RunModule.R +++ b/R/RunModule.R @@ -24,7 +24,6 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, executionSettings, ...) { checkmate::assert_multi_class(x = executionSettings, classes = c("CdmExecutionSettings", "ResultsExecutionSettings")) moduleSpecification <- analysisSpecifications$moduleSpecifications[[moduleIndex]] - module <- moduleSpecification$module version <- moduleSpecification$version remoteRepo <- moduleSpecification$remoteRepo @@ -51,83 +50,83 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec jobContextFileName <- file.path(moduleExecutionSettings$workSubFolder, "jobContext.rds") # gsub("\\\\", "/", tempfile(fileext = ".rds")) saveRDS(jobContext, jobContextFileName) - # Execute module using settings - script <- " - source('Main.R') - jobContext <- readRDS(jobContextFileName) - - unlockKeyring <- function(keyringName) { - # If the keyring is locked, unlock it, set the value and then re-lock it - keyringLocked <- keyring::keyring_is_locked(keyring = keyringName) - if (keyringLocked) { - keyring::keyring_unlock(keyring = keyringName, password = Sys.getenv('STRATEGUS_KEYRING_PASSWORD')) - } - return(keyringLocked) - } - - # If the keyring is locked, unlock it, set the value and then re-lock it - keyringName <- jobContext$keyringSettings$keyringName - keyringLocked <- unlockKeyring(keyringName = keyringName) - " + tempScriptFile <- file.path(moduleExecutionSettings$workSubFolder, "StrategusScript.R") + doneFile <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "done") + if (file.exists(doneFile)) { + unlink(doneFile) + } - # Set the connection information based on the type of execution being - # performed if (is(executionSettings, "CdmExecutionSettings")) { - script <- paste0( - script, - "connectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$connectionDetailsReference, keyring = keyringName) - connectionDetails <- ParallelLogger::convertJsonToSettings(connectionDetails) - connectionDetails <- do.call(DatabaseConnector::createConnectionDetails, connectionDetails) - jobContext$moduleExecutionSettings$connectionDetails <- connectionDetails" - ) + isCdmExecution <- TRUE } else if (is(executionSettings, "ResultsExecutionSettings")) { - script <- paste0( - script, - "resultsConnectionDetails <- keyring::key_get(jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, keyring = keyringName) - resultsConnectionDetails <- ParallelLogger::convertJsonToSettings(resultsConnectionDetails) - resultsConnectionDetails <- do.call(DatabaseConnector::createConnectionDetails, resultsConnectionDetails) - jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails" - ) + isCdmExecution <- FALSE } else { stop("Unhandled executionSettings class! Must be one of the following: CdmExecutionSettings, ResultsExecutionSettings") } + withModuleRenv( + code = { + ############################ + # Generated by strategus - do not modify by hand + ############################ + source("Main.R") + jobContext <- readRDS(jobContextFileName) + + unlockKeyring <- function(keyringName) { + # If the keyring is locked, unlock it, set the value and then re-lock it + keyringLocked <- keyring::keyring_is_locked(keyring = keyringName) + if (keyringLocked) { + keyring::keyring_unlock(keyring = keyringName, password = Sys.getenv("STRATEGUS_KEYRING_PASSWORD")) + } + return(keyringLocked) + } - script <- paste0(script, " - if (keyringLocked) { - keyring::keyring_lock(keyring = keyringName) - } - - ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, 'log.txt')) - ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, 'errorReport.R')) - - options(andromedaTempFolder = file.path(jobContext$moduleExecutionSettings$workFolder, 'andromedaTemp')) + keyringName <- jobContext$keyringSettings$keyringName + keyringLocked <- unlockKeyring(keyringName = keyringName) - if (Sys.getenv('FORCE_RENV_USE', '') == 'TRUE') { - renv::use(lockfile = 'renv.lock') - } - execute(jobContext) + ParallelLogger::addDefaultFileLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "log.txt")) + ParallelLogger::addDefaultErrorReportLogger(file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "errorReport.R")) - ParallelLogger::unregisterLogger('DEFAULT_FILE_LOGGER', silent = TRUE) - ParallelLogger::unregisterLogger('DEFAULT_ERRORREPORT_LOGGER', silent = TRUE) - writeLines('done', file.path(jobContext$moduleExecutionSettings$resultsSubFolder, 'done')) - ") + options(andromedaTempFolder = file.path(jobContext$moduleExecutionSettings$workFolder, "andromedaTemp")) + options(tempEmulationSchema = jobContext$moduleExecutionSettings$tempEmulationSchema) + options(databaseConnectorIntegerAsNumeric = jobContext$moduleExecutionSettings$integerAsNumeric) + options(databaseConnectorInteger64AsNumeric = jobContext$moduleExecutionSettings$integer64AsNumeric) - script <- gsub("jobContextFileName", sprintf("\"%s\"", jobContextFileName), script) - tempScriptFile <- jobContextFileName <- file.path(moduleExecutionSettings$workSubFolder, "StrategusScript.R") # gsub("\\\\", "/", tempfile(fileext = ".R")) - fileConn <- file(tempScriptFile) - writeLines(script, fileConn) - close(fileConn) + if (Sys.getenv("FORCE_RENV_USE", "") == "TRUE") { + renv::use(lockfile = "renv.lock") + } - doneFile <- file.path(jobContext$moduleExecutionSettings$resultsSubFolder, "done") - if (file.exists(doneFile)) { - unlink(doneFile) - } - renv::run( - script = tempScriptFile, - job = FALSE, - name = "Running module", - project = moduleFolder + # NOTE injected variable isResultsExecution - will look strange outside of Strategus definition + if (isCdmExecution) { + connectionDetails <- Strategus::retrieveConnectionDetails( + connectionDetailsReference = jobContext$moduleExecutionSettings$connectionDetailsReference, + keyringName = keyringName + ) + jobContext$moduleExecutionSettings$connectionDetails <- connectionDetails + } else { + resultsConnectionDetails <- Strategus::retrieveConnectionDetails( + connectionDetailsReference = jobContext$moduleExecutionSettings$resultsConnectionDetailsReference, + keyringName = keyringName + ) + jobContext$moduleExecutionSettings$resultsConnectionDetails <- resultsConnectionDetails + } + if (keyringLocked) { + keyring::keyring_lock(keyring = keyringName) + } + execute(jobContext) + + ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE) + ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE) + writeLines("done", doneFile) + }, + tempScriptFile = tempScriptFile, + moduleFolder = moduleFolder, + injectVars = list( + doneFile = doneFile, + isCdmExecution = isCdmExecution, + jobContextFileName = jobContextFileName + ) ) + if (!file.exists(doneFile)) { message <- paste( "Module did not complete. To debug:", diff --git a/R/Settings.R b/R/Settings.R index 0d1fb51f..fe2a72f3 100644 --- a/R/Settings.R +++ b/R/Settings.R @@ -87,10 +87,16 @@ addModuleSpecifications <- function(analysisSpecifications, moduleSpecifications #' @param cohortTableNames An object identifying the various cohort table names that will be created in the #' `workDatabaseSchema`. This object can be created using the #' [CohortGenerator::getCohortTableNames()] function. +#' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created. #' @param workFolder A folder in the local file system where intermediate results can be written. #' @param resultsFolder A folder in the local file system where the module output will be written. #' @param minCellCount The minimum number of subjects contributing to a count before it can be included #' in results. +#' @param integerAsNumeric Logical: should 32-bit integers be converted to numeric (double) values? If FALSE 32-bit integers will be represented using R's native `Integer` class. Default is TRUE +#' @param integer64AsNumeric Logical: should 64-bit integers be converted to numeric (double) values? If FALSE 64-bit integers will be represented using `bit64::integer64`. Default is TRUE +#' @param resultsConnectionDetailsReference A string that can be used to retrieve the results database connection +#' details from a secure local store. +#' @param resultsDatabaseSchema A schema where the results tables are stored #' #' @return #' An object of type `ExecutionSettings`. @@ -100,9 +106,14 @@ createCdmExecutionSettings <- function(connectionDetailsReference, workDatabaseSchema, cdmDatabaseSchema, cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = "cohort"), + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), workFolder, resultsFolder, - minCellCount = 5) { + minCellCount = 5, + integerAsNumeric = getOption("databaseConnectorIntegerAsNumeric", default = TRUE), + integer64AsNumeric = getOption("databaseConnectorInteger64AsNumeric", default = TRUE), + resultsConnectionDetailsReference = NULL, + resultsDatabaseSchema = NULL) { errorMessages <- checkmate::makeAssertCollection() checkmate::assertCharacter(connectionDetailsReference, len = 1, add = errorMessages) checkmate::assertCharacter(workDatabaseSchema, len = 1, add = errorMessages) @@ -111,6 +122,10 @@ createCdmExecutionSettings <- function(connectionDetailsReference, checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) checkmate::assertCharacter(resultsFolder, len = 1, add = errorMessages) checkmate::assertInt(minCellCount, add = errorMessages) + checkmate::assertLogical(integerAsNumeric, max.len = 1, add = errorMessages) + checkmate::assertLogical(integer64AsNumeric, max.len = 1, add = errorMessages) + checkmate::assertCharacter(resultsConnectionDetailsReference, null.ok = TRUE, add = errorMessages) + checkmate::assertCharacter(resultsDatabaseSchema, null.ok = TRUE, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) executionSettings <- list( @@ -118,9 +133,14 @@ createCdmExecutionSettings <- function(connectionDetailsReference, workDatabaseSchema = workDatabaseSchema, cdmDatabaseSchema = cdmDatabaseSchema, cohortTableNames = cohortTableNames, + tempEmulationSchema = tempEmulationSchema, workFolder = workFolder, resultsFolder = resultsFolder, - minCellCount = minCellCount + minCellCount = minCellCount, + integerAsNumeric = integerAsNumeric, + integer64AsNumeric = integer64AsNumeric, + resultsConnectionDetailsReference = resultsConnectionDetailsReference, + resultsDatabaseSchema = resultsDatabaseSchema ) class(executionSettings) <- c("CdmExecutionSettings", "ExecutionSettings") return(executionSettings) @@ -135,6 +155,8 @@ createCdmExecutionSettings <- function(connectionDetailsReference, #' @param resultsFolder A folder in the local file system where the module output will be written. #' @param minCellCount The minimum number of subjects contributing to a count before it can be included #' in results. +#' @param integerAsNumeric Logical: should 32-bit integers be converted to numeric (double) values? If FALSE 32-bit integers will be represented using R's native `Integer` class. Default is TRUE +#' @param integer64AsNumeric Logical: should 64-bit integers be converted to numeric (double) values? If FALSE 64-bit integers will be represented using `bit64::integer64`. Default is TRUE #' #' @return #' An object of type `ExecutionSettings`. @@ -144,13 +166,17 @@ createResultsExecutionSettings <- function(resultsConnectionDetailsReference, resultsDatabaseSchema, workFolder, resultsFolder, - minCellCount = 5) { + minCellCount = 5, + integerAsNumeric = getOption("databaseConnectorIntegerAsNumeric", default = TRUE), + integer64AsNumeric = getOption("databaseConnectorInteger64AsNumeric", default = TRUE)) { errorMessages <- checkmate::makeAssertCollection() checkmate::assertCharacter(resultsConnectionDetailsReference, len = 1, add = errorMessages) checkmate::assertCharacter(resultsDatabaseSchema, len = 1, add = errorMessages) checkmate::assertCharacter(workFolder, len = 1, add = errorMessages) checkmate::assertCharacter(resultsFolder, len = 1, add = errorMessages) checkmate::assertInt(minCellCount, add = errorMessages) + checkmate::assertLogical(integerAsNumeric, max.len = 1, add = errorMessages) + checkmate::assertLogical(integer64AsNumeric, max.len = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) executionSettings <- list( @@ -158,7 +184,9 @@ createResultsExecutionSettings <- function(resultsConnectionDetailsReference, resultsDatabaseSchema = resultsDatabaseSchema, workFolder = workFolder, resultsFolder = resultsFolder, - minCellCount = minCellCount + minCellCount = minCellCount, + integerAsNumeric = integerAsNumeric, + integer64AsNumeric = integer64AsNumeric ) class(executionSettings) <- c("ResultsExecutionSettings", "ExecutionSettings") return(executionSettings) @@ -205,7 +233,7 @@ storeConnectionDetails <- function(connectionDetails, connectionDetailsReference if (is.function(connectionDetails[[i]])) { detail <- connectionDetails[[i]]() if (is.null(detail)) { - connectionDetails[[i]] <- "" + connectionDetails[[i]] <- .nullList() # Fixes Issue #74 } else { connectionDetails[[i]] <- connectionDetails[[i]]() } @@ -246,7 +274,21 @@ retrieveConnectionDetails <- function(connectionDetailsReference, keyringName = connectionDetails <- keyring::key_get(connectionDetailsReference, keyring = keyringName) connectionDetails <- ParallelLogger::convertJsonToSettings(connectionDetails) - connectionDetails <- do.call(DatabaseConnector::createConnectionDetails, connectionDetails) + + # Ensure that NA values are converted to NULL prior to calling + # DatabaseConnector. To do this, we'll construct a new connectionDetails + # list from keyring where the connectionDetails are NOT NA. This will + # allow for calling DatabaseConnector::createConnectionDetails with + # NULL values where NAs are present in the serialized version of the + # connectionDetails from keyring. + connectionDetailsConstructedFromKeyring <- list() + for (i in 1:length(connectionDetails)) { + if (isFALSE(is.na(connectionDetails[[i]]))) { + connectionDetailsConstructedFromKeyring[names(connectionDetails)[i]] <- connectionDetails[[i]] + } + } + + connectionDetails <- do.call(DatabaseConnector::createConnectionDetails, connectionDetailsConstructedFromKeyring) if (keyringLocked) { keyring::keyring_lock(keyring = keyringName) @@ -312,3 +354,10 @@ unlockKeyring <- function(keyringName) { return(TRUE) } } + +#' Used when serializing connection details to retain NULL values +#' +#' @keywords internal +.nullList <- function() { + invisible(list(NULL)) +} diff --git a/README.md b/README.md index 9ba8b018..39a7921b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Strategus Introduction ============ -Strategus is an R package for coordinating and executing analytics using HADES modules. +Strategus is an R package for coordinating and executing analytics using HADES modules. Please see the [Introduction To Strategus](https://ohdsi.github.io/Strategus/articles/IntroductionToStrategus.html) for more details. Features ======== @@ -21,11 +21,11 @@ Strategus is an R package. System Requirements ============ -Requires R (version 4.0.0 or higher). Installation on Windows requires [RTools](https://cran.r-project.org/bin/windows/Rtools/). Libraries used in Strategus require Java. +Requires R (version 4.2.0 or higher). Installation on Windows requires [RTools](https://cran.r-project.org/bin/windows/Rtools/). Libraries used in Strategus require Java. Strategus requires you to setup your GitHub Personal Access Token as described [here](https://ohdsi.github.io/Hades/rSetup.html#GitHub_Personal_Access_Token) Installation ============= -1. See the instructions [here](https://ohdsi.github.io/Hades/rSetup.html) for configuring your R environment, including RTools and Java. +1. See the instructions [here](https://ohdsi.github.io/Hades/rSetup.html) for configuring your R environment, including RTools, Java and your GitHub Personal Access Token. 2. In R, use the following commands to download and install Strategus: @@ -33,7 +33,15 @@ Installation install.packages("remotes") remotes::install_github("ohdsi/Strategus") ``` - +3. If using Linux, please refer to the [keyring Linux secret service setup](https://r-lib.github.io/keyring/index.html#linux) as this is required to execute studies using Strategus. In addition to those instructions, members of the OHDSI community have shared the following guidance: + +Run in a terminal the following before installing keyring: + +```bash +sudo yum install libsodium libsodium-devel +sudo yum install libsecret libsecret-devel +sudo chmod 777 /home/idies/.config +``` User Documentation ================== diff --git a/Strategus.Rproj b/Strategus.Rproj index ac857c97..3edb226c 100644 --- a/Strategus.Rproj +++ b/Strategus.Rproj @@ -18,4 +18,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageInstallArgs: --no-multiarch --with-keep.source PackageBuildArgs: --resave-data +PackageCheckArgs: --no-manual --no-build-vignettes PackageRoxygenize: rd,collate,namespace diff --git a/docs/404.html b/docs/404.html index e713abb3..09410908 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ Strategus - 0.0.6 + 0.1.0 @@ -107,7 +107,7 @@

Page not found (404)