Skip to content

Commit

Permalink
Merge branch 'develop' into cohort_templates
Browse files Browse the repository at this point in the history
# Conflicts:
#	R/CohortConstruction.R
  • Loading branch information
azimov committed Oct 24, 2024
2 parents 1b64c1a + 5a7a8bc commit 73d3692
Show file tree
Hide file tree
Showing 330 changed files with 31,237 additions and 5,169 deletions.
8 changes: 8 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,11 @@
^\.Rproj\.user$
^\.idea$
^\.github$
_pkgdown\.yml
compare_versions
deploy.sh
docs
extras
man-roxygen
^cran-comments\.md$
^CRAN-SUBMISSION$
35 changes: 27 additions & 8 deletions .github/workflows/R_CMD_check_Hades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ jobs:
fail-fast: false
matrix:
config:
- {os: windows-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"}
- {os: macOS-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"}
- {os: ubuntu-20.04, r: '4.2.3', rtools: '42', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: windows-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"}
- {os: macOS-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"}
- {os: ubuntu-20.04, r: 'release', rtools: '', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
Expand Down Expand Up @@ -50,15 +47,29 @@ jobs:
CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }}
CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }}
CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }}
CDM_SNOWFLAKE_CDM53_SCHEMA: ${{ secrets.CDM_SNOWFLAKE_CDM53_SCHEMA }}
CDM_SNOWFLAKE_OHDSI_SCHEMA: ${{ secrets.CDM_SNOWFLAKE_OHDSI_SCHEMA }}
CDM_SNOWFLAKE_PASSWORD: ${{ secrets.CDM_SNOWFLAKE_PASSWORD }}
CDM_SNOWFLAKE_CONNECTION_STRING: ${{ secrets.CDM_SNOWFLAKE_CONNECTION_STRING }}
CDM_SNOWFLAKE_USER: ${{ secrets.CDM_SNOWFLAKE_USER }}
CDM5_SPARK_USER: ${{ secrets.CDM5_SPARK_USER }}
CDM5_SPARK_PASSWORD: ${{ secrets.CDM5_SPARK_PASSWORD }}
CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }}
CDM5_SPARK_CDM_SCHEMA: ${{ secrets.CDM5_SPARK_CDM_SCHEMA }}
CDM5_SPARK_OHDSI_SCHEMA: ${{ secrets.CDM5_SPARK_OHDSI_SCHEMA }}
CDM_BIG_QUERY_CONNECTION_STRING: ${{ secrets.CDM_BIG_QUERY_CONNECTION_STRING }}
CDM_BIG_QUERY_KEY_FILE: ${{ secrets.CDM_BIG_QUERY_KEY_FILE }}
CDM_BIG_QUERY_CDM_SCHEMA: ${{ secrets.CDM_BIG_QUERY_CDM_SCHEMA }}
CDM_BIG_QUERY_OHDSI_SCHEMA: ${{ secrets.CDM_BIG_QUERY_OHDSI_SCHEMA }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
rtools-version: ${{ matrix.config.rtools }}

- uses: r-lib/actions/setup-tinytex@v2

- uses: r-lib/actions/setup-pandoc@v2
Expand Down Expand Up @@ -91,13 +102,21 @@ jobs:
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
- name: Setup Java
if: runner.os == 'macOS'
uses: actions/setup-java@v4
with:
distribution: 'corretto'
java-version: '8'

- name: Install libssh
if: runner.os == 'Linux'
run: |
sudo apt-get install libssh-dev
- name: Install dependencies
run: |
install.packages("cachem")
remotes::install_deps(dependencies = TRUE, INSTALL_opts=c("--no-multiarch"))
remotes::install_cran("rcmdcheck")
shell: Rscript {0}
Expand All @@ -121,14 +140,14 @@ jobs:

- name: Upload check results
if: failure()
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
path: check

- name: Upload source package
if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: package_tarball
path: check/*.tar.gz
Expand Down Expand Up @@ -192,7 +211,7 @@ jobs:
- name: Download package tarball
if: ${{ env.new_version != '' }}
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: package_tarball

Expand Down
21 changes: 16 additions & 5 deletions .github/workflows/R_CMD_check_main_weekly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,41 @@ jobs:
GITHUB_PAT: ${{ secrets.GH_TOKEN }}
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }}
CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }}
CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }}
CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }}
CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }}
CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }}
CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }}
CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }}
CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }}
CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }}
CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }}
CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }}
CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }}
CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }}
CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }}
CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }}
CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }}
CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }}
CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }}
CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }}
CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }}
CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }}
CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }}
CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }}
CDM_SNOWFLAKE_CDM53_SCHEMA: ${{ secrets.CDM_SNOWFLAKE_CDM53_SCHEMA }}
CDM_SNOWFLAKE_OHDSI_SCHEMA: ${{ secrets.CDM_SNOWFLAKE_OHDSI_SCHEMA }}
CDM_SNOWFLAKE_PASSWORD: ${{ secrets.CDM_SNOWFLAKE_PASSWORD }}
CDM_SNOWFLAKE_CONNECTION_STRING: ${{ secrets.CDM_SNOWFLAKE_CONNECTION_STRING }}
CDM_SNOWFLAKE_USER: ${{ secrets.CDM_SNOWFLAKE_USER }}
CDM5_SPARK_USER: ${{ secrets.CDM5_SPARK_USER }}
CDM5_SPARK_PASSWORD: ${{ secrets.CDM5_SPARK_PASSWORD }}
CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }}

CDM5_SPARK_CDM_SCHEMA: ${{ secrets.CDM5_SPARK_CDM_SCHEMA }}
CDM5_SPARK_OHDSI_SCHEMA: ${{ secrets.CDM5_SPARK_OHDSI_SCHEMA }}
CDM_BIG_QUERY_CONNECTION_STRING: ${{ secrets.CDM_BIG_QUERY_CONNECTION_STRING }}
CDM_BIG_QUERY_KEY_FILE: ${{ secrets.CDM_BIG_QUERY_KEY_FILE }}
CDM_BIG_QUERY_CDM_SCHEMA: ${{ secrets.CDM_BIG_QUERY_CDM_SCHEMA }}
CDM_BIG_QUERY_OHDSI_SCHEMA: ${{ secrets.CDM_BIG_QUERY_OHDSI_SCHEMA }}

steps:
- uses: actions/checkout@v2

Expand Down
3 changes: 3 additions & 0 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Version: 0.11.0
Date: 2024-09-09 13:18:20 UTC
SHA: 39c45bae218f8ffd983b9bc9a6a5914ad4f7f9df
27 changes: 14 additions & 13 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: CohortGenerator
Type: Package
Title: An R Package for Cohort Generation Against the OMOP CDM
Version: 0.8.1
Date: 2023-10-10
Title: Cohort Generation for the OMOP Common Data Model
Version: 0.11.2
Date: 2024-09-30
Authors@R: c(
person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
person("Jamie", "Gilbert", role = c("aut")),
Expand All @@ -11,40 +11,41 @@ Authors@R: c(
person("Observational Health Data Science and Informatics", role = c("cph"))
)
Maintainer: Anthony Sena <[email protected]>
Description: An R package for that encapsulates the functions for generating cohorts against the OMOP CDM.
Description: Generate cohorts and subsets using an Observational
Medical Outcomes Partnership (OMOP) Common Data Model (CDM) Database.
Cohorts are defined using 'CIRCE' (<https://github.com/ohdsi/circe-be>) or
SQL compatible with 'SqlRender' (<https://github.com/OHDSI/SqlRender>).
Depends:
DatabaseConnector (>= 5.0.0),
R (>= 3.6.0),
R6
Imports:
bit64,
checkmate,
digest,
dplyr,
lubridate,
methods,
ParallelLogger (>= 3.0.0),
readr (>= 2.1.0),
rlang,
RJSONIO,
jsonlite,
ResultModelManager,
SqlRender (>= 1.11.1),
stringi (>= 1.7.6)
stringi (>= 1.7.6),
tibble
Suggests:
CirceR (>= 1.1.1),
Eunomia,
knitr,
rmarkdown,
ROhdsiWebApi,
testthat,
withr
Remotes:
ohdsi/CirceR,
ohdsi/Eunomia,
ohdsi/ROhdsiWebApi
withr,
zip
License: Apache License
VignetteBuilder: knitr
URL: https://ohdsi.github.io/CohortGenerator/, https://github.com/OHDSI/CohortGenerator
BugReports: https://github.com/OHDSI/CohortGenerator/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Encoding: UTF-8
Language: en-US
11 changes: 10 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,42 @@ export(createDemographicSubset)
export(createEmptyCohortDefinitionSet)
export(createEmptyNegativeControlOutcomeCohortSet)
export(createLimitSubset)
export(createResultsDataModel)
export(createSubsetCohortWindow)
export(dropCohortStatsTables)
export(exportCohortStatsTables)
export(generateCohortSet)
export(generateNegativeControlOutcomeCohorts)
export(getCohortCounts)
export(getCohortDefinitionSet)
export(getCohortInclusionRules)
export(getCohortStats)
export(getCohortTableNames)
export(getDataMigrator)
export(getRequiredTasks)
export(getResultsDataModelSpecifications)
export(getSubsetDefinitions)
export(insertInclusionRuleNames)
export(isCamelCase)
export(isCohortDefinitionSet)
export(isFormattedForDatabaseUpload)
export(isSnakeCase)
export(isTaskRequired)
export(migrateDataModel)
export(readCsv)
export(recordTasksDone)
export(runCohortGeneration)
export(sampleCohortDefinitionSet)
export(saveCohortDefinitionSet)
export(saveCohortSubsetDefinition)
export(saveIncremental)
export(uploadResults)
export(writeCsv)
import(DatabaseConnector)
import(R6)
importFrom(dplyr,"%>%")
import(dplyr)
importFrom(grDevices,rgb)
importFrom(methods,is)
importFrom(rlang,':=')
importFrom(rlang,.data)
importFrom(stats,aggregate)
Expand Down
58 changes: 57 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,65 @@
CohortGenerator 0.11.2
=======================

- Ensure temp tables are dropped before creating them (#188)

CohortGenerator 0.11.1
=======================

- CohortGenerator added to CRAN (#77)

CohortGenerator 0.11.0
=======================

New Features

- Add support for minimum cell count (#176)

Bug Fixes

- Multiple calls to export stats causing duplicates in cohort inclusion file (#179)
- Updates to subset documentation (#180, #181)
- Negative control outcome generation bug (#177)

CohortGenerator 0.10.0
=======================

New Features

- Add `runCohortGeneration` function (Issue #165)
- Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets (#154, #162)
- Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145)
- Unit tests now running on all OHDSI DB Platforms (#151)

Bug Fixes

- Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167)
- Allow integer as cohort ID (#146)
- Use native messaging functions for output vs. ParallelLogger (#97)
- Prevent upload of inclusion rule information (#78)
- Expose `colTypes` when working with .csv files (#59)
- Remove `bit64` from package (mostly) (#152)
- Updated documentation for cohort subset negate feature (#111)

CohortGenerator 0.9.0
=======================
- Random sample functionality (for development only) (Issue #129)
- Incremental mode for negative control cohort generation (Issue #137)
- Fixes getCohortCounts() if cohortIds is not specified, but cohortDefinitionSet is. (Issue #136)
- Add cohort ID to generation output messages (Issue #132)
- Add databaseId to output of getStatsTable() (Issue #116)
- Prevent duplicate cohort IDs in cohortDefinitionSet (Issue #130)
- Fix cohort stats query for Oracle (Issue #143)
- Ensure databaseId applied to all returned cohort counts (Issue #144)
- Preserve backwards compatibility if cohort sample table is not in the list of cohort table names (Issue #147)


CohortGenerator 0.8.1
=======================
- Include cohorts with 0 people in cohort counts (Issue #91).
- Use numeric for cohort ID (Issue #98)
- Allow big ints for target pairs (#103)
- Pass `tempEmulationSchema` when creating negative controlc ohorts (#104)
- Pass `tempEmulationSchema` when creating negative control cohorts (#104)
- Target CDM v5.4 for unit tests (#119)
- Fix for subset references (#115)
- Allow for subset cohort name templating (#118)
Expand Down
34 changes: 10 additions & 24 deletions R/CohortConstruction.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Observational Health Data Sciences and Informatics
# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of CohortGenerator
#
Expand Down Expand Up @@ -76,6 +76,13 @@ generateCohortSet <- function(connectionDetails = NULL,
"sql"
)
)
assertLargeInteger(cohortDefinitionSet$cohortId)
# Verify that cohort IDs are not repeated in the cohort definition
# set before generating
if (length(unique(cohortDefinitionSet$cohortId)) != length(cohortDefinitionSet$cohortId)) {
  # Report each offending ID once, even if it occurs three or more times.
  duplicatedCohortIds <- unique(cohortDefinitionSet$cohortId[duplicated(cohortDefinitionSet$cohortId)])
  # `collapse` (not `sep`) is what joins the elements of a single vector; with
  # `sep` the IDs remain separate strings and stop() concatenates them with no
  # delimiter (e.g. duplicates 2 and 3 would be printed as "23").
  stop("Cannot generate! Duplicate cohort IDs found in your cohortDefinitionSet: ", paste(duplicatedCohortIds, collapse = ","), ". Please fix your cohortDefinitionSet and try again.")
}
if (is.null(connection) && is.null(connectionDetails)) {
stop("You must provide either a database connection or the connection details.")
}
Expand All @@ -94,28 +101,7 @@ generateCohortSet <- function(connectionDetails = NULL,
on.exit(DatabaseConnector::disconnect(connection))
}

# Verify the cohort tables exist and if they do not
# stop the generation process
tableExistsFlagList <- lapply(cohortTableNames, FUN = function(x) {
x <- FALSE
})
tables <- DatabaseConnector::getTableNames(connection, cohortDatabaseSchema)
for (i in 1:length(cohortTableNames)) {
if (toupper(cohortTableNames[i]) %in% toupper(tables)) {
tableExistsFlagList[i] <- TRUE
}
}

if (!all(unlist(tableExistsFlagList, use.names = FALSE))) {
errorMsg <- "The following tables have not been created: \n"
for (i in 1:length(cohortTableNames)) {
if (!tableExistsFlagList[[i]]) {
errorMsg <- paste0(errorMsg, " - ", cohortTableNames[i], "\n")
}
}
errorMsg <- paste(errorMsg, "Please use the createCohortTables function to ensure all tables exist before generating cohorts.", sep = "\n")
stop(errorMsg)
}
.checkCohortTables(connection, cohortDatabaseSchema, cohortTableNames)

generatedTemplateCohorts <- c()
if ("isTemplatedCohort" %in% colnames(cohortDefinitionSet)) {
Expand Down Expand Up @@ -283,7 +269,7 @@ generateCohort <- function(cohortId = NULL,
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
}
ParallelLogger::logInfo(i, "/", nrow(cohortDefinitionSet), "- Generating cohort: ", cohortName)
rlang::inform(paste0(i, "/", nrow(cohortDefinitionSet), "- Generating cohort: ", cohortName, " (id = ", cohortId, ")"))
sql <- cohortDefinitionSet$sql[i]

if (!isSubset) {
Expand Down
Loading

0 comments on commit 73d3692

Please sign in to comment.