diff --git a/DESCRIPTION b/DESCRIPTION index f1ffb94d..0f346897 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Imports: cli, dbplyr, dplyr, + glue, magrittr, PatientProfiles, rlang, diff --git a/NAMESPACE b/NAMESPACE index c69a79c0..b57d2ec2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,11 @@ export(generateCombinationCohortSet) export(getIdentifier) export(joinOverlap) +export(requireAge) +export(requireDemographics) +export(requireFutureObservation) +export(requirePriorObservation) +export(requireSex) export(splitOverlap) importFrom(magrittr,"%>%") importFrom(rlang,":=") diff --git a/R/requireDemographics.R b/R/requireDemographics.R new file mode 100644 index 00000000..c5ed2deb --- /dev/null +++ b/R/requireDemographics.R @@ -0,0 +1,242 @@ + + +#' Restrict cohort on patient demographics +#' +#' @param cohort A cohort table in a cdm reference +#' @param indexDate Variable in cohort that contains the date to compute the +#' demographics characteristics on which to restrict on. +#' @param ageRange A list of minimum and maximum age +#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only +#' those with that sex will be included. +#' @param minPriorObservation A mimimum number of prior observation days in +#' the database. +#' @param minFutureObservation A minimum number of future observation days in +#' the database. +#' +#' @return +#' @export +#' +#' @examples +requireDemographics <- function(cohort, + indexDate = "cohort_start_date", + ageRange = list(c(0, 150)), + sex = c("Both"), + minPriorObservation = 0, + minFutureObservation = 0){ + + cohort <- demographicsFilter(cohort = cohort, + indexDate = indexDate, + ageRange = ageRange, + sex = sex, + minPriorObservation = minPriorObservation, + minFutureObservation = minFutureObservation) + + cohort <- cohort %>% + CDMConnector::recordCohortAttrition(reason = "Demographic requirements") + + cohort +} + +#' Restrict cohort on age +#' +#' @param cohort A cohort table in a cdm reference +#' @param indexDate Variable in cohort that contains the date to compute the +#' demographics characteristics on which to restrict on. +#' @param ageRange A list of minimum and maximum age +#' +#' @return +#' @export +#' +#' @examples +requireAge <- function(cohort, + indexDate = "cohort_start_date", + ageRange = list(c(0, 150))){ + + cohort <- demographicsFilter(cohort = cohort, + indexDate = indexDate, + ageRange = ageRange, + sex = "Both", + minPriorObservation = 0, + minFutureObservation = 0) + + cohort <- cohort %>% + CDMConnector::recordCohortAttrition(reason = + glue::glue("Age requirement: {ageRange[[1]][1]} to {ageRange[[1]][2]}")) + + cohort + +} + +#' Restrict cohort on sex +#' +#' @param cohort A cohort table in a cdm reference +#' @param sex Can be "Both", "Male" or "Female". If one of the latter, only +#' those with that sex will be included. +#' +#' @return +#' @export +#' +#' @examples +requireSex <- function(cohort, + sex = c("Both")){ + + cohort <- demographicsFilter(cohort = cohort, + indexDate = "cohort_start_date", + ageRange = list(c(0, 150)), + sex = sex, + minPriorObservation = 0, + minFutureObservation = 0) + + cohort <- cohort %>% + CDMConnector::recordCohortAttrition(reason = + glue::glue("Sex requirement: {sex}")) + + + cohort + +} + +#' Restrict cohort on prior observation +#' +#' @param cohort A cohort table in a cdm reference +#' @param indexDate Variable in cohort that contains the date to compute the +#' demographics characteristics on which to restrict on. +#' @param minPriorObservation A mimimum number of prior observation days in +#' the database. +#' +#' @return +#' @export +#' +#' @examples +requirePriorObservation <- function(cohort, + indexDate = "cohort_start_date", + minPriorObservation = 0){ + + cohort <- demographicsFilter(cohort = cohort, + indexDate = indexDate, + ageRange = list(c(0, 150)), + sex = "Both", + minPriorObservation = minPriorObservation, + minFutureObservation = 0) + + cohort <- cohort %>% + CDMConnector::recordCohortAttrition(reason = + glue::glue("Prior observation requirement: {minPriorObservation} days")) + + cohort + +} + +#' Restrict cohort on future observation +#' +#' @param cohort A cohort table in a cdm reference +#' @param indexDate Variable in cohort that contains the date to compute the +#' demographics characteristics on which to restrict on. +#' @param minFutureObservation A minimum number of future observation days in +#' the database. +#' +#' @return +#' @export +#' +#' @examples +requireFutureObservation <- function(cohort, + indexDate = "cohort_start_date", + minFutureObservation = 0){ + + cohort <- demographicsFilter(cohort = cohort, + indexDate = indexDate, + ageRange = list(c(0, 150)), + sex = "Both", + minPriorObservation = 0, + minFutureObservation = minFutureObservation) + + cohort <- cohort %>% + CDMConnector::recordCohortAttrition(reason = + glue::glue("Future observation requirement: {minFutureObservation} days")) + + cohort + +} + +demographicsFilter <- function(cohort, + indexDate, + ageRange, + sex, + minPriorObservation, + minFutureObservation){ + + cdm <- attr(cohort, "cdm_reference") + + # validate inputs + if (!isTRUE(inherits(cdm, "cdm_reference"))) { + cli::cli_abort("cohort must be part of a cdm reference") + } + if(!"GeneratedCohortSet" %in% class(cohort) || + !all(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date") %in% + colnames(cohort))){ + cli::cli_abort("cohort must be a GeneratedCohortSet") + } + if(!indexDate %in% colnames(cohort)){ + cli::cli_abort("indexDate must be a date column in the cohort table") + } + + if(!is.list(ageRange)){ + cli::cli_abort("ageRange must be a list") + } + if(length(ageRange[[1]]) != 2 || + !is.numeric(ageRange[[1]]) || + !ageRange[[1]][2] >= ageRange[[1]][1] || + !ageRange[[1]][1]>=0){ + cli::cli_abort("ageRange only contain a vector of length two, with the + second number greater or equal to the first") + } + if(length(ageRange) != 1){ + cli::cli_abort("Only a single ageRange is currently supported") + } + if(!all(sex %in% c("Both", "Male", "Female"))){ + cli::cli_abort("sex must be Both, Male, or Female") + } + if(length(sex) != 1){ + cli::cli_abort("Only a single sex option is currently supported") + } + if(!is.numeric(minPriorObservation) || + length(minPriorObservation) != 1 || + !minPriorObservation >= 0){ + cli::cli_abort("minPriorObservation must be a positive number") + } + if(!is.numeric(minFutureObservation) || + length(minFutureObservation) != 1 || + !minFutureObservation >= 0){ + cli::cli_abort("minFutureObservation must be a positive number") + } + + minAge <- ageRange[[1]][1] + maxAge <- ageRange[[1]][2] + if(sex == "Both"){ + sex <- c("Male", "Female") + } + + # because the cohort table passed to the function might have extra columns + # that would conflict with ones we'll add, we'll take the core table first + # join later + + working_cohort <- cohort %>% + dplyr::select(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date", + indexDate)) %>% + PatientProfiles::addDemographics(indexDate = indexDate) %>% + dplyr::filter(.data$age >= .env$minAge, + .data$age <= .env$maxAge, + .data$sex %in% .env$sex, + .data$prior_observation >= .env$minPriorObservation, + .data$future_observation >= .env$minFutureObservation) + + cohort <- cohort %>% + dplyr::inner_join(working_cohort %>% + dplyr::select(c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date")), + by = c("cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date")) + cohort +} diff --git a/README.Rmd b/README.Rmd index 06a8209d..7e9d2824 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -The goal of CohortConstructor is to help on the creation of cohorts in the OMOP Common Data Model. +The goal of CohortConstructor is to help on the creation and manipulation of cohorts in the OMOP Common Data Model. ## Installation @@ -33,21 +33,54 @@ You can install the development version of CohortConstructor from [GitHub](https devtools::install_github("oxford-pharmacoepi/CohortConstructor") ``` -## Example +## Example usage + +``` {r} +library(CDMConnector) +library(PatientProfiles) +library(CohortConstructor) + +con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir()) +cdm <- cdm_from_con(con, cdm_schema = "main", + write_schema = c(prefix = "my_study_", schema = "main")) +``` + +### Generating concept based cohorts +``` {r} +cdm <- generate_concept_cohort_set(cdm = cdm, + name = "medications", + concept_set = list("diclofenac" = 1124300, + "acetaminophen" = 1127433)) +cohortSet(cdm$combinations) +cohort_count(cdm$medications) +cohort_attrition(cdm$medications) +``` + +### Applying demographic requirements +``` {r} +cdm$medications %>% + requireDemographics(ageRange = list(c(40, 65)), + sex = "Female") +cohortSet(cdm$combinations) +cohort_count(cdm$medications) +cohort_attrition(cdm$medications) +``` + + +### Combining cohorts Generate a combination cohort. ```{r} -library(PatientProfiles) -library(CohortConstructor) -library(CDMConnector) +cdm <- generateCombinationCohortSet(cdm = cdm, + name = "combinations", + targetCohortName = "medications") + -cdm <- mockPatientProfiles() -cdm <- generateCombinationCohortSet(cdm = cdm, name = "cohort3", targetCohortName = "cohort2") -cdm$cohort3 +cohortSet(cdm$combinations) +cohortCount(cdm$combinations) -cohortSet(cdm$cohort3) cdmDisconnect(cdm) ``` diff --git a/README.md b/README.md index 42c98981..d0ab06a5 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ status](https://www.r-pkg.org/badges/version/CohortConstructor)](https://CRAN.R- [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -The goal of CohortConstructor is to help on the creation of cohorts in -the OMOP Common Data Model. +The goal of CohortConstructor is to help on the creation and +manipulation of cohorts in the OMOP Common Data Model. ## Installation @@ -25,40 +25,102 @@ You can install the development version of CohortConstructor from devtools::install_github("oxford-pharmacoepi/CohortConstructor") ``` -## Example - -Generate a combination cohort. +## Example usage ``` r +library(CDMConnector) library(PatientProfiles) -#> Warning: package 'PatientProfiles' was built under R version 4.2.3 library(CohortConstructor) -library(CDMConnector) -cdm <- mockPatientProfiles() -cdm <- generateCombinationCohortSet(cdm = cdm, name = "cohort3", targetCohortName = "cohort2") - -cdm$cohort3 -#> # Source: table [4 x 4] -#> # Database: DuckDB 0.7.1 [martics@Windows 10 x64:R 4.2.1/:memory:] -#> cohort_definition_id subject_id cohort_start_date cohort_end_date -#> -#> 1 2 1 2020-05-25 2020-05-25 -#> 2 1 1 2019-12-30 2019-12-30 -#> 3 1 1 2020-05-25 2020-05-25 -#> 4 3 1 2020-05-25 2020-05-25 - -cohortSet(cdm$cohort3) -#> # A tibble: 7 × 6 -#> cohort_definition_id cohort_name cohort_1 cohort_2 cohort_3 mutually_exclusive -#> -#> 1 1 cohort_1 1 NA NA FALSE -#> 2 2 cohort_2 NA 1 NA FALSE -#> 3 3 cohort_1+c… 1 1 NA FALSE -#> 4 4 cohort_3 NA NA 1 FALSE -#> 5 5 cohort_1+c… 1 NA 1 FALSE -#> 6 6 cohort_2+c… NA 1 1 FALSE -#> 7 7 cohort_1+c… 1 1 1 FALSE +con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir()) +cdm <- cdm_from_con(con, cdm_schema = "main", + write_schema = c(prefix = "my_study_", schema = "main")) +``` + +### Generating concept based cohorts + +``` r +cdm <- generate_concept_cohort_set(cdm = cdm, + name = "medications", + concept_set = list("diclofenac" = 1124300, + "acetaminophen" = 1127433)) +cohort_set(cdm$medications) +#> # A tibble: 2 × 6 +#> cohort_definition_id cohort_name limit prior_observation future_observation +#> +#> 1 1 diclofenac first 0 0 +#> 2 2 acetaminophen first 0 0 +#> # ℹ 1 more variable: end +cohort_count(cdm$medications) +#> # A tibble: 2 × 3 +#> cohort_definition_id number_records number_subjects +#> +#> 1 1 830 830 +#> 2 2 2580 2580 +``` + +### Applying demographic requirements + +``` r +cdm$medications %>% + requireDemographics(ageRange = list(c(40, 65)), + sex = "Female") +#> # Source: SQL [?? x 4] +#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\Rtmpqm0Jdd\file42405cf744a.duckdb] +#> cohort_definition_id subject_id cohort_start_date cohort_end_date +#> +#> 1 1 730 2002-11-18 2018-12-16 +#> 2 1 1169 1975-12-23 2018-08-27 +#> 3 1 1808 2003-12-18 2019-06-05 +#> 4 1 2858 1953-05-26 2019-05-29 +#> 5 1 2909 1986-09-03 2007-07-23 +#> 6 1 2939 1997-10-31 2018-09-04 +#> 7 1 3175 1999-05-02 2018-09-04 +#> 8 1 5240 1984-05-31 2019-03-12 +#> 9 2 1338 1997-02-22 2019-06-21 +#> 10 2 2026 2009-02-11 2019-06-19 +#> # ℹ more rows +cohort_set(cdm$medications) +#> # A tibble: 2 × 6 +#> cohort_definition_id cohort_name limit prior_observation future_observation +#> +#> 1 1 diclofenac first 0 0 +#> 2 2 acetaminophen first 0 0 +#> # ℹ 1 more variable: end +cohort_count(cdm$medications) +#> # A tibble: 2 × 3 +#> cohort_definition_id number_records number_subjects +#> +#> 1 1 156 156 +#> 2 2 76 76 +``` + +### Combining cohorts + +Generate a combination cohort. + +``` r +cdm <- generateCombinationCohortSet(cdm = cdm, + name = "combinations", + targetCohortName = "medications") + + + +cohortSet(cdm$combinations) +#> # A tibble: 3 × 5 +#> cohort_definition_id cohort_name diclofenac acetaminophen mutually_exclusive +#> +#> 1 1 diclofenac 1 NA FALSE +#> 2 2 acetaminophen NA 1 FALSE +#> 3 3 diclofenac+a… 1 1 FALSE +cohortCount(cdm$combinations) +#> # A tibble: 3 × 3 +#> cohort_definition_id number_records number_subjects +#> +#> 1 1 830 830 +#> 2 2 2580 2580 +#> 3 3 805 805 + cdmDisconnect(cdm) ``` diff --git a/man/requireAge.Rd b/man/requireAge.Rd new file mode 100644 index 00000000..3246a638 --- /dev/null +++ b/man/requireAge.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDemographics.R +\name{requireAge} +\alias{requireAge} +\title{Restrict cohort on age} +\usage{ +requireAge(cohort, indexDate = "cohort_start_date", ageRange = list(c(0, 150))) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{indexDate}{Variable in cohort that contains the date to compute the +demographics characteristics on which to restrict on.} + +\item{ageRange}{A list of minimum and maximum age} +} +\description{ +Restrict cohort on age +} diff --git a/man/requireDemographics.Rd b/man/requireDemographics.Rd new file mode 100644 index 00000000..7aa7fed8 --- /dev/null +++ b/man/requireDemographics.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDemographics.R +\name{requireDemographics} +\alias{requireDemographics} +\title{Restrict cohort on patient demographics} +\usage{ +requireDemographics( + cohort, + indexDate = "cohort_start_date", + ageRange = list(c(0, 150)), + sex = c("Both"), + minPriorObservation = 0, + minFutureObservation = 0 +) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{indexDate}{Variable in cohort that contains the date to compute the +demographics characteristics on which to restrict on.} + +\item{ageRange}{A list of minimum and maximum age} + +\item{sex}{Can be "Both", "Male" or "Female". If one of the latter, only +those with that sex will be included.} + +\item{minPriorObservation}{A mimimum number of prior observation days in +the database.} + +\item{minFutureObservation}{A minimum number of future observation days in +the database.} +} +\description{ +Restrict cohort on patient demographics +} diff --git a/man/requireFutureObservation.Rd b/man/requireFutureObservation.Rd new file mode 100644 index 00000000..ecd47b24 --- /dev/null +++ b/man/requireFutureObservation.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDemographics.R +\name{requireFutureObservation} +\alias{requireFutureObservation} +\title{Restrict cohort on future observation} +\usage{ +requireFutureObservation( + cohort, + indexDate = "cohort_start_date", + minFutureObservation = 0 +) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{indexDate}{Variable in cohort that contains the date to compute the +demographics characteristics on which to restrict on.} + +\item{minFutureObservation}{A minimum number of future observation days in +the database.} +} +\description{ +Restrict cohort on future observation +} diff --git a/man/requirePriorObservation.Rd b/man/requirePriorObservation.Rd new file mode 100644 index 00000000..143cc8ec --- /dev/null +++ b/man/requirePriorObservation.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDemographics.R +\name{requirePriorObservation} +\alias{requirePriorObservation} +\title{Restrict cohort on prior observation} +\usage{ +requirePriorObservation( + cohort, + indexDate = "cohort_start_date", + minPriorObservation = 0 +) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{indexDate}{Variable in cohort that contains the date to compute the +demographics characteristics on which to restrict on.} + +\item{minPriorObservation}{A mimimum number of prior observation days in +the database.} +} +\description{ +Restrict cohort on prior observation +} diff --git a/man/requireSex.Rd b/man/requireSex.Rd new file mode 100644 index 00000000..a98434d9 --- /dev/null +++ b/man/requireSex.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDemographics.R +\name{requireSex} +\alias{requireSex} +\title{Restrict cohort on sex} +\usage{ +requireSex(cohort, sex = c("Both")) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{sex}{Can be "Both", "Male" or "Female". If one of the latter, only +those with that sex will be included.} +} +\description{ +Restrict cohort on sex +} diff --git a/tests/testthat/test-requireDemographics.R b/tests/testthat/test-requireDemographics.R new file mode 100644 index 00000000..0266fe9c --- /dev/null +++ b/tests/testthat/test-requireDemographics.R @@ -0,0 +1,105 @@ +test_that("simple example", { + cdm <- PatientProfiles::mockPatientProfiles(patient_size = 100, + drug_exposure_size = 100) + cdm$cohort1 <- cdm$cohort1 %>% + requireDemographics(ageRange = list(c(0,50)), + indexDate = "cohort_start_date", + sex = "Both", + minPriorObservation = 10, + minFutureObservation = 5) + + expect_true("GeneratedCohortSet" %in% class(cdm$cohort1)) + + cdm$cohort1 <- cdm$cohort1 %>% + requireAge(ageRange = list(c(10,40))) %>% + requireSex(sex = "Male") %>% + requirePriorObservation(minPriorObservation = 20) %>% + requireFutureObservation(minFutureObservation = 10) + + # expect errors + expect_error(requireDemographics(cohort = "cohort")) + expect_error(requireDemographics(cohort = cdm$person)) + expect_error(requireDemographics(cohort = cdm$cohort2, + indexDate = "aaa")) + expect_error(requireDemographics(cohort = cdm$cohort2, + ageRange = c(0,50))) + expect_error(requireDemographics(cohort = cdm$cohort2, + ageRange = list(c(50,40)))) + expect_error(requireDemographics(cohort = cdm$cohort2, + ageRange = list(c(-10,40)))) + expect_error(requireDemographics(cohort = cdm$cohort2, + ageRange = list(c(0,"a")))) + expect_error(requireDemographics(cohort = cdm$cohort2, + sex = "a")) + + expect_error(requireDemographics(cohort = cdm$cohort2, + minPriorObservation = -10)) + expect_error(requireDemographics(cohort = cdm$cohort2, + minPriorObservation = "a")) + expect_error(requireDemographics(cohort = cdm$cohort2, + minFutureObservation = -10)) + expect_error(requireDemographics(cohort = cdm$cohort2, + minFutureObservation = "a")) + + # multiple options not currently supported + expect_error(requireDemographics(cohort = cdm$cohort2, + ageRange = list(c(0,50), + c(51,100)))) + expect_error(requireDemographics(cohort = cdm$cohort2, + sex = c("Both", "Male"))) + expect_error(requireDemographics(cohort = cdm$cohort2, + minPriorObservation = c(0,10))) + expect_error(requireDemographics(cohort = cdm$cohort2, + minFutureObservation = c(0,10))) + + CDMConnector::cdm_disconnect(cdm) + }) + +test_that("restrictions applied to single cohort", { + # one person, one observation periods + personTable <- dplyr::tibble( + person_id = c("1", "2", "3"), + gender_concept_id = c("8507","8532","8507"), + year_of_birth = c(2000,2005,2010), + month_of_birth = 01, + day_of_birth = 01 + ) + observationPeriodTable <- dplyr::tibble( + observation_period_id = c("1", "2", "3"), + person_id = c("1", "2", "3"), + observation_period_start_date = as.Date("2010-01-01"), + observation_period_end_date = as.Date("2015-06-01") + ) + cohortTable <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c("1", "2", "3"), + cohort_start_date = as.Date(c("2010-06-06", "2010-06-06", "2010-06-06")), + cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01")) + ) + + cdm <- PatientProfiles::mockPatientProfiles(person = personTable, + observation_period = observationPeriodTable, + cohort1 = cohortTable) + cdm$cohort1 <- cdm$cohort1 %>% + requireDemographics(ageRange = list(c(0,5))) + + expect_equal(c("2", "3"), + sort(cdm$cohort1 %>% + dplyr::pull("subject_id"))) + + cdm$cohort1 <- cdm$cohort1 %>% + requireDemographics(sex = "Male") + expect_equal(c("3"), + sort(cdm$cohort1 %>% + dplyr::pull("subject_id"))) + + + CDMConnector::cdm_disconnect(cdm) + +}) + +test_that("ignore existing cohort extra variables", { + + # ignore existing conflicting age column, but keep it in the output + +})