Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

requireCohortIntersectFlag #21

Merged
merged 1 commit into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(generateMatchedCohortSet)
export(getIdentifier)
export(joinOverlap)
export(requireAge)
export(requireCohortIntersectFlag)
export(requireDemographics)
export(requireFutureObservation)
export(requirePriorObservation)
Expand Down
86 changes: 86 additions & 0 deletions R/requireCohortIntersectFlag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@


#' Require cohort subjects are present in another cohort
#'
#' @param x Cohort table
#' @param targetCohortTable name of the cohort that we want to check for overlap
#' @param targetCohortId vector of cohort definition ids to include
#' @param indexDate Variable in x that contains the date to compute the
#' intersection.
#' @param targetStartDate date of reference in cohort table, either for start
#' (in overlap) or on its own (for incidence)
#' @param targetEndDate date of reference in cohort table, either for end
#' (overlap) or NULL (if incidence)
#' @param window window to consider events of
#'
#' @return Cohort table with only those in the other cohort kept
#' @export
#'
#' @examples
requireCohortIntersectFlag <- function(x,
targetCohortTable,
targetCohortId = NULL,
indexDate = "cohort_start_date",
targetStartDate = "cohort_start_date",
targetEndDate = "cohort_end_date",
window = list(c(0, Inf))){

cols <- unique(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
indexDate))

if(is.list(window)){
window_start <- window[[1]][1]
window_end <- window[[1]][2]
} else {
window_start <- window[1]
window_end <- window[2]
}

cdm <- attr(x, "cdm_reference")

if(is.null(cdm[[targetCohortTable]])){
cli::cli_abort("targetCohortTable not found in cdm reference")
}

if(is.null(targetCohortId)){
targetCohortId <- CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>%
dplyr::pull("cohort_definition_id")
}

if(length(targetCohortId) > 1){
cli::cli_abort("Only one target cohort is currently supported")
}

target_name <- CDMConnector::cohort_set(cdm[[targetCohortTable]]) %>%
dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) %>%
dplyr::pull("cohort_name")

subsetCohort <- x %>%
dplyr::select(dplyr::all_of(.env$cols)) %>%
PatientProfiles::addCohortIntersectFlag(
cdm = cdm,
targetCohortTable = targetCohortTable,
targetCohortId = targetCohortId,
indexDate = indexDate,
targetStartDate = targetStartDate,
targetEndDate = targetEndDate,
window = window,
nameStyle = "intersect_cohort"
) %>%
dplyr::filter(.data$intersect_cohort == 1) %>%
dplyr::select(!"intersect_cohort")

x %>%
dplyr::inner_join(subsetCohort,
by = c(cols)) %>%
CDMConnector::recordCohortAttrition(reason =
glue::glue("In cohort {target_name} between ",
"{window_start} and ",
"{window_end} days relative to ",
"{indexDate}"))

}



9 changes: 5 additions & 4 deletions R/requireDemographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ demographicsFilter <- function(cohort,
# join later

working_cohort <- cohort %>%
dplyr::select(c("cohort_definition_id", "subject_id",
dplyr::select(dplyr::all_of(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
indexDate)) %>%
indexDate))) %>%
PatientProfiles::addDemographics(indexDate = indexDate) %>%
dplyr::filter(.data$age >= .env$minAge,
.data$age <= .env$maxAge,
Expand All @@ -234,8 +234,9 @@ demographicsFilter <- function(cohort,

cohort <- cohort %>%
dplyr::inner_join(working_cohort %>%
dplyr::select(c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date")),
dplyr::select(dplyr::all_of(c("cohort_definition_id",
"subject_id",
"cohort_start_date", "cohort_end_date"))),
by = c("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date"))
cohort
Expand Down
19 changes: 15 additions & 4 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
name = "medications",
concept_set = list("diclofenac" = 1124300,
"acetaminophen" = 1127433))
cohortSet(cdm$combinations)
cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```
Expand All @@ -61,7 +60,21 @@ cohort_attrition(cdm$medications)
cdm$medications %>%
requireDemographics(ageRange = list(c(40, 65)),
sex = "Female")
cohortSet(cdm$combinations)
cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```

### Require presence in another cohort
We can also require that individuals are in another cohort over some window. Here for example we require that study participants are in a GI bleed cohort any time prior up to their entry in the medications cohort.
```{r}
cdm <- generate_concept_cohort_set(cdm = cdm,
name = "gibleed",
concept_set = list("gibleed" = 192671))

cdm$medications <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gibleed",
window = c(-Inf, 0))

cohort_count(cdm$medications)
cohort_attrition(cdm$medications)
```
Expand All @@ -76,8 +89,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
name = "combinations",
targetCohortName = "medications")



cohortSet(cdm$combinations)
cohortCount(cdm$combinations)

Expand Down
98 changes: 67 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ devtools::install_github("oxford-pharmacoepi/CohortConstructor")

``` r
library(CDMConnector)
#> Warning: package 'CDMConnector' was built under R version 4.2.3
library(PatientProfiles)
#> Warning: package 'PatientProfiles' was built under R version 4.2.3
library(CohortConstructor)

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
Expand All @@ -44,19 +46,19 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
name = "medications",
concept_set = list("diclofenac" = 1124300,
"acetaminophen" = 1127433))
cohort_set(cdm$medications)
#> # A tibble: 2 × 6
#> cohort_definition_id cohort_name limit prior_observation future_observation
#> <int> <chr> <chr> <dbl> <dbl>
#> 1 1 diclofenac first 0 0
#> 2 2 acetaminophen first 0 0
#> # ℹ 1 more variable: end <chr>
cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 830 830
#> 2 2 2580 2580
cohort_attrition(cdm$medications)
#> # A tibble: 2 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Applying demographic requirements
Expand All @@ -66,33 +68,69 @@ cdm$medications %>%
requireDemographics(ageRange = list(c(40, 65)),
sex = "Female")
#> # Source: SQL [?? x 4]
#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\Rtmpqm0Jdd\file42405cf744a.duckdb]
#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb]
#> cohort_definition_id subject_id cohort_start_date cohort_end_date
#> <int> <int> <date> <date>
#> 1 1 730 2002-11-18 2018-12-16
#> 2 1 1169 1975-12-23 2018-08-27
#> 3 1 1808 2003-12-18 2019-06-05
#> 4 1 2858 1953-05-26 2019-05-29
#> 5 1 2909 1986-09-03 2007-07-23
#> 6 1 2939 1997-10-31 2018-09-04
#> 7 1 3175 1999-05-02 2018-09-04
#> 8 1 5240 1984-05-31 2019-03-12
#> 9 2 1338 1997-02-22 2019-06-21
#> 10 2 2026 2009-02-11 2019-06-19
#> <int> <dbl> <date> <date>
#> 1 1 18 2009-03-21 2018-11-07
#> 2 1 893 1993-09-26 2019-05-06
#> 3 1 2396 1961-08-30 2001-02-28
#> 4 1 3159 2000-01-26 2018-10-18
#> 5 1 3376 1994-05-06 2019-06-28
#> 6 1 4071 1998-08-07 2018-12-27
#> 7 1 4636 1986-10-26 2018-10-23
#> 8 1 4690 2001-03-20 2018-10-14
#> 9 1 4701 2011-07-10 2018-12-22
#> 10 2 3951 1997-12-11 2019-04-13
#> # ℹ more rows
cohort_set(cdm$medications)
#> # A tibble: 2 × 6
#> cohort_definition_id cohort_name limit prior_observation future_observation
#> <int> <chr> <chr> <dbl> <dbl>
#> 1 1 diclofenac first 0 0
#> 2 2 acetaminophen first 0 0
#> # ℹ 1 more variable: end <chr>
cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 156 156
#> 2 2 76 76
cohort_attrition(cdm$medications)
#> # A tibble: 4 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> 3 2 76 76 2 Demographic req…
#> 4 1 156 156 2 Demographic req…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Require presence in another cohort

We can also require that individuals are in another cohort over some
window. Here for example we require that study participants are in a GI
bleed cohort any time prior up to their entry in the medications cohort.

``` r
cdm <- generate_concept_cohort_set(cdm = cdm,
name = "gibleed",
concept_set = list("gibleed" = 192671))

cdm$medications <- cdm$medications %>%
requireCohortIntersectFlag(targetCohortTable = "gibleed",
window = c(-Inf, 0))

cohort_count(cdm$medications)
#> # A tibble: 2 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 2 36 36
#> 2 1 0 0
cohort_attrition(cdm$medications)
#> # A tibble: 6 × 7
#> cohort_definition_id number_records number_subjects reason_id reason
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1 830 830 1 Qualifying init…
#> 2 2 2580 2580 1 Qualifying init…
#> 3 2 76 76 2 Demographic req…
#> 4 1 156 156 2 Demographic req…
#> 5 2 36 36 3 In cohort gible…
#> 6 1 0 0 3 In cohort gible…
#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
```

### Combining cohorts
Expand All @@ -104,8 +142,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
name = "combinations",
targetCohortName = "medications")



cohortSet(cdm$combinations)
#> # A tibble: 3 × 5
#> cohort_definition_id cohort_name diclofenac acetaminophen mutually_exclusive
Expand All @@ -117,9 +153,9 @@ cohortCount(cdm$combinations)
#> # A tibble: 3 × 3
#> cohort_definition_id number_records number_subjects
#> <int> <dbl> <dbl>
#> 1 1 830 830
#> 2 2 2580 2580
#> 3 3 805 805
#> 1 2 36 36
#> 2 1 0 0
#> 3 3 0 0


cdmDisconnect(cdm)
Expand Down
40 changes: 40 additions & 0 deletions man/requireCohortIntersectFlag.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-generateMatchedCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", {

outc <- a[["new_cohort"]] %>%
dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>%
dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")
dplyr::pull() %in% as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
expect_true(unique(outc) == TRUE)
})

Expand Down
Loading