requireCohortIntersectFlag

OHDSI · Dec 7, 2023 · def9761 · def9761
1 parent bd43172
commit def9761
Show file tree

Hide file tree

Showing 8 changed files with 272 additions and 40 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(generateMatchedCohortSet)
 export(getIdentifier)
 export(joinOverlap)
 export(requireAge)
+export(requireCohortIntersectFlag)
 export(requireDemographics)
 export(requireFutureObservation)
 export(requirePriorObservation)

diff --git a/R/requireCohortIntersectFlag.R b/R/requireCohortIntersectFlag.R
@@ -0,0 +1,86 @@
+
+
+#' Require cohort subjects are present in another cohort
+#'
+#' @param x Cohort table
+#' @param targetCohortTable name of the cohort that we want to check for overlap
+#' @param targetCohortId vector of cohort definition ids to include
+#' @param indexDate Variable in x that contains the date to compute the
+#' intersection.
+#' @param targetStartDate date of reference in cohort table, either for start
+#' (in overlap) or on its own (for incidence)
+#' @param targetEndDate date of reference in cohort table, either for end
+#' (overlap) or NULL (if incidence)
+#' @param window window to consider events of
+#'
+#' @return Cohort table with only those in the other cohort kept
+#' @export
+#'
+#' @examples
+requireCohortIntersectFlag <- function(x,
+                                       targetCohortTable,
+                                       targetCohortId = NULL,
+                                       indexDate = "cohort_start_date",
+                                       targetStartDate = "cohort_start_date",
+                                       targetEndDate = "cohort_end_date",
+                                       window = list(c(0, Inf))){
+
+cols <- unique(c("cohort_definition_id", "subject_id",
+          "cohort_start_date", "cohort_end_date",
+          indexDate))
+
+if(is.list(window)){
+window_start <- window[[1]][1]
+window_end <- window[[1]][2]
+} else {
+  window_start <- window[1]
+  window_end <- window[2]
+}
+
+cdm <- attr(x, "cdm_reference")
+
+if(is.null(cdm[[targetCohortTable]])){
+  cli::cli_abort("targetCohortTable not found in cdm reference")
+}
+
+if(is.null(targetCohortId)){
+targetCohortId <- CDMConnector::cohortSet(cdm[[targetCohortTable]]) %>%
+    dplyr::pull("cohort_definition_id")
+}
+
+if(length(targetCohortId) > 1){
+  cli::cli_abort("Only one target cohort is currently supported")
+}
+
+target_name <- CDMConnector::cohort_set(cdm[[targetCohortTable]]) %>%
+  dplyr::filter(.data$cohort_definition_id == .env$targetCohortId) %>%
+  dplyr::pull("cohort_name")
+
+subsetCohort <- x %>%
+  dplyr::select(dplyr::all_of(.env$cols)) %>%
+  PatientProfiles::addCohortIntersectFlag(
+    cdm = cdm,
+    targetCohortTable = targetCohortTable,
+    targetCohortId = targetCohortId,
+    indexDate = indexDate,
+    targetStartDate = targetStartDate,
+    targetEndDate = targetEndDate,
+    window = window,
+    nameStyle = "intersect_cohort"
+  ) %>%
+  dplyr::filter(.data$intersect_cohort == 1) %>%
+  dplyr::select(!"intersect_cohort")
+
+x %>%
+  dplyr::inner_join(subsetCohort,
+                    by = c(cols)) %>%
+  CDMConnector::recordCohortAttrition(reason =
+                                        glue::glue("In cohort {target_name} between ",
+                                                   "{window_start} and ",
+                                                   "{window_end} days relative to ",
+                                                   "{indexDate}"))
+
+}
+
+
+
diff --git a/R/requireDemographics.R b/R/requireDemographics.R
@@ -222,9 +222,9 @@ demographicsFilter <- function(cohort,
   # join later
 
   working_cohort <- cohort %>%
-    dplyr::select(c("cohort_definition_id", "subject_id",
+    dplyr::select(dplyr::all_of(c("cohort_definition_id", "subject_id",
                     "cohort_start_date", "cohort_end_date",
-                    indexDate)) %>%
+                    indexDate))) %>%
     PatientProfiles::addDemographics(indexDate = indexDate) %>%
     dplyr::filter(.data$age >= .env$minAge,
                   .data$age <= .env$maxAge,
@@ -234,8 +234,9 @@ demographicsFilter <- function(cohort,
 
   cohort <- cohort %>%
     dplyr::inner_join(working_cohort  %>%
-                        dplyr::select(c("cohort_definition_id", "subject_id",
-                                        "cohort_start_date", "cohort_end_date")),
+                        dplyr::select(dplyr::all_of(c("cohort_definition_id",
+                                                      "subject_id",
+                                        "cohort_start_date", "cohort_end_date"))),
                       by = c("cohort_definition_id", "subject_id",
                              "cohort_start_date", "cohort_end_date"))
   cohort

diff --git a/README.Rmd b/README.Rmd
@@ -51,7 +51,6 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
                             name = "medications",
                             concept_set = list("diclofenac" = 1124300,
                                                "acetaminophen" = 1127433))
-cohortSet(cdm$combinations)
 cohort_count(cdm$medications)
 cohort_attrition(cdm$medications)
 ```
@@ -61,7 +60,21 @@ cohort_attrition(cdm$medications)
 cdm$medications %>% 
   requireDemographics(ageRange = list(c(40, 65)),
                       sex = "Female")
-cohortSet(cdm$combinations)
+cohort_count(cdm$medications)
+cohort_attrition(cdm$medications)
+```
+
+### Require presence in another cohort
+We can also require that individuals are in another cohort over some window. Here for example we require that study participants are in a GI bleed cohort any time prior up to their entry in the medications cohort.
+```{r}
+cdm <- generate_concept_cohort_set(cdm = cdm, 
+                            name = "gibleed",
+                            concept_set = list("gibleed" = 192671))
+
+cdm$medications <- cdm$medications %>% 
+  requireCohortIntersectFlag(targetCohortTable = "gibleed",
+                             window = c(-Inf, 0))
+
 cohort_count(cdm$medications)
 cohort_attrition(cdm$medications)
 ```
@@ -76,8 +89,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
                                     name = "combinations", 
                                     targetCohortName = "medications")
 
-
-
 cohortSet(cdm$combinations)
 cohortCount(cdm$combinations)
 

diff --git a/README.md b/README.md
@@ -29,7 +29,9 @@ devtools::install_github("oxford-pharmacoepi/CohortConstructor")
 
 ``` r
 library(CDMConnector)
+#> Warning: package 'CDMConnector' was built under R version 4.2.3
 library(PatientProfiles)
+#> Warning: package 'PatientProfiles' was built under R version 4.2.3
 library(CohortConstructor)
 
 con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
@@ -44,19 +46,19 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
                             name = "medications",
                             concept_set = list("diclofenac" = 1124300,
                                                "acetaminophen" = 1127433))
-cohort_set(cdm$medications)
-#> # A tibble: 2 × 6
-#>   cohort_definition_id cohort_name   limit prior_observation future_observation
-#>                  <int> <chr>         <chr>             <dbl>              <dbl>
-#> 1                    1 diclofenac    first                 0                  0
-#> 2                    2 acetaminophen first                 0                  0
-#> # ℹ 1 more variable: end <chr>
 cohort_count(cdm$medications)
 #> # A tibble: 2 × 3
 #>   cohort_definition_id number_records number_subjects
 #>                  <int>          <dbl>           <dbl>
 #> 1                    1            830             830
 #> 2                    2           2580            2580
+cohort_attrition(cdm$medications)
+#> # A tibble: 2 × 7
+#>   cohort_definition_id number_records number_subjects reason_id reason          
+#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
+#> 1                    1            830             830         1 Qualifying init…
+#> 2                    2           2580            2580         1 Qualifying init…
+#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
 ```
 
 ### Applying demographic requirements
@@ -66,33 +68,69 @@ cdm$medications %>%
   requireDemographics(ageRange = list(c(40, 65)),
                       sex = "Female")
 #> # Source:   SQL [?? x 4]
-#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\Rtmpqm0Jdd\file42405cf744a.duckdb]
+#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb]
 #>    cohort_definition_id subject_id cohort_start_date cohort_end_date
-#>                   <int>      <int> <date>            <date>         
-#>  1                    1        730 2002-11-18        2018-12-16     
-#>  2                    1       1169 1975-12-23        2018-08-27     
-#>  3                    1       1808 2003-12-18        2019-06-05     
-#>  4                    1       2858 1953-05-26        2019-05-29     
-#>  5                    1       2909 1986-09-03        2007-07-23     
-#>  6                    1       2939 1997-10-31        2018-09-04     
-#>  7                    1       3175 1999-05-02        2018-09-04     
-#>  8                    1       5240 1984-05-31        2019-03-12     
-#>  9                    2       1338 1997-02-22        2019-06-21     
-#> 10                    2       2026 2009-02-11        2019-06-19     
+#>                   <int>      <dbl> <date>            <date>         
+#>  1                    1         18 2009-03-21        2018-11-07     
+#>  2                    1        893 1993-09-26        2019-05-06     
+#>  3                    1       2396 1961-08-30        2001-02-28     
+#>  4                    1       3159 2000-01-26        2018-10-18     
+#>  5                    1       3376 1994-05-06        2019-06-28     
+#>  6                    1       4071 1998-08-07        2018-12-27     
+#>  7                    1       4636 1986-10-26        2018-10-23     
+#>  8                    1       4690 2001-03-20        2018-10-14     
+#>  9                    1       4701 2011-07-10        2018-12-22     
+#> 10                    2       3951 1997-12-11        2019-04-13     
 #> # ℹ more rows
-cohort_set(cdm$medications)
-#> # A tibble: 2 × 6
-#>   cohort_definition_id cohort_name   limit prior_observation future_observation
-#>                  <int> <chr>         <chr>             <dbl>              <dbl>
-#> 1                    1 diclofenac    first                 0                  0
-#> 2                    2 acetaminophen first                 0                  0
-#> # ℹ 1 more variable: end <chr>
 cohort_count(cdm$medications)
 #> # A tibble: 2 × 3
 #>   cohort_definition_id number_records number_subjects
 #>                  <int>          <dbl>           <dbl>
 #> 1                    1            156             156
 #> 2                    2             76              76
+cohort_attrition(cdm$medications)
+#> # A tibble: 4 × 7
+#>   cohort_definition_id number_records number_subjects reason_id reason          
+#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
+#> 1                    1            830             830         1 Qualifying init…
+#> 2                    2           2580            2580         1 Qualifying init…
+#> 3                    2             76              76         2 Demographic req…
+#> 4                    1            156             156         2 Demographic req…
+#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
+```
+
+### Require presence in another cohort
+
+We can also require that individuals are in another cohort over some
+window. Here for example we require that study participants are in a GI
+bleed cohort any time prior up to their entry in the medications cohort.
+
+``` r
+cdm <- generate_concept_cohort_set(cdm = cdm, 
+                            name = "gibleed",
+                            concept_set = list("gibleed" = 192671))
+
+cdm$medications <- cdm$medications %>% 
+  requireCohortIntersectFlag(targetCohortTable = "gibleed",
+                             window = c(-Inf, 0))
+
+cohort_count(cdm$medications)
+#> # A tibble: 2 × 3
+#>   cohort_definition_id number_records number_subjects
+#>                  <int>          <dbl>           <dbl>
+#> 1                    2             36              36
+#> 2                    1              0               0
+cohort_attrition(cdm$medications)
+#> # A tibble: 6 × 7
+#>   cohort_definition_id number_records number_subjects reason_id reason          
+#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
+#> 1                    1            830             830         1 Qualifying init…
+#> 2                    2           2580            2580         1 Qualifying init…
+#> 3                    2             76              76         2 Demographic req…
+#> 4                    1            156             156         2 Demographic req…
+#> 5                    2             36              36         3 In cohort gible…
+#> 6                    1              0               0         3 In cohort gible…
+#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
 ```
 
 ### Combining cohorts
@@ -104,8 +142,6 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
                                     name = "combinations", 
                                     targetCohortName = "medications")
 
-
-
 cohortSet(cdm$combinations)
 #> # A tibble: 3 × 5
 #>   cohort_definition_id cohort_name   diclofenac acetaminophen mutually_exclusive
@@ -117,9 +153,9 @@ cohortCount(cdm$combinations)
 #> # A tibble: 3 × 3
 #>   cohort_definition_id number_records number_subjects
 #>                  <int>          <dbl>           <dbl>
-#> 1                    1            830             830
-#> 2                    2           2580            2580
-#> 3                    3            805             805
+#> 1                    2             36              36
+#> 2                    1              0               0
+#> 3                    3              0               0
 
 
 cdmDisconnect(cdm)

diff --git a/man/requireCohortIntersectFlag.Rd b/man/requireCohortIntersectFlag.Rd
diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R
@@ -295,7 +295,7 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", {
 
   outc <- a[["new_cohort"]] %>%
     dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>%
-    dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")
+    dplyr::pull() %in% as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
   expect_true(unique(outc) == TRUE)
 })