Merge branch 'main' into inObservation-param

OHDSI · Dec 20, 2024 · eb80694 · eb80694
2 parents 0f455ab + 21088e6
commit eb80694
Showing 27 changed files with 156 additions and 236 deletions.
diff --git a/CohortConstructor.Rproj b/CohortConstructor.Rproj
@@ -1,5 +1,4 @@
 Version: 1.0
-ProjectId: 00fa364f-f8e3-4b28-a7dd-02de51c09f48
 
 RestoreWorkspace: No
 SaveWorkspace: No

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -71,3 +71,5 @@ Depends:
     R (>= 4.1)
 URL: https://ohdsi.github.io/CohortConstructor/, https://github.com/OHDSI/CohortConstructor
 LazyData: true
+Remotes:
+    ohdsi/omock
diff --git a/R/collapseCohorts.R b/R/collapseCohorts.R
@@ -73,7 +73,7 @@ collapseCohorts <- function(cohort,
       dplyr::compute(name = name, temporary = FALSE)
   }
   newCohort <- newCohort |>
-    omopgenerics::newCohortTable(.softValidation = TRUE) |>
+    omopgenerics::newCohortTable(.softValidation = FALSE) |>
     omopgenerics::recordCohortAttrition(
       reason = "Collapse cohort with a gap of {gap} days.",
       cohortId = cohortId)

diff --git a/R/conceptCohort.R b/R/conceptCohort.R
@@ -198,10 +198,10 @@ conceptCohort <- function(cdm,
   cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)
 
   if(overlap == "merge"){
-  cli::cli_inform(c("i" = "Merging overlapping records."))
-  cdm[[name]] <- cdm[[name]] |>
-    joinOverlap(name = name, gap = 0)  |>
-    omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
+    cli::cli_inform(c("i" = "Merging overlapping records."))
+    cdm[[name]] <- cdm[[name]] |>
+      joinOverlap(name = name, gap = 0)  |>
+      omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
   }
 
   if(overlap == "extend"){
@@ -210,7 +210,7 @@ conceptCohort <- function(cdm,
       extendOverlap(name = name)  |>
       omopgenerics::recordCohortAttrition(reason = "Add overlapping records")
 
-   # adding days might mean we no longer satisfy cohort requirements
+    # adding days might mean we no longer satisfy cohort requirements
     cli::cli_inform(c("i" = "Re-appplying cohort requirements."))
     cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)
   }
@@ -492,86 +492,82 @@ extendOverlap  <- function(cohort,
   # Because once we add to a record this may cause a new overlap
   # will do a while loop until all overlaps are resolved
   while(hasOverlap(cohort)){
-  cli::cli_inform("Recursively adding overlapping records")
-  workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
-  cohort <- cohort %>%
+    cli::cli_inform("Recursively adding overlapping records")
+    workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
+    cohort <- cohort %>%
       dplyr::mutate(record_id = dplyr::row_number()) |>
       dplyr::compute(temporary = FALSE,
                      name = workingTblNames[1])
 
-  # keep overlapping records
-  cohort_overlap <- cohort %>%
-    dplyr::inner_join(cohort,
-                      by = c("cohort_definition_id", "subject_id"),
-                      suffix = c("", "_overlap")) |>
-    dplyr::filter(
-      record_id != record_id_overlap,
-      cohort_start_date <= cohort_end_date_overlap &
-      cohort_end_date >= cohort_start_date_overlap
-    )  |>
-    dplyr::select("cohort_definition_id", "subject_id",
-                  "cohort_start_date", "cohort_end_date",
-                  "record_id") |>
-    dplyr::distinct() |>
-    dplyr::compute(temporary = FALSE,
-                   name = workingTblNames[2])
-
-  cohort_no_overlap <- cohort |>
-    dplyr::anti_join(cohort_overlap |>
-                       dplyr::select("record_id"),
-                     by = "record_id") |>
-    dplyr::select(!"record_id")  |>
-    dplyr::compute(temporary = FALSE,
-                   name = workingTblNames[3])
-
-  cohort_overlap <- cohort_overlap %>%
-     dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
-                                          "cohort_end_date")) |>
-     dplyr::group_by(dplyr::pick("cohort_definition_id",
-                                 "subject_id")) |>
-     dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
-                      days  = as.integer(sum(.data$days)))  %>%
-    dplyr:: ungroup() %>%
-    dplyr::mutate(cohort_end_date = as.Date(
-      !!CDMConnector::dateadd(
-        date = "cohort_start_date",
-        number = "days",
-        interval = "day"
-      ))) |>
-    dplyr::select(!"days")  |>
-    dplyr::compute(temporary = FALSE,
-                   name = workingTblNames[4])
-
-  cohort <- dplyr::union_all(cohort_overlap,
-                                     cohort_no_overlap) |>
-    dplyr::compute(name = name, temporary = FALSE)
+    # keep overlapping records
+    cohort_overlap <- cohort %>%
+      dplyr::inner_join(cohort,
+                        by = c("cohort_definition_id", "subject_id"),
+                        suffix = c("", "_overlap")) |>
+      dplyr::filter(
+        record_id != record_id_overlap,
+        cohort_start_date <= cohort_end_date_overlap &
+          cohort_end_date >= cohort_start_date_overlap
+      )  |>
+      dplyr::select("cohort_definition_id", "subject_id",
+                    "cohort_start_date", "cohort_end_date",
+                    "record_id") |>
+      dplyr::distinct() |>
+      dplyr::compute(temporary = FALSE,
+                     name = workingTblNames[2])
 
-  CDMConnector::dropTable(cdm = cdm,
-                          name = workingTblNames)
+    cohort_no_overlap <- cohort |>
+      dplyr::anti_join(cohort_overlap |>
+                         dplyr::select("record_id"),
+                       by = "record_id") |>
+      dplyr::select(!"record_id")  |>
+      dplyr::compute(temporary = FALSE,
+                     name = workingTblNames[3])
+
+    cohort_overlap <- cohort_overlap %>%
+      dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
+                                                    "cohort_end_date")) |>
+      dplyr::group_by(dplyr::pick("cohort_definition_id",
+                                  "subject_id")) |>
+      dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
+                       days  = as.integer(sum(.data$days, na.rm = TRUE)))  %>%
+      dplyr:: ungroup() %>%
+      dplyr::mutate(cohort_end_date = as.Date(
+        !!CDMConnector::dateadd(
+          date = "cohort_start_date",
+          number = "days",
+          interval = "day"
+        ))) |>
+      dplyr::select(!"days")  |>
+      dplyr::compute(temporary = FALSE,
+                     name = workingTblNames[4])
+
+    cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |>
+      dplyr::compute(name = name, temporary = FALSE)
 
+    CDMConnector::dropTable(cdm = cdm, name = workingTblNames)
   }
 
   cohort
-
 }
 
 hasOverlap <- function(cohort){
-    overlaps <- cohort |>
-      dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
-      dplyr::arrange(.data$cohort_start_date) |>
-      dplyr::mutate(
-        "next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
-      ) |>
-      dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
-      dplyr::ungroup() |>
-      dplyr::tally() |>
-      dplyr::collect()
+  overlaps <- cohort |>
+    dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
+    dplyr::arrange(.data$cohort_start_date) |>
+    dplyr::mutate(
+      "next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
+    ) |>
+    dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
+    dplyr::ungroup() |>
+    dplyr::tally() |>
+    dplyr::collect()
 
-    if (overlaps$n > 0) {
-      cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
+  if (overlaps$n > 0) {
+    cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
     return(TRUE)
-      } else {
+  } else {
     return(FALSE)
-      }
-
   }
+
+}
diff --git a/R/exitAtColumnDate.R b/R/exitAtColumnDate.R
@@ -216,7 +216,7 @@ exitAtColumnDate <- function(cohort,
   newCohort <- newCohort |>
     dplyr::relocate(dplyr::all_of(omopgenerics::cohortColumns("cohort"))) |>
     dplyr::compute(name = name, temporary = FALSE) |>
-    omopgenerics::newCohortTable(.softValidation = TRUE)
+    omopgenerics::newCohortTable(.softValidation = FALSE)
 
   cdm <- omopgenerics::dropTable(cdm, name = dplyr::starts_with(tmpPrefix))
 

diff --git a/R/exitAtDate.R b/R/exitAtDate.R
@@ -105,7 +105,7 @@ exitAtObservationEnd <- function(cohort,
 
   newCohort <- newCohort |>
     dplyr::compute(name = name, temporary = FALSE) |>
-    omopgenerics::newCohortTable(.softValidation = TRUE) |>
+    omopgenerics::newCohortTable(.softValidation = FALSE) |>
     omopgenerics::recordCohortAttrition(reason = reason, cohortId = cohortId)
 
   omopgenerics::dropTable(cdm = cdm, name = tmpTable)
@@ -186,7 +186,7 @@ exitAtDeath <- function(cohort,
     # no overlapping periods
     joinOverlap(name = name) |>
     dplyr::compute(name = name, temporary = FALSE) |>
-    omopgenerics::newCohortTable(.softValidation = TRUE) |>
+    omopgenerics::newCohortTable(.softValidation = FALSE) |>
     omopgenerics::recordCohortAttrition(reason = "Exit at death", cohortId = cohortId)
 
   useIndexes <- getOption("CohortConstructor.use_indexes")

diff --git a/R/intersectCohorts.R b/R/intersectCohorts.R
@@ -342,10 +342,10 @@ joinOverlap <- function(cohort,
   cdm <- omopgenerics::cdmReference(cohort)
 
   start <- cohort |>
-    dplyr::select(by, "date" := !!startDate) |>
+    dplyr::select(dplyr::all_of(by), "date" := !!startDate) |>
     dplyr::mutate("date_id" = -1)
   end <- cohort |>
-    dplyr::select(by, "date" := !!endDate) |>
+    dplyr::select(dplyr::all_of(by), "date" := !!endDate) |>
     dplyr::mutate("date_id" = 1)
   if (gap > 0) {
     end <- end |>
@@ -357,7 +357,7 @@ joinOverlap <- function(cohort,
     dplyr::compute(temporary = FALSE, name = workingTbl)
 
   x <- x |>
-    dplyr::group_by(dplyr::pick(by)) |>
+    dplyr::group_by(dplyr::pick(dplyr::all_of(by))) |>
     dplyr::arrange(.data$date, .data$date_id) |>
     dplyr::mutate(
       "cum_id" = cumsum(.data$date_id),
@@ -410,7 +410,7 @@ joinAll <- function(cohort,
   }
 
   x <- cohort |>
-    dplyr::group_by(dplyr::across(by)) |>
+    dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
     dplyr::summarise(
       cohort_start_date =
         min(.data$cohort_start_date, na.rm = TRUE),

diff --git a/R/matchCohorts.R b/R/matchCohorts.R
@@ -164,7 +164,7 @@ matchCohorts <- function(cohort,
           "match_status" = "target"
         )
       ,
-      .softValidation = TRUE
+      .softValidation = FALSE
     )
 
   # Bind both cohorts

diff --git a/R/padCohortDate.R b/R/padCohortDate.R
@@ -213,6 +213,7 @@ padCohortStart <- function(cohort,
     ) |>
     dplyr::union_all(subCohort) |>
     dplyr::compute(name = name, temporary = FALSE) |>
+    omopgenerics::newCohortTable(.softValidation = FALSE) |>
     omopgenerics::recordCohortAttrition(cohortId = cohortId, reason = reason)
 
   # drop temp table

diff --git a/R/requireCohortIntersect.R b/R/requireCohortIntersect.R
@@ -105,7 +105,7 @@ requireCohortIntersect <- function(cohort,
     ) |
       (!.data$cohort_definition_id %in% .env$cohortId)
     ) |>
-    dplyr::select(cols) |>
+    dplyr::select(dplyr::all_of(cols)) |>
     dplyr::compute(name = subsetName, temporary = FALSE)
 
   # attrition reason

diff --git a/R/requireConceptIntersect.R b/R/requireConceptIntersect.R
@@ -99,7 +99,7 @@ requireConceptIntersect <- function(cohort,
       ) |
         (!.data$cohort_definition_id %in% .env$cohortId)
       ) |>
-      dplyr::select(cols) |>
+      dplyr::select(dplyr::all_of(cols)) |>
       dplyr::compute(name = subsetName, temporary = FALSE)
 
     # attrition reason

diff --git a/R/requireDateRange.R b/R/requireDateRange.R
@@ -149,7 +149,7 @@ trimToDateRange <- function(cohort,
 
   cohort <- cohort |>
     dplyr::compute(name = name, temporary = FALSE) |>
-    omopgenerics::newCohortTable(.softValidation = TRUE)
+    omopgenerics::newCohortTable(.softValidation = FALSE)
 
   useIndexes <- getOption("CohortConstructor.use_indexes")
   if (!isFALSE(useIndexes)) {

diff --git a/R/requireTableIntersect.R b/R/requireTableIntersect.R
@@ -90,7 +90,7 @@ requireTableIntersect <- function(cohort,
     ) |
       (!.data$cohort_definition_id %in% .env$cohortId)
     ) |>
-    dplyr::select(cols) |>
+    dplyr::select(dplyr::all_of(cols)) |>
     dplyr::compute(name = subsetName, temporary = FALSE)
 
   # attrition reason

diff --git a/R/trimDemographics.R b/R/trimDemographics.R
@@ -320,7 +320,7 @@ trimDemographics <- function(cohort,
       cohortSetRef = newSet,
       cohortAttritionRef = attrition(newCohort),
       cohortCodelistRef = newCod,
-      .softValidation = TRUE
+      .softValidation = FALSE
     )
 
   omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))

diff --git a/R/unionCohorts.R b/R/unionCohorts.R
@@ -80,7 +80,7 @@ unionCohorts <- function(cohort,
       cohortSetRef = cohSet,
       cohortAttritionRef = NULL,
       cohortCodelistRef = cohCodelist,
-      .softValidation = TRUE
+      .softValidation = FALSE
     )
 
   if (isFALSE(keepOriginalCohorts)) {

diff --git a/R/yearCohorts.R b/R/yearCohorts.R
@@ -178,7 +178,7 @@ yearCohorts <- function(cohort,
       cohortSetRef = newSet,
       cohortAttritionRef = newAttrition |> dplyr::bind_rows(),
       cohortCodelistRef = newCodelist,
-      .softValidation = TRUE
+      .softValidation = FALSE
     )
 
   omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))

diff --git a/tests/testthat/test-addIndex.R b/tests/testthat/test-addIndex.R
@@ -1,12 +1,11 @@
-
 test_that("local tibble and duckdb test - will do nothing for these", {
   skip_on_cran()
   cdm <- omock::mockCdmReference() |>
     omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble(
       "cohort_definition_id" = 1,
       "subject_id" = c(1, 2, 3),
       "cohort_start_date" = as.Date("2020-01-01"),
-      "cohort_end_date" = as.Date("2029-12-31")
+      "cohort_end_date" = as.Date("2024-01-01")
     )))
   expect_no_error(cdm$cohort <- cdm$cohort |>
     addCohortTableIndex())

diff --git a/tests/testthat/test-collapseCohorts.R b/tests/testthat/test-collapseCohorts.R
@@ -5,7 +5,7 @@ test_that("simple example", {
       "cohort_definition_id" = 1L,
       "subject_id" = c(1L, 2L, 3L),
       "cohort_start_date" = as.Date("2020-01-01"),
-      "cohort_end_date" = as.Date("2029-12-31")
+      "cohort_end_date" = as.Date("2024-01-01")
     )))
   cdm <- omopgenerics::insertTable(
     cdm = cdm, name = "concept", table = dplyr::tibble(
@@ -70,7 +70,7 @@ test_that("simple example", {
   expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
   expect_identical(settings(newCohort), settings(cohort))
   expect_identical(cohortCount(newCohort), dplyr::tibble(
-    "cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
+    "cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
   ))
   # expect_identical(
   #   attrition(newCohort),
@@ -112,7 +112,7 @@ test_that("out of observation", {
       "cohort_definition_id" = 1L,
       "subject_id" = c(1L, 2L, 3L),
       "cohort_start_date" = as.Date("2020-01-01"),
-      "cohort_end_date" = as.Date("2029-12-31")
+      "cohort_end_date" = as.Date("2024-01-01")
     )))
   cdm <- omopgenerics::insertTable(
     cdm = cdm, name = "concept", table = dplyr::tibble(
@@ -169,7 +169,7 @@ test_that("out of observation", {
   expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
   expect_identical(settings(newCohort), settings(cohort))
   expect_identical(cohortCount(newCohort), dplyr::tibble(
-    "cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
+    "cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
   ))
   # expect_identical(
   #   attrition(newCohort),