From cc50a7ec40ca85950859b986d3878d4f06aaa77f Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:33:12 +0000 Subject: [PATCH 1/2] recursively add days closes #402 - adding days can lead to new overlaps --- R/conceptCohort.R | 39 ++++++++++++++++++++++++----- tests/testthat/test-conceptCohort.R | 19 ++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/R/conceptCohort.R b/R/conceptCohort.R index a3037e0..228bc5e 100644 --- a/R/conceptCohort.R +++ b/R/conceptCohort.R @@ -484,14 +484,18 @@ getDomainCohort <- function(cdm, extendOverlap <- function(cohort, name){ - workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4)) cdm <- omopgenerics::cdmReference(cohort) + # Because once we add to a record this may cause a new overlap + # will do a while loop until all overlaps are resolved + while(hasOverlap(cohort)){ + cli::cli_inform("Recursively adding overlapping records") + workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4)) cohort <- cohort %>% - dplyr::mutate(record_id = dplyr::row_number()) |> - dplyr::compute(temporary = FALSE, - name = workingTblNames[1]) + dplyr::mutate(record_id = dplyr::row_number()) |> + dplyr::compute(temporary = FALSE, + name = workingTblNames[1]) # keep overlapping records cohort_overlap <- cohort %>% @@ -536,13 +540,36 @@ extendOverlap <- function(cohort, dplyr::compute(temporary = FALSE, name = workingTblNames[4]) - cohort_updated <- dplyr::union_all(cohort_overlap, + cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |> dplyr::compute(name = name, temporary = FALSE) CDMConnector::dropTable(cdm = cdm, name = workingTblNames) - cohort_updated + } + + cohort } + +hasOverlap <- function(cohort){ + overlaps <- cohort |> + dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |> + dplyr::arrange(.data$cohort_start_date) |> + dplyr::mutate( + "next_cohort_start_date" = dplyr::lead(.data$cohort_start_date) + ) |> + dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |> + dplyr::ungroup() |> + dplyr::tally() |> + dplyr::collect() + + if (overlaps$n > 0) { + cli::cli_inform(" - {overlaps$n} overlapping record{?s} found") + return(TRUE) + } else { + return(FALSE) + } + + } diff --git a/tests/testthat/test-conceptCohort.R b/tests/testthat/test-conceptCohort.R index fd618db..2b59d5d 100644 --- a/tests/testthat/test-conceptCohort.R +++ b/tests/testthat/test-conceptCohort.R @@ -898,6 +898,25 @@ test_that("overlap option", { overlap = "another")) + + # When extending, overlapping with the following record + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = c(1L, 2L, 3L), + "person_id" = c(1L, 1L, 1L), + "drug_concept_id" = c(1L, 1L, 1L), + "drug_exposure_start_date" = as.Date(c("2020-01-01", "2020-01-05", "2020-01-13")), + "drug_exposure_end_date" = as.Date(c("2020-01-08", "2020-01-11", "2020-01-15")), + "drug_type_concept_id" = 1L + )) + expect_no_error(conceptCohort(cdm = cdm, + conceptSet = list(a = 1L), + name = "cohort_6", + exit = "event_end_date", + overlap = "extend")) + + + PatientProfiles::mockDisconnect(cdm) }) From f1915d0d57e6eecc41ae55e4255d9e8cb2556014 Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:41:54 +0000 Subject: [PATCH 2/2] re-apply cohort requirements closes #403 --- R/conceptCohort.R | 7 +++++-- tests/testthat/test-conceptCohort.R | 21 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/R/conceptCohort.R b/R/conceptCohort.R index 228bc5e..feb0b2d 100644 --- a/R/conceptCohort.R +++ b/R/conceptCohort.R @@ -206,12 +206,15 @@ conceptCohort <- function(cdm, } if(overlap == "extend"){ - cli::cli_inform(c("i" = "Merging overlapping records.")) + cli::cli_inform(c("i" = "Adding overlapping records.")) cdm[[name]] <- cdm[[name]] |> extendOverlap(name = name) |> omopgenerics::recordCohortAttrition(reason = "Add overlapping records") - } + # adding days might mean we no longer satisfy cohort requirements + cli::cli_inform(c("i" = "Re-appplying cohort requirements.")) + cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name) + } cdm[[name]] <- omopgenerics::newCohortTable(table = cdm[[name]]) diff --git a/tests/testthat/test-conceptCohort.R b/tests/testthat/test-conceptCohort.R index 2b59d5d..e12227a 100644 --- a/tests/testthat/test-conceptCohort.R +++ b/tests/testthat/test-conceptCohort.R @@ -909,7 +909,7 @@ test_that("overlap option", { "drug_exposure_end_date" = as.Date(c("2020-01-08", "2020-01-11", "2020-01-15")), "drug_type_concept_id" = 1L )) - expect_no_error(conceptCohort(cdm = cdm, + expect_no_error(cdm$cohort_6 <- conceptCohort(cdm = cdm, conceptSet = list(a = 1L), name = "cohort_6", exit = "event_end_date", @@ -917,6 +917,25 @@ test_that("overlap option", { + # Check if "extend" exceeds observation_period_end_date + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = c(1L, 2L), + "person_id" = c(1L, 1L), + "drug_concept_id" = c(1L, 1L), + "drug_exposure_start_date" = as.Date(c("2020-01-15", "2020-01-20")), + "drug_exposure_end_date" = as.Date(c("2020-01-28", "2020-01-28")), + "drug_type_concept_id" = 1L + )) + + expect_no_error(cdm$cohort_7 <- conceptCohort(cdm = cdm, + conceptSet = list(a = 1L), + name = "cohort_7", + exit = "event_end_date", + overlap = "extend")) + + + PatientProfiles::mockDisconnect(cdm) })