From cc50a7ec40ca85950859b986d3878d4f06aaa77f Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:33:12 +0000 Subject: [PATCH] recursively add days closes #402 - adding days can lead to new overlaps --- R/conceptCohort.R | 39 ++++++++++++++++++++++++----- tests/testthat/test-conceptCohort.R | 19 ++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/R/conceptCohort.R b/R/conceptCohort.R index a3037e0..228bc5e 100644 --- a/R/conceptCohort.R +++ b/R/conceptCohort.R @@ -484,14 +484,18 @@ getDomainCohort <- function(cdm, extendOverlap <- function(cohort, name){ - workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4)) cdm <- omopgenerics::cdmReference(cohort) + # Because once we add to a record this may cause a new overlap + # will do a while loop until all overlaps are resolved + while(hasOverlap(cohort)){ + cli::cli_inform("Recursively adding overlapping records") + workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4)) cohort <- cohort %>% - dplyr::mutate(record_id = dplyr::row_number()) |> - dplyr::compute(temporary = FALSE, - name = workingTblNames[1]) + dplyr::mutate(record_id = dplyr::row_number()) |> + dplyr::compute(temporary = FALSE, + name = workingTblNames[1]) # keep overlapping records cohort_overlap <- cohort %>% @@ -536,13 +540,36 @@ extendOverlap <- function(cohort, dplyr::compute(temporary = FALSE, name = workingTblNames[4]) - cohort_updated <- dplyr::union_all(cohort_overlap, + cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |> dplyr::compute(name = name, temporary = FALSE) CDMConnector::dropTable(cdm = cdm, name = workingTblNames) - cohort_updated + } + + cohort } + +hasOverlap <- function(cohort){ + overlaps <- cohort |> + dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |> + dplyr::arrange(.data$cohort_start_date) |> + dplyr::mutate( + "next_cohort_start_date" = dplyr::lead(.data$cohort_start_date) + ) |> + dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |> + dplyr::ungroup() |> + dplyr::tally() |> + dplyr::collect() + + if (overlaps$n > 0) { + cli::cli_inform(" - {overlaps$n} overlapping record{?s} found") + return(TRUE) + } else { + return(FALSE) + } + + } diff --git a/tests/testthat/test-conceptCohort.R b/tests/testthat/test-conceptCohort.R index fd618db..2b59d5d 100644 --- a/tests/testthat/test-conceptCohort.R +++ b/tests/testthat/test-conceptCohort.R @@ -898,6 +898,25 @@ test_that("overlap option", { overlap = "another")) + + # When extending, overlapping with the following record + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = c(1L, 2L, 3L), + "person_id" = c(1L, 1L, 1L), + "drug_concept_id" = c(1L, 1L, 1L), + "drug_exposure_start_date" = as.Date(c("2020-01-01", "2020-01-05", "2020-01-13")), + "drug_exposure_end_date" = as.Date(c("2020-01-08", "2020-01-11", "2020-01-15")), + "drug_type_concept_id" = 1L + )) + expect_no_error(conceptCohort(cdm = cdm, + conceptSet = list(a = 1L), + name = "cohort_6", + exit = "event_end_date", + overlap = "extend")) + + + PatientProfiles::mockDisconnect(cdm) })