Skip to content

Commit

Permalink
Merge branch 'main' into inObservation-param
Browse files Browse the repository at this point in the history
edward-burn authored Dec 20, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents 0f455ab + 21088e6 commit eb80694
Showing 27 changed files with 156 additions and 236 deletions.
1 change: 0 additions & 1 deletion CohortConstructor.Rproj
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Version: 1.0
ProjectId: 00fa364f-f8e3-4b28-a7dd-02de51c09f48

RestoreWorkspace: No
SaveWorkspace: No
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -71,3 +71,5 @@ Depends:
R (>= 4.1)
URL: https://ohdsi.github.io/CohortConstructor/, https://github.com/OHDSI/CohortConstructor
LazyData: true
Remotes:
ohdsi/omock
2 changes: 1 addition & 1 deletion R/collapseCohorts.R
Original file line number Diff line number Diff line change
@@ -73,7 +73,7 @@ collapseCohorts <- function(cohort,
dplyr::compute(name = name, temporary = FALSE)
}
newCohort <- newCohort |>
omopgenerics::newCohortTable(.softValidation = TRUE) |>
omopgenerics::newCohortTable(.softValidation = FALSE) |>
omopgenerics::recordCohortAttrition(
reason = "Collapse cohort with a gap of {gap} days.",
cohortId = cohortId)
140 changes: 68 additions & 72 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
@@ -198,10 +198,10 @@ conceptCohort <- function(cdm,
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)

if(overlap == "merge"){
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
}

if(overlap == "extend"){
@@ -210,7 +210,7 @@ conceptCohort <- function(cdm,
extendOverlap(name = name) |>
omopgenerics::recordCohortAttrition(reason = "Add overlapping records")

# adding days might mean we no longer satisfy cohort requirements
# adding days might mean we no longer satisfy cohort requirements
cli::cli_inform(c("i" = "Re-appplying cohort requirements."))
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)
}
@@ -492,86 +492,82 @@ extendOverlap <- function(cohort,
# Because once we add to a record this may cause a new overlap
# will do a while loop until all overlaps are resolved
while(hasOverlap(cohort)){
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
dplyr::mutate(record_id = dplyr::row_number()) |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[1])

# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap,
cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)
# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

CDMConnector::dropTable(cdm = cdm,
name = workingTblNames)
cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days, na.rm = TRUE))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)

CDMConnector::dropTable(cdm = cdm, name = workingTblNames)
}

cohort

}

hasOverlap <- function(cohort){
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()

if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
return(TRUE)
} else {
} else {
return(FALSE)
}

}

}
2 changes: 1 addition & 1 deletion R/exitAtColumnDate.R
Original file line number Diff line number Diff line change
@@ -216,7 +216,7 @@ exitAtColumnDate <- function(cohort,
newCohort <- newCohort |>
dplyr::relocate(dplyr::all_of(omopgenerics::cohortColumns("cohort"))) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(.softValidation = TRUE)
omopgenerics::newCohortTable(.softValidation = FALSE)

cdm <- omopgenerics::dropTable(cdm, name = dplyr::starts_with(tmpPrefix))

4 changes: 2 additions & 2 deletions R/exitAtDate.R
Original file line number Diff line number Diff line change
@@ -105,7 +105,7 @@ exitAtObservationEnd <- function(cohort,

newCohort <- newCohort |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(.softValidation = TRUE) |>
omopgenerics::newCohortTable(.softValidation = FALSE) |>
omopgenerics::recordCohortAttrition(reason = reason, cohortId = cohortId)

omopgenerics::dropTable(cdm = cdm, name = tmpTable)
@@ -186,7 +186,7 @@ exitAtDeath <- function(cohort,
# no overlapping periods
joinOverlap(name = name) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(.softValidation = TRUE) |>
omopgenerics::newCohortTable(.softValidation = FALSE) |>
omopgenerics::recordCohortAttrition(reason = "Exit at death", cohortId = cohortId)

useIndexes <- getOption("CohortConstructor.use_indexes")
8 changes: 4 additions & 4 deletions R/intersectCohorts.R
Original file line number Diff line number Diff line change
@@ -342,10 +342,10 @@ joinOverlap <- function(cohort,
cdm <- omopgenerics::cdmReference(cohort)

start <- cohort |>
dplyr::select(by, "date" := !!startDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!startDate) |>
dplyr::mutate("date_id" = -1)
end <- cohort |>
dplyr::select(by, "date" := !!endDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!endDate) |>
dplyr::mutate("date_id" = 1)
if (gap > 0) {
end <- end |>
@@ -357,7 +357,7 @@ joinOverlap <- function(cohort,
dplyr::compute(temporary = FALSE, name = workingTbl)

x <- x |>
dplyr::group_by(dplyr::pick(by)) |>
dplyr::group_by(dplyr::pick(dplyr::all_of(by))) |>
dplyr::arrange(.data$date, .data$date_id) |>
dplyr::mutate(
"cum_id" = cumsum(.data$date_id),
@@ -410,7 +410,7 @@ joinAll <- function(cohort,
}

x <- cohort |>
dplyr::group_by(dplyr::across(by)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
dplyr::summarise(
cohort_start_date =
min(.data$cohort_start_date, na.rm = TRUE),
2 changes: 1 addition & 1 deletion R/matchCohorts.R
Original file line number Diff line number Diff line change
@@ -164,7 +164,7 @@ matchCohorts <- function(cohort,
"match_status" = "target"
)
,
.softValidation = TRUE
.softValidation = FALSE
)

# Bind both cohorts
1 change: 1 addition & 0 deletions R/padCohortDate.R
Original file line number Diff line number Diff line change
@@ -213,6 +213,7 @@ padCohortStart <- function(cohort,
) |>
dplyr::union_all(subCohort) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(.softValidation = FALSE) |>
omopgenerics::recordCohortAttrition(cohortId = cohortId, reason = reason)

# drop temp table
2 changes: 1 addition & 1 deletion R/requireCohortIntersect.R
Original file line number Diff line number Diff line change
@@ -105,7 +105,7 @@ requireCohortIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
2 changes: 1 addition & 1 deletion R/requireConceptIntersect.R
Original file line number Diff line number Diff line change
@@ -99,7 +99,7 @@ requireConceptIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
2 changes: 1 addition & 1 deletion R/requireDateRange.R
Original file line number Diff line number Diff line change
@@ -149,7 +149,7 @@ trimToDateRange <- function(cohort,

cohort <- cohort |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::newCohortTable(.softValidation = TRUE)
omopgenerics::newCohortTable(.softValidation = FALSE)

useIndexes <- getOption("CohortConstructor.use_indexes")
if (!isFALSE(useIndexes)) {
2 changes: 1 addition & 1 deletion R/requireTableIntersect.R
Original file line number Diff line number Diff line change
@@ -90,7 +90,7 @@ requireTableIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
2 changes: 1 addition & 1 deletion R/trimDemographics.R
Original file line number Diff line number Diff line change
@@ -320,7 +320,7 @@ trimDemographics <- function(cohort,
cohortSetRef = newSet,
cohortAttritionRef = attrition(newCohort),
cohortCodelistRef = newCod,
.softValidation = TRUE
.softValidation = FALSE
)

omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))
2 changes: 1 addition & 1 deletion R/unionCohorts.R
Original file line number Diff line number Diff line change
@@ -80,7 +80,7 @@ unionCohorts <- function(cohort,
cohortSetRef = cohSet,
cohortAttritionRef = NULL,
cohortCodelistRef = cohCodelist,
.softValidation = TRUE
.softValidation = FALSE
)

if (isFALSE(keepOriginalCohorts)) {
2 changes: 1 addition & 1 deletion R/yearCohorts.R
Original file line number Diff line number Diff line change
@@ -178,7 +178,7 @@ yearCohorts <- function(cohort,
cohortSetRef = newSet,
cohortAttritionRef = newAttrition |> dplyr::bind_rows(),
cohortCodelistRef = newCodelist,
.softValidation = TRUE
.softValidation = FALSE
)

omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix))
3 changes: 1 addition & 2 deletions tests/testthat/test-addIndex.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@

test_that("local tibble and duckdb test - will do nothing for these", {
skip_on_cran()
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
expect_no_error(cdm$cohort <- cdm$cohort |>
addCohortTableIndex())
8 changes: 4 additions & 4 deletions tests/testthat/test-collapseCohorts.R
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@ test_that("simple example", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
@@ -70,7 +70,7 @@ test_that("simple example", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
@@ -112,7 +112,7 @@ test_that("out of observation", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
@@ -169,7 +169,7 @@ test_that("out of observation", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
Loading

0 comments on commit eb80694

Please sign in to comment.