Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mock data in tests to work with omock development version #413

Merged
merged 3 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CohortConstructor.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Version: 1.0
ProjectId: 27e959ec-87b2-4ff1-ac1d-3ea886296a0d

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,5 @@ Depends:
R (>= 4.1)
URL: https://ohdsi.github.io/CohortConstructor/, https://github.com/OHDSI/CohortConstructor
LazyData: true
Remotes:
ohdsi/omock
140 changes: 68 additions & 72 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ conceptCohort <- function(cdm,
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)

if(overlap == "merge"){
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
cli::cli_inform(c("i" = "Merging overlapping records."))
cdm[[name]] <- cdm[[name]] |>
joinOverlap(name = name, gap = 0) |>
omopgenerics::recordCohortAttrition(reason = "Merge overlapping records")
}

if(overlap == "extend"){
Expand All @@ -210,7 +210,7 @@ conceptCohort <- function(cdm,
extendOverlap(name = name) |>
omopgenerics::recordCohortAttrition(reason = "Add overlapping records")

# adding days might mean we no longer satisfy cohort requirements
# adding days might mean we no longer satisfy cohort requirements
cli::cli_inform(c("i" = "Re-appplying cohort requirements."))
cdm[[name]] <- fulfillCohortReqs(cdm = cdm, name = name)
}
Expand Down Expand Up @@ -492,86 +492,82 @@ extendOverlap <- function(cohort,
# Because once we add to a record this may cause a new overlap
# will do a while loop until all overlaps are resolved
while(hasOverlap(cohort)){
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
cli::cli_inform("Recursively adding overlapping records")
workingTblNames <- paste0(omopgenerics::uniqueTableName(), "_", c(1:4))
cohort <- cohort %>%
dplyr::mutate(record_id = dplyr::row_number()) |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[1])

# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap,
cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)
# keep overlapping records
cohort_overlap <- cohort %>%
dplyr::inner_join(cohort,
by = c("cohort_definition_id", "subject_id"),
suffix = c("", "_overlap")) |>
dplyr::filter(
record_id != record_id_overlap,
cohort_start_date <= cohort_end_date_overlap &
cohort_end_date >= cohort_start_date_overlap
) |>
dplyr::select("cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date",
"record_id") |>
dplyr::distinct() |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[2])

CDMConnector::dropTable(cdm = cdm,
name = workingTblNames)
cohort_no_overlap <- cohort |>
dplyr::anti_join(cohort_overlap |>
dplyr::select("record_id"),
by = "record_id") |>
dplyr::select(!"record_id") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[3])

cohort_overlap <- cohort_overlap %>%
dplyr::mutate(days = !!CDMConnector::datediff("cohort_start_date",
"cohort_end_date")) |>
dplyr::group_by(dplyr::pick("cohort_definition_id",
"subject_id")) |>
dplyr::summarise(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
days = as.integer(sum(.data$days, na.rm = TRUE))) %>%
dplyr:: ungroup() %>%
dplyr::mutate(cohort_end_date = as.Date(
!!CDMConnector::dateadd(
date = "cohort_start_date",
number = "days",
interval = "day"
))) |>
dplyr::select(!"days") |>
dplyr::compute(temporary = FALSE,
name = workingTblNames[4])

cohort <- dplyr::union_all(cohort_overlap, cohort_no_overlap) |>
dplyr::compute(name = name, temporary = FALSE)

CDMConnector::dropTable(cdm = cdm, name = workingTblNames)
}

cohort

}

hasOverlap <- function(cohort){
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()
overlaps <- cohort |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$cohort_start_date) |>
dplyr::mutate(
"next_cohort_start_date" = dplyr::lead(.data$cohort_start_date)
) |>
dplyr::filter(.data$cohort_end_date >= .data$next_cohort_start_date) |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::collect()

if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
if (overlaps$n > 0) {
cli::cli_inform(" - {overlaps$n} overlapping record{?s} found")
return(TRUE)
} else {
} else {
return(FALSE)
}

}

}
8 changes: 4 additions & 4 deletions R/intersectCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,10 @@ joinOverlap <- function(cohort,
cdm <- omopgenerics::cdmReference(cohort)

start <- cohort |>
dplyr::select(by, "date" := !!startDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!startDate) |>
dplyr::mutate("date_id" = -1)
end <- cohort |>
dplyr::select(by, "date" := !!endDate) |>
dplyr::select(dplyr::all_of(by), "date" := !!endDate) |>
dplyr::mutate("date_id" = 1)
if (gap > 0) {
end <- end |>
Expand All @@ -357,7 +357,7 @@ joinOverlap <- function(cohort,
dplyr::compute(temporary = FALSE, name = workingTbl)

x <- x |>
dplyr::group_by(dplyr::pick(by)) |>
dplyr::group_by(dplyr::pick(dplyr::all_of(by))) |>
dplyr::arrange(.data$date, .data$date_id) |>
dplyr::mutate(
"cum_id" = cumsum(.data$date_id),
Expand Down Expand Up @@ -410,7 +410,7 @@ joinAll <- function(cohort,
}

x <- cohort |>
dplyr::group_by(dplyr::across(by)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(by))) |>
dplyr::summarise(
cohort_start_date =
min(.data$cohort_start_date, na.rm = TRUE),
Expand Down
2 changes: 1 addition & 1 deletion R/requireCohortIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ requireCohortIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
2 changes: 1 addition & 1 deletion R/requireConceptIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ requireConceptIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
2 changes: 1 addition & 1 deletion R/requireTableIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ requireTableIntersect <- function(cohort,
) |
(!.data$cohort_definition_id %in% .env$cohortId)
) |>
dplyr::select(cols) |>
dplyr::select(dplyr::all_of(cols)) |>
dplyr::compute(name = subsetName, temporary = FALSE)

# attrition reason
Expand Down
3 changes: 1 addition & 2 deletions tests/testthat/test-addIndex.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@

test_that("local tibble and duckdb test - will do nothing for these", {
skip_on_cran()
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTables(tables = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
expect_no_error(cdm$cohort <- cdm$cohort |>
addCohortTableIndex())
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-collapseCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("simple example", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
Expand Down Expand Up @@ -70,7 +70,7 @@ test_that("simple example", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
Expand Down Expand Up @@ -112,7 +112,7 @@ test_that("out of observation", {
"cohort_definition_id" = 1L,
"subject_id" = c(1L, 2L, 3L),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
"cohort_end_date" = as.Date("2024-01-01")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
Expand Down Expand Up @@ -169,7 +169,7 @@ test_that("out of observation", {
expect_no_error(newCohort <- cohort |> collapseCohorts(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
"cohort_definition_id" = 1L, "number_records" = 2L, "number_subjects" = 2L
))
# expect_identical(
# attrition(newCohort),
Expand Down
Loading
Loading