From fe98cfe1b4fcf58bf23c6eab369d64b844a0ab8e Mon Sep 17 00:00:00 2001 From: schuemie Date: Tue, 1 Oct 2024 14:36:41 +0200 Subject: [PATCH] Setting up end-of-obs_dependence simulations --- R/ModelFitting.R | 2 +- R/Simulation.R | 98 +++++++++++++------ extras/EndOfObsSimulations.R | 163 ++++++++++++++++++++++++++++++++ extras/TestEndOfObsDiagnostic.R | 95 ------------------- 4 files changed, 231 insertions(+), 127 deletions(-) create mode 100644 extras/EndOfObsSimulations.R diff --git a/R/ModelFitting.R b/R/ModelFitting.R index ab5de85..330c53e 100644 --- a/R/ModelFitting.R +++ b/R/ModelFitting.R @@ -61,7 +61,7 @@ fitSccsModel <- function(sccsIntervalData, ), profileGrid = NULL, profileBounds = c(log(0.1), log(10)), - endOfObservationEffectBounds = c(log(0.75), log(1.25))) { + endOfObservationEffectBounds = c(log(0.5), log(2.0))) { errorMessages <- checkmate::makeAssertCollection() checkmate::assertClass(sccsIntervalData, "SccsIntervalData", null.ok = TRUE, add = errorMessages) checkmate::assertClass(prior, "cyclopsPrior", add = errorMessages) diff --git a/R/Simulation.R b/R/Simulation.R index 773d10d..dfe07b8 100644 --- a/R/Simulation.R +++ b/R/Simulation.R @@ -17,9 +17,9 @@ #' Create a risk window definition for simulation #' #' @param start Start of the risk window relative to exposure start. -#' @param end The end of the risk window (in days) relative to the `endAnchor`. -#' @param endAnchor The anchor point for the end of the risk window. Can be `"era start"` -#' or `"era end"`. +#' @param end The end of the risk window (in days) relative to the `endAnchor`. +#' @param endAnchor The anchor point for the end of the risk window. Can be `"era start"` +#' or `"era end"`. #' @param splitPoints Subdivision of the risk window in to smaller sub-windows. #' @param relativeRisks Either a single number representing the relative risk in the risk #' window, or when splitPoints have been defined a vector of relative @@ -78,6 +78,8 @@ createSimulationRiskWindow <- function(start = 0, #' @param eraIds The IDs for the covariates to be generated. #' @param patientUsages The fraction of patients that use the drugs. #' @param usageRate The rate of prescriptions per person that uses the drug. +#' @param usageRateSlope The change in the usage rate from one day to the next. +#' `usageRate` is the intercept at day 0 #' @param meanPrescriptionDurations The mean duration of a prescription, per drug. #' @param sdPrescriptionDurations The standard deviation of the duration of a prescription, per #' drug. @@ -107,6 +109,7 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, eraIds = c(1, 2), patientUsages = c(0.2, 0.1), usageRate = c(0.01, 0.01), + usageRateSlope = c(0, 0), meanPrescriptionDurations = c(14, 30), sdPrescriptionDurations = c(7, 14), simulationRiskWindows = list( @@ -132,6 +135,7 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, checkmate::assertIntegerish(eraIds, min.len = 1, add = errorMessages) checkmate::assertNumeric(patientUsages, lower = 0, min.len = 1, add = errorMessages) checkmate::assertNumeric(usageRate, lower = 0, min.len = 1, add = errorMessages) + checkmate::assertNumeric(usageRateSlope, min.len = 1, add = errorMessages) checkmate::assertNumeric(meanPrescriptionDurations, lower = 0, min.len = 1, add = errorMessages) checkmate::assertNumeric(sdPrescriptionDurations, lower = 0, min.len = 1, add = errorMessages) checkmate::assertList(simulationRiskWindows, min.len = 1, add = errorMessages) @@ -167,7 +171,7 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf # Simulate a batch of persons, and eliminate non-cases n <- 1000 - ### Generate patients ### + # Generate patients ----------------------------------------------------------------------------- observationDays <- round(rnorm(n, settings$meanPatientTime, settings$sdPatientTime)) observationDays[observationDays < 1] <- 1 observationDays[observationDays > settings$maxAge - settings$minAge] <- settings$maxAge - settings$minAge @@ -191,41 +195,73 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf noninformativeEndCensor = 0 ) - ### Generate eras ### + # Generate eras ---------------------------------------------------------------------------------- + cumulativeIntensity <- function(t, usageRate, usageRateSlope) { + usageRate * t + 0.5 * usageRateSlope * t^2 + } + + inverseCumulativeIntensity <- function(u, usageRate, usageRateSlope, days) { + if (usageRateSlope == 0) { + t <- u * days + } else { + lambdaT <- usageRate * days + 0.5 * usageRateSlope * days^2 + t <- (-usageRate + sqrt(usageRate^2 + 2 * usageRateSlope * u * lambdaT)) / usageRateSlope + } + return(t) + } + + sampleErasForPerson <- function(idx, eraId, usageRate, usageRateSlope, meanPrescriptionDurations, sdPrescriptionDurations) { + nEvents <- rpois(1, cumulativeIntensity(observationDays[idx], usageRate, usageRateSlope)) + if (nEvents == 0) { + return(tibble()) + } else { + u <- runif(nEvents) + startDay <- sapply(u, + inverseCumulativeIntensity, + usageRate = settings$usageRate[i], + usageRateSlope = settings$usageRateSlope[i], + days = observationDays[idx]) |> + round() |> + unique() |> + sort() + duration <- round(rnorm(length(startDay), meanPrescriptionDurations, sdPrescriptionDurations)) + duration[duration < 1] <- 1 + endDay <- startDay + duration + endDay[endDay > observationDays[idx]] <- observationDays[idx] + newEras <- tibble( + eraType = "rx", + caseId = idx, + eraId = eraId, + eraValue = 1, + eraStartDay = startDay, + eraEndDay = endDay + ) + return(newEras) + } + } + eras <- tibble() for (i in 1:length(settings$eraIds)) { # i <- 1 patientsOnDrug <- sample.int(nrow(cases), - settings$patientUsages[i] * nrow(cases), - replace = FALSE + settings$patientUsages[i] * nrow(cases), + replace = FALSE ) - patientsOnDrug <- patientsOnDrug[order(patientsOnDrug)] - count <- rpois(length(patientsOnDrug), observationDays[patientsOnDrug] * settings$usageRate[i]) - observationPeriodId <- rep(patientsOnDrug, count) - patientsOnDrug <- patientsOnDrug[count != 0] - startDay <- round(runif(sum(count), 0, cases$endDay[observationPeriodId])) - duration <- round(rnorm( - sum(count), - settings$meanPrescriptionDurations[i], - settings$sdPrescriptionDurations[i] - )) - duration[duration < 1] <- 1 - endDay <- startDay + duration - endDay[endDay > cases$endDay[observationPeriodId]] <- cases$endDay[observationPeriodId][endDay > - cases$endDay[observationPeriodId]] - newEras <- tibble( - eraType = "rx", - caseId = observationPeriodId, - eraId = settings$eraIds[i], - eraValue = 1, - eraStartDay = startDay, - eraEndDay = endDay - ) - eras <- rbind(eras, newEras) + patientsOnDrug <- sort(patientsOnDrug) + + newEras <- lapply(patientsOnDrug, + sampleErasForPerson, + eraId = settings$eraIds[i], + usageRate = settings$usageRate[i], + usageRateSlope = settings$usageRateSlope[i], + meanPrescriptionDurations = settings$meanPrescriptionDurations[i], + sdPrescriptionDurations = settings$sdPrescriptionDurations[i]) + newEras <- bind_rows(newEras) + eras <- bind_rows(eras, newEras) } eras <- eras[order(eras$caseId, eras$eraId), ] - ### Generate outcomes ### + # Generate outcomes ----------------------------------------------------------------------------- baselineRates <- runif(n, min = settings$minBaselineRate, max = settings$maxBaselineRate) if (settings$includeAgeEffect) { ageRrs <- exp(ageFun(settings$minAge:settings$maxAge)) diff --git a/extras/EndOfObsSimulations.R b/extras/EndOfObsSimulations.R new file mode 100644 index 0000000..0aa9e64 --- /dev/null +++ b/extras/EndOfObsSimulations.R @@ -0,0 +1,163 @@ +library(SelfControlledCaseSeries) + + + +# Define simulation scenarios ---------------------------------------------------------------------- + +scenarios <- list() +for (trueRr in c(1, 2, 4)) { + for (baseLineRate in c(0.01, 0.001, 0.0001)) { + for (usageRateSlope in c(-0.0001, 0, 0.0001)) { + for (censorType in c("Next week", "Gradual", "None")) { + for (censorStrength in if (censorType == "None") c("None") else c("Weak", "Strong")) { + rw <- createSimulationRiskWindow(start = 0, + end = 0, + endAnchor = "era end", + relativeRisks = trueRr) + settings <- createSccsSimulationSettings(minBaselineRate = baseLineRate / 10, + maxBaselineRate = baseLineRate, + eraIds = 1, + usageRate = if (usageRateSlope < 0) 0.01 - 3000 * usageRateSlope else 0.01, + usageRateSlope = usageRateSlope, + simulationRiskWindows = list(rw), + includeAgeEffect = FALSE, + includeSeasonality = FALSE, + includeCalendarTimeEffect = FALSE) + if (censorType == "Next week") { + # One change of dying in next week: + if (censorStrength == "Weak") { + censorFunction <- function(endDay, outcomeDay) { + if_else(runif(length(endDay)) < 0.05, round(pmin(endDay, outcomeDay + runif(length(endDay), 0, 7))), endDay) + } + } else { + censorFunction <- function(endDay, outcomeDay) { + if_else(runif(length(endDay)) < 0.25, round(pmin(endDay, outcomeDay + runif(length(endDay), 0, 7))), endDay) + } + } + } else if (censorType == "Gradual") { + # Added hazard of dying for rest of time: + if (censorStrength == "Weak") { + censorFunction <- function(endDay, outcomeDay) { + pmin(endDay, outcomeDay + rexp(length(endDay), 0.001)) + } + } else { + censorFunction <- function(endDay, outcomeDay) { + pmin(endDay, outcomeDay + rexp(length(endDay), 0.01)) + } + } + } else { + censorFunction <- function(endDay, outcomeDay) { + endDay + } + } + scenario <- list(settings = settings, + censorFunction = censorFunction, + trueRr = trueRr, + baselineRate = baseLineRate, + usageRateSlope = usageRateSlope, + censorType = censorType, + censorStrength = censorStrength) + scenarios[[length(scenarios) + 1]] <- scenario + + } + } + } + } +} +writeLines(sprintf("Number of simulation scenarios: %d", length(scenarios))) + +# Run simulations ---------------------------------------------------------------------------------- +folder <- "simulations" + +scenario = scenarios[[10]] +simulateOne <- function(seed, scenario) { + set.seed(seed) + sccsData <- simulateSccsData(1000, scenario$settings) + firstOutcomeEra <- sccsData$eras |> + filter(eraId == 10) |> + group_by(caseId) |> + filter(row_number(eraStartDay) == 1) |> + ungroup() |> + select(caseId, outcomeDay = eraStartDay) + + # Censor observation at outcome: + sccsData$cases <- sccsData$cases |> + inner_join(firstOutcomeEra, by = join_by(caseId)) |> + collect() |> + mutate(endDay = scenario$censorFunction(endDay, outcomeDay)) |> + select(-outcomeDay) + + + covarSettings <- createEraCovariateSettings(label = "Exposure of interest", + includeEraIds = 1, + stratifyById = FALSE, + start = 0, + end = 0, + endAnchor = "era end") + + preCovarSettings <- createEraCovariateSettings(label = "Pre-exposure", + includeEraIds = 1, + stratifyById = FALSE, + start = -30, + end = -1, + endAnchor = "era start") + + studyPop <- createStudyPopulation(sccsData = sccsData, + outcomeId = scenario$settings$outcomeId, + firstOutcomeOnly = TRUE, + naivePeriod = 0) + + sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, + sccsData = sccsData, + eraCovariateSettings = list(covarSettings, preCovarSettings)) + + model <- fitSccsModel(sccsIntervalData, profileBounds = NULL) + estimates <- model$estimates + idx1 <- which(estimates$covariateId == 1000) + idx2 <- which(estimates$covariateId == 99) + scenario$settings <- NULL + scenario$censorFunction <- NULL + + row <- tibble(logRr = estimates$logRr[idx1], + ci95Lb = exp(estimates$logLb95[idx1]), + ci95Ub = exp(estimates$logUb95[idx1]), + diagnosticEstimate = exp(estimates$logRr[idx2]), + diagnosticP = computeEventDependentObservationP(model)) + return(row) +} + +cluster <- ParallelLogger::makeCluster(10) +ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries") + +dir.create(folder) +rows <- list() +for (i in seq_along(scenarios)) { + writeLines(sprintf("Processing scenario %d of %d", i, length(scenarios))) + scenario <- scenarios[[i]] + scenarioKey <- scenario + scenarioKey$settings <- NULL + scenarioKey$censorFunction <- NULL + fileName <- paste0(paste(gsub("__", "", gsub("[^a-zA-Z0-9]", "_", paste(names(scenarioKey), scenarioKey, sep = "_"))), collapse = "_"), ".rds") + fileName <- file.path(folder, fileName) + if (file.exists(fileName)) { + results <- readRDS(fileName) + } else { + results <- ParallelLogger::clusterApply(cluster, 1:100, simulateOne, scenario = scenario) + results <- bind_rows(results) + saveRDS(results, fileName) + } + metrics <- results |> + mutate(coverage = ci95Lb < trueRr & ci95Ub > trueRr, + diagnosticEstimate = log(diagnosticEstimate), + failDiagnostic = diagnosticP < 0.05) |> + summarise(coverage = mean(coverage, na.rm = TRUE), + bias = mean(logRr, na.rm = TRUE), + meanDiagnosticEstimate = exp(mean(diagnosticEstimate, na.rm = TRUE)), + fractionFailingDiagnostic = mean(failDiagnostic, na.rm = TRUE)) + row <- as_tibble(scenarioKey) |> + bind_cols(metrics) + rows[[length(rows) + 1]] <- row +} +rows <- bind_rows(rows) + +ParallelLogger::stopCluster(cluster) diff --git a/extras/TestEndOfObsDiagnostic.R b/extras/TestEndOfObsDiagnostic.R index 39b01aa..0be1d8c 100644 --- a/extras/TestEndOfObsDiagnostic.R +++ b/extras/TestEndOfObsDiagnostic.R @@ -318,98 +318,3 @@ ggplot(data, aes(x = startDate + 15, y = ir)) + geom_line() + geom_point() -# Simulations -------------------------------------------------------------------------------------- -library(SelfControlledCaseSeries) - -rw <- createSimulationRiskWindow(start = 0, - end = 0, - endAnchor = "era end", - relativeRisks = 2) - -settings <- createSccsSimulationSettings(minBaselineRate = 0.001, - maxBaselineRate = 0.01, - eraIds = 1, - simulationRiskWindows = list(rw), - includeAgeEffect = FALSE, - includeSeasonality = FALSE, - includeCalendarTimeEffect = FALSE) - -simulateOne <- function(seed, settings, censorFunction) { - set.seed(seed) - sccsData <- simulateSccsData(1000, settings) - sccsData$cases |> - count() - firstOutcomeEra <- sccsData$eras |> - filter(eraId == 10) |> - group_by(caseId) |> - filter(row_number(eraStartDay) == 1) |> - ungroup() |> - select(caseId, outcomeDay = eraStartDay) - - # Censor observation at outcome: - sccsData$cases <- sccsData$cases |> - inner_join(firstOutcomeEra, by = join_by(caseId)) |> - collect() |> - mutate(endDay = censorFunction(endDay, outcomeDay)) |> - select(-outcomeDay) - - - covarSettings <- createEraCovariateSettings(label = "Exposures of interest", - includeEraIds = 1, - stratifyById = FALSE, - start = 0, - end = 0, - endAnchor = "era end") - - studyPop <- createStudyPopulation(sccsData = sccsData, - outcomeId = settings$outcomeId, - firstOutcomeOnly = TRUE, - naivePeriod = 0) - - sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, - sccsData = sccsData, - eraCovariateSettings = covarSettings) - - model <- fitSccsModel(sccsIntervalData) - estimates <- model$estimates - return(tibble(ci95Lb = exp(estimates$logLb95[2]), - ci95Ub = exp(estimates$logUb95[2]), - diagnosticEstimate = exp(estimates$logRr[1]))) -} - -# One change at dying in next week: -censorFunction <- function(endDay, outcomeDay) { - if_else(runif(length(endDay)) < 0.9, round(pmin(endDay, outcomeDay + runif(length(endDay), 0, 7))), endDay) -} -# Added hazard of dying for rest of time: -censorFunction <- function(endDay, outcomeDay) { - pmin(endDay, outcomeDay + rexp(length(endDay), 0.05)) -} - -cluster <- ParallelLogger::makeCluster(10) -ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries") - - - -results <- ParallelLogger::clusterApply(cluster, 1:100, simulateOne, settings = settings, censorFunction = censorFunction) -results <- bind_rows(results) -coverage <- results |> - mutate(coverage = ci95Lb < rw$relativeRisks & ci95Ub > rw$relativeRisks) |> - summarise(mean(coverage)) |> - pull() -diagnosticEstimate <- results |> - mutate(diagnosticEstimate = log(diagnosticEstimate)) |> - summarise(exp(mean(diagnosticEstimate))) |> - pull() -writeLines(sprintf("Coverage: %0.3f, mean diagnostic estimate: %0.2f", coverage, diagnosticEstimate)) - - - - -ParallelLogger::stopCluster(cluster) - -print(sprintf("True IRR = %0.2f", rw$relativeRisks)) - -h <- 0.0003 -1-(1-h)^365 -