Skip to content

Commit

Permalink
Setting up end-of-obs_dependence simulations
Browse files Browse the repository at this point in the history
  • Loading branch information
schuemie committed Oct 1, 2024
1 parent 06a5651 commit fe98cfe
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 127 deletions.
2 changes: 1 addition & 1 deletion R/ModelFitting.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fitSccsModel <- function(sccsIntervalData,
),
profileGrid = NULL,
profileBounds = c(log(0.1), log(10)),
endOfObservationEffectBounds = c(log(0.75), log(1.25))) {
endOfObservationEffectBounds = c(log(0.5), log(2.0))) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertClass(sccsIntervalData, "SccsIntervalData", null.ok = TRUE, add = errorMessages)
checkmate::assertClass(prior, "cyclopsPrior", add = errorMessages)
Expand Down
98 changes: 67 additions & 31 deletions R/Simulation.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#' Create a risk window definition for simulation
#'
#' @param start Start of the risk window relative to exposure start.
#' @param end The end of the risk window (in days) relative to the `endAnchor`.
#' @param endAnchor The anchor point for the end of the risk window. Can be `"era start"`
#' or `"era end"`.
#' @param end The end of the risk window (in days) relative to the `endAnchor`.
#' @param endAnchor The anchor point for the end of the risk window. Can be `"era start"`
#' or `"era end"`.
#' @param splitPoints Subdivision of the risk window in to smaller sub-windows.
#' @param relativeRisks Either a single number representing the relative risk in the risk
#' window, or when splitPoints have been defined a vector of relative
Expand Down Expand Up @@ -78,6 +78,8 @@ createSimulationRiskWindow <- function(start = 0,
#' @param eraIds The IDs for the covariates to be generated.
#' @param patientUsages The fraction of patients that use the drugs.
#' @param usageRate The rate of prescriptions per person that uses the drug.
#' @param usageRateSlope The change in the usage rate from one day to the next.
#' `usageRate` is the intercept at day 0
#' @param meanPrescriptionDurations The mean duration of a prescription, per drug.
#' @param sdPrescriptionDurations The standard deviation of the duration of a prescription, per
#' drug.
Expand Down Expand Up @@ -107,6 +109,7 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365,
eraIds = c(1, 2),
patientUsages = c(0.2, 0.1),
usageRate = c(0.01, 0.01),
usageRateSlope = c(0, 0),
meanPrescriptionDurations = c(14, 30),
sdPrescriptionDurations = c(7, 14),
simulationRiskWindows = list(
Expand All @@ -132,6 +135,7 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365,
checkmate::assertIntegerish(eraIds, min.len = 1, add = errorMessages)
checkmate::assertNumeric(patientUsages, lower = 0, min.len = 1, add = errorMessages)
checkmate::assertNumeric(usageRate, lower = 0, min.len = 1, add = errorMessages)
checkmate::assertNumeric(usageRateSlope, min.len = 1, add = errorMessages)
checkmate::assertNumeric(meanPrescriptionDurations, lower = 0, min.len = 1, add = errorMessages)
checkmate::assertNumeric(sdPrescriptionDurations, lower = 0, min.len = 1, add = errorMessages)
checkmate::assertList(simulationRiskWindows, min.len = 1, add = errorMessages)
Expand Down Expand Up @@ -167,7 +171,7 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf
# Simulate a batch of persons, and eliminate non-cases
n <- 1000

### Generate patients ###
# Generate patients -----------------------------------------------------------------------------
observationDays <- round(rnorm(n, settings$meanPatientTime, settings$sdPatientTime))
observationDays[observationDays < 1] <- 1
observationDays[observationDays > settings$maxAge - settings$minAge] <- settings$maxAge - settings$minAge
Expand All @@ -191,41 +195,73 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf
noninformativeEndCensor = 0
)

### Generate eras ###
# Generate eras ----------------------------------------------------------------------------------
cumulativeIntensity <- function(t, usageRate, usageRateSlope) {
usageRate * t + 0.5 * usageRateSlope * t^2
}

inverseCumulativeIntensity <- function(u, usageRate, usageRateSlope, days) {
if (usageRateSlope == 0) {
t <- u * days
} else {
lambdaT <- usageRate * days + 0.5 * usageRateSlope * days^2
t <- (-usageRate + sqrt(usageRate^2 + 2 * usageRateSlope * u * lambdaT)) / usageRateSlope
}
return(t)
}

sampleErasForPerson <- function(idx, eraId, usageRate, usageRateSlope, meanPrescriptionDurations, sdPrescriptionDurations) {
nEvents <- rpois(1, cumulativeIntensity(observationDays[idx], usageRate, usageRateSlope))
if (nEvents == 0) {
return(tibble())
} else {
u <- runif(nEvents)
startDay <- sapply(u,
inverseCumulativeIntensity,
usageRate = settings$usageRate[i],
usageRateSlope = settings$usageRateSlope[i],
days = observationDays[idx]) |>
round() |>
unique() |>
sort()
duration <- round(rnorm(length(startDay), meanPrescriptionDurations, sdPrescriptionDurations))
duration[duration < 1] <- 1
endDay <- startDay + duration
endDay[endDay > observationDays[idx]] <- observationDays[idx]
newEras <- tibble(
eraType = "rx",
caseId = idx,
eraId = eraId,
eraValue = 1,
eraStartDay = startDay,
eraEndDay = endDay
)
return(newEras)
}
}

eras <- tibble()
for (i in 1:length(settings$eraIds)) {
# i <- 1
patientsOnDrug <- sample.int(nrow(cases),
settings$patientUsages[i] * nrow(cases),
replace = FALSE
settings$patientUsages[i] * nrow(cases),
replace = FALSE
)
patientsOnDrug <- patientsOnDrug[order(patientsOnDrug)]
count <- rpois(length(patientsOnDrug), observationDays[patientsOnDrug] * settings$usageRate[i])
observationPeriodId <- rep(patientsOnDrug, count)
patientsOnDrug <- patientsOnDrug[count != 0]
startDay <- round(runif(sum(count), 0, cases$endDay[observationPeriodId]))
duration <- round(rnorm(
sum(count),
settings$meanPrescriptionDurations[i],
settings$sdPrescriptionDurations[i]
))
duration[duration < 1] <- 1
endDay <- startDay + duration
endDay[endDay > cases$endDay[observationPeriodId]] <- cases$endDay[observationPeriodId][endDay >
cases$endDay[observationPeriodId]]
newEras <- tibble(
eraType = "rx",
caseId = observationPeriodId,
eraId = settings$eraIds[i],
eraValue = 1,
eraStartDay = startDay,
eraEndDay = endDay
)
eras <- rbind(eras, newEras)
patientsOnDrug <- sort(patientsOnDrug)

newEras <- lapply(patientsOnDrug,
sampleErasForPerson,
eraId = settings$eraIds[i],
usageRate = settings$usageRate[i],
usageRateSlope = settings$usageRateSlope[i],
meanPrescriptionDurations = settings$meanPrescriptionDurations[i],
sdPrescriptionDurations = settings$sdPrescriptionDurations[i])
newEras <- bind_rows(newEras)
eras <- bind_rows(eras, newEras)
}
eras <- eras[order(eras$caseId, eras$eraId), ]

### Generate outcomes ###
# Generate outcomes -----------------------------------------------------------------------------
baselineRates <- runif(n, min = settings$minBaselineRate, max = settings$maxBaselineRate)
if (settings$includeAgeEffect) {
ageRrs <- exp(ageFun(settings$minAge:settings$maxAge))
Expand Down
163 changes: 163 additions & 0 deletions extras/EndOfObsSimulations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
library(SelfControlledCaseSeries)



# Define simulation scenarios ----------------------------------------------------------------------

scenarios <- list()
for (trueRr in c(1, 2, 4)) {
for (baseLineRate in c(0.01, 0.001, 0.0001)) {
for (usageRateSlope in c(-0.0001, 0, 0.0001)) {
for (censorType in c("Next week", "Gradual", "None")) {
for (censorStrength in if (censorType == "None") c("None") else c("Weak", "Strong")) {
rw <- createSimulationRiskWindow(start = 0,
end = 0,
endAnchor = "era end",
relativeRisks = trueRr)
settings <- createSccsSimulationSettings(minBaselineRate = baseLineRate / 10,
maxBaselineRate = baseLineRate,
eraIds = 1,
usageRate = if (usageRateSlope < 0) 0.01 - 3000 * usageRateSlope else 0.01,
usageRateSlope = usageRateSlope,
simulationRiskWindows = list(rw),
includeAgeEffect = FALSE,
includeSeasonality = FALSE,
includeCalendarTimeEffect = FALSE)
if (censorType == "Next week") {
# One change of dying in next week:
if (censorStrength == "Weak") {
censorFunction <- function(endDay, outcomeDay) {
if_else(runif(length(endDay)) < 0.05, round(pmin(endDay, outcomeDay + runif(length(endDay), 0, 7))), endDay)
}
} else {
censorFunction <- function(endDay, outcomeDay) {
if_else(runif(length(endDay)) < 0.25, round(pmin(endDay, outcomeDay + runif(length(endDay), 0, 7))), endDay)
}
}
} else if (censorType == "Gradual") {
# Added hazard of dying for rest of time:
if (censorStrength == "Weak") {
censorFunction <- function(endDay, outcomeDay) {
pmin(endDay, outcomeDay + rexp(length(endDay), 0.001))
}
} else {
censorFunction <- function(endDay, outcomeDay) {
pmin(endDay, outcomeDay + rexp(length(endDay), 0.01))
}
}
} else {
censorFunction <- function(endDay, outcomeDay) {
endDay
}
}
scenario <- list(settings = settings,
censorFunction = censorFunction,
trueRr = trueRr,
baselineRate = baseLineRate,
usageRateSlope = usageRateSlope,
censorType = censorType,
censorStrength = censorStrength)
scenarios[[length(scenarios) + 1]] <- scenario

}
}
}
}
}
writeLines(sprintf("Number of simulation scenarios: %d", length(scenarios)))

# Run simulations ----------------------------------------------------------------------------------
folder <- "simulations"

scenario = scenarios[[10]]
simulateOne <- function(seed, scenario) {
set.seed(seed)
sccsData <- simulateSccsData(1000, scenario$settings)
firstOutcomeEra <- sccsData$eras |>
filter(eraId == 10) |>
group_by(caseId) |>
filter(row_number(eraStartDay) == 1) |>
ungroup() |>
select(caseId, outcomeDay = eraStartDay)

# Censor observation at outcome:
sccsData$cases <- sccsData$cases |>
inner_join(firstOutcomeEra, by = join_by(caseId)) |>
collect() |>
mutate(endDay = scenario$censorFunction(endDay, outcomeDay)) |>
select(-outcomeDay)


covarSettings <- createEraCovariateSettings(label = "Exposure of interest",
includeEraIds = 1,
stratifyById = FALSE,
start = 0,
end = 0,
endAnchor = "era end")

preCovarSettings <- createEraCovariateSettings(label = "Pre-exposure",
includeEraIds = 1,
stratifyById = FALSE,
start = -30,
end = -1,
endAnchor = "era start")

studyPop <- createStudyPopulation(sccsData = sccsData,
outcomeId = scenario$settings$outcomeId,
firstOutcomeOnly = TRUE,
naivePeriod = 0)

sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
sccsData = sccsData,
eraCovariateSettings = list(covarSettings, preCovarSettings))

model <- fitSccsModel(sccsIntervalData, profileBounds = NULL)
estimates <- model$estimates
idx1 <- which(estimates$covariateId == 1000)
idx2 <- which(estimates$covariateId == 99)
scenario$settings <- NULL
scenario$censorFunction <- NULL

row <- tibble(logRr = estimates$logRr[idx1],
ci95Lb = exp(estimates$logLb95[idx1]),
ci95Ub = exp(estimates$logUb95[idx1]),
diagnosticEstimate = exp(estimates$logRr[idx2]),
diagnosticP = computeEventDependentObservationP(model))
return(row)
}

cluster <- ParallelLogger::makeCluster(10)
ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries")

dir.create(folder)
rows <- list()
for (i in seq_along(scenarios)) {
writeLines(sprintf("Processing scenario %d of %d", i, length(scenarios)))
scenario <- scenarios[[i]]
scenarioKey <- scenario
scenarioKey$settings <- NULL
scenarioKey$censorFunction <- NULL
fileName <- paste0(paste(gsub("__", "", gsub("[^a-zA-Z0-9]", "_", paste(names(scenarioKey), scenarioKey, sep = "_"))), collapse = "_"), ".rds")
fileName <- file.path(folder, fileName)
if (file.exists(fileName)) {
results <- readRDS(fileName)
} else {
results <- ParallelLogger::clusterApply(cluster, 1:100, simulateOne, scenario = scenario)
results <- bind_rows(results)
saveRDS(results, fileName)
}
metrics <- results |>
mutate(coverage = ci95Lb < trueRr & ci95Ub > trueRr,
diagnosticEstimate = log(diagnosticEstimate),
failDiagnostic = diagnosticP < 0.05) |>
summarise(coverage = mean(coverage, na.rm = TRUE),
bias = mean(logRr, na.rm = TRUE),
meanDiagnosticEstimate = exp(mean(diagnosticEstimate, na.rm = TRUE)),
fractionFailingDiagnostic = mean(failDiagnostic, na.rm = TRUE))
row <- as_tibble(scenarioKey) |>
bind_cols(metrics)
rows[[length(rows) + 1]] <- row
}
rows <- bind_rows(rows)

ParallelLogger::stopCluster(cluster)
Loading

0 comments on commit fe98cfe

Please sign in to comment.