Skip to content

Commit

Permalink
First implementation of event-dependent observation end diagnostic.
Browse files Browse the repository at this point in the history
  • Loading branch information
Admin_mschuemi authored and Admin_mschuemi committed Sep 23, 2024
1 parent f70f234 commit c288a89
Show file tree
Hide file tree
Showing 18 changed files with 298 additions and 25 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ S3method(confint,SccsModel)
S3method(print,SccsModel)
S3method(print,summary.SccsData)
S3method(print,summary.SccsIntervalData)
export(computeEventDependentObservationP)
export(computeMdrr)
export(computePreExposureGainP)
export(computeTimeStability)
Expand Down
24 changes: 24 additions & 0 deletions R/Diagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,27 @@ computePreExposureGainP <- function(sccsData, studyPopulation, exposureEraId = N
names(p) <- NULL
return(p)
}

#' Compute p-value for event-dependent observation end
#'
#' @param sccsModel A fitted SCCS model as created using [fitSccsModel()].
#'
#' @return
#' The p-value
#'
#' @export
computeEventDependentObservationP <- function(sccsModel) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertClass(sccsModel, "SccsModel", null.ok = TRUE, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

llr <- sccsModel$estimates |>
filter(.data$covariateId == 99) |>
pull(llr)
if (length(llr) == 0) {
warning("No estimate found for the end of observation probe")
return(NA)
}
p <- 0.5 * pchisq(2 * llr, df = 0, lower.tail = FALSE) + 0.5 * pchisq(2 * llr, df = 1, lower.tail = FALSE)
return(p)
}
44 changes: 37 additions & 7 deletions R/ModelFitting.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
#' the likelihood for coefficient of variables is sampled. See details.
#' @param profileBounds The bounds (on the log relative risk scale) for the adaptive sampling
#' of the likelihood function.
#' @param endOfObservationEffectBounds The bounds on the estimated effect for the end-of-observation probe. A p-value
#' will be computed against the null that the effect is within these bounds.
#'
#'
#' @return
#' An object of type `SccsModel`. Generic functions `print`, `coef`, and
Expand All @@ -57,7 +60,8 @@ fitSccsModel <- function(sccsIntervalData,
noiseLevel = "quiet"
),
profileGrid = NULL,
profileBounds = c(log(0.1), log(10))) {
profileBounds = c(log(0.1), log(10)),
endOfObservationEffectBounds = c(log(0.75), log(1.25))) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertClass(sccsIntervalData, "SccsIntervalData", null.ok = TRUE, add = errorMessages)
checkmate::assertClass(prior, "cyclopsPrior", add = errorMessages)
Expand Down Expand Up @@ -96,6 +100,7 @@ fitSccsModel <- function(sccsIntervalData,
needRegularization <- FALSE
needCi <- c()
needProfile <- c()
needTestForEndofObservation <- FALSE
covariateSettingsList <- metaData$covariateSettingsList
for (i in 1:length(covariateSettingsList)) {
if (covariateSettingsList[[i]]$allowRegularization) {
Expand Down Expand Up @@ -138,6 +143,11 @@ fitSccsModel <- function(sccsIntervalData,
}
}
}
if (!is.null(metaData$endOfObservationEra) && metaData$endOfObservationEra$endOfObservationEraLength > 0) {
needTestForEndofObservation <- TRUE
needCi <- c(needCi, metaData$endOfObservationEra$endOfObservationCovariateId)
nonRegularized <- c(nonRegularized, metaData$endOfObservationEra$endOfObservationCovariateId)
}

if (!needRegularization) {
prior <- createPrior("none")
Expand Down Expand Up @@ -222,12 +232,32 @@ fitSccsModel <- function(sccsIntervalData,
estimates <- merge(estimates, ci, by.x = "covariateId", by.y = "covariate", all.x = TRUE)
estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95) / (2 * qnorm(0.975))
for (param in intersect(needCi, estimates$covariateId)) {
llNull <- Cyclops::getCyclopsProfileLogLikelihood(
object = fit,
parm = param,
x = 0,
includePenalty = FALSE
)$value
if (needTestForEndofObservation && param == metaData$endOfObservationEra$endOfObservationCovariateId) {
# We use a different NULL for the end-of-observation probe:
logRr <- estimates$logRr[estimates$covariateId == param]
if (logRr >= endOfObservationEffectBounds[1] && logRr <= endOfObservationEffectBounds[2]) {
llNull <- fit$log_likelihood
} else {
if (logRr < endOfObservationEffectBounds[1]) {
nullLogRr <- endOfObservationEffectBounds[1]
} else {
nullLogRr <- endOfObservationEffectBounds[2]
}
llNull <- Cyclops::getCyclopsProfileLogLikelihood(
object = fit,
parm = param,
x = nullLogRr,
includePenalty = TRUE
)$value
}
} else {
llNull <- Cyclops::getCyclopsProfileLogLikelihood(
object = fit,
parm = param,
x = 0,
includePenalty = TRUE
)$value
}
estimates$llr[estimates$covariateId == param] <- fit$log_likelihood - llNull
}
}
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

convertToSccs <- function(cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda) {
invisible(.Call('_SelfControlledCaseSeries_convertToSccs', PACKAGE = 'SelfControlledCaseSeries', cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda))
convertToSccs <- function(cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, endOfObservationEraLength, endOfObservationCovariateId, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda) {
invisible(.Call('_SelfControlledCaseSeries_convertToSccs', PACKAGE = 'SelfControlledCaseSeries', cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, endOfObservationEraLength, endOfObservationCovariateId, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda))
}

simulateSccsOutcomes <- function(cases, eras, baselineRates, eraRrs, includeAgeEffect, ageOffset, ageRrs, includeSeasonality, seasonRrs, includeCalendarTimeEffect, minCalendarTime, calendarTimeRrs) {
Expand Down
28 changes: 25 additions & 3 deletions R/SccsDataConversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
#' @param minCasesForAgeSeason DEPRECATED: Use `minCasesForTimeCovariates` instead.
#' @param minCasesForTimeCovariates Minimum number of cases to use to fit age, season and calendar time splines. If
#' needed (and available), cases that are not exposed will be included.
#' @param endOfObservationEraLength Length in days of the probe that is inserted at the end of a patient's
#' observation time. This probe will be used to test whether there is event-
#' dependent observation end. Set to 0 to not include the probe.
#' @param eventDependentObservation Should the extension proposed by Farrington et al. be used to
#' adjust for event-dependent observation time?
#'
Expand All @@ -56,6 +59,7 @@ createSccsIntervalData <- function(studyPopulation,
calendarTimeCovariateSettings = NULL,
minCasesForAgeSeason = NULL,
minCasesForTimeCovariates = 10000,
endOfObservationEraLength = 30,
eventDependentObservation = FALSE) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages)
Expand Down Expand Up @@ -98,7 +102,8 @@ createSccsIntervalData <- function(studyPopulation,
settings <- addEventDependentObservationSettings(
settings,
eventDependentObservation,
studyPopulation
studyPopulation,
endOfObservationEraLength
)
if (eventDependentObservation && settings$metaData$censorModel$model %in% c(1, 3) && !is.null(ageCovariateSettings)) {
warning("Optimal censoring model adjusts for age, so removing age as separate covariate.")
Expand All @@ -107,7 +112,6 @@ createSccsIntervalData <- function(studyPopulation,
settings <- addAgeSettings(settings, ageCovariateSettings, studyPopulation)
settings <- addSeasonalitySettings(settings, seasonalityCovariateSettings, sccsData)
settings <- addCalendarTimeSettings(settings, calendarTimeCovariateSettings, studyPopulation, sccsData)

settings <- addEraCovariateSettings(settings, eraCovariateSettings, sccsData)
settings$metaData$covariateSettingsList <- cleanCovariateSettingsList(settings$covariateSettingsList)
metaData <- append(studyPopulation$metaData, settings$metaData)
Expand Down Expand Up @@ -142,6 +146,8 @@ createSccsIntervalData <- function(studyPopulation,
calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix,
timeCovariateCases = timeCovariateCases,
covariateSettingsList = settings$covariateSettingsList,
endOfObservationEraLength = settings$endOfObservationEraLength,
endOfObservationCovariateId = settings$endOfObservationCovariateId,
eventDependentObservation = eventDependentObservation,
censorModel = settings$censorModel,
scri = FALSE,
Expand Down Expand Up @@ -502,7 +508,8 @@ computeObservedPerMonth <- function(studyPopulation) {

addEventDependentObservationSettings <- function(settings,
eventDependentObservation,
studyPopulation) {
studyPopulation,
endOfObservationEraLength) {
if (!eventDependentObservation) {
settings$censorModel <- list(model = 0, p = c(0))
} else {
Expand All @@ -519,6 +526,21 @@ addEventDependentObservationSettings <- function(settings,
settings$censorModel <- fitModelsAndPickBest(data)
settings$metaData$censorModel <- settings$censorModel
}
settings$endOfObservationEraLength <- endOfObservationEraLength
settings$endOfObservationCovariateId <- 99
newCovariateRef <- tibble(
covariateId = settings$endOfObservationCovariateId,
covariateName = "End of observation period",
covariateAnalysisId = NA,
originalEraId = 0,
originalEraType = "",
originalEraName = "",
isControlInterval = FALSE
)
settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef)
settings$metaData$endOfObservationEra <- list(
endOfObservationEraLength = settings$endOfObservationEraLength,
endOfObservationCovariateId = settings$endOfObservationCovariateId)
return(settings)
}

Expand Down
2 changes: 2 additions & 0 deletions R/ScriDataConversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ createScriIntervalData <- function(studyPopulation,
calendarTimeDesignMatrix = matrix(),
timeCovariateCases = numeric(0),
covariateSettingsList = settings$covariateSettingsList,
endOfObservationEraLength = 0,
endOfObservationCovariateId = 0,
eventDependentObservation = FALSE,
censorModel = list(model = 0, p = c(0)),
scri = TRUE,
Expand Down
34 changes: 32 additions & 2 deletions extras/SingleStudyVignetteDataFetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable)
CohortGenerator::createCohortTables(connection = connection,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTableNames = cohortTableNames)
cohortDefinitionSet <- PhenotypeLibrary::getPlCohortDefinitionSet(356)
cohortDefinitionSet <- PhenotypeLibrary::getPlCohortDefinitionSet(c(356, 70))
counts <- CohortGenerator::generateCohortSet(connection = connection,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
Expand All @@ -58,6 +58,7 @@ DatabaseConnector::disconnect(connection)
# Simple model -----------------------------------------------------------------
aspirin <- 1112807
epistaxis <- 356
stroke <- 70

if (!file.exists(folder))
dir.create(folder)
Expand Down Expand Up @@ -105,7 +106,7 @@ studyPop <- readRDS(file.path(folder, "studyPop.rds"))
# stability %>%
# filter(!stable)

covarAspirin<- createEraCovariateSettings(label = "Exposure of interest",
covarAspirin <- createEraCovariateSettings(label = "Exposure of interest",
includeEraIds = aspirin,
start = 0,
end = 0,
Expand All @@ -117,11 +118,40 @@ saveSccsIntervalData(sccsIntervalData, file.path(folder, "intervalData1.zip"))
sccsIntervalData <- loadSccsIntervalData(file.path(folder, "intervalData1.zip"))
sccsIntervalData
summary(sccsIntervalData)
metaData <- attr(sccsIntervalData, "metaData")
metaData$endOfObservationEra

model <- fitSccsModel(sccsIntervalData)
saveRDS(model, file.path(folder, "simpleModel.rds"))
model

# Stroke model -----------------------------------------------------------------
sccsDataStroke <- getDbSccsData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = cohortTable,
outcomeIds = epistaxis,
exposureDatabaseSchema = cdmDatabaseSchema,
exposureTable = "drug_era",
exposureIds = aspirin,
studyStartDates = "20100101",
studyEndDates = "21000101",
maxCasesPerOutcome = 100000)
saveSccsData(sccsDataStroke, file.path(folder, "data1Stroke.zip"))
sccsDataStroke <- loadSccsData(file.path(folder, "data1Stroke.zip"))

studyPop <- createStudyPopulation(sccsData = sccsDataStroke,
outcomeId = stroke,
firstOutcomeOnly = FALSE,
naivePeriod = 180)
sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
sccsDataStroke,
eraCovariateSettings = covarAspirin,
eventDependentObservation = F)
model <- fitSccsModel(sccsIntervalData)
model

computeEventDependentObservationP(model)

# Pre-exposure -----------------------------------------------------------------
covarPreAspirin <- createEraCovariateSettings(label = "Pre-exposure",
Expand Down
108 changes: 108 additions & 0 deletions extras/TestEndOfObsDiagnostic.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
library(SelfControlledCaseSeries)
options(andromedaTempFolder = "e:/andromedaTemp")

folder <- "e:/temp/edoTest"
connectionDetails <- createConnectionDetails(
dbms = "spark",
connectionString = keyring::key_get("databricksConnectionString"),
user = "token",
password = keyring::key_get("databricksToken")
)
cdmDatabaseSchema <- "merative_mdcr.cdm_merative_mdcr_v3045"
cohortDatabaseSchema <- "scratch.scratch_mschuemi"
cohortTable <- "sccs_edo_test"
options(sqlRenderTempEmulationSchema = "scratch.scratch_mschuemi")

# Create outcome cohorts -------------------------------------------------------
outcomes <- tibble(
outcomeId = c(2072,
2088,
3429,
10650,
12480,
14964,
16881,
16916),
outcomeName = c("AMI",
"Hemorrhagic stroke",
"Suicide ideation, attempt, including drug poisoning",
"COVID-19",
"End-stage renal disease",
"Earliest event of Non-small cell lung cancer (NSCLC), with two diagnosis",
"Motion sickness",
"Multiple myeloma")
)

ROhdsiWebApi::authorizeWebApi(baseUrl = Sys.getenv("baseUrl"),
authMethod = "windows")
cohorts <- ROhdsiWebApi::exportCohortDefinitionSet(baseUrl = Sys.getenv("baseUrl"),
cohortIds = outcomes$outcomeId)

connection <- DatabaseConnector::connect(connectionDetails)

cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable)
CohortGenerator::createCohortTables(connection = connection,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTableNames = cohortTableNames)
counts <- CohortGenerator::generateCohortSet(connection = connection,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTableNames = cohortTableNames,
cohortDefinitionSet = cohorts)

# Check number of subjects per cohort:
sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohortDatabaseSchema.@cohortTable GROUP BY cohort_definition_id;"
sql <- SqlRender::render(sql,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable)
sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
DatabaseConnector::querySql(connection, sql)

DatabaseConnector::disconnect(connection)


# Compute statistics -----------------------------------------------------------
aspirin <- 1112807

if (!file.exists(folder))
dir.create(folder)

sccsData <- getDbSccsData(connectionDetails = connectionDetails,
cdmDatabaseSchema = cdmDatabaseSchema,
outcomeDatabaseSchema = cohortDatabaseSchema,
outcomeTable = cohortTable,
outcomeIds = outcomes$outcomeId,
exposureDatabaseSchema = cdmDatabaseSchema,
exposureTable = "drug_era",
exposureIds = aspirin,
studyStartDates = "20100101",
studyEndDates = "21000101",
maxCasesPerOutcome = 100000)
saveSccsData(sccsData, file.path(folder, "data1.zip"))
sccsData <- loadSccsData(file.path(folder, "data1.zip"))

i <- 1
for (i in seq_len(nrow(outcomes))) {
studyPop <- createStudyPopulation(sccsData = sccsData,
outcomeId = outcomes$outcomeId[i],
firstOutcomeOnly = FALSE,
naivePeriod = 180)
covarAspirin <- createEraCovariateSettings(label = "Exposure of interest",
includeEraIds = aspirin,
start = 0,
end = 0,
endAnchor = "era end")
sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
sccsData,
eraCovariateSettings = covarAspirin)
model <- fitSccsModel(sccsIntervalData)
p <- computeEventDependentObservationP(model)
outcomes$p[i] <- sprintf("%0.4f", p)
outcomes$estimate[i] <- model$estimates |>
filter(covariateId == 99) |>
mutate(estimate = sprintf("%0.2f (%0.2f - %0.2f)", exp(logRr), exp(logLb95), exp(logUb95))) |>
pull(estimate)
}
outcomes

readr::write_csv(outcomes, file.path(folder,"Results.csv"))
Loading

0 comments on commit c288a89

Please sign in to comment.