First implementation of event-dependent observation end diagnostic.

OHDSI · Sep 23, 2024 · c288a89 · c288a89
1 parent f70f234
commit c288a89
Show file tree

Hide file tree

Showing 18 changed files with 298 additions and 25 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ S3method(confint,SccsModel)
 S3method(print,SccsModel)
 S3method(print,summary.SccsData)
 S3method(print,summary.SccsIntervalData)
+export(computeEventDependentObservationP)
 export(computeMdrr)
 export(computePreExposureGainP)
 export(computeTimeStability)

diff --git a/R/Diagnostics.R b/R/Diagnostics.R
@@ -334,3 +334,27 @@ computePreExposureGainP <- function(sccsData, studyPopulation, exposureEraId = N
   names(p) <- NULL
   return(p)
 }
+
+#' Compute p-value for event-dependent observation end
+#'
+#' @param sccsModel         A fitted SCCS model as created using [fitSccsModel()].
+#'
+#' @return
+#' The p-value
+#'
+#' @export
+computeEventDependentObservationP <- function(sccsModel) {
+  errorMessages <- checkmate::makeAssertCollection()
+  checkmate::assertClass(sccsModel, "SccsModel", null.ok = TRUE, add = errorMessages)
+  checkmate::reportAssertions(collection = errorMessages)
+
+  llr <- sccsModel$estimates |>
+    filter(.data$covariateId == 99) |>
+    pull(llr)
+  if (length(llr) == 0) {
+    warning("No estimate found for the end of observation probe")
+    return(NA)
+  }
+  p <- 0.5 * pchisq(2 * llr, df = 0, lower.tail = FALSE) + 0.5 * pchisq(2 * llr, df = 1, lower.tail = FALSE)
+  return(p)
+}
diff --git a/R/ModelFitting.R b/R/ModelFitting.R
@@ -35,6 +35,9 @@
 #'                              the likelihood for coefficient of variables is sampled. See details.
 #' @param profileBounds         The bounds (on the log relative risk scale) for the adaptive sampling
 #'                              of the likelihood function.
+#' @param endOfObservationEffectBounds The bounds on the estimated effect for the end-of-observation probe. A p-value
+#'                                     will be computed against the null that the effect is within these bounds.
+#'
 #'
 #' @return
 #' An object of type `SccsModel`. Generic functions `print`, `coef`, and
@@ -57,7 +60,8 @@ fitSccsModel <- function(sccsIntervalData,
                            noiseLevel = "quiet"
                          ),
                          profileGrid = NULL,
-                         profileBounds = c(log(0.1), log(10))) {
+                         profileBounds = c(log(0.1), log(10)),
+                         endOfObservationEffectBounds = c(log(0.75), log(1.25))) {
   errorMessages <- checkmate::makeAssertCollection()
   checkmate::assertClass(sccsIntervalData, "SccsIntervalData", null.ok = TRUE, add = errorMessages)
   checkmate::assertClass(prior, "cyclopsPrior", add = errorMessages)
@@ -96,6 +100,7 @@ fitSccsModel <- function(sccsIntervalData,
     needRegularization <- FALSE
     needCi <- c()
     needProfile <- c()
+    needTestForEndofObservation <- FALSE
     covariateSettingsList <- metaData$covariateSettingsList
     for (i in 1:length(covariateSettingsList)) {
       if (covariateSettingsList[[i]]$allowRegularization) {
@@ -138,6 +143,11 @@ fitSccsModel <- function(sccsIntervalData,
         }
       }
     }
+    if (!is.null(metaData$endOfObservationEra) && metaData$endOfObservationEra$endOfObservationEraLength > 0) {
+      needTestForEndofObservation <- TRUE
+      needCi <- c(needCi, metaData$endOfObservationEra$endOfObservationCovariateId)
+      nonRegularized <- c(nonRegularized, metaData$endOfObservationEra$endOfObservationCovariateId)
+    }
 
     if (!needRegularization) {
       prior <- createPrior("none")
@@ -222,12 +232,32 @@ fitSccsModel <- function(sccsIntervalData,
           estimates <- merge(estimates, ci, by.x = "covariateId", by.y = "covariate", all.x = TRUE)
           estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95) / (2 * qnorm(0.975))
           for (param in intersect(needCi, estimates$covariateId)) {
-            llNull <- Cyclops::getCyclopsProfileLogLikelihood(
-              object = fit,
-              parm = param,
-              x = 0,
-              includePenalty = FALSE
-            )$value
+            if (needTestForEndofObservation && param == metaData$endOfObservationEra$endOfObservationCovariateId) {
+              # We use a different NULL for the end-of-observation probe:
+              logRr <- estimates$logRr[estimates$covariateId == param]
+              if (logRr >= endOfObservationEffectBounds[1] && logRr <= endOfObservationEffectBounds[2]) {
+                llNull <- fit$log_likelihood
+              } else {
+                if (logRr < endOfObservationEffectBounds[1]) {
+                  nullLogRr <- endOfObservationEffectBounds[1]
+                } else {
+                  nullLogRr <- endOfObservationEffectBounds[2]
+                }
+                llNull <- Cyclops::getCyclopsProfileLogLikelihood(
+                  object = fit,
+                  parm = param,
+                  x = nullLogRr,
+                  includePenalty = TRUE
+                )$value
+              }
+            } else {
+              llNull <- Cyclops::getCyclopsProfileLogLikelihood(
+                object = fit,
+                parm = param,
+                x = 0,
+                includePenalty = TRUE
+              )$value
+            }
             estimates$llr[estimates$covariateId == param] <- fit$log_likelihood - llNull
           }
         }

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,8 +1,8 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-convertToSccs <- function(cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda) {
-    invisible(.Call('_SelfControlledCaseSeries_convertToSccs', PACKAGE = 'SelfControlledCaseSeries', cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda))
+convertToSccs <- function(cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, endOfObservationEraLength, endOfObservationCovariateId, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda) {
+    invisible(.Call('_SelfControlledCaseSeries_convertToSccs', PACKAGE = 'SelfControlledCaseSeries', cases, outcomes, eras, includeAge, ageOffset, ageDesignMatrix, includeSeason, includeCalendarTime, calendarTimeOffset, calendarTimeDesignMatrix, seasonDesignMatrix, timeCovariateCases, covariateSettingsList, endOfObservationEraLength, endOfObservationCovariateId, eventDependentObservation, censorModel, scri, controlIntervalId, resultAndromeda))
 }
 
 simulateSccsOutcomes <- function(cases, eras, baselineRates, eraRrs, includeAgeEffect, ageOffset, ageRrs, includeSeasonality, seasonRrs, includeCalendarTimeEffect, minCalendarTime, calendarTimeRrs) {

diff --git a/R/SccsDataConversion.R b/R/SccsDataConversion.R
@@ -36,6 +36,9 @@
 #' @param minCasesForAgeSeason        DEPRECATED: Use `minCasesForTimeCovariates` instead.
 #' @param minCasesForTimeCovariates   Minimum number of cases to use to fit age, season and calendar time splines. If
 #'                                    needed (and available), cases that are not exposed will be included.
+#' @param endOfObservationEraLength   Length in days of the probe that is inserted at the end of a patient's
+#'                                    observation time. This probe will be used to test whether there is event-
+#'                                    dependent observation end. Set to 0 to not include the probe.
 #' @param eventDependentObservation   Should the extension proposed by Farrington et al. be used to
 #'                                    adjust for event-dependent observation time?
 #'
@@ -56,6 +59,7 @@ createSccsIntervalData <- function(studyPopulation,
                                    calendarTimeCovariateSettings = NULL,
                                    minCasesForAgeSeason = NULL,
                                    minCasesForTimeCovariates = 10000,
+                                   endOfObservationEraLength = 30,
                                    eventDependentObservation = FALSE) {
   errorMessages <- checkmate::makeAssertCollection()
   checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages)
@@ -98,7 +102,8 @@ createSccsIntervalData <- function(studyPopulation,
   settings <- addEventDependentObservationSettings(
     settings,
     eventDependentObservation,
-    studyPopulation
+    studyPopulation,
+    endOfObservationEraLength
   )
   if (eventDependentObservation && settings$metaData$censorModel$model %in% c(1, 3) && !is.null(ageCovariateSettings)) {
     warning("Optimal censoring model adjusts for age, so removing age as separate covariate.")
@@ -107,7 +112,6 @@ createSccsIntervalData <- function(studyPopulation,
   settings <- addAgeSettings(settings, ageCovariateSettings, studyPopulation)
   settings <- addSeasonalitySettings(settings, seasonalityCovariateSettings, sccsData)
   settings <- addCalendarTimeSettings(settings, calendarTimeCovariateSettings, studyPopulation, sccsData)
-
   settings <- addEraCovariateSettings(settings, eraCovariateSettings, sccsData)
   settings$metaData$covariateSettingsList <- cleanCovariateSettingsList(settings$covariateSettingsList)
   metaData <- append(studyPopulation$metaData, settings$metaData)
@@ -142,6 +146,8 @@ createSccsIntervalData <- function(studyPopulation,
     calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix,
     timeCovariateCases = timeCovariateCases,
     covariateSettingsList = settings$covariateSettingsList,
+    endOfObservationEraLength = settings$endOfObservationEraLength,
+    endOfObservationCovariateId = settings$endOfObservationCovariateId,
     eventDependentObservation = eventDependentObservation,
     censorModel = settings$censorModel,
     scri = FALSE,
@@ -502,7 +508,8 @@ computeObservedPerMonth <- function(studyPopulation) {
 
 addEventDependentObservationSettings <- function(settings,
                                                  eventDependentObservation,
-                                                 studyPopulation) {
+                                                 studyPopulation,
+                                                 endOfObservationEraLength) {
   if (!eventDependentObservation) {
     settings$censorModel <- list(model = 0, p = c(0))
   } else {
@@ -519,6 +526,21 @@ addEventDependentObservationSettings <- function(settings,
     settings$censorModel <- fitModelsAndPickBest(data)
     settings$metaData$censorModel <- settings$censorModel
   }
+  settings$endOfObservationEraLength <- endOfObservationEraLength
+  settings$endOfObservationCovariateId <- 99
+  newCovariateRef <- tibble(
+    covariateId = settings$endOfObservationCovariateId,
+    covariateName = "End of observation period",
+    covariateAnalysisId = NA,
+    originalEraId = 0,
+    originalEraType = "",
+    originalEraName = "",
+    isControlInterval = FALSE
+  )
+  settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef)
+  settings$metaData$endOfObservationEra <- list(
+    endOfObservationEraLength = settings$endOfObservationEraLength,
+    endOfObservationCovariateId = settings$endOfObservationCovariateId)
   return(settings)
 }
 

diff --git a/R/ScriDataConversion.R b/R/ScriDataConversion.R
@@ -106,6 +106,8 @@ createScriIntervalData <- function(studyPopulation,
     calendarTimeDesignMatrix = matrix(),
     timeCovariateCases = numeric(0),
     covariateSettingsList = settings$covariateSettingsList,
+    endOfObservationEraLength = 0,
+    endOfObservationCovariateId = 0,
     eventDependentObservation = FALSE,
     censorModel = list(model = 0, p = c(0)),
     scri = TRUE,

diff --git a/extras/SingleStudyVignetteDataFetch.R b/extras/SingleStudyVignetteDataFetch.R
@@ -37,7 +37,7 @@ cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable)
 CohortGenerator::createCohortTables(connection = connection,
                                     cohortDatabaseSchema = cohortDatabaseSchema,
                                     cohortTableNames = cohortTableNames)
-cohortDefinitionSet <- PhenotypeLibrary::getPlCohortDefinitionSet(356)
+cohortDefinitionSet <- PhenotypeLibrary::getPlCohortDefinitionSet(c(356, 70))
 counts <- CohortGenerator::generateCohortSet(connection = connection,
                                              cdmDatabaseSchema = cdmDatabaseSchema,
                                              cohortDatabaseSchema = cohortDatabaseSchema,
@@ -58,6 +58,7 @@ DatabaseConnector::disconnect(connection)
 # Simple model -----------------------------------------------------------------
 aspirin <- 1112807
 epistaxis <- 356
+stroke <- 70
 
 if (!file.exists(folder))
   dir.create(folder)
@@ -105,7 +106,7 @@ studyPop <- readRDS(file.path(folder, "studyPop.rds"))
 # stability %>%
 #   filter(!stable)
 
-covarAspirin<- createEraCovariateSettings(label = "Exposure of interest",
+covarAspirin <- createEraCovariateSettings(label = "Exposure of interest",
                                           includeEraIds = aspirin,
                                           start = 0,
                                           end = 0,
@@ -117,11 +118,40 @@ saveSccsIntervalData(sccsIntervalData, file.path(folder, "intervalData1.zip"))
 sccsIntervalData <- loadSccsIntervalData(file.path(folder, "intervalData1.zip"))
 sccsIntervalData
 summary(sccsIntervalData)
+metaData <- attr(sccsIntervalData, "metaData")
+metaData$endOfObservationEra
 
 model <- fitSccsModel(sccsIntervalData)
 saveRDS(model, file.path(folder, "simpleModel.rds"))
 model
 
+# Stroke model -----------------------------------------------------------------
+sccsDataStroke <- getDbSccsData(connectionDetails = connectionDetails,
+                          cdmDatabaseSchema = cdmDatabaseSchema,
+                          outcomeDatabaseSchema = cohortDatabaseSchema,
+                          outcomeTable = cohortTable,
+                          outcomeIds = epistaxis,
+                          exposureDatabaseSchema = cdmDatabaseSchema,
+                          exposureTable = "drug_era",
+                          exposureIds = aspirin,
+                          studyStartDates = "20100101",
+                          studyEndDates = "21000101",
+                          maxCasesPerOutcome = 100000)
+saveSccsData(sccsDataStroke, file.path(folder, "data1Stroke.zip"))
+sccsDataStroke <- loadSccsData(file.path(folder, "data1Stroke.zip"))
+
+studyPop <- createStudyPopulation(sccsData = sccsDataStroke,
+                                  outcomeId = stroke,
+                                  firstOutcomeOnly = FALSE,
+                                  naivePeriod = 180)
+sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
+                                           sccsDataStroke,
+                                           eraCovariateSettings = covarAspirin,
+                                           eventDependentObservation = F)
+model <- fitSccsModel(sccsIntervalData)
+model
+
+computeEventDependentObservationP(model)
 
 # Pre-exposure -----------------------------------------------------------------
 covarPreAspirin <- createEraCovariateSettings(label = "Pre-exposure",

diff --git a/extras/TestEndOfObsDiagnostic.R b/extras/TestEndOfObsDiagnostic.R
@@ -0,0 +1,108 @@
+library(SelfControlledCaseSeries)
+options(andromedaTempFolder = "e:/andromedaTemp")
+
+folder <- "e:/temp/edoTest"
+connectionDetails <- createConnectionDetails(
+  dbms = "spark",
+  connectionString = keyring::key_get("databricksConnectionString"),
+  user = "token",
+  password = keyring::key_get("databricksToken")
+)
+cdmDatabaseSchema <- "merative_mdcr.cdm_merative_mdcr_v3045"
+cohortDatabaseSchema <- "scratch.scratch_mschuemi"
+cohortTable  <- "sccs_edo_test"
+options(sqlRenderTempEmulationSchema = "scratch.scratch_mschuemi")
+
+# Create outcome cohorts -------------------------------------------------------
+outcomes <- tibble(
+  outcomeId = c(2072,
+                2088,
+                3429,
+                10650,
+                12480,
+                14964,
+                16881,
+                16916),
+  outcomeName = c("AMI",
+                  "Hemorrhagic stroke",
+                  "Suicide ideation, attempt, including drug poisoning",
+                  "COVID-19",
+                  "End-stage renal disease",
+                  "Earliest event of Non-small cell lung cancer (NSCLC), with two diagnosis",
+                  "Motion sickness",
+                  "Multiple myeloma")
+)
+
+ROhdsiWebApi::authorizeWebApi(baseUrl = Sys.getenv("baseUrl"),
+                              authMethod = "windows")
+cohorts <- ROhdsiWebApi::exportCohortDefinitionSet(baseUrl = Sys.getenv("baseUrl"),
+                                                   cohortIds = outcomes$outcomeId)
+
+connection <- DatabaseConnector::connect(connectionDetails)
+
+cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable)
+CohortGenerator::createCohortTables(connection = connection,
+                                    cohortDatabaseSchema = cohortDatabaseSchema,
+                                    cohortTableNames = cohortTableNames)
+counts <- CohortGenerator::generateCohortSet(connection = connection,
+                                             cdmDatabaseSchema = cdmDatabaseSchema,
+                                             cohortDatabaseSchema = cohortDatabaseSchema,
+                                             cohortTableNames = cohortTableNames,
+                                             cohortDefinitionSet = cohorts)
+
+# Check number of subjects per cohort:
+sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohortDatabaseSchema.@cohortTable GROUP BY cohort_definition_id;"
+sql <- SqlRender::render(sql,
+                         cohortDatabaseSchema = cohortDatabaseSchema,
+                         cohortTable = cohortTable)
+sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
+DatabaseConnector::querySql(connection, sql)
+
+DatabaseConnector::disconnect(connection)
+
+
+# Compute statistics -----------------------------------------------------------
+aspirin <- 1112807
+
+if (!file.exists(folder))
+  dir.create(folder)
+
+sccsData <- getDbSccsData(connectionDetails = connectionDetails,
+                          cdmDatabaseSchema = cdmDatabaseSchema,
+                          outcomeDatabaseSchema = cohortDatabaseSchema,
+                          outcomeTable = cohortTable,
+                          outcomeIds = outcomes$outcomeId,
+                          exposureDatabaseSchema = cdmDatabaseSchema,
+                          exposureTable = "drug_era",
+                          exposureIds = aspirin,
+                          studyStartDates = "20100101",
+                          studyEndDates = "21000101",
+                          maxCasesPerOutcome = 100000)
+saveSccsData(sccsData, file.path(folder, "data1.zip"))
+sccsData <- loadSccsData(file.path(folder, "data1.zip"))
+
+i <- 1
+for (i in seq_len(nrow(outcomes))) {
+  studyPop <- createStudyPopulation(sccsData = sccsData,
+                                    outcomeId = outcomes$outcomeId[i],
+                                    firstOutcomeOnly = FALSE,
+                                    naivePeriod = 180)
+  covarAspirin <- createEraCovariateSettings(label = "Exposure of interest",
+                                             includeEraIds = aspirin,
+                                             start = 0,
+                                             end = 0,
+                                             endAnchor = "era end")
+  sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
+                                             sccsData,
+                                             eraCovariateSettings = covarAspirin)
+  model <- fitSccsModel(sccsIntervalData)
+  p <- computeEventDependentObservationP(model)
+  outcomes$p[i] <- sprintf("%0.4f", p)
+  outcomes$estimate[i] <- model$estimates |>
+    filter(covariateId == 99) |>
+    mutate(estimate = sprintf("%0.2f (%0.2f - %0.2f)", exp(logRr), exp(logLb95), exp(logUb95))) |>
+    pull(estimate)
+}
+outcomes
+
+readr::write_csv(outcomes, file.path(folder,"Results.csv"))