
Commit

Adding option to use MDRR for custom benchmark filtering
schuemie authored and schuemie committed May 10, 2023
1 parent 01ed5ca commit b811886
Showing 2 changed files with 68 additions and 19 deletions.
82 changes: 63 additions & 19 deletions R/Metrics.R
@@ -52,7 +52,7 @@ computeMetrics <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL,
checkmate::assertNumeric(p, len = length(logRr), null.ok = TRUE, add = errorMessages)
checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

if (is.null(seLogRr) && is.null(ci95Lb)) {
stop("Must specify either standard error or confidence interval")
}
@@ -78,14 +78,14 @@
} else {
data$p <- p
}

idx <- is.na(data$logRr) | is.infinite(data$logRr) | is.na(data$seLogRr) | is.infinite(data$seLogRr)
data$logRr[idx] <- 0
data$seLogRr[idx] <- 999
data$ci95Lb[idx] <- 0
data$ci95Ub[idx] <- 999
data$p[idx] <- 1

nonEstimable <- round(mean(data$seLogRr >= 99), 2)
roc <- pROC::roc(data$trueLogRr > 0, data$logRr, algorithm = 3)
auc <- round(pROC::auc(roc), 2)
@@ -206,19 +206,19 @@ packageOhdsiBenchmarkResults <- function(estimates,
checkmate::assertCharacter(databaseName, len = 1, add = errorMessages)
checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

if (!file.exists(exportFolder)) {
dir.create(exportFolder, recursive = TRUE)
}

# Create full grid of controls (including those that did not make it in the database:
if (referenceSet == "ohdsiMethodsBenchmark") {
ohdsiNegativeControls <- readRDS(system.file("ohdsiNegativeControls.rds",
package = "MethodEvaluation"
package = "MethodEvaluation"
))
} else {
ohdsiNegativeControls <- readRDS(system.file("ohdsiDevelopmentNegativeControls.rds",
package = "MethodEvaluation"
package = "MethodEvaluation"
))
}
ohdsiNegativeControls$oldOutcomeId <- ohdsiNegativeControls$outcomeId
@@ -246,7 +246,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
fullGrid$targetEffectSize[idx]
)
allControls <- merge(controlSummary, fullGrid, all.y = TRUE)

.packageBenchmarkResults(
allControls = allControls,
analysisRef = analysisRef,
@@ -282,7 +282,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
by = join_by("analysisId")
) %>%
mutate(database = databaseName)

# Perform empirical calibration:
# subset = subsets[[2]]
calibrate <- function(subset) {
@@ -312,7 +312,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
model = model
)
null <- EmpiricalCalibration::fitNull(logRr = subsetMinusOne$logRr[subsetMinusOne$targetEffectSize ==
1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1])
1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1])
caliP <- EmpiricalCalibration::calibrateP(
null = null,
logRr = one$logRr,
@@ -352,7 +352,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
readr::write_csv(calibratedEstimates, estimatesFileName)
readr::write_csv(analysisRef, analysisRefFileName)
ParallelLogger::logInfo("Estimates have been written to ", estimatesFileName)
ParallelLogger::logInfo("Analysis reference has been written to ", estimatesFileName)
ParallelLogger::logInfo("Analysis reference has been written to ", analysisRefFileName)
}

#' Package results of a method on the OHDSI Methods Benchmark
@@ -365,6 +365,9 @@ packageOhdsiBenchmarkResults <- function(estimates,
#' @param negativeControls A data frame containing the negative controls. See details for required columns.
#' @param synthesisSummary A data frame with the summary of the positive control synthesis as generated by the
#' \code{\link{synthesizePositiveControls}} function.
#' @param mdrr The MDRR as computed using the \code{\link{computeMdrr}} function. Should contain the
#' following columns: "exposureId", "outcomeId", "mdrr". For comparative analyses,
#' the "mdrr" column can be replaced by a "mdrrTarget" and "mdrrComparator" column.
#' @param analysisRef A file describing the various analyses that were performed. See details for
#' required columns.
#' @param databaseName A character string to identify the database the method was executed on.
@@ -398,6 +401,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
packageCustomBenchmarkResults <- function(estimates,
negativeControls,
synthesisSummary,
mdrr = NULL,
analysisRef,
databaseName,
exportFolder) {
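
For orientation, a minimal sketch of the kind of data frame the new mdrr argument expects, following the column layout documented in the @param mdrr block above; the concept IDs, values, and surrounding objects are hypothetical placeholders, not part of this commit:

# Hypothetical MDRR input. Columns follow the @param mdrr documentation:
# "exposureId", "outcomeId", and either a single "mdrr" column or a
# "mdrrTarget"/"mdrrComparator" pair for comparative analyses.
mdrr <- data.frame(
  exposureId = c(1124300, 1124300),  # placeholder exposure IDs
  outcomeId = c(192671, 24609),      # placeholder outcome IDs
  mdrr = c(1.25, 3.80)               # minimum detectable relative risk per pair
)

packageCustomBenchmarkResults(
  estimates = estimates,                # method estimates (assumed to exist)
  negativeControls = negativeControls,  # negative control reference (assumed)
  synthesisSummary = synthesisSummary,  # from synthesizePositiveControls()
  mdrr = mdrr,                          # new optional argument added in this commit
  analysisRef = analysisRef,
  databaseName = "MyCdm",
  exportFolder = exportFolder
)
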
@@ -426,6 +430,31 @@ packageCustomBenchmarkResults <- function(estimates,
),
add = errorMessages
)
checkmate::assertDataFrame(synthesisSummary, add = errorMessages)
checkmate::assertNames(
colnames(synthesisSummary),
must.include = c(
"exposureId",
"outcomeId",
"targetEffectSize",
"newOutcomeId",
"trueEffectSize",
"trueEffectSizeFirstExposure",
"trueEffectSizeItt"
),
add = errorMessages
)
checkmate::assertDataFrame(mdrr, null.ok = TRUE, add = errorMessages)
if (!is.null(mdrr)) {
checkmate::assertNames(
colnames(mdrr),
must.include = c(
"exposureId",
"outcomeId"
),
add = errorMessages
)
}
checkmate::assertDataFrame(analysisRef, add = errorMessages)
checkmate::assertNames(
colnames(analysisRef),
@@ -441,7 +470,7 @@ packageCustomBenchmarkResults <- function(estimates,
checkmate::assertCharacter(databaseName, len = 1, add = errorMessages)
checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

trueEffecSizes <- c(1, unique(synthesisSummary$targetEffectSize))
negativeControls <- negativeControls %>%
mutate(stratum = if_else(.data$type == "Outcome control", .data$targetId, .data$outcomeId)) %>%
@@ -459,6 +488,21 @@ packageCustomBenchmarkResults <- function(estimates,
by = join_by("targetId", "oldOutcomeId", "targetEffectSize")
) %>%
mutate(outcomeId = if_else(.data$targetEffectSize == 1, .data$oldOutcomeId, .data$outcomeId))
if (is.null(mdrr)) {
allControls <- allControls %>%
mutate(mdrrTarget = as.numeric(NA), mdrrComparator = as.numeric(NA))
} else {
if (!"mdrrTarget" %in% colnames(mdrr)) {
mdrr <- mdrr %>%
mutate(mdrrTarget = .data$mdrr, mdrrComparator = .data$mdrr)
}
allControls <- allControls %>%
left_join(
mdrr %>%
rename(targetId = "exposureId"),
by = c("targetId", "outcomeId")
)
}
.packageBenchmarkResults(
allControls = allControls,
analysisRef = analysisRef,
@@ -500,12 +544,12 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
checkmate::assertNumeric(mdrr, len = 1, add = errorMessages)
checkmate::assertCharacter(stratum, len = 1, add = errorMessages)
checkmate::assertAtomic(stratum, len = 1, add = errorMessages)
checkmate::assertAtomic(trueEffectSize, len = 1, add = errorMessages)
checkmate::assertLogical(calibrated, len = 1, add = errorMessages)
checkmate::assertLogical(comparative, len = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

# Load and prepare estimates of all methods
files <- list.files(exportFolder, "estimates.*csv", full.names = TRUE)
estimates <- lapply(files, read.csv)
@@ -527,12 +571,12 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
estimates$calCi95Lb[idx] <- 0
estimates$calCi95Ub[idx] <- 999
estimates$calP[is.na(estimates$calP)] <- 1

# Load and prepare analysis refs
files <- list.files(exportFolder, "analysisRef.*csv", full.names = TRUE)
analysisRef <- lapply(files, read.csv)
analysisRef <- do.call("rbind", analysisRef)

# Apply selection criteria
subset <- estimates
if (mdrr != "All") {
@@ -551,7 +595,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
subset$ci95Ub <- subset$calCi95Ub
subset$p <- subset$calP
}

# Compute metrics
combis <- unique(subset[, c("database", "method", "analysisId")])
if (trueEffectSize == "Overall") {
@@ -583,7 +627,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
# trueRr <- input$trueRr
computeMetrics <- function(i) {
forEval <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] &
subset$targetEffectSize == trueEffectSize, ]
subset$targetEffectSize == trueEffectSize, ]
mse <- round(mean((forEval$logRr - log(forEval$trueEffectSize))^2), 2)
coverage <- round(
mean(forEval$ci95Lb < forEval$trueEffectSize & forEval$ci95Ub > forEval$trueEffectSize),
@@ -597,7 +641,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
nonEstimable <- round(mean(forEval$seLogRr == 999), 2)
} else {
negAndPos <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] &
(subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ]
(subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ]
roc <- pROC::roc(negAndPos$targetEffectSize > 1, negAndPos$logRr, algorithm = 3)
auc <- round(pROC::auc(roc), 2)
type1 <- NA
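To close the loop, a hedged sketch of how results packaged with MDRR information can then be summarized using the filtering this commit enables; the folder, the 1.25 cutoff, and the "All"/"Overall" selections are illustrative assumptions rather than values asserted by the diff:

# Hypothetical usage. Passing a numeric mdrr cutoff is intended to restrict the
# metrics to controls with sufficient power (low minimum detectable relative
# risk); passing "All" skips that filter, per the "Apply selection criteria" block.
metrics <- computeOhdsiBenchmarkMetrics(
  exportFolder = exportFolder,  # same folder the results were packaged into
  mdrr = 1.25,                  # assumed cutoff for illustration
  stratum = "All",
  trueEffectSize = "Overall",
  calibrated = FALSE,
  comparative = FALSE
)
head(metrics)
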
5 changes: 5 additions & 0 deletions man/packageCustomBenchmarkResults.Rd
