From b8118865c68cdbcdfd84da052d9c45de66a1d0b7 Mon Sep 17 00:00:00 2001
From: schuemie
Date: Wed, 10 May 2023 05:49:49 -0400
Subject: [PATCH] Adding option to use MDRR for custom benchmark filtering

---
 R/Metrics.R                          | 82 +++++++++++++++++++++-------
 man/packageCustomBenchmarkResults.Rd |  5 ++
 2 files changed, 68 insertions(+), 19 deletions(-)

diff --git a/R/Metrics.R b/R/Metrics.R
index 86eacdd..da25a88 100644
--- a/R/Metrics.R
+++ b/R/Metrics.R
@@ -52,7 +52,7 @@ computeMetrics <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL,
   checkmate::assertNumeric(p, len = length(logRr), null.ok = TRUE, add = errorMessages)
   checkmate::assertNumeric(trueLogRr, len = length(logRr), add = errorMessages)
   checkmate::reportAssertions(collection = errorMessages)
-  
+
   if (is.null(seLogRr) && is.null(ci95Lb)) {
     stop("Must specify either standard error or confidence interval")
   }
@@ -78,14 +78,14 @@ computeMetrics <- function(logRr, seLogRr = NULL, ci95Lb = NULL, ci95Ub = NULL,
   } else {
     data$p <- p
   }
-  
+
   idx <- is.na(data$logRr) | is.infinite(data$logRr) | is.na(data$seLogRr) | is.infinite(data$seLogRr)
   data$logRr[idx] <- 0
   data$seLogRr[idx] <- 999
   data$ci95Lb[idx] <- 0
   data$ci95Ub[idx] <- 999
   data$p[idx] <- 1
-  
+
   nonEstimable <- round(mean(data$seLogRr >= 99), 2)
   roc <- pROC::roc(data$trueLogRr > 0, data$logRr, algorithm = 3)
   auc <- round(pROC::auc(roc), 2)
@@ -206,19 +206,19 @@ packageOhdsiBenchmarkResults <- function(estimates,
   checkmate::assertCharacter(databaseName, len = 1, add = errorMessages)
   checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
   checkmate::reportAssertions(collection = errorMessages)
-  
+
   if (!file.exists(exportFolder)) {
     dir.create(exportFolder, recursive = TRUE)
   }
-  
+
   # Create full grid of controls (including those that did not make it in the database:
   if (referenceSet == "ohdsiMethodsBenchmark") {
     ohdsiNegativeControls <- readRDS(system.file("ohdsiNegativeControls.rds",
-                                                 package = "MethodEvaluation"
+      package = "MethodEvaluation"
     ))
   } else {
     ohdsiNegativeControls <- readRDS(system.file("ohdsiDevelopmentNegativeControls.rds",
-                                                 package = "MethodEvaluation"
+      package = "MethodEvaluation"
     ))
   }
   ohdsiNegativeControls$oldOutcomeId <- ohdsiNegativeControls$outcomeId
@@ -246,7 +246,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
     fullGrid$targetEffectSize[idx]
   )
   allControls <- merge(controlSummary, fullGrid, all.y = TRUE)
-  
+
   .packageBenchmarkResults(
     allControls = allControls,
     analysisRef = analysisRef,
@@ -282,7 +282,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
       by = join_by("analysisId")
     ) %>%
     mutate(database = databaseName)
-  
+
   # Perform empirical calibration:
   # subset = subsets[[2]]
   calibrate <- function(subset) {
@@ -312,7 +312,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
       model = model
     )
     null <- EmpiricalCalibration::fitNull(logRr = subsetMinusOne$logRr[subsetMinusOne$targetEffectSize ==
-                                            1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1])
+      1], seLogRr = subsetMinusOne$seLogRr[subsetMinusOne$targetEffectSize == 1])
     caliP <- EmpiricalCalibration::calibrateP(
       null = null,
       logRr = one$logRr,
@@ -352,7 +352,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
   readr::write_csv(calibratedEstimates, estimatesFileName)
   readr::write_csv(analysisRef, analysisRefFileName)
   ParallelLogger::logInfo("Estimates have been written to ", estimatesFileName)
-  ParallelLogger::logInfo("Analysis reference has been written to ", estimatesFileName)
+  ParallelLogger::logInfo("Analysis reference has been written to ", analysisRefFileName)
 }
 
 #' Package results of a method on the OHDSI Methods Benchmark
@@ -365,6 +365,9 @@ packageOhdsiBenchmarkResults <- function(estimates,
 #' @param negativeControls A data frame containing the negative controls. See details for required columns.
 #' @param synthesisSummary A data frame with the summary of the positive control synthesis as generated by the
 #' \code{\link{synthesizePositiveControls}} function.
+#' @param mdrr The MDRR as computed using the \code{\link{computeMdrr}} function. Should contain the
+#' following columns: "exposureId", "outcomeId", "mdrr". For comparative analyses,
+#' the "mdrr" column can be replaced by "mdrrTarget" and "mdrrComparator" columns.
 #' @param analysisRef A file describing the various analyses that were performed. See details for
 #' required columns.
 #' @param databaseName A character string to identify the database the method was executed on.
@@ -398,6 +401,7 @@ packageOhdsiBenchmarkResults <- function(estimates,
 packageCustomBenchmarkResults <- function(estimates,
                                           negativeControls,
                                           synthesisSummary,
+                                          mdrr = NULL,
                                           analysisRef,
                                           databaseName,
                                           exportFolder) {
@@ -426,6 +430,31 @@ packageCustomBenchmarkResults <- function(estimates,
     ),
     add = errorMessages
   )
+  checkmate::assertDataFrame(synthesisSummary, add = errorMessages)
+  checkmate::assertNames(
+    colnames(synthesisSummary),
+    must.include = c(
+      "exposureId",
+      "outcomeId",
+      "targetEffectSize",
+      "newOutcomeId",
+      "trueEffectSize",
+      "trueEffectSizeFirstExposure",
+      "trueEffectSizeItt"
+    ),
+    add = errorMessages
+  )
+  checkmate::assertDataFrame(mdrr, null.ok = TRUE, add = errorMessages)
+  if (!is.null(mdrr)) {
+    checkmate::assertNames(
+      colnames(mdrr),
+      must.include = c(
+        "exposureId",
+        "outcomeId"
+      ),
+      add = errorMessages
+    )
+  }
   checkmate::assertDataFrame(analysisRef, add = errorMessages)
   checkmate::assertNames(
     colnames(analysisRef),
@@ -441,7 +470,7 @@ packageCustomBenchmarkResults <- function(estimates,
   checkmate::assertCharacter(databaseName, len = 1, add = errorMessages)
   checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
   checkmate::reportAssertions(collection = errorMessages)
-  
+
   trueEffecSizes <- c(1, unique(synthesisSummary$targetEffectSize))
   negativeControls <- negativeControls %>%
     mutate(stratum = if_else(.data$type == "Outcome control", .data$targetId, .data$outcomeId)) %>%
@@ -459,6 +488,21 @@ packageCustomBenchmarkResults <- function(estimates,
       by = join_by("targetId", "oldOutcomeId", "targetEffectSize")
     ) %>%
     mutate(outcomeId = if_else(.data$targetEffectSize == 1, .data$oldOutcomeId, .data$outcomeId))
+  if (is.null(mdrr)) {
+    allControls <- allControls %>%
+      mutate(mdrrTarget = as.numeric(NA), mdrrComparator = as.numeric(NA))
+  } else {
+    if (!"mdrrTarget" %in% colnames(mdrr)) {
+      mdrr <- mdrr %>%
+        mutate(mdrrTarget = .data$mdrr, mdrrComparator = .data$mdrr)
+    }
+    allControls <- allControls %>%
+      left_join(
+        mdrr %>%
+          rename(targetId = "exposureId"),
+        by = c("targetId", "outcomeId")
+      )
+  }
   .packageBenchmarkResults(
     allControls = allControls,
     analysisRef = analysisRef,
@@ -500,12 +544,12 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
   errorMessages <- checkmate::makeAssertCollection()
   checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages)
   checkmate::assertNumeric(mdrr, len = 1, add = errorMessages)
-  checkmate::assertCharacter(stratum, len = 1, add = errorMessages)
+  checkmate::assertAtomic(stratum, len = 1, add = errorMessages)
   checkmate::assertAtomic(trueEffectSize, len = 1, add = errorMessages)
   checkmate::assertLogical(calibrated, len = 1, add = errorMessages)
   checkmate::assertLogical(comparative, len = 1, add = errorMessages)
   checkmate::reportAssertions(collection = errorMessages)
-  
+
   # Load and prepare estimates of all methods
   files <- list.files(exportFolder, "estimates.*csv", full.names = TRUE)
   estimates <- lapply(files, read.csv)
@@ -527,12 +571,12 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
   estimates$calCi95Lb[idx] <- 0
   estimates$calCi95Ub[idx] <- 999
   estimates$calP[is.na(estimates$calP)] <- 1
-  
+
   # Load and prepare analysis refs
   files <- list.files(exportFolder, "analysisRef.*csv", full.names = TRUE)
   analysisRef <- lapply(files, read.csv)
   analysisRef <- do.call("rbind", analysisRef)
-  
+
   # Apply selection criteria
   subset <- estimates
   if (mdrr != "All") {
@@ -551,7 +595,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
     subset$ci95Ub <- subset$calCi95Ub
     subset$p <- subset$calP
   }
-  
+
   # Compute metrics
   combis <- unique(subset[, c("database", "method", "analysisId")])
   if (trueEffectSize == "Overall") {
@@ -583,7 +627,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
     # trueRr <- input$trueRr
     computeMetrics <- function(i) {
       forEval <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] &
-                          subset$targetEffectSize == trueEffectSize, ]
+        subset$targetEffectSize == trueEffectSize, ]
       mse <- round(mean((forEval$logRr - log(forEval$trueEffectSize))^2), 2)
       coverage <- round(
         mean(forEval$ci95Lb < forEval$trueEffectSize & forEval$ci95Ub > forEval$trueEffectSize),
@@ -597,7 +641,7 @@ computeOhdsiBenchmarkMetrics <- function(exportFolder,
        nonEstimable <- round(mean(forEval$seLogRr == 999), 2)
      } else {
        negAndPos <- subset[subset$method == combis$method[i] & subset$analysisId == combis$analysisId[i] &
-                             (subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ]
+         (subset$targetEffectSize == trueEffectSize | subset$targetEffectSize == 1), ]
        roc <- pROC::roc(negAndPos$targetEffectSize > 1, negAndPos$logRr, algorithm = 3)
        auc <- round(pROC::auc(roc), 2)
        type1 <- NA
diff --git a/man/packageCustomBenchmarkResults.Rd b/man/packageCustomBenchmarkResults.Rd
index efecfcf..6a6e469 100644
--- a/man/packageCustomBenchmarkResults.Rd
+++ b/man/packageCustomBenchmarkResults.Rd
@@ -8,6 +8,7 @@ packageCustomBenchmarkResults(
   estimates,
   negativeControls,
   synthesisSummary,
+  mdrr = NULL,
   analysisRef,
   databaseName,
   exportFolder
@@ -21,6 +22,10 @@ packageCustomBenchmarkResults(
 \item{synthesisSummary}{A data frame with the summary of the positive control synthesis as generated by the
 \code{\link{synthesizePositiveControls}} function.}
 
+\item{mdrr}{The MDRR as computed using the \code{\link{computeMdrr}} function. Should contain the
+following columns: "exposureId", "outcomeId", "mdrr". For comparative analyses,
+the "mdrr" column can be replaced by "mdrrTarget" and "mdrrComparator" columns.}
+
 \item{analysisRef}{A file describing the various analyses that were performed. See details for
 required columns.}
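
Usage sketch (illustrative note, not part of the patch): the call below shows how the new `mdrr` argument of packageCustomBenchmarkResults() might be supplied, assuming that `estimates`, `negativeControls`, `synthesisSummary`, and `analysisRef` objects already exist as described in the documentation above. The exposure and outcome IDs and the MDRR (minimum detectable relative risk) values are invented for illustration; in practice the table would come from the computeMdrr() function referenced in the @param text. As the added code shows, a table with only an "mdrr" column is copied into "mdrrTarget" and "mdrrComparator" before being joined to the controls.

# Hypothetical MDRR table: one row per exposure-outcome pair (values invented)
mdrr <- data.frame(
  exposureId = c(1124300, 1124300),
  outcomeId = c(192671, 29735),
  mdrr = c(1.25, 2.10)
)

packageCustomBenchmarkResults(
  estimates = estimates,               # method estimates (assumed to exist)
  negativeControls = negativeControls, # negative control definitions (assumed to exist)
  synthesisSummary = synthesisSummary, # output of synthesizePositiveControls()
  mdrr = mdrr,                         # new optional argument added by this patch
  analysisRef = analysisRef,           # analysis reference table (assumed to exist)
  databaseName = "MyCdm",              # placeholder database name
  exportFolder = "customBenchmarkExport"  # placeholder export folder
)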
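
A hedged follow-up sketch: once results are packaged with MDRR information, computeOhdsiBenchmarkMetrics() can restrict the evaluation to adequately powered controls. The argument names below are taken from the assertion block in this file; the threshold is an arbitrary example, and the exact filtering rule lives in the existing `mdrr != "All"` branch, which this patch does not change.

# Hypothetical call: summarize performance only for controls with a sufficiently low MDRR
metrics <- computeOhdsiBenchmarkMetrics(
  exportFolder = "customBenchmarkExport",  # same folder used when packaging results
  mdrr = 1.25,                             # example threshold; "All" disables the filter
  stratum = "All",
  trueEffectSize = "Overall",
  calibrated = TRUE,                       # use the empirically calibrated estimates
  comparative = FALSE
)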