Skip to content

Commit

Permalink
fitNull() and fitMcmcNull()`, as well as downstream calibration fun…
Browse files Browse the repository at this point in the history
…ctions, throw no error when there are no valid negative controls estimates (instead returning NAs).
  • Loading branch information
schuemie committed Aug 22, 2024
1 parent 14b6707 commit 37e237c
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 107 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: EmpiricalCalibration
Type: Package
Title: Routines for Performing Empirical Calibration of Observational Study Estimates
Version: 3.1.2
Date: 2023-12-21
Date: 2024-08-22
Authors@R: c(
person("Martijn", "Schuemie", , "[email protected]", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0817-5361")),
person("Marc", "Suchard", role = c("aut"), comment = c(ORCID = "0000-0001-9818-479X"))
Expand Down Expand Up @@ -40,5 +40,5 @@ LinkingTo: Rcpp
NeedsCompilation: yes
URL: https://ohdsi.github.io/EmpiricalCalibration/, https://github.com/OHDSI/EmpiricalCalibration
BugReports: https://github.com/OHDSI/EmpiricalCalibration/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Encoding: UTF-8
10 changes: 9 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
EmpiricalCalibration 3.1.3
==========================

Changes

1. `fitNull()` and `fitMcmcNull()`, as well as downstream calibration functions, throw no error when there are no valid negative controls estimates (instead returning NAs).


EmpiricalCalibration 3.1.2
==========================

Expand All @@ -11,7 +19,7 @@ Bugfixes
EmpiricalCalibration 3.1.1
==========================

Changes.
Changes

1. Making sure we pass R check even if suggested packages are unavailable, as required by CRAN.

Expand Down
104 changes: 53 additions & 51 deletions R/ConfidenceIntervalCalibration.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fitSystematicErrorModel <- function(logRr,
seLogRr <- seLogRr[!is.na(logRr)]
logRr <- logRr[!is.na(logRr)]
}

if (legacy) {
theta <- c(0, 1, -2, 0)
logLikelihood <- minLogLikelihoodErrorModelLegacy
Expand All @@ -83,15 +83,15 @@ fitSystematicErrorModel <- function(logRr,
logLikelihood <- minLogLikelihoodErrorModel
parscale <- c(1, 1, 1, 1)
}

fit <- optim(theta,
logLikelihood,
logRr = logRr,
seLogRr = seLogRr,
trueLogRr = trueLogRr,
method = "BFGS",
hessian = TRUE,
control = list(parscale = parscale)
logLikelihood,
logRr = logRr,
seLogRr = seLogRr,
trueLogRr = trueLogRr,
method = "BFGS",
hessian = TRUE,
control = list(parscale = parscale)
)
model <- fit$par
if (legacy) {
Expand Down Expand Up @@ -162,7 +162,7 @@ calibrateConfidenceInterval <- function(logRr, seLogRr, model, ciWidth = 0.95) {
}
return(z + numerator / denominator)
}

logBound <- function(ciWidth,
lb = TRUE,
logRr,
Expand Down Expand Up @@ -259,48 +259,50 @@ calibrateConfidenceInterval <- function(logRr, seLogRr, model, ciWidth = 0.95) {
legacy = legacy
)$root)
}

legacy <- (names(model)[3] == "logSdIntercept")
result <- data.frame(logRr = rep(0, length(logRr)), logLb95Rr = 0, logUb95Rr = 0)
for (i in 1:nrow(result)) {
if (is.infinite(logRr[i]) || is.na(logRr[i]) || is.infinite(seLogRr[i]) || is.na(seLogRr[i])) {
result$logRr[i] <- NA
result$logLb95Rr[i] <- NA
result$logUb95Rr[i] <- NA
} else {
result$logRr[i] <- logBound(
0,
TRUE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
result$logLb95Rr[i] <- logBound(
ciWidth,
TRUE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
result$logUb95Rr[i] <- logBound(
ciWidth,
FALSE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
result <- data.frame(logRr = rep(as.numeric(NA), length(logRr)), logLb95Rr = as.numeric(NA), logUb95Rr = as.numeric(NA))
if (!is.na(model[1])) {
for (i in 1:nrow(result)) {
if (is.infinite(logRr[i]) || is.na(logRr[i]) || is.infinite(seLogRr[i]) || is.na(seLogRr[i])) {
result$logRr[i] <- NA
result$logLb95Rr[i] <- NA
result$logUb95Rr[i] <- NA
} else {
result$logRr[i] <- logBound(
0,
TRUE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
result$logLb95Rr[i] <- logBound(
ciWidth,
TRUE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
result$logUb95Rr[i] <- logBound(
ciWidth,
FALSE,
logRr[i],
seLogRr[i],
model[1],
model[2],
model[3],
model[4],
legacy
)
}
}
}
result$seLogRr <- (result$logLb95Rr - result$logUb95Rr) / (2 * qnorm((1 - ciWidth) / 2))
Expand Down
118 changes: 65 additions & 53 deletions R/EmpiricalCalibrationUsingMcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
proposalFunction <- function(param, scale) {
dim <- length(param)
draw <- rnorm(dim, mean = param, sd = scale)

# Precision cannot be negative:
draw[2] <- abs(draw[2])
# draw[2] <- max(0, draw[2])
Expand All @@ -31,18 +31,18 @@ runMetropolisMcmc <- function(startValue, iterations, scale, logRr, seLogRr) {
chain <- array(dim = c(iterations + 1, dim))
logLik <- array(dim = c(iterations + 1, 1))
acc <- array(dim = c(iterations + 1, 1))

logLik[1] <- -logLikelihoodNullMcmc(startValue, logRr, seLogRr)
chain[1, ] <- c(startValue)
acc[1] <- 1

for (i in 1:iterations) {
# print(paste('itr =', i))
proposal <- proposalFunction(chain[i, ], scale = scale)
newLogLik <- tryCatch(-logLikelihoodNullMcmc(proposal, logRr, seLogRr), error = function(e) {
-1e+10
})

# print(paste(paste(proposal, collapse = ","), newLogLik))
prob <- exp(newLogLik - logLik[i])
if (runif(1) < prob) {
Expand Down Expand Up @@ -177,22 +177,27 @@ fitMcmcNull <- function(logRr, seLogRr, iter = 100000) {
seLogRr <- seLogRr[!is.na(logRr)]
logRr <- logRr[!is.na(logRr)]
}
fit <- optim(c(0, 100), logLikelihoodNullMcmc, logRr = logRr, seLogRr = seLogRr)

# Profile likelihood for roughly correct scale:
scale <- binarySearchMu(fit$par[1],
fit$par[2],
logRrNegatives = logRr,
seLogRrNegatives = seLogRr
)
scale <- c(scale, binarySearchSigma(fit$par[1],
fit$par[2],
logRrNegatives = logRr,
seLogRrNegatives = seLogRr
))

# writeLines(paste('Scale:', paste(scale,collapse=',')))
mcmc <- runMetropolisMcmc(fit$par, iterations = iter, scale, logRr, seLogRr)
if (length(logRr) == 0) {
warning("No valid estimates left. Returning undefined null distribution")
mcmc <- list(chain = matrix(c(NA,NA), nrow = 1, ncol = 2))
} else {
fit <- optim(c(0, 100), logLikelihoodNullMcmc, logRr = logRr, seLogRr = seLogRr)

# Profile likelihood for roughly correct scale:
scale <- binarySearchMu(fit$par[1],
fit$par[2],
logRrNegatives = logRr,
seLogRrNegatives = seLogRr
)
scale <- c(scale, binarySearchSigma(fit$par[1],
fit$par[2],
logRrNegatives = logRr,
seLogRrNegatives = seLogRr
))

# writeLines(paste('Scale:', paste(scale,collapse=',')))
mcmc <- runMetropolisMcmc(fit$par, iterations = iter, scale, logRr, seLogRr)
}
result <- c(median(mcmc$chain[, 1]), median(mcmc$chain[, 2]))
attr(result, "mcmc") <- mcmc
class(result) <- "mcmcNull"
Expand All @@ -202,20 +207,25 @@ fitMcmcNull <- function(logRr, seLogRr, iter = 100000) {
#' @export
print.mcmcNull <- function(x, ...) {
writeLines("Estimated null distribution (using MCMC)\n")
mcmc <- attr(x, "mcmc")
lb95Mean <- quantile(mcmc$chain[, 1], 0.025)
ub95Mean <- quantile(mcmc$chain[, 1], 0.975)
lb95Precision <- quantile(mcmc$chain[, 2], 0.025)
ub95Precision <- quantile(mcmc$chain[, 2], 0.975)
output <- data.frame(
Estimate = c(x[1], x[2]),
lb95 = c(lb95Mean, lb95Precision),
ub95 = c(ub95Mean, ub95Precision)
)
colnames(output) <- c("Estimate", "lower .95", "upper .95")
rownames(output) <- c("Mean", "Precision")
printCoefmat(output)
writeLines(paste("\nAcceptance rate:", mean(mcmc$acc)))
if (is.na(x[1])) {
writeLines("Undefined")
} else {

mcmc <- attr(x, "mcmc")
lb95Mean <- quantile(mcmc$chain[, 1], 0.025)
ub95Mean <- quantile(mcmc$chain[, 1], 0.975)
lb95Precision <- quantile(mcmc$chain[, 2], 0.025)
ub95Precision <- quantile(mcmc$chain[, 2], 0.975)
output <- data.frame(
Estimate = c(x[1], x[2]),
lb95 = c(lb95Mean, lb95Precision),
ub95 = c(ub95Mean, ub95Precision)
)
colnames(output) <- c("Estimate", "lower .95", "upper .95")
rownames(output) <- c("Mean", "Precision")
printCoefmat(output)
writeLines(paste("\nAcceptance rate:", mean(mcmc$acc)))
}
}

#' @describeIn
Expand All @@ -228,27 +238,29 @@ print.mcmcNull <- function(x, ...) {
#' @export
calibrateP.mcmcNull <- function(null, logRr, seLogRr, twoSided = TRUE, upper = TRUE, pValueOnly, ...) {
mcmc <- attr(null, "mcmc")
adjustedP <- data.frame(p = rep(1, length(logRr)), lb95ci = 0, ub95ci = 0)
for (i in 1:length(logRr)) {
if (is.na(logRr[i]) || is.infinite(logRr[i]) || is.na(seLogRr[i]) || is.infinite(seLogRr[i])) {
adjustedP$p[i] <- NA
adjustedP$lb95ci[i] <- NA
adjustedP$ub95ci[i] <- NA
} else {
pUpperBound <- pnorm((mcmc$chain[, 1] - logRr[i]) / sqrt((1 / sqrt(mcmc$chain[, 2]))^2 + seLogRr[i]^2))
pLowerBound <- pnorm((logRr[i] - mcmc$chain[, 1]) / sqrt((1 / sqrt(mcmc$chain[, 2]))^2 + seLogRr[i]^2))
if (twoSided) {
p <- pUpperBound
p[pLowerBound < p] <- pLowerBound[pLowerBound < p]
p <- p * 2
} else if (upper) {
p <- pUpperBound
adjustedP <- data.frame(p = rep(as.numeric(NA), length(logRr)), lb95ci = as.numeric(NA), ub95ci = as.numeric(NA))
if (!is.na(null[1])) {
for (i in 1:length(logRr)) {
if (is.na(logRr[i]) || is.infinite(logRr[i]) || is.na(seLogRr[i]) || is.infinite(seLogRr[i])) {
adjustedP$p[i] <- NA
adjustedP$lb95ci[i] <- NA
adjustedP$ub95ci[i] <- NA
} else {
p <- pLowerBound
pUpperBound <- pnorm((mcmc$chain[, 1] - logRr[i]) / sqrt((1 / sqrt(mcmc$chain[, 2]))^2 + seLogRr[i]^2))
pLowerBound <- pnorm((logRr[i] - mcmc$chain[, 1]) / sqrt((1 / sqrt(mcmc$chain[, 2]))^2 + seLogRr[i]^2))
if (twoSided) {
p <- pUpperBound
p[pLowerBound < p] <- pLowerBound[pLowerBound < p]
p <- p * 2
} else if (upper) {
p <- pUpperBound
} else {
p <- pLowerBound
}
adjustedP$p[i] <- quantile(p, 0.5)
adjustedP$lb95ci[i] <- quantile(p, 0.025)
adjustedP$ub95ci[i] <- quantile(p, 0.975)
}
adjustedP$p[i] <- quantile(p, 0.5)
adjustedP$lb95ci[i] <- quantile(p, 0.025)
adjustedP$ub95ci[i] <- quantile(p, 0.975)
}
}
if (missing(pValueOnly) || pValueOnly == FALSE) {
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-EmpiricalCalibrationUsingAsymptotics.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,19 @@ test_that("CalibrateP matches computeTraditionalP when mu = sigma = 0", {
computeTraditionalP(logRr, seLogRr)
)
})

test_that("fitNull throws no error when all estimates are NA", {
null <- fitNull(logRr = c(NA, NA, NA),
seLogRr = c(NA, NA, NA))
print(null)

p <- calibrateP(null, 1, 1)
expect_true(is.na(p))

model <- convertNullToErrorModel(null)
expect_true(is.na(model[1]))

ci <- calibrateConfidenceInterval(1, 1, model)
expect_true(is.na(ci[1]))
})

18 changes: 18 additions & 0 deletions tests/testthat/test-empiricalCalibrationUsingMcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,21 @@ test_that("MCMC calibration of p-values returns values close to truth", {
)
)
})

test_that("fitMcmcNull throws no error when all estimates are NA", {
null <- fitMcmcNull(logRr = c(NA, NA, NA),
seLogRr = c(NA, NA, NA))
print(null)

p <- calibrateP(null, 1, 1)
expect_true(is.na(p$p))

p <- calibrateP(null, 1, 1, pValueOnly = TRUE)
expect_true(is.na(p))

model <- convertNullToErrorModel(null)
expect_true(is.na(model[1]))

ci <- calibrateConfidenceInterval(1, 1, model)
expect_true(is.na(ci[1]))
})

0 comments on commit 37e237c

Please sign in to comment.