Skip to content

Commit

Permalink
Porting computation of correction to C++ for speed
Browse files Browse the repository at this point in the history
  • Loading branch information
Admin_mschuemi authored and Admin_mschuemi committed Aug 18, 2023
1 parent 2d2c46f commit bb0c919
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 51 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ importFrom(stats,aggregate)
importFrom(stats,coef)
importFrom(stats,confint)
importFrom(stats,dgamma)
importFrom(stats,dpois)
importFrom(stats,integrate)
importFrom(stats,nlm)
importFrom(stats,optim)
importFrom(stats,pchisq)
importFrom(stats,pgamma)
importFrom(stats,pnorm)
importFrom(stats,ppois)
Expand Down
69 changes: 37 additions & 32 deletions R/Diagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,23 @@
# source("~/git/SelfControlledCaseSeries/R/SccsDataConversion.R")

computeOutcomeRatePerMonth <- function(studyPopulation, sccsModel = NULL) {
# Computes not only the observed outcome rate per calendar month, but also
# observed / expected rate assuming everything is constant, and if sccsModel is
# provided, observed / expected using the adjustments in the model.
# To compute the expected rate for a month, we must consider only those cases
# observed during that month, summing their respective incidence rates.

if (nrow(studyPopulation$cases) == 0) {
tibble(
result <- tibble(
month = 1.0,
observedCount = 1.0,
observationPeriodCount = 1.0,
rate = 1.0,
adjustedRate = 1.0,
monthStartDate = as.Date("2000-01-01"),
monthEndDate = as.Date("2000-01-01")) %>%
filter(month == 0) %>%
return()
filter(month == 0)
return(result)
}
cases <- studyPopulation$cases %>%
mutate(startDate = .data$observationPeriodStartDate + .data$startDay,
Expand Down Expand Up @@ -78,23 +84,11 @@ computeOutcomeRatePerMonth <- function(studyPopulation, sccsModel = NULL) {
select(-"monthOfYear") %>%
mutate(totalRr = .data$totalRr * .data$seasonRr)
}
computeCorrection <- function(startMonth, endMonth, startMonthFraction, endMonthFraction) {
monthAdjustments %>%
filter(.data$month >= startMonth, .data$month <= endMonth) %>%
mutate(weight = if_else(.data$month == startMonth,
startMonthFraction,
if_else(.data$month == endMonth,
endMonthFraction,
1))) %>%
summarize(correction = sum(.data$weight * .data$totalRr) / sum(.data$weight)) %>%
pull(.data$correction) %>%
return()
}
if (hasAdjustment) {
# Need to correct for the fact that a person may have seen only part of the spline, so
# techincally has a different intercept:
cases <- cases %>%
rowwise() %>%
mutate(correction = computeCorrection(.data$startMonth, .data$endMonth, .data$startMonthFraction, .data$endMonthFraction)) %>%
ungroup()
mutate(correction = computeCorrections(cases, monthAdjustments))
} else {
cases <- cases %>%
mutate(correction = 1)
Expand Down Expand Up @@ -134,24 +128,22 @@ computeOutcomeRatePerMonth <- function(studyPopulation, sccsModel = NULL) {
#' Compute stability of outcome rate over time
#'
#' @details
#' Computes for each calendar month the rate of the outcome, and evaluates whether that rate is constant over time. If
#' splines are used to adjust for seasonality and/or calendar time, these adjustments are taken into consideration. For each
#' month a two-sided p-value is computed against the null hypothesis that the rate in that month deviates from the mean rate
#' no more than `maxRatio`. This p-value is compared to an alpha value, using a Bonferroni correction to adjust for the
#' multiple testing across months.
#' Computes for each month the observed and expected count, and computes the (weighted) mean ratio between the two. If
#' splines are used to adjust for seasonality and/or calendar time, these adjustments are taken into consideration when
#' considering the expected count. A one-sided p-value is computed against the null hypothesis that the ratio is smaller
#' than `maxRatio`. If this p-value exceeds the specified alpha value, the series is considered stable.
#'
#' @template StudyPopulation
#' @param sccsModel Optional: A fitted SCCS model as created using [fitSccsModel()]. If the
#' model contains splines for seasonality and or calendar time these will be adjusted
#' for before computing stability.
#' @param maxRatio The maximum ratio between the (adjusted) rate in a month, and the mean (adjusted) rate that
#' we would consider to be irrelevant.
#' @param alpha The alpha (type 1 error) used to test for stability. A Bonferroni correction will
#' be applied for the number of months tested.
#' @param maxRatio The maximum global ratio between the observed and expected count.
#' @param alpha The alpha (type 1 error) used to test for stability.
#'
#' @return
#' A tibble with information on the temporal stability per month. The column `stable` indicates whether the rate
#' of the outcome is within the expected range for that month, assuming the rate is constant over time.
#' A tibble with one row and three columns: `ratio` indicates the estimated mean ratio between observed and expected.
#' `p` is the p-value against the null-hypothesis that the ratio is smaller than `maxRatio`, and `stable` is `TRUE`
#' if `p` is greater than `alpha`.
#'
#' @export
computeTimeStability <- function(studyPopulation, sccsModel = NULL, maxRatio = 1.25, alpha = 0.05) {
Expand All @@ -162,20 +154,33 @@ computeTimeStability <- function(studyPopulation, sccsModel = NULL, maxRatio = 1
checkmate::assertNumeric(alpha, lower = 0, upper = 1, len = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

data <- computeOutcomeRatePerMonth(studyPopulation, sccsModel)
data <- SelfControlledCaseSeries:::computeOutcomeRatePerMonth(studyPopulation, sccsModel)
if (nrow(data) == 0) {
result <- tibble(ratio = NA,
p = 1,
stable = TRUE)
return(result)
}
o <- data$observedCount
e <- data$observedCount / data$adjustedRate

# logLikelihood <- function(x) {
# return(-sum(log(dpois(o, e*x) + dpois(o, e/x))))
# }
# From https://cdsmithus.medium.com/the-logarithm-of-a-sum-69dd76199790
smoothMax <- function(x, y) {
return(ifelse(abs(x-y) > 100, pmax(x,y), x + log(1 + exp(y-x))))
}
logLikelihood <- function(x) {
return(-sum(log(dpois(o, e*x) + dpois(o, e/x))))
return(-sum(smoothMax(dpois(o, e*x, log = TRUE), dpois(o, e/x, log = TRUE))))
}
likelihood <- function(x) {
return(exp(-logLikelihood(x)))
}
vectorLikelihood <- function(x) {
return(sapply(x, likelihood))
}
x <- seq(1,10, by = 0.1)
x <- seq(1, 10, by = 0.1)
ll <- sapply(x, logLikelihood)
maxX <- x[max(which(!is.na(ll) & !is.infinite(ll)))]
minX <- x[min(which(!is.na(ll) & !is.infinite(ll)))]
Expand Down
13 changes: 8 additions & 5 deletions R/Export.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' (MDRR)?
#' @param easeThreshold What is the maximum allowed expected absolute systematic error
#' (EASE).
#' @param timeTrendPThreshold What family-wise p-value threshold (alpha) will be used to determine
#' @param timeTrendPThreshold What p-value threshold (alpha) will be used to determine
#' temporal instability?
#' @param preExposurePThreshold What p-value threshold (alpha) will be used to determine whether the
#' rate of the outcome was higher just before exposure initiation?
Expand Down Expand Up @@ -568,15 +568,17 @@ exportFromSccsDataStudyPopSccsModel <- function(outputFolder, exportFolder, data
)
}

# sccsTimeTrend table
timeTrendData <- computeTimeStability(
# sccsTimeTrend table. No longer computing stability per month. Set flag
# to TRUE for all months for backwards compatability:
timeTrendData <- computeOutcomeRatePerMonth(
studyPopulation = studyPop,
sccsModel = sccsModel
) %>%
mutate(
calendarYear = floor(.data$month / 12),
calendarMonth = floor(.data$month %% 12) + 1,
stable = as.integer(.data$stable)
stable = 1,
p = 1
) %>%
select(
"calendarYear",
Expand All @@ -596,11 +598,12 @@ exportFromSccsDataStudyPopSccsModel <- function(outputFolder, exportFolder, data
bind_cols(timeTrendData)

# sccsDiagnosticsSummary table
stability <- computeTimeStability(studyPopulation = studyPop, sccsModel = sccsModel)
table <- ease %>%
filter(.data$exposuresOutcomeSetId == refRow$exposuresOutcomeSetId & .data$analysisId == refRow$analysisId) %>%
mutate(
mdrr = as.numeric(NA),
timeTrendP = min(timeTrendData$p * nrow(timeTrendData)),
timeTrendP = stability$p,
preExposureP = as.numeric(NA)
)
for (j in seq_len(nrow(table))) {
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@ testEgid <- function(p, present, astart, aend, start, end) {
.Call('_SelfControlledCaseSeries_testEgid', PACKAGE = 'SelfControlledCaseSeries', p, present, astart, aend, start, end)
}

computeCorrections <- function(cases, monthAdjustments) {
.Call('_SelfControlledCaseSeries_computeCorrections', PACKAGE = 'SelfControlledCaseSeries', cases, monthAdjustments)
}

2 changes: 1 addition & 1 deletion R/SelfControlledCaseSeries.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' @importFrom SqlRender loadRenderTranslateSql translateSql
#' @importFrom grDevices rgb
#' @importFrom methods is
#' @importFrom stats aggregate coef confint dgamma nlm pgamma pnorm printCoefmat qnorm rnorm rpois runif splinefun quantile ppois
#' @importFrom stats aggregate coef confint dgamma nlm pgamma pnorm printCoefmat qnorm rnorm rpois runif splinefun quantile ppois dpois integrate optim pchisq
#' @importFrom utils head tail setTxtProgressBar txtProgressBar packageVersion
#' @import dplyr
#' @import Cyclops
Expand Down
2 changes: 1 addition & 1 deletion extras/AgeAndSeasonSimulations.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
library(SelfControlledCaseSeries)
options(andromedaTempFolder = "d:/andromedaTemp")
settings <- createSccsSimulationSettings(includeAgeEffect = F,
settings <- createSccsSimulationSettings(includeAgeEffect = T,
includeCalendarTimeEffect = T,
includeSeasonality = T)
set.seed(123)
Expand Down
20 changes: 9 additions & 11 deletions man/computeTimeStability.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/createSccsDiagnosticThresholds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// computeCorrections
std::vector<double> computeCorrections(const DataFrame& cases, const DataFrame& monthAdjustments);
RcppExport SEXP _SelfControlledCaseSeries_computeCorrections(SEXP casesSEXP, SEXP monthAdjustmentsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const DataFrame& >::type cases(casesSEXP);
Rcpp::traits::input_parameter< const DataFrame& >::type monthAdjustments(monthAdjustmentsSEXP);
rcpp_result_gen = Rcpp::wrap(computeCorrections(cases, monthAdjustments));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_SelfControlledCaseSeries_convertToSccs", (DL_FUNC) &_SelfControlledCaseSeries_convertToSccs, 18},
Expand All @@ -131,6 +143,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_SelfControlledCaseSeries_testEwid", (DL_FUNC) &_SelfControlledCaseSeries_testEwid, 6},
{"_SelfControlledCaseSeries_testEgad", (DL_FUNC) &_SelfControlledCaseSeries_testEgad, 6},
{"_SelfControlledCaseSeries_testEgid", (DL_FUNC) &_SelfControlledCaseSeries_testEgid, 6},
{"_SelfControlledCaseSeries_computeCorrections", (DL_FUNC) &_SelfControlledCaseSeries_computeCorrections, 2},
{NULL, NULL, 0}
};

Expand Down
35 changes: 35 additions & 0 deletions src/RcppWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,39 @@ double testEgid(std::vector<double> p, double present, double astart, double aen
return weight;
}

// [[Rcpp::export]]
std::vector<double> computeCorrections(const DataFrame& cases, const DataFrame& monthAdjustments) {
NumericVector startMonths = cases["startMonth"];
NumericVector startMonthFractions = cases["startMonthFraction"];
NumericVector endMonths = cases["endMonth"];
NumericVector endMonthFractions = cases["endMonthFraction"];
NumericVector months = monthAdjustments["month"];
NumericVector totalRrs = monthAdjustments["totalRr"];
std::vector<double> corrections(cases.nrows());
for (int i = 0; i < cases.nrows(); i++) {
double startMonth = startMonths[i];
double startMonthFraction = startMonthFractions[i];
double endMonth = endMonths[i];
double endMonthFraction = endMonthFractions[i];
double sumW = 0;
double sumRr = 0;
for (int j = 0; j < monthAdjustments.nrows(); j++) {
double month = months[j];
if (month >= startMonth) {
double w = 1;
if (month == startMonth)
w = startMonthFraction;
else if (month == endMonth)
w = endMonthFraction;
sumW += w;
sumRr += w * totalRrs[j];
}
if (month == endMonth)
break;
}
corrections[i] = sumRr / sumW;
}
return corrections;
}

#endif // __RcppWrapper_cpp__

0 comments on commit bb0c919

Please sign in to comment.