Merge pull request #50 from Merck/feature_drmd

dom-muston · web-flow · commit a19f1f8c87d1 · 2024-06-05T11:53:13.000-04:00
Simplify constrained mortality calculations
diff --git a/R/discrmd.R b/R/discrmd.R
@@ -53,20 +53,20 @@
 # fits <- fit_ends_mods_spl(bosonc)
 # # Pick out best distribution according to min AIC
 # params <- list(
-#   ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
-#   ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
-#   pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
-#   os = find_bestfit_spl(fits$os, "aic")$fit,
-#   pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
-#   pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
+#   ppd = find_bestfit(fits$ppd, "aic")$fit,
+#   ttp = find_bestfit(fits$ttp, "aic")$fit,
+#   pfs = find_bestfit(fits$pfs, "aic")$fit,
+#   os = find_bestfit(fits$os, "aic")$fit,
+#   pps_cf = find_bestfit(fits$pps_cf, "aic")$fit,
+#   pps_cr = find_bestfit(fits$pps_cr, "aic")$fit
 # )
 # drmd_psm(ptdata=bosonc, dpam=params)
 # # Add a lifetable constraint
 # ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
 # drmd_psm(ptdata=bosonc, dpam=params, lifetable=ltable)
 drmd_psm <- function(ptdata, dpam, psmtype="simple", Ty=10, discrate=0, lifetable=NA, timestep=1) {
   # Declare local variables
-  Tw <- tvec <- pfprob <- osprob <- adjosprob <- adjfac <- adjprob <- vn <- NULL
+  Tw <- NULL
   # Time horizon in weeks (ceiling)
   Tw <- ceiling(convert_yrs2wks(Ty)/timestep)
   # Create dataset, starting with time vector, with half-cycle addition, and unconstrained probs
@@ -82,51 +82,8 @@ drmd_psm <- function(ptdata, dpam, psmtype="simple", Ty=10, discrate=0, lifetabl
   allh <- calc_haz_psm(timevar=ds$tmid, ptdata=ptdata, dpam=dpam, psmtype=psmtype)$adj
   # Derive the unconstrained PPD mortality probability
   ds$q_ppd <- 1-exp(-allh$ppd)
-  # Derive the constrained life table
-  ds$clx <- calc_ltsurv(convert_wks2yrs(ds$tzero), lifetable)
-  # Other calculations on the dataset
-  ds <- ds |>
-    dplyr::mutate(
-      # Derive the background mortality for this timepoint
-      cqx = 1 - dplyr::lead(.data$clx)/.data$clx,
-      # Derive the TTP probability (balancing item)
-      q_pfs = 1 - dplyr::lead(.data$u_pf)/.data$u_pf,
-      q_ttp = .data$q_pfs - .data$q_ppd,
-      d_pf = .data$u_pf * .data$q_ppd,
-      c_qpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
-      # Derive the PPS mortality probability
-      d_pfpd = .data$u_pf + .data$u_pd - dplyr::lead(.data$u_pf) - dplyr::lead(.data$u_pd),
-      d_pps = .data$d_pfpd - .data$d_pf,
-      q_pps = dplyr::if_else(.data$u_pd==0, 0, .data$d_pps / .data$u_pd),
-      # Constrained probabilities
-      cqpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
-      cqpps = pmax(.data$q_pps, .data$cqx),
-      # Derive the constrained PF and PD memberships
-      c_pf = .data$u_pf,
-      c_pd = .data$u_pd,
-    )
-  # Derive the constrained PF and PD memberships
-  for (t in 2:(Tw)) {
-      ds$c_pf[t] = ds$c_pf[t-1] * (1-ds$cqpfs[t-1])
-      ds$c_pd[t] = ds$c_pf[t-1] * ds$q_ttp[t-1] + ds$c_pd[t-1] * (1 - ds$cqpps[t-1])
-  }
-  # The final membership probabilities are zero
-  ds$c_pf[Tw+1] <- ds$c_pd[Tw+1] <- 0
-  # Final calculations on the dataset
-  ds <- ds |>
-      dplyr::mutate(
-        # Discount factor
-        vn = (1+discrate)^(-convert_wks2yrs(.data$tmid)),
-        # RMD components in each timestep
-        rmd_pf = (.data$c_pf + dplyr::lead(.data$c_pf))/2 * .data$vn * timestep,
-        rmd_pd = (.data$c_pd + dplyr::lead(.data$c_pd))/2 * .data$vn * timestep
-      )
-  ds$rmd_pf[Tw+1] <- ds$rmd_pd[Tw+1] <- 0
-  # Calculate RMDs
-  pf <- sum(ds$rmd_pf)
-  pd <- sum(ds$rmd_pd)
-  # Return values
-  return(list(pf=pf, pd=pd, os=pf+pd, calc=ds))
+  # Call routine to run calculations
+  calc_drmd(ds, Tw, discrate, lifetable, timestep)
 }
 
 
@@ -142,73 +99,35 @@ drmd_psm <- function(ptdata, dpam, psmtype="simple", Ty=10, discrate=0, lifetabl
 # fits <- fit_ends_mods_spl(bosonc)
 # # Pick out best distribution according to min AIC
 # params <- list(
-#   ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
-#   ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
-#   pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
-#   os = find_bestfit_spl(fits$os, "aic")$fit,
-#   pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
-#   pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
+#   ppd = find_bestfit(fits$ppd, "aic")$fit,
+#   ttp = find_bestfit(fits$ttp, "aic")$fit,
+#   pfs = find_bestfit(fits$pfs, "aic")$fit,
+#   os = find_bestfit(fits$os, "aic")$fit,
+#   pps_cf = find_bestfit(fits$pps_cf, "aic")$fit,
+#   pps_cr = find_bestfit(fits$pps_cr, "aic")$fit
 # )
 # drmd_stm_cf(dpam=params)
 # # Add a lifetable constraint
 # ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
 # drmd_stm_cf(dpam=params, lifetable=ltable)
 drmd_stm_cf <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
   # Declare local variables
-  Tw <- tvec <- sppd <- sttp <- sos <- NULL
-  adjsppd <- adjos <- vn <- pf <- os <- NULL
+  Tw <- NULL
   # Time horizon in weeks (ceiling)
   Tw <- ceiling(convert_yrs2wks(Ty)/timestep)
   # Create dataset, starting with time vector, with half-cycle addition, and unconstrained probs
   ds <- tibble::tibble(
     tzero = timestep*(0:Tw),
     tmid = .data$tzero + timestep/2,
     u_pf = prob_pf_stm(.data$tzero, dpam),
+    # Must be CF in next line
     u_pd = prob_pd_stm_cf(.data$tzero, dpam),
     # Calculate PPD hazard and probability
     h_ppd = calc_haz(.data$tmid, survobj=dpam$ppd),
-    q_ppd = 1-exp(-.data$h_ppd),
-    # Derive the constrained life table
-    clx = calc_ltsurv(convert_wks2yrs(.data$tzero), lifetable),
-    # Derive the background mortality for this timepoint
-    cqx = 1 - dplyr::lead(.data$clx)/.data$clx,
-    # Derive the TTP probability (balancing item for PFS)
-    q_pfs = 1 - dplyr::lead(.data$u_pf)/.data$u_pf,
-    q_ttp = .data$q_pfs-.data$q_ppd,
-    d_pf = .data$u_pf * .data$q_ppd,
-    # Derive the PPS mortality probability
-    d_pfpd = .data$u_pf + .data$u_pd - dplyr::lead(.data$u_pf) - dplyr::lead(.data$u_pd),
-    d_pps = .data$d_pfpd - .data$d_pf,
-    q_pps = dplyr::if_else(.data$u_pd==0, 0, .data$d_pps / .data$u_pd),
-    # Constrained probabilities
-    cqpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
-    cqpps = pmax(.data$q_pps, .data$cqx),
-    # Initial constrained PF and PD
-    c_pf = .data$u_pf,
-    c_pd = .data$u_pd
+    q_ppd = 1-exp(-.data$h_ppd)
   )
-  # Derive the constrained PF and PD memberships
-  for (t in 2:(Tw)) {
-    ds$c_pf[t] = ds$c_pf[t-1] * (1-ds$cqpfs[t-1])
-    ds$c_pd[t] = ds$c_pf[t-1] * ds$q_ttp[t-1] + ds$c_pd[t-1] * (1 - ds$cqpps[t-1])
-  }
-  # The final membership probabilities are zero
-  ds$c_pf[Tw+1] <- ds$c_pd[Tw+1] <- 0
-  # Final calculations on the dataset
-  ds <- ds |>
-    dplyr::mutate(
-      # Discount factor
-      vn = (1+discrate)^(-convert_wks2yrs(tmid)),
-      # RMD components in each timestep
-      rmd_pf = (.data$c_pf + dplyr::lead(.data$c_pf))/2 * .data$vn * timestep,
-      rmd_pd = (.data$c_pd + dplyr::lead(.data$c_pd))/2 * .data$vn * timestep
-    )
-  ds$rmd_pf[Tw+1] <- ds$rmd_pd[Tw+1] <- 0
-  # Calculate RMDs
-  pf <- sum(ds$rmd_pf)
-  pd <- sum(ds$rmd_pd)
-  # Return values
-  return(list(pf=pf, pd=pd, os=pf+pd, calc=ds))
+  # Call routine to run calculations
+  calc_drmd(ds, Tw, discrate, lifetable, timestep)
 }
 
 #' Discretized Restricted Mean Duration calculation for State Transition Model Clock Reset structure
@@ -223,54 +142,71 @@ drmd_stm_cf <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
 # fits <- fit_ends_mods_spl(bosonc)
 # # Pick out best distribution according to min AIC
 # params <- list(
-#   ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
-#   ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
-#   pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
-#   os = find_bestfit_spl(fits$os, "aic")$fit,
-#   pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
-#   pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
+#   ppd = find_bestfit(fits$ppd, "aic")$fit,
+#   ttp = find_bestfit(fits$ttp, "aic")$fit,
+#   pfs = find_bestfit(fits$pfs, "aic")$fit,
+#   os = find_bestfit(fits$os, "aic")$fit,
+#   pps_cf = find_bestfit(fits$pps_cf, "aic")$fit,
+#   pps_cr = find_bestfit(fits$pps_cr, "aic")$fit
 # )
 # drmd_stm_cr(dpam=params)
 # # Add a lifetable constraint
 # ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
 # drmd_stm_cr(dpam=params, lifetable=ltable)
 drmd_stm_cr <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
   # Declare local variables
-  Tw <- tvec <- sppd <- sttp <- sos <- NULL
-  adjsppd <- adjos <- vn <- pf <- os <- NULL
+  Tw <- NULL
   # Time horizon in weeks (ceiling)
   Tw <- ceiling(convert_yrs2wks(Ty)/timestep)
   # Create dataset, starting with time vector, with half-cycle addition, and unconstrained probs
   ds <- tibble::tibble(
     tzero = timestep*(0:Tw),
     tmid = .data$tzero + timestep/2,
     u_pf = prob_pf_stm(.data$tzero, dpam),
-    u_pd = prob_pd_stm_cf(.data$tzero, dpam)
-  )
-  # Keep calculating membership probabilities
-  ds <- ds |>
-    dplyr::mutate(
+    # Must be CR in next line
+    u_pd = prob_pd_stm_cr(.data$tzero, dpam),
     # Calculate PPD hazard and probability
     h_ppd = calc_haz(.data$tmid, survobj=dpam$ppd),
-    q_ppd = 1-exp(-h_ppd),
-    # Derive the constrained life table
-    clx = calc_ltsurv(convert_wks2yrs(.data$tzero), lifetable),
-    cqx = 1 - dplyr::lead(.data$clx)/.data$clx,
-    # Derive the TTP probability (balancing item for PF)
-    q_pfs = 1 - dplyr::lead(.data$u_pf)/.data$u_pf,
-    q_ttp = .data$q_pfs - .data$q_ppd,
-    d_pf = .data$u_pf * .data$q_ppd,
-    # Derive the PPS mortality probability
-    d_pfpd = .data$u_pf + .data$u_pd - dplyr::lead(.data$u_pf) - dplyr::lead(.data$u_pd),
-    d_pps = .data$d_pfpd - .data$d_pf,
-    q_pps = dplyr::if_else(.data$u_pd==0, 0, .data$d_pps / .data$u_pd),
-    # Constrained probabilities
-    cqpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
-    cqpps = pmax(.data$q_pps, .data$cqx),
-    # Initial constrained PF and PD
-    c_pf = .data$u_pf,
-    c_pd = .data$u_pd
+    q_ppd = 1-exp(-.data$h_ppd)
   )
+  # Call routine to run calculations
+  calc_drmd(ds, Tw, discrate, lifetable, timestep)
+}
+
+#' Calculate the constrained and discounted RMDs
+#'
+#' @inheritParams drmd_psm
+#' @param ds Dataset required for this calculation must be a tibble including: tzero, tmid, q_ppd, u_pf and u_pd
+#' @importFrom rlang .data
+#' @return List containing:
+#' - pf: RMD in PF state
+#' - pd: RMD in PD state
+#' - os: RMD in either alive state
+#' @noRd
+calc_drmd <- function(ds, Tw, discrate, lifetable, timestep) {
+  # Derive the constrained life table
+  ds$clx <- calc_ltsurv(convert_wks2yrs(ds$tzero), lifetable)
+  # Other calculations on the dataset
+  ds <- ds |>
+    dplyr::mutate(
+      # Derive the background mortality for this timepoint
+      cqx = 1 - dplyr::lead(.data$clx)/.data$clx,
+      # Derive the TTP probability (balancing item)
+      q_pfs = 1 - dplyr::lead(.data$u_pf)/.data$u_pf,
+      q_ttp = .data$q_pfs - .data$q_ppd,
+      d_pf = .data$u_pf * .data$q_ppd,
+      c_qpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
+      # Derive the PPS mortality probability
+      d_pfpd = .data$u_pf + .data$u_pd - dplyr::lead(.data$u_pf) - dplyr::lead(.data$u_pd),
+      d_pps = .data$d_pfpd - .data$d_pf,
+      q_pps = dplyr::if_else(.data$u_pd==0, 0, .data$d_pps / .data$u_pd),
+      # Constrained probabilities
+      cqpfs = .data$q_ttp + pmax(.data$q_ppd, .data$cqx),
+      cqpps = pmax(.data$q_pps, .data$cqx),
+      # Derive the constrained PF and PD memberships
+      c_pf = .data$u_pf,
+      c_pd = .data$u_pd,
+    )
   # Derive the constrained PF and PD memberships
   for (t in 2:(Tw)) {
     ds$c_pf[t] = ds$c_pf[t-1] * (1-ds$cqpfs[t-1])
@@ -293,4 +229,4 @@ drmd_stm_cr <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
   pd <- sum(ds$rmd_pd)
   # Return values
   return(list(pf=pf, pd=pd, os=pf+pd, calc=ds))
-}
+}