Skip to content

Commit bfc93ca

Browse files
authored
Merge pull request #17 from Merck/Nextrel
Version 0.2.1 reworks the lifetable constraint calculations
2 parents 5ad1da6 + 481d91f commit bfc93ca

30 files changed

+866
-337
lines changed

DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: psm3mkv
22
Title: Fit and Evaluate Three-State Partitioned Survival Analysis and Markov Models to Progression-Free and Overall Survival Data
3-
Version: 0.2.0.9000
3+
Version: 0.2.1
44
Authors@R: c(
55
person("Dominic", "Muston", , "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-4876-7940")),
@@ -46,6 +46,7 @@ Collate:
4646
'basics.R'
4747
'brier.R'
4848
'datasets.R'
49+
'discrmd.R'
4950
'fitting-spl.R'
5051
'fitting.R'
5152
'lhoods.R'

NAMESPACE

+4
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,15 @@ export(calc_rmd_par)
2222
export(calc_surv)
2323
export(calc_surv_psmpps)
2424
export(check_posdef)
25+
export(constrain_survprob)
2526
export(convert_fit2spec)
2627
export(convert_wks2yrs)
2728
export(convert_yrs2wks)
2829
export(create_dummydata)
2930
export(create_extrafields)
31+
export(drmd_psm)
32+
export(drmd_stm_cf)
33+
export(drmd_stm_cr)
3034
export(find_bestfit_par)
3135
export(find_bestfit_spl)
3236
export(fit_ends_mods_par)

NEWS.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
# psm3mkv (development version)
1+
# psm3mkv 0.2.1
2+
3+
# 31 Mar 2024 - Version 0.2.1
4+
5+
Constraining calculations of restricted mean durations and the accompanying `vignette("background-mortality")` has been reworked. The calculations using integral/continuous methods was not reliable. Instead, `calc_allrmds` now has a `rmdmethod="disc"` option to allow for discretized calculations for a given timestep (defaulting at one week). There is a new collection of functions in `discrmd.R` to provide for this, as well as `constrain_survprob()` function, which constrains a vector of survival estimates at given times such that the underlying hazard is at least as great as in an accompanying lifetable.
26

37
# 26 Jan 2024 - Version 0.2
48

R/discrmd.R

+178
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
# Copyright (c) 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
2+
# All rights reserved.
3+
#
4+
# This file is part of the psm3mkv program.
5+
#
6+
# psm3mkv is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
#
19+
# ==================================================================
20+
# Discretized restricted mean durations
21+
# discrmd.R
22+
# =====================================
23+
#
24+
25+
#' Discretized Restricted Mean Duration calculation for Partitioned Survival Model
26+
#' Calculate restricted mean duration (RMD) in PF, PD and OS states under a Partitioned Survival Model structure.
27+
#' @param dpam List of survival regressions for model endpoints. These must include time to progression (TTP) and pre-progression death (PPD).
28+
#' @param Ty Time duration over which to calculate (defaults to 10 years). Assumes input is in years, and patient-level data is recorded in weeks.
29+
#' @param discrate Discount rate (%) per year (defaults to zero).
30+
#' @param lifetable Optional. The lifetable must be a dataframe with columns named time and lx. The first entry of the time column must be zero. Data should be sorted in ascending order by time, and all times must be unique.
31+
#' @param timestep Optional, defaults to one (week).
32+
#' @return List containing:
33+
#' - pf: RMD in PF state
34+
#' - pd: RMD in PD state
35+
#' - os: RMD in either alive state
36+
#' @seealso [drmd_stm_cf()] [drmd_stm_cr()]
37+
#' @export
38+
#' @examples
39+
#' # Create dataset and fit survival models (splines)
40+
#' bosonc <- create_dummydata("flexbosms")
41+
#' fits <- fit_ends_mods_spl(bosonc)
42+
#' # Pick out best distribution according to min AIC
43+
#' params <- list(
44+
#' ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
45+
#' ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
46+
#' pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
47+
#' os = find_bestfit_spl(fits$os, "aic")$fit,
48+
#' pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
49+
#' pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
50+
#' )
51+
#' drmd_psm(dpam=params)
52+
#' # Add a lifetable constraint
53+
#' ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
54+
#' drmd_psm(dpam=params, lifetable=ltable)
55+
drmd_psm <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
56+
# Declare local variables
57+
Tw <- tvec <- pfprob <- osprob <- adjosprob <- adjfac <- adjprob <- vn <- NULL
58+
# Time horizon in weeks (ceiling)
59+
Tw <- convert_yrs2wks(Ty)
60+
# Create time vector, with half-cycle addition
61+
tvec <- timestep*(1:floor(Tw/timestep)) + timestep/2
62+
# Membership probabilities with lifetable constraint
63+
pfprob <- prob_pf_psm(tvec, dpam)
64+
osprob <- prob_os_psm(tvec, dpam)
65+
adjosprob <- constrain_survprob(osprob, lifetable=lifetable, timevec=tvec)
66+
adjfac <- adjosprob/osprob
67+
adjfac[is.na(adjfac)] <- 1
68+
adjpfprob <- pfprob * adjfac
69+
# Discount factor
70+
vn <- (1+discrate)^(-convert_wks2yrs(tvec+timestep/2))
71+
# Calculate RMDs
72+
pf <- sum(adjpfprob*vn) * timestep
73+
os <- sum(adjosprob*vn) * timestep
74+
# Return values
75+
return(list(pf=pf, pd=os-pf, os=os))
76+
}
77+
78+
#' Discretized Restricted Mean Duration calculation for State Transition Model Clock Forward structure
79+
#' Calculate restricted mean duration (RMD) in PF, PD and OS states under a State Transition Model Clock Forward structure.
80+
#' @inherit drmd_psm params return
81+
#' @seealso [drmd_psm()] [drmd_stm_cr()]
82+
#' @export
83+
#' @examples
84+
#' # Create dataset and fit survival models (splines)
85+
#' bosonc <- create_dummydata("flexbosms")
86+
#' fits <- fit_ends_mods_spl(bosonc)
87+
#' # Pick out best distribution according to min AIC
88+
#' params <- list(
89+
#' ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
90+
#' ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
91+
#' pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
92+
#' os = find_bestfit_spl(fits$os, "aic")$fit,
93+
#' pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
94+
#' pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
95+
#' )
96+
#' drmd_stm_cf(dpam=params)
97+
#' # Add a lifetable constraint
98+
#' ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
99+
#' drmd_stm_cf(dpam=params, lifetable=ltable)
100+
drmd_stm_cf <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
101+
# Declare local variables
102+
Tw <- tvec <- ppd.ts <- ttp.ts <- sppd <- sttp <- sos <- NULL
103+
adjsppd <- adjos <- vn <- pf <- os <- NULL
104+
# Time horizon in weeks (ceiling)
105+
Tw <- convert_yrs2wks(Ty)
106+
# Create time vector, with half-cycle addition
107+
tvec <- timestep*(1:floor(Tw/timestep)) + timestep/2
108+
# Pull out type and spec for PPD and TTP
109+
ppd.ts <- convert_fit2spec(dpam$ppd)
110+
ttp.ts <- convert_fit2spec(dpam$ttp)
111+
# Calculate S_PPD, S_TTP and S_OS
112+
sppd <- tvec |> purrr::map_dbl(~calc_surv(.x, ppd.ts$type, ppd.ts$spec))
113+
sttp <- tvec |> purrr::map_dbl(~calc_surv(.x, ttp.ts$type, ttp.ts$spec))
114+
# Next line is the difference with STM-CR
115+
sos <- prob_os_stm_cf(tvec, dpam)
116+
# Apply constraints to S_PPD and S_OS
117+
adjsppd <- constrain_survprob(sppd, lifetable=lifetable, timevec=tvec)
118+
adjos <- constrain_survprob(sos, lifetable=lifetable, timevec=tvec)
119+
# Discount factor
120+
vn <- (1+discrate)^(-convert_wks2yrs(tvec+timestep/2))
121+
# Calculate RMDs
122+
pf <- sum(sttp*adjsppd*vn) * timestep
123+
os <- sum(adjos*vn) * timestep
124+
# Return values
125+
return(list(pf=pf, pd=os-pf, os=os))
126+
}
127+
128+
#' Discretized Restricted Mean Duration calculation for State Transition Model Clock Reset structure
129+
#' Calculate restricted mean duration (RMD) in PF, PD and OS states under a State Transition Model Clock Reset structure.
130+
#' @inherit drmd_psm params return
131+
#' @seealso [drmd_stm_cf()] [drmd_psm()]
132+
#' @export
133+
#' @examples
134+
#' # Create dataset and fit survival models (splines)
135+
#' bosonc <- create_dummydata("flexbosms")
136+
#' fits <- fit_ends_mods_spl(bosonc)
137+
#' # Pick out best distribution according to min AIC
138+
#' params <- list(
139+
#' ppd = find_bestfit_spl(fits$ppd, "aic")$fit,
140+
#' ttp = find_bestfit_spl(fits$ttp, "aic")$fit,
141+
#' pfs = find_bestfit_spl(fits$pfs, "aic")$fit,
142+
#' os = find_bestfit_spl(fits$os, "aic")$fit,
143+
#' pps_cf = find_bestfit_spl(fits$pps_cf, "aic")$fit,
144+
#' pps_cr = find_bestfit_spl(fits$pps_cr, "aic")$fit
145+
#' )
146+
#' drmd_stm_cr(dpam=params)
147+
#' # Add a lifetable constraint
148+
#' ltable <- tibble::tibble(lttime=0:20, lx=1-lttime*0.05)
149+
#' drmd_stm_cr(dpam=params, lifetable=ltable)
150+
drmd_stm_cr <- function(dpam, Ty=10, discrate=0, lifetable=NA, timestep=1) {
151+
# Declare local variables
152+
Tw <- tvec <- ppd.ts <- ttp.ts <- sppd <- sttp <- sos <- NULL
153+
adjsppd <- adjos <- vn <- pf <- os <- NULL
154+
# Time horizon in weeks (ceiling)
155+
Tw <- convert_yrs2wks(Ty)
156+
# Create time vector, with half-cycle addition
157+
tvec <- timestep*(1:floor(Tw/timestep)) + timestep/2
158+
# Pull out type and spec for PPD and TTP
159+
ppd.ts <- convert_fit2spec(dpam$ppd)
160+
ttp.ts <- convert_fit2spec(dpam$ttp)
161+
# Calculate S_PPD, S_TTP and S_OS
162+
sppd <- tvec |> purrr::map_dbl(~calc_surv(.x, ppd.ts$type, ppd.ts$spec))
163+
sttp <- tvec |> purrr::map_dbl(~calc_surv(.x, ttp.ts$type, ttp.ts$spec))
164+
# Next line is the difference with STM-CF
165+
sos <- prob_os_stm_cr(tvec, dpam)
166+
# Apply constraints to S_PPD and S_OS
167+
adjsppd <- constrain_survprob(sppd, lifetable=lifetable, timevec=tvec)
168+
adjos <- constrain_survprob(sos, lifetable=lifetable, timevec=tvec)
169+
# Discount factor
170+
vn <- (1+discrate)^(-convert_wks2yrs(tvec+timestep/2))
171+
# Calculate RMDs
172+
pf <- sum(sttp*adjsppd*vn) * timestep
173+
os <- sum(adjos*vn) * timestep
174+
# Return values
175+
return(list(pf=pf, pd=os-pf, os=os))
176+
}
177+
178+

R/fitting-spl.R

+5-5
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ fit_mods_spl <- function(durn1, durn2=NA, evflag,
7575

7676
#' Fit multiple spline regressions to the multiple required endpoints
7777
#' @description Fits multiple survival regressions, according to the distributions stipulated, to the multiple endpoints required in fitting partitioned survival analysis, clock forward and clock reset semi-markov models.
78-
#' @param ds Dataset of patient level data. Must be a tibble with columns named:
78+
#' @param simdat Dataset of patient level data. Must be a tibble with columns named:
7979
#' - ptid: patient identifier
8080
#' - pfs.durn: duration of PFS from baseline
8181
#' - pfs.flag: event flag for PFS (=1 if progression or death occurred, 0 for censoring)
@@ -98,18 +98,18 @@ fit_mods_spl <- function(durn1, durn2=NA, evflag,
9898
#' # Create dataset in suitable form using bos dataset from the flexsurv package
9999
#' bosonc <- create_dummydata("flexbosms")
100100
#' fit_ends_mods_spl(bosonc, expvar=bosonc$ttp.durn)
101-
fit_ends_mods_spl <- function(ds,
101+
fit_ends_mods_spl <- function(simdat,
102102
knot_set=1:3,
103103
scale_set=c("hazard", "odds", "normal"),
104104
expvar = NA
105105
) {
106106
# Declare local variables
107-
dspps <- pps.durn <- fits.ppd <- fits.ttp <- fits.pfs <- NULL
107+
ds <- dspps <- pps.durn <- fits.ppd <- fits.ttp <- fits.pfs <- NULL
108108
fits.os <- fits.pps_cf <- fits.pps_cr <- NULL
109109
# Derive additional fields, as with regular function
110-
ds <- create_extrafields(ds, cuttime=0)
110+
ds <- create_extrafields(simdat, cuttime=0)
111111
# For PPS analysis, require there to be a known progression event, plus a positive PPS
112-
dspps <- ds |> dplyr::filter(pps.durn>0, ttp.flag==1)
112+
dspps <- ds |> dplyr::filter(.data$pps.durn>0, .data$ttp.flag==1)
113113
# Captures lists of fitted models to each endpoint
114114
fits.ppd <- fit_mods_spl(durn1 = ds$tzero,
115115
durn2 = ds$ppd.durn,

R/fitting.R

+5-5
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ create_extrafields <- function(ds, cuttime=0){
133133

134134
#' Fit multiple parametric survival regressions to the multiple required endpoints
135135
#' @description Fits multiple parametric survival regressions, according to the distributions stipulated, to the multiple endpoints required in fitting partitioned survival analysis, clock forward and clock reset semi-markov models.
136-
#' @param ds Dataset of patient level data. Must be a tibble with columns named:
136+
#' @param simdat Dataset of patient level data. Must be a tibble with columns named:
137137
#' - ptid: patient identifier
138138
#' - pfs.durn: duration of PFS from baseline
139139
#' - pfs.flag: event flag for PFS (=1 if progression or death occurred, 0 for censoring)
@@ -159,7 +159,7 @@ create_extrafields <- function(ds, cuttime=0){
159159
#' @examples
160160
#' bosonc <- create_dummydata("flexbosms")
161161
#' fit_ends_mods_par(bosonc, expvar=bosonc$ttp.durn)
162-
fit_ends_mods_par <- function(ds,
162+
fit_ends_mods_par <- function(simdat,
163163
cuttime = 0,
164164
ppd.dist = c("exp", "weibullPH", "llogis", "lnorm", "gamma", "gompertz"),
165165
ttp.dist = c("exp", "weibullPH", "llogis", "lnorm", "gamma", "gompertz"),
@@ -169,12 +169,12 @@ fit_ends_mods_par <- function(ds,
169169
pps_cr.dist = c("exp", "weibullPH", "llogis", "lnorm", "gamma", "gompertz"),
170170
expvar = NA) {
171171
# Declare local variables
172-
dspps <- pps.durn <- NULL
172+
ds <- dspps <- pps.durn <- NULL
173173
fits.ppd <- fits.ttp <- fits.pfs <- fits.os <- fits.pps_cf <- fits.pps_cr <- NULL
174174
# Derive additional fields, as with regular function
175-
ds <- create_extrafields(ds, cuttime)
175+
ds <- create_extrafields(simdat, cuttime)
176176
# For PPS analysis, require there to be a known progression event, plus a positive PPS
177-
dspps <- ds |> dplyr::filter(pps.durn>0, ttp.flag==1)
177+
dspps <- ds |> dplyr::filter(.data$pps.durn>0, .data$ttp.flag==1)
178178
# Captures lists of fitted models to each endpoint
179179
fits.ppd <- fit_mods_par(durn1 = ds$tzero,
180180
durn2 = ds$ppd.durn,

R/ltablesurv.R

+34-3
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@
3434
#' - `geom`: Geometric mean, where interpolation is required between measured values
3535
#' @seealso [psm3mkv::vlookup]
3636
vonelookup <- function(oneindexval, indexvec, valvec, method="geom") {
37+
if (is.na(oneindexval)) return(NA) # Return NA rather than error if index value lookup is NA
3738
if (oneindexval<min(indexvec)) stop("Lookup value is below range of lookup table")
3839
if (oneindexval>max(indexvec)) stop("Lookup value is above range of lookup table")
39-
# stopifnot(oneindexval >= min(indexvec), oneindexval<=max(indexvec))
4040
loc <- indexrange <- valrange <- NULL
4141
# Location of index values
4242
loc <- match(1, (oneindexval>=indexvec)*(oneindexval<=dplyr::lead(indexvec)))
@@ -110,12 +110,12 @@ calc_ltsurv <- function(looktime, lifetable=NA){
110110
#' @export
111111
#' @examples
112112
#' ltable <- tibble::tibble(lttime=0:10, lx=10-(0:10))
113-
#' calc_ltsurv(c(2, 2.5, 9.3), ltable)
113+
#' calc_ltdens(c(2, 2.5, 9.3), ltable)
114114
calc_ltdens <- function(looktime, lifetable=NA){
115115
if (!is.data.frame(lifetable)) stop("Lifetable must be specified")
116116
if (lifetable$lttime[1]!=0) stop("Lifetable must run from time zero")
117117
# Floor time from lifetable
118-
tlo <- vlookup(looktime, lifetable$lttime, lifetable$lttime, meth="floor")
118+
tlo <- vlookup(looktime, lifetable$lttime, lifetable$lttime, method="floor")
119119
pos <- match(tlo, lifetable$lttime)
120120
# Pick out useful lx values
121121
lx0 <- lifetable$lx[1]
@@ -163,4 +163,35 @@ calc_ex <- function(Ty=10, lifetable, discrate=0) {
163163
ex_y = ex,
164164
ex_w = convert_yrs2wks(ex),
165165
calcs = res1)
166+
}
167+
168+
#' Constrain survival probabilities according to hazards in a lifetable
169+
#' Recalculated constrained survival probabilities (by week) as the lower of the original unadjusted survival probability and the survival implied by the given lifetable (assumed indexed as years).
170+
#' @param survprob (Unconstrained) survival probability value or vector
171+
#' @param lifetable Lifetable
172+
#' @param timevec Vector of times corresponding with survival probabilities above
173+
#' @return Vector of constrained survival probabilities
174+
#' @export
175+
#' @examples
176+
#' ltable <- tibble::tibble(lttime=0:20, lx=c(1,0.08,0.05,0.03,0.01,rep(0,16)))
177+
#' survprob <- c(1,0.5,0.4,0.2,0)
178+
#' constrain_survprob(survprob, lifetable=ltable)
179+
#' timevec <- 100*(0:4)
180+
#' constrain_survprob(survprob, lifetable=ltable, timevec=timevec)
181+
constrain_survprob <- function(survprob, lifetable, timevec=0:(length(survprob)-1)) {
182+
# Check lifetable exists or return survprob
183+
if (!is.data.frame(lifetable)) {return(survprob)}
184+
# Vector of lifetables
185+
lxprob <- calc_ltsurv(convert_wks2yrs(timevec), lifetable)
186+
# Length of survprob
187+
N <- length(survprob)
188+
# Cycle through each element
189+
adjsurv <- slx <- sprob <- rep(NA, N)
190+
adjsurv[1] <- survprob[1]
191+
for (i in 2:N) {
192+
slx[i] <- ifelse(lxprob[i-1]==0, 1, lxprob[i]/lxprob[i-1])
193+
sprob[i] <- ifelse(survprob[i-1]==0, 1, survprob[i]/survprob[i-1])
194+
adjsurv[i] <- adjsurv[i-1] * pmin(slx[i], sprob[i])
195+
}
196+
return(adjsurv)
166197
}

0 commit comments

Comments
 (0)