Skip to content

Commit 2cc1420

Browse files
authored
Merge pull request #45 from Merck/postcranrel
Some developments following the CRAN release
2 parents 2a488bd + 438764a commit 2cc1420

12 files changed

+395
-38
lines changed

DESCRIPTION

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: psm3mkv
22
Title: Evaluate Partitioned Survival and State Transition Models
3-
Version: 0.3.1
3+
Version: 0.3.1.9000
44
Authors@R: c(
55
person("Dominic", "Muston", , "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-4876-7940")),
@@ -10,16 +10,18 @@ Description: Fits and evaluates three-state partitioned survival analyses
1010
(PartSAs) and Markov models (clock forward or clock reset) to
1111
progression and overall survival data typically collected in oncology clinical trials. These model structures are typically considered in
1212
cost-effectiveness modeling in advanced/metastatic cancer indications.
13-
Muston (2024). "Informing structural assumptions for three state oncology cost-effectiveness models through model efficiency and fit". Applied Health Economics and Health Policy. In press.
13+
Muston (2024). "Informing structural assumptions for three state oncology cost-effectiveness models through model efficiency and fit". Applied Health Economics and Health Policy. DOI 10.1007/s40258-024-00884-2
1414
License: GPL (>= 3)
1515
URL: https://merck.github.io/psm3mkv/, https://github.com/Merck/psm3mkv
1616
BugReports: https://github.com/Merck/psm3mkv/issues
1717
Encoding: UTF-8
1818
Depends: R (>= 4.1.0)
1919
Imports:
20+
admiral,
2021
dplyr,
2122
flexsurv,
2223
ggplot2,
24+
pharmaverseadam,
2325
purrr,
2426
rlang,
2527
SimplicialCubature,

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ export(calc_haz_psm)
55
export(calc_likes)
66
export(calc_rmd)
77
export(calc_surv_psmpps)
8+
export(check_consistent_pfs)
9+
export(compare_psm_likes)
810
export(constrain_survprob)
911
export(create_dummydata)
1012
export(create_extrafields)

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1+
# psm3mkv (development version)
2+
13
# psm3mkv 0.3.1
24

5+
- Submission to CRAN, including changes requested by CRAN
6+
37
# psm3mkv 0.3.0
48

9+
- First submission to CRAN, not accepted
10+
511
# psm3mkv 0.2.2 (4 May 2024)
612

713
Several minor changes to ready the package for CRAN.

R/datasets.R

+131-3
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,29 @@
2121
# These functions are used to create dummy datasets to illustrate package use
2222
# create_dummydata
2323
# create_dummydata_survcan
24+
# create_dummydata_pharmaonc
2425
# create_dummydata_flexbosms
2526
#
2627
# ======================================
2728

2829
#' Create dummy dataset for illustration
2930
#' @description Create dummy dataset to illustrate [psm3mkv]
3031
#' @param dsname Dataset name, as follows:
31-
#' * 'flexbosms' provides a dataset based on [flexsurv::bosms3()]. This contains all the fields necessary for [psm3mkv]. Durations have been converted from months in the original dataset to weeks.
32-
#' * 'survcan' provides a dataset based on [survival::cancer()]. This contains the necessary ID and overall survival fields only. Durations have been converted from days in the original dataset to weeks. You will additionally need to supply PFS and TTP data (fields pfs.durn, pfs.flag, ttp.durn and ttp.flag) to use [psm3mkv].
32+
#' * `flexbosms` provides a dataset based on [flexsurv::bosms3()]. This contains all the fields necessary for [psm3mkv]. Durations have been converted from months in the original dataset to weeks.
33+
#' * `pharmaonc` provides a dataset based on [pharmaverseadam::adsl] and [pharmaverseadam::adrs_onco] to demonstrate how this package can be used with ADaM ADTTE datasets.
34+
#' * `survcan` provides a dataset based on [survival::cancer()]. This contains the necessary ID and overall survival fields only. Durations have been converted from days in the original dataset to weeks. You will additionally need to supply PFS and TTP data (fields pfs.durn, pfs.flag, ttp.durn and ttp.flag) to use [psm3mkv].
3335
#' @return Tibble dataset, for use with [psm3mkv] functions
3436
#' @export
3537
#' @examples
3638
#' create_dummydata("survcan") |> head()
3739
#' create_dummydata("flexbosms") |> head()
40+
#' create_dummydata("pharmaonc") |> head()
3841
create_dummydata <- function(dsname) {
42+
dsname <- stringr::str_to_lower(dsname)
3943
if (dsname=="survcan") {create_dummydata_survcan()}
4044
else if (dsname=="flexbosms") {create_dummydata_flexbosms()}
41-
else {stop("Incorrect dataset specified. Must be survcan or flexbosms.")}
45+
else if (dsname=="pharmaonc") {create_dummydata_pharmaonc()}
46+
else {stop("Incorrect dataset specified. Must be survcan, flexbosms or pharmaonc.")}
4247
}
4348

4449
#' Create survcan dummy dataset for illustration
@@ -103,3 +108,126 @@ create_dummydata_flexbosms <- function() {
103108
attr(ds$ttp.flag, "label") <- "Event flag for TTP (1=event, 0=censor)"
104109
return(ds)
105110
}
111+
112+
#' Create pharmaonc dataset for illustration
113+
#' @description Create 'pharmaonc' dummy dataset to illustrate [psm3mkv]. This dataset is derived from `pharmaverse::adsl` and `pharmaverse::adrs_onco`. Overall Survival and Time To Progression are derived using `admiral::derive_param_tte()`, then durations are calculated in weeks.
114+
#' @return Tibble dataset, for use with [psm3mkv] functions
115+
#' @seealso [create_dummydata()]
116+
#' @importFrom rlang .data
117+
#' @noRd
118+
create_dummydata_pharmaonc <- function() {
119+
# Create local variables
120+
DTHFL <- DTHDT <- LSTALVDT <- AVALC <- ADT <- ASEQ <- RANDDT <- STARTDT <- NULL
121+
CNSR <- USUBJID <- PARAMCD <- DURN <- EVFLAG <- DURN_OS <- EVFLAG_OS <- DURN_TTP <- EVFLAG_TTP <- NULL
122+
ttp.durn <- os.durn <- ttp.flag <- os.flag <- ptid <- pfs.durn <- pfs.flag <- NULL
123+
# Obtain ADSL and ADRS datsets from pharmaverseadam
124+
adsl <- pharmaverseadam::adsl
125+
adrs <- pharmaverseadam::adrs_onco
126+
# Define event: death
127+
death <- admiral::event_source(
128+
dataset_name = "adsl",
129+
filter = DTHFL == "Y",
130+
date = DTHDT,
131+
set_values_to = admiral::exprs(
132+
EVNTDESC = "DEATH",
133+
SRCDOM = "ADSL",
134+
SRCVAR = "DTHDT"
135+
)
136+
)
137+
# Define event: last date alive
138+
last_alive_dt <- admiral::censor_source(
139+
dataset_name = "adsl",
140+
date = LSTALVDT,
141+
set_values_to = admiral::exprs(
142+
EVNTDESC = "LAST DATE KNOWN ALIVE",
143+
SRCDOM = "ADSL",
144+
SRCVAR = "LSTALVDT"
145+
)
146+
)
147+
# Define event: progression
148+
pd <- admiral::event_source(
149+
dataset_name = "adrs",
150+
filter = AVALC == "PD",
151+
date = ADT,
152+
set_values_to = admiral::exprs(
153+
EVENTDESC = "PD",
154+
SRCDOM = "ADRS",
155+
SRCVAR = "ADTM",
156+
SRCSEQ = ASEQ
157+
)
158+
)
159+
# Start creating dataset
160+
# Derive OS date
161+
admiral::derive_param_tte(
162+
dataset_adsl = adsl,
163+
start_date = RANDDT,
164+
event_conditions = list(death),
165+
censor_conditions = list(last_alive_dt),
166+
source_datasets = list(adsl = adsl, adrs = adrs),
167+
set_values_to = admiral::exprs(PARAMCD = "OS", PARAM = "Overall Survival")
168+
) |>
169+
# Derive TTP date
170+
admiral::derive_param_tte(
171+
dataset_adsl = adsl,
172+
start_date = RANDDT,
173+
event_conditions = list(pd),
174+
censor_conditions = list(last_alive_dt),
175+
source_datasets = list(adsl = adsl, adrs = adrs),
176+
set_values_to = admiral::exprs(PARAMCD = "TTP", PARAM = "Time to Progression")
177+
) |>
178+
# Derive durations of TTP and PFS
179+
dplyr::mutate(
180+
DURN = admiral::compute_duration(
181+
start_date = STARTDT,
182+
end_date = ADT,
183+
trunc_out = FALSE,
184+
out_unit = "weeks",
185+
add_one = FALSE
186+
),
187+
EVFLAG = 1-CNSR
188+
) |>
189+
# Keep only necessary fields
190+
dplyr::select(USUBJID, PARAMCD, DURN, EVFLAG) |>
191+
# Pivot wide the duration and event flag fields
192+
tidyr::pivot_wider(
193+
id_cols = "USUBJID",
194+
names_from = "PARAMCD",
195+
values_from = c("DURN", "EVFLAG")
196+
) |>
197+
# Rename to required field names
198+
dplyr::rename(
199+
ptid = USUBJID,
200+
os.durn = DURN_OS,
201+
os.flag = EVFLAG_OS,
202+
ttp.durn = DURN_TTP,
203+
ttp.flag = EVFLAG_TTP
204+
) |>
205+
# Add a PFS field
206+
dplyr::mutate(
207+
pfs.durn = pmin(ttp.durn, os.durn),
208+
pfs.flag = 1-(1-ttp.flag)*(1-os.flag)
209+
) |>
210+
dplyr::select(ptid, ttp.durn, ttp.flag, pfs.durn, pfs.flag, os.durn, os.flag)
211+
}
212+
213+
#' Check consistency of PFS definition
214+
#' Check that PFS is defined consistently with TTP and OS in a dataset. This convenience function compares `pfs.durn` with the lower of `ttp.durn` and `os.durn`, and checks that the event field `pfs.flag` is consistent with `ttp.flag` and `os.flag` (is 1 when either `ttp.flag` or `os.flag` is one).
215+
#' @param ds Tibble of complete patient-level dataset
216+
#' - `ttp.durn`, `pfs.durn`, and `os.durn` are the durations of TTP (time to progression), PFS (progression-free survival), and OS (overall survival).
217+
#' - `ttp.flag`, `pfs.flag`, and `os.flag`, and `pps.flag` are event flag indicators for TTP, PFS, and OS respectively (1=event, 0=censoring).
218+
#' @export
219+
#' @return List containing:
220+
#' - `durn`: Logical vector comparing expected and actual PFS durations
221+
#' - `flag`: Logical vector comparing expected and actual PFS event flags
222+
#' - `all`: Single logical value of TRUE if all durations and flags match as expected, FALSE otherwise
223+
#' @export
224+
#' @examples
225+
#' ponc <- create_dummydata("pharmaonc")
226+
#' check_consistent_pfs(ponc)
227+
check_consistent_pfs <- function(ds) {
228+
durn <- flag <- NULL
229+
durn <- ds$pfs.durn==pmin(ds$ttp.durn, ds$os.durn)
230+
flag <- ds$pfs.flag==1-(1-ds$ttp.flag)*(1-ds$os.flag)
231+
list(durn=durn, flag=flag, all=all(c(durn,flag)))
232+
}
233+

R/likepsm.R

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#' Compare likelihoods of PSMs
2+
#'
3+
#' Compare the total log-likelihood values for the patient-level dataset after fitting PSM-simple and PSM-complex models to each combination of endpoint distributions
4+
#' @inheritParams calc_allrmds
5+
#' @param fitslist List of distribution fits to relevant endpoints, after calling `fit_ends_mods_par()` or `fit_ends_mods_spl()`
6+
#' @importFrom rlang .data
7+
#' @return List containing
8+
#' - `res`: Dataset of calculation results for each model
9+
#' - `ind_aic`: Set of statistical distributions for TTP, PFS and OS which individually fit each endpoint with the best (lowest) AIC
10+
#' - `ind_bic`: Set of statistical distributions for TTP, PFS and OS which individually fit each endpoint with the best (lowest) BIC
11+
#' - `jt_aic`: Set of statistical distributions for TTP, PFS and OS which overall fit a PSM with the best (lowest) AIC
12+
#' - `jt_bic`: Set of statistical distributions for TTP, PFS and OS which overall fit a PSM with the best (lowest) BIC
13+
#' @export
14+
#' @examples
15+
#' # Fit parametric distributions to a dataset
16+
#' bosonc <- create_dummydata("flexbosms")
17+
#' parfits <- fit_ends_mods_par(bosonc)
18+
#' # Present comparison of likelihood calculations
19+
#' compare_psm_likes(bosonc, parfits)
20+
compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
21+
# Check that fitslist is a list of 6 endpoints
22+
if (length(fitslist)!=6) {stop("The list provided to fitslist must contain all 6 endpoints")}
23+
# Create local variables
24+
eps <- ndists <- aic_indbest <- bic_indbest <- bests <- res <- thisfit <- aic_jtbest <- bic_jtbest <- NULL
25+
ll <- rank_aic <- ttp_meth <- pfs_dist <- os_dist <- rank_bic <- NULL
26+
# TTP, PFS and OS are endpoints 1, 3 and 4
27+
eps <- c(1, 3, 4)
28+
# Number of distributions for each endpoint
29+
ndists <- eps |>
30+
purrr::map_vec(~length(fitslist[[.x]]))
31+
# Best fits for each endpoint - AIC
32+
aic_indbest <- eps |>
33+
purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="aic")$fit$dlist$name)
34+
# Best fits for each endpoint - BIC
35+
bic_indbest <- eps |>
36+
purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="bic")$fit$dlist$name)
37+
# Join as a tibble
38+
bests <- rbind(aic_indbest, bic_indbest)
39+
bests <- tibble::tibble(
40+
ttp_meth = bests[,1],
41+
pfs_dist = bests[,2],
42+
os_dist = bests[,3],
43+
meth = "ind",
44+
ic = c("aic", "bic")
45+
)
46+
# Create results table for each model combination
47+
res <- tibble::tibble(
48+
id = 1:(ndists[3]*ndists[2]*(ndists[1]+1)),
49+
ttp_meth = NA,
50+
pfs_dist = NA,
51+
os_dist = NA,
52+
ll = NA,
53+
npar = NA,
54+
npts = fitslist$os[[1]]$result$N
55+
)
56+
# Create a safe calculation of the PSM-simple likelihood (returns NA on error)
57+
slike_simple <- purrr::possibly(
58+
~calc_likes_psm_simple(
59+
ptdata=ptdata,
60+
dpam=.x,
61+
cuttime=cuttime)$ll[2],
62+
otherwise = NA)
63+
# Create a safe calculation of the PSM-complex likelihood (returns NA on error)
64+
slike_complex <- purrr::possibly(
65+
~calc_likes_psm_complex(
66+
ptdata=ptdata,
67+
dpam=.x,
68+
cuttime=cuttime)$ll[2],
69+
otherwise = NA)
70+
# Compute results for PSM-simple models
71+
message("Calculating PSM simple")
72+
thisfit <- list(ttp=NA, pfs=NA, os=NA)
73+
for (p in 1:ndists[2]) {
74+
thisfit$pfs <- fitslist$pfs[[p]]$result
75+
for (o in 1:ndists[3]) {
76+
thisfit$os <- fitslist$os[[o]]$result
77+
resrow <- (p-1)*ndists[3] + o
78+
res$ttp_meth[resrow] <- "simple"
79+
res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
80+
res$os_dist[resrow] <- thisfit$os$dlist$name
81+
res$ll[resrow] <- slike_simple(thisfit)
82+
res$npar[resrow] <- thisfit$pfs$npars + thisfit$os$npars + 1
83+
}
84+
}
85+
# Compute results for PSM-complex models
86+
message("Calculating PSM complex")
87+
thisfit <- list(ttp=NA, pfs=NA, os=NA)
88+
for (t in 1:ndists[1]) {
89+
thisfit$ttp <- fitslist$ttp[[t]]$result
90+
for (p in 1:ndists[2]) {
91+
thisfit$pfs <- fitslist$pfs[[p]]$result
92+
for (o in 1:ndists[3]) {
93+
thisfit$os <- fitslist$os[[o]]$result
94+
resrow <- t*ndists[3]*ndists[2] + (p-1)*ndists[3] + o
95+
res$ttp_meth[resrow] <- thisfit$ttp$dlist$name
96+
res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
97+
res$os_dist[resrow] <- thisfit$os$dlist$name
98+
res$ll[resrow] <- slike_complex(thisfit)
99+
res$npar[resrow] <- thisfit$ttp$npars + thisfit$pfs$npars + thisfit$os$npars
100+
}
101+
}
102+
}
103+
# Set log-likelihood values to NA if if cannot be calculated (=-Inf)
104+
res$ll[res$ll==-Inf] <- NA
105+
# Add AIC and BIC, with ranks
106+
message("Wrapping up")
107+
res <- res |>
108+
dplyr::mutate(
109+
aic = 2*.data$npar-2*ll,
110+
bic = .data$npar*log(.data$npts)-2*ll,
111+
rank_aic = rank(.data$aic),
112+
rank_bic = rank(.data$bic),
113+
best_aic = 0,
114+
best_bic = 0
115+
)
116+
# Identify best AIC and best BIC model
117+
res$best_aic[res$ttp_meth==aic_indbest[1] & res$pfs_dist==aic_indbest[2] & res$os_dist==aic_indbest[3]] <- 1
118+
res$best_bic[res$ttp_meth==bic_indbest[1] & res$pfs_dist==bic_indbest[2] & res$os_dist==bic_indbest[3]] <- 1
119+
# Identify best distributions for overall AIC and BIC
120+
aic_jtbest <- res |>
121+
dplyr::filter(rank_aic==1) |>
122+
dplyr::select(ttp_meth, pfs_dist, os_dist) |>
123+
dplyr::mutate(meth="joint", ic="aic")
124+
bic_jtbest <- res |>
125+
dplyr::filter(rank_bic==1) |>
126+
dplyr::select(ttp_meth, pfs_dist, os_dist) |>
127+
dplyr::mutate(meth="joint", ic="aic")
128+
# Join together
129+
bests <- bests |>
130+
tibble::add_row(aic_jtbest) |>
131+
tibble::add_row(bic_jtbest)
132+
# Return
133+
return(list(results=res, bests=bests))
134+
}

R/ppdpps.R

+8-15
Original file line numberDiff line numberDiff line change
@@ -71,22 +71,15 @@ calc_haz_psm <- function(timevar, ptdata, dpam, psmtype) {
7171
# OS
7272
hos <- calc_haz(timevar, survobj=dpam$os)
7373
sos <- calc_surv(timevar, survobj=dpam$os)
74-
# TTP complex
75-
http_complex <- calc_haz(timevar, survobj=dpam$ttp)
76-
# TTP simple
77-
ne_pfs <- sum(ptdata$pfs.flag)
78-
ne_ttp <- sum(ptdata$ttp.flag)
79-
progfrac <- max(0, min(1, ne_ttp/ne_pfs))
80-
http_simple <- progfrac*hpf
81-
# TTP
82-
typeflag <- ifelse(psmtype=="simple", 1, 0)
83-
http <- http_simple*typeflag + http_complex*(1-typeflag)
74+
# TTP depends on psmtype
75+
if (psmtype=="simple") {
76+
http <- hpf * max(0, min(1, sum(ptdata$ttp.flag) / sum(ptdata$pfs.flag)))
77+
} else {
78+
http <- calc_haz(timevar, survobj=dpam$ttp)
79+
}
8480
# PPD
85-
hppd_unadj <- hpf-http
86-
hppd_simple <- pmax(0, pmin((1-progfrac)*hpf, sos*hos/spf))
87-
hppd_complex <- pmax(0, pmin(hpf-http, sos*hos/spf))
88-
hppd <- hppd_simple*typeflag + hppd_complex*(1-typeflag)
89-
# PPS
81+
hppd <- pmax(0, pmin(hpf-http, sos*hos/spf))
82+
# PPS, capped at 5000
9083
hpps_unadj <- (sos*hos-spf*hppd)/(sos-spf)
9184
hpps <- pmax(0, pmin(hpps_unadj, 5000))
9285
hpps[timevar==0] <- 0

0 commit comments

Comments
 (0)