Merck
diff --git a/‎DESCRIPTION
+4-2 b/‎DESCRIPTION
+4-2
diff --git a/‎NAMESPACE
+2 b/‎NAMESPACE
+2
diff --git a/‎NEWS.md
+6 b/‎NEWS.md
+6
diff --git a/‎R/datasets.R
+131-3 b/‎R/datasets.R
+131-3
diff --git a/‎R/likepsm.R
+134 b/‎R/likepsm.R
+134
diff --git a/‎R/ppdpps.R
+8-15 b/‎R/ppdpps.R
+8-15
@@ -1,6 +1,6 @@
 Package: psm3mkv
 Title: Evaluate Partitioned Survival and State Transition Models
-Version: 0.3.1
+Version: 0.3.1.9000
 Authors@R: c(
     person("Dominic", "Muston", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4876-7940")),
@@ -10,16 +10,18 @@ Description: Fits and evaluates three-state partitioned survival analyses
     (PartSAs) and Markov models (clock forward or clock reset) to
     progression and overall survival data typically collected in oncology clinical trials. These model structures are typically considered in
     cost-effectiveness modeling in advanced/metastatic cancer indications.
-    Muston (2024). "Informing structural assumptions for three state oncology cost-effectiveness models through model efficiency and fit". Applied Health Economics and Health Policy. In press.
+    Muston (2024). "Informing structural assumptions for three state oncology cost-effectiveness models through model efficiency and fit". Applied Health Economics and Health Policy. <doi:10.1007/s40258-024-00884-2>.
 License: GPL (>= 3)
 URL: https://merck.github.io/psm3mkv/, https://github.com/Merck/psm3mkv
 BugReports: https://github.com/Merck/psm3mkv/issues
 Encoding: UTF-8
 Depends: R (>= 4.1.0)
 Imports:
+    admiral,
     dplyr,
     flexsurv,
     ggplot2,
+    pharmaverseadam,
     purrr,
     rlang,
     SimplicialCubature,
 
@@ -5,6 +5,8 @@ export(calc_haz_psm)
 export(calc_likes)
 export(calc_rmd)
 export(calc_surv_psmpps)
+export(check_consistent_pfs)
+export(compare_psm_likes)
 export(constrain_survprob)
 export(create_dummydata)
 export(create_extrafields)
 
@@ -1,7 +1,13 @@
+# psm3mkv (development version)
+
 # psm3mkv 0.3.1
 
+- Submission to CRAN, including changes requested by CRAN
+
 # psm3mkv 0.3.0
 
+- First submission to CRAN, not accepted
+
 # psm3mkv 0.2.2 (4 May 2024)
 
 Several minor changes to ready the package for CRAN.
 
@@ -21,24 +21,29 @@
 # These functions are used to create dummy datasets to illustrate package use
 # create_dummydata
 # create_dummydata_survcan
+# create_dummydata_pharmaonc
 # create_dummydata_flexbosms
 #
 # ======================================
 
 #' Create dummy dataset for illustration
 #' @description Create dummy dataset to illustrate [psm3mkv]
 #' @param dsname Dataset name, as follows:
-#' * 'flexbosms' provides a dataset based on [flexsurv::bosms3()]. This contains all the fields necessary for [psm3mkv]. Durations have been converted from months in the original dataset to weeks.
-#' * 'survcan' provides a dataset based on [survival::cancer()]. This contains the necessary ID and overall survival fields only. Durations have been converted from days in the original dataset to weeks. You will additionally need to supply PFS and TTP data (fields pfs.durn, pfs.flag, ttp.durn and ttp.flag) to use [psm3mkv].
+#' * `flexbosms` provides a dataset based on [flexsurv::bosms3()]. This contains all the fields necessary for [psm3mkv]. Durations have been converted from months in the original dataset to weeks.
+#' * `pharmaonc` provides a dataset based on [pharmaverseadam::adsl] and [pharmaverseadam::adrs_onco] to demonstrate how this package can be used with ADaM ADTTE datasets.
+#' * `survcan` provides a dataset based on [survival::cancer()]. This contains the necessary ID and overall survival fields only. Durations have been converted from days in the original dataset to weeks. You will additionally need to supply PFS and TTP data (fields pfs.durn, pfs.flag, ttp.durn and ttp.flag) to use [psm3mkv].
 #' @return Tibble dataset, for use with [psm3mkv] functions
 #' @export
 #' @examples
 #' create_dummydata("survcan") |> head()
 #' create_dummydata("flexbosms") |> head()
+#' create_dummydata("pharmaonc") |> head()
 create_dummydata <- function(dsname) {
+  dsname <- stringr::str_to_lower(dsname)  # lower-case the name so the comparisons below ignore case
   if (dsname=="survcan") {create_dummydata_survcan()}
   else if (dsname=="flexbosms") {create_dummydata_flexbosms()}
-  else {stop("Incorrect dataset specified. Must be survcan or flexbosms.")}
+  else if (dsname=="pharmaonc") {create_dummydata_pharmaonc()}
+  else {stop("Incorrect dataset specified. Must be survcan, flexbosms or pharmaonc.")}
 }
 
 #' Create survcan dummy dataset for illustration
@@ -103,3 +108,126 @@ create_dummydata_flexbosms <- function() {
   attr(ds$ttp.flag, "label") <- "Event flag for TTP (1=event, 0=censor)"
   return(ds)
 }
+
+#' Create pharmaonc dataset for illustration
+#' @description Create 'pharmaonc' dummy dataset to illustrate [psm3mkv]. This dataset is derived from `pharmaverseadam::adsl` and `pharmaverseadam::adrs_onco`. Overall Survival and Time To Progression are derived using `admiral::derive_param_tte()`, then durations are calculated in weeks. Progression-Free Survival is then taken as the earlier of the two.
+#' @return Tibble dataset, for use with [psm3mkv] functions
+#' @seealso [create_dummydata()]
+#' @importFrom rlang .data
+#' @noRd
+create_dummydata_pharmaonc <- function() {
+  # Bind column names to NULL locally (presumably to avoid R CMD check notes about undefined globals)
+  DTHFL <- DTHDT <- LSTALVDT <- AVALC <- ADT <- ASEQ <- RANDDT <- STARTDT <- NULL
+  CNSR <- USUBJID <- PARAMCD <- DURN <- EVFLAG <- DURN_OS <- EVFLAG_OS <- DURN_TTP <- EVFLAG_TTP <- NULL
+  ttp.durn <- os.durn <- ttp.flag <- os.flag <- ptid <- pfs.durn <- pfs.flag <- NULL
+  # Obtain ADSL and ADRS datasets from pharmaverseadam
+  adsl <- pharmaverseadam::adsl
+  adrs <- pharmaverseadam::adrs_onco
+  # Define event: death
+  death <- admiral::event_source(
+    dataset_name = "adsl",
+    filter = DTHFL == "Y",
+    date = DTHDT,
+    set_values_to = admiral::exprs(
+      EVNTDESC = "DEATH",
+      SRCDOM = "ADSL",
+      SRCVAR = "DTHDT"
+    )
+  )
+  # Define censoring: last date known alive
+  last_alive_dt <- admiral::censor_source(
+    dataset_name = "adsl",
+    date = LSTALVDT,
+    set_values_to = admiral::exprs(
+      EVNTDESC = "LAST DATE KNOWN ALIVE",
+      SRCDOM = "ADSL",
+      SRCVAR = "LSTALVDT"
+    )
+  )
+  # Define event: progression (traceability variables named consistently with the sources above)
+  pd <- admiral::event_source(
+    dataset_name = "adrs",
+    filter = AVALC == "PD",
+    date = ADT,
+    set_values_to = admiral::exprs(
+      EVNTDESC = "PD",
+      SRCDOM = "ADRS",
+      SRCVAR = "ADT",
+      SRCSEQ = ASEQ
+    )
+  )
+  # Start creating dataset
+  # Derive OS parameter (event = death, censored at last date known alive)
+  admiral::derive_param_tte(
+    dataset_adsl = adsl,
+    start_date = RANDDT,
+    event_conditions = list(death),
+    censor_conditions = list(last_alive_dt),
+    source_datasets = list(adsl = adsl, adrs = adrs),
+    set_values_to = admiral::exprs(PARAMCD = "OS", PARAM = "Overall Survival")
+  ) |>
+  # Derive TTP parameter (event = progression, censored at last date known alive)
+    admiral::derive_param_tte(
+      dataset_adsl = adsl,
+      start_date = RANDDT,
+      event_conditions = list(pd),
+      censor_conditions = list(last_alive_dt),
+      source_datasets = list(adsl = adsl, adrs = adrs),
+      set_values_to = admiral::exprs(PARAMCD = "TTP", PARAM = "Time to Progression")
+    ) |>
+  # Derive durations (in weeks) and event flags for OS and TTP
+    dplyr::mutate(
+      DURN = admiral::compute_duration(
+        start_date = STARTDT,
+        end_date = ADT,
+        trunc_out = FALSE,
+        out_unit = "weeks",
+        add_one = FALSE
+      ),
+      EVFLAG = 1-CNSR  # event flag is the complement of the censoring indicator
+    ) |>
+  # Keep only necessary fields
+    dplyr::select(USUBJID, PARAMCD, DURN, EVFLAG) |>
+  # Pivot wide the duration and event flag fields
+    tidyr::pivot_wider(
+      id_cols = "USUBJID",
+      names_from = "PARAMCD",
+      values_from = c("DURN", "EVFLAG")
+    ) |>
+  # Rename to required field names
+    dplyr::rename(
+      ptid = USUBJID,
+      os.durn = DURN_OS,
+      os.flag = EVFLAG_OS,
+      ttp.durn = DURN_TTP,
+      ttp.flag = EVFLAG_TTP
+    ) |>
+  # Add PFS: duration is the earlier of TTP and OS; flag is 1 if either TTP or OS has an event
+    dplyr::mutate(
+      pfs.durn = pmin(ttp.durn, os.durn),
+      pfs.flag = 1-(1-ttp.flag)*(1-os.flag)
+    ) |>
+    dplyr::select(ptid, ttp.durn, ttp.flag, pfs.durn, pfs.flag, os.durn, os.flag)
+}
+
+#' Check consistency of PFS definition
+#'
+#' Check that PFS is defined consistently with TTP and OS in a dataset. This convenience function compares `pfs.durn` with the lower of `ttp.durn` and `os.durn` (within a small numerical tolerance), and checks that the event field `pfs.flag` is consistent with `ttp.flag` and `os.flag` (is 1 when either `ttp.flag` or `os.flag` is one).
+#' @param ds Tibble of complete patient-level dataset
+#' - `ttp.durn`, `pfs.durn`, and `os.durn` are the durations of TTP (time to progression), PFS (progression-free survival), and OS (overall survival).
+#' - `ttp.flag`, `pfs.flag`, and `os.flag` are event flag indicators for TTP, PFS, and OS respectively (1=event, 0=censoring).
+#' @return List containing:
+#' - `durn`: Logical vector comparing expected and actual PFS durations
+#' - `flag`: Logical vector comparing expected and actual PFS event flags
+#' - `all`: Single logical value of TRUE if all durations and flags match as expected, FALSE otherwise (including when any comparison is missing)
+#' @export
+#' @examples
+#' ponc <- create_dummydata("pharmaonc")
+#' check_consistent_pfs(ponc)
+check_consistent_pfs <- function(ds) {
+  # Durations are doubles, so compare within a tolerance; isTRUE() turns an NA result of all() into FALSE
+  durn <- abs(ds$pfs.durn - pmin(ds$ttp.durn, ds$os.durn)) < sqrt(.Machine$double.eps)
+  flag <- ds$pfs.flag==1-(1-ds$ttp.flag)*(1-ds$os.flag)
+  list(durn=durn, flag=flag, all=isTRUE(all(c(durn,flag))))
+}
+
@@ -0,0 +1,134 @@
+#' Compare likelihoods of PSMs
+#'
+#' Compare the total log-likelihood values for the patient-level dataset after fitting PSM-simple and PSM-complex models to each combination of endpoint distributions
+#' @inheritParams calc_allrmds
+#' @param fitslist List of distribution fits to relevant endpoints, after calling `fit_ends_mods_par()` or `fit_ends_mods_spl()`
+#' @importFrom rlang .data
+#' @return List containing
+#' - `results`: Tibble with one row per model combination, giving the TTP method (`"simple"` or the TTP distribution), PFS and OS distributions, log-likelihood (`ll`), number of parameters (`npar`), number of patients (`npts`), AIC and BIC with their ranks, and flags (`best_aic`, `best_bic`) marking the combination of individually best-fitting distributions
+#' - `bests`: Tibble of the TTP, PFS and OS distributions with the best (lowest) AIC or BIC (column `ic`), selected either individually by endpoint (`meth` is `"ind"`) or jointly across the fitted PSMs (`meth` is `"joint"`)
+#' @export
+#' @examples
+#' # Fit parametric distributions to a dataset
+#' bosonc <- create_dummydata("flexbosms")
+#' parfits <- fit_ends_mods_par(bosonc)
+#' # Present comparison of likelihood calculations
+#' compare_psm_likes(bosonc, parfits)
+compare_psm_likes <- function(ptdata, fitslist, cuttime=0) {
+  # Check that fitslist is a list of 6 endpoints
+  if (length(fitslist)!=6) {stop("The list provided to fitslist must contain all 6 endpoints")}
+  # Create local variables
+  eps <- ndists <- aic_indbest <- bic_indbest <- bests <- res <- thisfit <- aic_jtbest <- bic_jtbest <- NULL
+  ll <- rank_aic <- ttp_meth <- pfs_dist <- os_dist <- rank_bic <- NULL
+  # TTP, PFS and OS are endpoints 1, 3 and 4
+  eps <- c(1, 3, 4)
+  # Number of distributions for each endpoint
+  ndists <- eps |>
+    purrr::map_vec(~length(fitslist[[.x]]))
+  # Best fits for each endpoint - AIC
+  aic_indbest <- eps |>
+    purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="aic")$fit$dlist$name)
+  # Best fits for each endpoint - BIC
+  bic_indbest <- eps |>
+    purrr::map_vec(~find_bestfit(fitslist[[.x]], crit="bic")$fit$dlist$name)
+  # Join as a tibble
+  bests <- rbind(aic_indbest, bic_indbest)
+  bests <- tibble::tibble(
+    ttp_meth = bests[,1],
+    pfs_dist = bests[,2],
+    os_dist = bests[,3],
+    meth = "ind",
+    ic = c("aic", "bic")
+  )
+  # Create results table for each model combination (typed placeholders, filled in below)
+  res <- tibble::tibble(
+    id = seq_len(ndists[3]*ndists[2]*(ndists[1]+1)),
+    ttp_meth = NA_character_,
+    pfs_dist = NA_character_,
+    os_dist = NA_character_,
+    ll = NA_real_,
+    npar = NA_real_,
+    npts = fitslist$os[[1]]$result$N
+  )
+  # Create a safe calculation of the PSM-simple likelihood (returns NA on error)
+  slike_simple <- purrr::possibly(
+    ~calc_likes_psm_simple(
+      ptdata=ptdata,
+      dpam=.x,
+      cuttime=cuttime)$ll[2],
+    otherwise = NA)
+  # Create a safe calculation of the PSM-complex likelihood (returns NA on error)
+  slike_complex <- purrr::possibly(
+    ~calc_likes_psm_complex(
+      ptdata=ptdata,
+      dpam=.x,
+      cuttime=cuttime)$ll[2],
+    otherwise = NA)
+  # Compute results for PSM-simple models
+  message("Calculating PSM simple")
+  thisfit <- list(ttp=NA, pfs=NA, os=NA)
+  for (p in seq_len(ndists[2])) {
+    thisfit$pfs <- fitslist$pfs[[p]]$result
+    for (o in seq_len(ndists[3])) {
+      thisfit$os <- fitslist$os[[o]]$result
+      # PSM-simple models occupy the first ndists[2]*ndists[3] rows of res
+      resrow <- (p-1)*ndists[3] + o
+      res$ttp_meth[resrow] <- "simple"
+      res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
+      res$os_dist[resrow] <- thisfit$os$dlist$name
+      res$ll[resrow] <- slike_simple(thisfit)
+      # One parameter beyond the PFS and OS fits (presumably the progression fraction - TODO confirm)
+      res$npar[resrow] <- thisfit$pfs$npars + thisfit$os$npars + 1
+    }
+  }
+  # Compute results for PSM-complex models
+  message("Calculating PSM complex")
+  thisfit <- list(ttp=NA, pfs=NA, os=NA)
+  for (t in seq_len(ndists[1])) {
+    thisfit$ttp <- fitslist$ttp[[t]]$result
+    for (p in seq_len(ndists[2])) {
+      thisfit$pfs <- fitslist$pfs[[p]]$result
+      for (o in seq_len(ndists[3])) {
+        thisfit$os <- fitslist$os[[o]]$result
+        # PSM-complex models follow the PSM-simple rows, one block of rows per TTP distribution
+        resrow <- t*ndists[3]*ndists[2] + (p-1)*ndists[3] + o
+        res$ttp_meth[resrow] <- thisfit$ttp$dlist$name
+        res$pfs_dist[resrow] <- thisfit$pfs$dlist$name
+        res$os_dist[resrow] <- thisfit$os$dlist$name
+        res$ll[resrow] <- slike_complex(thisfit)
+        res$npar[resrow] <- thisfit$ttp$npars + thisfit$pfs$npars + thisfit$os$npars
+      }
+    }
+  }
+  # Set log-likelihood values to NA if they cannot be calculated (=-Inf)
+  res$ll[res$ll==-Inf] <- NA
+  # Add AIC and BIC, with ranks
+  message("Wrapping up")
+  res <- res |>
+    dplyr::mutate(
+      aic = 2*.data$npar-2*.data$ll,
+      bic = .data$npar*log(.data$npts)-2*.data$ll,
+      rank_aic = rank(.data$aic),
+      rank_bic = rank(.data$bic),
+      best_aic = 0,
+      best_bic = 0
+    )
+  # Flag the model combinations built from the individually best-fitting distributions (by AIC and by BIC)
+  res$best_aic[res$ttp_meth==aic_indbest[1] & res$pfs_dist==aic_indbest[2] & res$os_dist==aic_indbest[3]] <- 1
+  res$best_bic[res$ttp_meth==bic_indbest[1] & res$pfs_dist==bic_indbest[2] & res$os_dist==bic_indbest[3]] <- 1
+  # Identify best distributions for overall AIC and BIC
+  aic_jtbest <- res |>
+    dplyr::filter(rank_aic==1) |>
+    dplyr::select(ttp_meth, pfs_dist, os_dist) |>
+    dplyr::mutate(meth="joint", ic="aic")
+  bic_jtbest <- res |>
+    dplyr::filter(rank_bic==1) |>
+    dplyr::select(ttp_meth, pfs_dist, os_dist) |>
+    dplyr::mutate(meth="joint", ic="bic")
+  # Join together
+  bests <- bests |>
+    tibble::add_row(aic_jtbest) |>
+    tibble::add_row(bic_jtbest)
+  # Return
+  return(list(results=res, bests=bests))
+}
@@ -71,22 +71,15 @@ calc_haz_psm <- function(timevar, ptdata, dpam, psmtype) {
   # OS
   hos <- calc_haz(timevar, survobj=dpam$os)
   sos <- calc_surv(timevar, survobj=dpam$os)
-  # TTP complex
-  http_complex <- calc_haz(timevar, survobj=dpam$ttp)
-  # TTP simple
-  ne_pfs <- sum(ptdata$pfs.flag)
-  ne_ttp <- sum(ptdata$ttp.flag)
-  progfrac <- max(0, min(1, ne_ttp/ne_pfs))
-  http_simple <- progfrac*hpf
-  # TTP
-  typeflag <- ifelse(psmtype=="simple", 1, 0)
-  http <- http_simple*typeflag + http_complex*(1-typeflag)
+  # TTP depends on psmtype
+  if (psmtype=="simple") {
+    http <- hpf * max(0, min(1, sum(ptdata$ttp.flag) / sum(ptdata$pfs.flag)))
+  } else {
+    http <- calc_haz(timevar, survobj=dpam$ttp)
+  }
   # PPD
-  hppd_unadj <- hpf-http
-  hppd_simple <- pmax(0, pmin((1-progfrac)*hpf, sos*hos/spf))
-  hppd_complex <- pmax(0, pmin(hpf-http, sos*hos/spf))
-  hppd <- hppd_simple*typeflag + hppd_complex*(1-typeflag)
-  # PPS
+  hppd <- pmax(0, pmin(hpf-http, sos*hos/spf))
+  # PPS, capped at 5000
   hpps_unadj <- (sos*hos-spf*hppd)/(sos-spf)
   hpps <- pmax(0, pmin(hpps_unadj, 5000))
   hpps[timevar==0] <- 0