-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsharing_CSC_model.R
123 lines (91 loc) · 4.08 KB
/
sharing_CSC_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Load libraries and data -------------------------------------------------
# General packages
library(survival)
library(riskRegression)
# Load datasets
rdata <- readRDS("Data/rdata.rds")
vdata <- readRDS("Data/vdata.rds")
# Fitting and sharing necessary parts for validation ----------------------
# Fit model
fit_csh <- riskRegression::CSC(
formula = Hist(time, status_num) ~ age + size + ncat + hr_status,
data = rdata,
fitter = "coxph" # even if model was built with rms, share with coxph
)
# The bare minimum to share: coefficients, baseline hazards and model 'terms'
# (No data sharing is needed)
model_info <- list(
"coefficients" = stats::coef(fit_csh),
"baseline_hazards" = lapply(fit_csh$models, function(mod) survival::basehaz(mod, centered = FALSE)),
"model_terms" = lapply(fit_csh$models, function(mod) mod[["terms"]])
)
# Can be saved to then be shared as:
#saveRDS(model_info, file = "model_info.rds")
# Calculating predicted risks --------------------------------------------
# Function to calculate predicted risks
predictRisk_shared_CSC <- function(model_info, # List object (see above)
newdata, # Data.frame for which to make predictions
horizon, # Prediction time horizon (numeric)
primary_event) { # Primary event (numeric)
# -- Basic checks
# Check model_info components
info_names <- c("coefficients", "baseline_hazards", "model_terms")
if (any(!(names(model_info) %in% info_names))) {
stop(paste0("Names of model_info components should be: ", paste(info_names, collapse = ", ")))
}
n_causes <- unique(vapply(model_info, length, FUN.VALUE = integer(1L)))
if (length(n_causes) > 1) {
stop("The elements of model_info should all be of length equal to number of competing risks!")
}
# -- Absolute risk prediction
causes_ind <- seq_len(n_causes)
# Calculate linear predictors for all causes in new dataset
linpreds <- lapply(causes_ind, function(cause) {
mod_matrix <- stats::model.matrix(model_info$model_terms[[cause]], data = newdata)
unname(drop(mod_matrix[, -1] %*% model_info$coefficients[[cause]]))
})
# Compute absolute risks for each individual
preds <- vapply(seq_len(nrow(newdata)), function(id) {
# Calculate individual-specific cause-specific hazards
time_points <- model_info$baseline_hazards[[primary_event]][["time"]]
hazards <- vapply(causes_ind, function(cause) {
cumhaz <- model_info$baseline_hazards[[cause]][["hazard"]] * exp(linpreds[[cause]][[id]])
diff(c(0, cumhaz))
}, FUN.VALUE = numeric(length(time_points)))
# Calculate event-free survival
surv <- cumprod(1 - rowSums(hazards))
# Calculate cumulative incidence
cuminc <- cumsum(hazards[, primary_event] * c(1, surv[-length(surv)]))
cuminc_horizon <- cuminc[length(time_points[time_points <= horizon])]
return(cuminc_horizon)
}, FUN.VALUE = numeric(1L))
return(preds)
}
# Set prediction horizon and event of interest
horiz <- 5
cause_interest <- 1
# Get absolute risk
preds <- predictRisk_shared_CSC(model_info, newdata = vdata, horizon = horiz, primary_event = cause_interest)
# Check results against riskRegression::predictRisk()
preds_riskReg <- drop(predictRisk(fit_csh, vdata, times = horiz, cause = cause_interest))
all.equal(preds, preds_riskReg)
# Validation with riskRegression ------------------------------------------
# Supply predicted probabilities, and proceed as with the minimal script..
score_vdata <- Score(
list("csh_validation" = preds),
formula = Hist(time, status_num) ~ 1,
cens.model = "km",
data = vdata,
conf.int = TRUE,
times = horiz,
metrics = c("auc", "brier"),
summary = c("ipa"),
cause = cause_interest,
plots = "calibration"
)
#.. We need to do all of this since predictRisk() needs coxph models to be run
# with x = TRUE, meaning data has to be passed on
# Steps for future development:
# - Account for further stratification/offsets in model
# - Adjust script so it work also with simple survival models
# - .. combined and test within a package