diff --git a/DESCRIPTION b/DESCRIPTION index d176625..91e0258 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: smms Title: Semi-Markov Multi-State Models for Interval Censored Data -Version: 0.0.1.9002 +Version: 1.0.0.9001 Date: 2024-03-12 Authors@R: c( @@ -37,7 +37,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: igraph, parallel, diff --git a/NEWS.md b/NEWS.md index 1ffafff..e770f94 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ # smms (development version) +* Fixed code smells. + +# smms 1.0.0 + * Initial CRAN submission. diff --git a/R/sm_msm_latex_func.R b/R/sm_msm_latex_func.R index d93c943..1446868 100644 --- a/R/sm_msm_latex_func.R +++ b/R/sm_msm_latex_func.R @@ -24,7 +24,7 @@ write_type <- function(obs_type,graph,abs_exact=TRUE){ f_types = names(which(formula_obs_types[, obs_type] == 1)) lik_parts <- rep(NA,length(f_types)) - for (k in 1:length(f_types)){ + for (k in seq_along(f_types)){ form_type=f_types[k] integrand = type_to_integrand(form_type,edge_mats, names_surv_dens,abs_exact=abs_exact) @@ -96,14 +96,14 @@ write_type <- function(obs_type,graph,abs_exact=TRUE){ m <- gregexpr("[fS]_[[:digit:]]{2}",int) call <- regmatches(int,m) - for (i in 1:length(call[[1]])){ + for (i in seq_along(call[[1]])){ callspl <- unlist(strsplit(call[[1]][i],"")) int <- gsub(call[[1]][i],paste(callspl[1],"_{",paste(callspl[3:4],collapse=""),"}",sep=""),int) } # Make timepoints (belonging to the obs_type) - timepoints = c() - for (i in 1:length(otype_splt)){ + timepoints = NULL + for (i in seq_along(otype_splt)){ st <- as.numeric(otype_splt[i]) if (state_ord$type[which(state_ord$order==st)]=="trans"){ ti <- c(paste("t_{",st,"m}",sep=""),paste("t_{",st,"M}",sep="")) @@ -129,20 +129,20 @@ write_type <- function(obs_type,graph,abs_exact=TRUE){ next_state <- splitted_f_type[j+1] if (next_state %in% jump_states & (current_state %in% jump_states)){ lower[j] <- 0 - id_up <- which(1:length(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))>as.numeric(next_state)) + id_up <- which(seq_along(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))>as.numeric(next_state)) upper[j] <- paste(timepoints[min(id_up)],add_ons[j-1],sep="") }else if (next_state %in% jump_states & !(current_state %in% jump_states)){ - id_low <- which(1:length(timepoints)%in%seq(1,19,2) & as.numeric(substr(timepoints,4,4))==as.numeric(current_state)) - id_up <- which(1:length(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))>as.numeric(next_state)) + id_low <- which(seq_along(timepoints)%in%seq(1,19,2) & as.numeric(substr(timepoints,4,4))==as.numeric(current_state)) + id_up <- which(seq_along(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))>as.numeric(next_state)) lower[j] <- paste(timepoints[id_low],add_ons[j-1],sep="") upper[j] <- paste(timepoints[min(id_up)],add_ons[j-1],sep="") }else if (!(next_state %in% jump_states) & (current_state %in% jump_states)){ lower[j] <- 0 - id_up <- which(1:length(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))==as.numeric(next_state)) + id_up <- which(seq_along(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))==as.numeric(next_state)) upper[j] <- paste(timepoints[min(id_up)],add_ons[j-1],sep="") }else{ - id_low <- which(1:length(timepoints)%in%seq(1,19,2) & as.numeric(substr(timepoints,4,4))==as.numeric(current_state)) - id_up <- which(1:length(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))==as.numeric(next_state)) + id_low <- which(seq_along(timepoints)%in%seq(1,19,2) & as.numeric(substr(timepoints,4,4))==as.numeric(current_state)) + id_up <- which(seq_along(timepoints)%in%seq(2,20,2) & as.numeric(substr(timepoints,4,4))==as.numeric(next_state)) lower[j] <- paste(timepoints[id_low],add_ons[j-1],sep="") upper[j] <- paste(timepoints[id_up],add_ons[j-1],sep="") } @@ -192,7 +192,7 @@ write_loglikelihood <- function(graph,abs_exact=TRUE){ o_types <- construct_obs_types(graph) all_parts <- rep(NA,length(o_types)) - for (i in 1:length(all_parts)){ + for (i in seq_along(all_parts)){ all_parts[i] <- write_type(obs_type=o_types[i],graph=graph,abs_exact=abs_exact) } eq <- paste("\\begin{align*} \n \\ell_n (\\theta)= ",paste(all_parts,collapse=" + "),". \n \\end{align*}") diff --git a/R/sm_msm_likelihood_func.R b/R/sm_msm_likelihood_func.R index 47b0238..bbe952a 100644 --- a/R/sm_msm_likelihood_func.R +++ b/R/sm_msm_likelihood_func.R @@ -27,7 +27,7 @@ names_of_survival_density = function(graph){ matrix_names = as.data.frame(matrix(ncol = 6, nrow = length(edge_names))) colnames(matrix_names) = c("edge_name","survival_name", "density_name", "from_prev", "to_prev", "type") - for(i in 1:length(edge_names)){ + for(i in seq_along(edge_names)){ kk_to = which(state_ord$state %in% all_edges[i,2]) matrix_names[i, "survival_name"] = paste(c("S_", edge_names[i]), collapse = "") matrix_names[i, "density_name"] = paste(c("f_", edge_names[i]), collapse = "") @@ -36,7 +36,7 @@ names_of_survival_density = function(graph){ matrix_names[i, "edge_name"] = edge_names[i] matrix_names[i, "type"] = state_ord[kk_to, "type"] } - return(matrix_names) + matrix_names } #' Write out the integrand as a string @@ -344,7 +344,7 @@ change_integrand <- function(integr){ int <- sub("\\{.+?f_","\\{\nss<-times[1]\nf_",int) #sub("\\{.+f_","\\{\nss<-times[1]\nf_01",int) } } - return(eval(parse(text=int))) + eval(parse(text=int)) } @@ -404,7 +404,7 @@ finding_limits <- function(timepoints,form_type,edge_mats,absorbing_states,abs_e #id_na <- which(is.na(M_times) & substr(names(M_times),2,2)%in%unobs_states) #in unobs unecessary? id_na <- which(is.na(M_times)) if (length(id_na)>0){ - for (j in 1:length(id_na)){ ## CHECK + for (j in seq_along(id_na)){ ## CHECK M_times[id_na[j]] <- M_times[id_na[j]-1] } } @@ -464,7 +464,7 @@ finding_limits <- function(timepoints,form_type,edge_mats,absorbing_states,abs_e mloglikelihood <- function(param,integrands,limits, X = NULL,cmethod = "hcubature",mc_cores = 2){ # Test that limits and integrand have same length - final_integral = sum(unlist(parallel::mclapply(1:length(integrands), function(i){ + final_integral = sum(unlist(parallel::mclapply(seq_along(integrands), function(i){ mm <- length(limits[[i]]) lli <- rep(NA,mm) for (j in 1:mm){ @@ -486,13 +486,12 @@ mloglikelihood <- function(param,integrands,limits, X = NULL,cmethod = "hcubatu },error=function(cond){ integrand2 <- change_integrand(integrands[[i]][[j]]) if (length(unique(lower)) != length(lower)){ - llij = cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, + cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, tt = tmax[1], tt2=tmax[2],param = param, x = X[i,])$integral }else if (length(unique(lower)) == length(lower)){ - llij = cubature::cubintegrate(integrand2, lower = lower,upper = upper,maxEval = 500, + cubature::cubintegrate(integrand2, lower = lower,upper = upper,maxEval = 500, method = cmethod, tt = tmax[1], tt2=tmax[2],param = param, x = X[i,])$integral } - return(llij) }) }else if (length(lower)>2){ diff --git a/R/sm_msm_optim_func.R b/R/sm_msm_optim_func.R index 0db5908..2a9abb4 100644 --- a/R/sm_msm_optim_func.R +++ b/R/sm_msm_optim_func.R @@ -42,12 +42,12 @@ smms = function(startval, data, graph, X = NULL, abs_exact=TRUE, mc_cores = 1, h all_integral_limits = list() integrand = list() - for(i in 1:nrow(all_data_set)){ + for(i in seq_len(nrow(all_data_set))){ observation_type[i] = all_data_set[i,"obs_type"] f_types = names(which(formula_obs_types[, observation_type[i]] == 1)) integrand_mellomregn = list() integral_mellomregn= list() - for(j in 1:length(f_types)){ + for(j in seq_along(f_types)){ integrand_mellomregn[[j]] = eval(parse(text=type_to_integrand(f_types[j], edge_mats, names_surv_dens,abs_exact=abs_exact))) integral_mellomregn[[j]] = finding_limits(timepointMat[i,],f_types[j],edge_mats,absorbing_states,abs_exact=abs_exact) } @@ -115,12 +115,12 @@ hessian_matrix = function(param, data, graph, X = NULL, mc_cores = 1,cmethod = " all_integral_limits = list() integrand = list() - for(i in 1:nrow(all_data_set)){ + for(i in seq_len(nrow(all_data_set))){ observation_type[i] = all_data_set[i,"obs_type"] f_types = names(which(formula_obs_types[, observation_type[i]] == 1)) integrand_mellomregn = list() integral_mellomregn= list() - for(j in 1:length(f_types)){ + for(j in seq_along(f_types)){ integrand_mellomregn[[j]] = eval(parse(text=type_to_integrand(f_types[j], edge_mats, names_surv_dens))) integral_mellomregn[[j]] = finding_limits(timepointMat[i,],f_types[j],edge_mats,absorbing_states) } @@ -216,9 +216,8 @@ occupancy_prob = function(state, time, param, graph, xval = NULL){ x = xval) },error=function(cond){ integrand2 <- change_integrand(integrand) - llij = cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, + cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, tt = tmax[1], tt2=tmax[2],param = param, x = xval)$integral - return(llij) }) }else if (length(lower)>2){ @@ -318,8 +317,7 @@ occupancy_prob_delta = function(state, time, param, graph, xval = NULL){ pracma::grad(repintegrate,x0=param,innerfunc=integrand,tt=tmax[1],tt2=tmax[2],lower=lower,upper = upper,x = xval) },error=function(cond){ integrand2 <- change_integrand(integrand) - llij = pracma::grad(cubint,x0=param,integrand=integrand2,lower = lower,upper = upper, tmax=tmax,xval=xval) - return(llij) + pracma::grad(cubint,x0=param,integrand=integrand2,lower = lower,upper = upper, tmax=tmax,xval=xval) }) }else if (length(lower)>2){ @@ -562,9 +560,8 @@ transition_prob = function(trans_ji, time_t,time_v, param, graph, xval = NULL){ x = xval) },error=function(cond){ integrand2 <- change_integrand(integrand) - llij = cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, + cubature::cubintegrate(integrand2, lower = lower,upper = upper, method = "divonne", maxEval = 500, tt = tmax[1], tt2=tmax[2],param = param, x = xval)$integral - return(llij) }) }else if (length(lower)>2){ @@ -685,8 +682,7 @@ transition_prob_delta = function(trans_ji, time_t,time_v, param, graph, xval = N pracma::grad(repintegrate,x0=param,innerfunc=integrand,tt=tmax[1],tt2=tmax[2],lower=lower,upper = upper,x = xval) },error=function(cond){ integrand2 <- change_integrand(integrand) - llij = pracma::grad(cubint,x0=param,integrand=integrand2,lower = lower,upper = upper, tmax=tmax,xval=xval) - return(llij) + pracma::grad(cubint,x0=param,integrand=integrand2,lower = lower,upper = upper, tmax=tmax,xval=xval) }) }else if (length(lower)>2){ diff --git a/R/sm_msm_preprocessing_func.R b/R/sm_msm_preprocessing_func.R index bca0d89..07a1151 100644 --- a/R/sm_msm_preprocessing_func.R +++ b/R/sm_msm_preprocessing_func.R @@ -54,7 +54,7 @@ state_ordering = function(graph){ state_num$order[which(state_num$state %in% absorbing_states)] <- (k-1-length(absorbing_states)+1):(k-1) state_num$type[which(state_num$state %in% absorbing_states)] <- "abs" - return(state_num) + state_num } #' Filter away redundant observations in a multi-state dataset @@ -107,7 +107,7 @@ relevant_timepoints = function(data, graph){ rle_i <- rle(ddi$state) id_unrelevant = NULL - for(j in 1:length(rle_i$values)){ + for(j in seq_along(rle_i$values)){ val <- rle_i$values[j] num <- rle_i$lengths[j] if(val%in%init & num >= 2){ #initial states @@ -121,7 +121,7 @@ relevant_timepoints = function(data, graph){ idd = c(idd,which(ddr$patient==inds[i])[id_unrelevant]) } if (length(idd)>0) ddr = ddr[-idd,] - return(ddr) + ddr } #' Find all formula types @@ -146,23 +146,22 @@ construct_formula_types = function(graph){ subsets[,2] <- rep(c(init,trans,abs),times=length(init)) ## Determine which subsets containt none, one or multiple paths - form_types = c() - for(p in 1:nrow(subsets)){ + form_types = NULL + for(p in seq_len(nrow(subsets))){ paths = igraph::all_simple_paths(graph, state_ord$state[which(state_ord$order==subsets[p,1])], state_ord$state[which(state_ord$order==subsets[p,2])]) ## If only observed in initial state if(length(paths) == 0){ form_types = c(form_types, as.character(subsets[p, 1])) } else{ ## For all the other possible roads to travel - for(i in 1:length(paths)){ + for(i in seq_along(paths)){ st = sort(state_ord$order[which(state_ord$state%in%igraph::as_ids(paths[[i]]))]) data_frame_subset_type_as_char = paste(st, collapse = "") form_types = c(form_types, data_frame_subset_type_as_char) } } } - form_types = unique(form_types) - return(form_types) + unique(form_types) } @@ -176,18 +175,17 @@ construct_formula_types = function(graph){ #' @return A vector with string elements indicating the states in which the patient is observed. construct_obs_types = function(graph){ form_types = construct_formula_types(graph) - obs_types = c() - for (i in 1:length(form_types)){ + obs_types = NULL + for (i in seq_along(form_types)){ st = strsplit(form_types[i],"")[[1]] obs_types = c(obs_types,form_types[i]) if (length(st)>2){ - ot = sapply(2:(length(st)-1), function(r) utils::combn(st[1:length(st)],r),simplify=F) + ot = sapply(2:(length(st)-1), function(r) utils::combn(st[seq_along(st)],r),simplify=F) ot = lapply(ot,function(m) m[,which(m[1,]==st[1])]) obs_types = c(obs_types,unlist(lapply(ot,function(m) apply(m,2,paste,collapse="")))) } } - obs_types = unique(obs_types) - return(obs_types) + unique(obs_types) } #' Find links between formula and observation types @@ -204,9 +202,9 @@ all_types = function(graph){ matrix_all_types = matrix(data = 0, nrow = length(formula_types), ncol = length(observation_types)) rownames(matrix_all_types) = formula_types colnames(matrix_all_types) = observation_types - for(i in 1:length(formula_types)){ + for(i in seq_along(formula_types)){ formula_types_split = unlist(strsplit(formula_types[i], "")) - for(j in 1:length(observation_types)){ + for(j in seq_along(observation_types)){ observation_types_split = unlist(strsplit(observation_types[j], "")) if(formula_types_split[1] == observation_types_split[1] & formula_types_split[length(formula_types_split)] == observation_types_split[length(observation_types_split)] & @@ -215,7 +213,7 @@ all_types = function(graph){ } } } - return(matrix_all_types) + matrix_all_types } #' Arrange data set @@ -260,7 +258,7 @@ arrange_data = function(data, graph){ ## Which observed type the individual is timepoints[i, ncol(timepoints)] = paste(unique(names(tti[!(is.na(tti))])), collapse ="") } - return(timepoints) + timepoints } #' Make edge matrices @@ -286,7 +284,7 @@ edge_matrices = function(graph){ matrix_travelled = matrix(data = 0, nrow = length(formula_types), ncol = length(edge_names)) rownames(matrix_travelled) = formula_types colnames(matrix_travelled) = edge_names - for(i in 1:length(formula_types)){ + for(i in seq_along(formula_types)){ formula_str_pairs = substring(formula_types[i], first = 1:(nchar(formula_types[i]) - 1), last = 2:nchar(formula_types[i])) matches = which(edge_names %in% formula_str_pairs) matches_order = match(edge_names[matches],formula_str_pairs) @@ -298,7 +296,7 @@ edge_matrices = function(graph){ matrix_possible_next = matrix(data = 0, nrow = length(formula_types), ncol = length(edge_names)) rownames(matrix_possible_next) = formula_types colnames(matrix_possible_next) = edge_names - for(i in 1:length(formula_types)){ + for(i in seq_along(formula_types)){ formula_types_split = unlist(strsplit(formula_types[i], "")) id_next <- which(all_edges[,1]==formula_types_split[length(formula_types_split)]) if (length(id_next)==0) next @@ -309,15 +307,14 @@ edge_matrices = function(graph){ matrix_passed = matrix(data = 0, nrow = length(formula_types), ncol = length(edge_names)) rownames(matrix_passed) = formula_types colnames(matrix_passed) = edge_names - for(i in 1:length(formula_types)){ + for(i in seq_along(formula_types)){ formula_str_pairs = substring(formula_types[i], first = 1:(nchar(formula_types[i]) - 1), last = 2:nchar(formula_types[i])) - for (j in 1:length(formula_str_pairs)){ + for (j in seq_along(formula_str_pairs)){ pair_split = unlist(strsplit(formula_str_pairs[j], "")) match_passed = which(pair_split[1]==all_edges[,1] & pair_split[2]!=all_edges[,2]) if (length(match_passed)==0) next matrix_passed[i,match_passed] = j } } - list_all_edges = list("traveled" = matrix_travelled , "passedBy" = matrix_passed, "possible" = matrix_possible_next) - return(list_all_edges) + list("traveled" = matrix_travelled , "passedBy" = matrix_passed, "possible" = matrix_possible_next) } diff --git a/README.Rmd b/README.Rmd index 0f04f50..0377383 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,7 +8,7 @@ output: # smms: Semi-Markov Multi-State models for interval censored data -The `smms` package allows you to fit Semi-Markovian multi-state models to panel datasets. The package constructs and optimises the likelihood of arbitrary multi-state models where the possible state transitions can be described by an acyclic graph with one or more initial states and one or more absorbing states. +The `smms` package allows you to fit Semi-Markovian multi-state models to panel datasets. The package constructs and optimises the likelihood of arbitrary multi-state models where the possible state transitions can be described by an acyclic graph with one or more initial states and one or more absorbing states. Designed for data where the exact state transitions are not necessarily observed, so that we have interval censoring of the transition times. Data like these are sometimes referred to as panel data. @@ -38,7 +38,7 @@ See NEWS.md for changes for complete version history. ## Overview -The main function in this package is the `smms` function, which is used to fit a multi-state model. The user needs to provide a datasets of states and time-points for multiple units of observation, typically referred to as *patients*, a graph describing the states and the possible transitions between them, and a set of parametric densities, and survival functions. +The main function in this package is the `smms` function, which is used to fit a multi-state model. The user needs to provide a datasets of states and time-points for multiple units of observation, typically referred to as *patients*, a graph describing the states and the possible transitions between them, and a set of parametric densities, and survival functions. In the next section, we will illustrate the use of the `smms` function in a simple example. @@ -52,14 +52,14 @@ Here we see the observations belonging to two patients. Note here that the state ```{r, message=FALSE} library(smms) -library(igraph) # For specifying the multi-state graph +library(igraph) # For specifying the multi-state graph library(msm) # To get the CAV dataset dd = cav dd = dd[!is.na(dd$pdiag),] # Remove observations where the patient appears to go back to a previous state (assumed to be impossible): -id_wrong = unique(dd$PTNUM[which(dd$state!=dd$statemax)]) +id_wrong = unique(dd$PTNUM[which(dd$state!=dd$statemax)]) dd = dd[-which(dd$PTNUM %in% id_wrong),] # Change state names from 1,2,3,4 to well, mild, severe, death @@ -74,7 +74,7 @@ print(dd[1:11,]) ### Specifying the model graph -Here we assume a four-state illness death model, since we consider CAV to be irreversible (so we do not allow for patients to move back to less severe states). It is convenient to stick to the same state names as in the dataset when specifying the model graph. +Here we assume a four-state illness death model, since we consider CAV to be irreversible (so we do not allow for patients to move back to less severe states). It is convenient to stick to the same state names as in the dataset when specifying the model graph. ```{r} # Specify the graph: @@ -83,7 +83,7 @@ par(mar=c(1,1,1,1)) plot(gg,layout=layout_with_sugiyama(gg,layers=c(1,1,1,2))$layout,vertex.size=40) ``` -### Specifying parametric models +### Specifying parametric models Then, the user has to specify parametric models for all transition times (meaning one for each edge in the graph). In the current version of the package, these models have to be specified by providing density functions (in a specific format detailed below), as well as the corresponding survival functions. The functions will look like the following if one chooses to use simple exponential models for all transitions (i.e. meaning that we are fitting a homogeneous Markov model which could have been fitted with the `msm`package too - but this is for the sake of a simple illustration): @@ -132,24 +132,25 @@ One needs some start values for the optimisation, and we will soon add a functio load("README_files/cav_expo_noCov_optims") ``` -We can compute AIC, and look at the estimated parameters and approximate 95% confidence intervals. +We can compute AIC, and look at the estimated parameters and approximate 95% confidence intervals. ```{r} # Compute AIC (higher values are better with this definition) aic <- (-2*mlo$opt$objective)-2*length(mlo$opt$par) #-2887.1 -# Look at estimates and 95% confidence intervals. +# Look at estimates and 95% confidence intervals. # On the -Inf to Inf scale: -print(round(est_ci(mlo$opt$par,mlo$hess),2)) +print(round(est_ci(mlo$opt$par,mlo$hess),2)) # On the 0 to Inf scale (on the transition intensity scale): -round(exp(est_ci(mlo$opt$par,mlo$hess)),2) +round(exp(est_ci(mlo$opt$par,mlo$hess)),2) ``` ## References -* Jackson CH (2011). [Multi-State Models for Panel Data: The msm Package for R.](https://www.jstatsoft.org/v38/i08/) Journal of Statistical Software, 38, 1–29. - +* Jackson CH (2011). [Multi-State Models for Panel Data: The msm Package for R.](https://www.jstatsoft.org/v38/i08/) Journal of Statistical Software, 38, 1–29. +# Badges +[![CodeFactor](https://www.codefactor.io/repository/github/wleoncio/smms/badge)](https://www.codefactor.io/repository/github/wleoncio/smms) diff --git a/scripts/basic_survival_model.R b/scripts/basic_survival_model.R index 65acf25..25147da 100644 --- a/scripts/basic_survival_model.R +++ b/scripts/basic_survival_model.R @@ -1,8 +1,8 @@ #### Simple survival analysis model: Boag (1949) dataset -### Expo model without covariates +### Expo model without covariates library(smms) -library(igraph) # For specifying the multi-state graph +library(igraph) # For specifying the multi-state graph # Graph: gg = graph_from_literal("0"--+"1") @@ -30,14 +30,14 @@ S_01 = function(param, x, t){(1-pexp(t,exp(param[1])))} f_01 = function(param, x, t){dexp(t,exp(param[1]))} # Optimize: -startval <- c(0.8) +startval <- 0.8 mlo <- smms(startval,dd,gg, mc_cores = 1, hessian_matrix = T) # Prevalence plot -tval <- seq(0.01,1,length=50) +tval <- seq(0.01,1,length=50) # a sequence of time-points over which to compute the state occupancies -p0_ci <- occupancy_prob_ci_band("0",tval,mlo$opt$par,gg,hessian=mlo$hess) +p0_ci <- occupancy_prob_ci_band("0",tval,mlo$opt$par,gg,hessian=mlo$hess) #for computing the confidence bands, the hessian needs to be provided (but there # exists a function computing only the occupancy probabilities too) p1_ci <- occupancy_prob_ci_band("1",tval,mlo$opt$par,gg,hessian=mlo$hess) @@ -80,4 +80,4 @@ plot(km, col="dark grey",lwd=2,xlab="time",ylab="survival") polygon(c(tval, rev(tval)), c(So$upper, rev(So$lower)), col = adjustcolor("#0571b0",alpha.f=0.2), border=NA) lines(tval,So$est,col="#0571b0",lwd=3) -# the simple expo model doesn't seem to fit so well based on these plots \ No newline at end of file +# the simple expo model doesn't seem to fit so well based on these plots diff --git a/scripts/cav_Weibull_genWeibull.R b/scripts/cav_Weibull_genWeibull.R index a1232e0..dbfb2d8 100644 --- a/scripts/cav_Weibull_genWeibull.R +++ b/scripts/cav_Weibull_genWeibull.R @@ -14,7 +14,7 @@ dim(dd[dd$firstobs==1,]) # 614 patients #first obs (at years=0) is always in state 1 id_wrong = unique(dd$PTNUM[which(dd$state!=dd$statemax)]) # observations where the patient appears to go back to a previous state dd = dd[-which(dd$PTNUM %in% id_wrong),] -## Only relevant parts +## Only relevant parts dd = dd[ ,-c(2, 5, 7, 9, 10)] dd$dage_st = (dd$dage-mean(dd$dage))/sd(dd$dage) dd$ihd = (dd$pdiag=="IHD") @@ -30,7 +30,7 @@ names_of_survival_density(gg) pXweibull <- function(tt,a,b,th){ pp <- 1-exp(1-(1+(tt/b)^a)^(1/th)) pp[tt<0] <- 0 #to ensure that the survival function returns 1 when tt<0 - return(pp) + pp } dXweibull <- function(tt,a,b,th){ (1/th)*(a/b)*(tt/b)^(a-1)*(1+(tt/b)^a)^(1/th-1)*exp(1-(1+(tt/b)^a)^(1/th)) diff --git a/scripts/simulations_illness_death_comp_true.R b/scripts/simulations_illness_death_comp_true.R index a7ea0a0..ab4ac9b 100644 --- a/scripts/simulations_illness_death_comp_true.R +++ b/scripts/simulations_illness_death_comp_true.R @@ -3,7 +3,7 @@ library(Rlab) library(numDeriv) library(parallel) -## Example of how to perform simulations when the competing risks approach is true. +## Example of how to perform simulations when the competing risks approach is true. nn = 100 theta_hat_par_embed = matrix(NA, nrow = nn, ncol = 11) @@ -45,12 +45,12 @@ simulation_t = function(param, n){ T_01 = rep(NA, n) T_12 = rep(NA, n) T_02 = rep(NA, n) - + t_type0 = matrix(, nrow = 0, ncol = 2) t_type1 = matrix(, nrow = 0, ncol = 4) t_type2 = matrix(, nrow = 0, ncol = 4) t_type3 = matrix(, nrow = 0, ncol = 2) - + for(i in 1:n){ x = sample(0:1, 1) unif1 = runif(1, min = 0, max = 1) @@ -71,7 +71,7 @@ simulation_t = function(param, n){ 1-pweibull(T, exp(param[5] + param[9]*x), exp(param[6])) - unif3 } T_02[i] = uniroot(S02, interval = c(1.e-14, 1e04), tol = 1e-9)$root - + mm <- sample(2:10,1) t <- c(0,sort(rlnorm(mm-1,-1,1))) t_n = length(t) @@ -85,45 +85,45 @@ simulation_t = function(param, n){ } else if(any(t > T_01[i] + T_12[i])){ lower_int1 = max(which(t < T_01[i])) upper_int1 = min(which(t > T_01[i])) - t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), + t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), T_01[i] + T_12[i], x)) } } else if(any(t > T_02[i]) & T_02[i] < T_01[i]){ t_type3 = rbind(t_type3, c(T_02[i], x)) } } - return(list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3)) + list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3) } ## Competing risks approach sum_all_comp = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1_comp = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])* + type_1_comp = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])* S_02_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])))))) - type_2_comp = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02_comp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_2_comp = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02_comp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a, x = t_type1[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3_comp = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02_comp, lower = t_type2[i,1], upper = t_type2[i,2], + type_3_comp = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02_comp, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a, x = t_type2[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4_comp = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])* + type_4_comp = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])* S_01_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) - - return(type_1_comp + type_2_comp + type_3_comp + type_4_comp) + + type_1_comp + type_2_comp + type_3_comp + type_4_comp } ## Embedded Markov chain approach sum_all_embed = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1_embed = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01_embed(a = a, t = t_type0[i, 1], x = t_type0[i, 2])+ + type_1_embed = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01_embed(a = a, t = t_type0[i, 1], x = t_type0[i, 2])+ S_02_embed(a = a, t = t_type0[i, 1], x = t_type0[i, 2])))))) - type_2_embed = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02_embed, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_2_embed = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02_embed, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a, x = t_type1[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3_embed = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02_embed, lower = t_type2[i,1], upper = t_type2[i,2], + type_3_embed = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02_embed, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a, x = t_type2[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4_embed = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02_embed(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) - - return(type_1_embed + type_2_embed + type_3_embed + type_4_embed) + type_4_embed = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02_embed(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) + + type_1_embed + type_2_embed + type_3_embed + type_4_embed } log_lik_semi_markov = rep(NA, nn) @@ -137,20 +137,20 @@ for(mn in 1:nn){ param_embed = c(0.4, 0.6,-1.2, 1.6, -0.5, 0.2, 1.1, -0.9, -1.2, 0, -0.8) n = 1000 t = simulation_t(param_comp, n) - + t_type0 = t$t_type0 t_type1 = t$t_type1 t_type2 = t$t_type2 t_type3 = t$t_type3 - + print(nrow(t_type0) + nrow(t_type1) + nrow(t_type2) + nrow(t_type3)) - + theta_hat_comp = nlminb(param_comp, sum_all_comp,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3) - - + + theta_hat_embed = nlminb(param_embed, sum_all_embed,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3) - + theta_hat_middle_comp = as.matrix(theta_hat_comp$par) theta_hat_par_comp[mn, 1] = theta_hat_middle_comp[1, 1] theta_hat_par_comp[mn, 2] = theta_hat_middle_comp[2, 1] @@ -161,8 +161,8 @@ for(mn in 1:nn){ theta_hat_par_comp[mn, 7] = theta_hat_middle_comp[7, 1] theta_hat_par_comp[mn, 8] = theta_hat_middle_comp[8, 1] theta_hat_par_comp[mn, 9] = theta_hat_middle_comp[9, 1] - - theta_hat_hessian_comp = hessian(sum_all_comp, theta_hat_comp$par, t_type0 = t_type0, t_type1 = t_type1, + + theta_hat_hessian_comp = hessian(sum_all_comp, theta_hat_comp$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3) theta_hat_hessian_solved_comp = solve(theta_hat_hessian_comp) @@ -175,7 +175,7 @@ for(mn in 1:nn){ theta_hat_solve_hessian_comp[mn, 7] = theta_hat_hessian_solved_comp[7, 7] theta_hat_solve_hessian_comp[mn, 8] = theta_hat_hessian_solved_comp[8, 8] theta_hat_solve_hessian_comp[mn, 9] = theta_hat_hessian_solved_comp[9, 9] - + theta_hat_middle = as.matrix(theta_hat_embed$par) theta_hat_par_embed[mn, 1] = theta_hat_middle[1, 1] theta_hat_par_embed[mn, 2] = theta_hat_middle[2, 1] @@ -188,8 +188,8 @@ for(mn in 1:nn){ theta_hat_par_embed[mn, 9] = theta_hat_middle[9, 1] theta_hat_par_embed[mn, 10] = theta_hat_middle[10, 1] theta_hat_par_embed[mn, 11] = theta_hat_middle[11, 1] - - + + theta_hat_hessian_embed = hessian(sum_all_embed, theta_hat_embed$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3) theta_hat_hessian_solved_embed = solve(theta_hat_hessian_embed) theta_hat_solve_hessian_embed[mn, 1] = theta_hat_hessian_solved_embed[1, 1] @@ -203,5 +203,5 @@ for(mn in 1:nn){ theta_hat_solve_hessian_embed[mn, 9] = theta_hat_hessian_solved_embed[9, 9] theta_hat_solve_hessian_embed[mn, 10] = theta_hat_hessian_solved_embed[10, 10] theta_hat_solve_hessian_embed[mn, 11] = theta_hat_hessian_solved_embed[11, 11] - + } diff --git a/scripts/simulations_illness_death_embed_true.R b/scripts/simulations_illness_death_embed_true.R index ea85431..9bef1ac 100644 --- a/scripts/simulations_illness_death_embed_true.R +++ b/scripts/simulations_illness_death_embed_true.R @@ -45,7 +45,7 @@ simulation_t = function(param, n){ T_01 = rep(NA, n) T_12 = rep(NA, n) T_02 = rep(NA, n) - + t_type0 = matrix(, nrow = 0, ncol = 2) t_type1 = matrix(, nrow = 0, ncol = 4) t_type2 = matrix(, nrow = 0, ncol = 4) @@ -71,7 +71,7 @@ simulation_t = function(param, n){ 1-pweibull(T, exp(param[6] + param[10]*x), exp(param[7])) - unif3 } T_02[i] = uniroot(S02, interval = c(1.e-14, 1e04), tol = 1e-9)$root - + mm <- sample(2:10,1) t <- c(0,sort(rlnorm(mm-1,-1,1))) t_n = length(t) @@ -85,42 +85,42 @@ simulation_t = function(param, n){ } else if(any(t > T_01[i] + T_12[i])){ lower_int1 = max(which(t < T_01[i])) upper_int1 = min(which(t > T_01[i])) - t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), + t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), T_01[i] + T_12[i], x)) } } else if(any(t > T_02[i]) & pi_01 == 1){ t_type3 = rbind(t_type3, c(T_02[i], x)) } } - return(list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3)) + list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3) } sum_all_comp = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1_comp = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])*S_02_comp(a = a, t = t_type0[i, 1], + type_1_comp = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])*S_02_comp(a = a, t = t_type0[i, 1], x = t_type0[i, 2])))))) - type_2_comp = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02_comp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_2_comp = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02_comp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a, x = t_type1[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3_comp = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02_comp, lower = t_type2[i,1], upper = t_type2[i,2], + type_3_comp = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02_comp, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a, x = t_type2[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4_comp = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])*S_01_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) - - return(type_1_comp + type_2_comp + type_3_comp + type_4_comp) + type_4_comp = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])*S_01_comp(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) + + type_1_comp + type_2_comp + type_3_comp + type_4_comp } sum_all_embed = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1_embed = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01_embed(a = a, t = t_type0[i, 1], + type_1_embed = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01_embed(a = a, t = t_type0[i, 1], x = t_type0[i, 2])+S_02_embed(a = a, t = t_type0[i, 1], x = t_type0[i, 2])))))) - type_2_embed = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02_embed, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_2_embed = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02_embed, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a, x = t_type1[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3_embed = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02_embed, lower = t_type2[i,1], upper = t_type2[i,2], + type_3_embed = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02_embed, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a, x = t_type2[i, 4])[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4_embed = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02_embed(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) - - return(type_1_embed + type_2_embed + type_3_embed + type_4_embed) + type_4_embed = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02_embed(a = a, t = t_type3[i, 1], x = t_type3[i, 2])))))) + + type_1_embed + type_2_embed + type_3_embed + type_4_embed } log_lik_semi_markov = rep(NA, nn) @@ -134,19 +134,19 @@ for(mn in 1:nn){ param_embed = c(0.4, 0.6, -0.7, 1.6, -0.5, 0.2, 1.1, -0.9, -1.2, 0) n = 1000 t = simulation_t(param_embed, n) - + t_type0 = t$t_type0 t_type1 = t$t_type1 t_type2 = t$t_type2 t_type3 = t$t_type3 - + print(nrow(t_type0) + nrow(t_type1) + nrow(t_type2) + nrow(t_type3)) - + theta_hat_comp = nlminb(param_comp, sum_all_comp,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 5) - + theta_hat_embed = nlminb(param_embed, sum_all_embed,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 5) - + theta_hat_middle_comp = as.matrix(theta_hat_comp$par) theta_hat_par_comp[mn, 1] = theta_hat_middle_comp[1, 1] theta_hat_par_comp[mn, 2] = theta_hat_middle_comp[2, 1] @@ -157,8 +157,8 @@ for(mn in 1:nn){ theta_hat_par_comp[mn, 7] = theta_hat_middle_comp[7, 1] theta_hat_par_comp[mn, 8] = theta_hat_middle_comp[8, 1] theta_hat_par_comp[mn, 9] = theta_hat_middle_comp[9, 1] - - theta_hat_hessian_comp = hessian(sum_all_comp, theta_hat_comp$par, t_type0 = t_type0, t_type1 = t_type1, + + theta_hat_hessian_comp = hessian(sum_all_comp, theta_hat_comp$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 5) theta_hat_hessian_solved_comp = solve(theta_hat_hessian_comp) @@ -171,8 +171,8 @@ for(mn in 1:nn){ theta_hat_solve_hessian_comp[mn, 7] = theta_hat_hessian_solved_comp[7, 7] theta_hat_solve_hessian_comp[mn, 8] = theta_hat_hessian_solved_comp[8, 8] theta_hat_solve_hessian_comp[mn, 9] = theta_hat_hessian_solved_comp[9, 9] - - + + theta_hat_middle = as.matrix(theta_hat_embed$par) theta_hat_par_embed[mn, 1] = theta_hat_middle[1, 1] theta_hat_par_embed[mn, 2] = theta_hat_middle[2, 1] @@ -184,8 +184,8 @@ for(mn in 1:nn){ theta_hat_par_embed[mn, 8] = theta_hat_middle[8, 1] theta_hat_par_embed[mn, 9] = theta_hat_middle[9, 1] theta_hat_par_embed[mn, 10] = theta_hat_middle[10, 1] - - + + theta_hat_hessian_embed = hessian(sum_all_embed, theta_hat_embed$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 5) theta_hat_hessian_solved_embed = solve(theta_hat_hessian_embed) theta_hat_solve_hessian_embed[mn, 1] = theta_hat_hessian_solved_embed[1, 1] @@ -198,4 +198,4 @@ for(mn in 1:nn){ theta_hat_solve_hessian_embed[mn, 8] = theta_hat_hessian_solved_embed[8, 8] theta_hat_solve_hessian_embed[mn, 9] = theta_hat_hessian_solved_embed[9, 9] theta_hat_solve_hessian_embed[mn, 10] = theta_hat_hessian_solved_embed[10, 10] -} \ No newline at end of file +} diff --git a/scripts/simulations_markov_illness_death.R b/scripts/simulations_markov_illness_death.R index a9fb29f..8bdecab 100644 --- a/scripts/simulations_markov_illness_death.R +++ b/scripts/simulations_markov_illness_death.R @@ -45,12 +45,12 @@ simulation_t = function(param, n){ T_01 = rep(NA, n) T_12 = rep(NA, n) T_02 = rep(NA, n) - + t_type0 = matrix(, nrow = 0, ncol = 1) t_type1 = matrix(, nrow = 0, ncol = 3) t_type2 = matrix(, nrow = 0, ncol = 3) t_type3 = matrix(, nrow = 0, ncol = 1) - + for(i in 1:n){ unif1 = runif(1, min = 0, max = 1) S01 = function(T) @@ -70,7 +70,7 @@ simulation_t = function(param, n){ 1-pweibull(T, param[5], param[6]) - unif3 } T_02[i] = uniroot(S02, interval = c(1.e-14, 1e04), tol = 1e-9)$root - + #mm = sample(2:10,1) #t = c(0,sort(rlnorm(mm-1,-1,1))) mm = sample(2:20,1) @@ -86,42 +86,42 @@ simulation_t = function(param, n){ } else if(any(t > T_01[i] + T_12[i])){ lower_int1 = max(which(t < T_01[i])) upper_int1 = min(which(t > T_01[i])) - t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), + t_type2 = rbind(t_type2, c(t[lower_int1], min(T_01[i] + T_12[i],t[upper_int1]), T_01[i] + T_12[i])) } } else if(any(t > T_02[i]) & T_02[i] < T_01[i]){ t_type3 = rbind(t_type3, T_02[i]) } } - return(list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3)) + list(t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3) } sum_all = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1 = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01(a = a, t = t_type0[i, 1])*S_02(a = a, t = t_type0[i, 1])))))) - type_2 = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_1 = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01(a = a, t = t_type0[i, 1])*S_02(a = a, t = t_type0[i, 1])))))) + type_2 = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a)[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3 = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02, lower = t_type2[i,1], upper = t_type2[i,2], + type_3 = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a)[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4 = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02(a = a, t = t_type3[i, 1])* + type_4 = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02(a = a, t = t_type3[i, 1])* S_01(a = a, t = t_type3[i, 1])))))) - - return(type_1 + type_2 + type_3 + type_4) + + type_1 + type_2 + type_3 + type_4 } sum_all_markov = function(a, t_type0, t_type1, t_type2, t_type3, mc_cores){ - type_1_exp = -sum(unlist(mclapply(1:nrow(t_type0), function(i) (log(S_01_exp(a = a, t = t_type0[i, 1])*S_02_exp(a = a, t = t_type0[i, 1])))))) - type_2_exp = -sum(unlist(mclapply(1:nrow(t_type1), function(i)(log(as.numeric( - integrate(f01_S12_S02_exp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], + type_1_exp = -sum(unlist(mclapply(seq_len(nrow(t_type0)), function(i) (log(S_01_exp(a = a, t = t_type0[i, 1])*S_02_exp(a = a, t = t_type0[i, 1])))))) + type_2_exp = -sum(unlist(mclapply(seq_len(nrow(t_type1)), function(i)(log(as.numeric( + integrate(f01_S12_S02_exp, lower = t_type1[i,1], upper = t_type1[i,2], w = t_type1[i,3], a = a)[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_3_exp = -sum(unlist(mclapply(1:nrow(t_type2), function(i)(log(as.numeric( - integrate(f01_f12_S02_exp, lower = t_type2[i,1], upper = t_type2[i,2], + type_3_exp = -sum(unlist(mclapply(seq_len(nrow(t_type2)), function(i)(log(as.numeric( + integrate(f01_f12_S02_exp, lower = t_type2[i,1], upper = t_type2[i,2], w = t_type2[i,3], a = a)[1]))),mc.cores = mc_cores), use.names = FALSE)) - type_4_exp = -sum(unlist(mclapply(1:nrow(t_type3), function(i) (log(f_02_exp(a = a, t = t_type3[i, 1])* + type_4_exp = -sum(unlist(mclapply(seq_len(nrow(t_type3)), function(i) (log(f_02_exp(a = a, t = t_type3[i, 1])* S_01_exp(a = a, t = t_type3[i, 1])))))) - - return(type_1_exp + type_2_exp + type_3_exp + type_4_exp) + + type_1_exp + type_2_exp + type_3_exp + type_4_exp } log_lik_semi_markov = rep(NA, nn) @@ -137,35 +137,35 @@ for(mn in 1:nn){ param_markov = c(1,1,1) n = 1000 t = simulation_t(param, n) - + t_type0 = t$t_type0 t_type1 = t$t_type1 t_type2 = t$t_type2 t_type3 = t$t_type3 - + print(nrow(t_type0) + nrow(t_type1) + nrow(t_type2) + nrow(t_type3)) - - - theta_hat_markov = nlminb(param_markov, sum_all_markov,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, + + + theta_hat_markov = nlminb(param_markov, sum_all_markov,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3, lower = c(eps, eps, eps)) - + theta_hat = nlminb(param, sum_all,t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3, lower = c(eps, eps, eps, eps, eps, eps)) - + theta_hat_middle_markov = as.matrix(theta_hat_markov$par) theta_hat_par_markov[mn, 1] = theta_hat_middle_markov[1, 1] theta_hat_par_markov[mn, 2] = theta_hat_middle_markov[2, 1] theta_hat_par_markov[mn, 3] = theta_hat_middle_markov[3, 1] - + theta_hat_hessian_markov = hessian(sum_all_markov, theta_hat_markov$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3, mc_cores = 3) theta_hat_hessian_solved_markov = solve(theta_hat_hessian_markov) theta_hat_solve_hessian_markov[mn, 1] = theta_hat_hessian_solved_markov[1, 1] theta_hat_solve_hessian_markov[mn, 2] = theta_hat_hessian_solved_markov[2, 2] theta_hat_solve_hessian_markov[mn, 3] = theta_hat_hessian_solved_markov[3, 3] - + log_lik_markov[mn] = theta_hat_markov$objective - + theta_hat_middle = as.matrix(theta_hat$par) theta_hat_par[mn, 1] = theta_hat_middle[1, 1] theta_hat_par[mn, 2] = theta_hat_middle[2, 1] @@ -173,7 +173,7 @@ for(mn in 1:nn){ theta_hat_par[mn, 4] = theta_hat_middle[4, 1] theta_hat_par[mn, 5] = theta_hat_middle[5, 1] theta_hat_par[mn, 6] = theta_hat_middle[6, 1] - + theta_hat_hessian = hessian(sum_all, theta_hat$par, t_type0 = t_type0, t_type1 = t_type1, t_type2 = t_type2, t_type3 = t_type3,mc_cores = 3) theta_hat_hessian_solved = solve(theta_hat_hessian) theta_hat_solve_hessian[mn, 1] = theta_hat_hessian_solved[1, 1] @@ -182,6 +182,6 @@ for(mn in 1:nn){ theta_hat_solve_hessian[mn, 4] = theta_hat_hessian_solved[4, 4] theta_hat_solve_hessian[mn, 5] = theta_hat_hessian_solved[5, 5] theta_hat_solve_hessian[mn, 6] = theta_hat_hessian_solved[6, 6] - + log_lik_semi_markov[mn] = theta_hat$objective } diff --git a/scripts/timing_experiment.R b/scripts/timing_experiment.R index 05e876f..be091aa 100644 --- a/scripts/timing_experiment.R +++ b/scripts/timing_experiment.R @@ -2,7 +2,7 @@ #### up to 7 state progressive with absorbing not observed exactly setwd("~/Multistate models/smms") rm(list = ls()) -devtools::load_all() +devtools::load_all() library(igraph) library(parallel) library(cubature) @@ -39,7 +39,7 @@ f_56 = function(param, x, t){as.numeric(t>=0)*dexp(t,exp(param[6]))} for (k in 1:6){ tt <- rep(NA,mm) - + for (m in 1:mm){ param <- rep(1,k) int_dim <- k-1 @@ -51,25 +51,25 @@ for (k in 1:6){ } gg <- graphs[[k]] dda <- arrange_data(dd,gg) - + formula_obs_types = all_types(gg) edge_mats <- edge_matrices(gg) state_ord = state_ordering(gg) absorbing_states <- sort(state_ord$order[which(state_ord$type=="abs")]) names_surv_dens = names_of_survival_density(gg) - + timepointMat <- dda[,1:(dim(dda)[2]-1)] - + observation_type = rep(NA, nrow(dda)) all_integral_limits = list() integrand = list() - - for(i in 1:nrow(dda)){ + + for(i in seq_len(nrow(dda))){ observation_type[i] = dda[i,"obs_type"] f_types = names(which(formula_obs_types[, observation_type[i]] == 1)) integrand_mellomregn = list() integral_mellomregn= list() - for(j in 1:length(f_types)){ + for(j in seq_along(f_types)){ integrand_mellomregn[[j]] = eval(parse(text=type_to_integrand(f_types[j], edge_mats, names_surv_dens,abs_exact = F))) integral_mellomregn[[j]] = finding_limits(timepointMat[i,],f_types[j],edge_mats, absorbing_states,abs_exact = F) } @@ -102,7 +102,7 @@ f_56 = function(param, x, t){as.numeric(t>=0)*dexp(t,exp(param[7]))} for (k in 1:6){ tt <- rep(NA,mm) - + for (m in 1:mm){ param <- c(1,0.5,rep(1,k-1)) int_dim <- k-1 @@ -119,25 +119,25 @@ for (k in 1:6){ } gg <- graphs[[k]] dda <- arrange_data(dd,gg) - + formula_obs_types = all_types(gg) edge_mats <- edge_matrices(gg) state_ord = state_ordering(gg) absorbing_states <- sort(state_ord$order[which(state_ord$type=="abs")]) names_surv_dens = names_of_survival_density(gg) - + timepointMat <- dda[,1:(dim(dda)[2]-1)] - + observation_type = rep(NA, nrow(dda)) all_integral_limits = list() integrand = list() - - for(i in 1:nrow(dda)){ + + for(i in seq_len(nrow(dda))){ observation_type[i] = dda[i,"obs_type"] f_types = names(which(formula_obs_types[, observation_type[i]] == 1)) integrand_mellomregn = list() integral_mellomregn= list() - for(j in 1:length(f_types)){ + for(j in seq_along(f_types)){ integrand_mellomregn[[j]] = eval(parse(text=type_to_integrand(f_types[j], edge_mats, names_surv_dens,abs_exact = F))) integral_mellomregn[[j]] = finding_limits(timepointMat[i,],f_types[j],edge_mats, absorbing_states,abs_exact = F) } @@ -171,7 +171,7 @@ f_56 = function(param, x, t){as.numeric(t>=0)*dweibull(t,exp(param[11]),exp(para for (k in 1:6){ tt <- rep(NA,mm) - + for (m in 1:mm){ param <- c(rep(c(1,0.5),k)) int_dim <- k-1 @@ -183,25 +183,25 @@ for (k in 1:6){ } gg <- graphs[[k]] dda <- arrange_data(dd,gg) - + formula_obs_types = all_types(gg) edge_mats <- edge_matrices(gg) state_ord = state_ordering(gg) absorbing_states <- sort(state_ord$order[which(state_ord$type=="abs")]) names_surv_dens = names_of_survival_density(gg) - + timepointMat <- dda[,1:(dim(dda)[2]-1)] - + observation_type = rep(NA, nrow(dda)) all_integral_limits = list() integrand = list() - - for(i in 1:nrow(dda)){ + + for(i in seq_len(nrow(dda))){ observation_type[i] = dda[i,"obs_type"] f_types = names(which(formula_obs_types[, observation_type[i]] == 1)) integrand_mellomregn = list() integral_mellomregn= list() - for(j in 1:length(f_types)){ + for(j in seq_along(f_types)){ integrand_mellomregn[[j]] = eval(parse(text=type_to_integrand(f_types[j], edge_mats, names_surv_dens,abs_exact = F))) integral_mellomregn[[j]] = finding_limits(timepointMat[i,],f_types[j],edge_mats, absorbing_states,abs_exact = F) } @@ -214,4 +214,3 @@ for (k in 1:6){ } ctimes[which(ctimes$int_dim==(k-1) & ctimes$num_par=="2"),3] <- mean(tt) } -