From 83f1ab3d31e461317c9d32653ae5d515e0465ed6 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 8 Mar 2024 10:29:46 +0000 Subject: [PATCH] Name the chunks --- README.Rmd | 15 ++++++--------- vignettes/epichains.Rmd | 30 +++++++++++++++--------------- vignettes/projecting_incidence.Rmd | 26 +++++++++++++------------- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/README.Rmd b/README.Rmd index 84a41e3e..75271ff9 100644 --- a/README.Rmd +++ b/README.Rmd @@ -10,12 +10,13 @@ link-citations: true -```{r, include = FALSE} +```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = file.path("man", "figures", "README-"), - out.width = "100%" + out.width = "100%", + echo = TRUE ) ``` @@ -29,10 +30,6 @@ knitr::opts_chunk$set( experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - _{{ packagename }}_ is an R package to simulate, analyse, and visualize the size and length of branching processes with a given offspring distribution. These models are often used in infectious disease epidemiology, where the chains represent chains of @@ -49,7 +46,7 @@ _{{ packagename }}_ is developed at the [Centre for the Mathematical Modelling o The latest development version of the _{{ packagename }}_ package can be installed via -```{r include=TRUE,eval=FALSE} +```{r install_pkg, include=TRUE,eval=FALSE} # check whether {pak} is installed if (!require("pak")) install.packages("pak") pak::pak("{{ gh_repo }}") @@ -57,7 +54,7 @@ pak::pak("{{ gh_repo }}") To load the package, use -```{r eval=TRUE} +```{r load_pkg, eval=TRUE} library("epichains") ``` @@ -122,6 +119,6 @@ By contributing to this project, you agree to abide by its terms. ## Citing this package -```{r message=FALSE, warning=FALSE} +```{r citation, message=FALSE, warning=FALSE} citation("epichains") ``` diff --git a/vignettes/epichains.Rmd b/vignettes/epichains.Rmd index 24825a04..8c1a716b 100644 --- a/vignettes/epichains.Rmd +++ b/vignettes/epichains.Rmd @@ -17,7 +17,7 @@ editor_options: chunk_output_type: console --- -```{r include=FALSE} +```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, message = FALSE, @@ -35,7 +35,7 @@ can be used, for example, to analyse the distribution of chain sizes or length of infectious disease outbreaks, as discussed in @farrington2003 and @blumberg2013. -```{r} +```{r load_epichains} library("epichains") ``` @@ -60,7 +60,7 @@ likelihoods are returned. Let's look at the following example where we estimate the log-likelihood of observing `chain_sizes`. -```{r} +```{r chain_sizes_setup} set.seed(121) # example of observed chain sizes # randomly generate 20 chains of size between 1 to 10 @@ -68,7 +68,7 @@ chain_sizes <- sample(1:10, 20, replace = TRUE) chain_sizes ``` -```{r} +```{r estimate_likelihoods} # estimate loglikelihood of the observed chain sizes likelihood_eg <- likelihood( chains = chain_sizes, @@ -86,7 +86,7 @@ likelihood_eg `likelihood()`, by default, returns the joint log-likelihood. If instead, the individual log-likelihoods are required, then the `individual` argument must be set to `TRUE`. To return likelihoods instead, set `log = FALSE`. -```{r} +```{r chains_setup2} set.seed(121) # example of observed chain sizes # randomly generate 20 chains of size between 1 to 10 @@ -94,7 +94,7 @@ chain_sizes <- sample(1:10, 20, replace = TRUE) chain_sizes ``` -```{r} +```{r estimate_likelihoods2} # estimate loglikelihood of the observed chain sizes likelihood_ind_eg <- likelihood( chains = chain_sizes, @@ -128,7 +128,7 @@ used for this approximation. For example, let's look at an example where `chain_sizes` is observed and we want to calculate the likelihood of this being drawn from a binomial distribution with probability `prob = 0.9`. -```{r} +```{r likelihoods_by_simulation} set.seed(121) # example of observed chain sizes; randomly generate 20 chains of size 1 to 10 chain_sizes <- sample(1:10, 20, replace = TRUE) @@ -161,7 +161,7 @@ each of the simulations. For example, if the probability of observing each case is `obs_prob = 0.30`, we use -```{r} +```{r likelihoods_imperfect_obs} set.seed(121) # example of observed chain sizes; randomly generate 20 chains of size 1 to 10 chain_sizes <- sample(1:10, 20, replace = TRUE) @@ -200,7 +200,7 @@ function is specified. Let's look at an example where we simulate a transmission tree for $10$ index cases. We assume a poisson offspring distribution with mean, $\text{lambda} = 0.9$, and a generation time of $3$ days: -```{r} +```{r sim_chains_pois} set.seed(32) # Define generation time generation_time_fn <- function(n) { @@ -228,7 +228,7 @@ Here is a quick example where we simulate an outbreak in a population of size $1000$ with $10$ index cases and $10/%$ pre-existing immunity. We assume individuals have a poisson offspring distribution with mean, $\text{lambda} = 1$, and fixed generation time of $3$: -```{r} +```{r sim_chains_with_finite_pop} set.seed(32) # Define generation time generation_time_fn <- function(n) { @@ -260,7 +260,7 @@ especially useful for calculating numerical likelihoods in `likelihood()`. Here is an example to simulate the previous examples without intervention, returning the size of each of the $10$ chains. It assumes a poisson offspring distribution with mean of $0.9$. -```{r} +```{r sim_summary_pois} set.seed(123) simulate_summary_eg <- simulate_summary( @@ -281,7 +281,7 @@ simulate_summary_eg You can run `summary()` on the object returned by `simulate_chains()` to obtain the chain summaries per index case. -```{r include=TRUE,echo=TRUE} +```{r summary_eg, include=TRUE,echo=TRUE} set.seed(32) # Define generation time generation_time_fn <- function(n) { @@ -323,7 +323,7 @@ tree. We can confirm if the two outputs are the same using `base::setequal()` -```{r include=TRUE,echo=TRUE} +```{r compare_sim_funcs, include=TRUE,echo=TRUE} setequal( # summary of output from simulate_chains() summary(sim_chains), @@ -340,7 +340,7 @@ depending on the value of `by`. To aggregate over "time", you must have specified a generation time distribution (`generation_time`) in the simulation step. -```{r include=TRUE,echo=TRUE} +```{r aggregation_eg, include=TRUE,echo=TRUE} # Example with simulate_chains() set.seed(32) @@ -368,7 +368,7 @@ Aggregated `` objects can easily be plotted using base R or `ggplot2` with little to no data manipulation. Here is an end-to-end example from simulation through aggregation to plotting. -```{r} +```{r plot_eg} # Run simulation with simulate_chains() set.seed(32) # Define generation time diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd index 6c90378e..8a41fe80 100644 --- a/vignettes/projecting_incidence.Rmd +++ b/vignettes/projecting_incidence.Rmd @@ -55,7 +55,7 @@ library("lubridate") Included in _epichains_ is a cleaned time series of the first 15 days of the COVID-19 outbreak in South Africa. This can be loaded into memory as follows: -```{r} +```{r data} data("covid19_sa", package = "epichains") ``` @@ -63,7 +63,7 @@ We will use the first $5$ observations for this demonstration. We will assume that all the cases in that subset are imported and did not infect each other. Let us subset and view that aspect of the data. -```{r} +```{r view_data} seed_cases <- covid19_sa[1:5, ] head(seed_cases) ``` @@ -87,7 +87,7 @@ days_since_index Using the vector of start times from the time series, we will then create a corresponding seeding time for each individual, which we'll call `tf`. -```{r} +```{r t0_setup} t0 <- rep(days_since_index, seed_cases$cases) t0 ``` @@ -135,7 +135,7 @@ argument `n` as is required by _epichains_ (See `?epichains::simulate_chains`), which is further passed to `rlnorm()` as the first argument to determine the number of observations to sample (See `?rlnorm`). -```{r input_prep3, message=FALSE} +```{r generation_time_setup, message=FALSE} mu <- 4.7 sgma <- 2.9 @@ -162,7 +162,7 @@ For this example, we will assume that the offspring distribution is characterised by a negative binomial with $mu = 2.5$ [@abbott2020] and $size = 0.58$ [@wang2020]. -```{r nbinom_args, message=FALSE} +```{r nbinom_args_setup, message=FALSE} mu <- 2.5 size <- 0.58 ``` @@ -177,7 +177,7 @@ heterogeneity in transmission by single individuals. For this example, we will simulate outbreaks that end $21$ days after the last date of observations in the `seed_cases` dataset. -```{r input_prep2, message=FALSE} +```{r time_args_setup, message=FALSE} #' Date to end simulation projection_window <- 21 tf <- max(days_since_index) + projection_window @@ -189,7 +189,7 @@ time it is run for the same set of parameters. We will, therefore, run the simulations $100$ times and aggregate the results. Let us specify that. -```{r} +```{r sim_reps_setup} #' Number of simulations sim_rep <- 100 ``` @@ -203,7 +203,7 @@ Above `stat_max`, the simulation is cut off. If this value is not specified, it assumes a value of infinity. Here, we will assume a maximum chain size of $1000$. -```{r} +```{r stat_max_setup} #' Maximum chain size allowed stat_max <- 1000 ``` @@ -228,7 +228,7 @@ assuming that no outbreak size exceeds `r stat_max` cases. We will use the function `lapply()` to run the simulations and bind them by rows with `dplyr::bind_rows()`. -```{r simulations, message=FALSE} +```{r run_simulations, message=FALSE} set.seed(1234) sim_chain_sizes <- lapply( seq_len(sim_rep), @@ -252,7 +252,7 @@ sim_output <- bind_rows(sim_chain_sizes) ``` Let us view the first few rows of the simulation results. -```{r sim_output_head} +```{r view_sim_output} head(sim_output) ``` @@ -265,7 +265,7 @@ the median cases per day aggregated over all simulations. First, we will create the daily time series per simulation by aggregating the number of cases per day of each simulation. -```{r post_processing} +```{r post_process_output} # Daily number of cases for each simulation incidence_ts <- sim_output %>% mutate(day = ceiling(time)) %>% @@ -278,7 +278,7 @@ head(incidence_ts) Next, we will add a date column to the results of each simulation set. We will use the date of the first case in the observed data as the reference start date. -```{r} +```{r add_dates} # Get start date from the observed data index_date <- min(seed_cases$date) index_date @@ -294,7 +294,7 @@ head(incidence_ts_by_date) Now we will aggregate the simulations by day and evaluate the median daily cases across all simulations. -```{r} +```{r aggregate_simulations} # Median daily number of cases aggregated across all simulations median_daily_cases <- incidence_ts_by_date %>% group_by(date) %>%