From 83f1ab3d31e461317c9d32653ae5d515e0465ed6 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 8 Mar 2024 10:29:46 +0000
Subject: [PATCH] Name the chunks

---
 README.Rmd                         | 15 ++++++---------
 vignettes/epichains.Rmd            | 30 +++++++++++++++---------------
 vignettes/projecting_incidence.Rmd | 26 +++++++++++++-------------
 3 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 84a41e3e..75271ff9 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -10,12 +10,13 @@ link-citations: true
 <!-- `packagename` is extracted from the DESCRIPTION file -->
 <!-- `gh_repo` is extracted via a special environment variable in GitHub Actions -->
 
-```{r, include = FALSE}
+```{r setup, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
   fig.path = file.path("man", "figures", "README-"),
-  out.width = "100%"
+  out.width = "100%",
+  echo = TRUE
 )
 ```
 
@@ -29,10 +30,6 @@ knitr::opts_chunk$set(
 experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
 <!-- badges: end -->
 
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
 _{{ packagename }}_ is an R package to simulate, analyse, and visualize the size 
 and length of branching processes with a given offspring distribution. These 
 models are often used in infectious disease epidemiology, where the chains represent chains of
@@ -49,7 +46,7 @@ _{{ packagename }}_ is developed at the [Centre for the Mathematical Modelling o
 
 The latest development version of the _{{ packagename }}_ package can be installed via
 
-```{r include=TRUE,eval=FALSE}
+```{r install_pkg, include=TRUE,eval=FALSE}
 # check whether {pak} is installed
 if (!require("pak")) install.packages("pak")
 pak::pak("{{ gh_repo }}")
@@ -57,7 +54,7 @@ pak::pak("{{ gh_repo }}")
 
 To load the package, use
 
-```{r eval=TRUE}
+```{r load_pkg, eval=TRUE}
 library("epichains")
 ```
 
@@ -122,6 +119,6 @@ By contributing to this project, you agree to abide by its terms.
 
 ## Citing this package
 
-```{r message=FALSE, warning=FALSE}
+```{r citation, message=FALSE, warning=FALSE}
 citation("epichains")
 ```
diff --git a/vignettes/epichains.Rmd b/vignettes/epichains.Rmd
index 24825a04..8c1a716b 100644
--- a/vignettes/epichains.Rmd
+++ b/vignettes/epichains.Rmd
@@ -17,7 +17,7 @@ editor_options:
   chunk_output_type: console
 ---
 
-```{r include=FALSE}
+```{r setup, include=FALSE}
 knitr::opts_chunk$set(
   echo = TRUE,
   message = FALSE,
@@ -35,7 +35,7 @@ can be used, for example, to analyse the distribution of chain sizes
 or length of infectious disease outbreaks, as discussed in @farrington2003 and 
 @blumberg2013.
 
-```{r}
+```{r load_epichains}
 library("epichains")
 ```
 
@@ -60,7 +60,7 @@ likelihoods are returned.
 
 Let's look at the following example where we estimate the log-likelihood of
 observing `chain_sizes`.
-```{r}
+```{r chain_sizes_setup}
 set.seed(121)
 # example of observed chain sizes
 # randomly generate 20 chains of size between 1 to 10
@@ -68,7 +68,7 @@ chain_sizes <- sample(1:10, 20, replace = TRUE)
 chain_sizes
 ```
 
-```{r}
+```{r estimate_likelihoods}
 # estimate loglikelihood of the observed chain sizes
 likelihood_eg <- likelihood(
   chains = chain_sizes,
@@ -86,7 +86,7 @@ likelihood_eg
 `likelihood()`, by default, returns the joint log-likelihood. If instead,
 the individual log-likelihoods are required, then the `individual` argument
 must be set to `TRUE`. To return likelihoods instead, set `log = FALSE`.
-```{r}
+```{r chains_setup2}
 set.seed(121)
 # example of observed chain sizes
 # randomly generate 20 chains of size between 1 to 10
@@ -94,7 +94,7 @@ chain_sizes <- sample(1:10, 20, replace = TRUE)
 chain_sizes
 ```
 
-```{r}
+```{r estimate_likelihoods2}
 # estimate loglikelihood of the observed chain sizes
 likelihood_ind_eg <- likelihood(
   chains = chain_sizes,
@@ -128,7 +128,7 @@ used for this approximation.
 For example, let's look at an example where `chain_sizes` is observed and we
 want to calculate the likelihood of this being drawn from a binomial
 distribution with probability `prob = 0.9`.
-```{r}
+```{r likelihoods_by_simulation}
 set.seed(121)
 # example of observed chain sizes; randomly generate 20 chains of size 1 to 10
 chain_sizes <- sample(1:10, 20, replace = TRUE)
@@ -161,7 +161,7 @@ each of the simulations.
 For example, if the probability of observing each case is `obs_prob = 0.30`,
 we use
 
-```{r}
+```{r likelihoods_imperfect_obs}
 set.seed(121)
 # example of observed chain sizes; randomly generate 20 chains of size 1 to 10
 chain_sizes <- sample(1:10, 20, replace = TRUE)
@@ -200,7 +200,7 @@ function is specified.
 Let's look at an example where we simulate a transmission tree for $10$ index
 cases. We assume a poisson offspring distribution with mean,
 $\text{lambda} = 0.9$, and a generation time of $3$ days:
-```{r}
+```{r sim_chains_pois}
 set.seed(32)
 # Define generation time
 generation_time_fn <- function(n) {
@@ -228,7 +228,7 @@ Here is a quick example where we simulate an outbreak in a population of
 size $1000$ with $10$ index cases and $10/%$ pre-existing immunity. We assume
 individuals have a poisson offspring distribution with mean,
 $\text{lambda} = 1$, and fixed generation time of $3$:
-```{r}
+```{r sim_chains_with_finite_pop}
 set.seed(32)
 # Define generation time
 generation_time_fn <- function(n) {
@@ -260,7 +260,7 @@ especially useful for calculating numerical likelihoods in `likelihood()`.
 Here is an example to simulate the previous examples without intervention,
 returning the size of each of the $10$ chains. It assumes a poisson offspring
 distribution with mean of $0.9$.
-```{r}
+```{r sim_summary_pois}
 set.seed(123)
 
 simulate_summary_eg <- simulate_summary(
@@ -281,7 +281,7 @@ simulate_summary_eg
 
 You can run `summary()` on the object returned by `simulate_chains()` to
 obtain the chain summaries per index case.
-```{r include=TRUE,echo=TRUE}
+```{r summary_eg, include=TRUE,echo=TRUE}
 set.seed(32)
 # Define generation time
 generation_time_fn <- function(n) {
@@ -323,7 +323,7 @@ tree.
 
 We can confirm if the two outputs are the same using `base::setequal()`
 
-```{r include=TRUE,echo=TRUE}
+```{r compare_sim_funcs, include=TRUE,echo=TRUE}
 setequal(
   # summary of output from simulate_chains()
   summary(sim_chains),
@@ -340,7 +340,7 @@ depending on the value of `by`.
 
 To aggregate over "time", you must have specified a generation time
 distribution (`generation_time`) in the simulation step.
-```{r include=TRUE,echo=TRUE}
+```{r aggregation_eg, include=TRUE,echo=TRUE}
 # Example with simulate_chains()
 set.seed(32)
 
@@ -368,7 +368,7 @@ Aggregated `<epichains_tree>` objects can easily be plotted using base R
 or `ggplot2` with little to no data manipulation.
 
 Here is an end-to-end example from simulation through aggregation to plotting.
-```{r}
+```{r plot_eg}
 # Run simulation with simulate_chains()
 set.seed(32)
 # Define generation time
diff --git a/vignettes/projecting_incidence.Rmd b/vignettes/projecting_incidence.Rmd
index 6c90378e..8a41fe80 100644
--- a/vignettes/projecting_incidence.Rmd
+++ b/vignettes/projecting_incidence.Rmd
@@ -55,7 +55,7 @@ library("lubridate")
 Included in _epichains_ is a cleaned time series of the first 15 days of 
 the COVID-19 outbreak in South Africa. This can be loaded into 
 memory as follows: 
-```{r}
+```{r data}
 data("covid19_sa", package = "epichains")
 ```
 
@@ -63,7 +63,7 @@ We will use the first $5$ observations for this demonstration. We will assume
 that all the cases in that subset are imported and did not infect each other. 
 
 Let us subset and view that aspect of the data.
-```{r}
+```{r view_data}
 seed_cases <- covid19_sa[1:5, ]
 head(seed_cases)
 ```
@@ -87,7 +87,7 @@ days_since_index
 Using the vector of start times from the time series, we will then 
 create a corresponding seeding time for each individual, which we'll call
 `tf`.
-```{r}
+```{r t0_setup}
 t0 <- rep(days_since_index, seed_cases$cases)
 t0
 ```
@@ -135,7 +135,7 @@ argument `n` as is required by _epichains_ (See `?epichains::simulate_chains`),
 which is further passed to `rlnorm()` as the 
 first argument to determine the number of observations to sample
 (See `?rlnorm`).
-```{r input_prep3, message=FALSE}
+```{r generation_time_setup, message=FALSE}
 mu <- 4.7
 sgma <- 2.9
 
@@ -162,7 +162,7 @@ For this example, we will assume that the offspring distribution is
 characterised by a negative binomial with $mu = 2.5$ [@abbott2020] and 
 $size = 0.58$ [@wang2020]. 
 
-```{r nbinom_args, message=FALSE}
+```{r nbinom_args_setup, message=FALSE}
 mu <- 2.5
 size <- 0.58
 ```
@@ -177,7 +177,7 @@ heterogeneity in transmission by single individuals.
 
 For this example, we will simulate outbreaks that end $21$ days after the last
 date of observations in the `seed_cases` dataset.
-```{r input_prep2, message=FALSE}
+```{r time_args_setup, message=FALSE}
 #' Date to end simulation
 projection_window <- 21
 tf <- max(days_since_index) + projection_window
@@ -189,7 +189,7 @@ time it is run for the same set of parameters. We will, therefore, run the
 simulations $100$ times and aggregate the results.
 
 Let us specify that.
-```{r}
+```{r sim_reps_setup}
 #' Number of simulations
 sim_rep <- 100
 ```
@@ -203,7 +203,7 @@ Above `stat_max`, the simulation is cut off. If this value is
 not specified, it assumes a value of infinity. Here, we will
 assume a maximum chain size of $1000$.
 
-```{r}
+```{r stat_max_setup}
 #' Maximum chain size allowed
 stat_max <- 1000
 ```
@@ -228,7 +228,7 @@ assuming that no outbreak size exceeds `r stat_max` cases.
 
 We will use the function `lapply()` to run the simulations and bind them
 by rows with `dplyr::bind_rows()`.
-```{r simulations, message=FALSE}
+```{r run_simulations, message=FALSE}
 set.seed(1234)
 sim_chain_sizes <- lapply(
   seq_len(sim_rep),
@@ -252,7 +252,7 @@ sim_output <- bind_rows(sim_chain_sizes)
 ```
 
 Let us view the first few rows of the simulation results.
-```{r sim_output_head}
+```{r view_sim_output}
 head(sim_output)
 ```
 
@@ -265,7 +265,7 @@ the median cases per day aggregated over all simulations.
 
 First, we will create the daily time series per simulation by
 aggregating the number of cases per day of each simulation.
-```{r post_processing}
+```{r post_process_output}
 # Daily number of cases for each simulation
 incidence_ts <- sim_output %>%
   mutate(day = ceiling(time)) %>%
@@ -278,7 +278,7 @@ head(incidence_ts)
 Next, we will add a date column to the results of each simulation 
 set. We will use the date of the first case in the observed data 
 as the reference start date.
-```{r}
+```{r add_dates}
 # Get start date from the observed data
 index_date <- min(seed_cases$date)
 index_date
@@ -294,7 +294,7 @@ head(incidence_ts_by_date)
 
 Now we will aggregate the simulations by day and evaluate the median 
 daily cases across all simulations.
-```{r}
+```{r aggregate_simulations}
 # Median daily number of cases aggregated across all simulations
 median_daily_cases <- incidence_ts_by_date %>%
   group_by(date) %>%