epinowcast · seabbs · Feb 12, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 29, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -28,6 +28,7 @@ repos:
         entry: Cannot commit .Rhistory, .RData, .Rds or .rds.
         language: fail
         files: '\.Rhistory|\.RData|\.Rds|\.rds$'
+        exclude: '^data-raw/sierra_leone_ebola_data\.rds$'
 -   repo: meta
     hooks:
     -   id: check-hooks-apply

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -43,35 +43,30 @@ Imports:
     rstan (>= 2.26.0),
     dplyr,
     tibble,
+    tidyr,
     lubridate,
     primarycensored
 Suggests:
     bookdown,
     testthat (>= 3.0.0),
-    readxl,
-    janitor,
     gt,
     knitr,
     roxyglobals,
     bayesplot,
-    tidyr,
     posterior,
-    fs,
-    sf,
     tidybayes,
     modelr,
     patchwork,
+    pkgdown,
     cmdstanr,
     priorsense,
     usethis,
-    pkgdown,
     CodeDepends,
     BH (>= 1.66.0),
     Rcpp (>= 0.12.0),
     RcppEigen (>= 0.3.3.3.0)
 Remotes:
-    stan-dev/cmdstanr,
-    epinowcast/primarycensored
+    stan-dev/cmdstanr
 Config/Needs/website:
     r-lib/pkgdown,
     epinowcast/enwtheme

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,12 +3,20 @@
 S3method(add_mean_sd,default)
 S3method(add_mean_sd,gamma_samples)
 S3method(add_mean_sd,lognormal_samples)
+S3method(as_epidist_aggregate_data,data.frame)
+S3method(as_epidist_aggregate_data,default)
+S3method(as_epidist_aggregate_data,epidist_linelist_data)
+S3method(as_epidist_latent_model,epidist_aggregate_data)
 S3method(as_epidist_latent_model,epidist_linelist_data)
 S3method(as_epidist_linelist_data,data.frame)
 S3method(as_epidist_linelist_data,default)
+S3method(as_epidist_linelist_data,epidist_aggregate_data)
+S3method(as_epidist_marginal_model,epidist_aggregate_data)
 S3method(as_epidist_marginal_model,epidist_linelist_data)
+S3method(as_epidist_naive_model,epidist_aggregate_data)
 S3method(as_epidist_naive_model,epidist_linelist_data)
 S3method(assert_epidist,default)
+S3method(assert_epidist,epidist_aggregate_data)
 S3method(assert_epidist,epidist_latent_model)
 S3method(assert_epidist,epidist_linelist_data)
 S3method(assert_epidist,epidist_marginal_model)
@@ -22,15 +30,18 @@ S3method(epidist_family_prior,lognormal)
 S3method(epidist_formula_model,default)
 S3method(epidist_formula_model,epidist_latent_model)
 S3method(epidist_formula_model,epidist_marginal_model)
+S3method(epidist_formula_model,epidist_naive_model)
 S3method(epidist_model_prior,default)
 S3method(epidist_model_prior,epidist_latent_model)
 S3method(epidist_stancode,default)
 S3method(epidist_stancode,epidist_latent_model)
 S3method(epidist_stancode,epidist_marginal_model)
 S3method(epidist_transform_data_model,default)
 S3method(epidist_transform_data_model,epidist_marginal_model)
+S3method(epidist_transform_data_model,epidist_naive_model)
 export(Gamma)
 export(add_mean_sd)
+export(as_epidist_aggregate_data)
 export(as_epidist_latent_model)
 export(as_epidist_linelist_data)
 export(as_epidist_marginal_model)
@@ -52,11 +63,13 @@ export(epidist_prior)
 export(epidist_stancode)
 export(epidist_transform_data)
 export(epidist_transform_data_model)
+export(is_epidist_aggregate_data)
 export(is_epidist_latent_model)
 export(is_epidist_linelist_data)
 export(is_epidist_marginal_model)
 export(is_epidist_naive_model)
 export(lognormal)
+export(new_epidist_aggregate_data)
 export(new_epidist_latent_model)
 export(new_epidist_linelist_data)
 export(new_epidist_marginal_model)
@@ -77,6 +90,7 @@ importFrom(brms,prior)
 importFrom(brms,set_prior)
 importFrom(brms,stanvar)
 importFrom(brms,weibull)
+importFrom(checkmate,assert_character)
 importFrom(checkmate,assert_class)
 importFrom(checkmate,assert_data_frame)
 importFrom(checkmate,assert_date)

diff --git a/NEWS.md b/NEWS.md
@@ -1,13 +1,21 @@
 # epidist 0.1.0.1000
 
-Development version of `epidist`. As part of this release we have moved from @athowes maintaining the package (who led the initial package development, implementation of the S3 infrastructure, implementation of the core models, and wrote the first versions of the getting started vignette, Ebola case study, FAQ section, and the approximate inference vignette) to @seabbs maintaining the package.
+Development version of `epidist`.
+
+This release adds a new marginal model based on `primarycensored` which provides a more efficient approach for fitting delay distributions compared to the existing latent model. We've also improved data handling by adding support for aggregated data across all models, added comprehensive examples using real world data, and enhanced documentation based on user feedback. The package has also undergone significant internal improvements including generalised Stan reparameterisation and improved data transformation methods.
+
+As part of this release we have moved from @athowes maintaining the package (who led the initial package development, implementation of the S3 infrastructure, implementation of the core models, and wrote the first versions of the getting started vignette, Ebola case study, FAQ section, and the approximate inference vignette) to @seabbs maintaining the package.
 
 ## Models
 
 - Added a marginalised likelihood model based on `primarycensored`. This can be specified using `as_epidist_marginal_model()`. This is currently limited to Weibull, log-normal, and gamma distributions with uniform primary censoring but this will be generalised in future releases. See #426.
-- Added a `weight` argument to `as_epidist_marginal_model()` to allow for weighted data (for example count data) to be used in the marginal model. See #509.
 - Added user settable primary event priors to the latent model. See #474.
 - Added a marginalised likelihood to the latent model. See #474.
+- Added a `weight` argument to `as_epidist_marginal_model()` to allow for weighted data (for example count data) to be used in the marginal model. See #509.
+- Added an `epidist_aggregate_data` method to `as_epidist_marginal_model()` to allow straightforward use of the marginal model with aggregated data. See #510.
+- Added an `epidist_aggregate_data` method to `as_epidist_latent_model()` to allow straightforward use of the latent model with aggregated data. See #510.
+- Added an `epidist_aggregate_data` method to `as_epidist_naive_model()` to allow straightforward use of the naive model with aggregated data. See #510.
+- Updated the naive model to internally transform the data to be optimally aggregated as for the marginal model. See #510.
 
 ## Package
 
@@ -17,12 +25,19 @@ Development version of `epidist`. As part of this release we have moved from @at
 - Added a `merge` argument to `epidist_prior()` to allow for not merging user and package priors. See #474.
 - Generalised the Stan reparametrisation feature to work across all distributions without manual specification by generating Stan code with `brms` and then extracting the reparameterisation. See #474.
 - Added a `transform_data` S3 method to allow for data to be transformed for specific models. This is specifically useful for the marginal model at the moment as it allows reducing the data to its unique strata. See #474.
+- Added a new `epidist_aggregate_data` class to handle pre-aggregated line list data. See #510.
+- Added an `as_epidist_aggregate_data()` method for `epidist_linelist_data` objects to allow for easy conversion to aggregate data. See #510.
+- Added an `as_epidist_linelist_data()` method for `epidist_aggregate_data` objects to allow for easy conversion to linelist data. See #510.
+- Added an example dataset `sierra_leone_ebola_data` to the package. See #510.
+- Added examples to most functions to show usage of the package. See #510.
+- Added improved documentation explaining how the `epidist_transform_data()` methods work for the marginal and naive models. See #510.
 
 ## Documentation
 
 - Brings the README into line with `epinowcast` standards. See #467.
 - Switched over to using the marginal model as default in the documentation. See #426.
 - Added helper functions for new variables to avoid code duplication in vignettes. See #426.
+- Improved the Ebola case study vignette to use truncated data and to reduce the focus on exploratory data analysis. See #510.
 
 ## Bugs
 

diff --git a/R/aggregate_data.R b/R/aggregate_data.R
@@ -0,0 +1,232 @@
+#' Create an `epidist_aggregate_data` object
+#'
+#' S3 generic that creates an `epidist_aggregate_data` object from various
+#' input formats, dispatching on the class of `data`. This is useful when
+#' working with pre-aggregated data where each row represents multiple
+#' identical observations. See the specific methods (for vectors of event
+#' times, for `data.frame` input, and for `epidist_linelist_data` objects) for
+#' details on supported input formats and usage examples.
+#'
+#' @inheritParams as_epidist_linelist_data
+#' @returns The value returned by the method selected for `data`.
+#' @family aggregate_data
+#' @export
+as_epidist_aggregate_data <- function(data, ...) {
+  UseMethod("as_epidist_aggregate_data")
+}
+
+#' Build an `epidist_aggregate_data` object from vectors of event times
+#'
+#' Accepts vectors of event times (the primary and secondary event times and
+#' the observation time) together with a vector of counts and returns an
+#' `epidist_aggregate_data` object. Use this format for pre-aggregated data in
+#' which every row stands for several identical observations, with the number
+#' of observations held in the `n` column. The time columns are built by
+#' [as_epidist_linelist_data.default()]; the counts are attached afterwards and
+#' the result is validated with [assert_epidist()]. See the other methods for
+#' alternative input formats.
+#'
+#' @inheritParams as_epidist_linelist_data.default
+#' @inheritParams as_epidist_aggregate_data
+#'
+#' @param n An integerish vector containing the counts for each row. Must be the
+#'  same length as the input data vector. An error is raised if it is `NULL`.
+#'
+#' @returns An object of class `epidist_aggregate_data`.
+#' @family aggregate_data
+#' @autoglobal
+#' @export
+#' @examples
+#' as_epidist_aggregate_data(
+#'   data = c(1, 2, 3),
+#'   ptime_upr = c(2, 3, 4),
+#'   stime_lwr = c(3, 4, 5),
+#'   stime_upr = c(4, 5, 6),
+#'   obs_time = c(5, 6, 7),
+#'   n = c(1, 2, 3)
+#' )
+as_epidist_aggregate_data.default <- function(
+    data, n = NULL, ptime_upr = NULL, stime_lwr = NULL,
+    stime_upr = NULL, obs_time = NULL, ...) {
+  # Delegate construction of the time columns to the linelist method
+  linelist <- as_epidist_linelist_data.default(
+    data = data,
+    ptime_upr = ptime_upr,
+    stime_lwr = stime_lwr,
+    stime_upr = stime_upr,
+    obs_time = obs_time,
+    ...
+  )
+
+  # Counts are mandatory for aggregate data
+  if (is.null(n)) {
+    cli::cli_abort("{.var n} is NULL but must be provided.")
+  }
+  linelist$n <- n
+
+  # Tag with the aggregate class, then validate before handing back
+  agg_data <- new_epidist_aggregate_data(linelist)
+  assert_epidist(agg_data)
+  return(agg_data)
+}
+
+#' Create an epidist_aggregate_data object from a data.frame
+#'
+#' This method takes a data.frame containing event dates (primary/secondary
+#' event dates and observation date) along with counts and creates an
+#' `epidist_aggregate_data` object. This format is useful when working with
+#' pre-aggregated data where each row represents multiple identical observations
+#' with the count stored in a specified column. Internally it makes use of
+#' [as_epidist_linelist_data.data.frame()] to convert the data to a linelist
+#' format before adding the count column. See the other methods for other data
+#' input options.
+#'
+#' @param n A character string giving the name of the column containing the
+#'  counts for each row. If `NULL` then the column `n` must be present in the
+#'  data. An error is raised if the named column is not present in `data`.
+#' @inheritParams as_epidist_linelist_data.data.frame
+#' @returns An object of class `epidist_aggregate_data` with the counts stored
+#'  in the `n` column.
+#' @family aggregate_data
+#' @autoglobal
+#' @export
+#' @examples
+#' sierra_leone_ebola_data |>
+#'   dplyr::count(date_of_symptom_onset, date_of_sample_tested) |>
+#'   as_epidist_aggregate_data(
+#'     pdate_lwr = "date_of_symptom_onset",
+#'     sdate_lwr = "date_of_sample_tested",
+#'     n = "n"
+#'   )
+as_epidist_aggregate_data.data.frame <- function(
+    data, n = NULL, pdate_lwr = NULL, sdate_lwr = NULL,
+    pdate_upr = NULL, sdate_upr = NULL, obs_date = NULL, ...) {
+  # First convert to linelist data
+  df <- as_epidist_linelist_data.data.frame(
+    data = data,
+    pdate_lwr = pdate_lwr,
+    sdate_lwr = sdate_lwr,
+    pdate_upr = pdate_upr,
+    sdate_upr = sdate_upr,
+    obs_date = obs_date,
+    ...
+  )
+
+  # Resolve the count column, defaulting to a column called "n"
+  if (is.null(n)) {
+    if (!hasName(data, "n")) {
+      cli::cli_abort("{.var n} is NULL but must be provided.")
+    }
+    n <- "n"
+  } else if (is.character(n) && !all(hasName(data, n))) {
+    # Without this check `data[[n]]` is NULL, so the assignment below adds no
+    # counts (or drops an existing `n` column) and the problem only surfaces
+    # later as a complaint about a missing `n` column.
+    cli::cli_abort(
+      "Column {.val {n}} supplied via {.var n} is not present in {.var data}."
+    )
+  }
+
+  # Counts are read from the input, not from the converted linelist
+  df$n <- data[[n]]
+
+  df <- new_epidist_aggregate_data(df)
+  assert_epidist(df)
+  return(df)
+}
+
+#' Convert linelist data to aggregate format
+#'
+#' This method takes an `epidist_linelist_data` object (see
+#' [as_epidist_linelist_data()]) and aggregates it by counting unique
+#' combinations of the required time variables (primary/secondary event times
+#' and observation time) and any additional variables specified in `by`. The
+#' result is a more compact representation of the same data where each row
+#' represents multiple identical observations with the count stored in the `n`
+#' column.
+#'
+#' @param by Character vector of additional variables to stratify by, beyond the
+#'   required time variables. Every entry must be the name of a column in
+#'   `data`.
+#'
+#' @inheritParams as_epidist_aggregate_data
+#'
+#' @returns The aggregated data: one row per unique combination of the
+#'   grouping variables, with the number of matching rows in the `n` column
+#'   and any `by` columns appended.
+#' @method as_epidist_aggregate_data epidist_linelist_data
+#' @family aggregate_data
+#' @autoglobal
+#' @export
+#' @importFrom checkmate assert_character assert_names
+#' @examples
+#' # Default stratification by required time variables only
+#' sierra_leone_ebola_data |>
+#'   as_epidist_linelist_data(
+#'     pdate_lwr = "date_of_symptom_onset",
+#'     sdate_lwr = "date_of_sample_tested"
+#'   ) |>
+#'   as_epidist_aggregate_data()
+#'
+#' # Additional stratification by other variables
+#' sierra_leone_ebola_data |>
+#'   as_epidist_linelist_data(
+#'     pdate_lwr = "date_of_symptom_onset",
+#'     sdate_lwr = "date_of_sample_tested"
+#'   ) |>
+#'   as_epidist_aggregate_data(by = "age")
+as_epidist_aggregate_data.epidist_linelist_data <- function(
+    data, by = NULL, ...) {
+  assert_epidist.epidist_linelist_data(data)
+
+  # Required variables for epidist objects
+  group_vars <- .linelist_required_cols()
+
+  # Combine required variables with user-specified ones
+  if (!is.null(by)) {
+    assert_character(by)
+    assert_names(names(data), must.include = by)
+    group_vars <- c(group_vars, by)
+  }
+
+  # Count the rows sharing each combination of the grouping variables
+  agg <- data |>
+    dplyr::count(dplyr::across(dplyr::all_of(group_vars)), name = "n")
+  # The counted table is no longer a linelist, so drop that class
+  class(agg) <- setdiff(class(agg), "epidist_linelist_data")
+  # Rebuild and validate the aggregate object from the counted time columns
+  aggregated <- as_epidist_aggregate_data.default(
+    data = agg$ptime_lwr,
+    ptime_upr = agg$ptime_upr,
+    stime_lwr = agg$stime_lwr,
+    stime_upr = agg$stime_upr,
+    obs_time = agg$obs_time,
+    n = agg$n,
+    ...
+  )
+  # Re-attach the columns of `agg` (e.g. those named in `by`) that the rebuilt
+  # object does not already contain; namespace-qualified like the other dplyr
+  # calls in this function.
+  aggregated <- dplyr::bind_cols(
+    aggregated, agg[!names(agg) %in% names(aggregated)]
+  )
+  return(aggregated)
+}
+
+#' Low-level constructor for `epidist_aggregate_data` objects
+#'
+#' Prepends `"epidist_aggregate_data"` to the class vector of `data`. No
+#' validation is carried out here.
+#'
+#' @param data A data.frame to convert
+#' @returns An object of class `epidist_aggregate_data`
+#' @family aggregate_data
+#' @export
+#' @examples
+#' df <- new_epidist_aggregate_data(data.frame())
+#' class(df)
+new_epidist_aggregate_data <- function(data) {
+  existing_classes <- class(data)
+  class(data) <- c("epidist_aggregate_data", existing_classes)
+  return(data)
+}
+
+#' Test whether an object carries the `epidist_aggregate_data` class
+#'
+#' @inheritParams as_epidist_aggregate_data
+#' @param ... Additional arguments (not used by this function)
+#' @returns `TRUE` if `data` inherits from `epidist_aggregate_data`, otherwise
+#'  `FALSE`.
+#' @family aggregate_data
+#' @export
+is_epidist_aggregate_data <- function(data, ...) {
+  return(inherits(data, what = "epidist_aggregate_data"))
+}
+
+#' Validate an `epidist_aggregate_data` object
+#'
+#' Runs the `epidist_linelist_data` checks on `data` and then additionally
+#' requires a column named `n` holding integerish values of at least 1. An
+#' error is raised if any check fails.
+#'
+#' @param data An object to check
+#' @param ... Additional arguments
+#' @returns `NULL`, invisibly.
+#' @method assert_epidist epidist_aggregate_data
+#' @family aggregate_data
+#' @export
+#' @examples
+#' sierra_leone_ebola_data |>
+#'   as_epidist_linelist_data(
+#'     pdate_lwr = "date_of_symptom_onset",
+#'     sdate_lwr = "date_of_sample_tested"
+#'   ) |>
+#'   as_epidist_aggregate_data() |>
+#'   assert_epidist()
+assert_epidist.epidist_aggregate_data <- function(data, ...) {
+  # Aggregate data must first satisfy every linelist requirement
+  assert_epidist.epidist_linelist_data(data)
+
+  # On top of that it needs a count column of whole numbers, each >= 1
+  assert_names(names(data), must.include = "n")
+  # NOTE(review): checkmate's `any.missing` defaults to TRUE, so NA counts
+  # pass this check - confirm whether that is intended.
+  assert_integerish(data$n, lower = 1)
+
+  return(invisible(NULL))
+}
diff --git a/R/assert_epidist.R b/R/assert_epidist.R
@@ -18,4 +18,5 @@
       "i" = "Please convert to epidist object first using as_epidist_<class>()" # nolint
     )
   )
+  return(invisible(NULL))
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,4 +18,5 @@ @@
           "i" = "Please convert to epidist object first using as_epidist_<class>()" # nolint
         )
       )
+      return(invisible(NULL))
     }