epiforecasts · sbfnk · Nov 19, 2024 · Nov 18, 2024 · Nov 18, 2024 · Nov 18, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,27 +19,31 @@ Description: Provides methods for sampling contact matrices from diary
 License: MIT + file LICENSE
 Depends: 
     R (>= 3.5.0)
-Imports: 
+Imports:
+    checkmate,
     countrycode,
     curl,
     data.table,
     grDevices,
     httr,
     jsonlite,
+    lifecycle,
     lubridate,
     memoise,
+    purrr,
     oai,
     wpp2017,
     xml2
 Suggests:
     ggplot2,
     here,
     knitr,
-    purrr,
+    quarto,
     reshape2,
     rmarkdown,
     roxyglobals (>= 1.0.0),
-    testthat
+    testthat,
+    withr
 VignetteBuilder: 
     knitr
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(check,survey)
-S3method(clean,survey)
+S3method(check,contact_survey)
+S3method(clean,contact_survey)
+export(as_contact_survey)
 export(check)
 export(clean)
 export(contact_matrix)
@@ -20,8 +21,13 @@ export(wpp_age)
 export(wpp_countries)
 import(data.table)
 import(wpp2017)
+importFrom(checkmate,assert_character)
+importFrom(checkmate,assert_data_frame)
+importFrom(checkmate,assert_list)
+importFrom(checkmate,assert_names)
 importFrom(countrycode,countrycode)
 importFrom(curl,curl_download)
+importFrom(data.table,copy)
 importFrom(data.table,data.table)
 importFrom(data.table,dcast)
 importFrom(data.table,fcase)
@@ -47,6 +53,7 @@ importFrom(lubridate,period_to_seconds)
 importFrom(lubridate,years)
 importFrom(memoise,memoise)
 importFrom(oai,list_records)
+importFrom(purrr,walk)
 importFrom(stats,median)
 importFrom(stats,runif)
 importFrom(stats,xtabs)

diff --git a/R/as_contact_survey.R b/R/as_contact_survey.R
@@ -0,0 +1,67 @@
+#' @title Check contact survey data
+#'
+#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
+#'
+#' @param x list containing
+#'  - an element named 'participants', a data frame containing participant
+#'   information
+#'  - an element named 'contacts', a data frame containing contact information
+#'  - (optionally) an element named 'reference, a list containing information
+#'   information needed to reference the survey, in particular it can contain$a
+#'   "title", "bibtype", "author", "doi", "publisher", "note", "year"
+#' @param id.column the column in both the `participants` and `contacts` data frames that links contacts to participants
+#' @param country.column the column in the `participants` data frame containing the country in which the participant was queried
+#' @param year.column the column in the `participants` data frame containing the year in which the participant was queried
+#' @importFrom checkmate assert_list assert_names assert_data_frame
+#'   assert_character
+#' @importFrom purrr walk
+#' @return invisibly returns a character vector of the relevant columns
+#' @examples
+#' data(polymod)
+#' check(polymod)
+#' @export
+as_contact_survey <- function(x, id.column = "part_id",
+                              country.column = "country",
+                              year.column = "year") {
+  ## check arguments
+  assert_list(x, names = "named")
+  assert_names(names(x), must.include = c("participants", "contacts"))
+  assert_data_frame(x$participants)
+  assert_data_frame(x$contacts)
+  assert_list(x$reference, names = "named", null.ok = TRUE)
+  assert_character(id.column)
+  assert_character(year.column, null.ok = TRUE)
+  assert_character(country.column, null.ok = TRUE)
+  assert_names(colnames(x$participants), must.include = id.column)
+  assert_names(colnames(x$contacts), must.include = id.column)
+
+  setnames(x$participants, id.column, "part_id")
+  setnames(x$contacts, id.column, "part_id")
+
+  ## check optional columns exist if provided
+  to_check <- list(
+    country = country.column,
+    year = year.column
+  )
+
+  walk(names(to_check), \(column) {
+    if (!is.null(to_check[[column]]) &&
+      !(to_check[[column]] %in% colnames(x$participants))) {
+      stop(
+        column, " column '", to_check[[column]], "' does not exist ",
+        "in the participant data frame"
+      )
+    } else {
+      setnames(x$participants, to_check[[column]], column)
+    }
+  })
+
+  if (is.null(x$reference)) {
+    warning("No reference provided")
+  }
+
+  survey <- new_contact_survey(x$participant, x$contacts, x$reference)
+  survey <- clean(survey)
+
+  return(survey)
+}
diff --git a/R/check.R b/R/check.R
@@ -18,7 +18,15 @@ check <- function(x, ...) UseMethod("check")
 #' data(polymod)
 #' check(polymod)
 #' @export
-check.survey <- function(x, id.column = "part_id", participant.age.column = "part_age", country.column = "country", year.column = "year", contact.age.column = "cnt_age", ...) {
+check.contact_survey <- function(x, id.column = "part_id", participant.age.column = "part_age", country.column = "country", year.column = "year", contact.age.column = "cnt_age", ...) {
+  lifecycle::deprecate_warn(
+    "1.0.0",
+    "check()",
+    details = paste(
+      "Use `as_contact_survey()` instead to construct a `<contact_survey>`",
+      "object. This will perform necessary checks."
+    )
+  )
   chkDots(...)
   if (!is.data.frame(x$participants) || !is.data.frame(x$contacts)) {
     stop("The 'participants' and 'contacts' elements of 'x' must be data.frames")

diff --git a/R/clean.R b/R/clean.R
@@ -7,7 +7,6 @@ clean <- function(x, ...) UseMethod("clean")
 #' @description Cleans survey data to work with the 'contact_matrix' function
 #'
 #' @param x A [survey()] object
-#' @param country.column the name of the country in which the survey participant was interviewed
 #' @param participant.age.column the column in `x$participants` containing participants' age
 #' @param ... ignored
 #' @importFrom data.table fcase
@@ -19,22 +18,20 @@ clean <- function(x, ...) UseMethod("clean")
 #' cleaned <- clean(polymod) # not really necessary as the 'polymod' data set has already been cleaned
 #' @autoglobal
 #' @export
-clean.survey <- function(x, country.column = "country", participant.age.column = "part_age", ...) {
+clean.contact_survey <- function(x, participant.age.column = "part_age", ...) {
   chkDots(...)
 
-  x <- survey(x$participants, x$contacts, x$reference)
-
   ## update country names
-  if (country.column %in% colnames(x$participants)) {
-    countries <- x$participants[[country.column]]
+  if ("country" %in% colnames(x$participants)) {
+    countries <- x$participants$country
     origin.code <- fcase(
       all(nchar(as.character(countries)) == 2), "iso2c",
       all(nchar(as.character(countries)) == 3), "iso3c",
       default = "country.name"
     )
     converted_countries <- suppressWarnings(countrycode(countries, origin.code, "country.name"))
     converted_countries[is.na(converted_countries)] <- as.character(countries[is.na(converted_countries)])
-    x$participants[, paste(country.column) := factor(converted_countries)]
+    x$participants[, country := factor(converted_countries)]
   }
 
   if (nrow(x$participants) > 0 &&