Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove storage of column names #159

Merged
merged 29 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,31 @@ Description: Provides methods for sampling contact matrices from diary
License: MIT + file LICENSE
Depends:
R (>= 3.5.0)
Imports:
Imports:
checkmate,
countrycode,
curl,
data.table,
grDevices,
httr,
jsonlite,
lifecycle,
lubridate,
memoise,
purrr,
oai,
wpp2017,
xml2
Suggests:
ggplot2,
here,
knitr,
purrr,
quarto,
reshape2,
rmarkdown,
roxyglobals (>= 1.0.0),
testthat
testthat,
withr
VignetteBuilder:
knitr
Encoding: UTF-8
Expand Down
11 changes: 9 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Generated by roxygen2: do not edit by hand

S3method(check,survey)
S3method(clean,survey)
S3method(check,contact_survey)
S3method(clean,contact_survey)
export(as_contact_survey)
export(check)
export(clean)
export(contact_matrix)
Expand All @@ -20,8 +21,13 @@ export(wpp_age)
export(wpp_countries)
import(data.table)
import(wpp2017)
importFrom(checkmate,assert_character)
importFrom(checkmate,assert_data_frame)
importFrom(checkmate,assert_list)
importFrom(checkmate,assert_names)
importFrom(countrycode,countrycode)
importFrom(curl,curl_download)
importFrom(data.table,copy)
importFrom(data.table,data.table)
importFrom(data.table,dcast)
importFrom(data.table,fcase)
Expand All @@ -47,6 +53,7 @@ importFrom(lubridate,period_to_seconds)
importFrom(lubridate,years)
importFrom(memoise,memoise)
importFrom(oai,list_records)
importFrom(purrr,walk)
importFrom(stats,median)
importFrom(stats,runif)
importFrom(stats,xtabs)
Expand Down
67 changes: 67 additions & 0 deletions R/as_contact_survey.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' @title Check contact survey data
#'
#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
#'
#' @param x list containing
#' - an element named 'participants', a data frame containing participant
#' information
#' - an element named 'contacts', a data frame containing contact information
#' - (optionally) an element named 'reference, a list containing information
#' information needed to reference the survey, in particular it can contain$a
#' "title", "bibtype", "author", "doi", "publisher", "note", "year"
#' @param id.column the column in both the `participants` and `contacts` data frames that links contacts to participants
#' @param country.column the column in the `participants` data frame containing the country in which the participant was queried
#' @param year.column the column in the `participants` data frame containing the year in which the participant was queried
#' @importFrom checkmate assert_list assert_names assert_data_frame
#' assert_character
#' @importFrom purrr walk
#' @return invisibly returns a character vector of the relevant columns
#' @examples
#' data(polymod)
#' check(polymod)
#' @export
as_contact_survey <- function(x, id.column = "part_id",
country.column = "country",
year.column = "year") {
## check arguments
assert_list(x, names = "named")
assert_names(names(x), must.include = c("participants", "contacts"))
assert_data_frame(x$participants)
assert_data_frame(x$contacts)
assert_list(x$reference, names = "named", null.ok = TRUE)
assert_character(id.column)
assert_character(year.column, null.ok = TRUE)
assert_character(country.column, null.ok = TRUE)
assert_names(colnames(x$participants), must.include = id.column)
assert_names(colnames(x$contacts), must.include = id.column)

setnames(x$participants, id.column, "part_id")
setnames(x$contacts, id.column, "part_id")

## check optional columns exist if provided
to_check <- list(
country = country.column,
year = year.column
)

walk(names(to_check), \(column) {
if (!is.null(to_check[[column]]) &&
!(to_check[[column]] %in% colnames(x$participants))) {
stop(
column, " column '", to_check[[column]], "' does not exist ",
"in the participant data frame"
)
} else {
setnames(x$participants, to_check[[column]], column)
}
})

if (is.null(x$reference)) {
warning("No reference provided")
}

survey <- new_contact_survey(x$participant, x$contacts, x$reference)
survey <- clean(survey)

return(survey)
}
10 changes: 9 additions & 1 deletion R/check.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ check <- function(x, ...) UseMethod("check")
#' data(polymod)
#' check(polymod)
#' @export
check.survey <- function(x, id.column = "part_id", participant.age.column = "part_age", country.column = "country", year.column = "year", contact.age.column = "cnt_age", ...) {
check.contact_survey <- function(x, id.column = "part_id", participant.age.column = "part_age", country.column = "country", year.column = "year", contact.age.column = "cnt_age", ...) {
lifecycle::deprecate_warn(
"1.0.0",
"check()",
details = paste(
"Use `as_contact_survey()` instead to construct a `<contact_survey>`",
"object. This will perform necessary checks."
)
)
chkDots(...)
if (!is.data.frame(x$participants) || !is.data.frame(x$contacts)) {
stop("The 'participants' and 'contacts' elements of 'x' must be data.frames")
Expand Down
11 changes: 4 additions & 7 deletions R/clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ clean <- function(x, ...) UseMethod("clean")
#' @description Cleans survey data to work with the 'contact_matrix' function
#'
#' @param x A [survey()] object
#' @param country.column the name of the country in which the survey participant was interviewed
#' @param participant.age.column the column in `x$participants` containing participants' age
#' @param ... ignored
#' @importFrom data.table fcase
Expand All @@ -19,22 +18,20 @@ clean <- function(x, ...) UseMethod("clean")
#' cleaned <- clean(polymod) # not really necessary as the 'polymod' data set has already been cleaned
#' @autoglobal
#' @export
clean.survey <- function(x, country.column = "country", participant.age.column = "part_age", ...) {
clean.contact_survey <- function(x, participant.age.column = "part_age", ...) {
chkDots(...)

x <- survey(x$participants, x$contacts, x$reference)

## update country names
if (country.column %in% colnames(x$participants)) {
countries <- x$participants[[country.column]]
if ("country" %in% colnames(x$participants)) {
countries <- x$participants$country
origin.code <- fcase(
all(nchar(as.character(countries)) == 2), "iso2c",
all(nchar(as.character(countries)) == 3), "iso3c",
default = "country.name"
)
converted_countries <- suppressWarnings(countrycode(countries, origin.code, "country.name"))
converted_countries[is.na(converted_countries)] <- as.character(countries[is.na(converted_countries)])
x$participants[, paste(country.column) := factor(converted_countries)]
x$participants[, country := factor(converted_countries)]
}

if (nrow(x$participants) > 0 &&
Expand Down
Loading