From f9faf0eb39a19055ad40165302dff25daddafce2 Mon Sep 17 00:00:00 2001 From: lwillem Date: Mon, 24 Feb 2025 17:51:33 +0100 Subject: [PATCH] Fix for issue with missing.contact.age "sample" and age.limits > 0 (#170) * The combination of `age.limits` that did not start with 0 and the `missing.contact.age` option set to "sample" caused an issue. Specifically, the age groups below the defined `age.limits` were not excluded, unlike with the `missing.contact.age` options "ignore" or "remove." This led to a contact matrix containing contacts with an age of NA. A corresponding `testthat` case has been added to validate this function's behavior. * Style code (GHA) * Changed expact_equal(x, y) to expect_identical(x, y) in the new testthat cases on the missing.contact.age. * Style code (GHA) * Use 2L instead of as.integer(2) * Resolved the issue with the `testthat` instance on `missing.contact.age`, which was introduced in the last update where `is.integer(3)` was changed to `2L` instead of `3L`. * Added a defensive check to ensure the "ignore" code for `missing.contact.age` is only executed if there is (non-NA) information available to sample from. Additionally, ensured that the final set does not include `NA` values for `contact.age` with this user option. --------- Co-authored-by: lwillem --- R/contact_matrix.R | 17 ++++++++++------- tests/testthat/test-matrix.r | 7 +++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/R/contact_matrix.R b/R/contact_matrix.R index 223e3ff..858dd90 100644 --- a/R/contact_matrix.R +++ b/R/contact_matrix.R @@ -195,9 +195,12 @@ contact_matrix <- function(survey, countries = NULL, survey.pop, age.limits, fil # note: do nothing when "missing" is specified } + # remove contact ages below the age limit, before dealing with missing contact ages + survey$contacts <- survey$contacts[is.na(cnt_age) | + cnt_age >= min(age.limits), ] + if (missing.contact.age == "remove" && - nrow(survey$contacts[is.na(cnt_age) | - cnt_age < min(age.limits)]) > 0) { + nrow(survey$contacts[is.na(cnt_age)]) > 0) { if (!missing.contact.age.set) { message( "Removing participants that have contacts without age information. ", @@ -205,21 +208,20 @@ contact_matrix <- function(survey, countries = NULL, survey.pop, age.limits, fil ) } missing.age.id <- survey$contacts[ - is.na(cnt_age) | cnt_age < min(age.limits), part_id + is.na(cnt_age), part_id ] survey$participants <- survey$participants[!(part_id %in% missing.age.id)] } if (missing.contact.age == "ignore" && - nrow(survey$contacts[is.na(cnt_age) | cnt_age < min(age.limits)]) > 0) { + nrow(survey$contacts[is.na(cnt_age)]) > 0) { if (!missing.contact.age.set) { message( "Ignore contacts without age information. ", "To change this behaviour, set the 'missing.contact.age' option" ) } - survey$contacts <- survey$contacts[!is.na(cnt_age) & - cnt_age >= min(age.limits), ] + survey$contacts <- survey$contacts[!is.na(cnt_age), ] } ## check if any filters have been requested @@ -513,7 +515,7 @@ contact_matrix <- function(survey, countries = NULL, survey.pop, age.limits, fil replace = TRUE ) ] - } else { + } else if (nrow(survey$contacts[!is.na(cnt_age), ]) > 0) { ## no contacts in the age group have an age, sample uniformly between limits min.contact.age <- survey$contacts[, min(cnt_age, na.rm = TRUE)] @@ -530,6 +532,7 @@ contact_matrix <- function(survey, countries = NULL, survey.pop, age.limits, fil ] } } + survey$contacts <- survey$contacts[!is.na(cnt_age), ] # make sure the final set does not contain NA's anymore } ## set contact age groups diff --git a/tests/testthat/test-matrix.r b/tests/testthat/test-matrix.r index 8ac90e6..96d61f4 100644 --- a/tests/testthat/test-matrix.r +++ b/tests/testthat/test-matrix.r @@ -555,3 +555,10 @@ test_that("Contact matrices per capita are also generated when bootstrapping", { test_that("Symmetric contact matrices with large normalisation weights throw a warning", { expect_warning(contact_matrix(survey = polymod, age.limits = c(0, 90), symmetric = TRUE), "artefacts after making the matrix symmetric") }) + +test_that("Contacts with an age below the age limits are excluded regardless of the missing.contact.age setting", { + expect_identical(ncol(contact_matrix(polymod, age.limits = c(10, 50), missing.contact.age = "remove")$matrix), 2L) + expect_identical(ncol(contact_matrix(polymod, age.limits = c(10, 50), missing.contact.age = "sample")$matrix), 2L) + expect_identical(ncol(contact_matrix(polymod, age.limits = c(10, 50), missing.contact.age = "keep")$matrix), 3L) # extra column for ages outside age limits (= NA) + expect_identical(ncol(contact_matrix(polymod, age.limits = c(10, 50), missing.contact.age = "ignore")$matrix), 2L) +})