diff --git a/R/all_data_prep.R b/R/all_data_prep.R index 93aad071..ff062d3d 100644 --- a/R/all_data_prep.R +++ b/R/all_data_prep.R @@ -1,14 +1,15 @@ #' @title Construct age-group variable from age column #' #' @description This method splits an age interval from `min_val` to `max_val` -#' into `(max_val - min_val) / step` intervals. -#' By default `min_val` is set to 0, however it can be assigned by -#' convenience. +#' into intervals of size `step`. #' If the method finds ages greater or equal than `max_val` #' it assigns the string `">max_val"`. -#' To avoid errors it is necessary to set `step < max_val`. -#' It is also suggested to choose the step such -#' that `max_val %% (step + 1) == 0`. +#' By default `min_val` is set to 0, but any other value can be +#' supplied. If the method finds ages lower +#' than `min_val` it assigns the string `"", - limits_low[length(limits_low)] + ">", limits_low[length(limits_low)] ) - lim_breaks <- c(-Inf, limits_low[seq(2, length(limits_low))] - 1, Inf) + if (min_val == 0) { + lim_breaks <- c(limits_low[seq(1, length(limits_low))] - 1, Inf) + } else { + lim_labels <- c(paste0("<", min_val - 1), lim_labels) + lim_breaks <- c(-Inf, limits_low[seq(1, length(limits_low))] - 1, Inf) + } + + # Warn when (max_val - min_val) is not an integer multiple of step + if ((max_val - min_val) %% step != 0) { + war_msg <- "(max_val - min_val) must be an integer multiple of step. + The last interval will be truncated to " + war_msg <- paste0(war_msg, lim_labels[length(lim_labels)]) + warning(war_msg) + } # cut the age data and apply labels age_group <- cut(data[[col_age]], diff --git a/man/get_age_group.Rd b/man/get_age_group.Rd index 3dcf7e75..a0ea331d 100644 --- a/man/get_age_group.Rd +++ b/man/get_age_group.Rd @@ -26,14 +26,15 @@ Ages above \code{max_val} are represented as \verb{>max_val}. } \description{ This method splits an age interval from \code{min_val} to \code{max_val} -into \code{(max_val - min_val) / step} intervals. 
-By default \code{min_val} is set to 0, however it can be assigned by -convenience. +into intervals of size \code{step}. If the method finds ages greater or equal than \code{max_val} it assigns the string \code{">max_val"}. -To avoid errors it is necessary to set \code{step < max_val}. -It is also suggested to choose the step such -that \code{max_val \%\% (step + 1) == 0}. +By default \code{min_val} is set to 0, but any other value can be +supplied. If the method finds ages lower +than \code{min_val} it assigns the string \code{"%i", max_val) # hacky test to avoid regex extraction - ) # check that breaks are correct # expect 0-50 and >80 - expect_identical( - levels( - get_age_group( - data = cohortdata, - col_age = "age", - max_val = 80, - step = 50 - ) - ), - c("0-50", ">80") + expect_warning( + get_age_group( + data = cohortdata, + col_age = "age", + max_val = 80, + step = 50 + ) ) }) @@ -120,6 +110,17 @@ test_that("`get_age_groups`: Input checking", { regexp = "Assertion on 'min_val' failed: May not be NA" ) + # non-integer values passed + expect_error( + get_age_group( + data = cohortdata, + col_age = "age", + min_val = 0.7, + max_val = 1, + step = 1 + ) + ) + # step size is larger than difference in age limits # maximum age is less than the minimum age max_age <- 80 @@ -138,3 +139,23 @@ test_that("`get_age_groups`: Input checking", { ) ) }) + +# tests to check for min_val != 0 +test_that("`get_age_groups`: non-zero min_val", { + min_val <- 10 + cohortdata$age_group <- get_age_group( + data = cohortdata, + col_age = "age", + max_val = 80, + min_val = min_val, + step = 10 + ) + + # expect no NA values in the age groups + expect_length(cohortdata[is.na(cohortdata$age_group), ]$age_group, 0) + + # check the label of records with age < min_val + expect_true( + all(cohortdata[cohortdata$age < min_val, ]$age_group == "<9") + ) +})