Skip to content

Commit

Permalink
refactor muac prevalence to work on a multiple geographical area acco…
Browse files Browse the repository at this point in the history
…rding to status of mfaz SD and age ratio
  • Loading branch information
tomaszaba committed Jul 11, 2024
1 parent 46447fe commit 91faf56
Show file tree
Hide file tree
Showing 23 changed files with 340 additions and 207 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ Imports:
scales,
srvyr,
stats,
zscorer
zscorer,
tibble
Suggests:
spelling,
testthat (>= 3.0.0)
Expand Down
2 changes: 1 addition & 1 deletion R/age.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ process_age <- function(df, svdate = NULL, birdate = NULL, age) {
age_days = round({{ age }} * 30.44, 2)
)
}
dplyr::as_tibble(df)
tibble::as_tibble(df)
}

#'
Expand Down
62 changes: 33 additions & 29 deletions R/prevalence_helpers.R → R/case_definitions.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,3 @@


# Function to identify the type of treatment for prevalence --------------------
#'
#' A helper function to tell how to go about MUAC prevalence analysis based on
#' on the output of age ratio and standard deviation test results
#'
#' @param age_ratio_class,sd_class Character vectors storing age ratio's p-values
#' and standard deviation's classification, respectively.
#'
#' @returns A character vector of the same length containing the indication of
#' what to do for the MUAC prevalence analysis: "weighted", "unweighted" and
#' "missing". If "weighted", the CDC weighting approach is applied to correct for
#' age bias. If "unweighted" a normal complex sample analysis is applied, and for
#' the latter, NA are thrown.
#'
#'
tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) {
case_when(
age_ratio_class == "Problematic" & sd_class != "Problematic" ~ "weighted",
age_ratio_class != "Problematic" & sd_class == "Problematic" ~ "missing",
age_ratio_class == "Problematic" & sd_class == "Problematic" ~ "missing",
.default = "unweighted"
)
}


# Function on acute malnutrition case-definition -------------------------------

#'
#' Case-Definition: is an observation acutely malnourished?
#'
Expand Down Expand Up @@ -255,3 +226,36 @@ define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL,
}
)
}

#'
#' A helper function to classify nutritional status into SAM, MAM or not wasted
#'
#' `classify_wasting_for_cdc_approach()` is used a helper inside
#' [apply_cdc_age_weighting()] to classify nutritional status into "sam", "mam"
#' or "not wasted" and then the vector returned is used downstream to calculate
#' the proportions of children with severe and moderate acute malnutrition.
#'
#' @param muac An integer vector containing MUAC values. They should be in
#' millimeters.
#'
#' @param .edema Optional. Its a vector containing data on bilateral pitting
#' edema coded as "y" for yes and "n" for no.
#'
#'
classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) {
if (!is.null(.edema)) {
#edema <- ifelse(edema == 1, "y", "n")
x <- case_when(
muac < 115 | {{ .edema }} == "y" ~ "sam",
muac >= 115 & muac < 125 & {{ .edema }} == "n" ~ "mam",
.default = "not wasted"
)
} else {
x <- case_when(
muac < 115 ~ "sam",
muac >= 115 & muac < 125 ~ "mam",
.default = "not wasted"
)
}
x
}
55 changes: 54 additions & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,63 @@


#'
#' District level survey data with Standard Deviation classified as problematic
#' District level SMART surveys conducted in four district in Mozambique
#'
#' @description
#' This example data contains survey data of four districts. Two of them have their WFHZ
#' standard deviation classified as problematic, and the are other two within range of
#' acceptable standard deviation. The data is used to test the performance of WFHZ based
#' prevalence when used on a data set with multiple survey areas that may or not have
#' different classification for standard deviation that may warrant different analysis
#' approach, as the function is designed for.
#'
#' @format A tibble of 943 x 9.
#'
#' |**Variable** | **Description** |
#' | :--- | :---|
#' | *district* | The administrative unit (admin 1) where data was collected. |
#' | *cluster* | Primary sampling unit |
#' | *team* | Survey teams |
#' | *sex* | Sex, "m" = boys, "f" = girls |
#' | *age* | calculated age in months with two decimal places |
#' | *weight* | Weight (kg) |
#' | *height* | Height (cm) |
#' | *edema* | Edema, "n" = no, "y" = yes |
#' | *muac* | Mid-upper arm circumference (mm) |
#'
"anthro.03"


#'
#' MUAC data from a community-based sentinel site from an anonymized location
#'
#' @description
#' Data in `anthro.04` was generated from a community-based sentinel site of three provinces.
#' Each province data set holds different scenarios that informs the appropriate analysis
#' approach to follow. One province (province 3) has its MFAZ standard deviation and age
#' ratio tests classified as problematic. Another province (province 2) has its age ratio
#' classified as problematic, but with a within range standard deviation. Lastly, province 1
#' has both tests falling within range of nor problematic. The data is used to test the
#' performance of `[compute_muac_prevalence()]` based when used on a multiple survey areas
#' data that may or not have on the aforementioned test that may then warrant a different
#' analysis approach, as the function is designed for.
#'
#' @format A tibble of 3,002 x 8.
#'
#' |**Variable** | **Description** |
#' | :--- | :---|
#' | *province* |
#' | *cluster* | Primary sampling unit |
#' | *sex* | Sex, "m" = boys, "f" = girls |
#' | *age* | calculated age in months with two decimal places |
#' | *muac* | Mid-upper arm circumference (mm) |
#' | *edema* | Edema, "n" = no, "y" = yes |
#' | *mfaz* | MUAC-for-age z-scores with 3 decimal places |
#' | *flag_mfaz* | Flagged observations. 1=flagged, 0=not flagged |
#'
"anthro.04"


#'
#' A SMART survey data with standard deviation on weight-for-height zscores
#' classified as problematic
Expand Down
133 changes: 66 additions & 67 deletions R/prevalence_muac.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,24 @@
# Function to classify nutritional status -------------------------------------

#'
#' A helper function to classify nutritional status into SAM, MAM or not wasted
#'
#' `classify_wasting_for_cdc_approach()` is used a helper inside
#' [apply_cdc_age_weighting()] to classify nutritional status into "sam", "mam"
#' or "not wasted" and then the vector returned is used downstream to calculate
#' the proportions of children with severe and moderate acute malnutrition.
#' A helper function to tell how to go about MUAC prevalence analysis based on
#' on the output of age ratio and standard deviation test results
#'
#' @param muac An integer vector containing MUAC values. They should be in
#' millimeters.
#' @param age_ratio_class,sd_class Character vectors storing age ratio's p-values
#' and standard deviation's classification, respectively.
#'
#' @param .edema Optional. Its a vector containing data on bilateral pitting
#' edema coded as "y" for yes and "n" for no.
#' @returns A character vector of the same length containing the indication of
#' what to do for the MUAC prevalence analysis: "weighted", "unweighted" and
#' "missing". If "weighted", the CDC weighting approach is applied to correct for
#' age bias. If "unweighted" a normal complex sample analysis is applied, and for
#' the latter, NA are thrown.
#'
#'
classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) {
if (!is.null(.edema)) {
#edema <- ifelse(edema == 1, "y", "n")
x <- case_when(
muac < 115 | {{ .edema }} == "y" ~ "sam",
muac >= 115 & muac < 125 & {{ .edema }} == "n" ~ "mam",
.default = "not wasted"
)
} else {
x <- case_when(
muac < 115 ~ "sam",
muac >= 115 & muac < 125 ~ "mam",
.default = "not wasted"
)
}
x
tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) {
case_when(
age_ratio_class == "Problematic" & sd_class != "Problematic" ~ "weighted",
age_ratio_class != "Problematic" & sd_class == "Problematic" ~ "missing",
age_ratio_class == "Problematic" & sd_class == "Problematic" ~ "missing",
.default = "unweighted"
)
}

# The CDC approach to correct for age bias in MUAC -----------------------------
Expand Down Expand Up @@ -99,18 +87,16 @@ compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) {
if (!is.null(.summary_by)) {
df <- df |>
filter(.data$flag_mfaz == 0) |>
mutate(muac = recode_muac(.data$muac, unit = "mm")) |>
group_by(!!.summary_by) |>
#mutate(muac = recode_muac(.data$muac, unit = "cm")) |>
summarise(
sam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "sam"),
mam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "mam"),
gam = sum(.data$sam, .data$mam),
.groups = "drop"
.by = !!.summary_by
) |>
mutate(
gam_p = gam, sam_p = sam, mam_p = mam,
gam = NA, sam = NA, mam = NA
) |> dplyr::select(!2:4)
dplyr::rename(
gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam
)
} else {
df <- df |>
filter(.data$flag_mfaz == 0) |>
Expand All @@ -120,10 +106,9 @@ compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) {
mam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "mam"),
gam = sum(.data$sam, .data$mam)
) |>
mutate(
gam_p = gam, sam_p = sam, mam_p = mam,
gam = NA, sam = NA, mam = NA
) |> dplyr::select(!2:4)
dplyr::rename(
gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam
)
}
df
}
Expand Down Expand Up @@ -189,60 +174,74 @@ compute_muac_prevalence <- function(df,

## Get and classify age ratio and standard deviation ----
if (!rlang::quo_is_null(.summary_by)) {
x <- group_by(df, !!.summary_by) |>
x <- df |>
group_by(!!.summary_by) |>
summarise(
age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p),
std = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)),
analysis_approach = tell_muac_analysis_strategy(age_ratio, std),
analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std),
.groups = "drop"
)
} else {
x <- summarise(
df,
age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p),
std = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)),
analysis_approach = tell_muac_analysis_strategy(age_ratio, std)
)
x <- df |>
summarise(
age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p),
std = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)),
analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std)
)
}

for (i in seq_along(nrow(x))) {
## Iterate over data frame to compute prevalence according to analysis_approach ----
for (i in seq_len(nrow(x))) {
if (!rlang::quo_is_null(.summary_by)) {
area <- dplyr::pull(x, !!.summary_by)[i]
data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area)
data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == area)
} else {
data <- df
}

analysis_approach <- x$analysis_approach[i]

if (analysis_approach == "unweighted") {
### Compute standard complex sample based prevalence analysis ----
output <- compute_pps_based_muac_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by)
} else if (analysis_approach == "weighted") {
if (!rlang::quo_is_null(.summary_by)) {
### Compute weighted prevalence summarized at .summary_by ----
output <- compute_weighted_prevalence(data, .edema = {{ .edema }}, !!.summary_by)
} else {
### Compute non-summarized weighted prevalence ----
output <- compute_weighted_prevalence(data, .edema = {{ .edema }})
}
} else {
output <- tibble::tibble(
gam_n = NA,
sam_n = NA,
mam_n = NA,
gam_p = NA,
sam_p = NA,
mam_p = NA,
gam_p_low = NA,
gam_p_upp = NA,
sam_p_low = NA,
sam_p_upp = NA,
mam_p_low = NA,
mam_p_upp = NA,
gam_p_deff = NA,
sam_p_deff = NA,
mam_p_deff = NA
)
if (!rlang::quo_is_null(.summary_by)) {
### Add NA's to the summarized tibble accordingly ----
output <- summarise(
data,
gam_p = NA_real_,
sam_p = NA_real_,
mam_p = NA_real_,
.by = !!.summary_by
)
} else {
### Return a non-summarised tibble ----
output <- tibble::tibble(
gam_p = NA_real_,
sam_p = NA_real_,
mam_p = NA_real_
)
}
}
results[[i]] <- output
}
dplyr::bind_rows(results)
### Ensure that all geographical aras are added to the tibble ----
if (!rlang::quo_is_null(.summary_by)) {
results <- dplyr::bind_rows(results) |>
dplyr::relocate(.data$gam_p, .after = .data$gam_n) |>
dplyr::relocate(.data$sam_p, .after = .data$sam_n) |>
dplyr::relocate(.data$mam_p, .after = .data$mam_n)
} else {
results <- dplyr::bind_rows(results)
}
results
}
14 changes: 7 additions & 7 deletions R/prevalence_wfhz.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ apply_probit_approach <- function(x, .status = c("gam", "sam")) {
#' Compute global, severe and moderate acute malnutrition prevalence using PROBIT approach.
#'
#' @description
#' This function is a helper function inside the main fuction [compute_wfhz_prevalence()].
#' This function is a helper function inside the main function [compute_wfhz_prevalence()].
#' It is used to compute PROBIT based prevalence depending on the status of standard deviation.
#'
#' @param df A data frame object returned by [process_wfhz_data()]. this will contain the
Expand All @@ -131,11 +131,11 @@ compute_probit_prevalence <- function(df, .summary_by = NULL) {
df,
gam = apply_probit_approach(.data$wfhz, .status = "gam"),
sam = apply_probit_approach(.data$wfhz, .status = "sam"),
mam = gam - sam,
mam = .data$gam - .data$sam,
.by = !!.summary_by
) |>
mutate(
gam_p = gam, sam_p = sam, mam_p = mam,
gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam,
gam = NA, sam = NA, mam = NA
) |>
dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function
Expand All @@ -144,10 +144,10 @@ compute_probit_prevalence <- function(df, .summary_by = NULL) {
df,
gam = apply_probit_approach(.data$wfhz, .status = "gam"),
sam = apply_probit_approach(.data$wfhz, .status = "sam"),
mam = gam - sam
mam = .data$gam - .data$sam
) |>
mutate(
gam_p = gam, sam_p = sam, mam_p = mam,
gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam,
gam = NA, sam = NA, mam = NA
) |>
dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function
Expand Down Expand Up @@ -183,8 +183,8 @@ compute_probit_prevalence <- function(df, .summary_by = NULL) {
#' .summary_by = NULL (default).
#'
#' @returns A tibble. The length vary depending on .summary_by. If set to NULL, a tibble of
#' 1 x 16 is returned, otherwise, a tibble of n rowns (depending on the number of geographical
#' areas in the dataset) x 17.
#' 1 x 16 is returned, otherwise, a tibble of n rows (depending on the number of geographical
#' areas in the data set) x 17.
#'
#' @examples
#'
Expand Down
Loading

0 comments on commit 91faf56

Please sign in to comment.