Merge pull request #84 from nutriverse:dev

use coverage estimators and add confidence intervals; fix #80; fix #83
nutriverse · Feb 1, 2025 · f41f663 · f41f663
2 parents bf8af27 + 7a47dff
commit f41f663
Show file tree

Hide file tree

Showing 13 changed files with 680 additions and 1,245 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -12,7 +12,8 @@ export(get_sampling_clusters)
 export(get_sampling_list)
 export(lqas_classify)
 export(lqas_classify_)
-export(lqas_classify_coverage)
+export(lqas_classify_cf)
+export(lqas_classify_tc)
 export(lqas_get_class_prob)
 export(lqas_simulate_population)
 export(lqas_simulate_run)

diff --git a/R/03-classify_coverage.R b/R/03-classify_coverage.R
@@ -1,38 +1,46 @@
 #'
 #' LQAS classifier
 #'
-#' @param n Number of cases found.
-#' @param n_total Number sampled.
+#' @param cases_in Number of SAM and/or MAM cases found during the survey who
+#'   are in the CMAM programme.
+#' @param cases_out Number of SAM and/or MAM cases found during the survey who
+#'   are in the CMAM programme.
+#' @param rec_in Number of children recovering from SAM or MAM found during the
+#'   survey who are in the programme.
+#' @inheritParams squeacr::calculate_tc
 #' @param threshold  Decision rule threshold/s. Should be between 0 and 1. At
 #'   least one threshold should be provided for a two-tier classifier. Two 
 #'   thresholds should be provided for a three-tier classifier. Default is a 
 #'   three-tier classifier with rule set at 0.2 and 0.5.
+#' @param label Logical. Should the output results be classification labels?
+#'   If TRUE, output classification are character labels else they are integer
+#'   values. Default is FALSE.
 #'
-#' @returns A character value or vector indicating classification. If
-#'   `threshold` is a single value, the generic function returns *1* if `n` is
-#'   greater than the threshold else *0*. The coverage classifier
-#'   function returns **"Satisfactory"** if `n` is greater than the threshold
-#'   else **"Not satisfactory"**. If `threshold` is two values, the generic
-#'   function returns *1* if `n` is greater than the first threshold and *2* if
-#'   `n` is greater than the second threshold else *0*. The CMAM coverage
-#'   classifier returns **"Low"** if `n` is below or equal to lower threshold, 
-#'   **"High"** if `n` is above the higher threshold, and **"Moderate"** for 
-#'   all other values of `n`.
+#' @returns A [data.frame()] of coverage classifications for case-finding
+#'   effectiveness and for treatment coverage.
 #'
 #' @author Ernest Guevarra
 #'
 #' @examples
-#' lqas_classify_coverage(n = 6, n_total = 40)
+#' lqas_classify(cases_in = 6, cases_out = 34, rec_in = 6)
+#' 
 #' with(
 #'   survey_data,
-#'   lqas_classify_coverage(n = cases_in, n_total = cases_total)
+#'   lqas_classify(
+#'     cases_in = cases_in, cases_out = cases_out, rec_in = rec_in
+#'   )
 #' )
 #'
 #' @export
 #' @rdname lqas_classify
 #'
 
-lqas_classify_ <- function(n, n_total, threshold = c(0.2, 0.5)) {
+lqas_classify_ <- function(cases_in, 
+                           cases_out,
+                           rec_in = NULL,
+                           k = 3,
+                           threshold = c(0.2, 0.5),
+                           label = FALSE) {  
   ## Check that threshold/s is/are numeric
   if (!all(is.numeric(threshold))) {
     stop(
@@ -61,25 +69,23 @@ lqas_classify_ <- function(n, n_total, threshold = c(0.2, 0.5)) {
       )
     }
   }
+
+  ## Classify case-finding effectiveness ----
+  cf <- lqas_classify_cf(
+    cases_in = cases_in, cases_out = cases_out, 
+    threshold = threshold, label = label
+  )
+
+  ## Classify treatment coverage ----
+  tc <- lqas_classify_tc(
+    cases_in = cases_in, cases_out = cases_out, rec_in = rec_in, k = k,
+    threshold = threshold, label = label
+  )
 
-  ## Get d
-  d <- n_total * threshold
-
-  ## Two-tier classification
-  if (length(d) == 1) {
-    coverage_class <- ifelse(n > d, 1, 0)
-  }
-
-  ## Three-tier classification
-  if (length(d) == 2) {
-    coverage_class <- ifelse(
-      n > d[2], 2,
-      ifelse(
-        n <= d[1], 0, 1
-      )
-    )
-  }
-
+  ## Concatenate cf and tc ----
+  coverage_class <- list(cf = cf, tc = tc)
+
+  ## Return coverage class ----
   coverage_class
 }
 
@@ -88,39 +94,82 @@ lqas_classify_ <- function(n, n_total, threshold = c(0.2, 0.5)) {
 #' @rdname lqas_classify
 #' 
 
-lqas_classify <- function(n, n_total, threshold = c(0.2, 0.5)) {
+lqas_classify <- function(cases_in, 
+                          cases_out, 
+                          rec_in = NULL, 
+                          k = 3, 
+                          threshold = c(0.2, 0.5),
+                          label = FALSE) {
   Map(
     f = lqas_classify_,
-    n = as.list(n),
-    n_total = as.list(n_total),
-    threshold = rep(list(threshold), length(n))
+    cases_in = as.list(cases_in),
+    cases_out = as.list(cases_out),
+    rec_in = as.list(rec_in),
+    k = as.list(k),
+    threshold = rep(list(threshold), length(cases_in)),
+    label = label
   ) |>
-    unlist()  
+    do.call(rbind, args = _) |>
+    data.frame()
 }
 
 #'
 #' @export
 #' @rdname lqas_classify
-#' 
+#'
 
-lqas_classify_coverage <- function(n, n_total, threshold = c(0.2, 0.5)) {
-  coverage_class <- lqas_classify(
-    n = n, n_total = n_total, threshold = threshold
-  )
+lqas_classify_cf <- function(cases_in, cases_out, 
+                             threshold = c(0.2, 0.5), label = FALSE) {
+  d <- (cases_in + cases_out) * threshold
 
   if (length(threshold) == 1) {
-    coverage_label <- ifelse(
-      coverage_class == 1, "Satisfactory", "Not satisfactory"
+    cf <- ifelse(cases_in > d, 1L, 0L)
+
+    if (label) cf <- ifelse(cf == 0L, "Not satisfactory", "Satisfactory")
+  } else {
+    cf <- ifelse(
+      cases_in > d[2], 2L,
+      ifelse(
+        cases_in <= d[1], 0L, 1L
+      )
     )
+
+    if (label)
+      cf <- ifelse(cf == 0L, "Low", ifelse(cf == 1L, "Moderate", "High"))
+  }
+
+  ## Return cf ----
+  cf
+}
+
+
+#'
+#' @export
+#' @rdname lqas_classify
+#' 
+
+lqas_classify_tc <- function(cases_in, cases_out, rec_in, k,
+                             threshold = c(0.2, 0.5), label = FALSE) {
+  rec_out <- squeacr::calculate_rout(cases_in, cases_out, rec_in, k = k)
+
+  d <- (cases_in + cases_out + rec_in + rec_out) * threshold
+
+  if (length(threshold) == 1) {
+    tc <- ifelse((cases_in + rec_in) > d, 1L, 0L)
+
+    if (label) tc <- ifelse(tc == 0L, "Not satisfactory", "Satisfactory")
   } else {
-    coverage_label <- ifelse(
-      coverage_class == 0, "Low",
+    tc <- ifelse(
+      (cases_in + rec_in) > d[2], 2L,
       ifelse(
-        coverage_class == 1, "Moderate", "High"
+        (cases_in + rec_in) <= d[1], 0L, 1L
       )
     )
+
+    if (label)
+      tc <- ifelse(tc == 0L, "Low", ifelse(tc == 1L, "Moderate", "High"))
   }
 
-  ## Return coverage_label ----
-  coverage_label
+  ## Return tc ----
+  tc
 }
diff --git a/R/05-post_strat_estimator.R b/R/05-post_strat_estimator.R
@@ -1,5 +1,6 @@
 #'
-#' Weighted post-stratification estimation
+#' Weighted post-stratification estimation of coverage over several service
+#' delivery units
 #' 
 #' @param cov_df A [data.frame()] of stratified coverage survey data to get
 #'   overall coverage estimates of. `cov_df` should have a variable named
@@ -24,7 +25,8 @@
 #'   Default is *"cf"*.
 #' @inheritParams squeacr::calculate_tc
 #'
-#' @returns An overall coverage estimate with 95% confidence interval.
+#' @returns A list of overall coverage estimates with corresponding 95%
+#'   confidence intervals for case-finding effectiveness and treatment coverage.
 #' 
 #' @examples
 #' cov_df <- survey_data
@@ -33,7 +35,7 @@
 #'   setNames(nm = c("strata", "pop"))
 #' 
 #' estimate_coverage_overall(
-#'   cov_df, pop_df, strata = "district", u5 = 0.17, p = 0.015
+#'   cov_df, pop_df, strata = "district", u5 = 0.177, p = 0.01
 #' )
 #' 
 #' @export
@@ -42,7 +44,7 @@
 
 estimate_coverage_overall <- function(cov_df, pop_df, strata, 
                                       u5, p, 
-                                      cov_type = c("cf", "treat"), k = 3) {
+                                      k = 3) {
   ## Check data ----
   check_coverage_data(cov_df)
 
@@ -51,8 +53,6 @@ estimate_coverage_overall <- function(cov_df, pop_df, strata,
       "{.strong {strata}} is not a variable in {.var cov_df}"
     )
 
-  cov_type <- match.arg(cov_type)
-
   ## Calculate weights ----
   weights <- calculate_weights(pop_df = pop_df, u5 = u5, p = p)
 
@@ -61,16 +61,52 @@ estimate_coverage_overall <- function(cov_df, pop_df, strata,
     by.x = strata, by.y = "strata", all.x = TRUE
   )
 
-  ## Calculate coverage ----
-  cov <- estimate_coverage(
-    cov_df = cov_pop_df, cov_type = cov_type, k = k
+  ## Calculate case-finding effectiveness ----
+  cov_cf <- estimate_coverage(
+    cov_df = cov_pop_df, cov_type = "cf", k = k
   )
 
   ## Weight coverage estimates and sum ----
-  cov <- sum(cov * weights)
+  cov_cf <- sum(cov_cf * weights)
 
-  ## Return cov ----
-  cov
+  ## Calculate weighted ci ----
+  ci_cf <- calculate_ci(
+    cov_df = cov_df, cov_type = "cf", k = k, weights = weights
+  )
+
+  ## Calculate overall ci ----
+  lcl_cf <- cov_cf - 1.96 * sqrt(sum(ci_cf))
+  ucl_cf <- cov_cf + 1.96 * sqrt(sum(ci_cf))
+
+  ## Calculate treatment coverage ----
+  cov_tc <- estimate_coverage(
+    cov_df = cov_pop_df, cov_type = "tc", k = k
+  )
+
+  ## Weight coverage estimates and sum ----
+  cov_tc <- sum(cov_tc * weights)
+
+  ## Calculate weighted ci ----
+  ci_tc <- calculate_ci(
+    cov_df = cov_df, cov_type = "tc", k = k, weights = weights
+  )
+
+  ## Calculate overall ci ----
+  lcl_tc <- cov_tc - 1.96 * sqrt(sum(ci_tc))
+  ucl_tc <- cov_tc + 1.96 * sqrt(sum(ci_tc))
+
+  ## Concatenate outputs ----
+  coverage_overall <- list(
+    cf = list(
+      estimate = cov_cf, ci = c(lcl_cf, ucl_cf)
+    ),
+    tc = list(
+      estimate = cov_tc, ci = c(lcl_tc, ucl_tc)
+    )
+  )
+
+  ## Return coverage_overall ----
+  coverage_overall
 }
 
 #'
@@ -98,8 +134,8 @@ calculate_weights <- function(pop_df, u5, p) {
 #' 
 
 estimate_coverage <- function(cov_df, 
-                               cov_type = c("cf", "tc"), 
-                               k = 3) {
+                              cov_type = c("cf", "tc"), 
+                              k = 3) {
   check_coverage_data(cov_df)
 
   cov_type <- match.arg(cov_type)
@@ -118,3 +154,28 @@ estimate_coverage <- function(cov_df,
   ## Return cov ----
   cov
 }
+
+#'
+#' @keywords internal
+#'
+
+calculate_ci <- function(cov_df, cov_type = c("cf", "tc"), k = 3, weights) {
+  if (cov_type == "cf") {
+    c <- cov_df$cases_in
+    n <- cov_df$cases_in + cov_df$cases_out
+  } else {
+    rec_out <- with(
+      cov_df,
+      squeacr::calculate_rout(cases_in, cases_out, rec_in, k = k)
+    )
+
+    c <- cov_df$cases_in + cov_df$rec_in
+
+    n <- cov_df$cases_in + cov_df$cases_out + cov_df$rec_in + rec_out
+  }
+
+
+  ci <- ((weights ^ 2) * (c / n) * (1 - (c / n))) / n
+}
+
+
diff --git a/R/utils.R b/R/utils.R
@@ -145,7 +145,7 @@ check_pop_data <- function(pop_df) {
 
   ## Check that pop_df has the required variables ----
   df_names_check <- c("strata", "pop") %in% names(pop_df)
-  df_names_missing <- names(pop_df)[!df_names_check]
+  df_names_missing <- c("strata", "pop")[!df_names_check]
 
   if (!all(df_names_check))
     cli::cli_abort(