Merge pull request #63 from nutriverse:dev

re-factor stage 1 sampling functions; fix #62
nutriverse · Dec 26, 2024 · 9a85674 · 9a85674
2 parents 747cb6c + 0224366
commit 9a85674
Show file tree

Hide file tree

Showing 6 changed files with 72 additions and 104 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,19 +3,17 @@
 S3method(plot,lqasSim)
 S3method(print,lqasClass)
 export(classify_coverage)
-export(create_sampling_list)
 export(get_n_cases)
 export(get_n_clusters)
 export(get_sample_d)
 export(get_sample_n)
-export(get_sampling_interval)
+export(get_sampling_clusters)
+export(get_sampling_list)
 export(lqas_get_class_prob)
 export(lqas_simulate_population)
 export(lqas_simulate_run)
 export(lqas_simulate_runs)
 export(lqas_simulate_test)
-export(select_random_start)
-export(select_sampling_clusters)
 importFrom(graphics,abline)
 importFrom(graphics,legend)
 importFrom(graphics,lines)

diff --git a/R/02-stage1.R b/R/02-stage1.R
@@ -1,71 +1,60 @@
 #'
 #' Select sampling clusters using systematic sampling
 #'
-#' @param N_clusters Total number of clusters in survey area
-#' @param n_clusters Number of sampling clusters to be selected
-#' @param interval Sampling interval usually calculated using
-#'   `[get_sampling_interval()]`
+#' @param N_clusters Total number of clusters in survey area.
+#' @param n_clusters Number of sampling clusters to be selected.
 #' @param cluster_list A data.frame containing at least the name or any other
 #'   identifier for the entire set of clusters to sample from.
 #'
-#' @return A numeric value for `[get_sampling_interval()]` and
-#'   `[select_random_start()]`. An integer vector for 
-#'   `[select_sampling_clusters()]` giving the row index for selected clusters. 
-#'   A data.frame for `[create_sampling_list()]` which is a subset of 
-#'   `cluster_list`
+#' @return An integer vector for [get_sampling_clusters()] giving the row 
+#'   index for selected clusters. A data.frame for `[get_sampling_list()]` which 
+#'   is a subset of `cluster_list`.
 #'
 #' @examples
-#' get_sampling_interval(N_clusters = 211, n_clusters = 35)
-#' interval <- get_sampling_interval(N_clusters = 211, n_clusters = 35)
-#' select_random_start(interval)
-#' select_sampling_clusters(N_clusters = 211, n_clusters = 35)
-#' create_sampling_list(cluster_list = village_list, n_clusters = 70)
+#' get_sampling_clusters(N_clusters = 211, n_clusters = 35)
+#' get_sampling_list(cluster_list = village_list, n_clusters = 70)
 #'
 #' @export
-#' @rdname sampling
+#' @rdname get_sampling
 #'
 
-get_sampling_interval <- function(N_clusters, n_clusters) {
-  floor(N_clusters / n_clusters)
-}
-
-#'
-#' @export
-#' @rdname sampling
-#'
-
-select_random_start <- function(interval) {
-  sample(x = seq_len(interval), size = 1)
-}
-
-#'
-#' @export
-#' @rdname sampling
-#'
-
-select_sampling_clusters <- function(N_clusters, n_clusters) {
+get_sampling_clusters <- function(N_clusters, n_clusters) {
   interval <- get_sampling_interval(
     N_clusters = N_clusters, n_clusters = n_clusters
   )
 
   random_start <- select_random_start(interval = interval)
 
-  sample_sequence <- seq.int(
-    from = random_start, to = N_clusters, by = interval
-  )
+  sample_sequence <- seq(from = random_start, to = N_clusters, by = interval)
 
   sample_sequence
 }
 
 #'
 #' @export
-#' @rdname sampling
+#' @rdname get_sampling
 #'
 
-create_sampling_list <- function(cluster_list, n_clusters) {
-  sample_sequence <- select_sampling_clusters(
+get_sampling_list <- function(cluster_list, n_clusters) {
+  sample_sequence <- get_sampling_clusters(
     N_clusters = nrow(cluster_list), n_clusters = n_clusters
   )
 
   cluster_list[sample_sequence, ]
 }
+
+#'
+#' @keywords internal
+#'
+
+select_random_start <- function(interval) {
+  sample(x = seq_len(interval), size = 1)
+}
+
+#'
+#' @keywords internal
+#'
+
+get_sampling_interval <- function(N_clusters, n_clusters) {
+  floor(N_clusters / n_clusters)
+}
diff --git a/man/get_sampling.Rd b/man/get_sampling.Rd
diff --git a/man/sampling.Rd b/man/sampling.Rd
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
@@ -48,10 +48,7 @@ reference:
 
   - title: Stage 1 sampling
     contents:
-    - get_sampling_interval
-    - select_random_start
-    - select_sampling_clusters
-    - create_sampling_list
+    - starts_with("get_sampling")
 
   - title: Coverage classifier
     contents: classify_coverage

diff --git a/tests/testthat/test-02-stage1.R b/tests/testthat/test-02-stage1.R
@@ -16,27 +16,25 @@ test_that("output is numeric or integer", {
 
   ## Sampling sequence
   expect_type(
-    select_sampling_clusters(N_clusters = 211, n_clusters = 35), "double"
+    get_sampling_clusters(N_clusters = 211, n_clusters = 35), "double"
   )
 })
 
 ## Test that output is data.frame
 
 test_that("output is tibble or data.frame", {
   expect_type(
-    create_sampling_list(cluster_list = village_list, n_clusters = 70),
+    get_sampling_list(cluster_list = village_list, n_clusters = 70),
     "list"
   )
 
-  expect_true(
-    is_tibble(
-      create_sampling_list(cluster_list = village_list, n_clusters = 70)
-    )
+  expect_s3_class(
+    get_sampling_list(cluster_list = village_list, n_clusters = 70),
+    "data.frame"
   )
 
-  expect_true(
-    is.data.frame(
-      create_sampling_list(cluster_list = village_list, n_clusters = 70)
-    )
+  expect_s3_class(
+    get_sampling_list(cluster_list = village_list, n_clusters = 70),
+    "tbl"
   )
 })