accommodate readr's na parameter

suggested by @rmtrane in #529
OuhscBbmc · Sep 6, 2024 · 8fd2cd5 · 8fd2cd5
1 parent c95eeca
commit 8fd2cd5
Show file tree

Hide file tree

Showing 7 changed files with 190 additions and 0 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -94,6 +94,7 @@ This will help extract forms from longitudinal & repeating projects.
 * The regex in `regex_named_captures()` is forgiving if there's an unnecessary leading space (@BlairCooper, #495, #501)
 * `redcap_log_read()` assumes all columns are character, except for `timestamp` (#525)
 * `redcap_file_download_oneshot()` no longer asks for the unnecessary parameter for `repeating_instrument` (that the REDCap server ignores).  (@BlairCooper, #506, #530)
+* `redcap_read()` and `redcap_read_oneshot()` accommodate `readr::read_csv()`'s parameter of `na`.  (Suggested by @rmtrane in #529)
 
 Version 1.1.0 (released 2022-08-10)
 ==========================================================

diff --git a/R/redcap-read-oneshot.R b/R/redcap-read-oneshot.R
@@ -60,6 +60,8 @@
 #' REDCap project. Default is `FALSE`.
 #' @param col_types A [readr::cols()] object passed internally to
 #' [readr::read_csv()].  Optional.
+#' @param na A [character] vector passed internally to [readr::read_csv()].
+#' Defaults to `c("", "NA")`.
 #' @param guess_type A boolean value indicating if all columns should be
 #' returned as character.  If true, [readr::read_csv()] guesses the intended
 #' data type for each column.  Ignored if `col_types` is not null.
@@ -215,6 +217,7 @@ redcap_read_oneshot <- function(
   datetime_range_end            = as.POSIXct(NA),
   blank_for_gray_form_status    = FALSE,
   col_types                     = NULL,
+  na                            = c("", "NA"),
   guess_type                    = TRUE,
   guess_max                     = 1000,
   http_response_encoding        = "UTF-8",
@@ -244,6 +247,7 @@ redcap_read_oneshot <- function(
   checkmate::assert_posixct(  datetime_range_end        , any.missing=TRUE , len=1, null.ok=TRUE)
   checkmate::assert_logical( blank_for_gray_form_status , any.missing=FALSE, len=1)
 
+  checkmate::assert_character(na                        , any.missing=FALSE)
   checkmate::assert_logical(  guess_type                , any.missing=FALSE, len=1)
   checkmate::assert_numeric(   guess_max                , any.missing=FALSE, len=1, lower=1)
 
@@ -329,6 +333,7 @@ redcap_read_oneshot <- function(
         readr::read_csv(
           file            = I(kernel$raw_text),
           col_types       = col_types,
+          na              = na,
           guess_max       = guess_max,
           locale          = locale,
           show_col_types  = FALSE

diff --git a/R/redcap-read.R b/R/redcap-read.R
@@ -73,6 +73,8 @@
 #' REDCap project. Default is `FALSE`.
 #' @param col_types A [readr::cols()] object passed internally to
 #' [readr::read_csv()].  Optional.
+#' @param na A [character] vector passed internally to [readr::read_csv()].
+#' Defaults to `c("", "NA")`.
 #' @param guess_type A boolean value indicating if all columns should be
 #' returned as character.  If true, [readr::read_csv()] guesses the intended
 #' data type for each column.  Ignored if `col_types` is not null.
@@ -265,6 +267,7 @@ redcap_read <- function(
   blank_for_gray_form_status    = FALSE,
 
   col_types                     = NULL,
+  na                            = c("", "NA"),
   guess_type                    = TRUE,
   guess_max                     = NULL, # Deprecated parameter
   http_response_encoding        = "UTF-8",
@@ -294,6 +297,7 @@ redcap_read <- function(
   checkmate::assert_posixct(  datetime_range_end        , any.missing=TRUE , len=1, null.ok=TRUE)
   checkmate::assert_logical( blank_for_gray_form_status , any.missing=FALSE, len=1)
 
+  checkmate::assert_character(na                        , any.missing=FALSE)
   checkmate::assert_logical(  guess_type                , any.missing=FALSE,     len=1)
   if (!is.null(guess_max)) warning("The `guess_max` parameter in `REDCapR::redcap_read()` is deprecated.")
 
@@ -468,6 +472,7 @@ redcap_read <- function(
       datetime_range_end          = datetime_range_end,
       blank_for_gray_form_status  = blank_for_gray_form_status,
 
+      na                          = na,
       col_types                   = col_types,
       guess_type                  = FALSE,
       # guess_max                 # Not used, because guess_type is FALSE

diff --git a/inst/test-data/specific-redcapr/read-batch-simple/na.R b/inst/test-data/specific-redcapr/read-batch-simple/na.R
@@ -0,0 +1,52 @@
+structure(list(record_id = 1:5, name_first = c("Nutmeg", "Tumtum", 
+"Marcus", "Trudy", "John Lee"), name_last = c(NA, NA, "Wood", 
+"DAG", "Walker"), address = c("14 Rose Cottage St.\nKenning UK, 323232", 
+"14 Rose Cottage Blvd.\nKenning UK 34243", "243 Hill St.\nGuthrie OK 73402", 
+"342 Elm\nDuncanville TX, 75116", "Hotel Suite\nNew Orleans LA, 70115"
+), telephone = c("(405) 321-1111", "(405) 321-2222", "(405) 321-3333", 
+"(405) 321-4444", "(405) 321-5555"), email = c("[email protected]", 
+"[email protected]", "[email protected]", "[email protected]", "[email protected]"
+), dob = structure(c(12294, 12121, -13051, -6269, -5375), class = "Date"), 
+    age = c(11, 11, 80, 61, 59), sex = c(0, 1, 1, 0, 1), demographics_complete = c(2, 
+    2, 2, 2, 2), height = c(7, 6, 180, 165, 193.04), weight = c(1, 
+    1, 80, 54, 104), bmi = c(204.1, 277.8, 24.7, 19.8, 27.9), 
+    comments = c("Character in a book, with some guessing", "A mouse character from a good book", 
+    "completely made up", "This record doesn't have a DAG assigned\n\nSo call up Trudy on the telephone\nSend her a letter in the mail", 
+    "Had a hand for trouble and a eye for cash\n\nHe had a gold watch chain and a black mustache"
+    ), mugshot = c("mugshot-1.jpg", "mugshot-2.jpg", "mugshot-3.jpg", 
+    "mugshot-4.jpg", "mugshot-5.jpg"), health_complete = c(1, 
+    0, 2, 2, 0), race___1 = c(FALSE, FALSE, FALSE, FALSE, TRUE
+    ), race___2 = c(FALSE, FALSE, FALSE, TRUE, FALSE), race___3 = c(FALSE, 
+    TRUE, FALSE, FALSE, FALSE), race___4 = c(FALSE, FALSE, TRUE, 
+    FALSE, FALSE), race___5 = c(TRUE, TRUE, TRUE, TRUE, FALSE
+    ), race___6 = c(FALSE, FALSE, FALSE, FALSE, TRUE), ethnicity = c(1, 
+    1, 0, 1, 2), interpreter_needed = c(0, 0, 1, NA, 0), race_and_ethnicity_complete = c(2, 
+    0, 2, 2, 2)), row.names = c(NA, -5L), spec = structure(list(
+    cols = list(record_id = structure(list(), class = c("collector_integer", 
+    "collector")), name_first = structure(list(), class = c("collector_character", 
+    "collector")), name_last = structure(list(), class = c("collector_logical", 
+    "collector")), address = structure(list(), class = c("collector_character", 
+    "collector")), telephone = structure(list(), class = c("collector_character", 
+    "collector")), email = structure(list(), class = c("collector_character", 
+    "collector")), dob = structure(list(format = ""), class = c("collector_date", 
+    "collector")), age = structure(list(), class = c("collector_double", 
+    "collector")), sex = structure(list(), class = c("collector_double", 
+    "collector")), demographics_complete = structure(list(), class = c("collector_double", 
+    "collector")), height = structure(list(), class = c("collector_double", 
+    "collector")), weight = structure(list(), class = c("collector_double", 
+    "collector")), bmi = structure(list(), class = c("collector_double", 
+    "collector")), comments = structure(list(), class = c("collector_character", 
+    "collector")), mugshot = structure(list(), class = c("collector_character", 
+    "collector")), health_complete = structure(list(), class = c("collector_double", 
+    "collector")), race___1 = structure(list(), class = c("collector_logical", 
+    "collector")), race___2 = structure(list(), class = c("collector_logical", 
+    "collector")), race___3 = structure(list(), class = c("collector_logical", 
+    "collector")), race___4 = structure(list(), class = c("collector_logical", 
+    "collector")), race___5 = structure(list(), class = c("collector_logical", 
+    "collector")), race___6 = structure(list(), class = c("collector_logical", 
+    "collector")), ethnicity = structure(list(), class = c("collector_double", 
+    "collector")), interpreter_needed = structure(list(), class = c("collector_double", 
+    "collector")), race_and_ethnicity_complete = structure(list(), class = c("collector_double", 
+    "collector"))), default = structure(list(), class = c("collector_guess", 
+    "collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df", 
+"tbl_df", "tbl", "data.frame"))
diff --git a/inst/test-data/specific-redcapr/read-oneshot/na.R b/inst/test-data/specific-redcapr/read-oneshot/na.R
@@ -0,0 +1,52 @@
+structure(list(record_id = 1:5, name_first = c("Nutmeg", "Tumtum", 
+"Marcus", "Trudy", "John Lee"), name_last = c(NA, NA, "Wood", 
+"DAG", "Walker"), address = c("14 Rose Cottage St.\nKenning UK, 323232", 
+"14 Rose Cottage Blvd.\nKenning UK 34243", "243 Hill St.\nGuthrie OK 73402", 
+"342 Elm\nDuncanville TX, 75116", "Hotel Suite\nNew Orleans LA, 70115"
+), telephone = c("(405) 321-1111", "(405) 321-2222", "(405) 321-3333", 
+"(405) 321-4444", "(405) 321-5555"), email = c("[email protected]", 
+"[email protected]", "[email protected]", "[email protected]", "[email protected]"
+), dob = structure(c(12294, 12121, -13051, -6269, -5375), class = "Date"), 
+    age = c(11, 11, 80, 61, 59), sex = c(0, 1, 1, 0, 1), demographics_complete = c(2, 
+    2, 2, 2, 2), height = c(7, 6, 180, 165, 193.04), weight = c(1, 
+    1, 80, 54, 104), bmi = c(204.1, 277.8, 24.7, 19.8, 27.9), 
+    comments = c("Character in a book, with some guessing", "A mouse character from a good book", 
+    "completely made up", "This record doesn't have a DAG assigned\n\nSo call up Trudy on the telephone\nSend her a letter in the mail", 
+    "Had a hand for trouble and a eye for cash\n\nHe had a gold watch chain and a black mustache"
+    ), mugshot = c("mugshot-1.jpg", "mugshot-2.jpg", "mugshot-3.jpg", 
+    "mugshot-4.jpg", "mugshot-5.jpg"), health_complete = c(1, 
+    0, 2, 2, 0), race___1 = c(FALSE, FALSE, FALSE, FALSE, TRUE
+    ), race___2 = c(FALSE, FALSE, FALSE, TRUE, FALSE), race___3 = c(FALSE, 
+    TRUE, FALSE, FALSE, FALSE), race___4 = c(FALSE, FALSE, TRUE, 
+    FALSE, FALSE), race___5 = c(TRUE, TRUE, TRUE, TRUE, FALSE
+    ), race___6 = c(FALSE, FALSE, FALSE, FALSE, TRUE), ethnicity = c(1, 
+    1, 0, 1, 2), interpreter_needed = c(0, 0, 1, NA, 0), race_and_ethnicity_complete = c(2, 
+    0, 2, 2, 2)), row.names = c(NA, -5L), spec = structure(list(
+    cols = list(record_id = structure(list(), class = c("collector_integer", 
+    "collector")), name_first = structure(list(), class = c("collector_character", 
+    "collector")), name_last = structure(list(), class = c("collector_character", 
+    "collector")), address = structure(list(), class = c("collector_character", 
+    "collector")), telephone = structure(list(), class = c("collector_character", 
+    "collector")), email = structure(list(), class = c("collector_character", 
+    "collector")), dob = structure(list(format = ""), class = c("collector_date", 
+    "collector")), age = structure(list(), class = c("collector_double", 
+    "collector")), sex = structure(list(), class = c("collector_double", 
+    "collector")), demographics_complete = structure(list(), class = c("collector_double", 
+    "collector")), height = structure(list(), class = c("collector_double", 
+    "collector")), weight = structure(list(), class = c("collector_double", 
+    "collector")), bmi = structure(list(), class = c("collector_double", 
+    "collector")), comments = structure(list(), class = c("collector_character", 
+    "collector")), mugshot = structure(list(), class = c("collector_character", 
+    "collector")), health_complete = structure(list(), class = c("collector_double", 
+    "collector")), race___1 = structure(list(), class = c("collector_logical", 
+    "collector")), race___2 = structure(list(), class = c("collector_logical", 
+    "collector")), race___3 = structure(list(), class = c("collector_logical", 
+    "collector")), race___4 = structure(list(), class = c("collector_logical", 
+    "collector")), race___5 = structure(list(), class = c("collector_logical", 
+    "collector")), race___6 = structure(list(), class = c("collector_logical", 
+    "collector")), ethnicity = structure(list(), class = c("collector_double", 
+    "collector")), interpreter_needed = structure(list(), class = c("collector_double", 
+    "collector")), race_and_ethnicity_complete = structure(list(), class = c("collector_double", 
+    "collector"))), default = structure(list(), class = c("collector_guess", 
+    "collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df", 
+"tbl_df", "tbl", "data.frame"))
diff --git a/tests/testthat/test-read-batch-simple.R b/tests/testthat/test-read-batch-simple.R
@@ -83,6 +83,43 @@ test_that("default", {
   expect_match(returned_object2$outcome_messages, regexp=expected_outcome_message, perl=TRUE)
   expect_s3_class(returned_object2$data, "tbl")
 })
+test_that("na", {
+  testthat::skip_on_cran()
+  path_expected <- "test-data/specific-redcapr/read-batch-simple/na.R"
+  col_types <- readr::cols(
+    record_id  = readr::col_integer(),
+    race___1   = readr::col_logical(),
+    race___2   = readr::col_logical(),
+    race___3   = readr::col_logical(),
+    race___4   = readr::col_logical(),
+    race___5   = readr::col_logical(),
+    race___6   = readr::col_logical()
+  )
+
+  expected_outcome_message <- "\\d+ records and 25 columns were read from REDCap in \\d+(\\.\\d+\\W|\\W)seconds\\."
+
+  returned_object <-
+    redcap_read(
+      redcap_uri  = credential$redcap_uri,
+      token       = credential$token,
+      na          = c("", "NA", "Nutmouse"),
+      col_types   = col_types,
+      batch_size  = 2,
+      verbose     = FALSE
+    )
+
+  if (update_expectation) save_expected(returned_object$data, path_expected)
+  expected_data_frame <- retrieve_expected(path_expected)
+
+  expect_equal(returned_object$data, expected=expected_data_frame, label="The returned data.frame should be correct", ignore_attr = TRUE) # dput(returned_object$data)
+  expect_true( returned_object$success)
+  expect_match(returned_object$status_codes, regexp="200", perl=TRUE)
+  expect_true( returned_object$records_collapsed=="", "A subset of records was not requested.")
+  expect_true( returned_object$fields_collapsed=="", "A subset of fields was not requested.")
+  expect_true( returned_object$filter_logic=="", "A filter was not specified.")
+  expect_match(returned_object$outcome_messages, regexp=expected_outcome_message, perl=TRUE)
+  expect_s3_class(returned_object$data, "tbl")
+})
 test_that("col_types", {
   testthat::skip_on_cran()
   path_expected <- "test-data/specific-redcapr/read-batch-simple/col_types.R"

diff --git a/tests/testthat/test-read-oneshot.R b/tests/testthat/test-read-oneshot.R
@@ -41,6 +41,44 @@ test_that("default", {
 
   expect_s3_class(returned_object$data, "tbl")
 })
+test_that("na", {
+  testthat::skip_on_cran()
+  path_expected <- "test-data/specific-redcapr/read-oneshot/na.R"
+  expected_outcome_message <- "\\d+ records and \\d+ columns were read from REDCap in \\d+(\\.\\d+\\W|\\W)seconds\\."
+
+  col_types <- readr::cols(
+    record_id  = readr::col_integer(),
+    race___1   = readr::col_logical(),
+    race___2   = readr::col_logical(),
+    race___3   = readr::col_logical(),
+    race___4   = readr::col_logical(),
+    race___5   = readr::col_logical(),
+    race___6   = readr::col_logical()
+  )
+
+  returned_object <-
+    redcap_read_oneshot(
+      redcap_uri  = credential$redcap_uri,
+      token       = credential$token,
+      na          = c("", "NA", "Nutmouse"),
+      col_types   = col_types,
+      verbose     = FALSE
+    )
+
+  if (update_expectation) save_expected(returned_object$data, path_expected)
+  expected_data_frame <- retrieve_expected(path_expected)
+
+  expect_equal(returned_object$data, expected=expected_data_frame, label="The returned data.frame should be correct", ignore_attr = TRUE) # dput(returned_object$data)
+  expect_equal(returned_object$status_code, expected=200L)
+  expect_equal(returned_object$raw_text, expected="", ignore_attr = TRUE) # dput(returned_object$raw_text)
+  expect_true(returned_object$records_collapsed=="", "A subset of records was not requested.")
+  expect_true(returned_object$fields_collapsed=="", "A subset of fields was not requested.")
+  expect_true(returned_object$filter_logic=="", "A filter was not specified.")
+  expect_match(returned_object$outcome_message, regexp=expected_outcome_message, perl=TRUE)
+  expect_true(returned_object$success)
+
+  expect_s3_class(returned_object$data, "tbl")
+})
 test_that("col_types", {
   testthat::skip_on_cran()
   path_expected <- "test-data/specific-redcapr/read-oneshot/col_types.R"