Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checkbox choices 2 #505

Merged
merged 9 commits into from
Jul 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ This will help extract forms from longitudinal & repeating projects.
* `validate_repeat_instance()`
* `validate_no_logical()`
* `redcap_read()` checks the `event` parameter and throws an error if a value is not recognized, or the project is not longitudinal (#493)
* The regex in `regex_named_captures()` is forgiving if there's an unnecessary leading space (@BlairCooper, #495)
* The regex in `regex_named_captures()` is forgiving if there's an unnecessary leading space (@BlairCooper, #495, #501)

Version 1.1.0 (released 2022-08-10)
==========================================================
Expand Down
19 changes: 17 additions & 2 deletions R/metadata-utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
#' choices_3 <- ds_metadata_3[ds_metadata_3$field_name=="race", "select_choices_or_calculations"]
#' REDCapR::regex_named_captures(pattern = pattern_boxes, text = choices_3)

#' @importFrom magrittr %>%
#' @export
regex_named_captures <- function(pattern, text, perl = TRUE) {
checkmate::assert_character(pattern, any.missing = FALSE, len = 1, min.chars = 0L)
Expand Down Expand Up @@ -110,11 +111,25 @@ regex_named_captures <- function(pattern, text, perl = TRUE) {
}

#' @rdname metadata_utilities
#' @importFrom rlang .data
#' @export
checkbox_choices <- function(select_choices) {
checkmate::assert_character(select_choices, any.missing=FALSE, len=1, min.chars=1)

pattern_checkboxes <- "(?<=\\A| \\| |\\| )(?<id>\\d{1,}), (?<label>[^|]{1,}?)(?= \\| |\\| |\\Z)"
pattern <- "^(.+?),\\s*+(.*)$"

regex_named_captures(pattern = pattern_checkboxes, text = select_choices)
select_choices %>%
strsplit(split = "|", fixed = TRUE) %>%
magrittr::extract2(1) %>%
base::trimws() %>%
tibble::as_tibble() %>% # default column name is `value`
dplyr::filter(.data$value != "") %>%
dplyr::transmute(
id = sub(pattern, "\\1", .data$value, perl = TRUE),
label = sub(pattern, "\\2", .data$value, perl = TRUE),
)

# pattern_checkboxes <- "(?<=\\A| \\| |\\| )(?<id>\\d{1,}), (?<label>[^|]{1,}?)(?= \\| |\\| |\\Z)"
# pattern_checkboxes <- "(?<=\\A| \\| |\\| | \\|)(?<id>\\d{1,}), ?(?<label>[^|]{1,}?)(?= \\| |\\| | \\||\\Z)"
# regex_named_captures(pattern = pattern_checkboxes, text = select_choices)
}
201 changes: 151 additions & 50 deletions tests/testthat/test-metadata-utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,157 @@ library(testthat)
test_that("Named Captures", {
pattern_checkboxes <- "(?<=\\A| \\| )(?<id>\\d{1,}), (?<label>[\x20-\x7B\x7D-\x7E]{1,})(?= \\| |\\Z)"

ds_expected <-
tibble::tribble(
~id, ~label,
"1", "American Indian/Alaska Native",
"2", "Asian",
"3", "Native Hawaiian or Other Pacific Islander",
"4", "Black or African American",
"5", "White",
"6", "Unknown / Not Reported"
)

choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
ds_boxes <- regex_named_captures(pattern=pattern_checkboxes, text=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "5", "6"),
label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -6L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
expect_s3_class(ds_boxes, "tbl")
})

test_that("checkbox choices", {
choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
ds_boxes <- checkbox_choices(select_choices=choices_1)
test_that("checkbox choices -digits", {
ds_expected <-
tibble::tribble(
~id, ~label,
"1", "American Indian/Alaska Native",
"-2", "Asian",
"3", "Native Hawaiian or Other Pacific Islander",
"4", "Black or African American",
"5", "White",
"66", "Unknown / Not Reported"
)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "5", "6"),
label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -6L)
)
# well-behaved
"1, American Indian/Alaska Native | -2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
expect_s3_class(ds_boxes, "tbl")
# no leading spaces
"1, American Indian/Alaska Native |-2, Asian |3, Native Hawaiian or Other Pacific Islander |4, Black or African American |5, White |66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"1, American Indian/Alaska Native| -2, Asian| 3, Native Hawaiian or Other Pacific Islander| 4, Black or African American| 5, White| 66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")

# extra lines
"| | 1, American Indian/Alaska Native | | | -2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 66, Unknown / Not Reported | | | " |>
checkbox_choices() |>
expect_equal(ds_expected, label = "extra lines:")
})

test_that("checkbox choices -letters", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id, ~label,
"a", "American Indian/Alaska Native",
"b", "Asian",
"c", "Native Hawaiian or Other Pacific Islander",
"dd", "Black or African American",
"eee", "White",
"f", "Unknown / Not Reported"
)

# well-behaved
"a, American Indian/Alaska Native | b, Asian | c, Native Hawaiian or Other Pacific Islander | dd, Black or African American | eee, White | f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# no leading spaces
"a, American Indian/Alaska Native |b, Asian |c, Native Hawaiian or Other Pacific Islander |dd, Black or African American |eee, White |f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"a, American Indian/Alaska Native| b, Asian| c, Native Hawaiian or Other Pacific Islander| dd, Black or African American| eee, White| f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")
})

test_that("checkbox choices -commas in labels", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id , ~label,
"a" , "American Indian, Native American, or Alaska Native",
"b" , "Asian",
"c" , "Native Hawaiian, Samoan, or Other Pacific Islander",
"dd" , "Black or African American",
"eee" , "White",
"f" , "Unknown / Not Reported"
)

# well-behaved
"a, American Indian, Native American, or Alaska Native | b, Asian | c, Native Hawaiian, Samoan, or Other Pacific Islander | dd, Black or African American | eee, White | f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# no leading spaces
"a, American Indian, Native American, or Alaska Native |b, Asian |c, Native Hawaiian, Samoan, or Other Pacific Islander |dd, Black or African American |eee, White |f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"a, American Indian, Native American, or Alaska Native| b, Asian| c, Native Hawaiian, Samoan, or Other Pacific Islander| dd, Black or African American| eee, White| f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")
})

test_that("checkbox choices -digits only", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id , ~label,
"1" , "1",
"2" , "2",
"3" , "3",
"4" , "4"
)

# well-behaved
"1, 1 | 2, 2 | 3, 3 | 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# missing leading space
"1, 1 | 2,2 | 3, 3 | 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "missing leading space:")

# missing trailing spaces
"1, 1 | 2, 2| 3, 3| 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# extra lines
"|1, 1 | 2, 2 | 3, 3 || 4, 4| |" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")
})

test_that("checkbox choices with special characters", {
choices_1 <- "1, Hospital A | 2, Hospitäl B | 3, Hospital Ç | 4, Hospítal D"
ds_boxes <- checkbox_choices(select_choices=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4"),
label = c("Hospital A", "Hospitäl B", "Hospital Ç", "Hospítal D")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct")
expect_s3_class(ds_boxes, "tbl")
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id, ~label,
"1", "Hospital A",
"2", "Hospitäl B",
"3", "Hospital Ç",
"4", "Hospítal D"
)

"1, Hospital A | 2, Hospitäl B | 3, Hospital Ç | 4, Hospítal D" |>
checkbox_choices() |>
expect_equal(ds_expected)
})

###############################################################################
Expand All @@ -72,18 +174,17 @@ test_that("checkbox choices with special characters", {
# REDCap versions
###############################################################################
test_that("checkbox choices with errant space", {
choices_1 <- "1, Depressive mood disorder | 2, Adjustment disorder| 3, Personality disorder | 4, Anxiety | 0, Not Noted"
ds_boxes <- checkbox_choices(select_choices=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "0"),
label = c("Depressive mood disorder", "Adjustment disorder", "Personality disorder", "Anxiety", "Not Noted")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -5L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct")
expect_s3_class(ds_boxes, "tbl")
ds_expected <-
tibble::tribble(
~id, ~label,
"1", "Depressive mood disorder",
"2", "Adjustment disorder",
"3", "Personality disorder",
"4", "Anxiety",
"0", "Not Noted"
)

"1, Depressive mood disorder | 2, Adjustment disorder| 3, Personality disorder | 4, Anxiety | 0, Not Noted" |>
checkbox_choices() |> # datapasta::tribble_paste()
expect_equal(ds_expected)
})