Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fetch records in batches #12

Merged
merged 2 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: redcap
Title: R Utilities For REDCap
Version: 0.1.0.9000
Version: 0.2.0
Authors@R: c(
person(
"Patrick", "Barks",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# redcap 0.2.0

* records are fetched in batches by default with `fetch_records()`

# redcap 0.1.0.9000

* `project_logging()` now returns column `timestamp` as class POSIXct by
Expand Down
84 changes: 49 additions & 35 deletions R/fetch_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#' @param names_fn Function for creating custom list element names given a
#' vector of form names. Defaults to an identity function in which case
#' element names will correspond exactly to form names.
#' @param form_delay Delay in seconds between fetching successive forms, to
#' give the REDCap server time to respond to other requests. Defaults to
#' `0.5`.
#' @param fns Optional list of one or more functions to apply to each list
#' element (i.e. each form). Could be used e.g. to filter out record IDs from
#' test entries, create derived variables, etc. Each function should take a
Expand Down Expand Up @@ -70,6 +73,9 @@ fetch_database <- function(conn,
fn_datetimes_args = list(orders = c("Ymd HMS", "Ymd HM")),
na = c("", "NA"),
dag = TRUE,
batch_size = 100L,
batch_delay = 0.5,
form_delay = 0.5,
double_resolve = FALSE,
double_remove = FALSE,
double_sep = "--",
Expand All @@ -94,41 +100,49 @@ fetch_database <- function(conn,
## fetch records -------------------------------------------------------------
# uses lower-level fn fetch_records_() to avoid having to repeatedly fetch the
# same metadata tables for each separate form
out <- lapply(
X = forms,
FUN = fetch_records_,
conn = conn,
events = NULL,
records = records,
records_omit = records_omit,
fields = NULL,
id_field = id_field,
rm_empty = rm_empty,
value_labs = value_labs,
value_labs_fetch_raw = value_labs_fetch_raw,
header_labs = header_labs,
checkbox_labs = checkbox_labs,
use_factors = use_factors,
times_chron = times_chron,
date_range_begin = date_range_begin,
date_range_end = date_range_end,
fn_dates = fn_dates,
fn_dates_args = fn_dates_args,
fn_datetimes = fn_datetimes,
fn_datetimes_args = fn_datetimes_args,
na = na,
dag = dag,
double_resolve = double_resolve,
double_remove = double_remove,
double_sep = double_sep,
m_dict = m_dict,
m_factors = m_factors,
m_instr = m_instr,
m_events = m_events,
m_repeat = m_repeat,
m_mapping = m_mapping,
m_dags = m_dags
)
out <- list()

for (i in seq_along(forms)) {

out[[i]] <- fetch_records_(
conn = conn,
forms = forms[i],
events = NULL,
records = records,
records_omit = records_omit,
fields = NULL,
id_field = id_field,
rm_empty = rm_empty,
value_labs = value_labs,
value_labs_fetch_raw = value_labs_fetch_raw,
header_labs = header_labs,
checkbox_labs = checkbox_labs,
use_factors = use_factors,
times_chron = times_chron,
date_range_begin = date_range_begin,
date_range_end = date_range_end,
fn_dates = fn_dates,
fn_dates_args = fn_dates_args,
fn_datetimes = fn_datetimes,
fn_datetimes_args = fn_datetimes_args,
na = na,
dag = dag,
batch_size = batch_size,
batch_delay = batch_delay,
double_resolve = double_resolve,
double_remove = double_remove,
double_sep = double_sep,
m_dict = m_dict,
m_factors = m_factors,
m_instr = m_instr,
m_events = m_events,
m_repeat = m_repeat,
m_mapping = m_mapping,
m_dags = m_dags
)

if (i < length(forms)) Sys.sleep(form_delay)
}

names(out) <- names_fn(forms)

Expand Down
81 changes: 66 additions & 15 deletions R/fetch_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@
#' @param dag Logical indicating whether to export the
#' `redcap_data_access_group` field (if used in the project). Defaults to
#' `TRUE`.
#' @param batch_size Number of records to fetch per batch. Defaults to `100L`.
#' Set to `Inf` or `NA` to fetch all records at once.
#' @param batch_delay Delay in seconds between fetching successive batches, to
#' give the REDCap server time to respond to other requests. Defaults to
#' `0.5`.
#' @param double_resolve Logical indicating whether to resolve double-entries
#' (i.e. records entered in duplicate using REDCap's Double Data Entry
#' module), by filtering to the lowest entry number associated with each
Expand Down Expand Up @@ -163,6 +168,8 @@ fetch_records <- function(conn,
fn_datetimes_args = list(orders = c("Ymd HMS", "Ymd HM")),
na = c("", "NA"),
dag = TRUE,
batch_size = 100L,
batch_delay = 0.5,
double_resolve = FALSE,
double_remove = FALSE,
double_sep = "--") {
Expand Down Expand Up @@ -204,6 +211,8 @@ fetch_records <- function(conn,
fn_datetimes_args = fn_datetimes_args,
na = na,
dag = dag,
batch_size = batch_size,
batch_delay = batch_delay,
double_resolve = double_resolve,
double_remove = double_remove,
double_sep = double_sep,
Expand Down Expand Up @@ -245,6 +254,8 @@ fetch_records_ <- function(conn,
fn_datetimes_args,
na,
dag,
batch_size,
batch_delay,
double_resolve,
double_remove,
double_sep,
Expand Down Expand Up @@ -290,12 +301,44 @@ fetch_records_ <- function(conn,
stop("Argument 'date_range_end' must have format YYYY-MM-DD HH:MM:SS")
}

# batch_size
if (is.na(batch_size)) {
batch_size <- Inf
}

# add ID field
name_id_field <- m_dict$field_name[1]
if (id_field & !name_id_field %in% fields) fields <- c(name_id_field, fields)

## prepare request -----------------------------------------------------------
body <- list(

## get list of record IDs to create batches ----------------------------------
body_ids <- list(
token = conn$token,
content = "record",
format = "csv",
type = "flat",
csvDelimiter = ",",
fields = name_id_field,
events = paste(events, collapse = ","),
returnFormat = "csv"
)

if (!is.null(records)) body_ids[["records"]] <- paste(records, collapse = ",")

df_ids <- post_wrapper(
conn,
body = body_ids,
content = NULL,
na = na,
on_error = "fail"
)

ids_unique <- sort(unique(df_ids[[name_id_field]]))
batch <- (seq_len(length(ids_unique)) - 1L) %/% batch_size + 1L


## fetch records in batches --------------------------------------------------
body_batch <- list(
token = conn$token,
content = "record",
format = "csv",
Expand All @@ -310,23 +353,31 @@ fetch_records_ <- function(conn,
returnFormat = "csv"
)

# add records and fields, if given
if (!is.null(records)) body[["records"]] <- paste(records, collapse = ",")
if (!is.null(fields)) body[["fields"]] <- paste(fields, collapse = ",")
# add fields, if given
if (!is.null(fields)) body_batch[["fields"]] <- paste(fields, collapse = ",")

# add date range fields, if given
if (!is.null(date_range_begin)) body[["dateRangeBegin"]] <- date_range_begin
if (!is.null(date_range_end)) body[["dateRangeEnd"]] <- date_range_end
if (!is.null(date_range_begin)) body_batch[["dateRangeBegin"]] <- date_range_begin
if (!is.null(date_range_end)) body_batch[["dateRangeEnd"]] <- date_range_end

out_batch <- list()

## fetch ---------------------------------------------------------------------
out <- post_wrapper(
conn,
body = body,
content = NULL,
na = na,
on_error = "fail"
)
for (i in unique(batch)) {

body_batch$records = paste(ids_unique[batch == i], collapse = ",")

out_batch[[i]] <- post_wrapper(
conn,
body = body_batch,
content = NULL,
na = na,
on_error = "fail"
)

if (i < max(batch)) Sys.sleep(batch_delay)
}

out <- bind_rows(out_batch)


## if no records, populate empty form ----------------------------------------
Expand Down
14 changes: 14 additions & 0 deletions man/fetch_database.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions man/fetch_records.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading