Skip to content

Commit

Permalink
Improve ONA pull function to work with multiple pages
Browse files Browse the repository at this point in the history
  • Loading branch information
truenomad committed Oct 4, 2024
1 parent a35d123 commit ea2e708
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 106 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(autoparse_dates)
export(big_mark)
export(calculate_distance)
export(check_coords)
Expand Down
157 changes: 76 additions & 81 deletions R/get_ona.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ check_status_api <- function(response) {
# Conditional loading for packages
if (!requireNamespace("httpcode", quietly = TRUE)) {
stop(
"Package 'httpcode' is required but is not installed. Please install it.",
call. = FALSE)
"Package 'httpcode' is required but is not installed. Please install it.",
call. = FALSE)
}

# get resoinse code
Expand Down Expand Up @@ -98,36 +98,25 @@ prep_ona_data_endpoints <- function(
#'
#' @param api_url The base URL of the API endpoint.
#' @param api_token Authentication token for API access, prefixed with "Token".
#' @param start An integer specifying the starting point for data retrieval.
#' Defaults to 0.
#' @param api_limit An integer specifying the maximum number of items to
#' retrieve per page.
#' @param times The number of attempts to retry the request in case of failure.
#' Defaults to 12.
#' @return A list containing the retrieved data parsed from JSON format. If
#' the specified content is not found or an error occurs, the function
#' stops and returns an error message.
get_ona_page <- function(api_url, api_token, start = 0, api_limit, times = 12) {
tryCatch({
resp <- httr::RETRY(
verb = "GET",
url = api_url,
config = httr::add_headers(Authorization = paste("Token", api_token)),
query = list(
start = start,
limit = api_limit
),
times = times,
pause_cap = 180,
httr::progress(type = "down")
)

content <- httr::content(resp, "text", encoding = "UTF-8")
jsonlite::fromJSON(content, simplifyDataFrame = TRUE)
}, error = function(e) {
message("Error encountered: ", e$message)
NULL
})
get_ona_page <- function(api_url, api_token, times = 12) {
tryCatch(
httr::RETRY(
"GET", api_url,
httr::add_headers(Authorization = paste("Token", api_token)),
times = times, pause_cap = 180, httr::progress("down")
) |>
httr::content("text", encoding = "UTF-8") |>
jsonlite::fromJSON(simplifyDataFrame = TRUE),
error = function(e) {
message("Error encountered: ", e$message)
NULL
}
)
}

#' Validate and Normalize a Base URL
Expand Down Expand Up @@ -164,23 +153,29 @@ validate_base_url <- function(base_url) {
#' # api_token <- "your_api_token_here"
#' # data <- get_paginated_data(api_url, api_token)
get_paginated_data <- function(api_url, api_token) {
api_limit = 100000

api_limit <- 100000
results <- list()
get_next_page <- TRUE
start <- 0
while (get_next_page) {
current_page <- get_ona_page(api_url, api_token, start, api_limit) |>
page_number <- 1

repeat {

paged_url <- paste0(api_url,
"?page=", page_number,
"&page_size=100000")

current_page <- get_ona_page(paged_url, api_token) |>
as.data.frame() |>
dplyr::mutate(dplyr::across(tidyselect::everything(), as.character))

results <- dplyr::bind_rows(results, current_page)
if (nrow(current_page) < api_limit) {
get_next_page <- FALSE
} else {
start <- start + api_limit
}

if (nrow(current_page) < api_limit || nrow(current_page) == 0) break

page_number <- page_number + 1
}
results

return(dplyr::distinct(results))
}

#' Generate a Random Emoji for Successful Results
Expand Down Expand Up @@ -558,7 +553,7 @@ get_updated_ona_data <- function(base_url = "https://api.whonghub.org",
file_path, data_file_name, base_url, form_ids)


# If getting multiple columns, include these in url ---------------------------
# If getting multiple columns, include these in url --------------------------

if (!is.null(selected_columns)) {
# Construct query parameters with quotes around column names
Expand Down Expand Up @@ -594,56 +589,56 @@ get_updated_ona_data <- function(base_url = "https://api.whonghub.org",
# update the existing data ---------------------------------------------------

# Combine new data with existing data
full_data <- dplyr::bind_rows(full_data_orig, new_data) |>
dplyr::arrange(
`_id`,
dplyr::desc(date_last_updated),
dplyr::desc(date_last_updated)) |>
dplyr::group_by(`_id`, form_id_num) |>
dplyr::slice(1) |>
dplyr::ungroup()
full_data <- dplyr::bind_rows(full_data_orig, new_data) |>
dplyr::arrange(
`_id`,
dplyr::desc(date_last_updated),
dplyr::desc(date_last_updated)) |>
dplyr::group_by(`_id`, form_id_num) |>
dplyr::slice(1) |>
dplyr::ungroup()

# log results ----------------------------------------------------------------

if (log_results) {

logs = NULL

for (form_id in unique(full_data$form_id_num)) {

if (nrow(full_data_orig) != 0) {
df <- full_data_orig |>
dplyr::filter(form_id_num == form_id) |>
janitor::remove_empty(which = "cols") } else {df <- data.frame()}

df_new <- new_data |>
dplyr::filter(form_id_num == form_id) |>
janitor::remove_empty(which = "cols")
logs = NULL

# Construct the log message
log_message <- data.frame(
form_id = form_id,
update_date = Sys.Date(),
total_columns = ncol(df_new),
total_rows = format(nrow(df) + nrow(df_new), big.mark = ","),
new_columns = ncol(df_new) - ncol(df),
new_rows = format(nrow(df_new), big.mark = ",")
)

logs[[form_id]] <- log_message |>
dplyr::mutate(
new_rows = ifelse(
new_rows == total_rows, " Initial Download", new_rows
),
new_columns = ifelse(
new_columns == total_columns, " Initial Download", new_columns
)
for (form_id in unique(full_data$form_id_num)) {

if (nrow(full_data_orig) != 0) {
df <- full_data_orig |>
dplyr::filter(form_id_num == form_id) |>
janitor::remove_empty(which = "cols") } else {df <- data.frame()}

df_new <- new_data |>
dplyr::filter(form_id_num == form_id) |>
janitor::remove_empty(which = "cols")

# Construct the log message
log_message <- data.frame(
form_id = form_id,
update_date = Sys.Date(),
total_columns = ncol(df_new),
total_rows = format(nrow(df) + nrow(df_new), big.mark = ","),
new_columns = ncol(df_new) - ncol(df),
new_rows = format(nrow(df_new), big.mark = ",")
)

logs[[form_id]] <- log_message |>
dplyr::mutate(
new_rows = ifelse(
new_rows == total_rows, " Initial Download", new_rows
),
new_columns = ifelse(
new_columns == total_columns, " Initial Download", new_columns
)
)

}

log_messages <- do.call(rbind, logs)

}

log_messages <- do.call(rbind, logs)

# construct file names for logging
log_file_name <- paste0(file_path, "/", "ona_data_update_log.rds")

Expand Down
4 changes: 2 additions & 2 deletions R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ globalVariables(
"New Emergence", "unique_emerg_grp", "ActivityDateFrom",
"ActivityStatus", "Country", "Date of Any Last SIA Campagin",
"Days Since SIA and Detection", "Days between
Previous and Most Recent Detection",
Previous and Most Recent Detection", "level3_prepped",
"Delayed Reporting Detections to Confirmation (days)", "Emergence Group",
"Emergence Group", "Last SIA Date", "Most Recent Detection",
"MostRecentVirusDate", "New VDPV Class", "New VDPV Classification",
Expand All @@ -48,6 +48,6 @@ globalVariables(
"new_epids", "ActivityVaccineType", "Days to Confirm",
"Days between Previous and Most Recent Detection",
"Delayed Reporting", "Delayed Report",
"Detections to Confirmation (days)"
"Detections to Confirmation (days)", "ADM3_NAME"
)
)
8 changes: 1 addition & 7 deletions man/get_ona_page.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 1 addition & 16 deletions tests/testthat/test-get_ona.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ testthat::test_that("get_ona_page Successfully API call processes correctly", {
suppressMessages(
result <- get_ona_page(
api_url = "https://fakerapi.it/api/v1/addresses?_quantity=10",
api_token = NULL,
start = 0, api_limit = 5
api_token = NULL
)
)

Expand Down Expand Up @@ -35,17 +34,3 @@ testthat::test_that("check_status_api when it fails", {
testthat::expect_error(check_status_api(response))
}
)

testthat::test_that("API call fails with incorrect URL", {
suppressMessages(
result <- get_ona_page(
api_url = "https://randomsitethatiswrong.com",
api_token = NULL,
start = 0,
api_limit = 1,
times = 1
)
)
testthat::expect_null(result)
})

0 comments on commit ea2e708

Please sign in to comment.