Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pagination #54

Merged
merged 3 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ Depends:
Imports:
cli,
desc,
dplyr,
fs,
glue,
httptest2,
lubridate,
nectar,
purrr,
rapid (>= 0.0.0.9003),
Expand Down
4 changes: 0 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ importFrom(S7,class_list)
importFrom(cli,cli_abort)
importFrom(cli,cli_warn)
importFrom(desc,desc)
importFrom(dplyr,coalesce)
importFrom(dplyr,filter)
importFrom(fs,file_delete)
importFrom(fs,file_exists)
importFrom(fs,is_dir)
Expand All @@ -22,8 +20,6 @@ importFrom(fs,path_rel)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(httptest2,use_httptest2)
importFrom(lubridate,now)
importFrom(lubridate,parse_date_time)
importFrom(nectar,stabilize_string)
importFrom(purrr,discard)
importFrom(purrr,imap)
Expand Down
4 changes: 0 additions & 4 deletions R/beekeeper-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
#' @importFrom cli cli_abort
#' @importFrom cli cli_warn
#' @importFrom desc desc
#' @importFrom dplyr coalesce
#' @importFrom dplyr filter
#' @importFrom fs file_delete
#' @importFrom fs file_exists
#' @importFrom fs is_dir
Expand All @@ -17,8 +15,6 @@
#' @importFrom glue glue
#' @importFrom glue glue_collapse
#' @importFrom httptest2 use_httptest2
#' @importFrom lubridate now
#' @importFrom lubridate parse_date_time
#' @importFrom nectar stabilize_string
#' @importFrom purrr discard
#' @importFrom purrr imap
Expand Down
13 changes: 6 additions & 7 deletions R/generate_pkg-paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ S7::method(as_bk_data, class_paths) <- function(x) {
}

.paths_to_tags_df <- function(x) {
x <- unnest(x, "operations")
x <- x[!x$deprecated, ]
nest(
filter(
unnest(x, "operations"),
!.data$deprecated
),
x,
.by = "tags", .key = "endpoints"
)
}
Expand Down Expand Up @@ -75,12 +74,12 @@ S7::method(as_bk_data, class_paths) <- function(x) {
### fill data ------------------------------------------------------------------

.paths_fill_operation_id <- function(operation_id, endpoint, method) {
coalesce(.to_snake(operation_id), glue("{method}_{.to_snake(endpoint)}"))
.coalesce(.to_snake(operation_id), glue("{method}_{.to_snake(endpoint)}"))
}

.paths_fill_summary <- function(summary, endpoint, method) {
endpoint_spaced <- str_replace_all(.to_snake(endpoint), "_", " ")
coalesce(
.coalesce(
str_squish(summary),
str_to_sentence(glue("{method} {endpoint_spaced}"))
)
Expand Down Expand Up @@ -113,7 +112,7 @@ S7::method(as_bk_data, class_paths) <- function(x) {
.prepare_paths_df <- function(params_df) {
params_df <- .flatten_df(params_df)
if (nrow(params_df)) {
params_df <- filter(params_df, !.data$deprecated)
params_df <- params_df[!params_df$deprecated, ]
params_df$description <- .paths_fill_descriptions(params_df$description)
}
return(params_df)
Expand Down
5 changes: 3 additions & 2 deletions R/generate_pkg-prepare.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@
config$api_abbr <- stabilize_string(config$api_abbr)
config$api_version <- stabilize_string(config$api_version)
config$rapid_file <- stabilize_string(config$rapid_file)
config$updated_on <- parse_date_time(
config$updated_on <- strptime(
config$updated_on,
orders = c("ymd HMS", "ymd H", "ymd")
format = "%Y-%m-%d %H:%M:%S",
tz = "UTC"
)
return(config)
}
Expand Down
3 changes: 2 additions & 1 deletion R/use_beekeeper.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ use_beekeeper <- function(x,

.write_config <- function(x, api_abbr, rapid_file, config_file) {
config_file <- stabilize_string(config_file)
update_time <- strptime(Sys.time(), format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
write_yaml(
list(
api_title = x@info@title,
api_abbr = stabilize_string(api_abbr),
api_version = x@info@version,
rapid_file = path_rel(rapid_file, path_dir(config_file)),
updated_on = as.character(now(tzone = "UTC"))
updated_on = as.character(update_time)
),
file = config_file
)
Expand Down
4 changes: 4 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
}
}

# Element-wise fallback: wherever `x` is `NA`, take the corresponding value
# of `y`; otherwise keep `x`. The result has the shape of `x`.
.coalesce <- function(x, y) {
  x_is_missing <- is.na(x)
  ifelse(x_is_missing, y, x)
}

.collapse_comma <- function(x) {
glue_collapse(x, sep = ", ")
}
Expand Down
15 changes: 8 additions & 7 deletions tests/testthat/_fixtures/000-create_fixtures.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ fec_rapid |>
rapid_file = rapid_write_path
)
fec_rapid@paths <- rapid::as_paths({
fec_rapid@paths |>
x <- fec_rapid@paths |>
tibble::as_tibble() |>
tidyr::hoist(operations, tags = "tags", .remove = FALSE) |>
dplyr::filter(tags %in% c("audit", "debts", "legal")) |>
dplyr::select(-tags)
tidyr::hoist(operations, tags = "tags", .remove = FALSE)
x <- x[x$tags %in% c("audit", "debts", "legal"), ]
x$tags <- NULL
x
})
rapid_write_path <- test_path(glue::glue("_fixtures/{api_abbr}_subset_rapid.rds"))
config_path <- test_path(glue::glue("_fixtures/{api_abbr}_subset_beekeeper.yml"))
Expand All @@ -56,10 +57,10 @@ trello_rapid <- apid_url |>
url() |>
rapid::as_rapid()
trello_rapid@paths <- rapid::as_paths({
trello_rapid@paths |>
x <- trello_rapid@paths |>
tibble::as_tibble() |>
tidyr::unnest(operations) |>
dplyr::filter(tags == "board") |>
tidyr::unnest(operations)
x[x$tags == "board", ] |>
head(1) |>
tidyr::nest(.by = "endpoint", .key = "operations")
})
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/_fixtures/fec_beekeeper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ api_title: OpenFEC
api_abbr: fec
api_version: '1.0'
rapid_file: fec_rapid.rds
updated_on: 2024-03-27 19:14:26.022082
updated_on: 2024-03-27 19:14:26
2 changes: 1 addition & 1 deletion tests/testthat/_fixtures/fec_subset_beekeeper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ api_title: OpenFEC
api_abbr: fec
api_version: '1.0'
rapid_file: fec_subset_rapid.rds
updated_on: 2024-03-29 19:53:51.997502
updated_on: 2024-03-29 19:53:51
2 changes: 1 addition & 1 deletion tests/testthat/_fixtures/guru_beekeeper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ api_title: APIs.guru
api_abbr: guru
api_version: 2.2.0
rapid_file: guru_rapid.rds
updated_on: 2024-03-27 19:14:00.938212
updated_on: 2024-03-27 19:14:00
2 changes: 1 addition & 1 deletion tests/testthat/_fixtures/trello_beekeeper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ api_title: Trello
api_abbr: trello
api_version: '1.0'
rapid_file: trello_rapid.rds
updated_on: 2024-03-29 21:06:50.517151
updated_on: 2024-03-29 21:06:50
127 changes: 127 additions & 0 deletions vignettes/pagination.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
---
title: "Pagination"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Pagination}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for this vignette.
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
# The examples are shown for illustration only and are never executed.
eval = FALSE
)
```

Many APIs implement some form of pagination: they break up large datasets into "pages" of results, and return a single page at a time.
To get the full dataset, we need to make multiple requests, and combine the results.

Unfortunately, there isn't a standard way to document API pagination.
Therefore, we cannot automatically generate pagination code.
You will need to edit your `010-call.R` file to implement pagination.

## Finding pagination information

Before you can implement pagination in your package, you will need to find out how the API implements pagination.
You can usually find this information in the API documentation.
Sometimes this information is in a separate "Pagination" section at the top of the documentation.
Often it is described in the individual endpoint documentation (even if it is separately described in its own section).
If it isn't clearly described, watch for pagination-related endpoint parameters, such as `page`, `pageSize`, `perPage`, `limit`, `offset`, or `cursor`.

For more tips on finding pagination information, see [How can I get a lot of data from an API?](https://r4ds.github.io/bookclub-wapir/slides/httr2/httr2-pagination.html) in [Web APIs with R](https://r4ds.github.io/bookclub-wapir/).

## Implementing pagination

The [req_perform_iterative() function from {httr2}](https://httr2.r-lib.org/reference/req_perform_iterative.html) helps to implement pagination.
It uses the request and some helper functions to create a new request to fetch the next page.
This family of functions is experimental, so be sure to check the latest documentation in case the functions have changed.

To implement pagination in your package, you will need to edit the `010-call.R` file generated by {beekeeper}.
By default, the perform step is handled by `nectar::req_perform_opinionated()`:

```{r default-perform}
# Only `req` is supplied (no `next_req`), so a single request is performed.
resp <- nectar::req_perform_opinionated(req)
```

This function calls `httr2::req_perform()` if you only give it a `req` object, or `httr2::req_perform_iterative()` if you supply an iteration helper function in the `next_req` parameter.
For example, if every endpoint of your API uses a `page` parameter to paginate, you could replace the line above with something like this:

```{r pagination}
# Tell httr2 when to stop paginating. The `resp_complete` callback must
# return `TRUE` when there are no further pages, so pagination is complete
# once a page comes back with an empty `data` element.
is_complete <- function(resp) {
  length(httr2::resp_body_json(resp)$data) == 0
}
# Supplying `next_req` switches to iterative requests: each follow-up request
# advances the `page` parameter until `is_complete()` reports no more pages.
resp <- nectar::req_perform_opinionated(
req,
next_req = httr2::iterate_with_offset("page", resp_complete = is_complete)
)
```

By default, `nectar::req_perform_opinionated()` only returns 2 responses (`max_reqs = 2`).
Once you have verified that your pagination strategy works, you will likely want to increase this limit, usually to `Inf`.
`nectar::req_perform_opinionated()` also implements a basic `httr2::req_retry()` to try each request up to 3 times, using the default `httr2::req_retry()` settings to decide if a failure is transient.

## More complicated pagination

If you would like to implement more complex pagination, or apply other transformations to the `req` object such as `httr2::req_retry()` or `httr2::req_throttle()`, you can create your own `perform` function.
I name these functions `{api_abbr}_req_perform()`.
For example, this is the `perform` function for the {fecapi} package:

```{r fecapi-perform}
# Perform a request, optionally paginating through the results.
#
# `pagination` selects the iteration strategy (see `.choose_pagination_fn()`).
# `per_page` and `max_results` cap the number of requests so that no more
# pages are fetched than are needed to reach `max_results`.
# `call` is the environment reported in validation errors; it defaults to the
# caller's environment so the function can be used without passing it.
.fec_req_perform <- function(req,
                             pagination,
                             per_page,
                             max_results,
                             max_reqs,
                             call = rlang::caller_env()) {
  next_req <- .choose_pagination_fn(pagination, call = call)
  # Use whichever limit is stricter: the caller's `max_reqs`, or the number
  # of pages required to collect `max_results` rows.
  max_reqs <- min(max_reqs, ceiling(max_results / per_page))
  nectar::req_perform_opinionated(
    req,
    next_req = next_req,
    max_reqs = max_reqs
  )
}

# Map a pagination strategy name to the iteration helper used for `next_req`.
# Returns `NULL` for "none", which means no pagination.
.choose_pagination_fn <- function(pagination, call = rlang::caller_env()) {
  # Validate first; `.validate_pagination()` rejects unsupported values.
  strategy <- .validate_pagination(pagination, call)
  switch(strategy,
    none = NULL,
    basic = .iterator_fn_basic()
  )
}

# Check that `pagination` is one of the supported strategy names and return
# it; errors are attributed to `call`.
.validate_pagination <- function(pagination, call = rlang::caller_env()) {
  supported <- c("none", "basic")
  rlang::arg_match0(pagination, supported, error_call = call)
}

# Build the "basic" iterator: step through the `page` query parameter, reading
# the total number of pages from the response body's `pagination$pages`.
.iterator_fn_basic <- function() {
  total_pages <- function(resp) {
    httr2::resp_body_json(resp)$pagination$pages
  }
  httr2::iterate_with_offset("page", resp_pages = total_pages)
}
```

Within `010-call.R`, I apply the function like this:

```{r fecapi-call-with-pagination}
# Perform the request through the package-specific wrapper rather than calling
# `nectar::req_perform_opinionated()` directly.
# NOTE(review): `pagination`, `max_results`, and `max_reqs` are presumably
# arguments of the enclosing call function — confirm.
resp <- .fec_req_perform(
req,
pagination = pagination,
per_page = query$per_page,
max_results = max_results,
max_reqs = max_reqs
)
```

## Help us improve

If you find a pattern in pagination implementation from the API description and/or endpoint function parameters, please [submit an issue or a pull request](https://github.com/jonthegeek/beekeeper/issues) to help us improve the output of this package.
Loading