Skip to content

Commit 557c2bd

Browse files
authored
minor changes (#112)
* closes #107 * stringr dep removed (#107) * use pkgdown (closes #59) * specify user agent; closes #108 * closes #111
1 parent e345a63 commit 557c2bd

40 files changed

+193
-96
lines changed

.Rbuildignore

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
^doc$
1515
^Meta$
1616
^data-raw$
17+
^pkgdown$

.github/workflows/covr.yaml

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
on:
2+
push:
3+
branches:
4+
- main
5+
- master
6+
pull_request:
7+
branches:
8+
- main
9+
- master
10+
11+
name: test-coverage
12+
13+
jobs:
14+
test-coverage:
15+
runs-on: macOS-latest
16+
env:
17+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18+
steps:
19+
- uses: actions/checkout@v2
20+
21+
- uses: r-lib/actions/setup-r@v1
22+
23+
- uses: r-lib/actions/setup-pandoc@v1
24+
25+
- name: Query dependencies
26+
run: |
27+
install.packages('remotes')
28+
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
29+
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
30+
shell: Rscript {0}
31+
32+
- name: Cache R packages
33+
uses: actions/cache@v2
34+
with:
35+
path: ${{ env.R_LIBS_USER }}
36+
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
37+
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
38+
39+
- name: Install dependencies
40+
run: |
41+
install.packages(c("remotes"))
42+
remotes::install_deps(dependencies = TRUE)
43+
remotes::install_cran("covr")
44+
shell: Rscript {0}
45+
46+
- name: Test coverage
47+
run: covr::codecov()
48+
shell: Rscript {0}

.github/workflows/pkgdown.yaml

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
on:
2+
push:
3+
branches:
4+
- main
5+
- master
6+
7+
name: pkgdown
8+
9+
jobs:
10+
pkgdown:
11+
runs-on: macOS-latest
12+
env:
13+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
14+
steps:
15+
- uses: actions/checkout@v2
16+
17+
- uses: r-lib/actions/setup-r@v1
18+
19+
- uses: r-lib/actions/setup-pandoc@v1
20+
21+
- name: Query dependencies
22+
run: |
23+
install.packages('remotes')
24+
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
25+
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
26+
shell: Rscript {0}
27+
28+
- name: Cache R packages
29+
uses: actions/cache@v2
30+
with:
31+
path: ${{ env.R_LIBS_USER }}
32+
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
33+
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
34+
35+
- name: Install dependencies
36+
run: |
37+
remotes::install_deps(dependencies = TRUE)
38+
install.packages("pkgdown", type = "binary")
39+
shell: Rscript {0}
40+
41+
- name: Install package
42+
run: R CMD INSTALL .
43+
44+
- name: Deploy package
45+
run: |
46+
git config --local user.email "[email protected]"
47+
git config --local user.name "GitHub Actions"
48+
Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ data
88
.DS_Store
99
doc
1010
.Rhistory (8140V-144300-W's conflicted copy 2020-09-19)
11+
docs

.travis.yml

-14
This file was deleted.

DESCRIPTION

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: readabs
22
Type: Package
33
Title: Download and Tidy Time Series Data from the Australian Bureau of Statistics
4-
Version: 0.4.6
4+
Version: 0.4.6.900
55
Authors@R: c(
66
person("Matt", "Cowgill", role = c("aut", "cre"), email = "[email protected]"),
77
person("Zoe", "Meers", role = "aut", email = "[email protected]"),
@@ -18,14 +18,12 @@ LazyData: true
1818
Depends: R (>= 3.5)
1919
Imports:
2020
readxl (>= 1.2.0),
21-
tibble (>= 1.4.99),
2221
dplyr (>= 0.8.0),
2322
hutils (>= 1.5.0),
2423
fst,
2524
curl,
2625
purrr,
2726
tidyr (>= 1.0.0),
28-
stringr,
2927
stringi,
3028
rsdmx,
3129
tools,
@@ -42,5 +40,4 @@ Suggests:
4240
knitr,
4341
rmarkdown,
4442
testthat (>= 2.1.0),
45-
RCurl,
4643
ggplot2

NAMESPACE

+1-6
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ importFrom(dplyr,pull)
2525
importFrom(dplyr,select)
2626
importFrom(dplyr,slice)
2727
importFrom(dplyr,sym)
28+
importFrom(dplyr,tibble)
2829
importFrom(glue,glue)
2930
importFrom(httr,GET)
3031
importFrom(purrr,map)
@@ -42,12 +43,6 @@ importFrom(rvest,html_attr)
4243
importFrom(rvest,html_nodes)
4344
importFrom(rvest,html_text)
4445
importFrom(stringi,stri_trim_both)
45-
importFrom(stringr,str_count)
46-
importFrom(stringr,str_extract)
47-
importFrom(stringr,str_remove)
48-
importFrom(stringr,str_replace_all)
49-
importFrom(stringr,str_trim)
50-
importFrom(tibble,tibble)
5146
importFrom(tidyr,gather)
5247
importFrom(tidyr,pivot_longer)
5348
importFrom(tidyr,separate)

NEWS.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# readabs 0.4.6.900
2+
* Package dependencies reduced
3+
14
# readabs 0.4.6
25
* New read_payrolls() convenience function added
36
* New read_awe() convenience function added

R/check_abs_connection.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ test_abs_robots <- function() {
2727
suppressWarnings(utils::download.file(
2828
"https://www.abs.gov.au/robots.txt",
2929
destfile = tmp,
30-
quiet = TRUE
30+
quiet = TRUE,
31+
headers = c("User-Agent" = readabs_user_agent)
3132
))
3233
file.exists(tmp)
3334
},

R/download_abs.R

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ download_abs <- function(urls,
2121
quiet = !show_progress_bars,
2222
destfile = filenames,
2323
method = "libcurl",
24-
cacheOK = FALSE
24+
cacheOK = FALSE,
25+
headers = c("User-Agent" = readabs_user_agent)
2526
)
2627
} else {
2728
purrr::walk2(
@@ -30,7 +31,8 @@ download_abs <- function(urls,
3031
.f = utils::download.file,
3132
mode = "wb",
3233
quiet = !show_progress_bars,
33-
cacheOK = FALSE
34+
cacheOK = FALSE,
35+
headers = c("User-Agent" = readabs_user_agent)
3436
)
3537
}
3638

R/download_data_cube.r

+2-4
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,8 @@
4545
#'
4646
#' @importFrom dplyr %>%
4747
#' @importFrom glue glue
48-
#' @importFrom xml2 read_html
4948
#' @importFrom dplyr filter pull slice
50-
#' @importFrom tibble tibble
5149
#' @importFrom rvest html_nodes html_attr html_text
52-
#' @importFrom stringr str_remove str_extract str_replace_all
5350
#' @importFrom httr GET
5451
#'
5552
#' @export
@@ -82,7 +79,8 @@ download_abs_data_cube <- function(catalogue_string,
8279

8380

8481
# ==================download file======================
85-
download_object <- httr::GET(file_download_url)
82+
download_object <- httr::GET(file_download_url,
83+
httr::user_agent(readabs_user_agent))
8684

8785
# save file path to disk
8886

R/extract_abs_sheets.R

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#' `Sys.getenv("R_READABS_PATH", unset = tempdir())`.
1515
#'
1616
#' @importFrom readxl excel_sheets read_excel
17-
#' @importFrom tibble tibble
1817
#' @importFrom dplyr filter "%>%"
1918
#' @importFrom purrr map set_names
2019
#' @importFrom tools file_path_sans_ext

R/get_abs_xml_metadata.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11

2-
#' @importFrom utils download.file
32
#' @import dplyr
43

54
# given a catalogue number, download the catalogue metadata via XML, then find
@@ -28,7 +27,8 @@ get_abs_xml_metadata <- function(url, issue = "latest") {
2827
# doesn't work then we try with a leading zero ("01"). If that fails,
2928
# it's an error.
3029

31-
first_page <- xml2::read_xml(first_url, encoding = "ISO-8859-1")
30+
first_page <- xml2::read_xml(first_url, encoding = "ISO-8859-1",
31+
user_agent = readabs_user_agent)
3232
first_page_list <- xml2::as_list(first_page)
3333
first_page_list <- first_page_list[[1]]
3434
first_url_works <- ifelse(length(first_page_list) > 0,

R/get_available_files.r

+5-7
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@
1919
#' }
2020
#'
2121
#' @importFrom glue glue
22-
#' @importFrom xml2 read_html
23-
#' @importFrom dplyr %>% filter pull slice
24-
#' @importFrom tibble tibble
22+
#' @importFrom dplyr %>% filter pull slice tibble
2523
#' @importFrom rvest html_nodes html_attr html_text
26-
#' @importFrom stringr str_extract str_replace_all
2724
#' @importFrom rlang .data
2825
#'
2926
#' @export
@@ -52,7 +49,8 @@ get_available_files <- function(catalogue_string, refresh = FALSE) {
5249

5350
# Try to download the page
5451
download_page <- tryCatch(
55-
xml2::read_html(download_url),
52+
xml2::read_html(download_url,
53+
user_agent = readabs_user_agent),
5654
error = function(cond) {
5755
message(paste("URL does not seem to exist:", download_url))
5856
message("Here's the original error message:")
@@ -85,11 +83,11 @@ get_available_files <- function(catalogue_string, refresh = FALSE) {
8583
}
8684

8785

88-
available_downloads <- tibble::tibble(
86+
available_downloads <- dplyr::tibble(
8987
url = urls,
9088
label = labels
9189
) %>%
92-
mutate(file = str_extract(url, "[^/]*$")) %>%
90+
mutate(file = stringi::stri_extract_first_regex(url, "[^/]*$")) %>%
9391
select(.data$label, .data$file, .data$url)
9492

9593
return(available_downloads)

R/read_abs.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ read_abs <- function(cat_no = NULL,
9292
)
9393
}
9494
out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path))
95-
out <- tibble::as_tibble(out)
95+
out <- dplyr::as_tibble(out)
9696
if (is.null(series_id)) {
9797
return(out)
9898
}

R/read_abs_local.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ read_abs_local <- function(cat_no = NULL,
8181
# Retrieve cache if available
8282
if (is.null(filenames) && isTRUE(use_fst) && fst_available(cat_no, path)) {
8383
out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path))
84-
return(tibble::as_tibble(out))
84+
return(dplyr::as_tibble(out))
8585
}
8686

8787
# If catalogue number is specified, that takes precedence

R/read_abs_sdmx.R

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#' }
2424
#'
2525
read_abs_sdmx <- function(url) {
26+
.Deprecated(msg = "read_abs_sdmx() is deprecated and will be removed in a future version.")
2627
url <- url
2728
dataset <- rsdmx::readSDMX(url)
2829
abs_data <- as.data.frame(dataset)

R/read_awe.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ tidy_awe <- function(df) {
114114

115115
df$measure <- gsub(";", "", df$measure, fixed = TRUE)
116116
df$measure <- tolower(df$measure)
117-
df$measure <- stringr::str_squish(df$measure)
117+
df$measure <- fast_str_squish(df$measure)
118118

119-
df$sex <- stringr::str_squish(df$sex)
119+
df$sex <- fast_str_squish(df$sex)
120120
df$sex <- tolower(df$sex)
121121

122122
df <- df %>%

R/scrape_abs_catalogues.r

+12-12
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,20 @@
88
#' @importFrom glue glue
99
#' @importFrom xml2 read_html
1010
#' @importFrom dplyr %>% filter pull slice
11-
#' @importFrom tibble tibble
1211
#' @importFrom rvest html_nodes html_attr html_text
13-
#' @importFrom stringr str_trim str_remove
1412
#' @importFrom purrr map_dfr
1513
#' @importFrom rlang .data
1614
#'
1715

1816
scrape_abs_catalogues <- function() {
1917

2018
# scrape the main page
21-
abs_stats_page <- xml2::read_html("https://www.abs.gov.au/statistics")
19+
abs_stats_page <- xml2::read_html("https://www.abs.gov.au/statistics",
20+
user_agent = readabs_user_agent)
2221

23-
main_page_data <- tibble::tibble(
24-
heading = abs_stats_page %>% rvest::html_nodes(".field--type-ds h3") %>% rvest::html_text() %>% stringr::str_trim(),
25-
url_suffix = abs_stats_page %>% rvest::html_nodes(".card") %>% rvest::html_attr("href") %>% stringr::str_trim()
22+
main_page_data <- dplyr::tibble(
23+
heading = abs_stats_page %>% rvest::html_nodes(".field--type-ds h3") %>% rvest::html_text() %>% stringi::stri_trim_both(),
24+
url_suffix = abs_stats_page %>% rvest::html_nodes(".card") %>% rvest::html_attr("href") %>% stringi::stri_trim_both()
2625
)
2726

2827
# scrape each page
@@ -31,15 +30,16 @@ scrape_abs_catalogues <- function() {
3130
main_page_heading <- main_page_data$heading[main_page_data$url_suffix == sub_page_url_suffix]
3231

3332

34-
sub_page <- xml2::read_html(glue::glue("https://www.abs.gov.au{sub_page_url_suffix}"))
33+
sub_page <- xml2::read_html(glue::glue("https://www.abs.gov.au{sub_page_url_suffix}"),
34+
user_agent = readabs_user_agent)
3535

36-
sub_page_data <- tibble::tibble(
36+
sub_page_data <- dplyr::tibble(
3737
heading = main_page_heading,
38-
sub_heading = sub_page %>% rvest::html_nodes(".abs-layout-title") %>% rvest::html_text() %>% str_trim(),
38+
sub_heading = sub_page %>% rvest::html_nodes(".abs-layout-title") %>% rvest::html_text() %>% stringi::stri_trim_both(),
3939
catalogue = sub_page %>% rvest::html_nodes("#content .card") %>% rvest::html_attr("href") %>%
40-
stringr::str_remove(sub_page_url_suffix) %>%
41-
stringr::str_remove("/[^/]*$") %>%
42-
stringr::str_remove("/"),
40+
stringi::stri_replace_all_fixed(sub_page_url_suffix, "") %>%
41+
stringi::stri_replace_all_regex("/[^/]*$", "") %>%
42+
stringi::stri_replace_all_fixed("/", ""),
4343
url = glue::glue("https://www.abs.gov.au{sub_page_url_suffix}/{catalogue}/latest-release")
4444
)
4545
}

0 commit comments

Comments
 (0)