Skip to content

Commit 6bbf4d6

Browse files
committed
added 'tables' argument to get_abs(), with ability to specify particular table(s) to download
1 parent 2dca695 commit 6bbf4d6

File tree

5 files changed

+42
-10
lines changed

5 files changed

+42
-10
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Authors@R: c(
99
Maintainer: Matt Cowgill <[email protected]>
1010
Description: Downloads, imports, and tidies time series data from the
1111
Australian Bureau of Statistics <https://www.abs.gov.au/>.
12-
Date: 2018-05-30
12+
Date: 2019-01-14
1313
License: MIT + file LICENSE
1414
Encoding: UTF-8
1515
LazyData: true

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
YEAR:2018
2-
COPYRIGHT HOLDER: Zoe Meers
2+
COPYRIGHT HOLDER: Zoe Meers and Matt Cowgill

R/get_abs_xml_metadata.R

+10-3
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,21 @@
55
# given a catalogue number, download the catalogue metadata via XML, then find
66
# unique filenames in the latest release and return those
77

8-
get_abs_xml_metadata <- function(cat_no) {
8+
get_abs_xml_metadata <- function(cat_no, table) {
99

1010
ProductReleaseDate=TableOrder=text=NULL
1111

12+
if(table == "all"){
13+
tables_url <- ""
14+
} else {
15+
tables_url <- paste0("&ttitle=", table)
16+
}
17+
1218
# Download the first page of metadata for cat_no
1319
first_url <- paste0("http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno=",
1420
cat_no,
15-
"&pg=1")
21+
"&pg=1",
22+
tables_url)
1623

1724
first_page <- XML::xmlParse(file = first_url)
1825

@@ -47,7 +54,7 @@ get_abs_xml_metadata <- function(cat_no) {
4754
xml_dfs <- list()
4855
while(current == TRUE){
4956

50-
xml_df <- get_xml_df(cat_no = cat_no, metadata_page = all_pages[i])
57+
xml_df <- get_xml_df(cat_no = cat_no, table = table, metadata_page = all_pages[i])
5158

5259
xml_dfs[[i]] <- xml_df
5360

R/get_xml_df.R

+18-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,30 @@
22
#' @importFrom XML xmlParse xmlToDataFrame
33
#' @importFrom dplyr filter select "%>%"
44

5-
get_xml_df <- function(cat_no, metadata_page){
5+
get_xml_df <- function(cat_no, table, metadata_page){
66

77
text=NULL
88

9+
if(table == "all"){
10+
tables_url <- ""
11+
} else {
12+
tables_url <- paste0("&ttitle=", table)
13+
}
14+
915
base_url <- "http://ausstats.abs.gov.au/servlet/TSSearchServlet?catno="
1016

11-
url <- paste0(base_url, cat_no, "&pg=", metadata_page)
17+
url <- paste0(base_url, cat_no, "&pg=", metadata_page, tables_url)
18+
19+
safe_parse <- purrr::safely(XML::xmlParse)
1220

13-
xml_page <- XML::xmlParse(file = url)
21+
xml_page <- safe_parse(file = url)
22+
23+
if(is.null(xml_page$error)){
24+
xml_page <- xml_page$result
25+
} else {
26+
stop(paste0("Error: the following URL does not contain valid ABS metadata:\n",
27+
url))
28+
}
1429

1530
xml_df <- XML::xmlToDataFrame(xml_page, stringsAsFactors = FALSE)
1631

R/read_abs.R

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#' @param cat_no ABS catalogue number, as a string, including the extension.
99
#' For example, "6202.0".
1010
#'
11+
#' @param tables Time series tables in `cat_no` to download and extract. Default is "all",
12+
#' which will read all time series in `cat_no`. Specify `tables` to
13+
#' download and import specific table(s) - e.g. `tables = 1` or `tables = c(1, 5)`.
14+
#'
1115
#' @param path Local directory in which to save downloaded ABS time series
1216
#' spreadsheets. Default is "data/ABS"; this subdirectory of your working
1317
#' directory will be created if it does not exist.
@@ -24,11 +28,12 @@
2428
#'
2529
#' \donttest{wpi <- read_abs("6345.0")}
2630
#'
27-
#' @importFrom purrr walk walk2 map
31+
#' @importFrom purrr walk walk2 map map2_dfr
2832
#' @name read_abs
2933
#' @export
3034

3135
read_abs <- function(cat_no = NULL,
36+
tables = "all",
3237
path = "data/ABS",
3338
show_progress_bars = TRUE){
3439

@@ -44,9 +49,14 @@ read_abs <- function(cat_no = NULL,
4449
stop("Please ensure you include the cat_no extension, eg. '6202.0', not '6202'")
4550
}
4651

52+
if(is.null(tables)){
53+
message(paste0("`tables` not specified; attempting to fetch all tables from ", cat_no))
54+
}
55+
4756
# find URLs from cat_no
4857
message(paste0("Finding filenames for tables from ABS catalogue ", cat_no))
49-
xml_dfs <- get_abs_xml_metadata(cat_no = cat_no)
58+
xml_dfs <- purrr::map2_dfr(cat_no, tables,
59+
.f = get_abs_xml_metadata)
5060

5161
urls <- unique(xml_dfs$TableURL)
5262
urls <- gsub(".test", "", urls)

0 commit comments

Comments
 (0)