Skip to content

Commit b2ceed4

Browse files
committed
Closes #53
1 parent de17abf commit b2ceed4

File tree

2 files changed

+119
-23
lines changed

2 files changed

+119
-23
lines changed

R/fst-utils.R

+32-12
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22
#' @name fst-utils
33
#' @noRd
44
#'
5-
#' @param cat_no,path As in \code{\link{read_abs}}.
5+
#' @param cat_no,tpath As in \code{\link{read_abs}}.
6+
#' @param table A length-one vector.
7+
#' Either "all" or an integer vector specifying the table within
8+
#' `cat_no`. If "all" or `integer(0)`, the filename just reflects the `cat_no`.
9+
#' Otherwise, the filename will be specific for the `table`. Note that `read_abs`
10+
#' accepts `length(tables) > 1` but `catno2fst` does not (since it would mean
11+
#' every combination of `tables` would be cached).
612
#'
713
#' @return For `catno2fst` the path to the `fst` file to be saved or read, given
8-
#' `cat_no` and `path`.
14+
#' `cat_no`, `table`, and `path`.
915
#'
1016
#' `fst_available` returns `TRUE` if and only if an appropriate `fst` file is
1117
#' available.
@@ -16,18 +22,27 @@
1622

1723

1824
catno2fst <- function(cat_no,
25+
table = integer(0L),
1926
path = Sys.getenv("R_READABS_PATH", unset = tempdir())) {
20-
hutils::provide.file(file.path(path,
21-
"fst",
22-
paste0(gsub(".", "-", cat_no, fixed = TRUE),
23-
".fst")),
24-
on_failure = stop("`path = ", normalizePath(path,
25-
winslash = "/"),
27+
if (length(table) > 1L) {
28+
stop("Internal error (catno2fst): length(table) > 1 at this time. Please report.")
29+
}
30+
basename.fst <- gsub(".", "-", cat_no, fixed = TRUE)
31+
if (length(table) == 0L || identical(table, "all")) {
32+
basename.fst <- paste0(basename.fst, ".fst")
33+
} else {
34+
basename.fst <- paste0(basename.fst, sprintf("T%02d", table), ".fst")
35+
}
36+
fullname.fst <- file.path(path, "fst", basename.fst)
37+
hutils::provide.file(fullname.fst,
38+
on_failure = stop("`path = ",
39+
normalizePath(path, winslash = "/"),
2640
"`, ",
2741
"but it was not possible to write to this directory."))
2842
}
2943

3044
fst_available <- function(cat_no,
45+
table = integer(0L),
3146
path = Sys.getenv("R_READABS_PATH",
3247
unset = tempdir())) {
3348
if (!requireNamespace("fst", quietly = TRUE) ||
@@ -42,16 +57,21 @@ fst_available <- function(cat_no,
4257
return(FALSE)
4358
}
4459

45-
file.fst <- catno2fst(cat_no, path)
60+
file.fst <- catno2fst(cat_no, table = table, path)
4661

4762
if (!file.exists(file.fst)) {
4863
return(FALSE) # nocov
4964
}
65+
# Is the file clearly not an fst file
66+
# (where "clearly not an fst file" means "empty" or "a directory")?
67+
file_info <- file.info(file.fst, extra_cols = FALSE)
68+
if (!file_info[["size"]] || file_info[["isdir"]]) {
69+
return(FALSE)
70+
}
5071

51-
# fst may be damaged. If it appears to be (i.e. fst metadata returns an error)
72+
# fst may be damaged/not a real fst file.
73+
# If it appears to be (i.e. fst metadata returns an error)
5274
# return FALSE
53-
54-
5575
out <- tryCatch(inherits(fst::fst.metadata(file.fst), "fstmetadata"),
5676
error = function(e) FALSE,
5777
warning = function(e) FALSE)

R/read_abs.R

+87-11
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,11 @@ read_abs <- function(cat_no = NULL,
8181
retain_files = TRUE,
8282
check_local = TRUE) {
8383

84-
if (isTRUE(check_local) &&
84+
# Anything other than TRUE is equivalent to FALSE
85+
check_local <- isTRUE(check_local)
86+
if (check_local &&
87+
identical(tables, "all") &&
8588
fst_available(cat_no = cat_no, path = path)) {
86-
if (!identical(tables, "all")) {
87-
warning("`tables` was provided",
88-
"yet `check_local = TRUE` and fst files are available ",
89-
"so `tables` will be ignored.")
90-
}
9189
out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path))
9290
out <- tibble::as_tibble(out)
9391
if (is.null(series_id)) {
@@ -133,10 +131,88 @@ read_abs <- function(cat_no = NULL,
133131
tables <- "all"
134132
}
135133

136-
if (!is.logical(metadata)) {
134+
if (!is.atomic(tables)) {
135+
stop("`tables` was not atomic.")
136+
}
137+
if (anyNA(tables)) {
138+
warning("`tables` contains missing values, these will be removed.")
139+
tables <- tables[!is.na(tables)]
140+
}
141+
if (!is.integer(tables) && length(tables) != 0L) {
142+
if (is.character(tables)) {
143+
if (length(tables) != 1L) {
144+
stop("`tables` was character, but had length ", length(tables), ". ",
145+
'The only valid character value for `tables` is "all".')
146+
}
147+
if (tables != "all") {
148+
stop("`tables = ", tables, "`.",
149+
'The only valid character value for `tables` is "all".')
150+
}
151+
} else {
152+
# Edge case: if user supplies a very large number,
153+
# any(tables != as.integer(tables))
154+
# below will return a cryptic error message (possibly during recursion).
155+
# Unlikely to happen on purpose.
156+
if (min(tables) < 0 || max(tables) > .Machine$integer.max) {
157+
stop("`tables` was a numeric vector but had values outside [0, .Machine$integer.max]. ",
158+
"These are unlikely values for table numbers and are ")
159+
}
160+
if (!is.numeric(tables) || any(tables != as.integer(tables))) {
161+
stop("`tables` was not an integer(ish) vector of table numbers.")
162+
}
163+
tables <- as.integer(tables)
164+
}
165+
}
166+
167+
if (!is.logical(metadata) || length(metadata) != 1L || is.na(metadata)) {
137168
stop("`metadata` argument must be either TRUE or FALSE")
138169
}
139170

171+
if (check_local) {
172+
# In the case of table = "all" we simply get the fst file for
173+
# the whole cat_no. Equally simple is the case of a single
174+
# table. Both are handled by length(tables <= 1L)
175+
176+
# If len > 1 integer vector is supplied to tables, we recurse
177+
# for each element of tables, checking the table's fst file availability
178+
# independently of the other elements. Either we use the fst
179+
# file or we download that single table. Each operation of lapply
180+
# will produce a tibble.
181+
if (length(tables) <= 1L) {
182+
if (fst_available(cat_no = cat_no, table = tables, path = path)) {
183+
file.fst <- catno2fst(cat_no = cat_no, table = tables, path = path)
184+
out <- fst::read_fst(file.fst)
185+
out <- tibble::as_tibble(out)
186+
if (is.null(series_id)) {
187+
return(out)
188+
}
189+
if (series_id %in% out[["series_id"]]) {
190+
users_series_id <- series_id
191+
out <- dplyr::filter(out, series_id %in% users_series_id)
192+
} else {
193+
warning("`series_id` was provided,",
194+
"but was not present in the local table and will be ignored.")
195+
}
196+
return(out)
197+
} else {
198+
# continue as if check_local = FALSE
199+
}
200+
} else {
201+
# Recursion
202+
out <-
203+
lapply(tables, function(ta) {
204+
read_abs(cat_no = cat_no,
205+
tables = ta,
206+
series_id = series_id,
207+
path = path,
208+
metadata = metadata,
209+
show_progress_bars = show_progress_bars,
210+
retain_files = retain_files)
211+
})
212+
return(dplyr::bind_rows(out))
213+
}
214+
}
215+
140216
# satisfy CRAN
141217
ProductReleaseDate = SeriesID = NULL
142218

@@ -252,13 +328,13 @@ read_abs <- function(cat_no = NULL,
252328
}
253329

254330
# if fst is available, and what has been requested is the full data,
255-
# write the result to the <path>/fst/ file
331+
# or a single table, retain the fst file.
256332
if (retain_files &&
257-
is.null(series_id) &&
258-
identical(tables, "all") &&
259-
requireNamespace("fst", quietly = TRUE)) {
333+
requireNamespace("fst", quietly = TRUE) &&
334+
length(tables) <= 1L) {
260335
fst::write_fst(sheet,
261336
catno2fst(cat_no = cat_no,
337+
table = tables,
262338
path = path))
263339
}
264340

0 commit comments

Comments
 (0)