Skip to content

Commit

Permalink
Merge pull request #123 from molgenis/feat/load-table-linkfile
Browse files Browse the repository at this point in the history
feat: load table linkfile
  • Loading branch information
marikaris authored May 16, 2024
2 parents a349308 + 116dddf commit 7b69d28
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 3 deletions.
24 changes: 22 additions & 2 deletions R/object.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@
#'
#' @importFrom httr GET
#' @noRd
.load_object <- function(project, folder, name, load_function, extension) {
.load_object <- function(project, folder, name, load_function, extension, load_arg) {
file <- tempfile()
on.exit(unlink(file))

Expand All @@ -211,7 +211,11 @@

writeBin(content(response, "raw"), file)

load_function(file)
if (missing(load_arg)) {
load_function(file)
} else {
load_function(file, load_arg)
}
}

#' Get storage API object URI.
Expand All @@ -233,3 +237,19 @@
utils::URLencode(full_name, reserved = TRUE)
)
}

#' Helperfunction that checks if a file exists in armadillo
#'
#' @param project project name
#' @param object_name folder/name of file
#' @param extension the extension of the file
#'
#' @return TRUE if the file exists, FALSE if it doesnt
#'
#' @noRd
.object_exists <- function(project, object_name, extension) {
response <- httr::HEAD(paste0(.get_url(),
.to_object_uri(project, object_name, extension)),
config = c(httr::add_headers(.get_auth_header())))
response$status_code == 204
}
38 changes: 37 additions & 1 deletion R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,17 @@ armadillo.copy_table <- # nolint
#'
#' @export
armadillo.load_table <- function(project, folder, name) { # nolint
.load_object(project, folder, name, .load_table, ".parquet")
object_name <- paste0(folder, "/", name)
if(.object_exists(project, object_name, ".parquet")){
.load_object(project, folder, name, .load_table, ".parquet")
} else if(.object_exists(project, object_name, ".alf")) {
info <- .get_linkfile_content(project, object_name)
variables <- unlist(info$variables)
source <- strsplit(info$sourceLink,"/", fixed=T)
.load_object(source[[1]][1], source[[1]][2], source[[1]][3], .load_linked_table, ".parquet", variables)
} else {
stop(paste0("Table ", project, "/", object_name, " does not exist."))
}
}

#' Helper function to extract a parquet file
Expand All @@ -145,6 +155,32 @@ armadillo.load_table <- function(project, folder, name) { # nolint
as.data.frame(arrow::read_parquet(file, as_data_frame = FALSE))
}

#' Helper function to extract the source parquet file in a linkfile
#'
#' @param file source table parquet file
#' @param columns character list of columns to select from source file
#'
#' @return the contents of the file, as data frame
#'
.load_linked_table <- function(file, columns) {
as.data.frame(arrow::read_parquet(file, as_data_frame = FALSE, col_select = columns))
}

#' Helper function to get the contents of a linkfile
#'
#' @param project projectname where the linkfile is stored
#' @param object_name folder/name of linkfile
#'
#' @return the contents of the linkfile
#'
.get_linkfile_content <- function(project, object_name) {
response <- httr::GET(paste0(.get_url(),
.to_object_uri(project, object_name, ".alf"), "/info"),
config = c(httr::add_headers(.get_auth_header())))
.handle_request_error(response)
httr::content(response, as = "parsed")
}

#' Move the table
#'
#' @param project a study or collection of variables
Expand Down
36 changes: 36 additions & 0 deletions tests/testthat/test-object.R
Original file line number Diff line number Diff line change
Expand Up @@ -402,3 +402,39 @@ test_that(".move_object moves object", {

stub_registry_clear()
})

test_that(".object_exists returns true if status is 204", {
stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/core%2Fnonrep.parquet') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 204)

expect_true(
.object_exists(
project = "project",
object_name = "core/nonrep",
extension = ".parquet"
)
)

stub_registry_clear()
})

test_that(".object_exists returns true if status is 204", {
stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/core%2Fnonrep.parquet') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 404)

expect_false(
.object_exists(
project = "project",
object_name = "core/nonrep",
extension = ".parquet"
)
)

stub_registry_clear()
})
55 changes: 55 additions & 0 deletions tests/testthat/test-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ test_that("armadillo.move_table calls .move_object", {

test_that("armadillo.load_table calls .load_object", {
load_object <- mock()

stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/folder%2Fname.parquet') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 204)

with_mock(
armadillo.load_table(
Expand All @@ -121,3 +127,52 @@ test_that("armadillo.load_table calls .load_object", {
extension = ".parquet"
)
})

test_that("armadillo.load_table calls .load_object with linktable loadfunction", {
load_object <- mock()

stub_request('head', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.parquet') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 404)

stub_request('head', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.alf') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 204)

stub_request('get', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.alf/info') %>%
wi_th(
headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token')
) %>%
to_return(status = 200, headers = list('Content-Type' = 'application/json; charset=utf-8'),
body = '{
"name": "folder/name.alf",
"size": "955 bytes",
"rows": "3000",
"columns": "6",
"sourceLink": "project/folder/name",
"variables": ["coh_country", "recruit_age","cob_m", "ethn1_m","ethn2_m","ethn3_m"]
}'
)

with_mock(
armadillo.load_table(
"project1",
"folder",
"name"
),
"MolgenisArmadillo:::.load_object" = load_object
)

expect_args(load_object, 1,
project = "project",
folder = "folder",
name = "name",
load_function = .load_linked_table,
extension = ".parquet",
load_arg = c("coh_country", "recruit_age","cob_m", "ethn1_m","ethn2_m","ethn3_m")
)
})

0 comments on commit 7b69d28

Please sign in to comment.