From 287b6ada8aa782c9e4e4294cf0e2322ec1524673 Mon Sep 17 00:00:00 2001 From: mkslofstra Date: Tue, 7 May 2024 08:56:03 +0200 Subject: [PATCH 1/3] feat: add linkfile functionality to load_table --- R/object.R | 24 ++++++++++++++++++++++-- R/table.R | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/R/object.R b/R/object.R index a5e83379..e435b6c5 100644 --- a/R/object.R +++ b/R/object.R @@ -194,7 +194,7 @@ #' #' @importFrom httr GET #' @noRd -.load_object <- function(project, folder, name, load_function, extension) { +.load_object <- function(project, folder, name, load_function, extension, load_arg) { file <- tempfile() on.exit(unlink(file)) @@ -211,7 +211,11 @@ writeBin(content(response, "raw"), file) - load_function(file) + if (missing(load_arg)) { + load_function(file) + } else { + load_function(file, load_arg) + } } #' Get storage API object URI. @@ -233,3 +237,19 @@ utils::URLencode(full_name, reserved = TRUE) ) } + +#' Helperfunction that checks if a file exists in armadillo +#' +#' @param project project name +#' @param object_name folder/name of file +#' @param extension the extension of the file +#' +#' @return TRUE if the file exists, FALSE if it doesnt +#' +#' @noRd +.object_exists <- function(project, object_name, extension) { + response <- httr::HEAD(paste0(.get_url(), + .to_object_uri(project, object_name, extension)), + config = c(httr::add_headers(.get_auth_header()))) + response$status_code == 204 +} \ No newline at end of file diff --git a/R/table.R b/R/table.R index f48ea7f3..e20857ab 100644 --- a/R/table.R +++ b/R/table.R @@ -132,7 +132,17 @@ armadillo.copy_table <- # nolint #' #' @export armadillo.load_table <- function(project, folder, name) { # nolint - .load_object(project, folder, name, .load_table, ".parquet") + object_name <- paste0(folder, "/", name) + if(.object_exists(project, object_name, ".parquet")){ + .load_object(project, folder, name, .load_table, ".parquet") + } else if(.object_exists(project, object_name, ".alf")) { + info <- .get_linkfile_content(project, object_name) + variables <- unlist(info$variables) + source <- strsplit(info$sourceLink,"/",fixed=T) + .load_object(source[[1]][1], source[[1]][2], source[[1]][3], .load_linked_table, ".parquet", variables) + } else { + stop(paste0("Table ", project, "/", object_name, " does not exist.")) + } } #' Helper function to extract a parquet file @@ -145,6 +155,32 @@ armadillo.load_table <- function(project, folder, name) { # nolint as.data.frame(arrow::read_parquet(file, as_data_frame = FALSE)) } +#' Helper function to extract the source parquet file in a linkfile +#' +#' @param file source table parquet file +#' @param columns character list of columns to select from source file +#' +#' @return the contents of the file, as data frame +#' +.load_linked_table <- function(file, columns) { + as.data.frame(arrow::read_parquet(file, as_data_frame = FALSE, col_select = columns)) +} + +#' Helper function to get the contents of a linkfile +#' +#' @param project projectname where the linkfile is stored +#' @param objectname folder/name of linkfile +#' +#' @return the contents of the linkfile +#' +.get_linkfile_content <- function(project, object_name) { + response <- httr::GET(paste0(.get_url(), + .to_object_uri(project, object_name, ".alf"), "/info"), + config = c(httr::add_headers(.get_auth_header()))) + .handle_request_error(response) + httr::content(response, as = "parsed") +} + #' Move the table #' #' @param project a study or collection of variables From 4c40fe9cdd84173ba321842b0d9fd9ee3f1356c6 Mon Sep 17 00:00:00 2001 From: mkslofstra Date: Mon, 13 May 2024 09:26:06 +0200 Subject: [PATCH 2/3] test(load_table): write tests for linkfile functionality --- R/table.R | 2 +- tests/testthat/test-object.R | 36 +++++++++++++++++++++++ tests/testthat/test-table.R | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/R/table.R b/R/table.R index e20857ab..588bebec 100644 --- a/R/table.R +++ b/R/table.R @@ -138,7 +138,7 @@ armadillo.load_table <- function(project, folder, name) { # nolint } else if(.object_exists(project, object_name, ".alf")) { info <- .get_linkfile_content(project, object_name) variables <- unlist(info$variables) - source <- strsplit(info$sourceLink,"/",fixed=T) + source <- strsplit(info$sourceLink,"/", fixed=T) .load_object(source[[1]][1], source[[1]][2], source[[1]][3], .load_linked_table, ".parquet", variables) } else { stop(paste0("Table ", project, "/", object_name, " does not exist.")) diff --git a/tests/testthat/test-object.R b/tests/testthat/test-object.R index c043cd27..d8429647 100644 --- a/tests/testthat/test-object.R +++ b/tests/testthat/test-object.R @@ -402,3 +402,39 @@ test_that(".move_object moves object", { stub_registry_clear() }) + +test_that(".object_exists returns true if status is 204", { + stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/core%2Fnonrep.parquet') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 204) + + expect_true( + .object_exists( + project = "project", + object_name = "core/nonrep", + extension = ".parquet" + ) + ) + + stub_registry_clear() +}) + +test_that(".object_exists returns true if status is 204", { + stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/core%2Fnonrep.parquet') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 404) + + expect_false( + .object_exists( + project = "project", + object_name = "core/nonrep", + extension = ".parquet" + ) + ) + + stub_registry_clear() +}) diff --git a/tests/testthat/test-table.R b/tests/testthat/test-table.R index 26733fd2..50793ad7 100644 --- a/tests/testthat/test-table.R +++ b/tests/testthat/test-table.R @@ -103,6 +103,12 @@ test_that("armadillo.move_table calls .move_object", { test_that("armadillo.load_table calls .load_object", { load_object <- mock() + + stub_request('head', uri = 'https://test.nl//storage/projects/project/objects/folder%2Fname.parquet') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 204) with_mock( armadillo.load_table( @@ -121,3 +127,52 @@ test_that("armadillo.load_table calls .load_object", { extension = ".parquet" ) }) + +test_that("armadillo.load_table calls .load_object with linktable loadfunction", { + load_object <- mock() + + stub_request('head', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.parquet') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 404) + + stub_request('head', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.alf') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 204) + + stub_request('get', uri = 'https://test.nl//storage/projects/project1/objects/folder%2Fname.alf/info') %>% + wi_th( + headers = list('Accept' = 'application/json, text/xml, application/xml, */*', 'Authorization' = 'Bearer token') + ) %>% + to_return(status = 200, headers = list('Content-Type' = 'application/json; charset=utf-8'), + body = '{ + "name": "folder/name.alf", + "size": "955 bytes", + "rows": "3000", + "columns": "6", + "sourceLink": "project/folder/name", + "variables": ["coh_country", "recruit_age","cob_m", "ethn1_m","ethn2_m","ethn3_m"] + }' + ) + + with_mock( + armadillo.load_table( + "project1", + "folder", + "name" + ), + "MolgenisArmadillo:::.load_object" = load_object + ) + + expect_args(load_object, 1, + project = "project", + folder = "folder", + name = "name", + load_function = .load_linked_table, + extension = ".parquet", + load_arg = c("coh_country", "recruit_age","cob_m", "ethn1_m","ethn2_m","ethn3_m") + ) +}) From 116dddf4a8333cb1fc5a8a6d2d96793fa555cb14 Mon Sep 17 00:00:00 2001 From: marikaris Date: Tue, 14 May 2024 13:08:42 +0200 Subject: [PATCH 3/3] fix typo --- R/table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/table.R b/R/table.R index 588bebec..4cf95dad 100644 --- a/R/table.R +++ b/R/table.R @@ -169,7 +169,7 @@ armadillo.load_table <- function(project, folder, name) { # nolint #' Helper function to get the contents of a linkfile #' #' @param project projectname where the linkfile is stored -#' @param objectname folder/name of linkfile +#' @param object_name folder/name of linkfile #' #' @return the contents of the linkfile #'