Merge pull request #284 from dblodgett-usgs/main
Download from cloud storage.
dblodgett-usgs authored May 25, 2022
2 parents f6b919a + c6950c3 commit 97c852f
Showing 12 changed files with 230 additions and 115 deletions.
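In practice the change is exercised through the package's existing public functions. A minimal sketch of the new cloud-aware download path, using the item ID from the updated roxygen examples below (cloud-hosted files may require an authenticated session; see ?authenticate_sb):

library(sbtools)

# List attached files; with the new fetch_cloud_urls = TRUE default,
# cloud-hosted files come back with tokenized S3 download URLs.
flist <- item_list_files('627f1572d34e3bef0c9a30d8')

# Download all attached files, cloud-hosted or not, to a temporary directory.
paths <- item_file_download('627f1572d34e3bef0c9a30d8', dest_dir = tempdir())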
6 changes: 6 additions & 0 deletions R/AAA.R
@@ -33,6 +33,12 @@ check_session <- function(x) {
stop('session is not authorized. See ?authenticate_sb', call. = FALSE)
}

# TRUE when a non-NULL session exists and still validates.
session_authorized <- function(session){

  return(session_validate(session) && !is.null(session))

}

session_val <- function(x) {
if (!session_validate(x)) {
stop('Session state is invalid, please re-authenticate', call. = FALSE)
70 changes: 70 additions & 0 deletions R/gql_helpers.R
@@ -0,0 +1,70 @@
get_gql_header <- function() {
httr::add_headers(
.headers = c(`content-type` = "application/json",
accept = "application/json",
authorization = paste("Bearer",
get_access_token())))
}

#' @noRd
#' @param q character GraphQL query to embed in the json body
#' @param gql httr handle to pass to POST
#' @param json character json body to pass -- should include the GraphQL query and any
#' additional content. json is optional; it defaults to just the query.
run_gql_query <- function(q, gql, json = jsonlite::toJSON(list(query = q), auto_unbox = TRUE)) {
out <- httr::POST(pkg.env$graphql_url, get_gql_header(),
body = json,
handle = gql)

if(out$status_code == 200) {
jsonlite::fromJSON(rawToChar(out$content))
} else {
stop(paste("Error making multipart session.\n code:", out$status_code,
"\n content:", rawToChar(out$content)))
}

}

#GraphQL Queries for interaction with ScienceBase Manager
create_multipart_upload_session <- function(s3_filepath, content_type, username, gql) {

run_gql_query(sprintf(
'query { createMultipartUploadSession(object: "%s" contentType: "%s" username: "%s") }',
s3_filepath, content_type, username), gql)$data$createMultipartUploadSession

}

get_presigned_url_for_chunk <- function(s3_filepath, upload_id, part_number, gql) {

run_gql_query(sprintf(
'query { getPreSignedUrlForChunk(object: "%s", upload_id: "%s", part_number: "%s") }',
s3_filepath, upload_id, part_number), gql)$data$getPreSignedUrlForChunk

}

complete_multipart_upload <- function(item_str, upload_id, etag_payload, gql) {

eta <- sapply(etag_payload, function(x) {
sprintf('{ETag: "%s", PartNumber: %i}', gsub('"', "", x$ETag), x$PartNumber)
})

eta <- paste0("[", paste(eta, collapse = ","), "]")

run_gql_query(sprintf(
'query { completeMultiPartUpload(object: "%s" upload_id: "%s" parts_eTags: %s) }',
item_str, upload_id, eta), gql)
}

get_cloud_download_url <- function(cr, gql) {

  query <- "query getS3DownloadUrl($input: SaveFileInputs){ getS3DownloadUrl(input: $input){ downloadUri }}"

  variables <- list(input = list(selectedRows = list(cuid = cr$cuid, key = cr$key,
                                                     title = cr$title,
                                                     useForPreview = cr$useForPreview)))

  json <- jsonlite::toJSON(list(query = query, variables = variables), auto_unbox = TRUE)

  run_gql_query(query, gql, json = json)
}
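As get_cloud_download_url above illustrates, run_gql_query accepts a prebuilt json body when a query needs variables. A hedged sketch of that pattern; the handle and the selectedRows values here are illustrative, not part of this commit:

gql <- httr::handle(url = pkg.env$graphql_url)

query <- "query getS3DownloadUrl($input: SaveFileInputs){ getS3DownloadUrl(input: $input){ downloadUri }}"

# Hypothetical cloud-file identifiers, for illustration only.
variables <- list(input = list(selectedRows = list(
  cuid = "example-cuid", key = "item/file.txt", title = "", useForPreview = FALSE)))

json <- jsonlite::toJSON(list(query = query, variables = variables), auto_unbox = TRUE)

resp <- run_gql_query(query, gql, json = json)
resp$data$getS3DownloadUrl$downloadUri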
82 changes: 46 additions & 36 deletions R/item_file_download.R
@@ -10,69 +10,79 @@
#' to download.
#' @param destinations String vector list of destinations for requested files.
#' Must be same length as \code{names}
-#' @param dest_dir A directory path for saving files when \code{names} parameter
-#' is omitted
+#' @param dest_dir A directory path for saving files when the \code{destinations}
+#' parameter is not specified.
#' @param overwrite_file Boolean indicating if file should be overwritten if it
#' already exists locally
#'
#' @return Character vector of full paths to local files
#'
#' @author Luke Winslow
#'
#' @examples \dontrun{
#'
-#' #downloads two files attached to this item
-#' item_file_download('548b2b31e4b03f64633662a4', dest_dir=tempdir())
+#' #downloads all files attached to this item
+#' item_file_download('627f1572d34e3bef0c9a30d8', dest_dir=tempdir())
#'
#' #downloads a specific file attached to this item
-#' item_file_download('548b2b31e4b03f64633662a4', names='gdp.txt',
-#'                    destinations=file.path(tempdir(), 'fname.txt'))
+#' item_file_download('627f1572d34e3bef0c9a30d8', names='example.txt',
+#'                    destinations=file.path(tempdir(), 'out.txt'))
#'
#' }
#' @export
item_file_download = function(sb_id, ..., names, destinations,
dest_dir = getwd(), session=current_session(),
overwrite_file = FALSE){

sb_id = as.sbitem(sb_id)
-  if(!session_validate(session)){
+
+  if(!session_validate(session))
     stop('Session state is invalid, please re-authenticate')
-  }

-  #We have two states, missing names and destinations, which means we need a dest_dir
-  if(missing(names) && missing(destinations)){
-
-    #populate names and destinations from files that are on SB
-    flist = item_list_files(sb_id, ..., session=session)
-    if(nrow(flist) < 1){
-      stop(sb_id$id, ':Item has no attached files')
-    }
-    names = flist$fname
-    destinations = file.path(dest_dir, names)
-
-  #or we have names and destinations
-  }else if(!missing(names) & !missing(destinations)){
-    if(length(names) != length(destinations)){
-      stop('Length of names and destinations must be identical')
-    }
+  flist <- item_list_files(sb_id, ..., session=session)
+
+  if(nrow(flist) < 1)
+    stop(sb_id$id, ':Item has no attached files')
+
+  if(missing(names)) {
+
+    names <- flist$fname
+
-    flist = item_list_files(sb_id, ..., session=session)
+  } else {
+
+    if(!missing(destinations) & length(names) != length(destinations))
+      stop('Length of names and destinations must be identical')
+
-    if(!all(names %in% flist$fname)){
-      stop(sb_id$id, 'Item does not contain all requested files')
-    }
-  #otherwise in some other error condition
-  }else{
-    stop('Must have either names & destinations, or dest_dir for all files')
  }

-  flist = merge(flist, data.frame(fname=names, dest=destinations, stringsAsFactors=FALSE))
+  if(!all(names %in% flist$fname)) stop(sb_id$id, 'Item does not contain all requested files')

+  if(!exists("destinations") | missing(destinations)) {
+    destinations <- file.path(dest_dir, names)
+  }

+  flist <- merge(cbind(flist, do.call(rbind.data.frame, attr(flist, "cloud"))),
+                 data.frame(fname=names, dest=destinations))

  for(i in seq_len(nrow(flist))) {
    tryCatch({

+     if(flist[i, ]$cuid != "") {
+
+       if(!exists("gql")) gql <- httr::handle(url = pkg.env$graphql_url)
+
+       message("retrieving S3 URL")
+
+       flist[i, ]$url <- get_cloud_download_url(flist[i, c("cuid", "key", "title", "useForPreview")],
+                                                gql)[[1]]$getS3DownloadUrl$downloadUri[1]
+
+     }
+
+     message(paste("downloading file", flist[i,]$dest))
+
      GET(url=flist[i,]$url, ...,
          write_disk(flist[i,]$dest, overwrite = overwrite_file),
-         handle=session, timeout = httr::timeout(default_timeout()))
+         handle=session, timeout = httr::timeout(default_timeout()),
+         httr::progress())

}, error = function(e) {
if(file.exists(flist[i,]$dest)) {
warning(paste(basename(flist[i,]$dest), "exists, and overwrite is false. Skipping."))
36 changes: 35 additions & 1 deletion R/item_list_files.R
@@ -2,6 +2,9 @@
#'
#' @template manipulate_item
#' @param recursive (logical) List files recursively. Default: \code{FALSE}
#' @param fetch_cloud_urls (logical) fetch tokenized cloud download URLs? Default: \code{TRUE}.
#' This option takes slightly longer, but the `url` column of the returned data.frame will
#' work for direct file downloads or for use with other applications and libraries.
#'
#' @return
#' A data.frame with columns fname, size, url, and facet.
@@ -47,7 +50,8 @@
#' item_list_files(id = '56562348e4b071e7ea53e09d', recursive = FALSE) # default
#' item_list_files(id = '56562348e4b071e7ea53e09d', recursive = TRUE)
#' }
-item_list_files = function(sb_id, recursive = FALSE, ..., session=current_session()){
+item_list_files = function(sb_id, recursive = FALSE, fetch_cloud_urls = TRUE, ...,
+                           session=current_session()){

session_val(session)

@@ -92,6 +96,10 @@ item_list_files = function(sb_id, recursive = FALSE, ..., session=current_session()){
url = rep("", lf),
facet = rep("", lf))

cloud <- rep(list(list(cuid = "", key = "",
title = "", useForPreview = "")),
lf)

if (length(files) == 0) {
return(out)
}
@@ -102,8 +110,34 @@ item_list_files = function(sb_id, recursive = FALSE, ..., session=current_session()){
out[i,'url'] = files[[i]]$url
if(!is.null(f <- files[[i]]$facet_name))
out[i, "facet"] = f
if(!is.null(files[[i]]$cuid)) {
cloud[[i]]$cuid <- files[[i]]$cuid
cloud[[i]]$key <- files[[i]]$key
cloud[[i]]$title <- ifelse(!is.null(files[[i]]$title),
files[[i]]$title, "")
cloud[[i]]$useForPreview <- ifelse(!is.null(files[[i]]$useForPreview),
files[[i]]$useForPreview, FALSE)

if(fetch_cloud_urls) {

if(!exists("gql")) gql <- httr::handle(url = pkg.env$graphql_url)

message("retrieving S3 URL")

out[i, 'url'] = get_cloud_download_url(
data.frame(cuid = cloud[[i]]$cuid,
key = cloud[[i]]$key,
title = cloud[[i]]$title,
useForPreview = cloud[[i]]$useForPreview),
gql)[[1]]$getS3DownloadUrl$downloadUri[1]

}

}
}

attr(out, "cloud") <- cloud

return(out)
}
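The per-file cloud metadata also travels with the result as a "cloud" attribute, which is what item_file_download consumes above. A short sketch of working with it directly, assuming an item that has cloud-hosted files:

# Skip the per-file GraphQL round trips when only names and sizes are needed.
flist <- item_list_files('627f1572d34e3bef0c9a30d8', fetch_cloud_urls = FALSE)

# One list element per file, each with cuid, key, title, and useForPreview.
cloud <- attr(flist, "cloud")

# Cloud-hosted files carry a non-empty cuid.
is_cloud <- vapply(cloud, function(x) x$cuid != "", logical(1))
flist$fname[is_cloud]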

54 changes: 1 addition & 53 deletions R/item_upload_files.R
@@ -143,7 +143,7 @@ multi_file_body <- function(files){
#' item_append_files(res$id, "foobar.txt")
#' }
#' @export
-item_upload_cloud <- function(sb_id, files, status = TRUE, session=current_session()) {
+item_upload_cloud <- function(sb_id, files, ..., status = TRUE, session=current_session()) {

try(sb_id <- sb_id$id, silent = TRUE)

@@ -212,55 +212,3 @@ cloud_upload <- function(file, mimetype, itemid, chunk_size_bytes = pkg.env$chun
return(invisible(complete_multipart_upload(f_path, session_id, parts_header, gql)))

}

(The 53 deleted lines were the GraphQL helpers -- get_gql_header, run_gql_query, create_multipart_upload_session, get_presigned_url_for_chunk, and complete_multipart_upload -- which this commit moves to R/gql_helpers.R, shown above.)
5 changes: 0 additions & 5 deletions R/session_authorized.R

This file was deleted.

16 changes: 7 additions & 9 deletions man/item_file_download.Rd

Some generated files are not rendered by default.

12 changes: 11 additions & 1 deletion man/item_list_files.Rd

Some generated files are not rendered by default.
