From 68c5019c4ec116138bdaf4100f4be34191548296 Mon Sep 17 00:00:00 2001 From: MRG FOSS Date: Sun, 16 Jul 2023 11:26:47 +0200 Subject: [PATCH] Add support for GCS emulators Allow configuring a Google Cloud Storage emulator to be used instead of the real service. This can be achieved by providing a `STORAGE_EMULATOR_HOST` environment variable with the scheme, host and port of the emulator server (e.g.: http://localhost:8080). Besides using a different API endpoint, configuring an emulator also has the effect of lifting the requirement of providing valid authentication credentials. Emulators don't require or even implement authentication. This behavior is similar to the one implemented in official Google Cloud Storage client libraries developed and maintained by Google. --- DESCRIPTION | 8 +++++-- NEWS.md | 2 ++ R/acl.R | 10 ++++----- R/buckets.R | 8 +++---- R/compose.R | 2 +- R/download.R | 4 ++-- R/host.R | 19 ++++++++++++++++ R/objects.R | 10 ++++----- R/options.R | 4 ++++ R/pubsub.R | 16 ++++++------- R/upload.R | 8 +++---- R/versioning.R | 6 +++-- man/gcs_upload.Rd | 2 +- man/get_storage_host.Rd | 15 +++++++++++++ man/is.storage_emulated.Rd | 14 ++++++++++++ vignettes/googleCloudStorageR.Rmd | 37 +++++++++++++++++++++++++++++++ 16 files changed, 131 insertions(+), 34 deletions(-) create mode 100644 R/host.R create mode 100644 man/get_storage_host.Rd create mode 100644 man/is.storage_emulated.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 16c7c3c..7b5152e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,11 @@ Description: Interact with Google Cloud Storage project. 
Authors@R: c(person("Mark", "Edmondson",email = "r@sunholo.com", role = c("aut", "cre"), - comment = c(ORCID = "0000-0002-8434-3881"))) + comment = c(ORCID = "0000-0002-8434-3881")), + person("manuteleco", + email = "mrg.foss@gmail.com", + role = "ctb", + comment = "")) URL: https://code.markedmondson.me/googleCloudStorageR/ BugReports: https://github.com/cloudyr/googleCloudStorageR/issues Depends: @@ -36,6 +40,6 @@ Remotes: License: MIT + file LICENSE LazyData: true VignetteBuilder: knitr -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Config/testthat/edition: 3 Config/testthat/parallel: false diff --git a/NEWS.md b/NEWS.md index e9d8f32..6b3d3a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ * Fix bug where retry was not attempted on `gcs_upload()` * Add some custom error classes `http_400`, `http_404` etc. * Allow listing of more than 1000 buckets (#163 - thanks @hidekoji) +* Support GCS emulators and the `STORAGE_EMULATOR_HOST` environment variable + (#176 - thanks @manuteleco) # googleCloudStorage 0.7.0 diff --git a/R/acl.R b/R/acl.R index 46f0d06..e42c81d 100644 --- a/R/acl.R +++ b/R/acl.R @@ -45,7 +45,7 @@ gcs_get_bucket_acl <- function(bucket = gcs_get_global_bucket(), entity <- build_entity(entity, entity_type) ge <- - gar_api_generator("https://storage.googleapis.com/storage/v1", + gar_api_generator(sprintf("%s/storage/v1", get_storage_host()), "GET", path_args = list(b = bucket, acl = entity), @@ -99,7 +99,7 @@ gcs_create_bucket_acl <- function(bucket = gcs_get_global_bucket(), ) insert <- - gar_api_generator("https://storage.googleapis.com/storage/v1", + gar_api_generator(sprintf("%s/storage/v1", get_storage_host()), "POST", path_args = list(b = bucket, acl = ""), @@ -182,7 +182,7 @@ gcs_update_object_acl <- function(object_name, ) insert <- - gar_api_generator("https://storage.googleapis.com/storage/v1", + gar_api_generator(sprintf("%s/storage/v1", get_storage_host()), "POST", path_args = list(b = bucket, o = object_name, @@ -261,8 +261,8 @@ 
gcs_get_object_acl <- function(object_name, pa <- list(generation = generation) } - url <- sprintf("https://storage.googleapis.com/storage/v1/b/%s/o/%s/acl/%s", - bucket, object_name, entity) + url <- sprintf("%s/storage/v1/b/%s/o/%s/acl/%s", + get_storage_host(), bucket, object_name, entity) # storage.objectAccessControls.get f <- gar_api_generator(url, "GET", pars_args = pa, diff --git a/R/buckets.R b/R/buckets.R index 2bc6630..f76429c 100644 --- a/R/buckets.R +++ b/R/buckets.R @@ -137,7 +137,7 @@ gcs_list_buckets <- function(projectId, } lb <- - googleAuthR::gar_api_generator("https://storage.googleapis.com/storage/v1/b", + googleAuthR::gar_api_generator(sprintf("%s/storage/v1/b", get_storage_host()), "GET", pars_args = list(project=projectId, prefix=prefix, @@ -202,7 +202,7 @@ gcs_get_bucket <- function(bucket = gcs_get_global_bucket(), pars_args <- rmNullObs(pars_args) bb <- - googleAuthR::gar_api_generator("https://storage.googleapis.com/storage/v1/", + googleAuthR::gar_api_generator(sprintf("%s/storage/v1/", get_storage_host()), "GET", path_args = list(b = bucket), pars_args = pars_args) @@ -275,7 +275,7 @@ gcs_create_bucket <- projection = projection) pars_args <- rmNullObs(pars_args) - bb <- gar_api_generator("https://storage.googleapis.com/storage/v1/b", + bb <- gar_api_generator(sprintf("%s/storage/v1/b", get_storage_host()), "POST", pars_args = pars_args) @@ -333,7 +333,7 @@ gcs_delete_bucket <- function(bucket, ifMetagenerationNotMatch=ifMetagenerationNotMatch) pars_args <- rmNullObs(pars_args) - bb <- gar_api_generator("https://storage.googleapis.com/storage/v1/", + bb <- gar_api_generator(sprintf("%s/storage/v1/", get_storage_host()), "DELETE", path_args = list(b = bucket), pars_args = pars_args) diff --git a/R/compose.R b/R/compose.R index 4281045..3b81e3e 100644 --- a/R/compose.R +++ b/R/compose.R @@ -49,7 +49,7 @@ gcs_compose_objects <- function(objects, ) ) - ob <- gar_api_generator("https://storage.googleapis.com/storage/v1", + ob <- 
gar_api_generator(sprintf("%s/storage/v1", get_storage_host()), "POST", path_args = list(b = bucket, o = destination, diff --git a/R/download.R b/R/download.R index cb0115e..a3d6a47 100644 --- a/R/download.R +++ b/R/download.R @@ -40,8 +40,8 @@ gcs_download_url <- function(object_name, } domain <- "https://storage.cloud.google.com" - if(public){ - domain <- "https://storage.googleapis.com" + if(public || is.storage_emulated()){ + domain <- get_storage_host() } file.path(domain, bucket, object_name, fsep = "/") diff --git a/R/host.R b/R/host.R new file mode 100644 index 0000000..aa5d261 --- /dev/null +++ b/R/host.R @@ -0,0 +1,19 @@ +## Google Cloud Storage API host, used by default +.default_storage_host <- "https://storage.googleapis.com" + +#' Get the Google Cloud Storage API host to use for requests +#' +#' Uses the \code{STORAGE_EMULATOR_HOST} environment variable if set, otherwise +#' uses the default host (the real Google Cloud Storage API). +#' +#' @return The host to use for requests (includes scheme, host and port) +get_storage_host <- function() { + Sys.getenv("STORAGE_EMULATOR_HOST", .default_storage_host) +} + +#' Check if the Google Cloud Storage API is emulated +#' +#' @return TRUE if the Google Cloud Storage API is emulated, FALSE otherwise +is.storage_emulated <- function() { + nzchar(Sys.getenv("STORAGE_EMULATOR_HOST")) # unset yields "", never NULL +} diff --git a/R/objects.R b/R/objects.R index d0b8297..bb0814e 100644 --- a/R/objects.R +++ b/R/objects.R @@ -45,7 +45,7 @@ gcs_list_objects <- function(bucket = gcs_get_global_bucket(), versions = versions) pars <- rmNullObs(pars) - lo <- gar_api_generator("https://storage.googleapis.com/storage/v1/", + lo <- gar_api_generator(sprintf("%s/storage/v1/", get_storage_host()), path_args = list(b = bucket, o = ""), pars_args = pars, @@ -263,7 +263,7 @@ gcs_get_object <- function(object_name, fields = fields) pars_args <- rmNullObs(pars_args) - ob <- gar_api_generator("https://storage.googleapis.com/storage/v1/", + ob <-
gar_api_generator(sprintf("%s/storage/v1/", get_storage_host()), path_args = list(b = bucket, o = object_name), pars_args = pars_args, @@ -387,7 +387,7 @@ gcs_delete_object <- function(object_name, pars <- list(generation = generation) pars <- rmNullObs(pars) - ob <- gar_api_generator("https://storage.googleapis.com/storage/v1/", + ob <- gar_api_generator(sprintf("%s/storage/v1/", get_storage_host()), "DELETE", path_args = list(b = bucket, o = object_name), @@ -446,8 +446,8 @@ gcs_copy_object <- function(source_object, source_object <- URLencode(source_object, reserved = TRUE) destination_object <- URLencode(destination_object, reserved = TRUE) - the_url <- sprintf("https://storage.googleapis.com/storage/v1/b/%s/o/%s/copyTo/b/%s/o/%s", - source_bucket, source_object, destination_bucket, destination_object) + the_url <- sprintf("%s/storage/v1/b/%s/o/%s/copyTo/b/%s/o/%s", + get_storage_host(), source_bucket, source_object, destination_bucket, destination_object) pars <- NULL if(!is.null(rewriteToken)){ pars <- list(rewriteToken = rewriteToken) diff --git a/R/options.R b/R/options.R index 29a590b..cf9f9be 100644 --- a/R/options.R +++ b/R/options.R @@ -8,6 +8,10 @@ toset <- !(names(op.googleCloudStorageR) %in% names(op)) if(any(toset)) options(op.googleCloudStorageR[toset]) + + if(is.storage_emulated()){ + options(googleAuthR.skip_token_checks = TRUE) + } invisible() diff --git a/R/pubsub.R b/R/pubsub.R index 73e5d63..b4532f4 100644 --- a/R/pubsub.R +++ b/R/pubsub.R @@ -58,8 +58,8 @@ gcs_create_pubsub <- function(topic, the_body <- rmNullObs(the_body) - the_url <- sprintf("https://storage.googleapis.com/storage/v1/b/%s/notificationConfigs", - bucket) + the_url <- sprintf("%s/storage/v1/b/%s/notificationConfigs", + get_storage_host(), bucket) api <- gar_api_generator(the_url, "POST", data_parse_function = function(x) x) @@ -85,8 +85,8 @@ gcs_get_service_email <- function(project){ assert_that(is.string(project)) - the_url <- 
sprintf("https://storage.googleapis.com/storage/v1/projects/%s/serviceAccount", - project) + the_url <- sprintf("%s/storage/v1/projects/%s/serviceAccount", + get_storage_host(), project) api <- gar_api_generator(the_url, "GET", data_parse_function = function(x){x$email_address}) @@ -120,8 +120,8 @@ gcs_list_pubsub <- function(bucket = gcs_get_global_bucket()){ bucket <- as.bucket_name(bucket) - the_url <- sprintf("https://storage.googleapis.com/storage/v1/b/%s/notificationConfigs", - bucket) + the_url <- sprintf("%s/storage/v1/b/%s/notificationConfigs", + get_storage_host(), bucket) api <- gar_api_generator(the_url, "GET", data_parse_function = function(x) x$items) @@ -159,8 +159,8 @@ gcs_delete_pubsub <- function(config_name, assert_that(is.string(config_name)) - the_url <- sprintf("https://storage.googleapis.com/storage/v1/b/%s/notificationConfigs/%s", - bucket, config_name) + the_url <- sprintf("%s/storage/v1/b/%s/notificationConfigs/%s", + get_storage_host(), bucket, config_name) # to avoid some warning messages due to empty body diff --git a/R/upload.R b/R/upload.R index c06384c..d8004a5 100644 --- a/R/upload.R +++ b/R/upload.R @@ -382,7 +382,7 @@ do_simple_upload <- function(name, } up <- - gar_api_generator("https://storage.googleapis.com/upload/storage/v1", + gar_api_generator(sprintf("%s/upload/storage/v1", get_storage_host()), "POST", path_args = list(b = bucket, o = ""), @@ -425,8 +425,8 @@ do_multipart_upload <- function(name, pars_args[["predefinedAcl"]] <- predefinedAcl } - the_url <- sprintf("https://storage.googleapis.com/upload/storage/v1/b/%s/o", - bucket) + the_url <- sprintf("%s/upload/storage/v1/b/%s/o", + get_storage_host(), bucket) up <- gar_api_generator(the_url, "POST", @@ -458,7 +458,7 @@ do_resumable_upload <- function(name, } up <- - googleAuthR::gar_api_generator("https://storage.googleapis.com/upload/storage/v1", + googleAuthR::gar_api_generator(sprintf("%s/upload/storage/v1", get_storage_host()), "POST", path_args = list(b = bucket, o 
= ""), diff --git a/R/versioning.R b/R/versioning.R index 38faf81..0104086 100644 --- a/R/versioning.R +++ b/R/versioning.R @@ -49,7 +49,8 @@ gcs_version_bucket <- function(bucket, # will only be different for list url <- sprintf( - "https://storage.googleapis.com/storage/v1/b/%s", + "%s/storage/v1/b/%s", + get_storage_host(), bucket ) pars_args <- list(fields = "versioning") @@ -102,7 +103,8 @@ gcs_version_bucket <- function(bucket, # Archived versions of objects have a `timeDeleted` property. url <- sprintf( - "https://storage.googleapis.com/storage/v1/b/%s/o", + "%s/storage/v1/b/%s/o", + get_storage_host(), bucket ) pars_args <- list(versions = "true") diff --git a/man/gcs_upload.Rd b/man/gcs_upload.Rd index 140149a..2e17672 100644 --- a/man/gcs_upload.Rd +++ b/man/gcs_upload.Rd @@ -18,7 +18,7 @@ gcs_upload( upload_type = c("simple", "resumable") ) -gcs_upload_set_limit(upload_limit = 5000000L) +gcs_upload_set_limit(upload_limit = 5e+06) } \arguments{ \item{file}{data.frame, list, R object or filepath (character) to upload file} diff --git a/man/get_storage_host.Rd b/man/get_storage_host.Rd new file mode 100644 index 0000000..68f02e6 --- /dev/null +++ b/man/get_storage_host.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/host.R +\name{get_storage_host} +\alias{get_storage_host} +\title{Get the Google Cloud Storage API host to use for requests} +\usage{ +get_storage_host() +} +\value{ +The host to use for requests (includes scheme, host and port) +} +\description{ +Uses the \code{STORAGE_EMULATOR_HOST} environment variable if set, otherwise +uses the default host (the real Google Cloud Storage API). 
+} diff --git a/man/is.storage_emulated.Rd b/man/is.storage_emulated.Rd new file mode 100644 index 0000000..f2908f2 --- /dev/null +++ b/man/is.storage_emulated.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/host.R +\name{is.storage_emulated} +\alias{is.storage_emulated} +\title{Check if the Google Cloud Storage API is emulated} +\usage{ +is.storage_emulated() +} +\value{ +TRUE if the Google Cloud Storage API is emulated, FALSE otherwise +} +\description{ +Check if the Google Cloud Storage API is emulated +} diff --git a/vignettes/googleCloudStorageR.Rmd b/vignettes/googleCloudStorageR.Rmd index 082de0a..1115472 100644 --- a/vignettes/googleCloudStorageR.Rmd +++ b/vignettes/googleCloudStorageR.Rmd @@ -138,6 +138,43 @@ gcs_get_global_bucket() [1] "my-default-bucket-2" ``` +## Using a GCS emulator + +You can also use a GCS emulator instead of the real Google Cloud Storage. By +providing a `STORAGE_EMULATOR_HOST` environment variable, the library will +direct all API requests to the emulator server. The variable's value should +include scheme, host and port (e.g.: `http://localhost:8080`). + +When using an emulator, you don't have to provide authentication credentials. + +This is generally useful in the context of automated tests. And depending on the +emulator implementation, it can even give local filesystem support to +applications that were previously hardwired to use GCS.
+ +```r +## start a GCS emulator outside R listening at 127.0.0.1:1234, for example + +## set emulator host via environment +Sys.setenv("STORAGE_EMULATOR_HOST" = "http://127.0.0.1:1234") + +library(googleCloudStorageR) + +proj <- "my-dummy-project-id" + +# perform GCS operations normally, like: +gcs_create_bucket("my-bucket", proj) +==Google Cloud Storage Bucket== +Bucket: my-bucket +Location: US-CENTRAL1 +Class: STANDARD +Created: 2023-07-22 07:31:28 +Updated: 2023-07-22 07:31:28 + +gcs_list_buckets(proj) + name storageClass location updated +1 my-bucket STANDARD US-CENTRAL1 2023-07-22 07:31:28 +``` + ## Downloading objects from Google Cloud storage Once you have a Google project and created a bucket with an object in it, you can download it as below: