From 74e596774a4bdfe0f4a2546f8a8715e535c7dcac Mon Sep 17 00:00:00 2001 From: Martin Studer Date: Mon, 21 Oct 2024 16:49:35 +0200 Subject: [PATCH] Refactoring --- .github/workflows/ci-tests.yml | 58 +++++++++++++++---------------- NAMESPACE | 2 +- R/defaults.R | 19 +++++----- R/spark_read_bigquery.R | 12 +++---- R/spark_write_bigquery.R | 10 ++---- man/bigquery_defaults.Rd | 9 +++-- man/default_billing_project_id.Rd | 14 -------- man/default_project_id.Rd | 14 ++++++++ man/spark_read_bigquery.Rd | 13 +++---- man/spark_write_bigquery.Rd | 11 ++---- tests/testthat/setup.R | 6 ++-- 11 files changed, 74 insertions(+), 94 deletions(-) delete mode 100644 man/default_billing_project_id.Rd create mode 100644 man/default_project_id.Rd diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index df0fcbf..16e5550 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -26,31 +26,31 @@ jobs: matrix: config: # Spark 3.5 seems currently unavailable so we use 3.4 for now - # - os-name: ubuntu - # os-version: latest - # java: 17 - # spark: "3.4" - # r-version: release - # - os-name: ubuntu - # os-version: latest - # java: 8 - # spark: "2.4" - # r-version: devel - # - os-name: ubuntu - # os-version: "22.04" - # java: 11 - # spark: "3.0" - # r-version: oldrel - # - os-name: macos - # os-version: latest - # java: 8 - # spark: "3.2" - # r-version: release - # - os-name: macos - # os-version: latest - # java: 17 - # spark: "3.4" - # r-version: devel + - os-name: ubuntu + os-version: latest + java: 17 + spark: "3.4" + r-version: release + - os-name: ubuntu + os-version: latest + java: 8 + spark: "2.4" + r-version: devel + - os-name: ubuntu + os-version: "22.04" + java: 11 + spark: "3.0" + r-version: oldrel + - os-name: macos + os-version: latest + java: 8 + spark: "3.2" + r-version: release + - os-name: macos + os-version: latest + java: 17 + spark: "3.4" + r-version: devel - os-name: windows os-version: latest java: 8 @@ -63,9 +63,9 @@ 
jobs: r-version: release env: SPARK_VERSION: ${{ matrix.config.spark }} - BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }} - MATERIALIZATION_DATASET: test - GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json + BIGQUERY_BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }} + BIGQUERY_MATERIALIZATION_DATASET: test + BIGQUERY_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json R_DEFAULT_INTERNET_TIMEOUT: 1800 steps: @@ -148,7 +148,7 @@ jobs: ADC: ${{ secrets.GCLOUD_APPLICATION_CREDENTIALS }} shell: bash run: | - echo $ADC > $GOOGLE_APPLICATION_CREDENTIALS + echo $ADC > ${BIGQUERY_APPLICATION_CREDENTIALS} - name: Run R CMD check uses: r-lib/actions/check-r-package@v2 diff --git a/NAMESPACE b/NAMESPACE index 0592396..552d510 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,9 @@ # Generated by roxygen2: do not edit by hand export(bigquery_defaults) -export(default_billing_project_id) export(default_materialization_dataset) export(default_materialization_project) +export(default_project_id) export(default_service_account_key_file) export(spark_read_bigquery) export(spark_write_bigquery) diff --git a/R/defaults.R b/R/defaults.R index 7f1f1f3..0ba24a8 100644 --- a/R/defaults.R +++ b/R/defaults.R @@ -1,7 +1,6 @@ #' @title Google BigQuery Default Settings #' @description Sets default values for several Google BigQuery related settings. -#' @param billingProjectId Default Google Cloud Platform (GCP) project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. +#' @param projectId Default Google Cloud Platform (GCP) project ID to use. #' @param materializationProject Project to use for materializing SQL queries. See also #' \code{materializationDataset}. Defaults to the billing project (\code{billingProjectId}). 
#' @param materializationDataset Dataset (in materialization project) which is used for @@ -32,7 +31,7 @@ #' #' \code{\link{spark_write_bigquery}} #' -#' \code{\link{default_billing_project_id}} +#' \code{\link{default_project_id}} #' #' \code{\link{default_materialization_project}} #' @@ -41,8 +40,8 @@ #' \code{\link{default_service_account_key_file}} #' @keywords database connection #' @export -bigquery_defaults <- function(billingProjectId, - materializationProject = billingProjectId, +bigquery_defaults <- function(projectId, + materializationProject = projectId, materializationDataset = NULL, serviceAccountKeyFile = NULL) { if (is.null(serviceAccountKeyFile)) { @@ -53,19 +52,19 @@ bigquery_defaults <- function(billingProjectId, } options( - "sparkbq.default.billingProjectId" = billingProjectId, + "sparkbq.default.projectId" = projectId, "sparkbq.default.materializationProject" = materializationProject, "sparkbq.default.materializationDataset" = materializationDataset, "sparkbq.default.serviceAccountKeyFile" = serviceAccountKeyFile ) } -#' @title Default Google BigQuery Billing Project ID -#' @description Returns the default Google BigQuery billing project ID. +#' @title Default Google BigQuery Project ID +#' @description Returns the default Google BigQuery project ID. #' @seealso \code{\link{bigquery_defaults}} #' @export -default_billing_project_id <- function() { - getOption("sparkbq.default.billingProjectId") +default_project_id <- function() { + getOption("sparkbq.default.projectId") } #' @title Default Google BigQuery Materialization Project diff --git a/R/spark_read_bigquery.R b/R/spark_read_bigquery.R index e5cbf60..c2ffd62 100644 --- a/R/spark_read_bigquery.R +++ b/R/spark_read_bigquery.R @@ -3,11 +3,8 @@ #' @param sc \code{\link[sparklyr]{spark_connection}} provided by sparklyr. #' @param name The name to assign to the newly generated table (see also #' \code{\link[sparklyr]{spark_read_source}}). 
-#' @param billingProjectId Google Cloud Platform project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. -#' Defaults to \code{\link{default_billing_project_id}}. #' @param projectId Google Cloud Platform project ID of BigQuery dataset. -#' Defaults to \code{billingProjectId}. +#' Defaults to \code{\link{default_project_id}}. #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores). #' Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified. #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores). @@ -17,7 +14,7 @@ #' (SQL-2011). Legacy SQL is not supported. Tables are specified as #' \code{..}. #' @param materializationProject Project to use for materializing SQL queries. See also -#' \code{materializationDataset}. Defaults to billing project +#' \code{materializationDataset}. Defaults to project #' \code{\link{default_materialization_project}}. #' @param materializationDataset Dataset (in materialization project) which is used for #' materializing SQL queries (see \code{sqlQuery}). The GCP user @@ -66,7 +63,7 @@ #' sc <- spark_connect(master = "local", config = config) #' #' bigquery_defaults( -#' billingProjectId = "", +#' projectId = "", #' serviceAccountKeyFile = "") #' #' # Reading the public shakespeare data table @@ -84,8 +81,7 @@ #' @export spark_read_bigquery <- function(sc, name, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId = NULL, tableId = NULL, sqlQuery = NULL, diff --git a/R/spark_write_bigquery.R b/R/spark_write_bigquery.R index 5c3aab7..9a292de 100644 --- a/R/spark_write_bigquery.R +++ b/R/spark_write_bigquery.R @@ -4,11 +4,8 @@ #' Data is written directly to BigQuery using the #' \href{https://cloud.google.com/bigquery/docs/write-api}{BigQuery Storage Write API}. 
#' @param data Spark DataFrame to write to Google BigQuery. -#' @param billingProjectId Google Cloud Platform project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. -#' Defaults to \code{default_billing_project_id()}. #' @param projectId Google Cloud Platform project ID of BigQuery dataset. -#' Defaults to \code{billingProjectId}. +#' Defaults to \code{default_project_id()}. #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores). #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores). #' @param serviceAccountKeyFile Google Cloud service account key file to use for authentication @@ -50,7 +47,7 @@ #' sc <- spark_connect(master = "local", config = config) #' #' bigquery_defaults( -#' billingProjectId = "", +#' projectId = "", #' serviceAccountKeyFile = "") #' #' # Copy mtcars to Spark @@ -65,8 +62,7 @@ #' @importFrom sparklyr spark_write_source #' @export spark_write_bigquery <- function(data, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId, tableId, serviceAccountKeyFile = default_service_account_key_file(), diff --git a/man/bigquery_defaults.Rd b/man/bigquery_defaults.Rd index 50e98e4..1dfc39c 100644 --- a/man/bigquery_defaults.Rd +++ b/man/bigquery_defaults.Rd @@ -5,15 +5,14 @@ \title{Google BigQuery Default Settings} \usage{ bigquery_defaults( - billingProjectId, - materializationProject = billingProjectId, + projectId, + materializationProject = projectId, materializationDataset = NULL, serviceAccountKeyFile = NULL ) } \arguments{ -\item{billingProjectId}{Default Google Cloud Platform (GCP) project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations.} +\item{projectId}{Default Google Cloud Platform (GCP) project ID to use.} \item{materializationProject}{Project to use for materializing SQL queries. 
See also \code{materializationDataset}. Defaults to the billing project (\code{billingProjectId}).} @@ -54,7 +53,7 @@ Sets default values for several Google BigQuery related settings. \code{\link{spark_write_bigquery}} -\code{\link{default_billing_project_id}} +\code{\link{default_project_id}} \code{\link{default_materialization_project}} diff --git a/man/default_billing_project_id.Rd b/man/default_billing_project_id.Rd deleted file mode 100644 index 2248673..0000000 --- a/man/default_billing_project_id.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/defaults.R -\name{default_billing_project_id} -\alias{default_billing_project_id} -\title{Default Google BigQuery Billing Project ID} -\usage{ -default_billing_project_id() -} -\description{ -Returns the default Google BigQuery billing project ID. -} -\seealso{ -\code{\link{bigquery_defaults}} -} diff --git a/man/default_project_id.Rd b/man/default_project_id.Rd new file mode 100644 index 0000000..7bf3ce8 --- /dev/null +++ b/man/default_project_id.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/defaults.R +\name{default_project_id} +\alias{default_project_id} +\title{Default Google BigQuery Project ID} +\usage{ +default_project_id() +} +\description{ +Returns the default Google BigQuery project ID. 
+} +\seealso{ +\code{\link{bigquery_defaults}} +} diff --git a/man/spark_read_bigquery.Rd b/man/spark_read_bigquery.Rd index 9e67a09..d08c9a2 100644 --- a/man/spark_read_bigquery.Rd +++ b/man/spark_read_bigquery.Rd @@ -7,8 +7,7 @@ spark_read_bigquery( sc, name, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId = NULL, tableId = NULL, sqlQuery = NULL, @@ -26,12 +25,8 @@ spark_read_bigquery( \item{name}{The name to assign to the newly generated table (see also \code{\link[sparklyr]{spark_read_source}}).} -\item{billingProjectId}{Google Cloud Platform project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations. -Defaults to \code{\link{default_billing_project_id}}.} - \item{projectId}{Google Cloud Platform project ID of BigQuery dataset. -Defaults to \code{billingProjectId}.} +Defaults to \code{\link{default_project_id}}.} \item{datasetId}{Google BigQuery dataset ID (may contain letters, numbers and underscores). Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified.} @@ -45,7 +40,7 @@ or \code{sqlQuery} must be specified. The query must be specified in standard SQ \code{..}.} \item{materializationProject}{Project to use for materializing SQL queries. See also -\code{materializationDataset}. Defaults to billing project +\code{materializationDataset}. 
Defaults to project \code{\link{default_materialization_project}}.} \item{materializationDataset}{Dataset (in materialization project) which is used for @@ -84,7 +79,7 @@ config <- spark_config() sc <- spark_connect(master = "local", config = config) bigquery_defaults( - billingProjectId = "", + projectId = "", serviceAccountKeyFile = "") # Reading the public shakespeare data table diff --git a/man/spark_write_bigquery.Rd b/man/spark_write_bigquery.Rd index d90cc2d..22f7712 100644 --- a/man/spark_write_bigquery.Rd +++ b/man/spark_write_bigquery.Rd @@ -6,8 +6,7 @@ \usage{ spark_write_bigquery( data, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId, tableId, serviceAccountKeyFile = default_service_account_key_file(), @@ -19,12 +18,8 @@ spark_write_bigquery( \arguments{ \item{data}{Spark DataFrame to write to Google BigQuery.} -\item{billingProjectId}{Google Cloud Platform project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations. -Defaults to \code{default_billing_project_id()}.} - \item{projectId}{Google Cloud Platform project ID of BigQuery dataset. 
-Defaults to \code{billingProjectId}.} +Defaults to \code{default_project_id()}.} \item{datasetId}{Google BigQuery dataset ID (may contain letters, numbers and underscores).} \item{tableId}{Google BigQuery table ID (may contain letters, numbers and underscores).} \item{serviceAccountKeyFile}{Google Cloud service account key file to use for authentication @@ -58,7 +53,7 @@ config <- spark_config() sc <- spark_connect(master = "local", config = config) bigquery_defaults( - billingProjectId = "", + projectId = "", serviceAccountKeyFile = "") # Copy mtcars to Spark diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index bf54f48..e60357a 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,8 +1,8 @@ # Setting some BigQuery defaults for use in tests bigquery_defaults( - billingProjectId = Sys.getenv("BILLING_PROJECT_ID"), - materializationDataset = Sys.getenv("MATERIALIZATION_DATASET"), - serviceAccountKeyFile = Sys.getenv("GOOGLE_APPLICATION_CREDENTIALS") + projectId = Sys.getenv("BIGQUERY_BILLING_PROJECT_ID"), + materializationDataset = Sys.getenv("BIGQUERY_MATERIALIZATION_DATASET"), + serviceAccountKeyFile = Sys.getenv("BIGQUERY_APPLICATION_CREDENTIALS") ) options(spark.version = Sys.getenv("SPARK_VERSION", "3.5"))