From 74e596774a4bdfe0f4a2546f8a8715e535c7dcac Mon Sep 17 00:00:00 2001 From: Martin Studer Date: Mon, 21 Oct 2024 16:49:35 +0200 Subject: [PATCH] Refactoring --- .github/workflows/ci-tests.yml | 58 +++++++++++++++---------------- NAMESPACE | 2 +- R/defaults.R | 19 +++++----- R/spark_read_bigquery.R | 12 +++---- R/spark_write_bigquery.R | 10 ++---- man/bigquery_defaults.Rd | 9 +++-- man/default_billing_project_id.Rd | 14 -------- man/default_project_id.Rd | 14 ++++++++ man/spark_read_bigquery.Rd | 13 +++---- man/spark_write_bigquery.Rd | 11 ++---- tests/testthat/setup.R | 6 ++-- 11 files changed, 74 insertions(+), 94 deletions(-) delete mode 100644 man/default_billing_project_id.Rd create mode 100644 man/default_project_id.Rd diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index df0fcbf..16e5550 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -26,31 +26,31 @@ jobs: matrix: config: # Spark 3.5 seems currently unavailable so we use 3.4 for now - # - os-name: ubuntu - # os-version: latest - # java: 17 - # spark: "3.4" - # r-version: release - # - os-name: ubuntu - # os-version: latest - # java: 8 - # spark: "2.4" - # r-version: devel - # - os-name: ubuntu - # os-version: "22.04" - # java: 11 - # spark: "3.0" - # r-version: oldrel - # - os-name: macos - # os-version: latest - # java: 8 - # spark: "3.2" - # r-version: release - # - os-name: macos - # os-version: latest - # java: 17 - # spark: "3.4" - # r-version: devel + - os-name: ubuntu + os-version: latest + java: 17 + spark: "3.4" + r-version: release + - os-name: ubuntu + os-version: latest + java: 8 + spark: "2.4" + r-version: devel + - os-name: ubuntu + os-version: "22.04" + java: 11 + spark: "3.0" + r-version: oldrel + - os-name: macos + os-version: latest + java: 8 + spark: "3.2" + r-version: release + - os-name: macos + os-version: latest + java: 17 + spark: "3.4" + r-version: devel - os-name: windows os-version: latest java: 8 @@ -63,9 +63,9 @@ 
jobs: r-version: release env: SPARK_VERSION: ${{ matrix.config.spark }} - BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }} - MATERIALIZATION_DATASET: test - GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json + BIGQUERY_BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }} + BIGQUERY_MATERIALIZATION_DATASET: test + BIGQUERY_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json R_DEFAULT_INTERNET_TIMEOUT: 1800 steps: @@ -148,7 +148,7 @@ jobs: ADC: ${{ secrets.GCLOUD_APPLICATION_CREDENTIALS }} shell: bash run: | - echo $ADC > $GOOGLE_APPLICATION_CREDENTIALS + echo $ADC > ${BIGQUERY_APPLICATION_CREDENTIALS} - name: Run R CMD check uses: r-lib/actions/check-r-package@v2 diff --git a/NAMESPACE b/NAMESPACE index 0592396..552d510 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,9 @@ # Generated by roxygen2: do not edit by hand export(bigquery_defaults) -export(default_billing_project_id) export(default_materialization_dataset) export(default_materialization_project) +export(default_project_id) export(default_service_account_key_file) export(spark_read_bigquery) export(spark_write_bigquery) diff --git a/R/defaults.R b/R/defaults.R index 7f1f1f3..0ba24a8 100644 --- a/R/defaults.R +++ b/R/defaults.R @@ -1,7 +1,6 @@ #' @title Google BigQuery Default Settings #' @description Sets default values for several Google BigQuery related settings. -#' @param billingProjectId Default Google Cloud Platform (GCP) project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. +#' @param projectId Default Google Cloud Platform (GCP) project ID to use. #' @param materializationProject Project to use for materializing SQL queries. See also #' \code{materializationDataset}. Defaults to the billing project (\code{billingProjectId}). 
#' @param materializationDataset Dataset (in materialization project) which is used for @@ -32,7 +31,7 @@ #' #' \code{\link{spark_write_bigquery}} #' -#' \code{\link{default_billing_project_id}} +#' \code{\link{default_project_id}} #' #' \code{\link{default_materialization_project}} #' @@ -41,8 +40,8 @@ #' \code{\link{default_service_account_key_file}} #' @keywords database connection #' @export -bigquery_defaults <- function(billingProjectId, - materializationProject = billingProjectId, +bigquery_defaults <- function(projectId, + materializationProject = projectId, materializationDataset = NULL, serviceAccountKeyFile = NULL) { if (is.null(serviceAccountKeyFile)) { @@ -53,19 +52,19 @@ bigquery_defaults <- function(billingProjectId, } options( - "sparkbq.default.billingProjectId" = billingProjectId, + "sparkbq.default.projectId" = projectId, "sparkbq.default.materializationProject" = materializationProject, "sparkbq.default.materializationDataset" = materializationDataset, "sparkbq.default.serviceAccountKeyFile" = serviceAccountKeyFile ) } -#' @title Default Google BigQuery Billing Project ID -#' @description Returns the default Google BigQuery billing project ID. +#' @title Default Google BigQuery Project ID +#' @description Returns the default Google BigQuery project ID. #' @seealso \code{\link{bigquery_defaults}} #' @export -default_billing_project_id <- function() { - getOption("sparkbq.default.billingProjectId") +default_project_id <- function() { + getOption("sparkbq.default.projectId") } #' @title Default Google BigQuery Materialization Project diff --git a/R/spark_read_bigquery.R b/R/spark_read_bigquery.R index e5cbf60..c2ffd62 100644 --- a/R/spark_read_bigquery.R +++ b/R/spark_read_bigquery.R @@ -3,11 +3,8 @@ #' @param sc \code{\link[sparklyr]{spark_connection}} provided by sparklyr. #' @param name The name to assign to the newly generated table (see also #' \code{\link[sparklyr]{spark_read_source}}). 
-#' @param billingProjectId Google Cloud Platform project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. -#' Defaults to \code{\link{default_billing_project_id}}. #' @param projectId Google Cloud Platform project ID of BigQuery dataset. -#' Defaults to \code{billingProjectId}. +#' Defaults to \code{\link{default_project_id}}. #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores). #' Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified. #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores). @@ -17,7 +14,7 @@ #' (SQL-2011). Legacy SQL is not supported. Tables are specified as #' \code{..}. #' @param materializationProject Project to use for materializing SQL queries. See also -#' \code{materializationDataset}. Defaults to billing project +#' \code{materializationDataset}. Defaults to project #' \code{\link{default_materialization_project}}. #' @param materializationDataset Dataset (in materialization project) which is used for #' materializing SQL queries (see \code{sqlQuery}). The GCP user @@ -66,7 +63,7 @@ #' sc <- spark_connect(master = "local", config = config) #' #' bigquery_defaults( -#' billingProjectId = "", +#' projectId = "", #' serviceAccountKeyFile = "") #' #' # Reading the public shakespeare data table @@ -84,8 +81,7 @@ #' @export spark_read_bigquery <- function(sc, name, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId = NULL, tableId = NULL, sqlQuery = NULL, diff --git a/R/spark_write_bigquery.R b/R/spark_write_bigquery.R index 5c3aab7..9a292de 100644 --- a/R/spark_write_bigquery.R +++ b/R/spark_write_bigquery.R @@ -4,11 +4,8 @@ #' Data is written directly to BigQuery using the #' \href{https://cloud.google.com/bigquery/docs/write-api}{BigQuery Storage Write API}. 
#' @param data Spark DataFrame to write to Google BigQuery. -#' @param billingProjectId Google Cloud Platform project ID for billing purposes. -#' This is the project on whose behalf to perform BigQuery operations. -#' Defaults to \code{default_billing_project_id()}. #' @param projectId Google Cloud Platform project ID of BigQuery dataset. -#' Defaults to \code{billingProjectId}. +#' Defaults to \code{default_project_id()}. #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores). #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores). #' @param serviceAccountKeyFile Google Cloud service account key file to use for authentication @@ -50,7 +47,7 @@ #' sc <- spark_connect(master = "local", config = config) #' #' bigquery_defaults( -#' billingProjectId = "", +#' projectId = "", #' serviceAccountKeyFile = "") #' #' # Copy mtcars to Spark @@ -65,8 +62,7 @@ #' @importFrom sparklyr spark_write_source #' @export spark_write_bigquery <- function(data, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId, tableId, serviceAccountKeyFile = default_service_account_key_file(), diff --git a/man/bigquery_defaults.Rd b/man/bigquery_defaults.Rd index 50e98e4..1dfc39c 100644 --- a/man/bigquery_defaults.Rd +++ b/man/bigquery_defaults.Rd @@ -5,15 +5,14 @@ \title{Google BigQuery Default Settings} \usage{ bigquery_defaults( - billingProjectId, - materializationProject = billingProjectId, + projectId, + materializationProject = projectId, materializationDataset = NULL, serviceAccountKeyFile = NULL ) } \arguments{ -\item{billingProjectId}{Default Google Cloud Platform (GCP) project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations.} +\item{projectId}{Default Google Cloud Platform (GCP) project ID to use.} \item{materializationProject}{Project to use for materializing SQL queries. 
See also \code{materializationDataset}. Defaults to the billing project (\code{billingProjectId}).} @@ -54,7 +53,7 @@ Sets default values for several Google BigQuery related settings. \code{\link{spark_write_bigquery}} -\code{\link{default_billing_project_id}} +\code{\link{default_project_id}} \code{\link{default_materialization_project}} diff --git a/man/default_billing_project_id.Rd b/man/default_billing_project_id.Rd deleted file mode 100644 index 2248673..0000000 --- a/man/default_billing_project_id.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/defaults.R -\name{default_billing_project_id} -\alias{default_billing_project_id} -\title{Default Google BigQuery Billing Project ID} -\usage{ -default_billing_project_id() -} -\description{ -Returns the default Google BigQuery billing project ID. -} -\seealso{ -\code{\link{bigquery_defaults}} -} diff --git a/man/default_project_id.Rd b/man/default_project_id.Rd new file mode 100644 index 0000000..7bf3ce8 --- /dev/null +++ b/man/default_project_id.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/defaults.R +\name{default_project_id} +\alias{default_project_id} +\title{Default Google BigQuery Project ID} +\usage{ +default_project_id() +} +\description{ +Returns the default Google BigQuery project ID. 
+} +\seealso{ +\code{\link{bigquery_defaults}} +} diff --git a/man/spark_read_bigquery.Rd b/man/spark_read_bigquery.Rd index 9e67a09..d08c9a2 100644 --- a/man/spark_read_bigquery.Rd +++ b/man/spark_read_bigquery.Rd @@ -7,8 +7,7 @@ spark_read_bigquery( sc, name, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId = NULL, tableId = NULL, sqlQuery = NULL, @@ -26,12 +25,8 @@ spark_read_bigquery( \item{name}{The name to assign to the newly generated table (see also \code{\link[sparklyr]{spark_read_source}}).} -\item{billingProjectId}{Google Cloud Platform project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations. -Defaults to \code{\link{default_billing_project_id}}.} - \item{projectId}{Google Cloud Platform project ID of BigQuery dataset. -Defaults to \code{billingProjectId}.} +Defaults to \code{\link{default_project_id}}.} \item{datasetId}{Google BigQuery dataset ID (may contain letters, numbers and underscores). Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified.} @@ -45,7 +40,7 @@ or \code{sqlQuery} must be specified. The query must be specified in standard SQ \code{..}.} \item{materializationProject}{Project to use for materializing SQL queries. See also -\code{materializationDataset}. Defaults to billing project +\code{materializationDataset}. 
Defaults to project \code{\link{default_materialization_project}}.} \item{materializationDataset}{Dataset (in materialization project) which is used for @@ -84,7 +79,7 @@ config <- spark_config() sc <- spark_connect(master = "local", config = config) bigquery_defaults( - billingProjectId = "", + projectId = "", serviceAccountKeyFile = "") # Reading the public shakespeare data table diff --git a/man/spark_write_bigquery.Rd b/man/spark_write_bigquery.Rd index d90cc2d..22f7712 100644 --- a/man/spark_write_bigquery.Rd +++ b/man/spark_write_bigquery.Rd @@ -6,8 +6,7 @@ \usage{ spark_write_bigquery( data, - billingProjectId = default_billing_project_id(), - projectId = billingProjectId, + projectId = default_project_id(), datasetId, tableId, serviceAccountKeyFile = default_service_account_key_file(), @@ -19,12 +18,8 @@ spark_write_bigquery( \arguments{ \item{data}{Spark DataFrame to write to Google BigQuery.} -\item{billingProjectId}{Google Cloud Platform project ID for billing purposes. -This is the project on whose behalf to perform BigQuery operations. -Defaults to \code{default_billing_project_id()}.} - \item{projectId}{Google Cloud Platform project ID of BigQuery dataset. 
-Defaults to \code{billingProjectId}.} +Defaults to \code{default_project_id()}.} \item{datasetId}{Google BigQuery dataset ID (may contain letters, numbers and underscores).} \item{tableId}{Google BigQuery table ID (may contain letters, numbers and underscores).} \item{serviceAccountKeyFile}{Google Cloud service account key file to use for authentication @@ -58,7 +53,7 @@ config <- spark_config() sc <- spark_connect(master = "local", config = config) bigquery_defaults( - billingProjectId = "", + projectId = "", serviceAccountKeyFile = "") # Copy mtcars to Spark diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index bf54f48..e60357a 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,8 +1,8 @@ # Setting some BigQuery defaults for use in tests bigquery_defaults( - billingProjectId = Sys.getenv("BILLING_PROJECT_ID"), - materializationDataset = Sys.getenv("MATERIALIZATION_DATASET"), - serviceAccountKeyFile = Sys.getenv("GOOGLE_APPLICATION_CREDENTIALS") + projectId = Sys.getenv("BIGQUERY_BILLING_PROJECT_ID"), + materializationDataset = Sys.getenv("BIGQUERY_MATERIALIZATION_DATASET"), + serviceAccountKeyFile = Sys.getenv("BIGQUERY_APPLICATION_CREDENTIALS") ) options(spark.version = Sys.getenv("SPARK_VERSION", "3.5"))