From e6b59582f6df3e8b3c761acb27a2d1aaa302f3aa Mon Sep 17 00:00:00 2001 From: Violetta Mishechkina Date: Sun, 11 Aug 2024 21:10:09 +0200 Subject: [PATCH] Allow different from credentials project_id --- dlt/destinations/impl/bigquery/bigquery.py | 1 + .../impl/bigquery/configuration.py | 4 ++- dlt/destinations/impl/bigquery/sql_client.py | 8 ++++-- .../dlt-ecosystem/destinations/bigquery.md | 12 ++++++++ tests/load/bigquery/test_bigquery_client.py | 28 +++++++++++++++++++ 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index ef4e31acd1..c6bf2e7654 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -225,6 +225,7 @@ def __init__( config.credentials, capabilities, config.get_location(), + config.project_id, config.http_timeout, config.retry_deadline, ) diff --git a/dlt/destinations/impl/bigquery/configuration.py b/dlt/destinations/impl/bigquery/configuration.py index 47cc997a4a..3d71b0c8ea 100644 --- a/dlt/destinations/impl/bigquery/configuration.py +++ b/dlt/destinations/impl/bigquery/configuration.py @@ -1,6 +1,6 @@ import dataclasses import warnings -from typing import ClassVar, List, Final +from typing import ClassVar, List, Final, Optional from dlt.common.configuration import configspec from dlt.common.configuration.specs import GcpServiceAccountCredentials @@ -14,6 +14,8 @@ class BigQueryClientConfiguration(DestinationClientDwhWithStagingConfiguration): destination_type: Final[str] = dataclasses.field(default="bigquery", init=False, repr=False, compare=False) # type: ignore credentials: GcpServiceAccountCredentials = None location: str = "US" + project_id: Optional[str] = None + """Note that this is the BigQuery project_id, which could be different from credentials.project_id""" has_case_sensitive_identifiers: bool = True """If True then dlt expects to load data into case sensitive dataset""" 
should_set_case_sensitivity_on_new_dataset: bool = False diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index dfc4094e7b..c56742f1ff 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -82,14 +82,16 @@ def __init__( credentials: GcpServiceAccountCredentialsWithoutDefaults, capabilities: DestinationCapabilitiesContext, location: str = "US", + project_id: Optional[str] = None, http_timeout: float = 15.0, retry_deadline: float = 60.0, ) -> None: self._client: bigquery.Client = None self.credentials: GcpServiceAccountCredentialsWithoutDefaults = credentials self.location = location + self.project_id = project_id or self.credentials.project_id self.http_timeout = http_timeout - super().__init__(credentials.project_id, dataset_name, staging_dataset_name, capabilities) + super().__init__(self.project_id, dataset_name, staging_dataset_name, capabilities) self._default_retry = bigquery.DEFAULT_RETRY.with_deadline(retry_deadline) self._default_query = bigquery.QueryJobConfig( @@ -100,7 +102,7 @@ def __init__( @raise_open_connection_error def open_connection(self) -> bigquery.Client: self._client = bigquery.Client( - self.credentials.project_id, + self.project_id, credentials=self.credentials.to_native_credentials(), location=self.location, ) @@ -240,7 +242,7 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB conn.close() def catalog_name(self, escape: bool = True) -> Optional[str]: - project_id = self.capabilities.casefold_identifier(self.credentials.project_id) + project_id = self.capabilities.casefold_identifier(self.project_id) if escape: project_id = self.capabilities.escape_identifier(project_id) return project_id diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md index 51d124251a..334e08c4a7 100644 --- 
a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md +++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md @@ -112,6 +112,18 @@ VMs available on GCP (cloud functions, Composer runners, Colab notebooks) have a location = "US" ``` +### Using Different `project_id` + +You can set the `project_id` in your configuration to be different from the one in your credentials, provided your account has access to it: +```toml +[destination.bigquery] +project_id = "project_id_destination" + +[destination.bigquery.credentials] +project_id = "project_id_credentials" +``` +In this scenario, `project_id_credentials` will be used for authentication, while `project_id_destination` will be used as the data destination. + ## Write Disposition All write dispositions are supported. diff --git a/tests/load/bigquery/test_bigquery_client.py b/tests/load/bigquery/test_bigquery_client.py index 80bd008730..c92f18e159 100644 --- a/tests/load/bigquery/test_bigquery_client.py +++ b/tests/load/bigquery/test_bigquery_client.py @@ -32,6 +32,7 @@ prepare_table, yield_client_with_storage, cm_yield_client_with_storage, + cm_yield_client, ) # mark all tests as essential, do not remove @@ -53,6 +54,18 @@ def auto_delete_storage() -> None: delete_test_storage() +@pytest.fixture +def bigquery_project_id() -> Iterator[str]: + project_id = "different_project_id" + project_id_key = "DESTINATION__BIGQUERY__PROJECT_ID" + saved_project_id = os.environ.get(project_id_key) + os.environ[project_id_key] = project_id + yield project_id + del os.environ[project_id_key] + if saved_project_id: + os.environ[project_id_key] = saved_project_id + + def test_service_credentials_with_default(environment: Any) -> None: gcpc = GcpServiceAccountCredentials() # resolve will miss values and try to find default credentials on the machine @@ -247,6 +260,21 @@ def test_bigquery_configuration() -> None: ) +def test_bigquery_different_project_id(bigquery_project_id) -> None: + """Test scenario when bigquery project_id 
is different from the GCP credentials project_id.""" + config = resolve_configuration( + BigQueryClientConfiguration()._bind_dataset_name(dataset_name="dataset"), + sections=("destination", "bigquery"), + ) + assert config.project_id == bigquery_project_id + with cm_yield_client( + "bigquery", + dataset_name="dataset", + default_config_values={"project_id": bigquery_project_id}, + ) as client: + assert bigquery_project_id in client.sql_client.catalog_name() + + def test_bigquery_autodetect_configuration(client: BigQueryClient) -> None: # no schema autodetect assert client._should_autodetect_schema("event_slot") is False