Skip to content

Commit

Permalink
Allow different from credentials project_id
Browse files Browse the repository at this point in the history
  • Loading branch information
VioletM committed Aug 11, 2024
1 parent dd8485f commit e6b5958
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 4 deletions.
1 change: 1 addition & 0 deletions dlt/destinations/impl/bigquery/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def __init__(
config.credentials,
capabilities,
config.get_location(),
config.project_id,
config.http_timeout,
config.retry_deadline,
)
Expand Down
4 changes: 3 additions & 1 deletion dlt/destinations/impl/bigquery/configuration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import dataclasses
import warnings
from typing import ClassVar, List, Final
from typing import ClassVar, List, Final, Optional

from dlt.common.configuration import configspec
from dlt.common.configuration.specs import GcpServiceAccountCredentials
Expand All @@ -14,6 +14,8 @@ class BigQueryClientConfiguration(DestinationClientDwhWithStagingConfiguration):
destination_type: Final[str] = dataclasses.field(default="bigquery", init=False, repr=False, compare=False) # type: ignore
credentials: GcpServiceAccountCredentials = None
location: str = "US"
project_id: Optional[str] = None
"""Note, that this is BigQuery project_id which could be different from credentials.project_id"""
has_case_sensitive_identifiers: bool = True
"""If True then dlt expects to load data into case sensitive dataset"""
should_set_case_sensitivity_on_new_dataset: bool = False
Expand Down
8 changes: 5 additions & 3 deletions dlt/destinations/impl/bigquery/sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,16 @@ def __init__(
credentials: GcpServiceAccountCredentialsWithoutDefaults,
capabilities: DestinationCapabilitiesContext,
location: str = "US",
project_id: Optional[str] = None,
http_timeout: float = 15.0,
retry_deadline: float = 60.0,
) -> None:
self._client: bigquery.Client = None
self.credentials: GcpServiceAccountCredentialsWithoutDefaults = credentials
self.location = location
self.project_id = project_id or self.credentials.project_id
self.http_timeout = http_timeout
super().__init__(credentials.project_id, dataset_name, staging_dataset_name, capabilities)
super().__init__(self.project_id, dataset_name, staging_dataset_name, capabilities)

self._default_retry = bigquery.DEFAULT_RETRY.with_deadline(retry_deadline)
self._default_query = bigquery.QueryJobConfig(
Expand All @@ -100,7 +102,7 @@ def __init__(
@raise_open_connection_error
def open_connection(self) -> bigquery.Client:
self._client = bigquery.Client(
self.credentials.project_id,
self.project_id,
credentials=self.credentials.to_native_credentials(),
location=self.location,
)
Expand Down Expand Up @@ -240,7 +242,7 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB
conn.close()

def catalog_name(self, escape: bool = True) -> Optional[str]:
    """Return the BigQuery project id used as the catalog name.

    The id is read from ``self.project_id`` rather than from the credentials,
    so a destination project that differs from the credentials' project is
    honoured here.

    Args:
        escape: When True (the default), the case-folded id is additionally
            passed through ``capabilities.escape_identifier``.

    Returns:
        The case-folded project id, escaped when ``escape`` is True.
    """
    # Single source of truth: no intermediate read of credentials.project_id,
    # which would be overwritten immediately and is not the project in use.
    project_id = self.capabilities.casefold_identifier(self.project_id)
    if escape:
        project_id = self.capabilities.escape_identifier(project_id)
    return project_id
Expand Down
12 changes: 12 additions & 0 deletions docs/website/docs/dlt-ecosystem/destinations/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,18 @@ VMs available on GCP (cloud functions, Composer runners, Colab notebooks) have a
location = "US"
```

### Using Different `project_id`

You can set the `project_id` in your configuration to be different from the one in your credentials, provided your account has access to it:
```toml
[destination.bigquery]
project_id = "project_id_destination"

[destination.bigquery.credentials]
project_id = "project_id_credentials"
```
In this scenario, `project_id_credentials` will be used for authentication, while `project_id_destination` will be used as the data destination.

## Write Disposition

All write dispositions are supported.
Expand Down
28 changes: 28 additions & 0 deletions tests/load/bigquery/test_bigquery_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
prepare_table,
yield_client_with_storage,
cm_yield_client_with_storage,
cm_yield_client,
)

# mark all tests as essential, do not remove
Expand All @@ -53,6 +54,18 @@ def auto_delete_storage() -> None:
delete_test_storage()


@pytest.fixture
def bigquery_project_id() -> Iterator[str]:
    """Temporarily set the BigQuery destination project id via the environment.

    Sets ``DESTINATION__BIGQUERY__PROJECT_ID`` to a fixed test value, yields
    that value to the test, and afterwards puts the variable back exactly as
    it was found (including removing it if it was not set before).

    Yields:
        The project id written to the environment variable.
    """
    project_id = "different_project_id"
    project_id_key = "DESTINATION__BIGQUERY__PROJECT_ID"
    saved_project_id = os.environ.get(project_id_key)
    os.environ[project_id_key] = project_id
    try:
        yield project_id
    finally:
        if saved_project_id is None:
            # The variable did not exist before; pop() tolerates the test
            # having already removed it, where `del` would raise KeyError.
            os.environ.pop(project_id_key, None)
        else:
            # Compare against None, not truthiness, so a previously set
            # empty-string value is restored as well.
            os.environ[project_id_key] = saved_project_id


def test_service_credentials_with_default(environment: Any) -> None:
gcpc = GcpServiceAccountCredentials()
# resolve will miss values and try to find default credentials on the machine
Expand Down Expand Up @@ -247,6 +260,21 @@ def test_bigquery_configuration() -> None:
)


def test_bigquery_different_project_id(bigquery_project_id) -> None:
    """Verify that a BigQuery project_id set outside the credentials is picked up.

    First the resolved configuration must expose the project id supplied by the
    ``bigquery_project_id`` fixture; then a client built with that value must
    include it in the catalog name reported by its SQL client.
    """
    unresolved = BigQueryClientConfiguration()._bind_dataset_name(dataset_name="dataset")
    config = resolve_configuration(unresolved, sections=("destination", "bigquery"))
    assert config.project_id == bigquery_project_id

    overrides = {"project_id": bigquery_project_id}
    with cm_yield_client(
        "bigquery", dataset_name="dataset", default_config_values=overrides
    ) as client:
        catalog = client.sql_client.catalog_name()
        assert bigquery_project_id in catalog


def test_bigquery_autodetect_configuration(client: BigQueryClient) -> None:
# no schema autodetect
assert client._should_autodetect_schema("event_slot") is False
Expand Down

0 comments on commit e6b5958

Please sign in to comment.