From 1f33b0d36f576e9c9c25b29bc82d22f9f8b2ce33 Mon Sep 17 00:00:00 2001 From: Romain Champourlier Date: Fri, 27 Sep 2024 20:08:34 +0200 Subject: [PATCH] bugfix: using the write project instead of the compute one (#47) When using `pandas_gbq.read_gbq`, the incorrect project ID was passed (write instead of compute). Since this method accepts a single project ID (the project the job is executed in) and has no separate option for the write project, we also need to explicitly reference the dataset's project in the query, hence the changes in `list_tables` and `list_columns`. --- lea/clients/bigquery.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lea/clients/bigquery.py b/lea/clients/bigquery.py index a33df14..cff4eee 100644 --- a/lea/clients/bigquery.py +++ b/lea/clients/bigquery.py @@ -139,7 +139,10 @@ def read_sql(self, query: str) -> pd.DataFrame: return pandas_gbq.read_gbq( query, credentials=self.client._credentials, - project_id=self.write_project_id, + project_id=self.compute_project_id, + # project_id: the project to execute the job in + # NB: there is no option to specify the write project, so we need to correctly specify + # the project in the query. location=self.location, progress_bar_type=None, ) @@ -151,7 +154,7 @@ def list_tables(self): FORMAT('%s.%s', '{self.dataset_name}', table_id) AS table_reference, row_count AS n_rows, size_bytes AS n_bytes - FROM {self.dataset_name}.__TABLES__ + FROM `{self.write_project_id}.{self.dataset_name}`.__TABLES__ """ ) @@ -162,7 +165,7 @@ def list_columns(self) -> pd.DataFrame: FORMAT('%s.%s', table_schema, table_name) AS table_reference, column_name AS column, data_type AS type - FROM {self.dataset_name}.INFORMATION_SCHEMA.COLUMNS + FROM `{self.write_project_id}.{self.dataset_name}`.INFORMATION_SCHEMA.COLUMNS """ )