bugfix: use the compute project instead of the write project when reading from BigQuery

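When calling `pandas_gbq.read_gbq` in `read_sql`, the wrong project ID was passed: the write project instead of the compute project. `read_gbq` accepts only a single `project_id`, which is the project the query job is executed in; there is no separate option for the project that holds the data. Queries must therefore reference the dataset's project explicitly, hence the changes in `list_tables` and `list_columns`, which now qualify the dataset name with the write project ID.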
carbonfact · Sep 27, 2024 · a514426 · a514426
1 parent 1d2e37c
commit a514426
Showing 1 changed file with 6 additions and 3 deletions.
diff --git a/lea/clients/bigquery.py b/lea/clients/bigquery.py
@@ -139,7 +139,10 @@ def read_sql(self, query: str) -> pd.DataFrame:
         return pandas_gbq.read_gbq(
             query,
             credentials=self.client._credentials,
-            project_id=self.write_project_id,
+            project_id=self.compute_project_id,
+            # project_id: the project to execute the job in
+            # NB: there is no option to specify the write project, so we need to correctly specify
+            # the project in the query.
             location=self.location,
             progress_bar_type=None,
         )
@@ -151,7 +154,7 @@ def list_tables(self):
             FORMAT('%s.%s', '{self.dataset_name}', table_id) AS table_reference,
             row_count AS n_rows,
             size_bytes AS n_bytes
-        FROM {self.dataset_name}.__TABLES__
+        FROM `{self.write_project_id}.{self.dataset_name}`.__TABLES__
         """
         )
 
@@ -162,7 +165,7 @@ def list_columns(self) -> pd.DataFrame:
             FORMAT('%s.%s', table_schema, table_name) AS table_reference,
             column_name AS column,
             data_type AS type
-        FROM {self.dataset_name}.INFORMATION_SCHEMA.COLUMNS
+        FROM `{self.write_project_id}.{self.dataset_name}`.INFORMATION_SCHEMA.COLUMNS
         """
         )