From b8ff031f090cf1b0b1a787d0c0700b2cd6e6511b Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Thu, 26 Dec 2024 16:28:25 -0600 Subject: [PATCH 01/12] feat: use pandas-gbq to determine schema in `load_table_from_dataframe` --- google/cloud/bigquery/_pandas_helpers.py | 33 ++++++++++++- google/cloud/bigquery/_pyarrow_helpers.py | 7 ++- pyproject.toml | 1 + testing/constraints-3.7.txt | 1 + tests/unit/test__pandas_helpers.py | 57 ++++++++++++++++++----- 5 files changed, 86 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index bf7d10c0f..3449063e2 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pandas.""" +"""Shared helper functions for connecting BigQuery and pandas. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py +""" import concurrent.futures from datetime import datetime @@ -39,6 +44,16 @@ else: import numpy + +try: + import pandas_gbq.schema.pandas_to_bigquery # type: ignore + + pandas_gbq_import_exception = None +except ImportError as exc: + pandas_gbq = None + pandas_gbq_import_exception = exc + + try: import db_dtypes # type: ignore @@ -429,6 +444,10 @@ def _first_array_valid(series): def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. + DEPRECATED: Use + pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(), + instead. See: go/pandas-gbq-and-bigframes-redundancy. + Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. @@ -444,6 +463,18 @@ def dataframe_to_bq_schema(dataframe, bq_schema): The automatically determined schema. Returns None if the type of any column cannot be determined. """ + if pandas_gbq is None: + warnings.warn( + "Loading pandas DataFrame into BigQuery will require pandas-gbq " + "package version 0.26.0 or greater in future. " + f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", + category=FutureWarning, + ) + else: + return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( + dataframe, override_bigquery_fields=bq_schema, + ) + if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) bq_schema_index = {field.name: field for field in bq_schema} diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 3c745a611..1b42cd5c7 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pyarrow.""" +"""Shared helper functions for connecting BigQuery and pyarrow. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. 
See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py +""" from typing import Any diff --git a/pyproject.toml b/pyproject.toml index ecf21d922..a0a4d854f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", + "pandas-gbq >= 0.26.0", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 55e63449f..2e11327d3 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -26,6 +26,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==20.0.0 pandas==1.1.0 +pandas-gbq==0.26.0 proto-plus==1.22.3 protobuf==3.20.2 pyarrow==3.0.0 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 3a5fddacc..97772b018 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -34,6 +34,11 @@ except ImportError: pandas = None +try: + import pandas_gbq.schema.pandas_to_bigquery +except ImportError: + pandas_gbq = None + try: import geopandas except ImportError: @@ -1280,7 +1285,19 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_named_index(module_under_test): +@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + got = module_under_test.dataframe_to_bq_schema(dataframe, []) + # Don't assert beyond this, since pandas-gbq is now source of truth. 
+ assert got is not None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1291,7 +1308,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): index = pandas.Index(["a", "b"], name="str_index") dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1303,7 +1321,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_multiindex(module_under_test): +def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1320,7 +1340,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): ) dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1334,7 +1355,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1349,7 +1372,8 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, ] - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1360,7 +1384,9 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, @@ -1388,7 +1414,9 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, 
"status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1418,7 +1446,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): +def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"struct_field": {"one": 2}, "status": "FOO"}, @@ -1442,8 +1472,10 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_dataframe_to_bq_schema_geography(module_under_test): +def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt + + monkeypatch.setattr(module_under_test, "pandas_gbq", None) df = geopandas.GeoDataFrame( pandas.DataFrame( @@ -1455,7 +1487,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ), geometry="geo1", ) - bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + + with pytest.warns(FutureWarning, match="pandas-gbq"): + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( schema.SchemaField("name", "STRING"), schema.SchemaField("geo1", "GEOGRAPHY"), From 04a9f11554c078b0f2779accf8eb1bf507499cd4 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 26 Dec 2024 22:31:24 +0000 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- google/cloud/bigquery/_pandas_helpers.py | 5 +++-- tests/unit/test__pandas_helpers.py | 24 ++++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 3449063e2..4341289fd 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -47,7 +47,7 @@ try: import pandas_gbq.schema.pandas_to_bigquery # type: ignore - + pandas_gbq_import_exception = None except ImportError as exc: pandas_gbq = None @@ -472,7 +472,8 @@ def dataframe_to_bq_schema(dataframe, bq_schema): ) else: return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( - dataframe, override_bigquery_fields=bq_schema, + dataframe, + override_bigquery_fields=bq_schema, ) if bq_schema: diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 97772b018..20a25eddf 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1286,7 +1286,9 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") -def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq(module_under_test, monkeypatch): +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq( + module_under_test, monkeypatch +): monkeypatch.setattr(module_under_test, "pandas_gbq", None) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) got = module_under_test.dataframe_to_bq_schema(dataframe, []) @@ -1297,7 +1299,7 @@ def 
test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq(module_under_test @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pandas_gbq", None) - + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1323,7 +1325,7 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "pandas_gbq", None) - + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1373,7 +1375,9 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): ] with pytest.warns(FutureWarning, match="pandas-gbq"): - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + returned_schema = module_under_test.dataframe_to_bq_schema( + dataframe, dict_schema + ) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1384,7 +1388,9 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test, monkeypatch): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow( + module_under_test, monkeypatch +): monkeypatch.setattr(module_under_test, "pandas_gbq", None) dataframe = pandas.DataFrame( @@ -1414,9 +1420,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test, mo @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test, monkeypatch): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow( + module_under_test, monkeypatch +): monkeypatch.setattr(module_under_test, "pandas_gbq", None) - + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1474,7 +1482,7 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkey @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt - + monkeypatch.setattr(module_under_test, "pandas_gbq", None) df = geopandas.GeoDataFrame( From be2dd1b017594e492834f6333dc4f121004fd329 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Fri, 27 Dec 2024 12:56:34 -0600 Subject: [PATCH 03/12] fix some unit tests --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test_client.py | 31 ++++++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 3449063e2..7e96de876 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -472,7 +472,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): ) else: return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( - dataframe, override_bigquery_fields=bq_schema, + dataframe, override_bigquery_fields=bq_schema, index=True, ) if bq_schema: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..feebf4c70 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ 
-8381,8 +8381,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): autospec=True, side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + pandas_gbq_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pandas_gbq", + new=None, + ) - with load_patch as load_table_from_file, get_table_patch: + with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8438,7 +8442,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8450,6 +8453,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] ), ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8570,10 +8574,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se client = self._make_client() dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8602,8 +8606,13 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), + assert ( + # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. 
+ tuple(sent_config.schema) == ( + SchemaField("x", "INT64", "NULLABLE", None), + ) or tuple(sent_config.schema) == ( + SchemaField("x", "INTEGER", "NULLABLE", None), + ) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8749,7 +8758,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): data=records, columns=["float_column", "array_column"] ) - expected_schema = [ + expected_schema_googlesql = [ SchemaField("float_column", "FLOAT"), SchemaField( "array_column", @@ -8757,6 +8766,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): mode="REPEATED", ), ] + expected_schema_legacy_sql = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), + ] load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -8792,7 +8809,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema + assert sent_config.schema == expected_schema_googlesql or sent_config.schema == expected_schema_legacy_sql def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") From 1606cd67e8a1ceec5913f56ffcbfcdabf903b391 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Fri, 27 Dec 2024 19:00:02 +0000 Subject: [PATCH 04/12] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_client.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index feebf4c70..2cd50684a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8608,11 +8608,9 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se assert sent_config.source_format == job.SourceFormat.PARQUET assert ( # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. 
- tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), - ) or tuple(sent_config.schema) == ( - SchemaField("x", "INTEGER", "NULLABLE", None), - ) + tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),) + or tuple(sent_config.schema) + == (SchemaField("x", "INTEGER", "NULLABLE", None),) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8809,7 +8807,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema_googlesql or sent_config.schema == expected_schema_legacy_sql + assert ( + sent_config.schema == expected_schema_googlesql + or sent_config.schema == expected_schema_legacy_sql + ) def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") From e90cdfb8d1d1b9dfe1adae588a9d7eb1022ecae0 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Fri, 3 Jan 2025 19:05:47 +0000 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .kokoro/docker/docs/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index fb6ffa272..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ From 29f336a9d9156f45a9e5b78077b0af82bfd2e757 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Tue, 7 Jan 2025 09:57:01 -0600 Subject: [PATCH 06/12] bump minimum pandas-gbq to 0.26.1 --- pyproject.toml | 2 +- testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a0a4d854f..c4aa80c9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", - "pandas-gbq >= 0.26.0", + "pandas-gbq >= 0.26.1", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 2e11327d3..e50a88ab7 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -26,7 +26,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==20.0.0 pandas==1.1.0 -pandas-gbq==0.26.0 +pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 pyarrow==3.0.0 From aff70218a9a80ff832b01e3e94095aa854e83448 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 7 Jan 2025 15:59:48 +0000 Subject: [PATCH 07/12] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .kokoro/docker/docs/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 27fab1d9a..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From f0ef6139556abd8bdba9e638cd17b9d621f9aa3a Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Tue, 21 Jan 2025 11:06:35 -0600 Subject: [PATCH 08/12] drop pandas-gbq from python 3.7 extras --- pyproject.toml | 4 +++- testing/constraints-3.7.txt | 1 - testing/constraints-3.8.txt | 9 +++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c4aa80c9d..c4e5c2f0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,9 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", - "pandas-gbq >= 0.26.1", + "pandas-gbq >= 0.26.1; python_version >= '3.8'", + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index e50a88ab7..55e63449f 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -26,7 +26,6 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==20.0.0 pandas==1.1.0 -pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 pyarrow==3.0.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index e5e73c5c7..9883fb8cc 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,2 +1,11 @@ grpcio==1.47.0 pandas==1.2.0 + +# This constraints file is used to check that lower bounds +# are correct in setup.py +# +# Pin the version to the lower bound. 
+# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +pandas-gbq==0.26.1 From f4c1a4fdb9885c4456f2c65d7c01c1381d9d3de1 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Wed, 22 Jan 2025 16:59:56 -0600 Subject: [PATCH 09/12] relax warning message text assertion --- tests/unit/test_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index be57e2eab..a5af37b6b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8940,7 +8940,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): load_table_from_file.assert_not_called() message = str(exc_context.value) - assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): From 76b7890140c824ec7b1d64935db3413af984aaa4 Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Wed, 22 Jan 2025 17:02:59 -0600 Subject: [PATCH 10/12] use consistent time zone presense/absense in time datetime system test --- tests/system/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 85c7b79e6..a9e76d416 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( dt=[ - datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime( 2020, 1, From d382e13555cf0472ea0795dac5394ca47ea6384b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 27 Jan 2025 09:35:29 -0600 Subject: [PATCH 11/12] Update google/cloud/bigquery/_pandas_helpers.py --- google/cloud/bigquery/_pandas_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 8c3c82f84..964434392 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -466,7 +466,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): if pandas_gbq is None: warnings.warn( "Loading pandas DataFrame into BigQuery will require pandas-gbq " - "package version 0.26.0 or greater in future. " + "package version 0.26.1 or greater in future. " f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", category=FutureWarning, ) From 672ae3373253736788eeb6df1dff78297e50f96c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 28 Jan 2025 09:22:20 -0600 Subject: [PATCH 12/12] Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_pandas_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 964434392..9ee0c3a37 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -466,7 +466,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): if pandas_gbq is None: warnings.warn( "Loading pandas DataFrame into BigQuery will require pandas-gbq " - "package version 0.26.1 or greater in future. " + "package version 0.26.1 or greater in the future. " f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", category=FutureWarning, )