From 38b06e2dc1c5fb33c266c90eaccc8d122d859f7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?=
Date: Wed, 28 Aug 2024 11:25:16 -0500
Subject: [PATCH] deps: re-introduce support for numpy 1.24.x (#931)

---
 bigframes/dataframe.py       |  7 +++-
 bigframes/dtypes.py          | 79 ++++++++++++++++++++----------------
 bigframes/operations/base.py |  6 +--
 setup.py                     |  1 +
 testing/constraints-3.9.txt  |  1 +
 5 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index cb9c904121..17dde7021b 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -112,12 +112,15 @@ def __init__(
         *,
         session: typing.Optional[bigframes.session.Session] = None,
     ):
+        global bigframes
+
         if copy is not None and not copy:
             raise ValueError(
                 f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
             )
-        # just ignore object dtype if provided
-        if dtype in {numpy.dtypes.ObjectDType, "object"}:
+        # Ignore object dtype if provided, as it provides no additional
+        # information about what BigQuery type to use.
+        if dtype is not None and bigframes.dtypes.is_object_like(dtype):
             dtype = None
 
         # Check to see if constructing from BigQuery-backed objects before
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 563904fbb6..45c1e7e4e2 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -205,67 +205,74 @@ class SimpleDtypeInfo:
 
 
 ## dtype predicates - use these to maintain consistency
-def is_datetime_like(type: ExpressionType) -> bool:
-    return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE)
+def is_datetime_like(type_: ExpressionType) -> bool:
+    return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE)
 
 
-def is_date_like(type: ExpressionType) -> bool:
-    return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
+def is_date_like(type_: ExpressionType) -> bool:
+    return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
 
 
-def is_time_like(type: ExpressionType) -> bool:
-    return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
+def is_time_like(type_: ExpressionType) -> bool:
+    return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
 
 
-def is_binary_like(type: ExpressionType) -> bool:
-    return type in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
+def is_binary_like(type_: ExpressionType) -> bool:
+    return type_ in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
 
 
-def is_string_like(type: ExpressionType) -> bool:
-    return type in (STRING_DTYPE, BYTES_DTYPE)
+def is_object_like(type_: Union[ExpressionType, str]) -> bool:
+    # See: https://stackoverflow.com/a/40312924/101923 and
+    # https://numpy.org/doc/stable/reference/generated/numpy.dtype.kind.html
+    # for the way to identify object type.
+    return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"
 
 
-def is_array_like(type: ExpressionType) -> bool:
-    return isinstance(type, pd.ArrowDtype) and isinstance(
-        type.pyarrow_dtype, pa.ListType
+def is_string_like(type_: ExpressionType) -> bool:
+    return type_ in (STRING_DTYPE, BYTES_DTYPE)
+
+
+def is_array_like(type_: ExpressionType) -> bool:
+    return isinstance(type_, pd.ArrowDtype) and isinstance(
+        type_.pyarrow_dtype, pa.ListType
     )
 
 
-def is_array_string_like(type: ExpressionType) -> bool:
+def is_array_string_like(type_: ExpressionType) -> bool:
     return (
-        isinstance(type, pd.ArrowDtype)
-        and isinstance(type.pyarrow_dtype, pa.ListType)
-        and pa.types.is_string(type.pyarrow_dtype.value_type)
+        isinstance(type_, pd.ArrowDtype)
+        and isinstance(type_.pyarrow_dtype, pa.ListType)
+        and pa.types.is_string(type_.pyarrow_dtype.value_type)
     )
 
 
-def is_struct_like(type: ExpressionType) -> bool:
-    return isinstance(type, pd.ArrowDtype) and isinstance(
-        type.pyarrow_dtype, pa.StructType
+def is_struct_like(type_: ExpressionType) -> bool:
+    return isinstance(type_, pd.ArrowDtype) and isinstance(
+        type_.pyarrow_dtype, pa.StructType
     )
 
 
-def is_json_like(type: ExpressionType) -> bool:
+def is_json_like(type_: ExpressionType) -> bool:
     # TODO: Add JSON type support
-    return type == STRING_DTYPE
+    return type_ == STRING_DTYPE
 
 
-def is_json_encoding_type(type: ExpressionType) -> bool:
+def is_json_encoding_type(type_: ExpressionType) -> bool:
     # Types can be converted into JSON.
     # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
-    return type != GEO_DTYPE
+    return type_ != GEO_DTYPE
 
 
-def is_numeric(type: ExpressionType) -> bool:
-    return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
+def is_numeric(type_: ExpressionType) -> bool:
+    return type_ in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
 
 
-def is_iterable(type: ExpressionType) -> bool:
-    return type in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type)
+def is_iterable(type_: ExpressionType) -> bool:
+    return type_ in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type_)
 
 
-def is_comparable(type: ExpressionType) -> bool:
-    return (type is not None) and is_orderable(type)
+def is_comparable(type_: ExpressionType) -> bool:
+    return (type_ is not None) and is_orderable(type_)
 
 
 _ORDERABLE_SIMPLE_TYPES = set(
@@ -273,9 +280,9 @@ def is_comparable(type: ExpressionType) -> bool:
 )
 
 
-def is_orderable(type: ExpressionType) -> bool:
+def is_orderable(type_: ExpressionType) -> bool:
     # On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable
-    return type in _ORDERABLE_SIMPLE_TYPES
+    return type_ in _ORDERABLE_SIMPLE_TYPES
 
 
 _CLUSTERABLE_SIMPLE_TYPES = set(
@@ -283,15 +290,15 @@ def is_orderable(type: ExpressionType) -> bool:
 )
 
 
-def is_clusterable(type: ExpressionType) -> bool:
+def is_clusterable(type_: ExpressionType) -> bool:
     # https://cloud.google.com/bigquery/docs/clustered-tables#cluster_column_types
     # This is based on default database type mapping, could in theory represent in non-default bq type to cluster.
-    return type in _CLUSTERABLE_SIMPLE_TYPES
+    return type_ in _CLUSTERABLE_SIMPLE_TYPES
 
 
-def is_bool_coercable(type: ExpressionType) -> bool:
+def is_bool_coercable(type_: ExpressionType) -> bool:
     # TODO: Implement more bool coercions
-    return (type is None) or is_numeric(type) or is_string_like(type)
+    return (type_ is None) or is_numeric(type_) or is_string_like(type_)
 
 
 BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = {
diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py
index 1daa1ea5ae..2f87045415 100644
--- a/bigframes/operations/base.py
+++ b/bigframes/operations/base.py
@@ -18,7 +18,6 @@
 from typing import List, Sequence
 
 import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing
-import numpy
 import pandas as pd
 
 import bigframes.constants as constants
@@ -49,8 +48,9 @@ def __init__(
     ):
         import bigframes.pandas
 
-        # just ignore object dtype if provided
-        if dtype in {numpy.dtypes.ObjectDType, "object"}:
+        # Ignore object dtype if provided, as it provides no additional
+        # information about what BigQuery type to use.
+        if dtype is not None and bigframes.dtypes.is_object_like(dtype):
             dtype = None
 
         read_pandas_func = (
diff --git a/setup.py b/setup.py
index 79baf1fb23..0e0be5fd77 100644
--- a/setup.py
+++ b/setup.py
@@ -49,6 +49,7 @@
     "google-cloud-storage >=2.0.0",
    "ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
     "jellyfish >=0.8.9",
+    "numpy >=1.24.0",
     # TODO: Relax upper bound once we have fixed `system_prerelease` tests.
     "pandas >=1.5.0",
     "pyarrow >=8.0.0",
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index 5a76698576..0d3f16e95f 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -14,6 +14,7 @@ google-cloud-resource-manager==1.10.3
 google-cloud-storage==2.0.0
 ibis-framework==8.0.0
 jellyfish==0.8.9
+numpy==1.24.0
 pandas==1.5.0
 pyarrow==8.0.0
 pydata-google-auth==1.8.2
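Note on the dtype check replaced above: the old code compared against numpy.dtypes.ObjectDType, and the numpy.dtypes module is only available on NumPy 1.25 and newer, so that lookup raises AttributeError under numpy 1.24.x. Below is a rough, standalone sketch (not part of the patch) of how the new is_object_like predicate behaves; the function body is copied from the bigframes/dtypes.py hunk, while the Any annotation, imports, and assert lines are illustrative additions so the snippet runs without a bigframes install.

    from typing import Any

    import numpy as np
    import pandas as pd


    def is_object_like(type_: Any) -> bool:
        # Same body as the patch: match the "object"/"O" aliases or any dtype
        # whose kind code is "O", without touching numpy.dtypes.
        return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"


    # Object-like inputs that the DataFrame/Series constructors now ignore as a dtype hint.
    assert is_object_like("object")
    assert is_object_like("O")
    assert is_object_like(np.dtype("object"))            # kind == "O"; works on numpy 1.24.x
    assert is_object_like(pd.Series([{"a": 1}]).dtype)   # object-dtyped pandas data

    # Concrete dtypes still pass through unchanged.
    assert not is_object_like(pd.Int64Dtype())
    assert not is_object_like(np.dtype("int64"))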