deps: re-introduce support for numpy 1.24.x #931

Merged 3 commits on Aug 28, 2024
Changes from 1 commit
7 changes: 5 additions & 2 deletions bigframes/dataframe.py
@@ -112,12 +112,15 @@ def __init__(
*,
session: typing.Optional[bigframes.session.Session] = None,
):
global bigframes

if copy is not None and not copy:
raise ValueError(
f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
)
# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

# Check to see if constructing from BigQuery-backed objects before
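As a hedged sketch of the user-facing effect of this hunk (not part of the PR itself): an object dtype passed to the constructor is now detected via bigframes.dtypes.is_object_like and simply ignored, instead of being matched against numpy.dtypes.ObjectDType, which does not exist on numpy 1.24.x. The same treatment is applied to Series construction in bigframes/operations/base.py further down. The snippet assumes bigframes is installed and a BigQuery session is configured, since bigframes.pandas needs a session to materialize data.

```python
# Minimal sketch, assuming a configured BigQuery session; both calls should
# behave the same as passing no dtype at all, on numpy 1.24.x and newer.
import numpy
import pandas as pd

import bigframes.pandas as bpd

pdf = pd.DataFrame({"name": ["a", "b"], "value": [1, 2]})

df1 = bpd.DataFrame(pdf, dtype="object")
df2 = bpd.DataFrame(pdf, dtype=numpy.dtype("object"))
```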
79 changes: 43 additions & 36 deletions bigframes/dtypes.py
@@ -205,93 +205,100 @@ class SimpleDtypeInfo:


## dtype predicates - use these to maintain consistency
def is_datetime_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE)
def is_datetime_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE)

Review comment (Collaborator, Author): type is a built-in. Avoid conflicts by using type_.

PEP 8, "Descriptive: Naming Styles": "single_trailing_underscore_: used by convention to avoid conflicts with Python keyword".
https://peps.python.org/pep-0008/#descriptive-naming-styles

In this case, type is a function, not a keyword, so the rename is not strictly needed, but type is a common enough function that it is best to avoid the conflict, IMO.
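A tiny, hypothetical illustration of the shadowing concern described in the comment above (the function names are made up for this sketch):

```python
# Hypothetical sketch: once a parameter is named `type`, the built-in of the
# same name is no longer reachable by that name inside the function body.
def kind_of(type):
    # `type` here refers to the argument, not the built-in.
    return repr(type)


def kind_of_(type_):
    # With the trailing underscore, the built-in type() stays available.
    return type(type_).__name__


print(kind_of(42))   # 42
print(kind_of_(42))  # int
```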


def is_date_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
def is_date_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)


def is_time_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
def is_time_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)


def is_binary_like(type: ExpressionType) -> bool:
return type in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
def is_binary_like(type_: ExpressionType) -> bool:
return type_ in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)


def is_string_like(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE)
def is_object_like(type_: Union[ExpressionType, str]) -> bool:
# See: https://stackoverflow.com/a/40312924/101923 and
# https://numpy.org/doc/stable/reference/generated/numpy.dtype.kind.html
# for the way to identify object type.
return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"


def is_array_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.ListType
def is_string_like(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE)


def is_array_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.ListType
)


def is_array_string_like(type: ExpressionType) -> bool:
def is_array_string_like(type_: ExpressionType) -> bool:
return (
isinstance(type, pd.ArrowDtype)
and isinstance(type.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type.pyarrow_dtype.value_type)
isinstance(type_, pd.ArrowDtype)
and isinstance(type_.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type_.pyarrow_dtype.value_type)
)


def is_struct_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.StructType
def is_struct_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.StructType
)


def is_json_like(type: ExpressionType) -> bool:
def is_json_like(type_: ExpressionType) -> bool:
# TODO: Add JSON type support
return type == STRING_DTYPE
return type_ == STRING_DTYPE


def is_json_encoding_type(type: ExpressionType) -> bool:
def is_json_encoding_type(type_: ExpressionType) -> bool:
# Types can be converted into JSON.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
return type != GEO_DTYPE
return type_ != GEO_DTYPE


def is_numeric(type: ExpressionType) -> bool:
return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
def is_numeric(type_: ExpressionType) -> bool:
return type_ in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE


def is_iterable(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type)
def is_iterable(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type_)


def is_comparable(type: ExpressionType) -> bool:
return (type is not None) and is_orderable(type)
def is_comparable(type_: ExpressionType) -> bool:
return (type_ is not None) and is_orderable(type_)


_ORDERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.orderable
)


def is_orderable(type: ExpressionType) -> bool:
def is_orderable(type_: ExpressionType) -> bool:
# On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable
return type in _ORDERABLE_SIMPLE_TYPES
return type_ in _ORDERABLE_SIMPLE_TYPES


_CLUSTERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.clusterable
)


def is_clusterable(type: ExpressionType) -> bool:
def is_clusterable(type_: ExpressionType) -> bool:
# https://cloud.google.com/bigquery/docs/clustered-tables#cluster_column_types
# This is based on default database type mapping, could in theory represent in non-default bq type to cluster.
return type in _CLUSTERABLE_SIMPLE_TYPES
return type_ in _CLUSTERABLE_SIMPLE_TYPES


def is_bool_coercable(type: ExpressionType) -> bool:
def is_bool_coercable(type_: ExpressionType) -> bool:
# TODO: Implement more bool coercions
return (type is None) or is_numeric(type) or is_string_like(type)
return (type_ is None) or is_numeric(type_) or is_string_like(type_)


BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = {
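The new is_object_like predicate is what removes the reference to numpy.dtypes.ObjectDType: the numpy.dtypes module only exists in NumPy 1.25 and later, while the dtype "kind" code "O" is available on 1.24.x as well. A standalone copy of the check for illustration (the assertions are illustrative and not part of the PR):

```python
# Standalone copy of the predicate for illustration; the real function lives in
# bigframes/dtypes.py and is typed against ExpressionType.
import numpy
import pandas as pd


def is_object_like(type_) -> bool:
    # "O" is the numpy dtype.kind code for object dtypes, so this works on
    # numpy 1.24 without touching numpy.dtypes (added in numpy 1.25).
    return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"


assert is_object_like("object")
assert is_object_like("O")
assert is_object_like(numpy.dtype("object"))
assert is_object_like(pd.Series([object()]).dtype)
assert not is_object_like(numpy.dtype("int64"))
assert not is_object_like(None)
```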
6 changes: 3 additions & 3 deletions bigframes/operations/base.py
@@ -18,7 +18,6 @@
from typing import List, Sequence

import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing
import numpy
import pandas as pd

import bigframes.constants as constants
@@ -49,8 +48,9 @@ def __init__(
):
import bigframes.pandas

# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

read_pandas_func = (
1 change: 1 addition & 0 deletions setup.py
@@ -49,6 +49,7 @@
"google-cloud-storage >=2.0.0",
"ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
"jellyfish >=0.8.9",
"numpy >=1.24.0",
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
"pandas >=1.5.0",
"pyarrow >=8.0.0",
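The explicit numpy floor pairs with the dtypes change above. As a hedged note: the numpy.dtypes module (and numpy.dtypes.ObjectDType with it) was only added in NumPy 1.25, so the old membership check raised AttributeError on 1.24.x, while the new kind-based check avoids that module entirely.

```python
# Illustration of the incompatibility being worked around (not part of the PR):
import numpy

try:
    numpy.dtypes.ObjectDType  # available on numpy >= 1.25
except AttributeError:
    # numpy 1.24.x has no numpy.dtypes module, which is why the old
    # `dtype in {numpy.dtypes.ObjectDType, "object"}` check failed there.
    print("numpy.dtypes is unavailable on this numpy version")
```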
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
@@ -14,6 +14,7 @@ google-cloud-resource-manager==1.10.3
google-cloud-storage==2.0.0
ibis-framework==8.0.0
jellyfish==0.8.9
numpy==1.24.0
pandas==1.5.0
pyarrow==8.0.0
pydata-google-auth==1.8.2