Skip to content

Commit

Permalink
deps: re-introduce support for numpy 1.24.x (#931)
Browse files (browse the repository at this point in the history)
  • Loading branch information
tswast authored Aug 28, 2024
1 parent 189f147 commit 3d71913
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 41 deletions.
7 changes: 5 additions & 2 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,15 @@ def __init__(
*,
session: typing.Optional[bigframes.session.Session] = None,
):
global bigframes

if copy is not None and not copy:
raise ValueError(
f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
)
# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

# Check to see if constructing from BigQuery-backed objects before
Expand Down
79 changes: 43 additions & 36 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,93 +205,100 @@ class SimpleDtypeInfo:


## dtype predicates - use these to maintain consistency
def is_datetime_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE)
def is_datetime_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE)


def is_date_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
def is_date_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)


def is_time_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
def is_time_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)


def is_binary_like(type: ExpressionType) -> bool:
return type in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
def is_binary_like(type_: ExpressionType) -> bool:
return type_ in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)


def is_string_like(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE)
def is_object_like(type_: Union[ExpressionType, str]) -> bool:
    """Return True if ``type_`` denotes the generic "object" dtype.

    The object dtype carries no information about which BigQuery type to
    use, so callers use this predicate to decide whether a user-supplied
    ``dtype`` should simply be ignored.

    Args:
        type_: A dtype specification. Recognized object spellings are the
            strings ``"object"`` / ``"O"``, the builtin ``object`` class,
            and any dtype-like value whose ``kind`` attribute is ``"O"``
            (for example ``numpy.dtype("O")``).

    Returns:
        True when ``type_`` is an object dtype, False otherwise (including
        for ``None``).
    """
    # ``dtype=object`` (the builtin class) is the canonical way to request
    # the object dtype, but the class has no ``kind`` attribute and does not
    # compare equal to the string aliases, so it needs its own check.
    if type_ is object:
        return True

    if type_ in ("object", "O"):
        return True

    # See: https://stackoverflow.com/a/40312924/101923 and
    # https://numpy.org/doc/stable/reference/generated/numpy.dtype.kind.html
    # for the way to identify object type. Checking ``kind`` instead of
    # ``numpy.dtypes.ObjectDType`` keeps this working on numpy 1.24.x, which
    # does not expose the ``numpy.dtypes`` module.
    if getattr(type_, "kind", None) != "O":
        return False

    # pandas extension dtypes report kind "O" unless they override it, so a
    # pyarrow-backed extension dtype (identified by ``storage == "pyarrow"``)
    # would otherwise be mistaken for a plain object dtype and discarded.
    return getattr(type_, "storage", None) != "pyarrow"


def is_array_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.ListType
def is_string_like(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE)


def is_array_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.ListType
)


def is_array_string_like(type: ExpressionType) -> bool:
def is_array_string_like(type_: ExpressionType) -> bool:
return (
isinstance(type, pd.ArrowDtype)
and isinstance(type.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type.pyarrow_dtype.value_type)
isinstance(type_, pd.ArrowDtype)
and isinstance(type_.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type_.pyarrow_dtype.value_type)
)


def is_struct_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.StructType
def is_struct_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.StructType
)


def is_json_like(type: ExpressionType) -> bool:
def is_json_like(type_: ExpressionType) -> bool:
# TODO: Add JSON type support
return type == STRING_DTYPE
return type_ == STRING_DTYPE


def is_json_encoding_type(type: ExpressionType) -> bool:
def is_json_encoding_type(type_: ExpressionType) -> bool:
# Types can be converted into JSON.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
return type != GEO_DTYPE
return type_ != GEO_DTYPE


def is_numeric(type: ExpressionType) -> bool:
return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
def is_numeric(type_: ExpressionType) -> bool:
return type_ in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE


def is_iterable(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type)
def is_iterable(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type_)


def is_comparable(type: ExpressionType) -> bool:
return (type is not None) and is_orderable(type)
def is_comparable(type_: ExpressionType) -> bool:
return (type_ is not None) and is_orderable(type_)


_ORDERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.orderable
)


def is_orderable(type: ExpressionType) -> bool:
def is_orderable(type_: ExpressionType) -> bool:
# On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable
return type in _ORDERABLE_SIMPLE_TYPES
return type_ in _ORDERABLE_SIMPLE_TYPES


_CLUSTERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.clusterable
)


def is_clusterable(type: ExpressionType) -> bool:
def is_clusterable(type_: ExpressionType) -> bool:
# https://cloud.google.com/bigquery/docs/clustered-tables#cluster_column_types
# This is based on default database type mapping, could in theory represent in non-default bq type to cluster.
return type in _CLUSTERABLE_SIMPLE_TYPES
return type_ in _CLUSTERABLE_SIMPLE_TYPES


def is_bool_coercable(type: ExpressionType) -> bool:
def is_bool_coercable(type_: ExpressionType) -> bool:
# TODO: Implement more bool coercions
return (type is None) or is_numeric(type) or is_string_like(type)
return (type_ is None) or is_numeric(type_) or is_string_like(type_)


BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = {
Expand Down
6 changes: 3 additions & 3 deletions bigframes/operations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import List, Sequence

import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing
import numpy
import pandas as pd

import bigframes.constants as constants
Expand Down Expand Up @@ -49,8 +48,9 @@ def __init__(
):
import bigframes.pandas

# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

read_pandas_func = (
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"google-cloud-storage >=2.0.0",
"ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
"jellyfish >=0.8.9",
"numpy >=1.24.0",
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
"pandas >=1.5.0",
"pyarrow >=8.0.0",
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ google-cloud-resource-manager==1.10.3
google-cloud-storage==2.0.0
ibis-framework==8.0.0
jellyfish==0.8.9
numpy==1.24.0
pandas==1.5.0
pyarrow==8.0.0
pydata-google-auth==1.8.2
Expand Down

0 comments on commit 3d71913

Please sign in to comment.