deps: re-introduce support for numpy 1.24.x #931

Merged 3 commits on Aug 28, 2024
Changes from 1 commit
7 changes: 5 additions & 2 deletions bigframes/dataframe.py
@@ -112,12 +112,15 @@ def __init__(
*,
session: typing.Optional[bigframes.session.Session] = None,
):
global bigframes

if copy is not None and not copy:
raise ValueError(
f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}"
)
# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

# Check to see if constructing from BigQuery-backed objects before
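As a hedged sketch of the user-facing effect of this hunk (not part of the PR itself): an object dtype passed to the constructor is now detected via bigframes.dtypes.is_object_like and simply ignored, instead of being matched against numpy.dtypes.ObjectDType, which does not exist on numpy 1.24.x. The same treatment is applied to Series construction in bigframes/operations/base.py further down. The snippet assumes bigframes is installed and a BigQuery session is configured, since bigframes.pandas needs a session to materialize data.

```python
# Minimal sketch, assuming a configured BigQuery session; both calls should
# behave the same as passing no dtype at all, on numpy 1.24.x and newer.
import numpy
import pandas as pd

import bigframes.pandas as bpd

pdf = pd.DataFrame({"name": ["a", "b"], "value": [1, 2]})

df1 = bpd.DataFrame(pdf, dtype="object")
df2 = bpd.DataFrame(pdf, dtype=numpy.dtype("object"))
```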
79 changes: 43 additions & 36 deletions bigframes/dtypes.py
@@ -205,93 +205,100 @@ class SimpleDtypeInfo:


## dtype predicates - use these to maintain consistency
def is_datetime_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE)
def is_datetime_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE)

Review comment (Collaborator, Author): type is a built-in. Avoid conflicts by using type_.

PEP 8, "Descriptive: Naming Styles": "single_trailing_underscore_: used by convention to avoid conflicts with Python keyword".
https://peps.python.org/pep-0008/#descriptive-naming-styles

In this case, type is a function, not a keyword, so the rename is not strictly needed, but type is a common enough function that it is best to avoid the conflict, IMO.
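A tiny, hypothetical illustration of the shadowing concern described in the comment above (the function names are made up for this sketch):

```python
# Hypothetical sketch: once a parameter is named `type`, the built-in of the
# same name is no longer reachable by that name inside the function body.
def kind_of(type):
    # `type` here refers to the argument, not the built-in.
    return repr(type)


def kind_of_(type_):
    # With the trailing underscore, the built-in type() stays available.
    return type(type_).__name__


print(kind_of(42))   # 42
print(kind_of_(42))  # int
```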


def is_date_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
def is_date_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)


def is_time_like(type: ExpressionType) -> bool:
return type in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
def is_time_like(type_: ExpressionType) -> bool:
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)


def is_binary_like(type: ExpressionType) -> bool:
return type in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
def is_binary_like(type_: ExpressionType) -> bool:
return type_ in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)


def is_string_like(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE)
def is_object_like(type_: Union[ExpressionType, str]) -> bool:
# See: https://stackoverflow.com/a/40312924/101923 and
# https://numpy.org/doc/stable/reference/generated/numpy.dtype.kind.html
# for the way to identify object type.
return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"


def is_array_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.ListType
def is_string_like(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE)


def is_array_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.ListType
)


def is_array_string_like(type: ExpressionType) -> bool:
def is_array_string_like(type_: ExpressionType) -> bool:
return (
isinstance(type, pd.ArrowDtype)
and isinstance(type.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type.pyarrow_dtype.value_type)
isinstance(type_, pd.ArrowDtype)
and isinstance(type_.pyarrow_dtype, pa.ListType)
and pa.types.is_string(type_.pyarrow_dtype.value_type)
)


def is_struct_like(type: ExpressionType) -> bool:
return isinstance(type, pd.ArrowDtype) and isinstance(
type.pyarrow_dtype, pa.StructType
def is_struct_like(type_: ExpressionType) -> bool:
return isinstance(type_, pd.ArrowDtype) and isinstance(
type_.pyarrow_dtype, pa.StructType
)


def is_json_like(type: ExpressionType) -> bool:
def is_json_like(type_: ExpressionType) -> bool:
# TODO: Add JSON type support
return type == STRING_DTYPE
return type_ == STRING_DTYPE


def is_json_encoding_type(type: ExpressionType) -> bool:
def is_json_encoding_type(type_: ExpressionType) -> bool:
# Types can be converted into JSON.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
return type != GEO_DTYPE
return type_ != GEO_DTYPE


def is_numeric(type: ExpressionType) -> bool:
return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
def is_numeric(type_: ExpressionType) -> bool:
return type_ in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE


def is_iterable(type: ExpressionType) -> bool:
return type in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type)
def is_iterable(type_: ExpressionType) -> bool:
return type_ in (STRING_DTYPE, BYTES_DTYPE) or is_array_like(type_)


def is_comparable(type: ExpressionType) -> bool:
return (type is not None) and is_orderable(type)
def is_comparable(type_: ExpressionType) -> bool:
return (type_ is not None) and is_orderable(type_)


_ORDERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.orderable
)


def is_orderable(type: ExpressionType) -> bool:
def is_orderable(type_: ExpressionType) -> bool:
# On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable
return type in _ORDERABLE_SIMPLE_TYPES
return type_ in _ORDERABLE_SIMPLE_TYPES


_CLUSTERABLE_SIMPLE_TYPES = set(
mapping.dtype for mapping in SIMPLE_TYPES if mapping.clusterable
)


def is_clusterable(type: ExpressionType) -> bool:
def is_clusterable(type_: ExpressionType) -> bool:
# https://cloud.google.com/bigquery/docs/clustered-tables#cluster_column_types
# This is based on default database type mapping, could in theory represent in non-default bq type to cluster.
return type in _CLUSTERABLE_SIMPLE_TYPES
return type_ in _CLUSTERABLE_SIMPLE_TYPES


def is_bool_coercable(type: ExpressionType) -> bool:
def is_bool_coercable(type_: ExpressionType) -> bool:
# TODO: Implement more bool coercions
return (type is None) or is_numeric(type) or is_string_like(type)
return (type_ is None) or is_numeric(type_) or is_string_like(type_)


BIGFRAMES_STRING_TO_BIGFRAMES: Dict[DtypeString, Dtype] = {
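The new is_object_like predicate is what removes the reference to numpy.dtypes.ObjectDType: the numpy.dtypes module only exists in NumPy 1.25 and later, while the dtype "kind" code "O" is available on 1.24.x as well. A standalone copy of the check for illustration (the assertions are illustrative and not part of the PR):

```python
# Standalone copy of the predicate for illustration; the real function lives in
# bigframes/dtypes.py and is typed against ExpressionType.
import numpy
import pandas as pd


def is_object_like(type_) -> bool:
    # "O" is the numpy dtype.kind code for object dtypes, so this works on
    # numpy 1.24 without touching numpy.dtypes (added in numpy 1.25).
    return type_ in ("object", "O") or getattr(type_, "kind", None) == "O"


assert is_object_like("object")
assert is_object_like("O")
assert is_object_like(numpy.dtype("object"))
assert is_object_like(pd.Series([object()]).dtype)
assert not is_object_like(numpy.dtype("int64"))
assert not is_object_like(None)
```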
6 changes: 3 additions & 3 deletions bigframes/operations/base.py
@@ -18,7 +18,6 @@
from typing import List, Sequence

import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing
import numpy
import pandas as pd

import bigframes.constants as constants
@@ -49,8 +48,9 @@ def __init__(
):
import bigframes.pandas

# just ignore object dtype if provided
if dtype in {numpy.dtypes.ObjectDType, "object"}:
# Ignore object dtype if provided, as it provides no additional
# information about what BigQuery type to use.
if dtype is not None and bigframes.dtypes.is_object_like(dtype):
dtype = None

read_pandas_func = (
1 change: 1 addition & 0 deletions setup.py
@@ -49,6 +49,7 @@
"google-cloud-storage >=2.0.0",
"ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
"jellyfish >=0.8.9",
"numpy >=1.24.0",
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
"pandas >=1.5.0",
"pyarrow >=8.0.0",
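The explicit numpy floor pairs with the dtypes change above. As a hedged note: the numpy.dtypes module (and numpy.dtypes.ObjectDType with it) was only added in NumPy 1.25, so the old membership check raised AttributeError on 1.24.x, while the new kind-based check avoids that module entirely.

```python
# Illustration of the incompatibility being worked around (not part of the PR):
import numpy

try:
    numpy.dtypes.ObjectDType  # available on numpy >= 1.25
except AttributeError:
    # numpy 1.24.x has no numpy.dtypes module, which is why the old
    # `dtype in {numpy.dtypes.ObjectDType, "object"}` check failed there.
    print("numpy.dtypes is unavailable on this numpy version")
```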
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
@@ -14,6 +14,7 @@ google-cloud-resource-manager==1.10.3
google-cloud-storage==2.0.0
ibis-framework==8.0.0
jellyfish==0.8.9
numpy==1.24.0
pandas==1.5.0
pyarrow==8.0.0
pydata-google-auth==1.8.2