Skip to content

Commit

Permalink
[Arrow] Allow Python Objects fallback to override logging warning (#49299)
Browse files Browse the repository at this point in the history

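This change allows explicitly enabling
`DataContext.enable_fallback_to_arrow_object_ext_type` to suppress the
warning that is otherwise logged when a column cannot be converted to a
native Arrow array. The setting is now unset (`None`) by default, which
still permits the fallback for compatibility.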

---------

Signed-off-by: Alexey Kudinkin <[email protected]>
  • Loading branch information
alexeykudinkin authored Dec 17, 2024
1 parent f1b27b5 commit 9375c1f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 28 deletions.
54 changes: 31 additions & 23 deletions python/ray/air/util/tensor_extensions/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,49 +118,57 @@ def convert_to_pyarrow_array(column_values: np.ndarray, column_name: str) -> pa.
return _convert_to_pyarrow_native_array(column_values, column_name)

except ArrowConversionError as ace:
from ray.data import DataContext
from ray.data.extensions.object_extension import (
ArrowPythonObjectArray,
_object_extension_type_allowed,
)

enable_fallback_config: Optional[
bool
] = DataContext.get_current().enable_fallback_to_arrow_object_ext_type

if not _object_extension_type_allowed():
should_serialize_as_object_ext_type = False
object_ext_type_fallback_allowed = False
object_ext_type_detail = (
"skipping fallback to serialize as pickled python"
f" objects (due to unsupported Arrow version {PYARROW_VERSION}, "
f"min required version is {MIN_PYARROW_VERSION_SCALAR_SUBCLASS})"
)
else:
from ray.data import DataContext
# NOTE: By default setting is unset which (for compatibility reasons)
# is allowing the fallback
object_ext_type_fallback_allowed = (
enable_fallback_config is None or enable_fallback_config
)

if not DataContext.get_current().enable_fallback_to_arrow_object_ext_type:
should_serialize_as_object_ext_type = False
if object_ext_type_fallback_allowed:
object_ext_type_detail = (
"skipping fallback to serialize as pickled python objects "
"(due to DataContext.enable_fallback_to_arrow_object_ext_type "
"= False)"
"falling back to serialize as pickled python objects"
)
else:
should_serialize_as_object_ext_type = True
object_ext_type_detail = (
"falling back to serialize as pickled python objects"
"skipping fallback to serialize as pickled python objects "
"(due to DataContext.enable_fallback_to_arrow_object_ext_type "
"= False)"
)

# NOTE: To avoid logging following warning for every block it's
# only going to be logged in following cases
# - When fallback is disabled, or
# - It's being logged for the first time
if not should_serialize_as_object_ext_type or log_once(
"_fallback_to_arrow_object_extension_type_warning"
):
logger.warning(
f"Failed to convert column '{column_name}' into pyarrow "
f"array due to: {ace}; {object_ext_type_detail}",
exc_info=ace,
)
if not object_ext_type_fallback_allowed:
# To avoid logging following warning for every block it's
# only going to be logged in following cases
# - When fallback is disallowed, and
# - Fallback configuration is not set or set to false, and
# - It's being logged for the first time
if not enable_fallback_config and log_once(
"_fallback_to_arrow_object_extension_type_warning"
):
logger.warning(
f"Failed to convert column '{column_name}' into pyarrow "
f"array due to: {ace}; {object_ext_type_detail}",
exc_info=ace,
)

# If `ArrowPythonObjectType` is not supported raise original exception
if not should_serialize_as_object_ext_type:
# If `ArrowPythonObjectType` is not supported raise original exception
raise

# Otherwise, attempt to fall back to serialize as python objects
Expand Down
6 changes: 1 addition & 5 deletions python/ray/data/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@
# V2 in turn relies on int64 offsets, therefore having a limit of ~9Eb (exabytes)
DEFAULT_USE_ARROW_TENSOR_V2 = env_bool("RAY_DATA_USE_ARROW_TENSOR_V2", True)

DEFAULT_ENABLE_FALLBACK_TO_ARROW_OBJECT_EXT_TYPE = True

DEFAULT_AUTO_LOG_STATS = False

DEFAULT_VERBOSE_STATS_LOG = False
Expand Down Expand Up @@ -301,9 +299,7 @@ class DataContext:
read_op_min_num_blocks: int = DEFAULT_READ_OP_MIN_NUM_BLOCKS
enable_tensor_extension_casting: bool = DEFAULT_ENABLE_TENSOR_EXTENSION_CASTING
use_arrow_tensor_v2: bool = DEFAULT_USE_ARROW_TENSOR_V2
enable_fallback_to_arrow_object_ext_type = (
DEFAULT_ENABLE_FALLBACK_TO_ARROW_OBJECT_EXT_TYPE
)
enable_fallback_to_arrow_object_ext_type: Optional[bool] = None
enable_auto_log_stats: bool = DEFAULT_AUTO_LOG_STATS
verbose_stats_logs: bool = DEFAULT_VERBOSE_STATS_LOG
trace_allocations: bool = DEFAULT_TRACE_ALLOCATIONS
Expand Down

0 comments on commit 9375c1f

Please sign in to comment.