Skip to content

GH-46198: [Python] Remove deprecated PyExtensionType #46199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/source/python/api/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ Specific classes and functions for extension types.

BaseExtensionType
ExtensionType
PyExtensionType
UnknownExtensionType
register_extension_type
unregister_extension_type
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def print_entry(label, value):
BaseExtensionType, ExtensionType,
RunEndEncodedType, Bool8Type, FixedShapeTensorType,
JsonType, OpaqueType, UuidType,
PyExtensionType, UnknownExtensionType,
UnknownExtensionType,
register_extension_type, unregister_extension_type,
DictionaryMemo,
KeyValueMetadata,
Expand Down
4 changes: 0 additions & 4 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,6 @@ cdef class JsonType(BaseExtensionType):
const CJsonType* json_ext_type


# Declares PyExtensionType as a subclass of ExtensionType; the declaration
# adds no C-level members of its own.
cdef class PyExtensionType(ExtensionType):
    pass


cdef class _Metadata(_Weakrefable):
# required because KeyValueMetadata also extends collections.abc.Mapping
# and the first parent class must be an extension type
Expand Down
37 changes: 0 additions & 37 deletions python/pyarrow/tests/test_extension_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,6 @@ def registered_extension_type(ext_type):
pa.unregister_extension_type(ext_type.extension_name)


@contextlib.contextmanager
def enabled_auto_load():
    """
    Enable PyExtensionType auto-loading for the duration of a ``with`` body.

    Auto-loading is switched on before the body runs and switched off
    again afterwards, including when the body raises.
    """
    set_auto_load = pa.PyExtensionType.set_auto_load
    set_auto_load(True)
    try:
        yield
    finally:
        set_auto_load(False)


class TinyIntType(pa.ExtensionType):

def __init__(self):
Expand Down Expand Up @@ -233,15 +224,6 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
return cls(storage_type)


class LegacyIntType(pa.PyExtensionType):
    """Extension type over int8 storage built on the pickle-based base class."""

    def __init__(self):
        super().__init__(pa.int8())

    def __reduce__(self):
        # Pickle as a zero-argument call to this class.
        return (LegacyIntType, ())


def ipc_write_batch(batch):
stream = pa.BufferOutputStream()
writer = pa.RecordBatchStreamWriter(stream, batch.schema)
Expand Down Expand Up @@ -1735,25 +1717,6 @@ def test_tensor_type_str(tensor_type, text):
assert text in tensor_type_str


def test_legacy_int_type():
    """
    Round-trip a LegacyIntType column through IPC, with auto-loading
    disabled and then enabled.
    """
    # NOTE(review): the nesting under the ``with`` blocks below was
    # reconstructed from flattened text — confirm against upstream.

    # Constructing the legacy type must emit the deprecation warning.
    with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"):
        ext_ty = LegacyIntType()
    arr = pa.array([1, 2, 3], type=ext_ty.storage_type)
    ext_arr = pa.ExtensionArray.from_storage(ext_ty, arr)
    batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext'])
    buf = ipc_write_batch(batch)

    # Without auto-loading, reading warns and yields UnknownExtensionType.
    with pytest.warns((RuntimeWarning, FutureWarning)):
        batch = ipc_read_batch(buf)
        assert isinstance(batch.column(0).type, pa.UnknownExtensionType)

    # With auto-loading enabled, the original type and data come back.
    with enabled_auto_load():
        with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"):
            batch = ipc_read_batch(buf)
            assert isinstance(batch.column(0).type, LegacyIntType)
            assert batch.column(0) == ext_arr


@pytest.mark.parametrize("storage_type,storage", [
(pa.null(), [None] * 4),
(pa.int64(), [1, 2, None, 4]),
Expand Down
125 changes: 6 additions & 119 deletions python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2136,91 +2136,7 @@ cdef class OpaqueType(BaseExtensionType):
return OpaqueScalar


# Module-level switch read by PyExtensionType.__arrow_ext_deserialize__ and
# written by PyExtensionType.set_auto_load. Pickle-based loading is off by
# default.
_py_extension_type_auto_load = False


cdef class PyExtensionType(ExtensionType):
    """
    Concrete base class for Python-defined extension types based on pickle
    for (de)serialization.

    .. warning::
       This class is deprecated and its deserialization is disabled by default.
       :class:`ExtensionType` is recommended instead.

    Parameters
    ----------
    storage_type : DataType
        The storage type for which the extension is built.

    Notes
    -----
    Subclasses must implement ``__reduce__``: serialization is
    ``pickle.dumps(self)`` and the base implementation of ``__reduce__``
    raises ``NotImplementedError``.
    """

    def __cinit__(self):
        # The base class itself cannot be instantiated, only its subclasses.
        if type(self) is PyExtensionType:
            raise TypeError("Can only instantiate subclasses of "
                            "PyExtensionType")

    def __init__(self, DataType storage_type):
        # Every construction emits a FutureWarning before initializing the
        # ExtensionType base with the extension name "arrow.py_extension_type".
        warnings.warn(
            "pyarrow.PyExtensionType is deprecated "
            "and will refuse deserialization by default. "
            "Instead, please derive from pyarrow.ExtensionType and implement "
            "your own serialization mechanism.",
            FutureWarning)
        ExtensionType.__init__(self, storage_type, "arrow.py_extension_type")

    def __reduce__(self):
        # Subclasses must override this; __arrow_ext_serialize__ pickles
        # ``self`` and therefore depends on it.
        raise NotImplementedError("Please implement {0}.__reduce__"
                                  .format(type(self).__name__))

    def __arrow_ext_serialize__(self):
        # The serialized form is the pickle of this type instance.
        return pickle.dumps(self)

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """
        Rebuild an extension type from its pickled form.

        If auto-loading is disabled, a RuntimeWarning is emitted and an
        UnknownExtensionType wrapping ``serialized`` is returned without
        unpickling. If unpickling raises, an UnknownExtensionType is
        returned as well.

        Raises
        ------
        TypeError
            If the unpickled type's storage type differs from
            ``storage_type``.
        """
        if not _py_extension_type_auto_load:
            warnings.warn(
                "pickle-based deserialization of pyarrow.PyExtensionType subclasses "
                "is disabled by default; if you only ingest "
                "trusted data files, you may re-enable this using "
                "`pyarrow.PyExtensionType.set_auto_load(True)`.\n"
                "In the future, Python-defined extension subclasses should "
                "derive from pyarrow.ExtensionType (not pyarrow.PyExtensionType) "
                "and implement their own serialization mechanism.\n",
                RuntimeWarning)
            return UnknownExtensionType(storage_type, serialized)
        try:
            # SECURITY: pickle.loads can execute arbitrary code. It is only
            # reached after auto-loading was explicitly enabled.
            ty = pickle.loads(serialized)
        except Exception:
            # For some reason, it's impossible to deserialize the
            # ExtensionType instance. Perhaps the serialized data is
            # corrupt, or more likely the type is being deserialized
            # in an environment where the original Python class or module
            # is not available. Fall back on a generic BaseExtensionType.
            return UnknownExtensionType(storage_type, serialized)

        # The unpickled type must agree with the storage type passed in.
        if ty.storage_type != storage_type:
            raise TypeError("Expected storage type {0} but got {1}"
                            .format(ty.storage_type, storage_type))
        return ty

    # XXX Cython marks extension types as immutable, so cannot expose this
    # as a writable class attribute.
    @classmethod
    def set_auto_load(cls, value):
        """
        Enable or disable auto-loading of serialized PyExtensionType instances.

        The setting is stored in a module-level flag, so it applies to all
        subclasses at once.

        Parameters
        ----------
        value : bool
            Whether to enable auto-loading.
        """
        global _py_extension_type_auto_load
        assert isinstance(value, bool)
        _py_extension_type_auto_load = value


cdef class UnknownExtensionType(PyExtensionType):
cdef class UnknownExtensionType(ExtensionType):
"""
A concrete class for Python-defined extension types that refer to
an unknown Python implementation.
Expand All @@ -2238,11 +2154,15 @@ cdef class UnknownExtensionType(PyExtensionType):

def __init__(self, DataType storage_type, serialized):
self.serialized = serialized
PyExtensionType.__init__(self, storage_type)
super().__init__(storage_type, "pyarrow.unknown")

def __arrow_ext_serialize__(self):
    # Return the payload that __init__ stored, unchanged.
    return self.serialized

@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
    """
    Rebuild the extension type from its storage type and serialized payload.

    Parameters
    ----------
    storage_type : DataType
        The storage type of the extension data.
    serialized : object
        The serialized payload. It is stored unchanged so that
        ``__arrow_ext_serialize__`` returns it again.

    Returns
    -------
    An instance of ``cls`` wrapping ``storage_type`` and ``serialized``.
    """
    # __init__ requires both arguments; calling the constructor with none
    # would raise TypeError and drop the storage type and payload.
    return cls(storage_type, serialized)


_python_extension_types_registry = []

Expand Down Expand Up @@ -6094,39 +6014,6 @@ cdef class _ExtensionRegistryNanny(_Weakrefable):
_registry_nanny = _ExtensionRegistryNanny()


def _register_py_extension_type():
    """
    Register a placeholder C++ extension type named
    "arrow.py_extension_type" that is bound to the PyExtensionType class.
    """
    cdef:
        DataType storage_type
        shared_ptr[CExtensionType] cpy_ext_type
        c_string c_extension_name = tobytes("arrow.py_extension_type")

    # Make a dummy C++ ExtensionType
    storage_type = null()
    check_status(CPyExtensionType.FromClass(
        storage_type.sp_type, c_extension_name, PyExtensionType,
        &cpy_ext_type))
    # Register the resulting type; check_status handles the returned status.
    check_status(
        RegisterPyExtensionType(<shared_ptr[CDataType]> cpy_ext_type))


def _unregister_py_extension_types():
    """
    Unregister "arrow.py_extension_type" and every type listed in
    ``_python_extension_types_registry``, then release the registry nanny.
    """
    # This needs to be done explicitly before the Python interpreter is
    # finalized. If the C++ type is destroyed later in the process
    # teardown stage, it will invoke CPython APIs such as Py_DECREF
    # with a destroyed interpreter.
    unregister_extension_type("arrow.py_extension_type")
    for ext_type in _python_extension_types_registry:
        try:
            unregister_extension_type(ext_type.extension_name)
        except KeyError:
            # A KeyError from unregistering is deliberately ignored.
            pass
    _registry_nanny.release_registry()


# Register the placeholder type when this module is executed, and schedule
# the matching cleanup to run at interpreter exit.
_register_py_extension_type()
atexit.register(_unregister_py_extension_types)


#
# PyCapsule export utilities
#
Expand Down
Loading