Skip to content

Commit

Permalink
GH-44713: [Python] Add support for Decimal32 and Decimal64 types (#44882)
Browse files Browse the repository at this point in the history

### Rationale for this change

Arrow C++ and the Arrow specification now support 32-bit and 64-bit decimal types...pyarrow should too!

### What changes are included in this PR?

Added type, array, and scalar bindings.

### Are these changes tested?

Yes!

### Are there any user-facing changes?

Yes!

* GitHub Issue: #44713

Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
  • Loading branch information
paleolimbot authored Dec 17, 2024
1 parent 66be0f8 commit 2fcf8b3
Show file tree
Hide file tree
Showing 22 changed files with 506 additions and 46 deletions.
2 changes: 2 additions & 0 deletions docs/source/python/api/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ may expose data type-specific methods or properties.
TimestampArray
DurationArray
MonthDayNanoIntervalArray
Decimal32Array
Decimal64Array
Decimal128Array
Decimal256Array
DictionaryArray
Expand Down
2 changes: 2 additions & 0 deletions docs/source/python/api/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ functions above.
Time64Type
DurationType
FixedSizeBinaryType
Decimal32Type
Decimal64Type
Decimal128Type
Decimal256Type
Field
Expand Down
10 changes: 6 additions & 4 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def print_entry(label, value):
float16, float32, float64,
binary, string, utf8, binary_view, string_view,
large_binary, large_string, large_utf8,
decimal128, decimal256,
decimal32, decimal64, decimal128, decimal256,
list_, large_list, list_view, large_list_view,
map_, struct,
union, sparse_union, dense_union,
Expand All @@ -180,7 +180,8 @@ def print_entry(label, value):
ListViewType, LargeListViewType,
MapType, UnionType, SparseUnionType, DenseUnionType,
TimestampType, Time32Type, Time64Type, DurationType,
FixedSizeBinaryType, Decimal128Type, Decimal256Type,
FixedSizeBinaryType,
Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
BaseExtensionType, ExtensionType,
RunEndEncodedType, Bool8Type, FixedShapeTensorType,
JsonType, OpaqueType, UuidType,
Expand Down Expand Up @@ -216,15 +217,16 @@ def print_entry(label, value):
Date32Array, Date64Array, TimestampArray,
Time32Array, Time64Array, DurationArray,
MonthDayNanoIntervalArray,
Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
StructArray, ExtensionArray,
RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
JsonArray, OpaqueArray, UuidArray,
scalar, NA, _NULL as NULL, Scalar,
NullScalar, BooleanScalar,
Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
HalfFloatScalar, FloatScalar, DoubleScalar,
Decimal128Scalar, Decimal256Scalar,
Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar,
ListScalar, LargeListScalar, FixedSizeListScalar,
ListViewScalar, LargeListViewScalar,
Date32Scalar, Date64Scalar,
Expand Down
13 changes: 12 additions & 1 deletion python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2327,6 +2327,15 @@ cdef class FixedSizeBinaryArray(Array):
Concrete class for Arrow arrays of a fixed-size binary data type.
"""

# NOTE: the class name must be spelled ``Decimal32Array`` so that it matches
# the ``cdef class Decimal32Array`` declaration in lib.pxd, the
# ``_Type_DECIMAL32: Decimal32Array`` entry in ``_array_classes`` and the
# name exported from ``pyarrow/__init__.py``.
cdef class Decimal32Array(FixedSizeBinaryArray):
    """
    Concrete class for Arrow arrays of decimal32 data type.
    """

# Array wrapper selected for ``_Type_DECIMAL64`` in ``_array_classes``.
# It defines no members of its own here; everything comes from
# FixedSizeBinaryArray.
cdef class Decimal64Array(FixedSizeBinaryArray):
"""
Concrete class for Arrow arrays of decimal64 data type.
"""

cdef class Decimal128Array(FixedSizeBinaryArray):
"""
Expand Down Expand Up @@ -4043,7 +4052,7 @@ cdef class StructArray(Array):
memory_pool : MemoryPool (optional)
For memory allocations, if required, otherwise uses default pool.
type : pyarrow.StructType (optional)
Struct type for name and type of each child.
Struct type for name and type of each child.
Returns
-------
Expand Down Expand Up @@ -4705,6 +4714,8 @@ cdef dict _array_classes = {
_Type_STRING_VIEW: StringViewArray,
_Type_DICTIONARY: DictionaryArray,
_Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
_Type_DECIMAL32: Decimal32Array,
_Type_DECIMAL64: Decimal64Array,
_Type_DECIMAL128: Decimal128Array,
_Type_DECIMAL256: Decimal256Array,
_Type_STRUCT: StructArray,
Expand Down
40 changes: 40 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil:
c_bool Contains(const c_string& key) const


# Cython binding for the C++ class ``arrow::Decimal32`` declared in
# "arrow/util/decimal.h". Only ``ToString`` is exposed: it takes a scale and
# returns a C++ string. ``Decimal32Scalar.as_py`` calls it with the scale of
# the scalar's type and hands the decoded text to the Python Decimal
# constructor.
cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
cdef cppclass CDecimal32" arrow::Decimal32":
c_string ToString(int32_t scale) const


# Cython binding for the C++ class ``arrow::Decimal64`` declared in
# "arrow/util/decimal.h". Only ``ToString`` is exposed: it takes a scale and
# returns a C++ string. ``Decimal64Scalar.as_py`` calls it with the scale of
# the scalar's type and hands the decoded text to the Python Decimal
# constructor.
cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
cdef cppclass CDecimal64" arrow::Decimal64":
c_string ToString(int32_t scale) const


cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
cdef cppclass CDecimal128" arrow::Decimal128":
c_string ToString(int32_t scale) const
Expand Down Expand Up @@ -110,6 +120,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
_Type_FLOAT" arrow::Type::FLOAT"
_Type_DOUBLE" arrow::Type::DOUBLE"

_Type_DECIMAL32" arrow::Type::DECIMAL32"
_Type_DECIMAL64" arrow::Type::DECIMAL64"
_Type_DECIMAL128" arrow::Type::DECIMAL128"
_Type_DECIMAL256" arrow::Type::DECIMAL256"

Expand Down Expand Up @@ -453,6 +465,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int byte_width()
int bit_width()

# Binding for the C++ type class ``arrow::Decimal32Type``, declared as a
# subclass of CFixedSizeBinaryType. Exposes the (precision, scale)
# constructor and the two integer accessors for those parameters.
cdef cppclass CDecimal32Type \
" arrow::Decimal32Type"(CFixedSizeBinaryType):
CDecimal32Type(int precision, int scale)
int precision()
int scale()

# Binding for the C++ type class ``arrow::Decimal64Type``, declared as a
# subclass of CFixedSizeBinaryType. Exposes the (precision, scale)
# constructor and the two integer accessors for those parameters.
cdef cppclass CDecimal64Type \
" arrow::Decimal64Type"(CFixedSizeBinaryType):
CDecimal64Type(int precision, int scale)
int precision()
int scale()

cdef cppclass CDecimal128Type \
" arrow::Decimal128Type"(CFixedSizeBinaryType):
CDecimal128Type(int precision, int scale)
Expand Down Expand Up @@ -680,6 +704,16 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray):
const uint8_t* GetValue(int i)

# Binding for the C++ array class ``arrow::Decimal32Array``, declared as a
# subclass of CFixedSizeBinaryArray. ``FormatValue`` takes an element index
# and returns a C++ string.
# NOTE(review): presumably the decimal text of element ``i`` -- confirm
# against the Arrow C++ API documentation.
cdef cppclass CDecimal32Array" arrow::Decimal32Array"(
CFixedSizeBinaryArray
):
c_string FormatValue(int i)

# Binding for the C++ array class ``arrow::Decimal64Array``, declared as a
# subclass of CFixedSizeBinaryArray. ``FormatValue`` takes an element index
# and returns a C++ string.
# NOTE(review): presumably the decimal text of element ``i`` -- confirm
# against the Arrow C++ API documentation.
cdef cppclass CDecimal64Array" arrow::Decimal64Array"(
CFixedSizeBinaryArray
):
c_string FormatValue(int i)

cdef cppclass CDecimal128Array" arrow::Decimal128Array"(
CFixedSizeBinaryArray
):
Expand Down Expand Up @@ -1263,6 +1297,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CDoubleScalar" arrow::DoubleScalar"(CScalar):
double value

# Binding for the C++ scalar class ``arrow::Decimal32Scalar`` (a CScalar).
# ``value`` is the stored CDecimal32; Decimal32Scalar.as_py reads it.
cdef cppclass CDecimal32Scalar" arrow::Decimal32Scalar"(CScalar):
CDecimal32 value

# Binding for the C++ scalar class ``arrow::Decimal64Scalar`` (a CScalar).
# ``value`` is the stored CDecimal64; Decimal64Scalar.as_py reads it.
cdef cppclass CDecimal64Scalar" arrow::Decimal64Scalar"(CScalar):
CDecimal64 value

cdef cppclass CDecimal128Scalar" arrow::Decimal128Scalar"(CScalar):
CDecimal128 value

Expand Down
18 changes: 18 additions & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,16 @@ cdef class FixedSizeBinaryType(DataType):
const CFixedSizeBinaryType* fixed_size_binary_type


# Declaration of the Decimal32Type extension class. It extends
# FixedSizeBinaryType and adds one C-level attribute: a const pointer typed
# as CDecimal32Type (the binding for arrow::Decimal32Type).
# NOTE(review): where this pointer is assigned is not visible in this
# excerpt -- confirm in the class implementation.
cdef class Decimal32Type(FixedSizeBinaryType):
cdef:
const CDecimal32Type* decimal32_type


# Declaration of the Decimal64Type extension class. It extends
# FixedSizeBinaryType and adds one C-level attribute: a const pointer typed
# as CDecimal64Type (the binding for arrow::Decimal64Type).
# NOTE(review): where this pointer is assigned is not visible in this
# excerpt -- confirm in the class implementation.
cdef class Decimal64Type(FixedSizeBinaryType):
cdef:
const CDecimal64Type* decimal64_type


cdef class Decimal128Type(FixedSizeBinaryType):
cdef:
const CDecimal128Type* decimal128_type
Expand Down Expand Up @@ -430,6 +440,14 @@ cdef class FixedSizeBinaryArray(Array):
pass


# Declaration of the Decimal32Array extension class. It adds no C-level
# attributes of its own (``pass``); the class body is implemented in
# array.pxi, and the name there must match this declaration.
cdef class Decimal32Array(FixedSizeBinaryArray):
pass


# Declaration of the Decimal64Array extension class. It adds no C-level
# attributes of its own (``pass``); the class body is implemented in
# array.pxi, and the name there must match this declaration.
cdef class Decimal64Array(FixedSizeBinaryArray):
pass


cdef class Decimal128Array(FixedSizeBinaryArray):
pass

Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def set_cpu_count(int count):

def is_threading_enabled() -> bool:
"""
Returns True if threading is enabled in libarrow.
Returns True if threading is enabled in libarrow.

If it isn't enabled, then python shouldn't create any
If it isn't enabled, then python shouldn't create any
threads either, because we're probably on a system where
threading doesn't work (e.g. Emscripten).
"""
Expand All @@ -109,6 +109,8 @@ Type_INT64 = _Type_INT64
Type_HALF_FLOAT = _Type_HALF_FLOAT
Type_FLOAT = _Type_FLOAT
Type_DOUBLE = _Type_DOUBLE
Type_DECIMAL32 = _Type_DECIMAL32
Type_DECIMAL64 = _Type_DECIMAL64
Type_DECIMAL128 = _Type_DECIMAL128
Type_DECIMAL256 = _Type_DECIMAL256
Type_DATE32 = _Type_DATE32
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/public-api.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ cdef api object pyarrow_wrap_data_type(
out = DurationType.__new__(DurationType)
elif type.get().id() == _Type_FIXED_SIZE_BINARY:
out = FixedSizeBinaryType.__new__(FixedSizeBinaryType)
elif type.get().id() == _Type_DECIMAL32:
out = Decimal32Type.__new__(Decimal32Type)
elif type.get().id() == _Type_DECIMAL64:
out = Decimal64Type.__new__(Decimal64Type)
elif type.get().id() == _Type_DECIMAL128:
out = Decimal128Type.__new__(Decimal128Type)
elif type.get().id() == _Type_DECIMAL256:
Expand Down
42 changes: 42 additions & 0 deletions python/pyarrow/scalar.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,46 @@ cdef class DoubleScalar(Scalar):
return sp.value if sp.is_valid else None


cdef class Decimal32Scalar(Scalar):
    """
    Concrete class for decimal32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python Decimal.

        A null (invalid) scalar yields None.
        """
        cdef CDecimal32Scalar* scalar = <CDecimal32Scalar*> self.wrapped.get()
        cdef CDecimal32Type* decimal_type = <CDecimal32Type*> scalar.type.get()

        # Nulls carry no decimal payload, so bail out before formatting.
        if not scalar.is_valid:
            return None

        # Format the stored value with the type's scale, then let the Python
        # Decimal constructor parse the resulting text.
        text = frombytes(scalar.value.ToString(decimal_type.scale()))
        return _pydecimal.Decimal(text)


cdef class Decimal64Scalar(Scalar):
    """
    Concrete class for decimal64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python Decimal.

        A null (invalid) scalar yields None.
        """
        cdef CDecimal64Scalar* scalar = <CDecimal64Scalar*> self.wrapped.get()
        cdef CDecimal64Type* decimal_type = <CDecimal64Type*> scalar.type.get()

        # Nulls carry no decimal payload, so bail out before formatting.
        if not scalar.is_valid:
            return None

        # Format the stored value with the type's scale, then let the Python
        # Decimal constructor parse the resulting text.
        text = frombytes(scalar.value.ToString(decimal_type.scale()))
        return _pydecimal.Decimal(text)


cdef class Decimal128Scalar(Scalar):
"""
Concrete class for decimal128 scalars.
Expand Down Expand Up @@ -1132,6 +1172,8 @@ cdef dict _scalar_classes = {
_Type_HALF_FLOAT: HalfFloatScalar,
_Type_FLOAT: FloatScalar,
_Type_DOUBLE: DoubleScalar,
_Type_DECIMAL32: Decimal32Scalar,
_Type_DECIMAL64: Decimal64Scalar,
_Type_DECIMAL128: Decimal128Scalar,
_Type_DECIMAL256: Decimal256Scalar,
_Type_DATE32: Date32Scalar,
Expand Down
44 changes: 14 additions & 30 deletions python/pyarrow/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1317,23 +1317,16 @@ struct ObjectWriterVisitor {
out_values);
}

Status Visit(const Decimal32Type& type) {
return Status::NotImplemented("Decimal32 type not yet implemented");
}

Status Visit(const Decimal64Type& type) {
return Status::NotImplemented("Decimal64 type not yet implemented");
}

Status Visit(const Decimal128Type& type) {
template <typename DecimalT, typename DecimalArrayT>
Status VisitDecimal(const DecimalT& type) {
OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal));
PyObject* decimal_constructor = Decimal.obj();

for (int c = 0; c < data.num_chunks(); c++) {
const auto& arr = checked_cast<const arrow::Decimal128Array&>(*data.chunk(c));
const auto& arr = checked_cast<const DecimalArrayT&>(*data.chunk(c));

for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsNull(i)) {
Expand All @@ -1350,29 +1343,20 @@ struct ObjectWriterVisitor {
return Status::OK();
}

Status Visit(const Decimal256Type& type) {
OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal));
PyObject* decimal_constructor = Decimal.obj();
Status Visit(const Decimal32Type& type) {
return VisitDecimal<Decimal32Type, Decimal32Array>(type);
}

for (int c = 0; c < data.num_chunks(); c++) {
const auto& arr = checked_cast<const arrow::Decimal256Array&>(*data.chunk(c));
Status Visit(const Decimal64Type& type) {
return VisitDecimal<Decimal64Type, Decimal64Array>(type);
}

for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsNull(i)) {
Py_INCREF(Py_None);
*out_values++ = Py_None;
} else {
*out_values++ =
internal::DecimalFromString(decimal_constructor, arr.FormatValue(i));
RETURN_IF_PYERROR();
}
}
}
Status Visit(const Decimal128Type& type) {
return VisitDecimal<Decimal128Type, Decimal128Array>(type);
}

return Status::OK();
Status Visit(const Decimal256Type& type) {
return VisitDecimal<Decimal256Type, Decimal256Array>(type);
}

template <typename T>
Expand Down
18 changes: 18 additions & 0 deletions python/pyarrow/src/arrow/python/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,24 @@ Status InternalDecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type,

} // namespace

// Decimal32 overload of DecimalFromPythonDecimal.
//
// Thin public entry point: forwards python_decimal, arrow_type and out
// unchanged to InternalDecimalFromPythonDecimal and returns its Status.
// The conversion logic itself is not visible in this excerpt.
Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type,
Decimal32* out) {
return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out);
}

// Decimal32 overload of DecimalFromPyObject.
//
// Thin public entry point: forwards obj, arrow_type and out unchanged to
// InternalDecimalFromPyObject and returns its Status.
// NOTE(review): which Python object kinds are accepted is decided by the
// internal helper, whose body is not visible in this excerpt.
Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal32* out) {
return InternalDecimalFromPyObject(obj, arrow_type, out);
}

// Decimal64 overload of DecimalFromPythonDecimal.
//
// Thin public entry point: forwards python_decimal, arrow_type and out
// unchanged to InternalDecimalFromPythonDecimal and returns its Status.
// The conversion logic itself is not visible in this excerpt.
Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type,
Decimal64* out) {
return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out);
}

// Decimal64 overload of DecimalFromPyObject.
//
// Thin public entry point: forwards obj, arrow_type and out unchanged to
// InternalDecimalFromPyObject and returns its Status.
// NOTE(review): which Python object kinds are accepted is decided by the
// internal helper, whose body is not visible in this excerpt.
Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal64* out) {
return InternalDecimalFromPyObject(obj, arrow_type, out);
}

Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type,
Decimal128* out) {
return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out);
Expand Down
Loading

0 comments on commit 2fcf8b3

Please sign in to comment.