diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index 5219902362375..dc24be8bd06d8 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -72,6 +72,8 @@ may expose data type-specific methods or properties. TimestampArray DurationArray MonthDayNanoIntervalArray + Decimal32Array + Decimal64Array Decimal128Array Decimal256Array DictionaryArray diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 65f6da56a553c..5e151a1f93af5 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -116,6 +116,8 @@ functions above. Time64Type DurationType FixedSizeBinaryType + Decimal32Type + Decimal64Type Decimal128Type Decimal256Type Field diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 8c8c09265d0bf..d00a731324c92 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -166,7 +166,7 @@ def print_entry(label, value): float16, float32, float64, binary, string, utf8, binary_view, string_view, large_binary, large_string, large_utf8, - decimal128, decimal256, + decimal32, decimal64, decimal128, decimal256, list_, large_list, list_view, large_list_view, map_, struct, union, sparse_union, dense_union, @@ -180,7 +180,8 @@ def print_entry(label, value): ListViewType, LargeListViewType, MapType, UnionType, SparseUnionType, DenseUnionType, TimestampType, Time32Type, Time64Type, DurationType, - FixedSizeBinaryType, Decimal128Type, Decimal256Type, + FixedSizeBinaryType, + Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, BaseExtensionType, ExtensionType, RunEndEncodedType, Bool8Type, FixedShapeTensorType, JsonType, OpaqueType, UuidType, @@ -216,7 +217,8 @@ def print_entry(label, value): Date32Array, Date64Array, TimestampArray, Time32Array, Time64Array, DurationArray, MonthDayNanoIntervalArray, - Decimal128Array, Decimal256Array, StructArray, ExtensionArray, + Decimal32Array, 
Decimal64Array, Decimal128Array, Decimal256Array, + StructArray, ExtensionArray, RunEndEncodedArray, Bool8Array, FixedShapeTensorArray, JsonArray, OpaqueArray, UuidArray, scalar, NA, _NULL as NULL, Scalar, @@ -224,7 +226,7 @@ def print_entry(label, value): Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar, UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar, HalfFloatScalar, FloatScalar, DoubleScalar, - Decimal128Scalar, Decimal256Scalar, + Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar, ListScalar, LargeListScalar, FixedSizeListScalar, ListViewScalar, LargeListViewScalar, Date32Scalar, Date64Scalar, diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 8bddc34e1000b..f86caf1433d4e 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2327,6 +2327,15 @@ cdef class FixedSizeBinaryArray(Array): Concrete class for Arrow arrays of a fixed-size binary data type. """ +cdef class Decimal32Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal32 data type. + """ + +cdef class Decimal64Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal64 data type. + """ cdef class Decimal128Array(FixedSizeBinaryArray): """ @@ -4043,7 +4052,7 @@ cdef class StructArray(Array): memory_pool : MemoryPool (optional) For memory allocations, if required, otherwise uses default pool. type : pyarrow.StructType (optional) - Struct type for name and type of each child. + Struct type for name and type of each child. 
Returns ------- @@ -4705,6 +4714,8 @@ cdef dict _array_classes = { _Type_STRING_VIEW: StringViewArray, _Type_DICTIONARY: DictionaryArray, _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray, + _Type_DECIMAL32: Decimal32Array, + _Type_DECIMAL64: Decimal64Array, _Type_DECIMAL128: Decimal128Array, _Type_DECIMAL256: Decimal256Array, _Type_STRUCT: StructArray, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8bf61b73cc211..b2edeb0b4192f 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -45,6 +45,16 @@ cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil: c_bool Contains(const c_string& key) const +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal32" arrow::Decimal32": + c_string ToString(int32_t scale) const + + +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal64" arrow::Decimal64": + c_string ToString(int32_t scale) const + + cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: cdef cppclass CDecimal128" arrow::Decimal128": c_string ToString(int32_t scale) const @@ -110,6 +120,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: _Type_FLOAT" arrow::Type::FLOAT" _Type_DOUBLE" arrow::Type::DOUBLE" + _Type_DECIMAL32" arrow::Type::DECIMAL32" + _Type_DECIMAL64" arrow::Type::DECIMAL64" _Type_DECIMAL128" arrow::Type::DECIMAL128" _Type_DECIMAL256" arrow::Type::DECIMAL256" @@ -453,6 +465,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: int byte_width() int bit_width() + cdef cppclass CDecimal32Type \ + " arrow::Decimal32Type"(CFixedSizeBinaryType): + CDecimal32Type(int precision, int scale) + int precision() + int scale() + + cdef cppclass CDecimal64Type \ + " arrow::Decimal64Type"(CFixedSizeBinaryType): + CDecimal64Type(int precision, int scale) + int precision() + int scale() + cdef cppclass CDecimal128Type \ " arrow::Decimal128Type"(CFixedSizeBinaryType): 
CDecimal128Type(int precision, int scale) @@ -680,6 +704,16 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray): const uint8_t* GetValue(int i) + cdef cppclass CDecimal32Array" arrow::Decimal32Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + + cdef cppclass CDecimal64Array" arrow::Decimal64Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + cdef cppclass CDecimal128Array" arrow::Decimal128Array"( CFixedSizeBinaryArray ): @@ -1263,6 +1297,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CDoubleScalar" arrow::DoubleScalar"(CScalar): double value + cdef cppclass CDecimal32Scalar" arrow::Decimal32Scalar"(CScalar): + CDecimal32 value + + cdef cppclass CDecimal64Scalar" arrow::Decimal64Scalar"(CScalar): + CDecimal64 value + cdef cppclass CDecimal128Scalar" arrow::Decimal128Scalar"(CScalar): CDecimal128 value diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index f3d4e1eec0899..bc9811b92b007 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -185,6 +185,16 @@ cdef class FixedSizeBinaryType(DataType): const CFixedSizeBinaryType* fixed_size_binary_type +cdef class Decimal32Type(FixedSizeBinaryType): + cdef: + const CDecimal32Type* decimal32_type + + +cdef class Decimal64Type(FixedSizeBinaryType): + cdef: + const CDecimal64Type* decimal64_type + + cdef class Decimal128Type(FixedSizeBinaryType): cdef: const CDecimal128Type* decimal128_type @@ -430,6 +440,14 @@ cdef class FixedSizeBinaryArray(Array): pass +cdef class Decimal32Array(FixedSizeBinaryArray): + pass + + +cdef class Decimal64Array(FixedSizeBinaryArray): + pass + + cdef class Decimal128Array(FixedSizeBinaryArray): pass diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 6b82eb6566896..2c92ecbfa7344 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -87,9 +87,9 @@ def set_cpu_count(int count): def 
is_threading_enabled() -> bool: """ - Returns True if threading is enabled in libarrow. + Returns True if threading is enabled in libarrow. - If it isn't enabled, then python shouldn't create any + If it isn't enabled, then python shouldn't create any threads either, because we're probably on a system where threading doesn't work (e.g. Emscripten). """ @@ -109,6 +109,8 @@ Type_INT64 = _Type_INT64 Type_HALF_FLOAT = _Type_HALF_FLOAT Type_FLOAT = _Type_FLOAT Type_DOUBLE = _Type_DOUBLE +Type_DECIMAL32 = _Type_DECIMAL32 +Type_DECIMAL64 = _Type_DECIMAL64 Type_DECIMAL128 = _Type_DECIMAL128 Type_DECIMAL256 = _Type_DECIMAL256 Type_DATE32 = _Type_DATE32 diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi index 913e25e308254..d1fa1192debc3 100644 --- a/python/pyarrow/public-api.pxi +++ b/python/pyarrow/public-api.pxi @@ -111,6 +111,10 @@ cdef api object pyarrow_wrap_data_type( out = DurationType.__new__(DurationType) elif type.get().id() == _Type_FIXED_SIZE_BINARY: out = FixedSizeBinaryType.__new__(FixedSizeBinaryType) + elif type.get().id() == _Type_DECIMAL32: + out = Decimal32Type.__new__(Decimal32Type) + elif type.get().id() == _Type_DECIMAL64: + out = Decimal64Type.__new__(Decimal64Type) elif type.get().id() == _Type_DECIMAL128: out = Decimal128Type.__new__(Decimal128Type) elif type.get().id() == _Type_DECIMAL256: diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 2bfdcddf30736..2235cd0b981a6 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -336,6 +336,46 @@ cdef class DoubleScalar(Scalar): return sp.value if sp.is_valid else None +cdef class Decimal32Scalar(Scalar): + """ + Concrete class for decimal32 scalars. + """ + + def as_py(self): + """ + Return this value as a Python Decimal. 
+ """ + cdef: + CDecimal32Scalar* sp = self.wrapped.get() + CDecimal32Type* dtype = sp.type.get() + if sp.is_valid: + return _pydecimal.Decimal( + frombytes(sp.value.ToString(dtype.scale())) + ) + else: + return None + + +cdef class Decimal64Scalar(Scalar): + """ + Concrete class for decimal64 scalars. + """ + + def as_py(self): + """ + Return this value as a Python Decimal. + """ + cdef: + CDecimal64Scalar* sp = self.wrapped.get() + CDecimal64Type* dtype = sp.type.get() + if sp.is_valid: + return _pydecimal.Decimal( + frombytes(sp.value.ToString(dtype.scale())) + ) + else: + return None + + cdef class Decimal128Scalar(Scalar): """ Concrete class for decimal128 scalars. @@ -1132,6 +1172,8 @@ cdef dict _scalar_classes = { _Type_HALF_FLOAT: HalfFloatScalar, _Type_FLOAT: FloatScalar, _Type_DOUBLE: DoubleScalar, + _Type_DECIMAL32: Decimal32Scalar, + _Type_DECIMAL64: Decimal64Scalar, _Type_DECIMAL128: Decimal128Scalar, _Type_DECIMAL256: Decimal256Scalar, _Type_DATE32: Date32Scalar, diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 110dab7d35538..10c4d0e16000b 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1317,15 +1317,8 @@ struct ObjectWriterVisitor { out_values); } - Status Visit(const Decimal32Type& type) { - return Status::NotImplemented("Decimal32 type not yet implemented"); - } - - Status Visit(const Decimal64Type& type) { - return Status::NotImplemented("Decimal64 type not yet implemented"); - } - - Status Visit(const Decimal128Type& type) { + template + Status VisitDecimal(const DecimalT& type) { OwnedRef decimal; OwnedRef Decimal; RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); @@ -1333,7 +1326,7 @@ struct ObjectWriterVisitor { PyObject* decimal_constructor = Decimal.obj(); for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); + const auto& arr = 
checked_cast(*data.chunk(c)); for (int64_t i = 0; i < arr.length(); ++i) { if (arr.IsNull(i)) { @@ -1350,29 +1343,20 @@ struct ObjectWriterVisitor { return Status::OK(); } - Status Visit(const Decimal256Type& type) { - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal)); - PyObject* decimal_constructor = Decimal.obj(); + Status Visit(const Decimal32Type& type) { + return VisitDecimal(type); + } - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); + Status Visit(const Decimal64Type& type) { + return VisitDecimal(type); + } - for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values++ = Py_None; - } else { - *out_values++ = - internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); - RETURN_IF_PYERROR(); - } - } - } + Status Visit(const Decimal128Type& type) { + return VisitDecimal(type); + } - return Status::OK(); + Status Visit(const Decimal256Type& type) { + return VisitDecimal(type); } template diff --git a/python/pyarrow/src/arrow/python/decimal.cc b/python/pyarrow/src/arrow/python/decimal.cc index 0c00fcfaa8e59..e6caff2201ddc 100644 --- a/python/pyarrow/src/arrow/python/decimal.cc +++ b/python/pyarrow/src/arrow/python/decimal.cc @@ -164,6 +164,24 @@ Status InternalDecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, } // namespace +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal32* out) { + return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal32* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal64* out) { + return 
InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal64* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, Decimal128* out) { return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); diff --git a/python/pyarrow/src/arrow/python/decimal.h b/python/pyarrow/src/arrow/python/decimal.h index 1187037aed29e..83ded0b82b922 100644 --- a/python/pyarrow/src/arrow/python/decimal.h +++ b/python/pyarrow/src/arrow/python/decimal.h @@ -56,6 +56,40 @@ ARROW_PYTHON_EXPORT PyObject* DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string); +// \brief Convert a Python decimal to an Arrow Decimal32 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal32 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal32* out); + +// \brief Convert a Python object to an Arrow Decimal32 object +// \param[in] obj A Python int or decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal32 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal32* out); + +// \brief Convert a Python decimal to an Arrow Decimal64 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal64 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal64* out); + +// 
\brief Convert a Python object to an Arrow Decimal64 object +// \param[in] obj A Python int or decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal64 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal64* out); + // \brief Convert a Python decimal to an Arrow Decimal128 object // \param[in] python_decimal A Python decimal.Decimal instance // \param[in] arrow_type An instance of arrow::DecimalType diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index e7195e99072b0..709338b4e7756 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -260,6 +260,18 @@ class PyValue { return value; } + static Result Convert(const Decimal32Type* type, const O&, I obj) { + Decimal32 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + + static Result Convert(const Decimal64Type* type, const O&, I obj) { + Decimal64 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + static Result Convert(const Decimal128Type* type, const O&, I obj) { Decimal128 value; RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 7a1b31a4d9d77..450cce74f1d43 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -92,6 +92,16 @@ pa.float32(), pa.float64() ]) +decimal32_type = st.builds( + pa.decimal32, + precision=st.integers(min_value=1, max_value=9), + scale=st.integers(min_value=1, max_value=9) +) +decimal64_type = st.builds( + pa.decimal64, + precision=st.integers(min_value=1, max_value=18), + scale=st.integers(min_value=1, max_value=18) +) decimal128_type = st.builds( 
pa.decimal128, precision=st.integers(min_value=1, max_value=38), diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index e388851bea17b..e6fcd6149ee04 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1900,7 +1900,9 @@ def test_fsl_to_fsl_cast(value_type): FloatToDecimalCase = namedtuple('FloatToDecimalCase', ('precision', 'scale', 'float_val')) -decimal_type_traits = [DecimalTypeTraits('decimal128', pa.decimal128, 38), +decimal_type_traits = [DecimalTypeTraits('decimal32', pa.decimal32, 9), + DecimalTypeTraits('decimal64', pa.decimal64, 18), + DecimalTypeTraits('decimal128', pa.decimal128, 38), DecimalTypeTraits('decimal256', pa.decimal256, 76)] @@ -1991,7 +1993,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx, # very high precisions as rounding errors can accumulate in # the iterative algorithm (GH-35576). diff_digits = abs(actual - expected) * 10**decimal_ty.scale - limit = 2 if decimal_ty.precision < max_precision - 1 else 4 + limit = 2 if decimal_ty.precision < max_precision - 2 else 4 assert diff_digits <= limit, ( f"float_val = {float_val!r}, precision={decimal_ty.precision}, " f"expected = {expected!r}, actual = {actual!r}, " @@ -2041,6 +2043,11 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): mantissa_digits = math.floor(math.log10(2**mantissa_bits)) max_precision = decimal_traits.max_precision + # For example, decimal32 <-> float64 + if max_precision < mantissa_digits: + mantissa_bits = math.floor(math.log2(10**max_precision)) + mantissa_digits = math.floor(math.log10(2**mantissa_bits)) + with decimal.localcontext() as ctx: precision = mantissa_digits ctx.prec = precision diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index c3589877e6423..07286125c4cf6 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ 
-1592,7 +1592,7 @@ def test_sequence_mixed_types_with_specified_type_fails(): def test_sequence_decimal(): data = [decimal.Decimal('1234.183'), decimal.Decimal('8094.234')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal32, pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=7, scale=3)) assert arr.to_pylist() == data @@ -1601,28 +1601,28 @@ def test_sequence_decimal_different_precisions(): data = [ decimal.Decimal('1234234983.183'), decimal.Decimal('80943244.234') ] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=13, scale=3)) assert arr.to_pylist() == data def test_sequence_decimal_no_scale(): data = [decimal.Decimal('1234234983'), decimal.Decimal('8094324')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=10)) assert arr.to_pylist() == data def test_sequence_decimal_negative(): data = [decimal.Decimal('-1234.234983'), decimal.Decimal('-8.094324')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=10, scale=6)) assert arr.to_pylist() == data def test_sequence_decimal_no_whole_part(): data = [decimal.Decimal('-.4234983'), decimal.Decimal('.0103943')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal32, pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=7, scale=7)) assert arr.to_pylist() == data diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index 3bb4440e89750..978c92307a69e 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -256,7 +256,9 @@ def test_explicit_schema_decimal(self): expected = { 'a': [Decimal("1"), Decimal("1.45"), Decimal("-23.456"), None], } - for type_factory in 
(pa.decimal128, pa.decimal256): + + decimal_types = (pa.decimal32, pa.decimal64, pa.decimal128, pa.decimal256) + for type_factory in decimal_types: schema = pa.schema([('a', type_factory(9, 4))]) opts = ParseOptions(explicit_schema=schema) table = self.read_bytes(rows, parse_options=opts) diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 0b2055018f695..dbba7852190f4 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -165,6 +165,8 @@ def test_set_timezone_db_path_non_windows(): pa.Time32Type, pa.Time64Type, pa.TimestampType, + pa.Decimal32Type, + pa.Decimal64Type, pa.Decimal128Type, pa.Decimal256Type, pa.DictionaryType, diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index bdcb6c2b42d78..b6d36787fbd37 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -615,6 +615,8 @@ def test_type_schema_pickling(pickle_module): pa.date64(), pa.timestamp('ms'), pa.timestamp('ns'), + pa.decimal32(9, 3), + pa.decimal64(11, 4), pa.decimal128(12, 2), pa.decimal256(76, 38), pa.field('a', 'string', metadata={b'foo': b'bar'}), diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index de439b6bb8cd7..926de46318036 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -57,6 +57,8 @@ def get_many_types(): pa.float16(), pa.float32(), pa.float64(), + pa.decimal32(9, 4), + pa.decimal64(18, 4), pa.decimal128(19, 4), pa.decimal256(76, 38), pa.string(), @@ -139,18 +141,38 @@ def test_null_field_may_not_be_non_nullable(): def test_is_decimal(): + decimal32 = pa.decimal32(9, 4) + decimal64 = pa.decimal64(18, 4) decimal128 = pa.decimal128(19, 4) decimal256 = pa.decimal256(76, 38) int32 = pa.int32() + assert types.is_decimal(decimal32) + assert types.is_decimal(decimal64) assert types.is_decimal(decimal128) assert types.is_decimal(decimal256) assert not 
types.is_decimal(int32) + assert types.is_decimal32(decimal32) + assert not types.is_decimal32(decimal64) + assert not types.is_decimal32(decimal128) + assert not types.is_decimal32(decimal256) + assert not types.is_decimal32(int32) + + assert not types.is_decimal64(decimal32) + assert types.is_decimal64(decimal64) + assert not types.is_decimal64(decimal128) + assert not types.is_decimal64(decimal256) + assert not types.is_decimal64(int32) + + assert not types.is_decimal128(decimal32) + assert not types.is_decimal128(decimal64) assert types.is_decimal128(decimal128) assert not types.is_decimal128(decimal256) assert not types.is_decimal128(int32) + assert not types.is_decimal256(decimal32) + assert not types.is_decimal256(decimal64) assert not types.is_decimal256(decimal128) assert types.is_decimal256(decimal256) assert not types.is_decimal256(int32) @@ -970,6 +992,8 @@ def test_bit_and_byte_width(): (pa.float16(), 16, 2), (pa.timestamp('s'), 64, 8), (pa.date32(), 32, 4), + (pa.decimal32(9, 4), 32, 4), + (pa.decimal64(18, 4), 64, 8), (pa.decimal128(19, 4), 128, 16), (pa.decimal256(76, 38), 256, 32), (pa.binary(42), 42 * 8, 42), @@ -1002,6 +1026,14 @@ def test_fixed_size_binary_byte_width(): def test_decimal_properties(): + ty = pa.decimal32(9, 4) + assert ty.byte_width == 4 + assert ty.precision == 9 + assert ty.scale == 4 + ty = pa.decimal64(18, 4) + assert ty.byte_width == 8 + assert ty.precision == 18 + assert ty.scale == 4 ty = pa.decimal128(19, 4) assert ty.byte_width == 16 assert ty.precision == 19 @@ -1013,6 +1045,18 @@ def test_decimal_properties(): def test_decimal_overflow(): + pa.decimal32(1, 0) + pa.decimal32(9, 0) + for i in (0, -1, 10): + with pytest.raises(ValueError): + pa.decimal32(i, 0) + + pa.decimal64(1, 0) + pa.decimal64(18, 0) + for i in (0, -1, 19): + with pytest.raises(ValueError): + pa.decimal64(i, 0) + pa.decimal128(1, 0) pa.decimal128(38, 0) for i in (0, -1, 39): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 
827243ce00e16..3caf068a4c9b1 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -73,7 +73,10 @@ def _get_pandas_type_map(): _Type_STRING: np.object_, _Type_LIST: np.object_, _Type_MAP: np.object_, + _Type_DECIMAL32: np.object_, + _Type_DECIMAL64: np.object_, _Type_DECIMAL128: np.object_, + _Type_DECIMAL256: np.object_, }) return _pandas_type_map @@ -1417,6 +1420,104 @@ cdef class FixedSizeBinaryType(DataType): return binary, (self.byte_width,) +cdef class Decimal32Type(FixedSizeBinaryType): + """ + Concrete class for decimal32 data types. + + Examples + -------- + Create an instance of decimal32 type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + FixedSizeBinaryType.init(self, type) + self.decimal32_type = type.get() + + def __reduce__(self): + return decimal32, (self.precision, self.scale) + + @property + def precision(self): + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.precision + 5 + """ + return self.decimal32_type.precision() + + @property + def scale(self): + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.scale + 2 + """ + return self.decimal32_type.scale() + + +cdef class Decimal64Type(FixedSizeBinaryType): + """ + Concrete class for decimal64 data types. 
+ + Examples + -------- + Create an instance of decimal64 type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + FixedSizeBinaryType.init(self, type) + self.decimal64_type = type.get() + + def __reduce__(self): + return decimal64, (self.precision, self.scale) + + @property + def precision(self): + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.precision + 5 + """ + return self.decimal64_type.precision() + + @property + def scale(self): + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.scale + 2 + """ + return self.decimal64_type.scale() + + cdef class Decimal128Type(FixedSizeBinaryType): """ Concrete class for decimal128 data types. @@ -4500,6 +4601,116 @@ def float64(): return primitive_type(_Type_DOUBLE) +cpdef DataType decimal32(int precision, int scale=0): + """ + Create decimal type with precision and scale and 32-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal32(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 32-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal32(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 32-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 9 significant digits, consider + using ``decimal64``, ``decimal128``, or ``decimal256``. 
+ + Parameters + ---------- + precision : int + Must be between 1 and 9 + scale : int + + Returns + ------- + decimal_type : Decimal32Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal('123.45') + >>> pa.array([a], pa.decimal32(5, 2)) + + [ + 123.45 + ] + """ + cdef shared_ptr[CDataType] decimal_type + if precision < 1 or precision > 9: + raise ValueError("precision should be between 1 and 9") + decimal_type.reset(new CDecimal32Type(precision, scale)) + return pyarrow_wrap_data_type(decimal_type) + + +cpdef DataType decimal64(int precision, int scale=0): + """ + Create decimal type with precision and scale and 64-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal64(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 64-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal64(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 64-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 18 significant digits, consider + using ``decimal128``, or ``decimal256``. 
+ + Parameters + ---------- + precision : int + Must be between 1 and 18 + scale : int + + Returns + ------- + decimal_type : Decimal64Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal('123.45') + >>> pa.array([a], pa.decimal64(5, 2)) + + [ + 123.45 + ] + """ + cdef shared_ptr[CDataType] decimal_type + if precision < 1 or precision > 18: + raise ValueError("precision should be between 1 and 18") + decimal_type.reset(new CDecimal64Type(precision, scale)) + return pyarrow_wrap_data_type(decimal_type) + + cpdef DataType decimal128(int precision, int scale=0): """ Create decimal type with precision and scale and 128-bit width. diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index 66b1ec33953a9..2bb5cfcf8b739 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -32,7 +32,8 @@ lib.Type_UINT64} _INTEGER_TYPES = _SIGNED_INTEGER_TYPES | _UNSIGNED_INTEGER_TYPES _FLOATING_TYPES = {lib.Type_HALF_FLOAT, lib.Type_FLOAT, lib.Type_DOUBLE} -_DECIMAL_TYPES = {lib.Type_DECIMAL128, lib.Type_DECIMAL256} +_DECIMAL_TYPES = {lib.Type_DECIMAL32, lib.Type_DECIMAL64, lib.Type_DECIMAL128, + lib.Type_DECIMAL256} _DATE_TYPES = {lib.Type_DATE32, lib.Type_DATE64} _TIME_TYPES = {lib.Type_TIME32, lib.Type_TIME64} _INTERVAL_TYPES = {lib.Type_INTERVAL_MONTH_DAY_NANO} @@ -289,6 +290,16 @@ def is_decimal(t): return t.id in _DECIMAL_TYPES +@doc(is_null, datatype="decimal32") +def is_decimal32(t): + return t.id == lib.Type_DECIMAL32 + + +@doc(is_null, datatype="decimal64") +def is_decimal64(t): + return t.id == lib.Type_DECIMAL64 + + @doc(is_null, datatype="decimal128") def is_decimal128(t): return t.id == lib.Type_DECIMAL128