From dbab57dbfc6706045cac342a92163936af478d9f Mon Sep 17 00:00:00 2001 From: Clide Stefani <109172241+Monkwire3@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:55:33 -0400 Subject: [PATCH 1/2] Split arrow-cast::cast::decimal from arrow-cast::cast --- arrow-cast/src/cast/decimal.rs | 573 +++++++++++++++++++++++++++++++++ arrow-cast/src/cast/mod.rs | 557 +------------------------------- 2 files changed, 575 insertions(+), 555 deletions(-) create mode 100644 arrow-cast/src/cast/decimal.rs diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs new file mode 100644 index 000000000000..d89d9e5f74f7 --- /dev/null +++ b/arrow-cast/src/cast/decimal.rs @@ -0,0 +1,573 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::cast::*; + +/// A utility trait that provides checked conversions between +/// decimal types inspired by [`NumCast`] +pub(crate) trait DecimalCast: Sized { + fn to_i128(self) -> Option; + + fn to_i256(self) -> Option; + + fn from_decimal(n: T) -> Option; +} + +impl DecimalCast for i128 { + fn to_i128(self) -> Option { + Some(self) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self)) + } + + fn from_decimal(n: T) -> Option { + n.to_i128() + } +} + +impl DecimalCast for i256 { + fn to_i128(self) -> Option { + self.to_i128() + } + + fn to_i256(self) -> Option { + Some(self) + } + + fn from_decimal(n: T) -> Option { + n.to_i256() + } +} + +pub(crate) fn cast_decimal_to_decimal_error( + output_precision: u8, + output_scale: i8, +) -> impl Fn(::Native) -> ArrowError +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast + ArrowNativeTypeOp, + O::Native: DecimalCast + ArrowNativeTypeOp, +{ + move |x: I::Native| { + ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + O::PREFIX, + output_precision, + output_scale, + x + )) + } +} + +pub(crate) fn convert_to_smaller_scale_decimal( + array: &PrimitiveArray, + input_scale: i8, + output_precision: u8, + output_scale: i8, + cast_options: &CastOptions, +) -> Result, ArrowError> +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast + ArrowNativeTypeOp, + O::Native: DecimalCast + ArrowNativeTypeOp, +{ + let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + let div = I::Native::from_decimal(10_i128) + .unwrap() + .pow_checked((input_scale - output_scale) as u32)?; + + let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); + let half_neg = half.neg_wrapping(); + + let f = |x: I::Native| { + // div is >= 10 and so this cannot overflow + let d = x.div_wrapping(div); + let r = x.mod_wrapping(div); + + // Round result + let adjusted = match x >= I::Native::ZERO { + true if r >= half => d.add_wrapping(I::Native::ONE), + false if r <= half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) + }; + + Ok(match cast_options.safe { + true => array.unary_opt(f), + false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + }) +} + +pub(crate) fn convert_to_bigger_or_equal_scale_decimal( + array: &PrimitiveArray, + input_scale: i8, + output_precision: u8, + output_scale: i8, + cast_options: &CastOptions, +) -> Result, ArrowError> +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast + ArrowNativeTypeOp, + O::Native: DecimalCast + ArrowNativeTypeOp, +{ + let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + let mul = O::Native::from_decimal(10_i128) + .unwrap() + .pow_checked((output_scale - input_scale) as u32)?; + + let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + + Ok(match cast_options.safe { + true => array.unary_opt(f), + false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + }) +} + +// Only support one type of decimal cast operations +pub(crate) fn cast_decimal_to_decimal_same_type( + array: &PrimitiveArray, + input_scale: i8, + output_precision: u8, + output_scale: i8, + cast_options: &CastOptions, +) -> Result +where + T: DecimalType, + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + let array: PrimitiveArray = match input_scale.cmp(&output_scale) { + Ordering::Equal => { + // the scale doesn't change, the native value don't need to be changed + array.clone() + } + Ordering::Greater => convert_to_smaller_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )?, + Ordering::Less => { + // input_scale < output_scale + convert_to_bigger_or_equal_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? + } + }; + + Ok(Arc::new(array.with_precision_and_scale( + output_precision, + output_scale, + )?)) +} + +// Support two different types of decimal cast operations +pub(crate) fn cast_decimal_to_decimal( + array: &PrimitiveArray, + input_scale: i8, + output_precision: u8, + output_scale: i8, + cast_options: &CastOptions, +) -> Result +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast + ArrowNativeTypeOp, + O::Native: DecimalCast + ArrowNativeTypeOp, +{ + let array: PrimitiveArray = if input_scale > output_scale { + convert_to_smaller_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? + } else { + convert_to_bigger_or_equal_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? + }; + + Ok(Arc::new(array.with_precision_and_scale( + output_precision, + output_scale, + )?)) +} + +/// Parses given string to specified decimal native (i128/i256) based on given +/// scale. Returns an `Err` if it cannot parse given string. +pub(crate) fn parse_string_to_decimal_native( + value_str: &str, + scale: usize, +) -> Result +where + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + let value_str = value_str.trim(); + let parts: Vec<&str> = value_str.split('.').collect(); + if parts.len() > 2 { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid decimal format: {value_str:?}" + ))); + } + + let (negative, first_part) = if parts[0].is_empty() { + (false, parts[0]) + } else { + match parts[0].as_bytes()[0] { + b'-' => (true, &parts[0][1..]), + b'+' => (false, &parts[0][1..]), + _ => (false, parts[0]), + } + }; + + let integers = first_part.trim_start_matches('0'); + let decimals = if parts.len() == 2 { parts[1] } else { "" }; + + if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid decimal format: {value_str:?}" + ))); + } + + if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid decimal format: {value_str:?}" + ))); + } + + // Adjust decimal based on scale + let mut number_decimals = if decimals.len() > scale { + let decimal_number = i256::from_string(decimals).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!("Cannot parse decimal format: {value_str}")) + })?; + + let div = i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?; + + let half = div.div_wrapping(i256::from_i128(2)); + let half_neg = half.neg_wrapping(); + + let d = decimal_number.div_wrapping(div); + let r = decimal_number.mod_wrapping(div); + + // Round result + let adjusted = match decimal_number >= i256::ZERO { + true if r >= half => d.add_wrapping(i256::ONE), + false if r <= half_neg => d.sub_wrapping(i256::ONE), + _ => d, + }; + + let integers = if !integers.is_empty() { + i256::from_string(integers) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "Cannot parse decimal format: {value_str}" + )) + }) + .map(|v| v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)))? + } else { + i256::ZERO + }; + + format!("{}", integers.add_wrapping(adjusted)) + } else { + let padding = if scale > decimals.len() { scale } else { 0 }; + + let decimals = format!("{decimals:0( + from: &GenericStringArray, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result, ArrowError> +where + T: DecimalType, + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + if cast_options.safe { + let iter = from.iter().map(|v| { + v.and_then(|v| parse_string_to_decimal_native::(v, scale as usize).ok()) + .and_then(|v| { + T::validate_decimal_precision(v, precision) + .is_ok() + .then_some(v) + }) + }); + // Benefit: + // 20% performance improvement + // Soundness: + // The iterator is trustedLen because it comes from an `StringArray`. + Ok(unsafe { + PrimitiveArray::::from_trusted_len_iter(iter) + .with_precision_and_scale(precision, scale)? + }) + } else { + let vec = from + .iter() + .map(|v| { + v.map(|v| { + parse_string_to_decimal_native::(v, scale as usize) + .map_err(|_| { + ArrowError::CastError(format!( + "Cannot cast string '{}' to value of {:?} type", + v, + T::DATA_TYPE, + )) + }) + .and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v)) + }) + .transpose() + }) + .collect::, _>>()?; + // Benefit: + // 20% performance improvement + // Soundness: + // The iterator is trustedLen because it comes from an `StringArray`. + Ok(unsafe { + PrimitiveArray::::from_trusted_len_iter(vec.iter()) + .with_precision_and_scale(precision, scale)? + }) + } +} + +/// Cast Utf8 to decimal +pub(crate) fn cast_string_to_decimal( + from: &dyn Array, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result +where + T: DecimalType, + T::Native: DecimalCast + ArrowNativeTypeOp, +{ + if scale < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "Cannot cast string to decimal with negative scale {scale}" + ))); + } + + if scale > T::MAX_SCALE { + return Err(ArrowError::InvalidArgumentError(format!( + "Cannot cast string to decimal greater than maximum scale {}", + T::MAX_SCALE + ))); + } + + Ok(Arc::new(string_to_decimal_cast::( + from.as_any() + .downcast_ref::>() + .unwrap(), + precision, + scale, + cast_options, + )?)) +} + +pub(crate) fn cast_floating_point_to_decimal128( + array: &PrimitiveArray, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result +where + ::Native: AsPrimitive, +{ + let mul = 10_f64.powi(scale as i32); + + if cast_options.safe { + array + .unary_opt::<_, Decimal128Type>(|v| { + (mul * v.as_()) + .round() + .to_i128() + .filter(|v| Decimal128Type::validate_decimal_precision(*v, precision).is_ok()) + }) + .with_precision_and_scale(precision, scale) + .map(|a| Arc::new(a) as ArrayRef) + } else { + array + .try_unary::<_, Decimal128Type, _>(|v| { + (mul * v.as_()) + .round() + .to_i128() + .ok_or_else(|| { + ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal128Type::PREFIX, + precision, + scale, + v + )) + }) + .and_then(|v| { + Decimal128Type::validate_decimal_precision(v, precision).map(|_| v) + }) + })? + .with_precision_and_scale(precision, scale) + .map(|a| Arc::new(a) as ArrayRef) + } +} + +pub(crate) fn cast_floating_point_to_decimal256( + array: &PrimitiveArray, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result +where + ::Native: AsPrimitive, +{ + let mul = 10_f64.powi(scale as i32); + + if cast_options.safe { + array + .unary_opt::<_, Decimal256Type>(|v| { + i256::from_f64((v.as_() * mul).round()) + .filter(|v| Decimal256Type::validate_decimal_precision(*v, precision).is_ok()) + }) + .with_precision_and_scale(precision, scale) + .map(|a| Arc::new(a) as ArrayRef) + } else { + array + .try_unary::<_, Decimal256Type, _>(|v| { + i256::from_f64((v.as_() * mul).round()) + .ok_or_else(|| { + ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal256Type::PREFIX, + precision, + scale, + v + )) + }) + .and_then(|v| { + Decimal256Type::validate_decimal_precision(v, precision).map(|_| v) + }) + })? + .with_precision_and_scale(precision, scale) + .map(|a| Arc::new(a) as ArrayRef) + } +} + +pub(crate) fn cast_decimal_to_integer( + array: &dyn Array, + base: D::Native, + scale: i8, + cast_options: &CastOptions, +) -> Result +where + T: ArrowPrimitiveType, + ::Native: NumCast, + D: DecimalType + ArrowPrimitiveType, + ::Native: ArrowNativeTypeOp + ToPrimitive, +{ + let array = array.as_primitive::(); + + let div: D::Native = base.pow_checked(scale as u32).map_err(|_| { + ArrowError::CastError(format!( + "Cannot cast to {:?}. The scale {} causes overflow.", + D::PREFIX, + scale, + )) + })?; + + let mut value_builder = PrimitiveBuilder::::with_capacity(array.len()); + + if cast_options.safe { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let v = array + .value(i) + .div_checked(div) + .ok() + .and_then(::from::); + + value_builder.append_option(v); + } + } + } else { + for i in 0..array.len() { + if array.is_null(i) { + value_builder.append_null(); + } else { + let v = array.value(i).div_checked(div)?; + + let value = ::from::(v).ok_or_else(|| { + ArrowError::CastError(format!( + "value of {:?} is out of range {}", + v, + T::DATA_TYPE + )) + })?; + + value_builder.append_value(value); + } + } + } + Ok(Arc::new(value_builder.finish())) +} + +// cast the decimal array to floating-point array +pub(crate) fn cast_decimal_to_float( + array: &dyn Array, + op: F, +) -> Result +where + F: Fn(D::Native) -> T::Native, +{ + let array = array.as_primitive::(); + let array = array.unary::<_, T>(op); + Ok(Arc::new(array)) +} diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 2b9892aa3fba..61bbf1280030 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -37,7 +37,9 @@ //! assert_eq!(7.0, c.value(2)); //! ``` +mod decimal; mod list; +use crate::cast::decimal::*; use crate::cast::list::*; use chrono::{NaiveTime, Offset, TimeZone, Utc}; @@ -337,92 +339,6 @@ where Ok(Arc::new(array.with_precision_and_scale(precision, scale)?)) } -fn cast_floating_point_to_decimal128( - array: &PrimitiveArray, - precision: u8, - scale: i8, - cast_options: &CastOptions, -) -> Result -where - ::Native: AsPrimitive, -{ - let mul = 10_f64.powi(scale as i32); - - if cast_options.safe { - array - .unary_opt::<_, Decimal128Type>(|v| { - (mul * v.as_()) - .round() - .to_i128() - .filter(|v| Decimal128Type::validate_decimal_precision(*v, precision).is_ok()) - }) - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } else { - array - .try_unary::<_, Decimal128Type, _>(|v| { - (mul * v.as_()) - .round() - .to_i128() - .ok_or_else(|| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal128Type::PREFIX, - precision, - scale, - v - )) - }) - .and_then(|v| { - Decimal128Type::validate_decimal_precision(v, precision).map(|_| v) - }) - })? - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } -} - -fn cast_floating_point_to_decimal256( - array: &PrimitiveArray, - precision: u8, - scale: i8, - cast_options: &CastOptions, -) -> Result -where - ::Native: AsPrimitive, -{ - let mul = 10_f64.powi(scale as i32); - - if cast_options.safe { - array - .unary_opt::<_, Decimal256Type>(|v| { - i256::from_f64((v.as_() * mul).round()) - .filter(|v| Decimal256Type::validate_decimal_precision(*v, precision).is_ok()) - }) - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } else { - array - .try_unary::<_, Decimal256Type, _>(|v| { - i256::from_f64((v.as_() * mul).round()) - .ok_or_else(|| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal256Type::PREFIX, - precision, - scale, - v - )) - }) - .and_then(|v| { - Decimal256Type::validate_decimal_precision(v, precision).map(|_| v) - }) - })? - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } -} - /// Cast the array from interval year month to month day nano fn cast_interval_year_month_to_interval_month_day_nano( array: &dyn Array, @@ -552,79 +468,6 @@ fn cast_reinterpret_arrays().reinterpret_cast::())) } -fn cast_decimal_to_integer( - array: &dyn Array, - base: D::Native, - scale: i8, - cast_options: &CastOptions, -) -> Result -where - T: ArrowPrimitiveType, - ::Native: NumCast, - D: DecimalType + ArrowPrimitiveType, - ::Native: ArrowNativeTypeOp + ToPrimitive, -{ - let array = array.as_primitive::(); - - let div: D::Native = base.pow_checked(scale as u32).map_err(|_| { - ArrowError::CastError(format!( - "Cannot cast to {:?}. The scale {} causes overflow.", - D::PREFIX, - scale, - )) - })?; - - let mut value_builder = PrimitiveBuilder::::with_capacity(array.len()); - - if cast_options.safe { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array - .value(i) - .div_checked(div) - .ok() - .and_then(::from::); - - value_builder.append_option(v); - } - } - } else { - for i in 0..array.len() { - if array.is_null(i) { - value_builder.append_null(); - } else { - let v = array.value(i).div_checked(div)?; - - let value = ::from::(v).ok_or_else(|| { - ArrowError::CastError(format!( - "value of {:?} is out of range {}", - v, - T::DATA_TYPE - )) - })?; - - value_builder.append_value(value); - } - } - } - Ok(Arc::new(value_builder.finish())) -} - -// cast the decimal array to floating-point array -fn cast_decimal_to_float( - array: &dyn Array, - op: F, -) -> Result -where - F: Fn(D::Native) -> T::Native, -{ - let array = array.as_primitive::(); - let array = array.unary::<_, T>(op); - Ok(Arc::new(array)) -} - fn make_timestamp_array( array: &PrimitiveArray, unit: TimeUnit, @@ -2100,212 +1943,6 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 { } } -/// A utility trait that provides checked conversions between -/// decimal types inspired by [`NumCast`] -trait DecimalCast: Sized { - fn to_i128(self) -> Option; - - fn to_i256(self) -> Option; - - fn from_decimal(n: T) -> Option; -} - -impl DecimalCast for i128 { - fn to_i128(self) -> Option { - Some(self) - } - - fn to_i256(self) -> Option { - Some(i256::from_i128(self)) - } - - fn from_decimal(n: T) -> Option { - n.to_i128() - } -} - -impl DecimalCast for i256 { - fn to_i128(self) -> Option { - self.to_i128() - } - - fn to_i256(self) -> Option { - Some(self) - } - - fn from_decimal(n: T) -> Option { - n.to_i256() - } -} - -fn cast_decimal_to_decimal_error( - output_precision: u8, - output_scale: i8, -) -> impl Fn(::Native) -> ArrowError -where - I: DecimalType, - O: DecimalType, - I::Native: DecimalCast + ArrowNativeTypeOp, - O::Native: DecimalCast + ArrowNativeTypeOp, -{ - move |x: I::Native| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - O::PREFIX, - output_precision, - output_scale, - x - )) - } -} - -fn convert_to_smaller_scale_decimal( - array: &PrimitiveArray, - input_scale: i8, - output_precision: u8, - output_scale: i8, - cast_options: &CastOptions, -) -> Result, ArrowError> -where - I: DecimalType, - O: DecimalType, - I::Native: DecimalCast + ArrowNativeTypeOp, - O::Native: DecimalCast + ArrowNativeTypeOp, -{ - let error = cast_decimal_to_decimal_error::(output_precision, output_scale); - let div = I::Native::from_decimal(10_i128) - .unwrap() - .pow_checked((input_scale - output_scale) as u32)?; - - let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); - let half_neg = half.neg_wrapping(); - - let f = |x: I::Native| { - // div is >= 10 and so this cannot overflow - let d = x.div_wrapping(div); - let r = x.mod_wrapping(div); - - // Round result - let adjusted = match x >= I::Native::ZERO { - true if r >= half => d.add_wrapping(I::Native::ONE), - false if r <= half_neg => d.sub_wrapping(I::Native::ONE), - _ => d, - }; - O::Native::from_decimal(adjusted) - }; - - Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, - }) -} - -fn convert_to_bigger_or_equal_scale_decimal( - array: &PrimitiveArray, - input_scale: i8, - output_precision: u8, - output_scale: i8, - cast_options: &CastOptions, -) -> Result, ArrowError> -where - I: DecimalType, - O: DecimalType, - I::Native: DecimalCast + ArrowNativeTypeOp, - O::Native: DecimalCast + ArrowNativeTypeOp, -{ - let error = cast_decimal_to_decimal_error::(output_precision, output_scale); - let mul = O::Native::from_decimal(10_i128) - .unwrap() - .pow_checked((output_scale - input_scale) as u32)?; - - let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); - - Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, - }) -} - -// Only support one type of decimal cast operations -fn cast_decimal_to_decimal_same_type( - array: &PrimitiveArray, - input_scale: i8, - output_precision: u8, - output_scale: i8, - cast_options: &CastOptions, -) -> Result -where - T: DecimalType, - T::Native: DecimalCast + ArrowNativeTypeOp, -{ - let array: PrimitiveArray = match input_scale.cmp(&output_scale) { - Ordering::Equal => { - // the scale doesn't change, the native value don't need to be changed - array.clone() - } - Ordering::Greater => convert_to_smaller_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )?, - Ordering::Less => { - // input_scale < output_scale - convert_to_bigger_or_equal_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )? - } - }; - - Ok(Arc::new(array.with_precision_and_scale( - output_precision, - output_scale, - )?)) -} - -// Support two different types of decimal cast operations -fn cast_decimal_to_decimal( - array: &PrimitiveArray, - input_scale: i8, - output_precision: u8, - output_scale: i8, - cast_options: &CastOptions, -) -> Result -where - I: DecimalType, - O: DecimalType, - I::Native: DecimalCast + ArrowNativeTypeOp, - O::Native: DecimalCast + ArrowNativeTypeOp, -{ - let array: PrimitiveArray = if input_scale > output_scale { - convert_to_smaller_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )? - } else { - convert_to_bigger_or_equal_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )? - }; - - Ok(Arc::new(array.with_precision_and_scale( - output_precision, - output_scale, - )?)) -} - /// Convert Array into a PrimitiveArray of type, and apply numeric cast fn cast_numeric_arrays( from: &dyn Array, @@ -2618,196 +2255,6 @@ where Ok(Arc::new(output_array)) } -/// Parses given string to specified decimal native (i128/i256) based on given -/// scale. Returns an `Err` if it cannot parse given string. -fn parse_string_to_decimal_native( - value_str: &str, - scale: usize, -) -> Result -where - T::Native: DecimalCast + ArrowNativeTypeOp, -{ - let value_str = value_str.trim(); - let parts: Vec<&str> = value_str.split('.').collect(); - if parts.len() > 2 { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid decimal format: {value_str:?}" - ))); - } - - let (negative, first_part) = if parts[0].is_empty() { - (false, parts[0]) - } else { - match parts[0].as_bytes()[0] { - b'-' => (true, &parts[0][1..]), - b'+' => (false, &parts[0][1..]), - _ => (false, parts[0]), - } - }; - - let integers = first_part.trim_start_matches('0'); - let decimals = if parts.len() == 2 { parts[1] } else { "" }; - - if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid decimal format: {value_str:?}" - ))); - } - - if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid decimal format: {value_str:?}" - ))); - } - - // Adjust decimal based on scale - let mut number_decimals = if decimals.len() > scale { - let decimal_number = i256::from_string(decimals).ok_or_else(|| { - ArrowError::InvalidArgumentError(format!("Cannot parse decimal format: {value_str}")) - })?; - - let div = i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?; - - let half = div.div_wrapping(i256::from_i128(2)); - let half_neg = half.neg_wrapping(); - - let d = decimal_number.div_wrapping(div); - let r = decimal_number.mod_wrapping(div); - - // Round result - let adjusted = match decimal_number >= i256::ZERO { - true if r >= half => d.add_wrapping(i256::ONE), - false if r <= half_neg => d.sub_wrapping(i256::ONE), - _ => d, - }; - - let integers = if !integers.is_empty() { - i256::from_string(integers) - .ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Cannot parse decimal format: {value_str}" - )) - }) - .map(|v| v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)))? - } else { - i256::ZERO - }; - - format!("{}", integers.add_wrapping(adjusted)) - } else { - let padding = if scale > decimals.len() { scale } else { 0 }; - - let decimals = format!("{decimals:0( - from: &GenericStringArray, - precision: u8, - scale: i8, - cast_options: &CastOptions, -) -> Result, ArrowError> -where - T: DecimalType, - T::Native: DecimalCast + ArrowNativeTypeOp, -{ - if cast_options.safe { - let iter = from.iter().map(|v| { - v.and_then(|v| parse_string_to_decimal_native::(v, scale as usize).ok()) - .and_then(|v| { - T::validate_decimal_precision(v, precision) - .is_ok() - .then_some(v) - }) - }); - // Benefit: - // 20% performance improvement - // Soundness: - // The iterator is trustedLen because it comes from an `StringArray`. - Ok(unsafe { - PrimitiveArray::::from_trusted_len_iter(iter) - .with_precision_and_scale(precision, scale)? - }) - } else { - let vec = from - .iter() - .map(|v| { - v.map(|v| { - parse_string_to_decimal_native::(v, scale as usize) - .map_err(|_| { - ArrowError::CastError(format!( - "Cannot cast string '{}' to value of {:?} type", - v, - T::DATA_TYPE, - )) - }) - .and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v)) - }) - .transpose() - }) - .collect::, _>>()?; - // Benefit: - // 20% performance improvement - // Soundness: - // The iterator is trustedLen because it comes from an `StringArray`. - Ok(unsafe { - PrimitiveArray::::from_trusted_len_iter(vec.iter()) - .with_precision_and_scale(precision, scale)? - }) - } -} - -/// Cast Utf8 to decimal -fn cast_string_to_decimal( - from: &dyn Array, - precision: u8, - scale: i8, - cast_options: &CastOptions, -) -> Result -where - T: DecimalType, - T::Native: DecimalCast + ArrowNativeTypeOp, -{ - if scale < 0 { - return Err(ArrowError::InvalidArgumentError(format!( - "Cannot cast string to decimal with negative scale {scale}" - ))); - } - - if scale > T::MAX_SCALE { - return Err(ArrowError::InvalidArgumentError(format!( - "Cannot cast string to decimal greater than maximum scale {}", - T::MAX_SCALE - ))); - } - - Ok(Arc::new(string_to_decimal_cast::( - from.as_any() - .downcast_ref::>() - .unwrap(), - precision, - scale, - cast_options, - )?)) -} - /// Cast numeric types to Boolean /// /// Any zero value returns `false` while non-zero returns `true` From 4dcbd01532866f50df4cbecb946eb39c990e0c64 Mon Sep 17 00:00:00 2001 From: Clide Stefani <109172241+Monkwire3@users.noreply.github.com> Date: Mon, 25 Mar 2024 19:44:28 -0400 Subject: [PATCH 2/2] Minor formatting change --- arrow-cast/src/cast/decimal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index d89d9e5f74f7..600f868a3e01 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -559,7 +559,7 @@ where Ok(Arc::new(value_builder.finish())) } -// cast the decimal array to floating-point array +// Cast the decimal array to floating-point array pub(crate) fn cast_decimal_to_float( array: &dyn Array, op: F,