From 2ee135310c002cd6777ccaacf0e8bf9649d23907 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 10 Oct 2025 15:27:32 -0600 Subject: [PATCH] fix: Add overflow checks to SparkDateAdd/Sub to avoid panics (#18013) * fix * fix * fix * fix * fix * add negative overflow test * remove unrelated test * update test --- .../spark/src/function/datetime/date_add.rs | 25 ++++++++++++++----- .../spark/src/function/datetime/date_sub.rs | 25 ++++++++++++++----- .../test_files/spark/datetime/date_add.slt | 16 ++++++++++++ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/datafusion/spark/src/function/datetime/date_add.rs b/datafusion/spark/src/function/datetime/date_add.rs index 58633e15f962..a00430febcdb 100644 --- a/datafusion/spark/src/function/datetime/date_add.rs +++ b/datafusion/spark/src/function/datetime/date_add.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use arrow::array::ArrayRef; use arrow::compute; use arrow::datatypes::{DataType, Date32Type}; +use arrow::error::ArrowError; use datafusion_common::cast::{ as_date32_array, as_int16_array, as_int32_array, as_int8_array, }; @@ -96,26 +97,38 @@ fn spark_date_add(args: &[ArrayRef]) -> Result { let result = match days_arg.data_type() { DataType::Int8 => { let days_array = as_int8_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date + days as i32, + |date, days| { + date.checked_add(days as i32).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_add".to_string()) + }) + }, )? } DataType::Int16 => { let days_array = as_int16_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date + days as i32, + |date, days| { + date.checked_add(days as i32).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_add".to_string()) + }) + }, )? } DataType::Int32 => { let days_array = as_int32_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date + days, + |date, days| { + date.checked_add(days).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_add".to_string()) + }) + }, )? } _ => { diff --git a/datafusion/spark/src/function/datetime/date_sub.rs b/datafusion/spark/src/function/datetime/date_sub.rs index aa10c05b8a99..6925d7e85d68 100644 --- a/datafusion/spark/src/function/datetime/date_sub.rs +++ b/datafusion/spark/src/function/datetime/date_sub.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use arrow::array::ArrayRef; use arrow::compute; use arrow::datatypes::{DataType, Date32Type}; +use arrow::error::ArrowError; use datafusion_common::cast::{ as_date32_array, as_int16_array, as_int32_array, as_int8_array, }; @@ -90,26 +91,38 @@ fn spark_date_sub(args: &[ArrayRef]) -> Result { let result = match days_arg.data_type() { DataType::Int8 => { let days_array = as_int8_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date - days as i32, + |date, days| { + date.checked_sub(days as i32).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_sub".to_string()) + }) + }, )? } DataType::Int16 => { let days_array = as_int16_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date - days as i32, + |date, days| { + date.checked_sub(days as i32).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_sub".to_string()) + }) + }, )? } DataType::Int32 => { let days_array = as_int32_array(days_arg)?; - compute::binary::<_, _, _, Date32Type>( + compute::try_binary::<_, _, _, Date32Type>( date_array, days_array, - |date, days| date - days, + |date, days| { + date.checked_sub(days).ok_or_else(|| { + ArrowError::ArithmeticOverflow("date_sub".to_string()) + }) + }, )? } _ => { diff --git a/datafusion/sqllogictest/test_files/spark/datetime/date_add.slt b/datafusion/sqllogictest/test_files/spark/datetime/date_add.slt index 146f97016638..2e9851ca1e59 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/date_add.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/date_add.slt @@ -45,6 +45,22 @@ SELECT date_sub('2016-07-30'::date, 0::int); ---- 2016-07-30 +query error DataFusion error: Arrow error: Arithmetic overflow: date_add +SELECT date_add('2016-07-30'::date, 2147483647::int); + +query error DataFusion error: Arrow error: Arithmetic overflow: date_sub +SELECT date_sub('1969-01-01'::date, 2147483647::int); + +query D +SELECT date_add('2016-07-30'::date, 100000::int); +---- +2290-05-15 + +query D +SELECT date_sub('2016-07-30'::date, 100000::int); +---- +1742-10-15 + # Test with negative day values (should subtract days) query D SELECT date_add('2016-07-30'::date, -5::int);