Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flip interval field ordering (#5654) #5656

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1346,8 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let max_nanos = IntervalMonthDayNanoType::make_value(0, 0, i64::MAX);
let a = IntervalMonthDayNanoArray::from(vec![max_nanos]);
let one_nanos = IntervalMonthDayNanoType::make_value(0, 0, 1);
let b = IntervalMonthDayNanoArray::from(vec![one_nanos]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
Expand Down
55 changes: 29 additions & 26 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).

```text
┌──────────────────────────────┬───────────────────────────┐
Nanos Days Months
(64 bits) │ (32 bits) │ (32 bits)
└──────────────────────────────┴───────────────────────────┘
0 63 95 127 bit offset
┌────────────────────────────┬─────────────────────────────┐
Months Days Nanos
(32 bits) │ (32 bits) (64 bits)
└────────────────────────────┴─────────────────────────────┘
0 32 64 128 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details

Expand All @@ -290,9 +290,9 @@ representation which is fast and efficient, but leads
to potentially surprising results.

For example a
`IntervalMonthDayNano` of `1 month` will compare as **greater** than a
`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
is larger than the binary representation of 100 days.
`IntervalMonthDayNano` of `1 month` will compare as **less** than a
`IntervalMonthDayNano` of `1 days` because the binary representation of `1 month`
is smaller than the binary representation of 1 days.
"#
);
make_type!(
Expand Down Expand Up @@ -928,13 +928,14 @@ impl IntervalDayTimeType {
int32_t milliseconds = 0;
...
}
64 56 48 40 32 24 16 8 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| days | milliseconds |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌──────────────┬──────────────┐
│ Days │ Milliseconds │
│ (32 bits) │ (32 bits) │
└──────────────┴──────────────┘
0 31 63 bit offset
*/
let m = millis as u64 & u32::MAX as u64;
let d = (days as u64 & u32::MAX as u64) << 32;
let m = (millis as u64 & u32::MAX as u64) << 32;
let d = days as u64 & u32::MAX as u64;
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
}

Expand All @@ -945,8 +946,8 @@ impl IntervalDayTimeType {
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
let days = i as i32;
let ms = (i >> 32) as i32;
(days, ms)
}
}
Expand All @@ -972,14 +973,16 @@ impl IntervalMonthDayNanoType {
int32_t days;
int64_t nanoseconds;
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌───────────────┬─────────────┬─────────────────────────────┐
│ Months │ Days │ Nanos │
│ (32 bits) │ (32 bits) │ (64 bits) │
└───────────────┴─────────────┴─────────────────────────────┘
0 32 64 128 bit offset

*/
let m = (months as u128 & u32::MAX as u128) << 96;
let d = (days as u128 & u32::MAX as u128) << 64;
let n = nanos as u128 & u64::MAX as u128;
let m = months as u128 & u32::MAX as u128;
let d = (days as u128 & u32::MAX as u128) << 32;
let n = (nanos as u128 & u64::MAX as u128) << 64;
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}

Expand All @@ -992,9 +995,9 @@ impl IntervalMonthDayNanoType {
pub fn to_parts(
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> (i32, i32, i64) {
let months = (i >> 96) as i32;
let days = (i >> 64) as i32;
let nanos = i as i64;
let months = i as i32;
let days = (i >> 32) as i32;
let nanos = (i >> 64) as i64;
(months, days, nanos)
}
}
Expand Down
85 changes: 49 additions & 36 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,32 +37,34 @@
//! assert_eq!(7.0, c.value(2));
//! ```

mod decimal;
mod dictionary;
mod list;
mod string;
use crate::cast::decimal::*;
use crate::cast::dictionary::*;
use crate::cast::list::*;
use crate::cast::string::*;

use chrono::{NaiveTime, Offset, TimeZone, Utc};
use std::cmp::Ordering;
use std::sync::Arc;

use crate::display::{ArrayFormatter, FormatOptions};
use crate::parse::{
parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
string_to_datetime, Parser,
};
use chrono::{NaiveTime, Offset, TimeZone, Utc};
use num::cast::AsPrimitive;
use num::{NumCast, ToPrimitive};

use arrow_array::{builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *};
use arrow_buffer::{i256, ArrowNativeType, OffsetBuffer};
use arrow_data::transform::MutableArrayData;
use arrow_data::ArrayData;
use arrow_schema::*;
use arrow_select::take::take;
use num::cast::AsPrimitive;
use num::{NumCast, ToPrimitive};

use crate::cast::decimal::*;
use crate::cast::dictionary::*;
use crate::cast::list::*;
use crate::cast::string::*;
use crate::display::{ArrayFormatter, FormatOptions};
use crate::parse::{
parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
string_to_datetime, Parser,
};

mod decimal;
mod dictionary;
mod list;
mod string;

/// CastOptions provides a way to override the default cast behaviors
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -384,23 +386,27 @@ fn cast_month_day_nano_to_duration<D: ArrowTemporalType<Native = i64>>(
_ => unreachable!(),
};

let convert = |x| {
let (month, day, nanos) = IntervalMonthDayNanoType::to_parts(x);
(month == 0 && day == 0).then_some(nanos / scale)
};

if cast_options.safe {
let iter = array
.iter()
.map(|v| v.and_then(|v| (v >> 64 == 0).then_some((v as i64) / scale)));
let iter = array.iter().map(|x| x.and_then(convert));
Ok(Arc::new(unsafe {
PrimitiveArray::<D>::from_trusted_len_iter(iter)
}))
} else {
let vec = array
.iter()
.map(|v| {
v.map(|v| match v >> 64 {
0 => Ok((v as i64) / scale),
_ => Err(ArrowError::ComputeError(
"Cannot convert interval containing non-zero months or days to duration"
.to_string(),
)),
v.map(|v| {
convert(v).ok_or_else(|| {
ArrowError::ComputeError(
"Cannot convert interval containing non-zero months or days to duration"
.to_string(),
)
})
})
.transpose()
})
Expand Down Expand Up @@ -2240,10 +2246,11 @@ where

#[cfg(test)]
mod tests {
use arrow_buffer::{Buffer, NullBuffer};
use chrono::NaiveDate;
use half::f16;

use arrow_buffer::{Buffer, NullBuffer};

use super::*;

macro_rules! generate_cast_test_case {
Expand Down Expand Up @@ -8272,10 +8279,10 @@ mod tests {
};

// from interval month day nano to duration second
let array = vec![1234567].into();
let array = vec![IntervalMonthDayNanoType::make_value(0, 0, 1_000_000_000)].into();
let casted_array: DurationSecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 0);
assert_eq!(casted_array.value(0), 1);

let array = vec![i128::MAX].into();
let casted_array: DurationSecondArray =
Expand All @@ -8286,7 +8293,7 @@ mod tests {
assert!(res.is_err());

// from interval month day nano to duration millisecond
let array = vec![1234567].into();
let array = vec![IntervalMonthDayNanoType::make_value(0, 0, 1234567)].into();
let casted_array: DurationMillisecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1);
Expand All @@ -8300,7 +8307,7 @@ mod tests {
assert!(res.is_err());

// from interval month day nano to duration microsecond
let array = vec![1234567].into();
let array = vec![IntervalMonthDayNanoType::make_value(0, 0, 1234567)].into();
let casted_array: DurationMicrosecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1234);
Expand All @@ -8315,7 +8322,7 @@ mod tests {
assert!(casted_array.is_err());

// from interval month day nano to duration nanosecond
let array = vec![1234567].into();
let array = vec![IntervalMonthDayNanoType::make_value(0, 0, 1234567)].into();
let casted_array: DurationNanosecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1234567);
Expand Down Expand Up @@ -8373,7 +8380,7 @@ mod tests {
#[test]
fn test_cast_from_interval_year_month_to_interval_month_day_nano() {
// from interval year month to interval month day nano
let array = vec![1234567];
let array = vec![IntervalYearMonthType::make_value(23, 34)];
let casted_array = cast_from_interval_year_month_to_interval_month_day_nano(
array,
&CastOptions::default(),
Expand All @@ -8383,7 +8390,10 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
assert_eq!(casted_array.value(0), 97812474910747780469848774134464512);
assert_eq!(
casted_array.value(0),
IntervalMonthDayNanoType::make_value(310, 0, 0)
);
}

/// helper function to test casting from interval day time to interval month day nano
Expand All @@ -8406,15 +8416,18 @@ mod tests {
#[test]
fn test_cast_from_interval_day_time_to_interval_month_day_nano() {
// from interval day time to interval month day nano
let array = vec![123];
let array = vec![IntervalDayTimeType::make_value(23, 123)];
let casted_array =
cast_from_interval_day_time_to_interval_month_day_nano(array, &CastOptions::default())
.unwrap();
assert_eq!(
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
assert_eq!(casted_array.value(0), 123000000);
assert_eq!(
casted_array.value(0),
IntervalMonthDayNanoType::make_value(0, 23, 123000000)
);
}

#[test]
Expand Down
26 changes: 9 additions & 17 deletions arrow-cast/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,19 +660,16 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalYearMonthType> {

impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalDayTimeType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let value: u64 = self.value(idx) as u64;
let (days, millis) = IntervalDayTimeType::to_parts(self.value(idx));

let days_parts: i32 = ((value & 0xFFFFFFFF00000000) >> 32) as i32;
let milliseconds_part: i32 = (value & 0xFFFFFFFF) as i32;

let secs = milliseconds_part / 1_000;
let secs = millis / 1_000;
let mins = secs / 60;
let hours = mins / 60;

let secs = secs - (mins * 60);
let mins = mins - (hours * 60);

let milliseconds = milliseconds_part % 1_000;
let milliseconds = millis % 1_000;

let secs_sign = if secs < 0 || milliseconds < 0 {
"-"
Expand All @@ -683,7 +680,7 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalDayTimeType> {
write!(
f,
"0 years 0 mons {} days {} hours {} mins {}{}.{:03} secs",
days_parts,
days,
hours,
mins,
secs_sign,
Expand All @@ -696,28 +693,23 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalDayTimeType> {

impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let value: u128 = self.value(idx) as u128;

let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32;
let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;

let secs = nanoseconds_part / 1_000_000_000;
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(self.value(idx));
let secs = nanos / 1_000_000_000;
let mins = secs / 60;
let hours = mins / 60;

let secs = secs - (mins * 60);
let mins = mins - (hours * 60);

let nanoseconds = nanoseconds_part % 1_000_000_000;
let nanoseconds = nanos % 1_000_000_000;

let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };

write!(
f,
"0 years {} mons {} days {} hours {} mins {}{}.{:09} secs",
months_part,
days_part,
months,
days,
hours,
mins,
secs_sign,
Expand Down
9 changes: 4 additions & 5 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1714,18 +1714,17 @@ mod tests {
IntervalDayTimeType::make_value(1, 3000),
// 90M milliseconds
IntervalDayTimeType::make_value(0, 90_000_000),
IntervalDayTimeType::make_value(4, 10),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The interval ordering is now even more arbitrary 😅

I suspect this is the most user-visible change, as I suspect not many people are manually bit-twiddling

],
vec![
IntervalDayTimeType::make_value(0, 1000),
IntervalDayTimeType::make_value(1, 0),
IntervalDayTimeType::make_value(10, 0),
IntervalDayTimeType::make_value(2, 1),
// NB even though 1 day is less than 90M milliseconds long,
// it compares as greater because the underlying type stores
// days and milliseconds as different fields
IntervalDayTimeType::make_value(0, 12),
IntervalDayTimeType::make_value(56, 10),
],
vec![false, true, true, true ,false]
vec![true, false, false, false, false, true]
);

cmp_vec!(
Expand Down Expand Up @@ -1771,7 +1770,7 @@ mod tests {
// 100 days (note is treated as greater than 1 month as the underlying integer representation)
IntervalMonthDayNanoType::make_value(0, 100, 0),
],
vec![false, false, true, false, false]
vec![false, true, true, true, true]
);
}

Expand Down
Loading
Loading