chore: remove interval test (#10888)
marvinlanhenke authored Jun 12, 2024
1 parent 73381fe commit 87d8267
Showing 2 changed files with 2 additions and 159 deletions.
datafusion/core/tests/parquet/arrow_statistics.rs (1 addition & 80 deletions)
@@ -30,8 +30,7 @@ use arrow::datatypes::{
 use arrow_array::{
     make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
     Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array,
-    Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
-    IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray,
+    Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
     LargeStringArray, RecordBatch, StringArray, Time32MillisecondArray,
     Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
     TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
@@ -1061,84 +1060,6 @@ async fn test_dates_64_diff_rg_sizes() {
     .run();
 }
 
-#[tokio::test]
-#[should_panic]
-// Currently this test is marked `should_panic` since statistics for `Interval`
-// columns are not supported and `IntervalMonthDayNano` cannot yet be written
-// to parquet.
-// Refer to issue: https://github.com/apache/arrow-rs/issues/5847
-// and https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747
-async fn test_interval_diff_rg_sizes() {
-    // This creates a parquet file of 3 columns:
-    // "year_month" --> IntervalYearMonthArray
-    // "day_time" --> IntervalDayTimeArray
-    // "month_day_nano" --> IntervalMonthDayNanoArray
-    //
-    // The file is created from 4 record batches of 5 rows each (each with a
-    // null row), which are then split into 2 row groups of sizes 13 and 7.
-    let reader = TestReader {
-        scenario: Scenario::Interval,
-        row_per_group: 13,
-    }
-    .build()
-    .await;
-
-    // TODO: expected values need to be changed once the issue is resolved
-    // expected_min: Arc::new(IntervalYearMonthArray::from(vec![
-    //     IntervalYearMonthType::make_value(1, 10),
-    //     IntervalYearMonthType::make_value(4, 13),
-    // ])),
-    // expected_max: Arc::new(IntervalYearMonthArray::from(vec![
-    //     IntervalYearMonthType::make_value(6, 51),
-    //     IntervalYearMonthType::make_value(8, 53),
-    // ])),
-    Test {
-        reader: &reader,
-        expected_min: Arc::new(IntervalYearMonthArray::from(vec![None, None])),
-        expected_max: Arc::new(IntervalYearMonthArray::from(vec![None, None])),
-        expected_null_counts: UInt64Array::from(vec![2, 2]),
-        expected_row_counts: UInt64Array::from(vec![13, 7]),
-        column_name: "year_month",
-    }
-    .run();
-
-    // expected_min: Arc::new(IntervalDayTimeArray::from(vec![
-    //     IntervalDayTimeType::make_value(1, 10),
-    //     IntervalDayTimeType::make_value(4, 13),
-    // ])),
-    // expected_max: Arc::new(IntervalDayTimeArray::from(vec![
-    //     IntervalDayTimeType::make_value(6, 51),
-    //     IntervalDayTimeType::make_value(8, 53),
-    // ])),
-    Test {
-        reader: &reader,
-        expected_min: Arc::new(IntervalDayTimeArray::from(vec![None, None])),
-        expected_max: Arc::new(IntervalDayTimeArray::from(vec![None, None])),
-        expected_null_counts: UInt64Array::from(vec![2, 2]),
-        expected_row_counts: UInt64Array::from(vec![13, 7]),
-        column_name: "day_time",
-    }
-    .run();
-
-    // expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![
-    //     IntervalMonthDayNanoType::make_value(1, 10, 100),
-    //     IntervalMonthDayNanoType::make_value(4, 13, 103),
-    // ])),
-    // expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![
-    //     IntervalMonthDayNanoType::make_value(6, 51, 501),
-    //     IntervalMonthDayNanoType::make_value(8, 53, 503),
-    // ])),
-    Test {
-        reader: &reader,
-        expected_min: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])),
-        expected_max: Arc::new(IntervalMonthDayNanoArray::from(vec![None, None])),
-        expected_null_counts: UInt64Array::from(vec![2, 2]),
-        expected_row_counts: UInt64Array::from(vec![13, 7]),
-        column_name: "month_day_nano",
-    }
-    .run();
-}
-
 #[tokio::test]
 async fn test_uint() {
     // This creates a parquet file of 4 columns named "u8", "u16", "u32", "u64"
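Note on the removal above: the deleted test was annotated `#[should_panic]` because, per its own comments and https://github.com/apache/arrow-rs/issues/5847, arrow-rs could not yet write `IntervalMonthDayNano` columns to parquet, so no interval statistics were ever produced. The following standalone sketch (an illustration, not part of the commit; the crate versions, `main` harness, and exact failure point are assumptions) performs the write that the parquet `ArrowWriter` rejects:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, IntervalMonthDayNanoArray};
use arrow::datatypes::{DataType, Field, IntervalMonthDayNanoType, IntervalUnit, Schema};
use arrow::record_batch::RecordBatch;
use parquet::arrow::ArrowWriter;

fn main() {
    // Build a single-column batch of IntervalMonthDayNano values, mirroring
    // the "month_day_nano" column from the removed test scenario.
    let schema = Arc::new(Schema::new(vec![Field::new(
        "month_day_nano",
        DataType::Interval(IntervalUnit::MonthDayNano),
        true,
    )]));
    let mdn: ArrayRef = Arc::new(IntervalMonthDayNanoArray::from(vec![
        Some(IntervalMonthDayNanoType::make_value(1, 10, 100)),
        None,
    ]));
    let batch = RecordBatch::try_new(schema.clone(), vec![mdn]).unwrap();

    // The write is expected to fail (panicking via unwrap, like the removed
    // `#[should_panic]` test) because Interval(MonthDayNano) has no parquet
    // mapping yet; depending on the arrow-rs version, the error may surface
    // at writer creation or at write time.
    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, schema, None).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();
}
```

Once arrow-rs gains a parquet mapping for intervals, the test can presumably return with the commented-out `expected_min`/`expected_max` values restored in place of the all-`None` placeholders.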
datafusion/core/tests/parquet/mod.rs (1 addition & 79 deletions)
@@ -18,9 +18,7 @@
 //! Parquet integration tests
 use crate::parquet::utils::MetricsFinder;
 use arrow::array::Decimal128Array;
-use arrow::datatypes::{
-    i256, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
-};
+use arrow::datatypes::i256;
 use arrow::{
     array::{
         make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
@@ -36,10 +34,6 @@ use arrow::{
     record_batch::RecordBatch,
     util::pretty::pretty_format_batches,
 };
-use arrow_array::{
-    IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
-};
-use arrow_schema::IntervalUnit;
 use chrono::{Datelike, Duration, TimeDelta};
 use datafusion::{
     datasource::{provider_as_source, TableProvider},
@@ -92,7 +86,6 @@ enum Scenario {
     Time32Millisecond,
     Time64Nanosecond,
     Time64Microsecond,
-    Interval,
     /// 7 Rows, for each i8, i16, i32, i64, u8, u16, u32, u64, f32, f64
     /// -MIN, -100, -1, 0, 1, 100, MAX
     NumericLimits,
@@ -921,71 +914,6 @@ fn make_dict_batch() -> RecordBatch {
     .unwrap()
 }
 
-fn make_interval_batch(offset: i32) -> RecordBatch {
-    let schema = Schema::new(vec![
-        Field::new(
-            "year_month",
-            DataType::Interval(IntervalUnit::YearMonth),
-            true,
-        ),
-        Field::new("day_time", DataType::Interval(IntervalUnit::DayTime), true),
-        Field::new(
-            "month_day_nano",
-            DataType::Interval(IntervalUnit::MonthDayNano),
-            true,
-        ),
-    ]);
-    let schema = Arc::new(schema);
-
-    let ym_arr = IntervalYearMonthArray::from(vec![
-        Some(IntervalYearMonthType::make_value(1 + offset, 10 + offset)),
-        Some(IntervalYearMonthType::make_value(2 + offset, 20 + offset)),
-        Some(IntervalYearMonthType::make_value(3 + offset, 30 + offset)),
-        None,
-        Some(IntervalYearMonthType::make_value(5 + offset, 50 + offset)),
-    ]);
-
-    let dt_arr = IntervalDayTimeArray::from(vec![
-        Some(IntervalDayTimeType::make_value(1 + offset, 10 + offset)),
-        Some(IntervalDayTimeType::make_value(2 + offset, 20 + offset)),
-        Some(IntervalDayTimeType::make_value(3 + offset, 30 + offset)),
-        None,
-        Some(IntervalDayTimeType::make_value(5 + offset, 50 + offset)),
-    ]);
-
-    // Not yet implemented, refer to:
-    // https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L747
-    let mdn_arr = IntervalMonthDayNanoArray::from(vec![
-        Some(IntervalMonthDayNanoType::make_value(
-            1 + offset,
-            10 + offset,
-            100 + (offset as i64),
-        )),
-        Some(IntervalMonthDayNanoType::make_value(
-            2 + offset,
-            20 + offset,
-            200 + (offset as i64),
-        )),
-        Some(IntervalMonthDayNanoType::make_value(
-            3 + offset,
-            30 + offset,
-            300 + (offset as i64),
-        )),
-        None,
-        Some(IntervalMonthDayNanoType::make_value(
-            5 + offset,
-            50 + offset,
-            500 + (offset as i64),
-        )),
-    ]);
-
-    RecordBatch::try_new(
-        schema,
-        vec![Arc::new(ym_arr), Arc::new(dt_arr), Arc::new(mdn_arr)],
-    )
-    .unwrap()
-}
-
 fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
     match scenario {
         Scenario::Boolean => {
@@ -1407,12 +1335,6 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
                 ]),
             ]
         }
-        Scenario::Interval => vec![
-            make_interval_batch(0),
-            make_interval_batch(1),
-            make_interval_batch(2),
-            make_interval_batch(3),
-        ],
     }
 }
 
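For reference alongside the removed helper: the three interval units it exercised take different `make_value` payloads. A minimal sketch (argument order as documented in arrow-rs; the in-memory forms of `DayTime` and `MonthDayNano` have changed across arrow-rs versions, so those values are printed rather than asserted):

```rust
use arrow::datatypes::{IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType};

fn main() {
    // YearMonth: (years, months), stored as a single i32 month count,
    // so 1 year + 10 months is 22 total months.
    let ym = IntervalYearMonthType::make_value(1, 10);
    assert_eq!(ym, 22);

    // DayTime: (days, milliseconds).
    let dt = IntervalDayTimeType::make_value(1, 10);
    println!("day_time: {dt:?}");

    // MonthDayNano: (months, days, nanoseconds).
    let mdn = IntervalMonthDayNanoType::make_value(1, 10, 100);
    println!("month_day_nano: {mdn:?}");
}
```

This is also why `make_interval_batch` cast only the third argument with `as i64`: the nanosecond component is 64-bit, while the other interval fields are 32-bit.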
