diff --git a/kernel/tests/cdf.rs b/kernel/tests/cdf.rs index 892047104..c93b22d24 100644 --- a/kernel/tests/cdf.rs +++ b/kernel/tests/cdf.rs @@ -51,116 +51,75 @@ fn read_cdf_for_table( Ok(batches) } -fn assert_batches_sorted_eq(expected_lines: &[impl ToString], batches: &[RecordBatch]) { - let sort_rows = |lines: &mut Vec| { - let num_lines = lines.len(); - if num_lines > 3 { - // sort except for header + footer - lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - }; - - let mut expected_lines: Vec = expected_lines.iter().map(ToString::to_string).collect(); - sort_rows(&mut expected_lines); - - let formatted = arrow::util::pretty::pretty_format_batches(batches) - .unwrap() - .to_string(); - let mut actual_lines: Vec = formatted.trim().lines().map(ToString::to_string).collect(); - sort_rows(&mut actual_lines); - - let expected_table_str = expected_lines.join("\n"); - let actual_table_str = actual_lines.join("\n"); - - assert_eq!( - expected_lines.len(), - actual_lines.len(), - "Incorrect number of lines. Expected {} lines:\n{}\nbut got {} lines:\n{} ", - expected_lines.len(), - expected_table_str, - actual_lines.len(), - actual_table_str - ); - for (expected, actual) in expected_lines.iter().zip(&actual_lines) { - assert_eq!( - expected, actual, - "Expected:\n{}\nbut got:\n{}", - expected_table_str, actual_table_str - ); - } -} - #[test] fn cdf_with_deletion_vector() -> Result<(), Box> { - let cdf = read_cdf_for_table("cdf-table-with-dv", 0, None)?; - assert_batches_sorted_eq( - &[ - "+-------+--------------+-----------------+", - "| value | _change_type | _commit_version |", - "+-------+--------------+-----------------+", - "| 0 | insert | 0 |", - "| 1 | insert | 0 |", - "| 2 | insert | 0 |", - "| 3 | insert | 0 |", - "| 4 | insert | 0 |", - "| 5 | insert | 0 |", - "| 6 | insert | 0 |", - "| 8 | insert | 0 |", - "| 7 | insert | 0 |", - "| 9 | insert | 0 |", - "| 0 | delete | 1 |", - "| 9 | delete | 1 |", - "| 0 | insert | 2 |", - "| 9 | insert | 2 |", - "+-------+--------------+-----------------+", - ], - &cdf, - ); + let batches = read_cdf_for_table("cdf-table-with-dv", 0, None)?; + let mut expected = vec![ + "+-------+--------------+-----------------+", + "| value | _change_type | _commit_version |", + "+-------+--------------+-----------------+", + "| 0 | insert | 0 |", + "| 1 | insert | 0 |", + "| 2 | insert | 0 |", + "| 3 | insert | 0 |", + "| 4 | insert | 0 |", + "| 5 | insert | 0 |", + "| 6 | insert | 0 |", + "| 8 | insert | 0 |", + "| 7 | insert | 0 |", + "| 9 | insert | 0 |", + "| 0 | delete | 1 |", + "| 9 | delete | 1 |", + "| 0 | insert | 2 |", + "| 9 | insert | 2 |", + "+-------+--------------+-----------------+", + ]; + sort_lines!(expected); + assert_batches_sorted_eq!(expected, &batches); Ok(()) } #[test] fn basic_cdf() -> Result<(), Box> { let batches = read_cdf_for_table("cdf-table", 0, None)?; - assert_batches_sorted_eq( - &[ - "+----+--------+------------+------------------+-----------------+", - "| id | name | birthday | _change_type | _commit_version |", - "+----+--------+------------+------------------+-----------------+", - "| 1 | Steve | 2023-12-22 | insert | 0 |", - "| 2 | Bob | 2023-12-23 | insert | 0 |", - "| 3 | Dave | 2023-12-23 | insert | 0 |", - "| 4 | Kate | 2023-12-23 | insert | 0 |", - "| 5 | Emily | 2023-12-24 | insert | 0 |", - "| 6 | Carl | 2023-12-24 | insert | 0 |", - "| 7 | Dennis | 2023-12-24 | insert | 0 |", - "| 8 | Claire | 2023-12-25 | insert | 0 |", - "| 9 | Ada | 2023-12-25 | insert | 0 |", - "| 10 | Borb | 2023-12-25 | insert | 0 |", - "| 3 | Dave | 2023-12-22 | update_postimage | 1 |", - "| 3 | Dave | 2023-12-23 | update_preimage | 1 |", - "| 4 | Kate | 2023-12-22 | update_postimage | 1 |", - "| 4 | Kate | 2023-12-23 | update_preimage | 1 |", - "| 2 | Bob | 2023-12-22 | update_postimage | 1 |", - "| 2 | Bob | 2023-12-23 | update_preimage | 1 |", - "| 7 | Dennis | 2023-12-24 | update_preimage | 2 |", - "| 7 | Dennis | 2023-12-29 | update_postimage | 2 |", - "| 5 | Emily | 2023-12-24 | update_preimage | 2 |", - "| 5 | Emily | 2023-12-29 | update_postimage | 2 |", - "| 6 | Carl | 2023-12-24 | update_preimage | 2 |", - "| 6 | Carl | 2023-12-29 | update_postimage | 2 |", - "| 7 | Dennis | 2023-12-29 | delete | 3 |", - "+----+--------+------------+------------------+-----------------+", - ], - &batches, - ); + let mut expected = vec![ + "+----+--------+------------+------------------+-----------------+", + "| id | name | birthday | _change_type | _commit_version |", + "+----+--------+------------+------------------+-----------------+", + "| 1 | Steve | 2023-12-22 | insert | 0 |", + "| 2 | Bob | 2023-12-23 | insert | 0 |", + "| 3 | Dave | 2023-12-23 | insert | 0 |", + "| 4 | Kate | 2023-12-23 | insert | 0 |", + "| 5 | Emily | 2023-12-24 | insert | 0 |", + "| 6 | Carl | 2023-12-24 | insert | 0 |", + "| 7 | Dennis | 2023-12-24 | insert | 0 |", + "| 8 | Claire | 2023-12-25 | insert | 0 |", + "| 9 | Ada | 2023-12-25 | insert | 0 |", + "| 10 | Borb | 2023-12-25 | insert | 0 |", + "| 3 | Dave | 2023-12-22 | update_postimage | 1 |", + "| 3 | Dave | 2023-12-23 | update_preimage | 1 |", + "| 4 | Kate | 2023-12-22 | update_postimage | 1 |", + "| 4 | Kate | 2023-12-23 | update_preimage | 1 |", + "| 2 | Bob | 2023-12-22 | update_postimage | 1 |", + "| 2 | Bob | 2023-12-23 | update_preimage | 1 |", + "| 7 | Dennis | 2023-12-24 | update_preimage | 2 |", + "| 7 | Dennis | 2023-12-29 | update_postimage | 2 |", + "| 5 | Emily | 2023-12-24 | update_preimage | 2 |", + "| 5 | Emily | 2023-12-29 | update_postimage | 2 |", + "| 6 | Carl | 2023-12-24 | update_preimage | 2 |", + "| 6 | Carl | 2023-12-29 | update_postimage | 2 |", + "| 7 | Dennis | 2023-12-29 | delete | 3 |", + "+----+--------+------------+------------------+-----------------+", + ]; + sort_lines!(expected); + assert_batches_sorted_eq!(expected, &batches); Ok(()) } #[test] fn cdf_non_partitioned() -> Result<(), Box> { let batches = read_cdf_for_table("cdf-table-non-partitioned", 0, None)?; - assert_batches_sorted_eq(&[ + let mut expected = vec![ "+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+", "| id | name | birthday | long_field | boolean_field | double_field | smallint_field | _change_type | _commit_version |", "+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+", @@ -190,7 +149,8 @@ fn cdf_non_partitioned() -> Result<(), Box> { "| 1 | Alex | 2024-04-14 | 1 | true | 3.14 | 1 | insert | 4 |", "| 2 | Alan | 2024-04-15 | 1 | true | 3.14 | 1 | insert | 4 |", "+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+" - ], - &batches); + ]; + sort_lines!(expected); + assert_batches_sorted_eq!(expected, &batches); Ok(()) } diff --git a/kernel/tests/common/mod.rs b/kernel/tests/common/mod.rs index bc67f7307..a918695b7 100644 --- a/kernel/tests/common/mod.rs +++ b/kernel/tests/common/mod.rs @@ -9,6 +9,35 @@ use delta_kernel::{DeltaResult, Engine, EngineData, Table}; use std::sync::Arc; +#[macro_export] +macro_rules! sort_lines { + ($lines: expr) => {{ + // sort except for header + footer + let num_lines = $lines.len(); + if num_lines > 3 { + $lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + }}; +} + +// NB: expected_lines_sorted MUST be pre-sorted (via sort_lines!()) +#[macro_export] +macro_rules! assert_batches_sorted_eq { + ($expected_lines_sorted: expr, $CHUNKS: expr) => { + let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) + .unwrap() + .to_string(); + // fix for windows: \r\n --> + let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); + sort_lines!(actual_lines); + assert_eq!( + $expected_lines_sorted, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + $expected_lines_sorted, actual_lines + ); + }; +} + /// unpack the test data from {test_parent_dir}/{test_name}.tar.zst into a temp dir, and return the dir it was /// unpacked into #[allow(unused)] diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index a0a8160c1..7a674ce57 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -318,33 +318,6 @@ async fn stats() -> Result<(), Box> { Ok(()) } -macro_rules! sort_lines { - ($lines: expr) => {{ - // sort except for header + footer - let num_lines = $lines.len(); - if num_lines > 3 { - $lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - }}; -} - -// NB: expected_lines_sorted MUST be pre-sorted (via sort_lines!()) -macro_rules! assert_batches_sorted_eq { - ($expected_lines_sorted: expr, $CHUNKS: expr) => { - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) - .unwrap() - .to_string(); - // fix for windows: \r\n --> - let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); - sort_lines!(actual_lines); - assert_eq!( - $expected_lines_sorted, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - $expected_lines_sorted, actual_lines - ); - }; -} - fn read_with_execute( engine: Arc, scan: &Scan,