Skip to content

Commit

Permalink
Remove reimplemented function
Browse files Browse the repository at this point in the history
  • Loading branch information
OussamaSaoudi-db committed Dec 11, 2024
1 parent 75445af commit 192849d
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 125 deletions.
156 changes: 58 additions & 98 deletions kernel/tests/cdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,116 +51,75 @@ fn read_cdf_for_table(
Ok(batches)
}

/// Asserts that `batches`, rendered as a pretty-printed table, contain the same lines as
/// `expected_lines`, ignoring row order.
///
/// Both the expected and the actual table are sorted (every line after the first two,
/// excluding the last one) and then compared line by line.
///
/// # Panics
///
/// Panics if the batches cannot be formatted, if the two tables have a different number of
/// lines, or if any pair of lines differs. The failure message prints both sorted tables.
fn assert_batches_sorted_eq(expected_lines: &[impl ToString], batches: &[RecordBatch]) {
    // Orders the lines in place, leaving the first two lines and the last line untouched.
    fn sort_rows(lines: &mut [String]) {
        let count = lines.len();
        if count > 3 {
            lines[2..count - 1].sort_unstable();
        }
    }

    let mut want: Vec<String> = expected_lines
        .iter()
        .map(|line| line.to_string())
        .collect();
    sort_rows(&mut want);

    let rendered = arrow::util::pretty::pretty_format_batches(batches)
        .unwrap()
        .to_string();
    let mut got: Vec<String> = rendered.trim().lines().map(String::from).collect();
    sort_rows(&mut got);

    // Full tables are only used to give context in failure messages.
    let want_table = want.join("\n");
    let got_table = got.join("\n");

    assert_eq!(
        want.len(),
        got.len(),
        "Incorrect number of lines. Expected {} lines:\n{}\nbut got {} lines:\n{} ",
        want.len(),
        want_table,
        got.len(),
        got_table
    );

    // Fail on the first pair of lines that differ, if there is one.
    if let Some((expected, actual)) = want.iter().zip(&got).find(|(e, a)| e != a) {
        assert_eq!(
            expected, actual,
            "Expected:\n{}\nbut got:\n{}",
            want_table, got_table
        );
    }
}

/// Checks the rows returned by `read_cdf_for_table` for the `cdf-table-with-dv` table against
/// a fixed expected table, ignoring row order.
///
/// NOTE(review): this body reads the table and asserts on it twice — once through the
/// `assert_batches_sorted_eq` function and once through `sort_lines!` +
/// `assert_batches_sorted_eq!`, with identical expected rows. Confirm whether both forms are
/// meant to remain.
#[test]
fn cdf_with_deletion_vector() -> Result<(), Box<dyn error::Error>> {
// presumably `0` is the start version and `None` means no end version — TODO confirm against
// `read_cdf_for_table`
let cdf = read_cdf_for_table("cdf-table-with-dv", 0, None)?;
// Function form: the callee sorts both the expected and the actual lines before comparing.
assert_batches_sorted_eq(
&[
"+-------+--------------+-----------------+",
"| value | _change_type | _commit_version |",
"+-------+--------------+-----------------+",
"| 0 | insert | 0 |",
"| 1 | insert | 0 |",
"| 2 | insert | 0 |",
"| 3 | insert | 0 |",
"| 4 | insert | 0 |",
"| 5 | insert | 0 |",
"| 6 | insert | 0 |",
"| 8 | insert | 0 |",
"| 7 | insert | 0 |",
"| 9 | insert | 0 |",
"| 0 | delete | 1 |",
"| 9 | delete | 1 |",
"| 0 | insert | 2 |",
"| 9 | insert | 2 |",
"+-------+--------------+-----------------+",
],
&cdf,
);
// Macro form: same table and same expected rows as above.
let batches = read_cdf_for_table("cdf-table-with-dv", 0, None)?;
let mut expected = vec![
"+-------+--------------+-----------------+",
"| value | _change_type | _commit_version |",
"+-------+--------------+-----------------+",
"| 0 | insert | 0 |",
"| 1 | insert | 0 |",
"| 2 | insert | 0 |",
"| 3 | insert | 0 |",
"| 4 | insert | 0 |",
"| 5 | insert | 0 |",
"| 6 | insert | 0 |",
"| 8 | insert | 0 |",
"| 7 | insert | 0 |",
"| 9 | insert | 0 |",
"| 0 | delete | 1 |",
"| 9 | delete | 1 |",
"| 0 | insert | 2 |",
"| 9 | insert | 2 |",
"+-------+--------------+-----------------+",
];
// The macro only sorts the actual lines, so the expected lines must be sorted here first.
sort_lines!(expected);
assert_batches_sorted_eq!(expected, &batches);
Ok(())
}

/// Checks the rows returned by `read_cdf_for_table` for the `cdf-table` table against a fixed
/// expected table, ignoring row order.
///
/// NOTE(review): the same batches are asserted on twice — once through the
/// `assert_batches_sorted_eq` function and once through `sort_lines!` +
/// `assert_batches_sorted_eq!`, with identical expected rows. Confirm whether both forms are
/// meant to remain.
#[test]
fn basic_cdf() -> Result<(), Box<dyn error::Error>> {
// presumably `0` is the start version and `None` means no end version — TODO confirm against
// `read_cdf_for_table`
let batches = read_cdf_for_table("cdf-table", 0, None)?;
// Function form: the callee sorts both the expected and the actual lines before comparing.
assert_batches_sorted_eq(
&[
"+----+--------+------------+------------------+-----------------+",
"| id | name | birthday | _change_type | _commit_version |",
"+----+--------+------------+------------------+-----------------+",
"| 1 | Steve | 2023-12-22 | insert | 0 |",
"| 2 | Bob | 2023-12-23 | insert | 0 |",
"| 3 | Dave | 2023-12-23 | insert | 0 |",
"| 4 | Kate | 2023-12-23 | insert | 0 |",
"| 5 | Emily | 2023-12-24 | insert | 0 |",
"| 6 | Carl | 2023-12-24 | insert | 0 |",
"| 7 | Dennis | 2023-12-24 | insert | 0 |",
"| 8 | Claire | 2023-12-25 | insert | 0 |",
"| 9 | Ada | 2023-12-25 | insert | 0 |",
"| 10 | Borb | 2023-12-25 | insert | 0 |",
"| 3 | Dave | 2023-12-22 | update_postimage | 1 |",
"| 3 | Dave | 2023-12-23 | update_preimage | 1 |",
"| 4 | Kate | 2023-12-22 | update_postimage | 1 |",
"| 4 | Kate | 2023-12-23 | update_preimage | 1 |",
"| 2 | Bob | 2023-12-22 | update_postimage | 1 |",
"| 2 | Bob | 2023-12-23 | update_preimage | 1 |",
"| 7 | Dennis | 2023-12-24 | update_preimage | 2 |",
"| 7 | Dennis | 2023-12-29 | update_postimage | 2 |",
"| 5 | Emily | 2023-12-24 | update_preimage | 2 |",
"| 5 | Emily | 2023-12-29 | update_postimage | 2 |",
"| 6 | Carl | 2023-12-24 | update_preimage | 2 |",
"| 6 | Carl | 2023-12-29 | update_postimage | 2 |",
"| 7 | Dennis | 2023-12-29 | delete | 3 |",
"+----+--------+------------+------------------+-----------------+",
],
&batches,
);
// Macro form: same batches and same expected rows as above.
let mut expected = vec![
"+----+--------+------------+------------------+-----------------+",
"| id | name | birthday | _change_type | _commit_version |",
"+----+--------+------------+------------------+-----------------+",
"| 1 | Steve | 2023-12-22 | insert | 0 |",
"| 2 | Bob | 2023-12-23 | insert | 0 |",
"| 3 | Dave | 2023-12-23 | insert | 0 |",
"| 4 | Kate | 2023-12-23 | insert | 0 |",
"| 5 | Emily | 2023-12-24 | insert | 0 |",
"| 6 | Carl | 2023-12-24 | insert | 0 |",
"| 7 | Dennis | 2023-12-24 | insert | 0 |",
"| 8 | Claire | 2023-12-25 | insert | 0 |",
"| 9 | Ada | 2023-12-25 | insert | 0 |",
"| 10 | Borb | 2023-12-25 | insert | 0 |",
"| 3 | Dave | 2023-12-22 | update_postimage | 1 |",
"| 3 | Dave | 2023-12-23 | update_preimage | 1 |",
"| 4 | Kate | 2023-12-22 | update_postimage | 1 |",
"| 4 | Kate | 2023-12-23 | update_preimage | 1 |",
"| 2 | Bob | 2023-12-22 | update_postimage | 1 |",
"| 2 | Bob | 2023-12-23 | update_preimage | 1 |",
"| 7 | Dennis | 2023-12-24 | update_preimage | 2 |",
"| 7 | Dennis | 2023-12-29 | update_postimage | 2 |",
"| 5 | Emily | 2023-12-24 | update_preimage | 2 |",
"| 5 | Emily | 2023-12-29 | update_postimage | 2 |",
"| 6 | Carl | 2023-12-24 | update_preimage | 2 |",
"| 6 | Carl | 2023-12-29 | update_postimage | 2 |",
"| 7 | Dennis | 2023-12-29 | delete | 3 |",
"+----+--------+------------+------------------+-----------------+",
];
// The macro only sorts the actual lines, so the expected lines must be sorted here first.
sort_lines!(expected);
assert_batches_sorted_eq!(expected, &batches);
Ok(())
}

#[test]
fn cdf_non_partitioned() -> Result<(), Box<dyn error::Error>> {
let batches = read_cdf_for_table("cdf-table-non-partitioned", 0, None)?;
assert_batches_sorted_eq(&[
let mut expected = vec![
"+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+",
"| id | name | birthday | long_field | boolean_field | double_field | smallint_field | _change_type | _commit_version |",
"+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+",
Expand Down Expand Up @@ -190,7 +149,8 @@ fn cdf_non_partitioned() -> Result<(), Box<dyn error::Error>> {
"| 1 | Alex | 2024-04-14 | 1 | true | 3.14 | 1 | insert | 4 |",
"| 2 | Alan | 2024-04-15 | 1 | true | 3.14 | 1 | insert | 4 |",
"+----+--------+------------+-------------------+---------------+--------------+----------------+------------------+-----------------+"
],
&batches);
];
sort_lines!(expected);
assert_batches_sorted_eq!(expected, &batches);
Ok(())
}
29 changes: 29 additions & 0 deletions kernel/tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,35 @@ use delta_kernel::{DeltaResult, Engine, EngineData, Table};

use std::sync::Arc;

/// Sorts the lines of a pretty-printed table in place, leaving the first two lines and the
/// last line where they are.
///
/// Inputs with three lines or fewer are left unchanged. `$lines` must support `len()` and
/// `as_mut_slice()` (e.g. a mutable `Vec`).
#[macro_export]
macro_rules! sort_lines {
    ($lines: expr) => {{
        let total = $lines.len();
        // With three lines or fewer there is nothing between header and footer to sort.
        if total > 3 {
            let rows = &mut $lines.as_mut_slice()[2..total - 1];
            rows.sort_unstable();
        }
    }};
}

/// Asserts that the pretty-printed form of `$CHUNKS` equals `$expected_lines_sorted`, ignoring
/// row order.
///
/// NB: `$expected_lines_sorted` MUST be pre-sorted (via `sort_lines!()`); only the actual
/// lines are sorted here.
///
/// Panics if the batches cannot be formatted or if the sorted lines differ.
#[macro_export]
macro_rules! assert_batches_sorted_eq {
    ($expected_lines_sorted: expr, $CHUNKS: expr) => {
        let rendered = arrow::util::pretty::pretty_format_batches($CHUNKS)
            .unwrap()
            .to_string();
        // fix for windows: `lines()` splits on `\r\n` as well as `\n`
        let mut actual = rendered.trim().lines().collect::<Vec<&str>>();
        sort_lines!(actual);
        assert_eq!(
            $expected_lines_sorted, actual,
            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
            $expected_lines_sorted, actual
        );
    };
}

/// unpack the test data from {test_parent_dir}/{test_name}.tar.zst into a temp dir, and return the dir it was
/// unpacked into
#[allow(unused)]
Expand Down
27 changes: 0 additions & 27 deletions kernel/tests/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,33 +318,6 @@ async fn stats() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}

/// Sorts the lines of a pretty-printed table in place, leaving the first two lines and the
/// last line where they are.
///
/// Inputs with three lines or fewer are left unchanged. `$lines` must support `len()` and
/// `as_mut_slice()` (e.g. a mutable `Vec`).
macro_rules! sort_lines {
    ($lines: expr) => {{
        let total = $lines.len();
        // With three lines or fewer there is nothing between header and footer to sort.
        if total > 3 {
            let rows = &mut $lines.as_mut_slice()[2..total - 1];
            rows.sort_unstable();
        }
    }};
}

/// Asserts that the pretty-printed form of `$CHUNKS` equals `$expected_lines_sorted`, ignoring
/// row order.
///
/// NB: `$expected_lines_sorted` MUST be pre-sorted (via `sort_lines!()`); only the actual
/// lines are sorted here.
///
/// Panics if the batches cannot be formatted or if the sorted lines differ.
macro_rules! assert_batches_sorted_eq {
    ($expected_lines_sorted: expr, $CHUNKS: expr) => {
        let rendered = arrow::util::pretty::pretty_format_batches($CHUNKS)
            .unwrap()
            .to_string();
        // fix for windows: `lines()` splits on `\r\n` as well as `\n`
        let mut actual = rendered.trim().lines().collect::<Vec<&str>>();
        sort_lines!(actual);
        assert_eq!(
            $expected_lines_sorted, actual,
            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
            $expected_lines_sorted, actual
        );
    };
}

fn read_with_execute(
engine: Arc<dyn Engine>,
scan: &Scan,
Expand Down

0 comments on commit 192849d

Please sign in to comment.