Skip to content

Commit

Permalink
fix: adopt the right array item name which changed in kernel 0.3.1
Browse files Browse the repository at this point in the history
see delta-io/delta-kernel-rs#301

Signed-off-by: R. Tyler Croy <[email protected]>
  • Loading branch information
rtyler committed Nov 21, 2024
1 parent 3b8a7b3 commit 6b3adbf
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions crates/core/src/writer/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,10 @@ impl AddAssign for AggregatedStats {
/// the list and items fields from the path, but also need to handle the
/// peculiar case where the user named the list field "list" or "element".
///
/// NOTE: As of delta_kernel 0.3.1 the name of the list's element field switched from `item` to
/// `element` to line up with the Parquet specification; see the
/// [Parquet LogicalTypes documentation on lists](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
///
/// For example:
///
/// * ["some_nested_list", "list", "element", "list", "element"] -> "some_nested_list"
Expand All @@ -495,9 +499,9 @@ fn get_list_field_name(column_descr: &Arc<ColumnDescriptor>) -> Option<String> {
while let Some(part) = column_path_parts.pop() {
match (part.as_str(), lists_seen, items_seen) {
("list", seen, _) if seen == max_rep_levels => return Some("list".to_string()),
("item", _, seen) if seen == max_rep_levels => return Some("item".to_string()),
("element", _, seen) if seen == max_rep_levels => return Some("element".to_string()),
("list", _, _) => lists_seen += 1,
("item", _, _) => items_seen += 1,
("element", _, _) => items_seen += 1,
(other, _, _) => return Some(other.to_string()),
}
}
Expand Down Expand Up @@ -789,9 +793,21 @@ mod tests {
let mut null_count_keys = vec!["some_list", "some_nested_list"];
null_count_keys.extend_from_slice(min_max_keys.as_slice());

assert_eq!(min_max_keys.len(), stats.min_values.len());
assert_eq!(min_max_keys.len(), stats.max_values.len());
assert_eq!(null_count_keys.len(), stats.null_count.len());
assert_eq!(
min_max_keys.len(),
stats.min_values.len(),
"min values don't match"
);
assert_eq!(
min_max_keys.len(),
stats.max_values.len(),
"max values don't match"
);
assert_eq!(
null_count_keys.len(),
stats.null_count.len(),
"null counts don't match"
);

// assert on min values
for (k, v) in stats.min_values.iter() {
Expand Down Expand Up @@ -820,7 +836,7 @@ mod tests {
("uuid", ColumnValueStat::Value(v)) => {
assert_eq!("176c770d-92af-4a21-bf76-5d8c5261d659", v.as_str().unwrap())
}
_ => panic!("Key should not be present"),
k => panic!("Key {k:?} should not be present in min_values"),
}
}

Expand Down Expand Up @@ -851,7 +867,7 @@ mod tests {
("uuid", ColumnValueStat::Value(v)) => {
assert_eq!("a98bea04-d119-4f21-8edc-eb218b5849af", v.as_str().unwrap())
}
_ => panic!("Key should not be present"),
k => panic!("Key {k:?} should not be present in max_values"),
}
}

Expand All @@ -878,7 +894,7 @@ mod tests {
("some_nested_list", ColumnCountStat::Value(v)) => assert_eq!(100, *v),
("date", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
("uuid", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
_ => panic!("Key should not be present"),
k => panic!("Key {k:?} should not be present in null_count"),
}
}
}
Expand Down

0 comments on commit 6b3adbf

Please sign in to comment.