diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 82f21461290b..efd6a3339b71 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -81,6 +81,13 @@ impl RowSelector { /// /// let actual: Vec = selection.into(); /// assert_eq!(actual, expected); +/// +/// // you can also create a selection from consecutive ranges +/// let ranges = vec![5..10, 10..15]; +/// let selection = +/// RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(20)); +/// let actual: Vec = selection.into(); +/// assert_eq!(actual, expected); /// ``` /// /// A [`RowSelection`] maintains the following invariants: @@ -111,13 +118,13 @@ impl RowSelection { SlicesIterator::new(filter).map(move |(start, end)| start + offset..end + offset) }); - Self::from_consecutive_ranges(iter, total_rows) + Self::from_consecutive_ranges(iter, Some(total_rows)) } /// Creates a [`RowSelection`] from an iterator of consecutive ranges to keep - pub(crate) fn from_consecutive_ranges>>( + pub fn from_consecutive_ranges>>( ranges: I, - total_rows: usize, + total_rows_opt: Option, ) -> Self { let mut selectors: Vec = Vec::with_capacity(ranges.size_hint().0); let mut last_end = 0; @@ -141,8 +148,10 @@ impl RowSelection { last_end = range.end; } - if last_end != total_rows { - selectors.push(RowSelector::skip(total_rows - last_end)) + if let Some(total_rows) = total_rows_opt { + if last_end != total_rows { + selectors.push(RowSelector::skip(total_rows - last_end)) + } } Self { selectors } @@ -1136,9 +1145,9 @@ mod tests { } #[test] - fn test_empty_ranges() { + fn test_from_ranges() { let ranges = [1..3, 4..6, 6..6, 8..8, 9..10]; - let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), 10); + let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(10)); assert_eq!( selection.selectors, vec![ @@ -1149,7 +1158,27 @@ mod tests { RowSelector::skip(3), RowSelector::select(1) ] - ) + ); + + let out_of_order_ranges = [1..3, 8..10, 4..7]; + let result = std::panic::catch_unwind(|| { + RowSelection::from_consecutive_ranges(out_of_order_ranges.into_iter(), Some(10)) + }); + assert!(result.is_err()); + + // test without total rows specified + let ranges = [1..3, 4..6]; + let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), None); + + assert_eq!( + selection.selectors, + vec![ + RowSelector::skip(1), + RowSelector::select(2), + RowSelector::skip(1), + RowSelector::select(2), + ] + ); } #[test]