From f7059c9d647f6aca9dc701b0590af24e7444ed2a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 6 Jun 2024 07:41:07 -0400 Subject: [PATCH] Minor: refine row selection example more --- parquet/src/arrow/arrow_reader/mod.rs | 29 ++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index bbb1c079b169..793f79272c18 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -155,24 +155,43 @@ impl ArrowReaderBuilder { /// /// # Example /// - /// Given a parquet file with 3 row groups, and a row group filter of - /// `[0, 2]`, in order to only scan rows 50-100 in row group 2: + /// Given a parquet file with 4 row groups, and a row group filter of `[0, + /// 2, 3]`, in order to scan rows 50-100 in row group 2 and rows 200-300 in + /// row group 3: /// /// ```text /// Row Group 0, 1000 rows (selected) /// Row Group 1, 1000 rows (skipped) /// Row Group 2, 1000 rows (selected, but want to only scan rows 50-100) + /// Row Group 3, 1000 rows (selected, but want to only scan rows 200-300) /// ``` /// - /// You would pass the following [`RowSelection`]: + /// You could pass the following [`RowSelection`]: /// /// ```text /// Select 1000 (scan all rows in row group 0) - /// Select 50-100 (scan rows 50-100 in row group 2) + /// Skip 50 (skip the first 50 rows in row group 2) + /// Select 50 (scan rows 50-100 in row group 2) + /// Skip 900 (skip the remaining rows in row group 2) + /// Skip 200 (skip the first 200 rows in row group 3) + /// Select 100 (scan rows 200-300 in row group 3) + /// Skip 700 (skip the remaining rows in row group 3) /// ``` - /// /// Note there is no entry for the (entirely) skipped row group 1. /// + /// Note you can represent the same selection with fewer entries. 
Instead of: + /// + /// ```text + /// Skip 900 (skip the remaining rows in row group 2) + /// Skip 200 (skip the first 200 rows in row group 3) + /// ``` + /// + /// you could use: + /// + /// ```text + /// Skip 1100 (skip the remaining 900 rows in row group 2 and the first 200 rows in row group 3) + /// ``` + /// /// [`Index`]: crate::file::page_index::index::Index pub fn with_row_selection(self, selection: RowSelection) -> Self { Self {