From 67183a76909db5ce2ba681595224870b11808eec Mon Sep 17 00:00:00 2001 From: yan ma Date: Fri, 19 Jul 2024 19:20:02 +0800 Subject: [PATCH] Remove fast path for all null in filter --- velox/dwio/common/ScanSpec.cpp | 8 -------- .../dwio/parquet/tests/examples/null_map.parquet | Bin 0 -> 941 bytes .../parquet/tests/reader/ParquetTableScanTest.cpp | 13 +++++++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 velox/dwio/parquet/tests/examples/null_map.parquet diff --git a/velox/dwio/common/ScanSpec.cpp b/velox/dwio/common/ScanSpec.cpp index 005c6eaa47ae9..100e9b79b0c69 100644 --- a/velox/dwio/common/ScanSpec.cpp +++ b/velox/dwio/common/ScanSpec.cpp @@ -277,15 +277,7 @@ bool testFilter( const TypePtr& type) { bool mayHaveNull = true; - // Has-null statistics is often not set. Hence, we supplement it with - // number-of-values statistic to detect no-null columns more often. - // Number-of-values is the number of non-null values. When it is equal to - // total number of values, we know there are no nulls. if (stats->getNumberOfValues().has_value()) { - if (stats->getNumberOfValues().value() == 0) { - // Column is all null. - return filter->testNull(); - } mayHaveNull = stats->getNumberOfValues().value() < totalRows; } diff --git a/velox/dwio/parquet/tests/examples/null_map.parquet b/velox/dwio/parquet/tests/examples/null_map.parquet new file mode 100644 index 0000000000000000000000000000000000000000..64dbe3e87a43c1793578d0e4219c404e2bfaa38b GIT binary patch literal 941 zcma)5-D(p-6h1TkA!-y0onco(2?Gr&#HHD7n=}L=G-8xcLJWdP36t%lUAp<{?xs@0 zzJV9H6$IbIC-BNE1uq1_JFk2J5zlVYRBbP0XLt5|=X~eq+u25?nP=1+_a0kgpWc4`OsC#uwhVB4lcIM4ujmQ?7p1RIT_gGEW>n_@`U=rC zz!jPFikadGR>m;UQ@qORv$qMEz9Pr1CH|9i07={UKuiB5VyZ6LKwm z+T=XJg#44?8)clop_qZcC!A&y{3BUK8FQL8@Qw^Ql&F^xOJ|%Z&vyoWZOJk$BM;if zl?eaTHBjZ#kqQiN@Y3k1Kpx6KYOm;61m{B~O2QAkksXMf=sLwEH91l^qw|UFrHNCoXb|(l`urbFMw-&MWog5JA+hHLpC)8GpEI!M#C0kWSy< z$Eu=)ovQH979!xU4n(`XSF1GJ&nnG$Z#{23Y(H%^AMZcHd9Z=9eJs7~_C^lJ#^Mh| z7#@V6J(;O=zvrcAvZnJ!-Zac~wli|v!*rpbtWCSPxocU)9c6Euwqa YX5KPQdv|L?FXQ>0F8Gc|s)0Z1Z#b;g?EnA( literal 0 HcmV?d00001 diff --git a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp index b0b83c48396d4..659bbca5fdb60 100644 --- a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp @@ -383,6 +383,19 @@ TEST_F(ParquetTableScanTest, map) { assertSelectWithFilter({"map"}, {}, "", "SELECT map FROM tmp"); } +TEST_F(ParquetTableScanTest, nullMap) { + auto path = getExampleFilePath("null_map.parquet"); + loadData( + path, + ROW({"i", "c"}, {VARCHAR(), MAP(VARCHAR(), VARCHAR())}), + makeRowVector( + {"i", "c"}, + {makeConstant("1", 1), + makeNullableMapVector({std::nullopt})})); + + assertSelectWithFilter({"i", "c"}, {}, "", "SELECT i, c FROM tmp"); +} + // Core dump is fixed. TEST_F(ParquetTableScanTest, singleRowStruct) { auto vector = makeArrayVector({{}});