Skip to content

Commit

Permalink
Remove fast path for all null in filter
Browse files Browse the repository at this point in the history
  • Loading branch information
yma11 committed Jul 19, 2024
1 parent c92a493 commit 5cb671c
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
8 changes: 0 additions & 8 deletions velox/dwio/common/ScanSpec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,7 @@ bool testFilter(
const TypePtr& type) {
bool mayHaveNull = true;

// Has-null statistics is often not set. Hence, we supplement it with
// number-of-values statistic to detect no-null columns more often.
// Number-of-values is the number of non-null values. When it is equal to
// total number of values, we know there are no nulls.
if (stats->getNumberOfValues().has_value()) {
if (stats->getNumberOfValues().value() == 0) {
// Column is all null.
return filter->testNull();
}
mayHaveNull = stats->getNumberOfValues().value() < totalRows;
}

Expand Down
Binary file added velox/dwio/parquet/tests/examples/null_map.parquet
Binary file not shown.
13 changes: 13 additions & 0 deletions velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,19 @@ TEST_F(ParquetTableScanTest, map) {
assertSelectWithFilter({"map"}, {}, "", "SELECT map FROM tmp");
}

// Scans the `null_map.parquet` example file and checks that selecting both
// columns with no filters matches the expected data registered through
// loadData().
TEST_F(ParquetTableScanTest, nullMap) {
  // Row type handed to the loader: a VARCHAR column "i" and a
  // MAP(VARCHAR, VARCHAR) column "c".
  auto fileType = ROW({"i", "c"}, {VARCHAR(), MAP(VARCHAR(), VARCHAR())});

  // Expected contents: "i" is the constant string "1"; "c" is built from a
  // single std::nullopt entry, i.e. the map value itself is absent.
  auto expected = makeRowVector(
      {"i", "c"},
      {makeConstant<std::string>("1", 1),
       makeNullableMapVector<std::string, std::string>({std::nullopt})});

  loadData(getExampleFilePath("null_map.parquet"), fileType, expected);

  // No subfield filters and no remaining filter; compare against the plain
  // SELECT over the loaded data.
  assertSelectWithFilter({"i", "c"}, {}, "", "SELECT i, c FROM tmp");
}

// Regression test: this case previously caused a core dump, which is now fixed.
TEST_F(ParquetTableScanTest, singleRowStruct) {
auto vector = makeArrayVector<int32_t>({{}});
Expand Down

0 comments on commit 5cb671c

Please sign in to comment.