diff --git a/velox/dwio/common/tests/utils/DataSetBuilder.cpp b/velox/dwio/common/tests/utils/DataSetBuilder.cpp index f9d532c84eab..07b6b245003f 100644 --- a/velox/dwio/common/tests/utils/DataSetBuilder.cpp +++ b/velox/dwio/common/tests/utils/DataSetBuilder.cpp @@ -37,7 +37,8 @@ RowTypePtr DataSetBuilder::makeRowType( DataSetBuilder& DataSetBuilder::makeDataset( RowTypePtr rowType, const size_t batchCount, - const size_t numRows) { + const size_t numRows, + const bool withRecursiveNulls) { if (batches_) { batches_->clear(); } else { @@ -45,8 +46,18 @@ DataSetBuilder& DataSetBuilder::makeDataset( } for (size_t i = 0; i < batchCount; ++i) { - batches_->push_back(std::static_pointer_cast( - BatchMaker::createBatch(rowType, numRows, pool_, nullptr, i))); + if (withRecursiveNulls) { + batches_->push_back(std::static_pointer_cast( + BatchMaker::createBatch(rowType, numRows, pool_, nullptr, i))); + } else { + batches_->push_back( + std::static_pointer_cast(BatchMaker::createBatch( + rowType, + numRows, + pool_, + [](vector_size_t /*index*/) { return false; }, + i))); + } } return *this; diff --git a/velox/dwio/common/tests/utils/DataSetBuilder.h b/velox/dwio/common/tests/utils/DataSetBuilder.h index d43fe28518a8..4893c28336f6 100644 --- a/velox/dwio/common/tests/utils/DataSetBuilder.h +++ b/velox/dwio/common/tests/utils/DataSetBuilder.h @@ -43,7 +43,8 @@ class DataSetBuilder { DataSetBuilder& makeDataset( RowTypePtr rowType, const size_t batchCount, - const size_t numRows); + const size_t numRows, + const bool withRecursiveNulls = true); // Adds high values to 'batches_' so that these values occur only in some row // groups. Tests skipping row groups based on row group stats. diff --git a/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp b/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp index 755ff7157390..287d245c8408 100644 --- a/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp +++ b/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp @@ -46,12 +46,14 @@ using velox::common::Subfield; std::vector E2EFilterTestBase::makeDataset( std::function customize, - bool forRowGroupSkip) { + bool forRowGroupSkip, + bool withRecursiveNulls) { if (!dataSetBuilder_) { dataSetBuilder_ = std::make_unique(*leafPool_, 0); } - dataSetBuilder_->makeDataset(rowType_, batchCount_, batchSize_); + dataSetBuilder_->makeDataset( + rowType_, batchCount_, batchSize_, withRecursiveNulls); if (forRowGroupSkip) { dataSetBuilder_->withRowGroupSpecificData(kRowsInGroup); @@ -408,17 +410,18 @@ void E2EFilterTestBase::testScenario( std::function customize, bool wrapInStruct, const std::vector& filterable, - int32_t numCombinations) { + int32_t numCombinations, + bool withRecursiveNulls) { rowType_ = DataSetBuilder::makeRowType(columns, wrapInStruct); filterGenerator_ = std::make_unique(rowType_, seed_); - auto batches = makeDataset(customize, false); + auto batches = makeDataset(customize, false, withRecursiveNulls); writeToMemory(rowType_, batches, false); testNoRowGroupSkip(batches, filterable, numCombinations); testPruningWithFilter(batches, filterable); if (testRowGroupSkip_) { - batches = makeDataset(customize, true); + batches = makeDataset(customize, true, withRecursiveNulls); writeToMemory(rowType_, batches, true); testRowGroupSkip(batches, filterable); } diff --git a/velox/dwio/common/tests/utils/E2EFilterTestBase.h b/velox/dwio/common/tests/utils/E2EFilterTestBase.h index f26ac8beef1b..f0e9d1daa0c9 100644 --- a/velox/dwio/common/tests/utils/E2EFilterTestBase.h +++ b/velox/dwio/common/tests/utils/E2EFilterTestBase.h @@ -105,7 +105,8 @@ class E2EFilterTestBase : public testing::Test { std::vector makeDataset( std::function customize, - bool forRowGroupSkip); + bool forRowGroupSkip, + bool withRecursiveNulls); void makeAllNulls(const std::string& fieldName); @@ -297,7 +298,8 @@ class E2EFilterTestBase : public testing::Test { std::function customize, bool wrapInStruct, const std::vector& filterable, - int32_t numCombinations); + int32_t numCombinations, + bool withRecursiveNulls = true); private: void testMetadataFilterImpl(