From 9e49827d10bef8488235aae5df6dcf9a258ec238 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Tue, 28 Nov 2023 21:09:34 +0530 Subject: [PATCH 1/2] Replace tableScan API with tableScanBuilder --- .../fuzzer/tests/FuzzerConnectorTest.cpp | 60 ++- velox/exec/tests/AssertQueryBuilderTest.cpp | 9 +- velox/exec/tests/AsyncConnectorTest.cpp | 5 +- velox/exec/tests/HashJoinTest.cpp | 27 +- velox/exec/tests/TableScanTest.cpp | 371 +++++++++++++----- velox/exec/tests/utils/PlanBuilder.cpp | 13 - velox/exec/tests/utils/PlanBuilder.h | 14 - 7 files changed, 337 insertions(+), 162 deletions(-) diff --git a/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp b/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp index ef2163a31540..5b5fe277ac59 100644 --- a/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp +++ b/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp @@ -31,8 +31,12 @@ TEST_F(FuzzerConnectorTest, singleSplit) { const size_t numRows = 100; auto type = ROW({BIGINT(), DOUBLE(), VARCHAR()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -43,8 +47,12 @@ TEST_F(FuzzerConnectorTest, floatingPoints) { const size_t numRows = 1000; auto type = ROW({REAL(), DOUBLE()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -59,8 +67,12 @@ TEST_F(FuzzerConnectorTest, complexTypes) { REAL(), }); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + 
.tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -72,8 +84,12 @@ TEST_F(FuzzerConnectorTest, multipleSplits) { const size_t numSplits = 10; auto type = ROW({BIGINT(), DOUBLE(), VARCHAR()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .splits(makeFuzzerSplits(rowsPerSplit, numSplits)) @@ -89,8 +105,12 @@ TEST_F(FuzzerConnectorTest, randomTypes) { for (size_t i = 0; i < iterations; ++i) { auto type = VectorFuzzer({}, pool()).randRowType(); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .splits(makeFuzzerSplits(rowsPerSplit, numSplits)) .assertTypeAndNumRows(type, rowsPerSplit * numSplits); @@ -101,14 +121,18 @@ TEST_F(FuzzerConnectorTest, reproducible) { const size_t numRows = 100; auto type = ROW({BIGINT(), ARRAY(INTEGER()), VARCHAR()}); - auto plan1 = - PlanBuilder() - .tableScan(type, makeFuzzerTableHandle(/*fuzerSeed=*/1234), {}) - .planNode(); - auto plan2 = - PlanBuilder() - .tableScan(type, makeFuzzerTableHandle(/*fuzerSeed=*/1234), {}) - .planNode(); + auto plan1 = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle(/*fuzerSeed=*/1234)) + .endTableScan() + .planNode(); + auto plan2 = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle(/*fuzerSeed=*/1234)) + .endTableScan() + .planNode(); auto results1 = exec::test::AssertQueryBuilder(plan1) .split(makeFuzzerSplit(numRows)) diff --git a/velox/exec/tests/AssertQueryBuilderTest.cpp 
b/velox/exec/tests/AssertQueryBuilderTest.cpp index 8c0b7ceacc81..a01a467b4e4f 100644 --- a/velox/exec/tests/AssertQueryBuilderTest.cpp +++ b/velox/exec/tests/AssertQueryBuilderTest.cpp @@ -86,10 +86,11 @@ TEST_F(AssertQueryBuilderTest, hiveSplits) { AssertQueryBuilder( PlanBuilder() - .tableScan( - ROW({"c0", "ds"}, {INTEGER(), VARCHAR()}), - makeTableHandle(), - assignments) + .startTableScan() + .outputType(ROW({"c0", "ds"}, {INTEGER(), VARCHAR()})) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(), duckDbQueryRunner_) .split(HiveConnectorSplitBuilder(file->path) diff --git a/velox/exec/tests/AsyncConnectorTest.cpp b/velox/exec/tests/AsyncConnectorTest.cpp index 30e4f8fb0dd5..6d9115495abb 100644 --- a/velox/exec/tests/AsyncConnectorTest.cpp +++ b/velox/exec/tests/AsyncConnectorTest.cpp @@ -193,7 +193,10 @@ TEST_F(AsyncConnectorTest, basic) { auto tableHandle = std::make_shared(); core::PlanNodeId scanId; auto plan = PlanBuilder() - .tableScan(ROW({"a"}, {BIGINT()}), tableHandle, {}) + .startTableScan() + .outputType(ROW({"a"}, {BIGINT()})) + .tableHandle(tableHandle) + .endTableScan() .capturePlanNodeId(scanId) .singleAggregation({}, {"min(a)"}) .planNode(); diff --git a/velox/exec/tests/HashJoinTest.cpp b/velox/exec/tests/HashJoinTest.cpp index 10e3d4ee7246..57ca0887602b 100644 --- a/velox/exec/tests/HashJoinTest.cpp +++ b/velox/exec/tests/HashJoinTest.cpp @@ -4123,16 +4123,17 @@ TEST_F(HashJoinTest, dynamicFilters) { assignments["b"] = regularColumn("c1", BIGINT()); core::PlanNodeId probeScanId; - auto op = - PlanBuilder(planNodeIdGenerator, pool_.get()) - .tableScan( - scanOutputType, - makeTableHandle(common::test::SubfieldFiltersBuilder().build()), - assignments) - .capturePlanNodeId(probeScanId) - .hashJoin({"a"}, {"u_c0"}, buildSide, "", {"a", "b", "u_c1"}) - .project({"a", "b + 1", "b + u_c1"}) - .planNode(); + auto op = PlanBuilder(planNodeIdGenerator, pool_.get()) + .startTableScan() + 
.outputType(scanOutputType) + .tableHandle(makeTableHandle( + common::test::SubfieldFiltersBuilder().build())) + .assignments(assignments) + .endTableScan() + .capturePlanNodeId(probeScanId) + .hashJoin({"a"}, {"u_c0"}, buildSide, "", {"a", "b", "u_c1"}) + .project({"a", "b + 1", "b + u_c1"}) + .planNode(); HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get()) .planNode(std::move(op)) @@ -4889,7 +4890,11 @@ TEST_F(HashJoinTest, dynamicFilterOnPartitionKey) { auto planNodeIdGenerator = std::make_shared(); auto op = PlanBuilder(planNodeIdGenerator) - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .capturePlanNodeId(probeScanId) .hashJoin( {"n1_1"}, diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index dbefb40bb711..e9f01ecf4706 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -148,7 +148,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { {"c1", regularColumn("c1", DOUBLE())}}; auto op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); std::string partitionValueStr = @@ -158,7 +162,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { outputType = ROW({"c0", "pkey", "c1"}, {BIGINT(), partitionType, DOUBLE()}); op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, @@ -166,7 +174,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { fmt::format("SELECT c0, {}, c1 FROM tmp", partitionValueStr)); outputType = ROW({"c0", "c1", "pkey"}, {BIGINT(), DOUBLE(), partitionType}); op = PlanBuilder() - 
.tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, @@ -177,7 +189,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { assignments = {{"pkey", partitionKey("pkey", partitionType)}}; outputType = ROW({"pkey"}, {partitionType}); op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, split, fmt::format("SELECT {} FROM tmp", partitionValueStr)); @@ -251,19 +267,33 @@ TEST_F(TableScanTest, columnAliases) { std::unordered_map aliases = {{"a", "c0"}}; auto outputType = ROW({"a"}, {BIGINT()}); auto op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases) + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp"); // Use aliased column in a range filter. op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases, {"a < 10"}) + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .subfieldFilters({"a < 10"}) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c0 <= 10"); // Use aliased column in remaining filter. 
op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases, {}, "a % 2 = 1") + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .remainingFilter("a % 2 = 1") + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c0 % 2 = 1"); } @@ -284,7 +314,11 @@ TEST_F(TableScanTest, partitionKeyAlias) { auto outputType = ROW({"a", "ds_alias"}, {BIGINT(), VARCHAR()}); auto op = PlanBuilder() - .tableScan(outputType, makeTableHandle(), assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp"); @@ -338,20 +372,20 @@ TEST_F(TableScanTest, timestamp) { assertQuery(op, {filePath}, "SELECT c0, c1 FROM tmp"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 is null"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0, c1 FROM tmp WHERE c1 is null"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 < '1970-01-01 01:30:00'::TIMESTAMP"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 < '1970-01-01 01:30:00'::TIMESTAMP"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery( op, @@ -359,21 +393,28 @@ TEST_F(TableScanTest, timestamp) { "SELECT c0, c1 FROM tmp WHERE c1 < timestamp '1970-01-01 01:30:00'"); op = PlanBuilder(pool_.get()) - .tableScan(ROW({"c0"}, {BIGINT()}), {}, "", dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp"); op = 
PlanBuilder(pool_.get()) - .tableScan(ROW({"c0"}, {BIGINT()}), {"c1 is null"}, "", dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c1 is null"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0"}, {BIGINT()}), - {"c1 < timestamp'1970-01-01 01:30:00'"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .subfieldFilters({"c1 < timestamp'1970-01-01 01:30:00'"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery( op, @@ -416,11 +457,11 @@ DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) { auto dataColumns = ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}); const size_t tableScanGetOutputTimeLimitMs{100}; auto plan = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 is null"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); // Ensure the getOutput is long enough to trigger the maxGetOutputTimeMs in @@ -473,7 +514,11 @@ TEST_F(TableScanTest, subfieldPruningRowType) { columnType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -526,11 +571,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterSubfieldsMissing) { std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan( - rowType, - makeTableHandle( - SubfieldFilters{}, parseExpr("e.a is null", rowType)), - assignments) + .startTableScan() + .outputType(rowType) + 
.tableHandle(makeTableHandle( + SubfieldFilters{}, parseExpr("e.a is null", rowType))) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -558,12 +604,13 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterRootFieldMissing) { assignments["d"] = std::make_shared( "d", HiveColumnHandle::ColumnType::kRegular, BIGINT(), BIGINT()); auto op = PlanBuilder() - .tableScan( - ROW({{"d", BIGINT()}}), - makeTableHandle( - SubfieldFilters{}, - parseExpr("e.a is null or e.b is null", rowType)), - assignments) + .startTableScan() + .outputType(ROW({{"d", BIGINT()}})) + .tableHandle(makeTableHandle( + SubfieldFilters{}, + parseExpr("e.a is null or e.b is null", rowType))) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -622,10 +669,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterStruct) { } auto op = PlanBuilder() - .tableScan( - outputColumn == kNoOutput ? ROW({"d"}, {BIGINT()}) : rowType, - makeTableHandle(SubfieldFilters{}, remainingFilter), - assignments) + .startTableScan() + .outputType( + outputColumn == kNoOutput ? ROW({"d"}, {BIGINT()}) : rowType) + .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -705,10 +754,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterMap) { } auto op = PlanBuilder() - .tableScan( - outputColumn == kNoOutput ? ROW({"a"}, {BIGINT()}) : rowType, - makeTableHandle(SubfieldFilters{}, remainingFilter), - assignments) + .startTableScan() + .outputType( + outputColumn == kNoOutput ? 
ROW({"a"}, {BIGINT()}) : rowType) + .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -786,7 +837,11 @@ TEST_F(TableScanTest, subfieldPruningMapType) { mapType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -856,7 +911,11 @@ TEST_F(TableScanTest, subfieldPruningArrayType) { arrayType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -928,31 +987,50 @@ TEST_F(TableScanTest, missingColumns) { auto outputTypeC0 = ROW({"c0"}, {BIGINT()}); auto op = PlanBuilder(pool_.get()) - .tableScan(outputType, {}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp"); // Use missing column in a tuple domain filter. op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c1 <= 100.1"}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .subfieldFilters({"c1 <= 100.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp WHERE c1 <= 100.1"); // Use missing column in a tuple domain filter. Select *. 
op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c1 <= 2000.1"}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .subfieldFilters({"c1 <= 2000.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp WHERE c1 <= 2000.1"); // Use missing column in a tuple domain filter. Select c0. op = PlanBuilder(pool_.get()) - .tableScan(outputTypeC0, {"c1 <= 3000.1"}, "", dataColumns) + .startTableScan() + .outputType(outputTypeC0) + .subfieldFilters({"c1 <= 3000.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT c0 FROM tmp WHERE c1 <= 3000.1"); // Use missing column in a tuple domain filter. Select count(*). op = PlanBuilder(pool_.get()) - .tableScan(ROW({}, {}), {"c1 <= 4000.1"}, "", dataColumns) + .startTableScan() + .outputType(ROW({}, {})) + .subfieldFilters({"c1 <= 4000.1"}) + .dataColumns(dataColumns) + .endTableScan() .singleAggregation({}, {"count(1)"}) .planNode(); assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 <= 4000.1"); @@ -965,14 +1043,22 @@ TEST_F(TableScanTest, missingColumns) { ColumnHandleMap assignments; assignments["c0"] = regularColumn("c0", BIGINT()); op = PlanBuilder(pool_.get()) - .tableScan(outputTypeC0, tableHandle, assignments) + .startTableScan() + .outputType(outputTypeC0) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, filePaths, "SELECT c0 FROM tmp WHERE c1 is null or c1 <= 1050.0"); // Use missing column 'c1' in 'is null' filter, while not selecting anything. 
op = PlanBuilder(pool_.get()) - .tableScan(ROW({}, {}), {"c1 is null"}, "", dataColumns) + .startTableScan() + .outputType(ROW({}, {})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .singleAggregation({}, {"count(1)"}) .planNode(); assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 is null"); @@ -987,7 +1073,11 @@ TEST_F(TableScanTest, missingColumns) { tableHandle = makeTableHandle({}, nullptr, "hive_table", dataColumns); op = PlanBuilder(pool_.get()) - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp"); @@ -1016,7 +1106,10 @@ TEST_F(TableScanTest, constDictLazy) { // Orchestrate a Const(Dict(Lazy)) by using remaining filter that passes on // exactly one row. auto op = PlanBuilder() - .tableScan(rowType, {}, "c0 % 1000 = 5") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 % 1000 = 5") + .endTableScan() .project({"c1 + 10"}) .planNode(); @@ -1024,14 +1117,20 @@ TEST_F(TableScanTest, constDictLazy) { // Orchestrate a Const(Dict(Lazy)) for a complex type (map) op = PlanBuilder() - .tableScan(rowType, {}, "c0 = 0") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 = 0") + .endTableScan() .project({"cardinality(c2)"}) .planNode(); assertQuery(op, {filePath}, "SELECT 0 FROM tmp WHERE c0 = 5"); op = PlanBuilder() - .tableScan(rowType, {}, "c0 = 2") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 = 2") + .endTableScan() .project({"cardinality(c2)"}) .planNode(); @@ -1562,7 +1661,11 @@ TEST_F(TableScanTest, statsBasedSkipping) { asRowType(rowVector->type())); return TableScanTest::assertQuery( PlanBuilder() - .tableScan(ROW({"c1"}, {INTEGER()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c1"}, {INTEGER()})) + .tableHandle(tableHandle) + .assignments(assignments) + 
.endTableScan() .planNode(), filePaths, query); @@ -1725,7 +1828,7 @@ TEST_F(TableScanTest, statsBasedSkippingNulls) { auto assertQuery = [&](const std::string& filter) { return TableScanTest::assertQuery( - PlanBuilder().tableScan(rowType, {filter}).planNode(), + PlanBuilder(pool_.get()).tableScan(rowType, {filter}).planNode(), filePaths, "SELECT * FROM tmp WHERE " + filter); }; @@ -2052,10 +2155,11 @@ TEST_F(TableScanTest, filterPushdown) { auto task = assertQuery( PlanBuilder() - .tableScan( - ROW({"c1", "c3", "c0"}, {BIGINT(), BOOLEAN(), TINYINT()}), - tableHandle, - assignments) + .startTableScan() + .outputType(ROW({"c1", "c3", "c0"}, {BIGINT(), BOOLEAN(), TINYINT()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c1, c3, c0 FROM tmp WHERE (c1 >= 0 OR c1 IS NULL) AND c3"); @@ -2070,7 +2174,11 @@ TEST_F(TableScanTest, filterPushdown) { assignments["c0"] = regularColumn("c0", TINYINT()); assertQuery( PlanBuilder() - .tableScan(ROW({"c0"}, {TINYINT()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c0"}, {TINYINT()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c0 FROM tmp WHERE (c1 >= 0 OR c1 IS NULL) AND c3"); @@ -2079,7 +2187,11 @@ TEST_F(TableScanTest, filterPushdown) { assignments.clear(); assertQuery( PlanBuilder() - .tableScan(ROW({}, {}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({}, {})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .singleAggregation({}, {"sum(1)"}) .planNode(), filePaths, @@ -2091,7 +2203,11 @@ TEST_F(TableScanTest, filterPushdown) { tableHandle = makeTableHandle(std::move(subfieldFilters)); assertQuery( PlanBuilder() - .tableScan(ROW({}, {}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({}, {})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .singleAggregation({}, {"sum(1)"}) .planNode(), filePaths, 
@@ -2115,7 +2231,11 @@ TEST_F(TableScanTest, path) { auto pathValue = fmt::format("file:{}", filePath->path); auto typeWithPath = ROW({kPath, "a"}, {VARCHAR(), BIGINT()}); auto op = PlanBuilder() - .tableScan(typeWithPath, tableHandle, assignments) + .startTableScan() + .outputType(typeWithPath) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, {filePath}, fmt::format("SELECT '{}', * FROM tmp", pathValue)); @@ -2125,12 +2245,22 @@ TEST_F(TableScanTest, path) { SubfieldFilters{}, parseExpr(fmt::format("\"{}\" = '{}'", kPath, pathValue), typeWithPath)); - op = PlanBuilder().tableScan(rowType, tableHandle, assignments).planNode(); + op = PlanBuilder() + .startTableScan() + .outputType(rowType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); assertQuery(op, {filePath}, "SELECT * FROM tmp"); // use $path in a filter and project it out op = PlanBuilder() - .tableScan(typeWithPath, tableHandle, assignments) + .startTableScan() + .outputType(typeWithPath) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, {filePath}, fmt::format("SELECT '{}', * FROM tmp", pathValue)); @@ -2177,7 +2307,11 @@ TEST_F(TableScanTest, bucket) { ROW({kBucket, "c0", "c1"}, {INTEGER(), INTEGER(), BIGINT()}); auto tableHandle = makeTableHandle(); auto op = PlanBuilder() - .tableScan(typeWithBucket, tableHandle, assignments) + .startTableScan() + .outputType(typeWithBucket) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); OperatorTestBase::assertQuery(op, splits, "SELECT c0, * FROM tmp"); @@ -2191,7 +2325,11 @@ TEST_F(TableScanTest, bucket) { // Filter on bucket and filter on first column should produce // identical result for each split op = PlanBuilder() - .tableScan(typeWithBucket, tableHandle, assignments) + .startTableScan() + .outputType(typeWithBucket) + .tableHandle(tableHandle) + .assignments(assignments) + 
.endTableScan() .planNode(); assertQuery( op, @@ -2204,7 +2342,13 @@ TEST_F(TableScanTest, bucket) { hsplit = HiveConnectorSplitBuilder(filePaths[i]->path) .tableBucketNumber(bucketValue) .build(); - op = PlanBuilder().tableScan(rowTypes, tableHandle, assignments).planNode(); + op = PlanBuilder() + .startTableScan() + .outputType(rowTypes) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); assertQuery( op, hsplit, @@ -2235,28 +2379,28 @@ TEST_F(TableScanTest, integerNotEqualFilter) { assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c0 != 0::TINYINT"}, {}) + .tableScan(rowType, {"c0 != 0::TINYINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c0 != 0"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c1 != 1::SMALLINT"}, {}) + .tableScan(rowType, {"c1 != 1::SMALLINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c1 != 1"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c2 != (-2)::INTEGER"}, {}) + .tableScan(rowType, {"c2 != (-2)::INTEGER"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c2 != -2"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c3 != 3::BIGINT"}, {}) + .tableScan(rowType, {"c3 != 3::BIGINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c3 != 3"); @@ -2269,14 +2413,13 @@ TEST_F(TableScanTest, floatingPointNotEqualFilter) { createDuckDbTable(vectors); auto outputType = ROW({"c4"}, {DOUBLE()}); - auto op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c4 != 0.0"}, {}) - .planNode(); + auto op = + PlanBuilder(pool_.get()).tableScan(outputType, {"c4 != 0.0"}).planNode(); assertQuery(op, {filePath}, "SELECT c4 FROM tmp WHERE c4 != 0.0"); outputType = ROW({"c3"}, {REAL()}); op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c3 != cast(0.0 as REAL)"}, {}) + .tableScan(outputType, {"c3 != cast(0.0 as REAL)"}) .planNode(); assertQuery( op, {filePath}, "SELECT c3 FROM tmp WHERE c3 != cast(0.0 as REAL)"); @@ -2309,13 +2452,13 @@ 
TEST_F(TableScanTest, stringNotEqualFilter) { assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c0 != 'banana'"}, {}) + .tableScan(rowType, {"c0 != 'banana'"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c0 != 'banana'"); assertQuery( - PlanBuilder(pool_.get()).tableScan(rowType, {"c1 != ''"}, {}).planNode(), + PlanBuilder(pool_.get()).tableScan(rowType, {"c1 != ''"}).planNode(), {filePath}, "SELECT * FROM tmp WHERE c1 != ''"); } @@ -2420,7 +2563,11 @@ TEST_F(TableScanTest, remainingFilter) { makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); assertQuery( PlanBuilder(pool_.get()) - .tableScan(ROW({"c2"}, {DOUBLE()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c2"}, {DOUBLE()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c2 FROM tmp WHERE c1 > c0"); @@ -2434,10 +2581,11 @@ TEST_F(TableScanTest, remainingFilter) { makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); assertQuery( PlanBuilder(pool_.get()) - .tableScan( - ROW({"c1", "c2"}, {INTEGER(), DOUBLE()}), - tableHandle, - assignments) + .startTableScan() + .outputType(ROW({"c1", "c2"}, {INTEGER(), DOUBLE()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c1, c2 FROM tmp WHERE c1 > c0"); @@ -2477,7 +2625,7 @@ TEST_F(TableScanTest, remainingFilterSkippedStrides) { } createDuckDbTable(vectors); core::PlanNodeId tableScanNodeId; - auto plan = PlanBuilder() + auto plan = PlanBuilder(pool_.get()) .tableScan(rowType, {}, "c0 = 0 or c1 = 2") .capturePlanNodeId(tableScanNodeId) .planNode(); @@ -2629,7 +2777,10 @@ TEST_F(TableScanTest, aggregationPushdown) { // Add remaining filter to scan to expose LazyVectors wrapped in Dictionary to // aggregation. 
op = PlanBuilder() - .tableScan(rowType_, {}, "length(c5) % 2 = 0") + .startTableScan() + .outputType(rowType_) + .remainingFilter("length(c5) % 2 = 0") + .endTableScan() .singleAggregation({"c5"}, {"max(c0)"}) .planNode(); task = assertQuery( @@ -2760,8 +2911,13 @@ TEST_F(TableScanTest, interleaveLazyEager) { SubfieldFiltersBuilder().add("c0.c0", isNotNull()).build()); ColumnHandleMap assignments = {{"c0", regularColumn("c0", column->type())}}; CursorParameters params; - params.planNode = - PlanBuilder().tableScan(rowType, tableHandle, assignments).planNode(); + params.planNode = PlanBuilder() + .startTableScan() + .outputType(rowType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); TaskCursor cursor(params); cursor.task()->addSplit("0", makeHiveSplit(lazyFile->path)); cursor.task()->addSplit("0", makeHiveSplit(eagerFile->path)); @@ -3182,7 +3338,10 @@ TEST_F(TableScanTest, readMissingFieldsInMap) { ROW({"m1", "a2"}, {{MAP(BIGINT(), structType), ARRAY(structType)}}); auto op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project( {"m1[0].a", "m1[1].b", @@ -3290,7 +3449,10 @@ TEST_F(TableScanTest, readMissingFieldsInMap) { rowType = ROW({"i1", "a2"}, {{INTEGER(), ARRAY(structType)}}); op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project({"i1"}) .planNode(); @@ -3395,7 +3557,10 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) { auto op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project({"st1.a", "st1.b", "st1.c", "st1.d", "i2", "d3", "b4", "c4"}) .planNode(); @@ -3527,7 +3692,11 @@ TEST_F(TableScanTest, varbinaryPartitionKey) { auto outputType = ROW({"a", "ds_alias"}, {BIGINT(), VARBINARY()}); auto op = PlanBuilder() - 
.tableScan(outputType, makeTableHandle(), assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp"); diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index d76c3ee69087..54981d7c66d6 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -108,19 +108,6 @@ PlanBuilder& PlanBuilder::tableScan( .endTableScan(); } -PlanBuilder& PlanBuilder::tableScan( - const RowTypePtr& outputType, - const std::shared_ptr& tableHandle, - const std::unordered_map< - std::string, - std::shared_ptr>& assignments) { - return TableScanBuilder(*this) - .outputType(outputType) - .tableHandle(tableHandle) - .assignments(assignments) - .endTableScan(); -} - PlanBuilder& PlanBuilder::tpchTableScan( tpch::Table table, std::vector&& columnNames, diff --git a/velox/exec/tests/utils/PlanBuilder.h b/velox/exec/tests/utils/PlanBuilder.h index bf151970732b..36929671f09f 100644 --- a/velox/exec/tests/utils/PlanBuilder.h +++ b/velox/exec/tests/utils/PlanBuilder.h @@ -146,20 +146,6 @@ class PlanBuilder { const std::string& remainingFilter = "", const RowTypePtr& dataColumns = nullptr); - /// Add a TableScanNode using a connector-specific table handle and - /// assignments. Supports any connector, not just Hive connector. - /// - /// @param outputType List of column names and types to project out. Column - /// names should match the keys in the 'assignments' map. The 'assignments' - /// map may contain more columns then 'outputType' if some columns are only - /// used by pushed-down filters. - PlanBuilder& tableScan( - const RowTypePtr& outputType, - const std::shared_ptr& tableHandle, - const std::unordered_map< - std::string, - std::shared_ptr>& assignments); - /// Add a TableScanNode to scan a TPC-H table. 
/// /// @param tpchTableHandle The handle that specifies the target TPC-H table From 8451d92ae42cf11fd9d178ebb20222875ba061bd Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Tue, 5 Dec 2023 23:06:42 +0530 Subject: [PATCH 2/2] address review comments --- velox/exec/tests/HashJoinTest.cpp | 5 -- velox/exec/tests/TableScanTest.cpp | 114 ++++++++++++--------------- velox/exec/tests/utils/PlanBuilder.h | 9 +++ 3 files changed, 59 insertions(+), 69 deletions(-) diff --git a/velox/exec/tests/HashJoinTest.cpp b/velox/exec/tests/HashJoinTest.cpp index 57ca0887602b..d07966b90aad 100644 --- a/velox/exec/tests/HashJoinTest.cpp +++ b/velox/exec/tests/HashJoinTest.cpp @@ -4126,8 +4126,6 @@ TEST_F(HashJoinTest, dynamicFilters) { auto op = PlanBuilder(planNodeIdGenerator, pool_.get()) .startTableScan() .outputType(scanOutputType) - .tableHandle(makeTableHandle( - common::test::SubfieldFiltersBuilder().build())) .assignments(assignments) .endTableScan() .capturePlanNodeId(probeScanId) @@ -4880,8 +4878,6 @@ TEST_F(HashJoinTest, dynamicFilterOnPartitionKey) { .partitionKey("k", "0") .build(); auto outputType = ROW({"n1_0", "n1_1"}, {BIGINT(), BIGINT()}); - std::shared_ptr tableHandle = - makeTableHandle(); ColumnHandleMap assignments = { {"n1_0", regularColumn("c0", BIGINT())}, {"n1_1", partitionKey("k", BIGINT())}}; @@ -4892,7 +4888,6 @@ TEST_F(HashJoinTest, dynamicFilterOnPartitionKey) { PlanBuilder(planNodeIdGenerator) .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .capturePlanNodeId(probeScanId) diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index e9f01ecf4706..6ff533257699 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -141,7 +141,6 @@ class TableScanTest : public virtual HiveConnectorTestBase { .build(); auto outputType = ROW({"pkey", "c0", "c1"}, {partitionType, BIGINT(), DOUBLE()}); - auto tableHandle = makeTableHandle(); 
ColumnHandleMap assignments = { {"pkey", partitionKey("pkey", partitionType)}, {"c0", regularColumn("c0", BIGINT())}, @@ -150,7 +149,6 @@ class TableScanTest : public virtual HiveConnectorTestBase { auto op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -164,7 +162,6 @@ class TableScanTest : public virtual HiveConnectorTestBase { op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -176,7 +173,6 @@ class TableScanTest : public virtual HiveConnectorTestBase { op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -191,7 +187,6 @@ class TableScanTest : public virtual HiveConnectorTestBase { op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -281,7 +276,7 @@ TEST_F(TableScanTest, columnAliases) { .tableName(tableName) .outputType(outputType) .columnAliases(aliases) - .subfieldFilters({"a < 10"}) + .subfieldFilter("a < 10") .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c0 <= 10"); @@ -316,7 +311,6 @@ TEST_F(TableScanTest, partitionKeyAlias) { auto op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(makeTableHandle()) .assignments(assignments) .endTableScan() .planNode(); @@ -374,7 +368,7 @@ TEST_F(TableScanTest, timestamp) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) - .subfieldFilters({"c1 is null"}) + .subfieldFilter("c1 is null") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -383,7 +377,7 @@ TEST_F(TableScanTest, timestamp) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) - .subfieldFilters({"c1 < 
'1970-01-01 01:30:00'::TIMESTAMP"}) + .subfieldFilter("c1 < '1970-01-01 01:30:00'::TIMESTAMP") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -403,7 +397,7 @@ TEST_F(TableScanTest, timestamp) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c0"}, {BIGINT()})) - .subfieldFilters({"c1 is null"}) + .subfieldFilter("c1 is null") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -412,7 +406,7 @@ TEST_F(TableScanTest, timestamp) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c0"}, {BIGINT()})) - .subfieldFilters({"c1 < timestamp'1970-01-01 01:30:00'"}) + .subfieldFilter("c1 < timestamp'1970-01-01 01:30:00'") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -459,7 +453,7 @@ DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) { auto plan = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) - .subfieldFilters({"c1 is null"}) + .subfieldFilter("c1 is null") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -516,7 +510,6 @@ TEST_F(TableScanTest, subfieldPruningRowType) { auto op = PlanBuilder() .startTableScan() .outputType(rowType) - .tableHandle(makeTableHandle()) .assignments(assignments) .endTableScan() .planNode(); @@ -573,8 +566,7 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterSubfieldsMissing) { auto op = PlanBuilder() .startTableScan() .outputType(rowType) - .tableHandle(makeTableHandle( - SubfieldFilters{}, parseExpr("e.a is null", rowType))) + .remainingFilter("e.a is null") .assignments(assignments) .endTableScan() .planNode(); @@ -606,9 +598,8 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterRootFieldMissing) { auto op = PlanBuilder() .startTableScan() .outputType(ROW({{"d", BIGINT()}})) - .tableHandle(makeTableHandle( - SubfieldFilters{}, - parseExpr("e.a is null or e.b is null", rowType))) + .remainingFilter("e.a is null or e.b is null") + .dataColumns(rowType) .assignments(assignments) .endTableScan() .planNode(); @@ 
-659,20 +650,20 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterStruct) { structType, std::move(subfields)); } - core::TypedExprPtr remainingFilter; + std::string remainingFilter; if (filterColumn == kWholeColumn) { - remainingFilter = parseExpr( - "coalesce(c, cast(null AS ROW(a BIGINT, b BIGINT))).a % 2 == 0", - rowType); + remainingFilter = + "coalesce(c, cast(null AS ROW(a BIGINT, b BIGINT))).a % 2 == 0"; } else { - remainingFilter = parseExpr("c.a % 2 == 0", rowType); + remainingFilter = "c.a % 2 == 0"; } auto op = PlanBuilder() .startTableScan() .outputType( outputColumn == kNoOutput ? ROW({"d"}, {BIGINT()}) : rowType) - .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .remainingFilter(remainingFilter) + .dataColumns(rowType) .assignments(assignments) .endTableScan() .planNode(); @@ -745,19 +736,20 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterMap) { mapType, std::move(subfields)); } - core::TypedExprPtr remainingFilter; + std::string remainingFilter; if (filterColumn == kWholeColumn) { - remainingFilter = parseExpr( - "coalesce(b, cast(null AS MAP(BIGINT, BIGINT)))[0] == 0", rowType); + remainingFilter = + "coalesce(b, cast(null AS MAP(BIGINT, BIGINT)))[0] == 0"; } else { - remainingFilter = parseExpr("b[0] == 0", rowType); + remainingFilter = "b[0] == 0"; } auto op = PlanBuilder() .startTableScan() .outputType( outputColumn == kNoOutput ? 
ROW({"a"}, {BIGINT()}) : rowType) - .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .remainingFilter(remainingFilter) + .dataColumns(rowType) .assignments(assignments) .endTableScan() .planNode(); @@ -839,7 +831,6 @@ TEST_F(TableScanTest, subfieldPruningMapType) { auto op = PlanBuilder() .startTableScan() .outputType(rowType) - .tableHandle(makeTableHandle()) .assignments(assignments) .endTableScan() .planNode(); @@ -913,7 +904,6 @@ TEST_F(TableScanTest, subfieldPruningArrayType) { auto op = PlanBuilder() .startTableScan() .outputType(rowType) - .tableHandle(makeTableHandle()) .assignments(assignments) .endTableScan() .planNode(); @@ -998,7 +988,7 @@ TEST_F(TableScanTest, missingColumns) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(outputType) - .subfieldFilters({"c1 <= 100.1"}) + .subfieldFilter("c1 <= 100.1") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -1008,7 +998,7 @@ TEST_F(TableScanTest, missingColumns) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(outputType) - .subfieldFilters({"c1 <= 2000.1"}) + .subfieldFilter("c1 <= 2000.1") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -1018,7 +1008,7 @@ TEST_F(TableScanTest, missingColumns) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(outputTypeC0) - .subfieldFilters({"c1 <= 3000.1"}) + .subfieldFilter("c1 <= 3000.1") .dataColumns(dataColumns) .endTableScan() .planNode(); @@ -1028,7 +1018,7 @@ TEST_F(TableScanTest, missingColumns) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({}, {})) - .subfieldFilters({"c1 <= 4000.1"}) + .subfieldFilter("c1 <= 4000.1") .dataColumns(dataColumns) .endTableScan() .singleAggregation({}, {"count(1)"}) @@ -1056,7 +1046,7 @@ TEST_F(TableScanTest, missingColumns) { op = PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({}, {})) - .subfieldFilters({"c1 is null"}) + .subfieldFilter("c1 is null") .dataColumns(dataColumns) .endTableScan() .singleAggregation({}, 
{"count(1)"}) @@ -1070,12 +1060,10 @@ TEST_F(TableScanTest, missingColumns) { assignments["a"] = regularColumn("c0", BIGINT()); assignments["b"] = regularColumn("c1", DOUBLE()); - tableHandle = makeTableHandle({}, nullptr, "hive_table", dataColumns); - op = PlanBuilder(pool_.get()) .startTableScan() .outputType(outputType) - .tableHandle(tableHandle) + .dataColumns(dataColumns) .assignments(assignments) .endTableScan() .planNode(); @@ -1828,7 +1816,7 @@ TEST_F(TableScanTest, statsBasedSkippingNulls) { auto assertQuery = [&](const std::string& filter) { return TableScanTest::assertQuery( - PlanBuilder(pool_.get()).tableScan(rowType, {filter}).planNode(), + PlanBuilder().tableScan(rowType, {filter}).planNode(), filePaths, "SELECT * FROM tmp WHERE " + filter); }; @@ -2226,14 +2214,11 @@ TEST_F(TableScanTest, path) { auto assignments = allRegularColumns(rowType); assignments[kPath] = synthesizedColumn(kPath, VARCHAR()); - auto tableHandle = makeTableHandle(); - auto pathValue = fmt::format("file:{}", filePath->path); auto typeWithPath = ROW({kPath, "a"}, {VARCHAR(), BIGINT()}); auto op = PlanBuilder() .startTableScan() .outputType(typeWithPath) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -2241,10 +2226,9 @@ TEST_F(TableScanTest, path) { op, {filePath}, fmt::format("SELECT '{}', * FROM tmp", pathValue)); // use $path in a filter, but don't project it out - tableHandle = makeTableHandle( + auto tableHandle = makeTableHandle( SubfieldFilters{}, parseExpr(fmt::format("\"{}\" = '{}'", kPath, pathValue), typeWithPath)); - op = PlanBuilder() .startTableScan() .outputType(rowType) @@ -2258,7 +2242,6 @@ TEST_F(TableScanTest, path) { op = PlanBuilder() .startTableScan() .outputType(typeWithPath) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -2305,11 +2288,9 @@ TEST_F(TableScanTest, bucket) { // Query that spans on all buckets auto typeWithBucket = ROW({kBucket, "c0", "c1"}, {INTEGER(), INTEGER(), 
BIGINT()}); - auto tableHandle = makeTableHandle(); auto op = PlanBuilder() .startTableScan() .outputType(typeWithBucket) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -2320,14 +2301,12 @@ TEST_F(TableScanTest, bucket) { auto hsplit = HiveConnectorSplitBuilder(filePaths[i]->path) .tableBucketNumber(bucketValue) .build(); - tableHandle = makeTableHandle(); // Filter on bucket and filter on first column should produce // identical result for each split op = PlanBuilder() .startTableScan() .outputType(typeWithBucket) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -2345,7 +2324,6 @@ TEST_F(TableScanTest, bucket) { op = PlanBuilder() .startTableScan() .outputType(rowTypes) - .tableHandle(tableHandle) .assignments(assignments) .endTableScan() .planNode(); @@ -2539,13 +2517,23 @@ TEST_F(TableScanTest, remainingFilter) { createDuckDbTable(vectors); assertQuery( - PlanBuilder(pool_.get()).tableScan(rowType, {}, "c1 > c0").planNode(), + PlanBuilder(pool_.get()) + .startTableScan() + .outputType(rowType) + .remainingFilter("c1 > c0") + .endTableScan() + .planNode(), filePaths, "SELECT * FROM tmp WHERE c1 > c0"); // filter that never passes assertQuery( - PlanBuilder(pool_.get()).tableScan(rowType, {}, "c1 % 5 = 6").planNode(), + PlanBuilder(pool_.get()) + .startTableScan() + .outputType(rowType) + .remainingFilter("c1 % 5 = 6") + .endTableScan() + .planNode(), filePaths, "SELECT * FROM tmp WHERE c1 % 5 = 6"); @@ -2559,13 +2547,13 @@ TEST_F(TableScanTest, remainingFilter) { // Remaining filter uses columns that are not used otherwise. 
ColumnHandleMap assignments = {{"c2", regularColumn("c2", DOUBLE())}}; - auto tableHandle = - makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); + assertQuery( PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c2"}, {DOUBLE()})) - .tableHandle(tableHandle) + .remainingFilter("c1 > c0") + .dataColumns(rowType) .assignments(assignments) .endTableScan() .planNode(), @@ -2577,13 +2565,13 @@ TEST_F(TableScanTest, remainingFilter) { assignments = { {"c1", regularColumn("c1", INTEGER())}, {"c2", regularColumn("c2", DOUBLE())}}; - tableHandle = - makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); + assertQuery( PlanBuilder(pool_.get()) .startTableScan() .outputType(ROW({"c1", "c2"}, {INTEGER(), DOUBLE()})) - .tableHandle(tableHandle) + .remainingFilter("c1 > c0") + .dataColumns(rowType) .assignments(assignments) .endTableScan() .planNode(), @@ -2625,7 +2613,7 @@ TEST_F(TableScanTest, remainingFilterSkippedStrides) { } createDuckDbTable(vectors); core::PlanNodeId tableScanNodeId; - auto plan = PlanBuilder(pool_.get()) + auto plan = PlanBuilder() .tableScan(rowType, {}, "c0 = 0 or c1 = 2") .capturePlanNodeId(tableScanNodeId) .planNode(); @@ -2907,14 +2895,13 @@ TEST_F(TableScanTest, interleaveLazyEager) { } auto eagerFile = TempFilePath::create(); writeToFile(eagerFile->path, rowsWithNulls); - auto tableHandle = makeTableHandle( - SubfieldFiltersBuilder().add("c0.c0", isNotNull()).build()); + ColumnHandleMap assignments = {{"c0", regularColumn("c0", column->type())}}; CursorParameters params; params.planNode = PlanBuilder() .startTableScan() .outputType(rowType) - .tableHandle(tableHandle) + .subfieldFilter("c0.c0 is not null") .assignments(assignments) .endTableScan() .planNode(); @@ -3694,7 +3681,6 @@ TEST_F(TableScanTest, varbinaryPartitionKey) { auto op = PlanBuilder() .startTableScan() .outputType(outputType) - .tableHandle(makeTableHandle()) .assignments(assignments) .endTableScan() .planNode(); diff --git 
a/velox/exec/tests/utils/PlanBuilder.h b/velox/exec/tests/utils/PlanBuilder.h index 36929671f09f..3089936c6bb3 100644 --- a/velox/exec/tests/utils/PlanBuilder.h +++ b/velox/exec/tests/utils/PlanBuilder.h @@ -192,6 +192,15 @@ class PlanBuilder { return *this; } + /// @param subfieldFilter A SQL expression for the range filter + /// to apply to an individual column. Supported filters are: column <= + /// value, column < value, column >= value, column > value, column = value, + /// column IN (v1, v2, ... vN), column < v1 OR column >= v2. + TableScanBuilder& subfieldFilter(std::string subfieldFilter) { + subfieldFilters_.emplace_back(std::move(subfieldFilter)); + return *this; + } + /// @param remainingFilter SQL expression for the additional conjunct. May /// include multiple columns and SQL functions. The remainingFilter is /// AND'ed with all the subfieldFilters.