From 70c9b7d8eeab954b62de5d746d996a948250ea25 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Tue, 28 Nov 2023 21:09:34 +0530 Subject: [PATCH] Replace tableScan API with tableScanBuilder --- .../fuzzer/tests/FuzzerConnectorTest.cpp | 60 ++- velox/exec/tests/AssertQueryBuilderTest.cpp | 9 +- velox/exec/tests/AsyncConnectorTest.cpp | 5 +- velox/exec/tests/HashJoinTest.cpp | 27 +- velox/exec/tests/TableScanTest.cpp | 371 +++++++++++++----- velox/exec/tests/utils/PlanBuilder.cpp | 13 - velox/exec/tests/utils/PlanBuilder.h | 14 - 7 files changed, 337 insertions(+), 162 deletions(-) diff --git a/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp b/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp index ef2163a315406..5b5fe277ac595 100644 --- a/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp +++ b/velox/connectors/fuzzer/tests/FuzzerConnectorTest.cpp @@ -31,8 +31,12 @@ TEST_F(FuzzerConnectorTest, singleSplit) { const size_t numRows = 100; auto type = ROW({BIGINT(), DOUBLE(), VARCHAR()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -43,8 +47,12 @@ TEST_F(FuzzerConnectorTest, floatingPoints) { const size_t numRows = 1000; auto type = ROW({REAL(), DOUBLE()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -59,8 +67,12 @@ TEST_F(FuzzerConnectorTest, complexTypes) { REAL(), }); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .split(makeFuzzerSplit(numRows)) @@ -72,8 +84,12 @@ TEST_F(FuzzerConnectorTest, multipleSplits) { const size_t numSplits = 10; auto type = ROW({BIGINT(), DOUBLE(), VARCHAR()}); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .splits(makeFuzzerSplits(rowsPerSplit, numSplits)) @@ -89,8 +105,12 @@ TEST_F(FuzzerConnectorTest, randomTypes) { for (size_t i = 0; i < iterations; ++i) { auto type = VectorFuzzer({}, pool()).randRowType(); - auto plan = - PlanBuilder().tableScan(type, makeFuzzerTableHandle(), {}).planNode(); + auto plan = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle()) + .endTableScan() + .planNode(); exec::test::AssertQueryBuilder(plan) .splits(makeFuzzerSplits(rowsPerSplit, numSplits)) .assertTypeAndNumRows(type, rowsPerSplit * numSplits); @@ -101,14 +121,18 @@ TEST_F(FuzzerConnectorTest, reproducible) { const size_t numRows = 100; auto type = ROW({BIGINT(), ARRAY(INTEGER()), VARCHAR()}); - auto plan1 = - PlanBuilder() - .tableScan(type, makeFuzzerTableHandle(/*fuzerSeed=*/1234), {}) - .planNode(); - auto plan2 = - PlanBuilder() - .tableScan(type, makeFuzzerTableHandle(/*fuzerSeed=*/1234), {}) - .planNode(); + auto plan1 = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle(/*fuzerSeed=*/1234)) + .endTableScan() + .planNode(); + auto plan2 = PlanBuilder() + .startTableScan() + .outputType(type) + .tableHandle(makeFuzzerTableHandle(/*fuzerSeed=*/1234)) + .endTableScan() + .planNode(); auto results1 = exec::test::AssertQueryBuilder(plan1) .split(makeFuzzerSplit(numRows)) diff --git a/velox/exec/tests/AssertQueryBuilderTest.cpp b/velox/exec/tests/AssertQueryBuilderTest.cpp index 8c0b7ceacc814..a01a467b4e4f1 100644 --- a/velox/exec/tests/AssertQueryBuilderTest.cpp +++ b/velox/exec/tests/AssertQueryBuilderTest.cpp @@ -86,10 +86,11 @@ TEST_F(AssertQueryBuilderTest, hiveSplits) { AssertQueryBuilder( PlanBuilder() - .tableScan( - ROW({"c0", "ds"}, {INTEGER(), VARCHAR()}), - makeTableHandle(), - assignments) + .startTableScan() + .outputType(ROW({"c0", "ds"}, {INTEGER(), VARCHAR()})) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(), duckDbQueryRunner_) .split(HiveConnectorSplitBuilder(file->path) diff --git a/velox/exec/tests/AsyncConnectorTest.cpp b/velox/exec/tests/AsyncConnectorTest.cpp index 30e4f8fb0dd50..6d9115495abb0 100644 --- a/velox/exec/tests/AsyncConnectorTest.cpp +++ b/velox/exec/tests/AsyncConnectorTest.cpp @@ -193,7 +193,10 @@ TEST_F(AsyncConnectorTest, basic) { auto tableHandle = std::make_shared(); core::PlanNodeId scanId; auto plan = PlanBuilder() - .tableScan(ROW({"a"}, {BIGINT()}), tableHandle, {}) + .startTableScan() + .outputType(ROW({"a"}, {BIGINT()})) + .tableHandle(tableHandle) + .endTableScan() .capturePlanNodeId(scanId) .singleAggregation({}, {"min(a)"}) .planNode(); diff --git a/velox/exec/tests/HashJoinTest.cpp b/velox/exec/tests/HashJoinTest.cpp index 79cc8ea9781d3..0851951afa614 100644 --- a/velox/exec/tests/HashJoinTest.cpp +++ b/velox/exec/tests/HashJoinTest.cpp @@ -4123,16 +4123,17 @@ TEST_F(HashJoinTest, dynamicFilters) { assignments["b"] = regularColumn("c1", BIGINT()); core::PlanNodeId probeScanId; - auto op = - PlanBuilder(planNodeIdGenerator, pool_.get()) - .tableScan( - scanOutputType, - makeTableHandle(common::test::SubfieldFiltersBuilder().build()), - assignments) - .capturePlanNodeId(probeScanId) - .hashJoin({"a"}, {"u_c0"}, buildSide, "", {"a", "b", "u_c1"}) - .project({"a", "b + 1", "b + u_c1"}) - .planNode(); + auto op = PlanBuilder(planNodeIdGenerator, pool_.get()) + .startTableScan() + .outputType(scanOutputType) + .tableHandle(makeTableHandle( + common::test::SubfieldFiltersBuilder().build())) + .assignments(assignments) + .endTableScan() + .capturePlanNodeId(probeScanId) + .hashJoin({"a"}, {"u_c0"}, buildSide, "", {"a", "b", "u_c1"}) + .project({"a", "b + 1", "b + u_c1"}) + .planNode(); HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get()) .planNode(std::move(op)) @@ -4889,7 +4890,11 @@ TEST_F(HashJoinTest, dynamicFilterOnPartitionKey) { auto planNodeIdGenerator = std::make_shared(); auto op = PlanBuilder(planNodeIdGenerator) - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .capturePlanNodeId(probeScanId) .hashJoin( {"n1_1"}, diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index dbefb40bb711b..e9f01ecf47063 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -148,7 +148,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { {"c1", regularColumn("c1", DOUBLE())}}; auto op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); std::string partitionValueStr = @@ -158,7 +162,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { outputType = ROW({"c0", "pkey", "c1"}, {BIGINT(), partitionType, DOUBLE()}); op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, @@ -166,7 +174,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { fmt::format("SELECT c0, {}, c1 FROM tmp", partitionValueStr)); outputType = ROW({"c0", "c1", "pkey"}, {BIGINT(), DOUBLE(), partitionType}); op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, @@ -177,7 +189,11 @@ class TableScanTest : public virtual HiveConnectorTestBase { assignments = {{"pkey", partitionKey("pkey", partitionType)}}; outputType = ROW({"pkey"}, {partitionType}); op = PlanBuilder() - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, split, fmt::format("SELECT {} FROM tmp", partitionValueStr)); @@ -251,19 +267,33 @@ TEST_F(TableScanTest, columnAliases) { std::unordered_map aliases = {{"a", "c0"}}; auto outputType = ROW({"a"}, {BIGINT()}); auto op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases) + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp"); // Use aliased column in a range filter. op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases, {"a < 10"}) + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .subfieldFilters({"a < 10"}) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c0 <= 10"); // Use aliased column in remaining filter. op = PlanBuilder(pool_.get()) - .tableScan(tableName, outputType, aliases, {}, "a % 2 = 1") + .startTableScan() + .tableName(tableName) + .outputType(outputType) + .columnAliases(aliases) + .remainingFilter("a % 2 = 1") + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c0 % 2 = 1"); } @@ -284,7 +314,11 @@ TEST_F(TableScanTest, partitionKeyAlias) { auto outputType = ROW({"a", "ds_alias"}, {BIGINT(), VARCHAR()}); auto op = PlanBuilder() - .tableScan(outputType, makeTableHandle(), assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp"); @@ -338,20 +372,20 @@ TEST_F(TableScanTest, timestamp) { assertQuery(op, {filePath}, "SELECT c0, c1 FROM tmp"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 is null"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0, c1 FROM tmp WHERE c1 is null"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 < '1970-01-01 01:30:00'::TIMESTAMP"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 < '1970-01-01 01:30:00'::TIMESTAMP"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery( op, @@ -359,21 +393,28 @@ TEST_F(TableScanTest, timestamp) { "SELECT c0, c1 FROM tmp WHERE c1 < timestamp '1970-01-01 01:30:00'"); op = PlanBuilder(pool_.get()) - .tableScan(ROW({"c0"}, {BIGINT()}), {}, "", dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp"); op = PlanBuilder(pool_.get()) - .tableScan(ROW({"c0"}, {BIGINT()}), {"c1 is null"}, "", dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, {filePath}, "SELECT c0 FROM tmp WHERE c1 is null"); op = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0"}, {BIGINT()}), - {"c1 < timestamp'1970-01-01 01:30:00'"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0"}, {BIGINT()})) + .subfieldFilters({"c1 < timestamp'1970-01-01 01:30:00'"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery( op, @@ -416,11 +457,11 @@ DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) { auto dataColumns = ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}); const size_t tableScanGetOutputTimeLimitMs{100}; auto plan = PlanBuilder(pool_.get()) - .tableScan( - ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()}), - {"c1 is null"}, - "", - dataColumns) + .startTableScan() + .outputType(ROW({"c0", "c1"}, {BIGINT(), TIMESTAMP()})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); // Ensure the getOutput is long enough to trigger the maxGetOutputTimeMs in @@ -473,7 +514,11 @@ TEST_F(TableScanTest, subfieldPruningRowType) { columnType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -526,11 +571,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterSubfieldsMissing) { std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan( - rowType, - makeTableHandle( - SubfieldFilters{}, parseExpr("e.a is null", rowType)), - assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle( + SubfieldFilters{}, parseExpr("e.a is null", rowType))) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -558,12 +604,13 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterRootFieldMissing) { assignments["d"] = std::make_shared( "d", HiveColumnHandle::ColumnType::kRegular, BIGINT(), BIGINT()); auto op = PlanBuilder() - .tableScan( - ROW({{"d", BIGINT()}}), - makeTableHandle( - SubfieldFilters{}, - parseExpr("e.a is null or e.b is null", rowType)), - assignments) + .startTableScan() + .outputType(ROW({{"d", BIGINT()}})) + .tableHandle(makeTableHandle( + SubfieldFilters{}, + parseExpr("e.a is null or e.b is null", rowType))) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -622,10 +669,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterStruct) { } auto op = PlanBuilder() - .tableScan( - outputColumn == kNoOutput ? ROW({"d"}, {BIGINT()}) : rowType, - makeTableHandle(SubfieldFilters{}, remainingFilter), - assignments) + .startTableScan() + .outputType( + outputColumn == kNoOutput ? ROW({"d"}, {BIGINT()}) : rowType) + .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -705,10 +754,12 @@ TEST_F(TableScanTest, subfieldPruningRemainingFilterMap) { } auto op = PlanBuilder() - .tableScan( - outputColumn == kNoOutput ? ROW({"a"}, {BIGINT()}) : rowType, - makeTableHandle(SubfieldFilters{}, remainingFilter), - assignments) + .startTableScan() + .outputType( + outputColumn == kNoOutput ? ROW({"a"}, {BIGINT()}) : rowType) + .tableHandle(makeTableHandle(SubfieldFilters{}, remainingFilter)) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -786,7 +837,11 @@ TEST_F(TableScanTest, subfieldPruningMapType) { mapType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -856,7 +911,11 @@ TEST_F(TableScanTest, subfieldPruningArrayType) { arrayType, std::move(requiredSubfields)); auto op = PlanBuilder() - .tableScan(rowType, makeTableHandle(), assignments) + .startTableScan() + .outputType(rowType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); auto split = makeHiveConnectorSplit(filePath->path); auto result = AssertQueryBuilder(op).split(split).copyResults(pool()); @@ -928,31 +987,50 @@ TEST_F(TableScanTest, missingColumns) { auto outputTypeC0 = ROW({"c0"}, {BIGINT()}); auto op = PlanBuilder(pool_.get()) - .tableScan(outputType, {}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp"); // Use missing column in a tuple domain filter. op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c1 <= 100.1"}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .subfieldFilters({"c1 <= 100.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp WHERE c1 <= 100.1"); // Use missing column in a tuple domain filter. Select *. op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c1 <= 2000.1"}, "", dataColumns) + .startTableScan() + .outputType(outputType) + .subfieldFilters({"c1 <= 2000.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp WHERE c1 <= 2000.1"); // Use missing column in a tuple domain filter. Select c0. op = PlanBuilder(pool_.get()) - .tableScan(outputTypeC0, {"c1 <= 3000.1"}, "", dataColumns) + .startTableScan() + .outputType(outputTypeC0) + .subfieldFilters({"c1 <= 3000.1"}) + .dataColumns(dataColumns) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT c0 FROM tmp WHERE c1 <= 3000.1"); // Use missing column in a tuple domain filter. Select count(*). op = PlanBuilder(pool_.get()) - .tableScan(ROW({}, {}), {"c1 <= 4000.1"}, "", dataColumns) + .startTableScan() + .outputType(ROW({}, {})) + .subfieldFilters({"c1 <= 4000.1"}) + .dataColumns(dataColumns) + .endTableScan() .singleAggregation({}, {"count(1)"}) .planNode(); assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 <= 4000.1"); @@ -965,14 +1043,22 @@ TEST_F(TableScanTest, missingColumns) { ColumnHandleMap assignments; assignments["c0"] = regularColumn("c0", BIGINT()); op = PlanBuilder(pool_.get()) - .tableScan(outputTypeC0, tableHandle, assignments) + .startTableScan() + .outputType(outputTypeC0) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, filePaths, "SELECT c0 FROM tmp WHERE c1 is null or c1 <= 1050.0"); // Use missing column 'c1' in 'is null' filter, while not selecting anything. op = PlanBuilder(pool_.get()) - .tableScan(ROW({}, {}), {"c1 is null"}, "", dataColumns) + .startTableScan() + .outputType(ROW({}, {})) + .subfieldFilters({"c1 is null"}) + .dataColumns(dataColumns) + .endTableScan() .singleAggregation({}, {"count(1)"}) .planNode(); assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 is null"); @@ -987,7 +1073,11 @@ TEST_F(TableScanTest, missingColumns) { tableHandle = makeTableHandle({}, nullptr, "hive_table", dataColumns); op = PlanBuilder(pool_.get()) - .tableScan(outputType, tableHandle, assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, filePaths, "SELECT * FROM tmp"); @@ -1016,7 +1106,10 @@ TEST_F(TableScanTest, constDictLazy) { // Orchestrate a Const(Dict(Lazy)) by using remaining filter that passes on // exactly one row. auto op = PlanBuilder() - .tableScan(rowType, {}, "c0 % 1000 = 5") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 % 1000 = 5") + .endTableScan() .project({"c1 + 10"}) .planNode(); @@ -1024,14 +1117,20 @@ TEST_F(TableScanTest, constDictLazy) { // Orchestrate a Const(Dict(Lazy)) for a complex type (map) op = PlanBuilder() - .tableScan(rowType, {}, "c0 = 0") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 = 0") + .endTableScan() .project({"cardinality(c2)"}) .planNode(); assertQuery(op, {filePath}, "SELECT 0 FROM tmp WHERE c0 = 5"); op = PlanBuilder() - .tableScan(rowType, {}, "c0 = 2") + .startTableScan() + .outputType(rowType) + .remainingFilter("c0 = 2") + .endTableScan() .project({"cardinality(c2)"}) .planNode(); @@ -1562,7 +1661,11 @@ TEST_F(TableScanTest, statsBasedSkipping) { asRowType(rowVector->type())); return TableScanTest::assertQuery( PlanBuilder() - .tableScan(ROW({"c1"}, {INTEGER()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c1"}, {INTEGER()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, query); @@ -1725,7 +1828,7 @@ TEST_F(TableScanTest, statsBasedSkippingNulls) { auto assertQuery = [&](const std::string& filter) { return TableScanTest::assertQuery( - PlanBuilder().tableScan(rowType, {filter}).planNode(), + PlanBuilder(pool_.get()).tableScan(rowType, {filter}).planNode(), filePaths, "SELECT * FROM tmp WHERE " + filter); }; @@ -2052,10 +2155,11 @@ TEST_F(TableScanTest, filterPushdown) { auto task = assertQuery( PlanBuilder() - .tableScan( - ROW({"c1", "c3", "c0"}, {BIGINT(), BOOLEAN(), TINYINT()}), - tableHandle, - assignments) + .startTableScan() + .outputType(ROW({"c1", "c3", "c0"}, {BIGINT(), BOOLEAN(), TINYINT()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c1, c3, c0 FROM tmp WHERE (c1 >= 0 OR c1 IS NULL) AND c3"); @@ -2070,7 +2174,11 @@ TEST_F(TableScanTest, filterPushdown) { assignments["c0"] = regularColumn("c0", TINYINT()); assertQuery( PlanBuilder() - .tableScan(ROW({"c0"}, {TINYINT()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c0"}, {TINYINT()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c0 FROM tmp WHERE (c1 >= 0 OR c1 IS NULL) AND c3"); @@ -2079,7 +2187,11 @@ TEST_F(TableScanTest, filterPushdown) { assignments.clear(); assertQuery( PlanBuilder() - .tableScan(ROW({}, {}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({}, {})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .singleAggregation({}, {"sum(1)"}) .planNode(), filePaths, @@ -2091,7 +2203,11 @@ TEST_F(TableScanTest, filterPushdown) { tableHandle = makeTableHandle(std::move(subfieldFilters)); assertQuery( PlanBuilder() - .tableScan(ROW({}, {}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({}, {})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .singleAggregation({}, {"sum(1)"}) .planNode(), filePaths, @@ -2115,7 +2231,11 @@ TEST_F(TableScanTest, path) { auto pathValue = fmt::format("file:{}", filePath->path); auto typeWithPath = ROW({kPath, "a"}, {VARCHAR(), BIGINT()}); auto op = PlanBuilder() - .tableScan(typeWithPath, tableHandle, assignments) + .startTableScan() + .outputType(typeWithPath) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, {filePath}, fmt::format("SELECT '{}', * FROM tmp", pathValue)); @@ -2125,12 +2245,22 @@ TEST_F(TableScanTest, path) { SubfieldFilters{}, parseExpr(fmt::format("\"{}\" = '{}'", kPath, pathValue), typeWithPath)); - op = PlanBuilder().tableScan(rowType, tableHandle, assignments).planNode(); + op = PlanBuilder() + .startTableScan() + .outputType(rowType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); assertQuery(op, {filePath}, "SELECT * FROM tmp"); // use $path in a filter and project it out op = PlanBuilder() - .tableScan(typeWithPath, tableHandle, assignments) + .startTableScan() + .outputType(typeWithPath) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, {filePath}, fmt::format("SELECT '{}', * FROM tmp", pathValue)); @@ -2177,7 +2307,11 @@ TEST_F(TableScanTest, bucket) { ROW({kBucket, "c0", "c1"}, {INTEGER(), INTEGER(), BIGINT()}); auto tableHandle = makeTableHandle(); auto op = PlanBuilder() - .tableScan(typeWithBucket, tableHandle, assignments) + .startTableScan() + .outputType(typeWithBucket) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); OperatorTestBase::assertQuery(op, splits, "SELECT c0, * FROM tmp"); @@ -2191,7 +2325,11 @@ TEST_F(TableScanTest, bucket) { // Filter on bucket and filter on first column should produce // identical result for each split op = PlanBuilder() - .tableScan(typeWithBucket, tableHandle, assignments) + .startTableScan() + .outputType(typeWithBucket) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery( op, @@ -2204,7 +2342,13 @@ TEST_F(TableScanTest, bucket) { hsplit = HiveConnectorSplitBuilder(filePaths[i]->path) .tableBucketNumber(bucketValue) .build(); - op = PlanBuilder().tableScan(rowTypes, tableHandle, assignments).planNode(); + op = PlanBuilder() + .startTableScan() + .outputType(rowTypes) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); assertQuery( op, hsplit, @@ -2235,28 +2379,28 @@ TEST_F(TableScanTest, integerNotEqualFilter) { assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c0 != 0::TINYINT"}, {}) + .tableScan(rowType, {"c0 != 0::TINYINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c0 != 0"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c1 != 1::SMALLINT"}, {}) + .tableScan(rowType, {"c1 != 1::SMALLINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c1 != 1"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c2 != (-2)::INTEGER"}, {}) + .tableScan(rowType, {"c2 != (-2)::INTEGER"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c2 != -2"); assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c3 != 3::BIGINT"}, {}) + .tableScan(rowType, {"c3 != 3::BIGINT"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c3 != 3"); @@ -2269,14 +2413,13 @@ TEST_F(TableScanTest, floatingPointNotEqualFilter) { createDuckDbTable(vectors); auto outputType = ROW({"c4"}, {DOUBLE()}); - auto op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c4 != 0.0"}, {}) - .planNode(); + auto op = + PlanBuilder(pool_.get()).tableScan(outputType, {"c4 != 0.0"}).planNode(); assertQuery(op, {filePath}, "SELECT c4 FROM tmp WHERE c4 != 0.0"); outputType = ROW({"c3"}, {REAL()}); op = PlanBuilder(pool_.get()) - .tableScan(outputType, {"c3 != cast(0.0 as REAL)"}, {}) + .tableScan(outputType, {"c3 != cast(0.0 as REAL)"}) .planNode(); assertQuery( op, {filePath}, "SELECT c3 FROM tmp WHERE c3 != cast(0.0 as REAL)"); @@ -2309,13 +2452,13 @@ TEST_F(TableScanTest, stringNotEqualFilter) { assertQuery( PlanBuilder(pool_.get()) - .tableScan(rowType, {"c0 != 'banana'"}, {}) + .tableScan(rowType, {"c0 != 'banana'"}) .planNode(), {filePath}, "SELECT * FROM tmp WHERE c0 != 'banana'"); assertQuery( - PlanBuilder(pool_.get()).tableScan(rowType, {"c1 != ''"}, {}).planNode(), + PlanBuilder(pool_.get()).tableScan(rowType, {"c1 != ''"}).planNode(), {filePath}, "SELECT * FROM tmp WHERE c1 != ''"); } @@ -2420,7 +2563,11 @@ TEST_F(TableScanTest, remainingFilter) { makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); assertQuery( PlanBuilder(pool_.get()) - .tableScan(ROW({"c2"}, {DOUBLE()}), tableHandle, assignments) + .startTableScan() + .outputType(ROW({"c2"}, {DOUBLE()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c2 FROM tmp WHERE c1 > c0"); @@ -2434,10 +2581,11 @@ TEST_F(TableScanTest, remainingFilter) { makeTableHandle(SubfieldFilters{}, parseExpr("c1 > c0", rowType)); assertQuery( PlanBuilder(pool_.get()) - .tableScan( - ROW({"c1", "c2"}, {INTEGER(), DOUBLE()}), - tableHandle, - assignments) + .startTableScan() + .outputType(ROW({"c1", "c2"}, {INTEGER(), DOUBLE()})) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() .planNode(), filePaths, "SELECT c1, c2 FROM tmp WHERE c1 > c0"); @@ -2477,7 +2625,7 @@ TEST_F(TableScanTest, remainingFilterSkippedStrides) { } createDuckDbTable(vectors); core::PlanNodeId tableScanNodeId; - auto plan = PlanBuilder() + auto plan = PlanBuilder(pool_.get()) .tableScan(rowType, {}, "c0 = 0 or c1 = 2") .capturePlanNodeId(tableScanNodeId) .planNode(); @@ -2629,7 +2777,10 @@ TEST_F(TableScanTest, aggregationPushdown) { // Add remaining filter to scan to expose LazyVectors wrapped in Dictionary to // aggregation. op = PlanBuilder() - .tableScan(rowType_, {}, "length(c5) % 2 = 0") + .startTableScan() + .outputType(rowType_) + .remainingFilter("length(c5) % 2 = 0") + .endTableScan() .singleAggregation({"c5"}, {"max(c0)"}) .planNode(); task = assertQuery( @@ -2760,8 +2911,13 @@ TEST_F(TableScanTest, interleaveLazyEager) { SubfieldFiltersBuilder().add("c0.c0", isNotNull()).build()); ColumnHandleMap assignments = {{"c0", regularColumn("c0", column->type())}}; CursorParameters params; - params.planNode = - PlanBuilder().tableScan(rowType, tableHandle, assignments).planNode(); + params.planNode = PlanBuilder() + .startTableScan() + .outputType(rowType) + .tableHandle(tableHandle) + .assignments(assignments) + .endTableScan() + .planNode(); TaskCursor cursor(params); cursor.task()->addSplit("0", makeHiveSplit(lazyFile->path)); cursor.task()->addSplit("0", makeHiveSplit(eagerFile->path)); @@ -3182,7 +3338,10 @@ TEST_F(TableScanTest, readMissingFieldsInMap) { ROW({"m1", "a2"}, {{MAP(BIGINT(), structType), ARRAY(structType)}}); auto op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project( {"m1[0].a", "m1[1].b", @@ -3290,7 +3449,10 @@ TEST_F(TableScanTest, readMissingFieldsInMap) { rowType = ROW({"i1", "a2"}, {{INTEGER(), ARRAY(structType)}}); op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project({"i1"}) .planNode(); @@ -3395,7 +3557,10 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) { auto op = PlanBuilder() - .tableScan(rowType, {}, "", rowType) + .startTableScan() + .outputType(rowType) + .dataColumns(rowType) + .endTableScan() .project({"st1.a", "st1.b", "st1.c", "st1.d", "i2", "d3", "b4", "c4"}) .planNode(); @@ -3527,7 +3692,11 @@ TEST_F(TableScanTest, varbinaryPartitionKey) { auto outputType = ROW({"a", "ds_alias"}, {BIGINT(), VARBINARY()}); auto op = PlanBuilder() - .tableScan(outputType, makeTableHandle(), assignments) + .startTableScan() + .outputType(outputType) + .tableHandle(makeTableHandle()) + .assignments(assignments) + .endTableScan() .planNode(); assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp"); diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index a163bdc907e06..d8d61b5802134 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -108,19 +108,6 @@ PlanBuilder& PlanBuilder::tableScan( .endTableScan(); } -PlanBuilder& PlanBuilder::tableScan( - const RowTypePtr& outputType, - const std::shared_ptr& tableHandle, - const std::unordered_map< - std::string, - std::shared_ptr>& assignments) { - return TableScanBuilder(*this) - .outputType(outputType) - .tableHandle(tableHandle) - .assignments(assignments) - .endTableScan(); -} - PlanBuilder& PlanBuilder::tpchTableScan( tpch::Table table, std::vector&& columnNames, diff --git a/velox/exec/tests/utils/PlanBuilder.h b/velox/exec/tests/utils/PlanBuilder.h index ee7f64a546d67..72e5f3f0ad3d4 100644 --- a/velox/exec/tests/utils/PlanBuilder.h +++ b/velox/exec/tests/utils/PlanBuilder.h @@ -146,20 +146,6 @@ class PlanBuilder { const std::string& remainingFilter = "", const RowTypePtr& dataColumns = nullptr); - /// Add a TableScanNode using a connector-specific table handle and - /// assignments. Supports any connector, not just Hive connector. - /// - /// @param outputType List of column names and types to project out. Column - /// names should match the keys in the 'assignments' map. The 'assignments' - /// map may contain more columns then 'outputType' if some columns are only - /// used by pushed-down filters. - PlanBuilder& tableScan( - const RowTypePtr& outputType, - const std::shared_ptr& tableHandle, - const std::unordered_map< - std::string, - std::shared_ptr>& assignments); - /// Add a TableScanNode to scan a TPC-H table. /// /// @param tpchTableHandle The handle that specifies the target TPC-H table