From 567fad16e121b16775a27e775a6ddf87df4987d2 Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Sun, 15 Dec 2024 20:40:07 -0800 Subject: [PATCH 1/2] fix(fuzzer): Add filter parsing to toSql methods for HashJoinNode in ReferenceQueryRunners (#11566) Summary: This change updates both the DuckQueryRunner and PrestoQueryRunner to parse filters in their HashJoinNode toSql methods. Reviewed By: kagamiori Differential Revision: D66021799 --- velox/exec/fuzzer/DuckQueryRunner.cpp | 45 ++++++++++++++++++------ velox/exec/fuzzer/PrestoQueryRunner.cpp | 46 +++++++++++++++++++------ 2 files changed, 70 insertions(+), 21 deletions(-) diff --git a/velox/exec/fuzzer/DuckQueryRunner.cpp b/velox/exec/fuzzer/DuckQueryRunner.cpp index 603c8f00b4f2..6fdf10613bb6 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.cpp +++ b/velox/exec/fuzzer/DuckQueryRunner.cpp @@ -354,7 +354,12 @@ std::optional DuckQueryRunner::toSql( return out.str(); }; - const auto& equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -363,6 +368,9 @@ std::optional DuckQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -378,39 +386,56 @@ std::optional DuckQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t 
FULL OUTER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery is not supported in + // DuckDB. A scalar subquery expression is a subquery that returns one + // result row from exactly one column for every input row. if (joinNode->leftKeys().size() > 1) { return std::nullopt; } sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " - << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; + << joinKeysToSql(joinNode->rightKeys()) << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ") FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode); + sql << ") FROM t"; } break; case core::JoinType::kAnti: if (joinNode->isNullAware()) { sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode); + sql << ")"; } break; default: diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index 37f31869541d..0ff5021167e2 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp 
@@ -575,7 +575,12 @@ std::optional PrestoQueryRunner::toSql( return out.str(); }; - const auto equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -584,6 +589,9 @@ std::optional PrestoQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -599,46 +607,62 @@ std::optional PrestoQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery is not supported in + // Presto. A scalar subquery expression is a subquery that returns one + // result row from exactly one column for every input row. 
if (joinNode->leftKeys().size() > 1) { return std::nullopt; } sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " - << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; + << joinKeysToSql(joinNode->rightKeys()) << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ") FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode); + sql << ") FROM t"; } break; case core::JoinType::kAnti: if (joinNode->isNullAware()) { sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode); + sql << ")"; } break; default: VELOX_UNREACHABLE( "Unknown join type: {}", static_cast(joinNode->joinType())); } - return sql.str(); } From c8a5b1fe57d5ebb317354a82b934323b4a67a347 Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Sun, 15 Dec 2024 20:40:07 -0800 Subject: [PATCH 2/2] fix(fuzzer): Fix toSql methods for NestedLoopJoinNode in Reference Query Runners (#11576) Summary: The select clause is completely missing in the produced query string. 
Reviewed By: kagamiori Differential Revision: D66132514 --- velox/exec/fuzzer/DuckQueryRunner.cpp | 1 + velox/exec/fuzzer/PrestoQueryRunner.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/velox/exec/fuzzer/DuckQueryRunner.cpp b/velox/exec/fuzzer/DuckQueryRunner.cpp index 6fdf10613bb6..d6d606f6497e 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.cpp +++ b/velox/exec/fuzzer/DuckQueryRunner.cpp @@ -449,6 +449,7 @@ std::optional DuckQueryRunner::toSql( std::optional DuckQueryRunner::toSql( const std::shared_ptr& joinNode) { std::stringstream sql; + sql << "SELECT " << folly::join(", ", joinNode->outputType()->names()); // Nested loop join without filter. VELOX_CHECK( diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index 0ff5021167e2..c8bba9cdb64d 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -669,6 +669,7 @@ std::optional PrestoQueryRunner::toSql( std::optional PrestoQueryRunner::toSql( const std::shared_ptr& joinNode) { std::stringstream sql; + sql << "SELECT " << folly::join(", ", joinNode->outputType()->names()); // Nested loop join without filter. VELOX_CHECK(