From fadd8b4edc3bee2da304c6a331ff7ee83961f729 Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Thu, 21 Nov 2024 11:57:07 -0800 Subject: [PATCH] fix(fuzzer): Add filter parsing to toSql methods for hasJoinNode in ReferenceQueryRunners (#11566) Summary: This change updates both the DuckQueryRunner and PrestoQueryRunner to parse filters in their hasJoinNode toSql methods. Differential Revision: D66021799 --- velox/exec/fuzzer/DuckQueryRunner.cpp | 32 ++++++++++++++++++------- velox/exec/fuzzer/PrestoQueryRunner.cpp | 32 ++++++++++++++++++------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/velox/exec/fuzzer/DuckQueryRunner.cpp b/velox/exec/fuzzer/DuckQueryRunner.cpp index 15b74efae88eb..36bf38d9cc06f 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.cpp +++ b/velox/exec/fuzzer/DuckQueryRunner.cpp @@ -354,7 +354,12 @@ std::optional<std::string> DuckQueryRunner::toSql( return out.str(); }; - const auto& equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast<const core::CallTypedExpr>(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -363,6 +368,9 @@ std::optional<std::string> DuckQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -378,29 +386,35 @@ std::optional<std::string> DuckQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t FULL OUTER JOIN u ON " 
<< equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery are not supported in + // DuckDB. if (joinNode->leftKeys().size() > 1) { return std::nullopt; } - sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) + sql << " FROM t WHERE " << joinNode->leftKeys()[0]->name() << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode) << ") FROM t"; } break; case core::JoinType::kAnti: @@ -410,7 +424,7 @@ std::optional<std::string> DuckQueryRunner::toSql( << " FROM u)"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode) << ")"; } break; default: diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index d1613579b265e..6f707f535db32 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -569,7 +569,12 @@ std::optional<std::string> PrestoQueryRunner::toSql( return out.str(); }; - const auto equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast<const core::CallTypedExpr>(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -578,6 +583,9 @@ std::optional<std::string> 
PrestoQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -593,29 +601,35 @@ std::optional<std::string> PrestoQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery are not supported in + // Presto. if (joinNode->leftKeys().size() > 1) { return std::nullopt; } - sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) + sql << " FROM t WHERE " << joinNode->leftKeys()[0]->name() << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode) << ") FROM t"; } break; case core::JoinType::kAnti: @@ -625,7 +639,7 @@ std::optional<std::string> PrestoQueryRunner::toSql( << " FROM u)"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode) << ")"; 
} break; default: