diff --git a/velox/exec/fuzzer/DuckQueryRunner.cpp b/velox/exec/fuzzer/DuckQueryRunner.cpp index 603c8f00b4f2..d6d606f6497e 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.cpp +++ b/velox/exec/fuzzer/DuckQueryRunner.cpp @@ -354,7 +354,12 @@ std::optional DuckQueryRunner::toSql( return out.str(); }; - const auto& equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -363,6 +368,9 @@ std::optional DuckQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -378,39 +386,56 @@ std::optional DuckQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery is not supported in + // DuckDB. A scalar subquery expression is a subquery that returns one + // result row from exactly one column for every input row. if (joinNode->leftKeys().size() > 1) { return std::nullopt; } sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " - << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; + << joinKeysToSql(joinNode->rightKeys()) << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ") FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode); + sql << ") FROM t"; } break; case core::JoinType::kAnti: if (joinNode->isNullAware()) { sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode); + sql << ")"; } break; default: @@ -424,6 +449,7 @@ std::optional DuckQueryRunner::toSql( std::optional DuckQueryRunner::toSql( const std::shared_ptr& joinNode) { std::stringstream sql; + sql << "SELECT " << folly::join(", ", joinNode->outputType()->names()); // Nested loop join without filter. VELOX_CHECK( diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index 37f31869541d..c8bba9cdb64d 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -575,7 +575,12 @@ std::optional PrestoQueryRunner::toSql( return out.str(); }; - const auto equiClausesToSql = [](auto joinNode) { + const auto filterToSql = [](core::TypedExprPtr filter) { + auto call = std::dynamic_pointer_cast(filter); + return toCallSql(call); + }; + + const auto& joinConditionAsSql = [&](auto joinNode) { std::stringstream out; for (auto i = 0; i < joinNode->leftKeys().size(); ++i) { if (i > 0) { @@ -584,6 +589,9 @@ std::optional PrestoQueryRunner::toSql( out << joinNode->leftKeys()[i]->name() << " = " << joinNode->rightKeys()[i]->name(); } + if (joinNode->filter()) { + out << " AND " << filterToSql(joinNode->filter()); + } return out.str(); }; @@ -599,52 +607,69 @@ std::optional PrestoQueryRunner::toSql( switch (joinNode->joinType()) { case core::JoinType::kInner: - sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeft: - sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kFull: - sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode); + sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode); break; case core::JoinType::kLeftSemiFilter: + // Multiple columns returned by a scalar subquery is not supported in + // Presto. A scalar subquery expression is a subquery that returns one + // result row from exactly one column for every input row. if (joinNode->leftKeys().size() > 1) { return std::nullopt; } sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; break; case core::JoinType::kLeftSemiProject: if (joinNode->isNullAware()) { sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT " - << joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t"; + << joinKeysToSql(joinNode->rightKeys()) << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ") FROM t"; } else { - sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode) - << ") FROM t"; + sql << ", EXISTS (SELECT * FROM u WHERE " + << joinConditionAsSql(joinNode); + sql << ") FROM t"; } break; case core::JoinType::kAnti: if (joinNode->isNullAware()) { sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys()) << " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys()) - << " FROM u)"; + << " FROM u"; + if (joinNode->filter()) { + sql << " WHERE " << filterToSql(joinNode->filter()); + } + sql << ")"; } else { sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE " - << equiClausesToSql(joinNode) << ")"; + << joinConditionAsSql(joinNode); + sql << ")"; } break; default: VELOX_UNREACHABLE( "Unknown join type: {}", static_cast(joinNode->joinType())); } - return sql.str(); } std::optional PrestoQueryRunner::toSql( const std::shared_ptr& joinNode) { std::stringstream sql; + sql << "SELECT " << folly::join(", ", joinNode->outputType()->names()); // Nested loop join without filter. VELOX_CHECK(