Skip to content

Commit

Permalink
fix(fuzzer): Add filter parsing to toSql methods for hasJoinNode in R…
Browse files Browse the repository at this point in the history
…eferenceQueryRunners (#11566)

Summary:

This change updates both the DuckQueryRunner and PrestoQueryRunner to parse filters in their hasJoinNode toSql methods.

Reviewed By: kagamiori

Differential Revision: D66021799
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Dec 16, 2024
1 parent 2c9a42e commit 3103be3
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 21 deletions.
45 changes: 35 additions & 10 deletions velox/exec/fuzzer/DuckQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,12 @@ std::optional<std::string> DuckQueryRunner::toSql(
return out.str();
};

const auto& equiClausesToSql = [](auto joinNode) {
const auto filterToSql = [](core::TypedExprPtr filter) {
auto call = std::dynamic_pointer_cast<const core::CallTypedExpr>(filter);
return toCallSql(call);
};

const auto& joinConditionAsSql = [&](auto joinNode) {
std::stringstream out;
for (auto i = 0; i < joinNode->leftKeys().size(); ++i) {
if (i > 0) {
Expand All @@ -363,6 +368,9 @@ std::optional<std::string> DuckQueryRunner::toSql(
out << joinNode->leftKeys()[i]->name() << " = "
<< joinNode->rightKeys()[i]->name();
}
if (joinNode->filter()) {
out << " AND " << filterToSql(joinNode->filter());
}
return out.str();
};

Expand All @@ -378,39 +386,56 @@ std::optional<std::string> DuckQueryRunner::toSql(

switch (joinNode->joinType()) {
case core::JoinType::kInner:
sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kLeft:
sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kFull:
sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kLeftSemiFilter:
// Multiple columns returned by a scalar subquery is not supported in
// DuckDB. A scalar subquery expression is a subquery that returns one
// result row from exactly one column for every input row.
if (joinNode->leftKeys().size() > 1) {
return std::nullopt;
}
sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys())
<< " IN (SELECT " << joinKeysToSql(joinNode->rightKeys())
<< " FROM u)";
<< " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ")";
break;
case core::JoinType::kLeftSemiProject:
if (joinNode->isNullAware()) {
sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT "
<< joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t";
<< joinKeysToSql(joinNode->rightKeys()) << " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ") FROM t";
} else {
sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode)
<< ") FROM t";
sql << ", EXISTS (SELECT * FROM u WHERE "
<< joinConditionAsSql(joinNode);
sql << ") FROM t";
}
break;
case core::JoinType::kAnti:
if (joinNode->isNullAware()) {
sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys())
<< " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys())
<< " FROM u)";
<< " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ")";
} else {
sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE "
<< equiClausesToSql(joinNode) << ")";
<< joinConditionAsSql(joinNode);
sql << ")";
}
break;
default:
Expand Down
46 changes: 35 additions & 11 deletions velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,12 @@ std::optional<std::string> PrestoQueryRunner::toSql(
return out.str();
};

const auto equiClausesToSql = [](auto joinNode) {
const auto filterToSql = [](core::TypedExprPtr filter) {
auto call = std::dynamic_pointer_cast<const core::CallTypedExpr>(filter);
return toCallSql(call);
};

const auto& joinConditionAsSql = [&](auto joinNode) {
std::stringstream out;
for (auto i = 0; i < joinNode->leftKeys().size(); ++i) {
if (i > 0) {
Expand All @@ -584,6 +589,9 @@ std::optional<std::string> PrestoQueryRunner::toSql(
out << joinNode->leftKeys()[i]->name() << " = "
<< joinNode->rightKeys()[i]->name();
}
if (joinNode->filter()) {
out << " AND " << filterToSql(joinNode->filter());
}
return out.str();
};

Expand All @@ -599,46 +607,62 @@ std::optional<std::string> PrestoQueryRunner::toSql(

switch (joinNode->joinType()) {
case core::JoinType::kInner:
sql << " FROM t INNER JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t INNER JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kLeft:
sql << " FROM t LEFT JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t LEFT JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kFull:
sql << " FROM t FULL OUTER JOIN u ON " << equiClausesToSql(joinNode);
sql << " FROM t FULL OUTER JOIN u ON " << joinConditionAsSql(joinNode);
break;
case core::JoinType::kLeftSemiFilter:
// Multiple columns returned by a scalar subquery is not supported in
// Presto. A scalar subquery expression is a subquery that returns one
// result row from exactly one column for every input row.
if (joinNode->leftKeys().size() > 1) {
return std::nullopt;
}
sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys())
<< " IN (SELECT " << joinKeysToSql(joinNode->rightKeys())
<< " FROM u)";
<< " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ")";
break;
case core::JoinType::kLeftSemiProject:
if (joinNode->isNullAware()) {
sql << ", " << joinKeysToSql(joinNode->leftKeys()) << " IN (SELECT "
<< joinKeysToSql(joinNode->rightKeys()) << " FROM u) FROM t";
<< joinKeysToSql(joinNode->rightKeys()) << " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ") FROM t";
} else {
sql << ", EXISTS (SELECT * FROM u WHERE " << equiClausesToSql(joinNode)
<< ") FROM t";
sql << ", EXISTS (SELECT * FROM u WHERE "
<< joinConditionAsSql(joinNode);
sql << ") FROM t";
}
break;
case core::JoinType::kAnti:
if (joinNode->isNullAware()) {
sql << " FROM t WHERE " << joinKeysToSql(joinNode->leftKeys())
<< " NOT IN (SELECT " << joinKeysToSql(joinNode->rightKeys())
<< " FROM u)";
<< " FROM u";
if (joinNode->filter()) {
sql << " WHERE " << filterToSql(joinNode->filter());
}
sql << ")";
} else {
sql << " FROM t WHERE NOT EXISTS (SELECT * FROM u WHERE "
<< equiClausesToSql(joinNode) << ")";
<< joinConditionAsSql(joinNode);
sql << ")";
}
break;
default:
VELOX_UNREACHABLE(
"Unknown join type: {}", static_cast<int>(joinNode->joinType()));
}

return sql.str();
}

Expand Down

0 comments on commit 3103be3

Please sign in to comment.