From e803ae34962ac132907973a99d66547cc6e4e6ff Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Thu, 23 Jan 2025 22:42:09 -0800 Subject: [PATCH] fix(fuzzer): Make Join Fuzzer fail when query fails in reference DB (#12131) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/12131 The change in D66977480 casued the Join Fuzzer to no longer throw when queries fail in the reference DB. This change returns that behavior in the Join Fuzzer. Reviewed By: kagamiori Differential Revision: D68428128 fbshipit-source-id: 1d7f29c98dde9336d8a1d220a891e85936a1ff00 --- velox/exec/fuzzer/JoinFuzzer.cpp | 16 +++++-------- velox/exec/fuzzer/ReferenceQueryRunner.h | 29 +++++++++++++++++------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/velox/exec/fuzzer/JoinFuzzer.cpp b/velox/exec/fuzzer/JoinFuzzer.cpp index ffdfcd1628a5..1e55bc7e6b36 100644 --- a/velox/exec/fuzzer/JoinFuzzer.cpp +++ b/velox/exec/fuzzer/JoinFuzzer.cpp @@ -680,12 +680,8 @@ std::optional JoinFuzzer::computeReferenceResults( } auto result = referenceQueryRunner_->execute(plan); - if (result.first) { - return result.first; - } - - LOG(INFO) << "Query not supported by or failed in the reference DB"; - return std::nullopt; + VELOX_CHECK_NE(result.second, ReferenceQueryErrorCode::kReferenceQueryFail); + return result.first; } std::vector fieldNames( @@ -1014,8 +1010,8 @@ RowVectorPtr JoinFuzzer::testCrossProduct( /*filter=*/""); const auto expected = execute(plan, /*injectSpill=*/false); - // If OOM injection is not enabled verify the results against Reference query - // runner. + // If OOM injection is not enabled verify the results against Reference + // query runner. if (!FLAGS_enable_oom_injection) { if (auto referenceResult = computeReferenceResults(plan.plan, probeInput, buildInput)) { @@ -1170,8 +1166,8 @@ void JoinFuzzer::verify(core::JoinType joinType) { const auto expected = execute(defaultPlan, /*injectSpill=*/false); - // If OOM injection is not enabled verify the results against Reference query - // runner. + // If OOM injection is not enabled verify the results against Reference + // query runner. if (!FLAGS_enable_oom_injection) { if (auto referenceResult = computeReferenceResults(defaultPlan.plan, probeInput, buildInput)) { diff --git a/velox/exec/fuzzer/ReferenceQueryRunner.h b/velox/exec/fuzzer/ReferenceQueryRunner.h index a3942d5d3b36..7a29432a3964 100644 --- a/velox/exec/fuzzer/ReferenceQueryRunner.h +++ b/velox/exec/fuzzer/ReferenceQueryRunner.h @@ -34,6 +34,19 @@ enum ReferenceQueryErrorCode { kReferenceQueryUnsupported }; +FOLLY_ALWAYS_INLINE std::string format_as(ReferenceQueryErrorCode errorCode) { + switch (errorCode) { + case ReferenceQueryErrorCode::kSuccess: + return "kSuccess"; + case ReferenceQueryErrorCode::kReferenceQueryFail: + return "kReferenceQueryFail"; + case ReferenceQueryErrorCode::kReferenceQueryUnsupported: + return "kReferenceQueryUnsupported"; + default: + return "Unknown"; + } +} + /// Query runner that uses reference database, i.e. DuckDB, Presto, Spark. class ReferenceQueryRunner { public: @@ -43,8 +56,8 @@ class ReferenceQueryRunner { kSparkQueryRunner }; - // @param aggregatePool Used to allocate memory needed for vectors produced by - // 'execute' methods. + // @param aggregatePool Used to allocate memory needed for vectors produced + // by 'execute' methods. explicit ReferenceQueryRunner(memory::MemoryPool* aggregatePool) : aggregatePool_(aggregatePool) {} @@ -88,8 +101,8 @@ class ReferenceQueryRunner { return true; } - /// Returns whether types contained in a function signature are all supported - /// by the reference database. + /// Returns whether types contained in a function signature are all + /// supported by the reference database. virtual bool isSupported(const exec::FunctionSignature& /*signature*/) { return true; } @@ -103,10 +116,10 @@ class ReferenceQueryRunner { VELOX_UNSUPPORTED(); } - // Converts 'plan' into an SQL query and executes it. Result is returned as a - // MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if - // successful, or an std::nullopt with a ReferenceQueryErrorCode if the query - // fails. + // Converts 'plan' into an SQL query and executes it. Result is returned as + // a MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if + // successful, or an std::nullopt with a ReferenceQueryErrorCode if the + // query fails. virtual std::pair< std::optional>>, ReferenceQueryErrorCode>