From 9531f655f33d5bed58b6e277d00ad6f71cfdca00 Mon Sep 17 00:00:00 2001 From: Wei He Date: Mon, 21 Oct 2024 14:32:21 -0700 Subject: [PATCH] Fix expression fuzzer with PrestoQueryRunner (#11276) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11276 Fix the set-up of Hive connector and Dwrf reader in ExpressionFuzzerVerifier. Also fix the handling of unsupported function signature and constant literals in PrestoQueryRunner. Reviewed By: kgpai Differential Revision: D64484741 fbshipit-source-id: e70ae269d1e9b739d31349946dcaf9fe3e7d62c2 --- velox/exec/fuzzer/PrestoQueryRunner.cpp | 20 +++++++++---------- .../fuzzer/ExpressionFuzzerVerifier.cpp | 4 ++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index b2731abe9991..d1613579b265 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -355,12 +355,7 @@ std::optional PrestoQueryRunner::toSql( auto constant = std::dynamic_pointer_cast( projection)) { - if (constant->type()->isPrimitiveType()) { - sql << toConstantSql(constant); - } else { - // TODO: support complex-typed constant literals. - VELOX_NYI(); - } + sql << toConstantSql(constant); } else { VELOX_NYI(); } @@ -434,9 +429,11 @@ bool PrestoQueryRunner::isConstantExprSupported( // used as the type of constant literals in SQL, Presto implicitly invoke // json_parse() on it, which makes the behavior of Presto different from // Velox. Timestamp constant literals require further investigation to - // ensure Presto uses the same timezone as Velox. + // ensure Presto uses the same timezone as Velox. Interval type cannot be + // used as the type of constant literals in Presto SQL. 
auto& type = expr->type(); - return type->isPrimitiveType() && !type->isTimestamp() && !isJsonType(type); + return type->isPrimitiveType() && !type->isTimestamp() && + !isJsonType(type) && !type->isIntervalDayTime(); } return true; } @@ -447,11 +444,14 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) { // cast-to or constant literals. Hyperloglog can only be casted from varbinary // and cannot be used as the type of constant literals. Interval year to month // can only be casted from NULL and cannot be used as the type of constant - // literals. + // literals. Json requires special handling, because Presto requires Json + // literals to be valid Json strings, and doesn't allow creation of Json-typed + // HIVE columns. return !( usesTypeName(signature, "interval year to month") || usesTypeName(signature, "hugeint") || - usesTypeName(signature, "hyperloglog")); + usesTypeName(signature, "hyperloglog") || + usesTypeName(signature, "json")); } std::optional PrestoQueryRunner::toSql( diff --git a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp index 764bb8620522..cad98cb0ee0e 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp @@ -23,6 +23,7 @@ #include "velox/common/base/Exceptions.h" #include "velox/common/file/FileSystems.h" #include "velox/connectors/hive/HiveConnector.h" +#include "velox/dwio/dwrf/RegisterDwrfWriter.h" #include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/expression/Expr.h" #include "velox/expression/FunctionSignature.h" @@ -109,7 +110,10 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier( referenceQueryRunner_{ options_.expressionFuzzerOptions.referenceQueryRunner} { filesystems::registerLocalFileSystem(); + connector::registerConnectorFactory( + std::make_shared()); exec::test::registerHiveConnector({}); + dwrf::registerDwrfWriterFactory(); seed(initialSeed);