From 7773ecf88c57a5bf67f715e34e63b0550e44ccde Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 4 Mar 2024 12:28:54 +0800 Subject: [PATCH] fix bugs --- .../execution/CHHashAggregateExecTransformer.scala | 2 ++ .../GlutenClickHouseTPCHParquetSuite.scala | 13 +++++++++++++ .../local-engine/Parser/AggregateFunctionParser.h | 6 ++++-- cpp-ch/local-engine/Parser/AggregateRelParser.cpp | 2 +- cpp-ch/local-engine/Parser/TypeParser.cpp | 2 ++ .../ApproxPercentileParser.cpp | 7 ++++--- .../ApproxPercentileParser.h | 2 +- .../BloomFilterAggParser.h | 2 +- .../aggregate_function_parser/CollectListParser.h | 6 +++--- .../CommonAggregateFunctionParser.h | 2 +- .../aggregate_function_parser/CountParser.cpp | 2 +- .../Parser/aggregate_function_parser/CountParser.h | 2 +- .../aggregate_function_parser/LeadLagParser.h | 4 ++-- cpp-ch/local-engine/compile_commands.json | 1 - 14 files changed, 36 insertions(+), 17 deletions(-) delete mode 120000 cpp-ch/local-engine/compile_commands.json diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/execution/CHHashAggregateExecTransformer.scala b/backends-clickhouse/src/main/scala/io/glutenproject/execution/CHHashAggregateExecTransformer.scala index bafe2ecc96c47..11e98118da744 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/execution/CHHashAggregateExecTransformer.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/execution/CHHashAggregateExecTransformer.scala @@ -361,6 +361,8 @@ case class CHHashAggregateExecTransformer( case approxPercentile: ApproximatePercentile => var fields = Seq[(DataType, Boolean)]() fields = fields :+ (approxPercentile.child.dataType, approxPercentile.child.nullable) + fields = fields :+ (approxPercentile.percentageExpression.dataType, + approxPercentile.percentageExpression.nullable) (makeStructType(fields), attr.nullable) case _ => (makeStructTypeSingleOne(attr.dataType, attr.nullable), attr.nullable) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 1c44c2de01917..25f17d54a48c1 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2450,5 +2450,18 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite compareResultsAgainstVanillaSpark(select_sql, true, { _ => }) spark.sql("drop table test_tbl_4279") } + + test("aggregate function approx_percentile") { + // single percentage + val sql1 = "select l_linenumber % 10, approx_percentile(l_extendedprice, 0.5) " + + "from lineitem group by l_linenumber % 10" + runQueryAndCompare(sql1)({ _ => }) + + // multiple percentages + val sql2 = + "select l_linenumber % 10, approx_percentile(l_extendedprice, array(0.1, 0.2, 0.3)) " + + "from lineitem group by l_linenumber % 10" + runQueryAndCompare(sql2)({ _ => }) + } } // scalastyle:on line.size.limit diff --git a/cpp-ch/local-engine/Parser/AggregateFunctionParser.h b/cpp-ch/local-engine/Parser/AggregateFunctionParser.h index 2911dab1668ae..4ffa5e3a33c02 100644 --- a/cpp-ch/local-engine/Parser/AggregateFunctionParser.h +++ b/cpp-ch/local-engine/Parser/AggregateFunctionParser.h @@ -86,9 +86,11 @@ class AggregateFunctionParser /// In some special cases, different arguments size or different arguments types may refer to different /// CH function implementation. virtual String getCHFunctionName(const CommonFunctionInfo & func_info) const = 0; + /// In most cases, arguments size and types are enough to determine the CH function implementation. - /// This is only be used in SerializedPlanParser::parseNameStructure. - virtual String getCHFunctionName(const DB::DataTypes & args) const = 0; + /// It is only be used in TypeParser::buildBlockFromNamedStruct + /// Users are allowed to modify arg types to make it fit for ggregateFunctionFactory::instance().get(...) in TypeParser::buildBlockFromNamedStruct + virtual String getCHFunctionName(DB::DataTypes & args) const = 0; /// Do some preprojections for the function arguments, and return the necessary arguments for the CH function. virtual DB::ActionsDAG::NodeRawConstPtrs diff --git a/cpp-ch/local-engine/Parser/AggregateRelParser.cpp b/cpp-ch/local-engine/Parser/AggregateRelParser.cpp index e71cf9f8d2496..37e38577f1a0d 100644 --- a/cpp-ch/local-engine/Parser/AggregateRelParser.cpp +++ b/cpp-ch/local-engine/Parser/AggregateRelParser.cpp @@ -259,7 +259,7 @@ void AggregateRelParser::addMergingAggregatedStep() { AggregateDescriptions aggregate_descriptions; buildAggregateDescriptions(aggregate_descriptions); - auto settings = getContext()->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); Aggregator::Params params( grouping_keys, aggregate_descriptions, diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp b/cpp-ch/local-engine/Parser/TypeParser.cpp index b600ff7bd4f25..f22dfa643eba7 100644 --- a/cpp-ch/local-engine/Parser/TypeParser.cpp +++ b/cpp-ch/local-engine/Parser/TypeParser.cpp @@ -268,6 +268,8 @@ DB::Block TypeParser::buildBlockFromNamedStruct(const substrait::NamedStruct & s auto tmp_ctx = DB::Context::createCopy(SerializedPlanParser::global_context); SerializedPlanParser tmp_plan_parser(tmp_ctx); auto function_parser = AggregateFunctionParserFactory::instance().get(name_parts[3], &tmp_plan_parser); + /// This may remove elements from args_types, because some of them are used to determine CH function name, but not needed for the following + /// call `AggregateFunctionFactory::instance().get` auto agg_function_name = function_parser->getCHFunctionName(args_types); auto action = NullsAction::EMPTY; ch_type = AggregateFunctionFactory::instance() diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.cpp b/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.cpp index 1ea490c69c466..116cd7f04b367 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.cpp +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.cpp @@ -68,12 +68,13 @@ String ApproxPercentileParser::getCHFunctionName(const CommonFunctionInfo & func return output_type.has_list() ? "quantilesGK" : "quantileGK"; } -String ApproxPercentileParser::getCHFunctionName(const DB::DataTypes & types) const +String ApproxPercentileParser::getCHFunctionName(DB::DataTypes & types) const { /// Always invoked during second stage - assertArgumentsSize(substrait::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT, types.size(), 1); + assertArgumentsSize(substrait::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT, types.size(), 2); - auto type = removeNullable(types[0]); + auto type = removeNullable(types[1]); + types.resize(1); return isArray(type) ? "quantilesGK" : "quantileGK"; } diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.h index bd1019bac499e..37eae30457b42 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/ApproxPercentileParser.h @@ -34,7 +34,7 @@ class ApproxPercentileParser : public AggregateFunctionParser String getName() const override { return name; } static constexpr auto name = "approx_percentile"; String getCHFunctionName(const CommonFunctionInfo & func_info) const override; - String getCHFunctionName(const DB::DataTypes & types) const override; + String getCHFunctionName(DB::DataTypes & types) const override; DB::Array parseFunctionParameters(const CommonFunctionInfo & /*func_info*/, DB::ActionsDAG::NodeRawConstPtrs & arg_nodes) const override; diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/BloomFilterAggParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/BloomFilterAggParser.h index 6318372cec21e..2465164421f12 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/BloomFilterAggParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/BloomFilterAggParser.h @@ -29,7 +29,7 @@ class AggregateFunctionParserBloomFilterAgg : public AggregateFunctionParser String getName() const override { return name; } static constexpr auto name = "bloom_filter_agg"; String getCHFunctionName(const CommonFunctionInfo &) const override { return "groupBloomFilterState"; } - String getCHFunctionName(const DB ::DataTypes &) const override { return "groupBloomFilterState"; } + String getCHFunctionName(DB::DataTypes &) const override { return "groupBloomFilterState"; } DB::Array parseFunctionParameters(const CommonFunctionInfo & /*func_info*/, DB::ActionsDAG::NodeRawConstPtrs & arg_nodes) const override; diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/CollectListParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/CollectListParser.h index cf9cae6ebd98c..60e1b4eaedd30 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/CollectListParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/CollectListParser.h @@ -47,7 +47,7 @@ class CollectFunctionParser : public AggregateFunctionParser throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Not implement"); } - virtual String getCHFunctionName(const DB::DataTypes &) const override + virtual String getCHFunctionName(DB::DataTypes &) const override { throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Not implement"); } @@ -79,7 +79,7 @@ class CollectListParser : public CollectFunctionParser static constexpr auto name = "collect_list"; String getName() const override { return name; } String getCHFunctionName(const CommonFunctionInfo &) const override { return "groupArray"; } - String getCHFunctionName(const DB::DataTypes &) const override { return "groupArray"; } + String getCHFunctionName(DB::DataTypes &) const override { return "groupArray"; } }; class CollectSetParser : public CollectFunctionParser @@ -90,6 +90,6 @@ class CollectSetParser : public CollectFunctionParser static constexpr auto name = "collect_set"; String getName() const override { return name; } String getCHFunctionName(const CommonFunctionInfo &) const override { return "groupUniqArray"; } - String getCHFunctionName(const DB::DataTypes &) const override { return "groupUniqArray"; } + String getCHFunctionName(DB::DataTypes &) const override { return "groupUniqArray"; } }; } diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.h index c486e16e1c692..e21581e00e3a7 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.h @@ -31,7 +31,7 @@ namespace local_engine String getName() const override { return #substait_name; } \ static constexpr auto name = #substait_name; \ String getCHFunctionName(const CommonFunctionInfo &) const override { return #ch_name; } \ - String getCHFunctionName(const DB::DataTypes &) const override { return #ch_name; } \ + String getCHFunctionName(DB::DataTypes &) const override { return #ch_name; } \ }; \ static const AggregateFunctionParserRegister register_##cls_name = AggregateFunctionParserRegister(); diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.cpp b/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.cpp index 56bc233f22aad..6135546f2e0f2 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.cpp +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.cpp @@ -35,7 +35,7 @@ String CountParser::getCHFunctionName(const CommonFunctionInfo &) const return "count"; } -String CountParser::getCHFunctionName(const DB::DataTypes &) const +String CountParser::getCHFunctionName(DB::DataTypes &) const { return "count"; } diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.h index 7f34112137a69..a561f87d940d4 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/CountParser.h @@ -28,7 +28,7 @@ class CountParser : public AggregateFunctionParser static constexpr auto name = "count"; String getName() const override { return name; } String getCHFunctionName(const CommonFunctionInfo &) const override; - String getCHFunctionName(const DB::DataTypes &) const override; + String getCHFunctionName(DB::DataTypes &) const override; DB::ActionsDAG::NodeRawConstPtrs parseFunctionArguments( const CommonFunctionInfo & func_info, const String & ch_func_name, DB::ActionsDAGPtr & actions_dag) const override; }; diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/LeadLagParser.h b/cpp-ch/local-engine/Parser/aggregate_function_parser/LeadLagParser.h index 35afbfc475878..4fa1c1bbca139 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/LeadLagParser.h +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/LeadLagParser.h @@ -27,7 +27,7 @@ class LeadParser : public AggregateFunctionParser static constexpr auto name = "lead"; String getName() const override { return name; } String getCHFunctionName(const CommonFunctionInfo &) const override { return "leadInFrame"; } - String getCHFunctionName(const DB::DataTypes &) const override { return "leadInFrame"; } + String getCHFunctionName(DB::DataTypes &) const override { return "leadInFrame"; } DB::ActionsDAG::NodeRawConstPtrs parseFunctionArguments( const CommonFunctionInfo & func_info, const String & ch_func_name, DB::ActionsDAGPtr & actions_dag) const override; }; @@ -40,7 +40,7 @@ class LagParser : public AggregateFunctionParser static constexpr auto name = "lag"; String getName() const override { return name; } String getCHFunctionName(const CommonFunctionInfo &) const override { return "lagInFrame"; } - String getCHFunctionName(const DB::DataTypes &) const override { return "lagInFrame"; } + String getCHFunctionName(DB::DataTypes &) const override { return "lagInFrame"; } DB::ActionsDAG::NodeRawConstPtrs parseFunctionArguments( const CommonFunctionInfo & func_info, const String & ch_func_name, DB::ActionsDAGPtr & actions_dag) const override; }; diff --git a/cpp-ch/local-engine/compile_commands.json b/cpp-ch/local-engine/compile_commands.json deleted file mode 120000 index 3e5240562a619..0000000000000 --- a/cpp-ch/local-engine/compile_commands.json +++ /dev/null @@ -1 +0,0 @@ -/data1/liyang/cppproject/kyli/ClickHouse/build_gcc/compile_commands.json \ No newline at end of file