From a803b4833322c37397beaeafa5f905cf45310d86 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 22 Aug 2022 13:33:01 -0700 Subject: [PATCH] Kusto-phase2: add bin function, unit test for make-series --- .../KustoFunctions/KQLGeneralFunctions.cpp | 39 ++++++++++++++++--- .../tests/KQL/gtest_KQL_MakeSeries.cpp | 25 ++++++++++++ .../tests/KQL/gtest_KQL_StringFunctions.cpp | 12 ++++++ 3 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index dd79cc06898b..e1d932e9ce19 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -20,11 +20,38 @@ namespace DB { -bool Bin::convertImpl(String &out,IParser::Pos &pos) +bool Bin::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + double bin_size; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String origal_expr(pos->begin, pos->end); + String value = getConvertedArgument(fn_name, pos); + + ++pos; + String round_to = getConvertedArgument(fn_name, pos); + + auto t = std::format("toFloat64({})", value); + + bin_size = std::stod(round_to); + + if (origal_expr == "datetime" || origal_expr == "date") + { + out = std::format("toDateTime64(toInt64({0} / {1} ) * {1}, 9, 'UTC')", t, bin_size); + } + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + { + String bin_value = std::format(" toInt64({0} / {1} ) * {1}", t, bin_size); + out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); + } + else + { + out = std::format("toInt64({0} / {1} ) * {1}", t, bin_size); + } + return true; } bool BinAt::convertImpl(String & out,IParser::Pos & pos) @@ -49,11 +76,11 @@ bool BinAt::convertImpl(String & out,IParser::Pos & pos) int dir = t2 >= t1 ? 0 : -1; bin_size = std::stod(bin_size_str); - if (origal_expr == "datetime" or origal_expr == "date") + if (origal_expr == "datetime" || origal_expr == "date") { out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); } - else if (origal_expr == "timespan" or origal_expr =="time" or ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) { String bin_value = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp new file mode 100644 index 000000000000..5c94ab4665e5 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp @@ -0,0 +1,25 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 0ef816646a44..6fa03ef946da 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -176,6 +176,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, { "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", "SELECT toDateTime64(toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" + }, + { + "print bin(4.5, 1)", + "SELECT toInt64(toFloat64(4.5) / 1) * 1" + }, + { + "print bin(time(16d), 7d)", + "SELECT concat(toString(toInt32(((toInt64(toFloat64(1382400.) / 604800) * 604800) AS x) / 3600)), ':', toString(toInt32((x % 3600) / 60)), ':', toString(toInt32((x % 3600) % 60)))" + }, + { + "print bin(datetime(1970-05-11 13:45:07), 1d)", + "SELECT toDateTime64(toInt64(toFloat64(toDateTime64('1970-05-11 13:45:07', 9, 'UTC')) / 86400) * 86400, 9, 'UTC')" } })));