Skip to content

Commit

Permalink
Implement KQL functional tests and fix issues
Browse files Browse the repository at this point in the history
  • Loading branch information
ltrk2 authored and kashwy committed Aug 26, 2023
1 parent 9806fce commit f3c52d1
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 56 deletions.
64 changes: 37 additions & 27 deletions src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
#include <Parsers/Kusto/KustoFunctions/KQLIPFunctions.h>
#include <Parsers/Kusto/KustoFunctions/KQLStringFunctions.h>
#include <Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h>
#include <Parsers/Kusto/ParserKQLDateTypeTimespan.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/Kusto/ParserKQLDateTypeTimespan.h>

#include <pcg_random.hpp>

Expand Down Expand Up @@ -73,40 +73,46 @@ bool IParserKQLFunction::convert(String & out, IParser::Pos & pos)
});
}

/// NOTE(review): this span is a rendered diff hunk without +/- markers. Lines of two versions of
/// directMapping are interleaved (two signatures, two `begin` declarations), so it does not
/// compile as shown. Presumably the `const String & ch_fn` signature and the `arguments`/`res`
/// lines are the removed implementation -- confirm against the repository.
///
/// Visible behaviour of the variant that takes `argument_count_interval`:
///  - returns false when getKQLFunctionName() yields an empty name;
///  - appends `ch_fn` and '(' to `out`, then each argument produced by getOptionalArgument(),
///    with ", " emitted whenever `pos` differs from `begin`;
///  - on a closing round bracket, throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the number of parsed
///    arguments is outside the interval, otherwise appends ')' and returns true;
///  - if the loop ends without a closing bracket, clears `out`, restores `pos` to `begin`
///    and returns false.
bool IParserKQLFunction::directMapping(String & out, IParser::Pos & pos, const String & ch_fn)
bool IParserKQLFunction::directMapping(
String & out, IParser::Pos & pos, const std::string_view ch_fn, const Interval & argument_count_interval)
{
std::vector<String> arguments;

String fn_name = getKQLFunctionName(pos);

const auto fn_name = getKQLFunctionName(pos);
if (fn_name.empty())
return false;

String res;
auto begin = pos;
++pos;
/// Start the translated call: "<ch_fn>(".
out.append(ch_fn.data(), ch_fn.length());
out.push_back('(');

int argument_count = 0;
const auto begin = pos;
/// Scan tokens until the end of input, a pipe or a semicolon.
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
String argument = getConvertedArgument(fn_name, pos);
arguments.push_back(argument);
/// A separator is written on every iteration in which the position has moved past `begin`.
if (pos != begin)
out.append(", ");

if (pos->type == TokenType::ClosingRoundBracket)
/// An empty optional means no argument was parsed; it is then not counted or appended.
if (const auto argument = getOptionalArgument(fn_name, pos))
{
for (auto arg : arguments)
{
if (res.empty())
res = ch_fn + "(" + arg;
else
res = res + ", " + arg;
}
res += ")";
++argument_count;
out.append(*argument);
}

out = res;
if (pos->type == TokenType::ClosingRoundBracket)
{
/// The argument count is validated only once the closing bracket of the call is reached.
if (!argument_count_interval.IsWithinBounds(argument_count))
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"{}: between {} and {} arguments are expected, but {} were provided",
fn_name,
argument_count_interval.Min(),
argument_count_interval.Max(),
argument_count);

out.push_back(')');
return true;
}
++pos;
}

/// No closing bracket was found: discard the output and rewind the parser position.
out.clear();
pos = begin;
return false;
}
Expand Down Expand Up @@ -174,10 +180,13 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser:
std::optional<String>
IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state)
{
if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket)
if (const auto type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket)
return {};

++pos;
if (const auto type = pos->type; type == DB::TokenType::ClosingRoundBracket || type == DB::TokenType::ClosingSquareBracket)
return {};

if (argument_state == ArgumentState::Parsed)
return getConvertedArgument(function_name, pos);

Expand All @@ -187,7 +196,7 @@ IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParse
"Argument extraction is not implemented for {}::{}",
magic_enum::enum_type_name<ArgumentState>(),
magic_enum::enum_name(argument_state));

String expression;
std::stack<DB::TokenType> scopes;
while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket)))
Expand All @@ -197,11 +206,12 @@ IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParse
else if (isClosingBracket(token_type))
{
if (scopes.empty() || determineClosingPair(scopes.top()) != token_type)
throw Exception(DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name);

throw Exception(
DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name);

scopes.pop();
}

expression.append(pos->begin, pos->end);
++pos;
}
Expand Down
25 changes: 23 additions & 2 deletions src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@

namespace DB
{
/// Inclusive integer range [min, max].
///
/// directMapping() accepts one as `argument_count_interval` and uses IsWithinBounds() to check
/// how many arguments a call supplies.
///
/// The bounds are not validated: when constructed with min_ > max_, IsWithinBounds() is false
/// for every value.
class Interval
{
public:
    using Representation = int;

    /// Deliberately not `explicit`: callers pass braced lists such as `{1, Interval::max_bound}`.
    constexpr Interval(const Representation min_, const Representation max_) noexcept : min(min_), max(max_) { }

    /// Upper bound (inclusive).
    constexpr Representation Max() const noexcept { return max; }
    /// Lower bound (inclusive).
    constexpr Representation Min() const noexcept { return min; }
    /// Returns true when min <= value <= max.
    constexpr bool IsWithinBounds(const Representation value) const noexcept { return min <= value && value <= max; }

    /// Extreme values of Representation; pass one of them to leave that side effectively unbounded.
    static constexpr auto max_bound = std::numeric_limits<Representation>::max();
    static constexpr auto min_bound = std::numeric_limits<Representation>::min();

private:
    /// Declared in the same order as the constructor parameters and its initializer list.
    Representation min = min_bound;
    Representation max = max_bound;
};

class IParserKQLFunction
{
public:
Expand Down Expand Up @@ -51,11 +70,13 @@ class IParserKQLFunction

virtual bool convertImpl(String & out, IParser::Pos & pos) = 0;

static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn);
static bool directMapping(
String & out, IParser::Pos & pos, std::string_view ch_fn, const Interval & argument_count_interval = {0, Interval::max_bound});
static String generateUniqueIdentifier();
static String getArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed);
static String getConvertedArgument(const String & fn_name, IParser::Pos & pos);
static std::optional<String> getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed);
static std::optional<String>
getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed);
static String
kqlCallToExpression(std::string_view function_name, std::initializer_list<const std::string_view> params, uint32_t max_depth);
static String kqlCallToExpression(std::string_view function_name, std::span<const std::string_view> params, uint32_t max_depth);
Expand Down
54 changes: 48 additions & 6 deletions src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ String wrapInDynamic(const String & parameter)

namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos)
{
Expand Down Expand Up @@ -159,8 +163,8 @@ bool ArraySlice::convertImpl(String & out, IParser::Pos & pos)
const auto end = getArgument(function_name, pos);

out = std::format(
"arraySlice({0}, plus(1, if({1} >= 0, {1}, toInt64(max2(-length({0}), {1})) + length({0}))) as offset_{3}, "
" plus(1, if({2} >= 0, {2}, toInt64(max2(-length({0}), {2})) + length({0}))) - offset_{3} + 1)",
"arraySlice({0}, plus(1, if({1} >= 0, {1}, arrayMax([-length({0}), {1}]) + length({0}))) as offset_{3}, "
" plus(1, if({2} >= 0, {2}, arrayMax([-length({0}), {2}]) + length({0}))) - offset_{3} + 1)",
array,
start,
end,
Expand Down Expand Up @@ -195,7 +199,7 @@ bool ArraySplit::convertImpl(String & out, IParser::Pos & pos)
const auto indices = getArgument(function_name, pos);

out = std::format(
"if(empty(arrayMap(x -> if(x >= 0, x, toInt64(max2(0, x + length({0})))), flatten([{1}])) as indices_{2}), [{0}], "
"if(empty(arrayMap(x -> if(x >= 0, x, arrayMax([0, x + length({0})::Int64])), flatten([{1}])) as indices_{2}), [{0}], "
"arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, "
"if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), "
"range(1, length(indices_{2}) + 1))))",
Expand Down Expand Up @@ -264,7 +268,7 @@ bool PackAll::convertImpl(String & out, IParser::Pos & pos)

/// Translates the call by delegating to directMapping() with the target function name "array".
/// NOTE(review): this is a diff hunk without +/- markers, so two return statements appear.
/// Presumably the first is the removed line and the second, which passes the argument-count
/// interval {1, Interval::max_bound}, is its replacement -- confirm against the repository.
bool PackArray::convertImpl(String & out, IParser::Pos & pos)
{
return directMapping(out, pos, "array");
return directMapping(out, pos, "array", {1, Interval::max_bound});
}

bool Repeat::convertImpl(String & out, IParser::Pos & pos)
Expand Down Expand Up @@ -331,10 +335,48 @@ bool TreePath::convertImpl(String & out, IParser::Pos & pos)

/// Builds an `arrayMap(t -> [untuple(t)], arrayZip(...))` expression from the call's arguments.
/// NOTE(review): this is a diff hunk without +/- markers. The `directMapping(out, pos, "arrayZip")`
/// condition and the `std::format(..., out)` assignment are presumably remnants of the removed
/// implementation, so the span does not compile as shown -- confirm against the repository.
///
/// Visible flow of the remaining lines:
///  - returns false when getKQLFunctionName() yields an empty name;
///  - collects arguments with getOptionalArgument() until it returns an empty optional;
///  - throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless between 2 and 16 arguments were collected;
///  - emits every argument wrapped in arrayResize(..., max_length, null) and zips the results.
bool Zip::convertImpl(String & out, IParser::Pos & pos)
{
if (!directMapping(out, pos, "arrayZip"))
const auto function_name = getKQLFunctionName(pos);
if (function_name.empty())
return false;

out = std::format("arrayMap(t -> [untuple(t)], {0})", out);
/// Gather all arguments; the loop stops at the first empty optional.
const auto arguments = std::invoke(
[&function_name, &pos]
{
std::vector<String> result;
while (auto argument = getOptionalArgument(function_name, pos))
result.push_back(std::move(*argument));

return result;
});

if (const auto size = arguments.size(); size < 2 || size > 16)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Between 2 and 16 arguments are expected, but {} were provided", size);

/// The identifier is appended to the generated aliases (arg<i>_<id>, max_length_<id>).
const auto unique_identifier = generateUniqueIdentifier();
const auto resized_arguments = std::invoke(
[&arguments, &unique_identifier]
{
/// One length(...) term per argument. The emitted SQL keeps the argument as is when its type
/// name matches Array(Nullable(...)) and otherwise casts it to an array of Nullable elements;
/// the resulting array is aliased as arg<i>_<id>.
String lengths;
for (int i = 0; i < std::ssize(arguments); ++i)
{
lengths.append(i > 0 ? ", " : "");
lengths.append(std::format(
"length(if(match(toTypeName({0}), 'Array\\(Nullable\\(.*\\)\\)'), {0}, "
"cast({0}, concat('Array(Nullable(', extract(toTypeName({0}), 'Array\\((.*)\\)'), '))'))) as arg{1}_{2})",
arguments[i],
i,
unique_identifier));
}

/// The first arrayResize also defines max_length_<id> as the arrayMax of all lengths;
/// the remaining arguments reuse that alias.
auto result = std::format("arrayResize(arg0_{1}, arrayMax([{0}]) as max_length_{1}, null)", lengths, unique_identifier);
for (int i = 1; i < std::ssize(arguments); ++i)
result.append(std::format(", arrayResize(arg{0}_{1}, max_length_{1}, null)", i, unique_identifier));

return result;
});

out = std::format("arrayMap(t -> [untuple(t)], arrayZip({0}))", resized_arguments);

return true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos)
const auto mask = getArgument(function_name, pos);
out = std::format(
"if(isNull(toIPv4OrNull({0}) as ip_{2}) or isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, "
"toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), toUInt8(max2(0, min2(32, assumeNotNull(mask_{2}))))), 1)))",
"toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), arrayMax([0, arrayMin([32, assumeNotNull(mask_{2})])])), 1)))",
ip_address,
mask,
generateUniqueIdentifier());
Expand Down
32 changes: 16 additions & 16 deletions src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest,
},
{
"print array_rotate_left(A, B)",
"SELECT arrayMap(x -> (A[(((x + length(A)) + (B % toInt64(length(A)))) % length(A)) + 1]), range(0, length(A)))"
"SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))"
},
{
"print array_rotate_right(A, B)",
"SELECT arrayMap(x -> (A[(((x + length(A)) + ((-1 * B) % toInt64(length(A)))) % length(A)) + 1]), range(0, length(A)))"
"SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(-1 * B, toInt64(length(A))), length(A)) + 1]), range(0, length(A)))"
},
{
"print output = array_sum(dynamic([2, 5, 3]))",
Expand Down Expand Up @@ -102,14 +102,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest,
{
"print set_union(A, B, C)",
"SELECT arrayDistinct(arrayConcat(A, B, C))"
},
{
"print zip(A, B)",
"SELECT arrayMap(t -> [untuple(t)], arrayZip(A, B))"
},
{
"print zip(A, B, C)",
"SELECT arrayMap(t -> [untuple(t)], arrayZip(A, B, C))"
}
})));

Expand All @@ -119,26 +111,34 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest,
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"print array_shift_left(A, B)",
"SELECT arrayResize\\(if\\(B > 0, arraySlice\\(A, B \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)*'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))"
},
{
"print array_shift_left(A, B, C)",
"SELECT arrayResize\\(if\\(B > 0, arraySlice\\(A, B \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))"
},
{
"print array_shift_right(A, B)",
"SELECT arrayResize\\(if\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\))"
},
{
"print array_shift_right(A, B, C)",
"SELECT arrayResize\\(if\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\))"
},
{
"print array_slice(A, B, C)",
"SELECT arraySlice\\(A, 1 \\+ if\\(B >= 0, B, toInt64\\(max2\\(-length\\(A\\), B\\)\\) \\+ length\\(A\\)\\) AS offset_\\d+, \\(\\(1 \\+ if\\(C >= 0, C, toInt64\\(max2\\(-length\\(A\\), C\\)\\) \\+ length\\(A\\)\\)\\) - offset_\\d+\\) \\+ 1\\)"
R"(SELECT arraySlice\(A, 1 \+ if\(B >= 0, B, arrayMax\(\[-length\(A\), B\]\) \+ length\(A\)\) AS offset_\d+, \(\(1 \+ if\(C >= 0, C, arrayMax\(\[-length\(A\), C\]\) \+ length\(A\)\)\) - offset_\d+\) \+ 1\))"
},
{
"print array_split(A, B)",
"SELECT if\\(empty\\(arrayMap\\(x -> if\\(x >= 0, x, toInt64\\(max2\\(0, x \\+ length\\(A\\)\\)\\)\\), flatten\\(\\[B\\]\\)\\) AS indices_\\d+\\), \\[A\\], arrayConcat\\(\\[arraySlice\\(A, 1, indices_\\d+\\[1\\]\\)\\], arrayMap\\(i -> arraySlice\\(A, \\(indices_\\d+\\[i\\]\\) \\+ 1, if\\(i = length\\(indices_\\d+\\), CAST\\(length\\(A\\), 'Int64'\\), CAST\\(indices_\\d+\\[i \\+ 1\\], 'Int64'\\)\\) - \\(indices_\\d+\\[i\\]\\)\\), range\\(1, length\\(indices_\\d+\\) \\+ 1\\)\\)\\)\\)"
R"(SELECT if\(empty\(arrayMap\(x -> if\(x >= 0, x, arrayMax\(\[0, x \+ CAST\(length\(A\), 'Int\d+'\)\]\)\), flatten\(\[B\]\)\) AS indices_\d+\), \[A\], arrayConcat\(\[arraySlice\(A, 1, indices_\d+\[1\]\)\], arrayMap\(i -> arraySlice\(A, \(indices_\d+\[i\]\) \+ 1, if\(i = length\(indices_\d+\), CAST\(length\(A\), 'Int\d+'\), CAST\(indices_\d+\[i \+ 1\], 'Int\d+'\)\) - \(indices_\d+\[i\]\)\), range\(1, length\(indices_\d+\) \+ 1\)\)\)\))"
},
{
"print zip(A, B)",
R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(Nullable\(', extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(Nullable\(', extract\(toTypeName\(B\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)\)'\)\)\) AS arg1_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\)\)\))"
},
{
"print zip(A, B, C)",
R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(Nullable\(', extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(Nullable\(', extract\(toTypeName\(B\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)\)'\)\)\) AS arg1_\d+\), length\(if\(match\(toTypeName\(C\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), C, CAST\(C, concat\('Array\(Nullable\(', extract\(toTypeName\(C\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)\)'\)\)\) AS arg2_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\), arrayResize\(arg2_\d+, max_length_\d+, NULL\)\)\))"
}
})));
Loading

0 comments on commit f3c52d1

Please sign in to comment.