Skip to content

Commit

Permalink
Implement KQL set functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ltrk2 authored and kashwy committed Aug 26, 2023
1 parent 5f89b79 commit eedb3a9
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 24 deletions.
52 changes: 52 additions & 0 deletions src/Parsers/Kusto/KQL_ReleaseNote.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,58 @@

Please note that our current implementation supports only scalars and arrays made up of elements of the same type. Support for mixed types and property bags is deferred for now, based on our understanding of the required effort and discussion with representatives of the QRadar team.

## Mathematical functions
- [isnan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/isnanfunction)
`print isnan(double(nan)) == true`
`print isnan(4.2) == false`
`print isnan(4) == false`
`print isnan(real(+inf)) == false`

## Set functions
Please note that functions returning arrays with set semantics may return their elements in any order, and that order may change in the future.

- [jaccard_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/jaccard-index-function)
`print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])) == 0.75`
`print jaccard_index(dynamic([1, 2, 3]), dynamic([])) == 0`
`print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])) == 0`
`print isnan(jaccard_index(dynamic([]), dynamic([])))`
`print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])) == 0`
`print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])) == 0.75`
`print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == 0.25`

- [set_difference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setdifferencefunction)
`print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])) == dynamic([])`
`print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([4, 5, 6])`
`print set_difference(dynamic([4]), dynamic([1, 2, 3])) == dynamic([4])`
`print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 3])`
`print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])`
`print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['d', 's'])`
`print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Han Solo'])`

- [set_has_element](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/sethaselementfunction)
`print set_has_element(dynamic(["this", "is", "an", "example"]), "example") == true`
`print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee") == false`
`print set_has_element(dynamic([1, 2, 3]), 2) == true`
`print set_has_element(dynamic([1, 2, 3, 4.2]), 4) == false`

- [set_intersect](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setintersectfunction)
`print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])`
`print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])`
`print set_intersect(dynamic([4]), dynamic([1, 2, 3])) == dynamic([])`
`print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])) == dynamic([5])`
`print set_intersect(dynamic([1, 2, 3]), dynamic([])) == dynamic([])`
`print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])) == dynamic(['a'])`
`print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == dynamic(['Darth Vader'])`

- [set_union](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setunionfunction)
`print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])`
`print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4, 5, 6])`
`print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4])`
`print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 2, 3, 4, 5])`
`print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])`
`print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['a', 'd', 'f', 's'])`
`print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Darth Sidious', 'Darth Vader', 'Han Solo'])`

# August 29, 2022

## **mv-expand operator**
Expand Down
8 changes: 7 additions & 1 deletion src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,13 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos)
}

/// Convenience overload that accepts the call parameters as a braced list.
/// It only adapts the list to a std::span and forwards to the span-based overload,
/// so both overloads produce the same expression for the same inputs.
String IParserKQLFunction::kqlCallToExpression(
    const std::string_view function_name, const std::initializer_list<const std::string_view> params, const uint32_t max_depth)
{
    return kqlCallToExpression(function_name, std::span(params), max_depth);
}

String IParserKQLFunction::kqlCallToExpression(
const std::string_view function_name, const std::span<const std::string_view> params, const uint32_t max_depth)
{
const auto params_str = std::accumulate(
std::cbegin(params),
Expand Down
5 changes: 4 additions & 1 deletion src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/KustoFunctions/IParserKQLFunction.h>

#include <span>

namespace DB
{
class IParserKQLFunction
Expand Down Expand Up @@ -48,7 +50,8 @@ class IParserKQLFunction
static String getArgument(const String & function_name, DB::IParser::Pos & pos);
static String getConvertedArgument(const String & fn_name, IParser::Pos & pos);
static std::optional<String> getOptionalArgument(const String & function_name, DB::IParser::Pos & pos);
static String kqlCallToExpression(const String & function_name, std::initializer_list<std::string_view> params, uint32_t max_depth);
static String kqlCallToExpression(std::string_view function_name, std::initializer_list<const std::string_view> params, uint32_t max_depth);
static String kqlCallToExpression(std::string_view function_name, std::span<const std::string_view> params, uint32_t max_depth);
static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos);
static String getKQLFunctionName(IParser::Pos & pos);
static String ArraySortHelper(String & out, IParser::Pos & pos, bool ascending);
Expand Down
67 changes: 49 additions & 18 deletions src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@

#include <format>

namespace
{
/// Surrounds the given parameter text with "dynamic(" and ")".
String wrapInDynamic(const String & parameter)
{
    String wrapped{"dynamic("};
    wrapped += parameter;
    wrapped += ")";
    return wrapped;
}
}

namespace DB
{

Expand Down Expand Up @@ -91,7 +99,7 @@ bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos)

const auto array = getArgument(function_name, pos);
const auto count = getArgument(function_name, pos);
out = kqlCallToExpression("array_rotate_left", {"dynamic(" + array + ")", "-1 * " + count}, pos.max_depth);
out = kqlCallToExpression("array_rotate_left", {wrapInDynamic(array), "-1 * " + count}, pos.max_depth);

return true;
}
Expand All @@ -106,7 +114,7 @@ bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos)
const auto count = getArgument(function_name, pos);
const auto fill = getOptionalArgument(function_name, pos);
out = std::format(
"arrayResize(multiIf({1} > 0, arraySlice({0}, {1} + 1), {1} < 0, arrayConcat(arrayWithConstant(abs({1}), fill_value_{3}), {0}), {0}), "
"arrayResize(if({1} > 0, arraySlice({0}, {1} + 1), arrayConcat(arrayWithConstant(abs({1}), fill_value_{3}), {0})), "
"length({0}), ifNull({2}, if(toTypeName({0}) = 'Array(String)', defaultValueOfArgumentType({0}[1]), null)) as fill_value_{3})",
array,
count,
Expand All @@ -126,7 +134,7 @@ bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos)
const auto count = getArgument(function_name, pos);
const auto fill = getOptionalArgument(function_name, pos);

const auto arg1 = "dynamic(" + array + ")";
const auto arg1 = wrapInDynamic(array);
const auto arg2 = "-1 * " + count;
out = kqlCallToExpression(
"array_shift_left",
Expand Down Expand Up @@ -222,9 +230,18 @@ bool BagRemoveKeys::convertImpl(String & out, IParser::Pos & pos)

/// Translates KQL jaccard_index(lhs, rhs) into
///     divide(length(<set_intersect(lhs, rhs)>), length(<set_union(lhs, rhs)>)).
/// Each argument is wrapped in dynamic(...) before being handed to kqlCallToExpression,
/// which produces the expressions for the nested set_intersect / set_union calls.
/// Returns false (leaving `out` untouched) when no function name can be read at `pos`.
bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto lhs = wrapInDynamic(getArgument(function_name, pos));
    const auto rhs = wrapInDynamic(getArgument(function_name, pos));
    out = std::format(
        "divide(length({0}), length({1}))",
        kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth),
        kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth));

    return true;
}

bool Pack::convertImpl(String & out, IParser::Pos & pos)
Expand Down Expand Up @@ -261,30 +278,44 @@ bool Repeat::convertImpl(String & out, IParser::Pos & pos)

/// Translates KQL set_difference(lhs, rhs1 [, rhs2, ...]) into
///     arrayFilter(x -> not has(<set_union of all rhs arrays>, x), arrayDistinct(lhs)).
/// The first right-hand array is mandatory; further ones are read until no optional argument remains.
/// Returns false (leaving `out` untouched) when no function name can be read at `pos`.
bool SetDifference::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto lhs = getArgument(function_name, pos);
    const auto rhs = std::invoke(
        [&function_name, &pos]
        {
            // The owning strings must outlive the string_views passed to kqlCallToExpression below.
            std::vector<String> arrays{wrapInDynamic(getArgument(function_name, pos))};
            while (auto next_array = getOptionalArgument(function_name, pos))
                arrays.push_back(wrapInDynamic(*next_array));

            return kqlCallToExpression("set_union", std::vector<std::string_view>(arrays.cbegin(), arrays.cend()), pos.max_depth);
        });

    out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs);

    return true;
}

/// Translates KQL set_has_element(...) by delegating to directMapping with the target function name "has".
/// The result of directMapping is returned unchanged.
bool SetHasElement::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "has");
}

/// Translates KQL set_intersect(...) by delegating to directMapping with the target function name "arrayIntersect".
/// The result of directMapping is returned unchanged.
bool SetIntersect::convertImpl(String & out, IParser::Pos & pos)
{
    return directMapping(out, pos, "arrayIntersect");
}

/// Translates KQL set_union(...) into arrayDistinct(arrayConcat(...)).
/// The arguments are first mapped onto arrayConcat via directMapping; if that fails, false is returned.
/// Otherwise the mapped expression is wrapped in arrayDistinct to drop duplicates.
bool SetUnion::convertImpl(String & out, IParser::Pos & pos)
{
    if (!directMapping(out, pos, "arrayConcat"))
        return false;

    out = std::format("arrayDistinct({0})", out);

    return true;
}

bool TreePath::convertImpl(String & out, IParser::Pos & pos)
Expand Down
5 changes: 5 additions & 0 deletions src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h>
#include <Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h>
#include <Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h>
#include <Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h>

namespace DB
{
Expand Down Expand Up @@ -65,6 +66,7 @@ namespace DB
{"has_any_index", KQLFunctionValue::has_any_index},
{"indexof", KQLFunctionValue::indexof},
{"isempty", KQLFunctionValue::isempty},
{"isnan", KQLFunctionValue::isnan},
{"isnotempty", KQLFunctionValue::isnotempty},
{"notempty", KQLFunctionValue::isnotempty},
{"isnotnull", KQLFunctionValue::isnotnull},
Expand Down Expand Up @@ -370,6 +372,9 @@ std::unique_ptr<IParserKQLFunction> KQLFunctionFactory::get(String &kql_function
case KQLFunctionValue::isempty:
return std::make_unique<IsEmpty>();

case KQLFunctionValue::isnan:
return std::make_unique<IsNan>();

case KQLFunctionValue::isnotempty:
return std::make_unique<IsNotEmpty>();

Expand Down
1 change: 1 addition & 0 deletions src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ namespace DB
has_any_index,
indexof,
isempty,
isnan,
isnotempty,
isnotnull,
isnull,
Expand Down
9 changes: 9 additions & 0 deletions src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "KQLMathematicalFunctions.h"

namespace DB
{
/// KQL isnan() is translated by handing the call to directMapping with the target function name "isNaN";
/// whatever directMapping reports is passed back to the caller.
bool IsNan::convertImpl(String & out, IParser::Pos & pos)
{
    const bool converted = directMapping(out, pos, "isNaN");
    return converted;
}
}
11 changes: 11 additions & 0 deletions src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include "IParserKQLFunction.h"

namespace DB
{
/// Parser for the KQL isnan() function; the conversion itself is implemented in convertImpl.
class IsNan : public IParserKQLFunction
{
protected:
    /// Name reported for this function parser.
    const char * getName() const override { return "isnan()"; }
    bool convertImpl(String & out, IParser::Pos & pos) override;
};
}
40 changes: 36 additions & 4 deletions src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest,
"print output = array_sum(dynamic([2.5, 5.5, 3]))",
"SELECT arraySum([2.5, 5.5, 3]) AS output"
},
{
"print jaccard_index(A, B)",
"SELECT length(arrayIntersect(A, B)) / length(arrayDistinct(arrayConcat(A, B)))"
},
{
"print pack_array(A, B, C, D)",
"SELECT [A, B, C, D]"
Expand All @@ -71,6 +75,34 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest,
"print repeat(A, B)",
"SELECT arrayWithConstant(B, A)"
},
{
"print set_difference(A, B)",
"SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B)), x)), arrayDistinct(A))"
},
{
"print set_difference(A, B, C)",
"SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B, C)), x)), arrayDistinct(A))"
},
{
"print set_has_element(A, B)",
"SELECT has(A, B)"
},
{
"print set_intersect(A, B)",
"SELECT arrayIntersect(A, B)"
},
{
"print set_intersect(A, B, C)",
"SELECT arrayIntersect(A, B, C)"
},
{
"print set_union(A, B)",
"SELECT arrayDistinct(arrayConcat(A, B))"
},
{
"print set_union(A, B, C)",
"SELECT arrayDistinct(arrayConcat(A, B, C))"
},
{
"print zip(A, B)",
"SELECT arrayMap(t -> [untuple(t)], arrayZip(A, B))"
Expand All @@ -87,19 +119,19 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest,
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"print array_shift_left(A, B)",
"SELECT arrayResize\\(multiIf\\(B > 0, arraySlice\\(A, B \\+ 1\\), B < 0, arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\), A\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
"SELECT arrayResize\\(if\\(B > 0, arraySlice\\(A, B \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
},
{
"print array_shift_left(A, B, C)",
"SELECT arrayResize\\(multiIf\\(B > 0, arraySlice\\(A, B \\+ 1\\), B < 0, arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\), A\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
"SELECT arrayResize\\(if\\(B > 0, arraySlice\\(A, B \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
},
{
"print array_shift_right(A, B)",
"SELECT arrayResize\\(multiIf\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), \\(-1 \\* B\\) < 0, arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\), A\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
"SELECT arrayResize\\(if\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(NULL, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
},
{
"print array_shift_right(A, B, C)",
"SELECT arrayResize\\(multiIf\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), \\(-1 \\* B\\) < 0, arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\), A\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
"SELECT arrayResize\\(if\\(\\(-1 \\* B\\) > 0, arraySlice\\(A, \\(-1 \\* B\\) \\+ 1\\), arrayConcat\\(arrayWithConstant\\(abs\\(-1 \\* B\\), fill_value_\\d+\\), A\\)\\), length\\(A\\), ifNull\\(C, if\\(toTypeName\\(A\\) = 'Array\\(String\\)', defaultValueOfArgumentType\\(A\\[1\\]\\), NULL\\)\\) AS fill_value_\\d+\\)"
},
{
"print array_slice(A, B, C)",
Expand Down

0 comments on commit eedb3a9

Please sign in to comment.