Skip to content

Commit

Permalink
Merge pull request #256 from ClibMouse/Kusto-p3-disable-extra--word-for-sort
Browse files (browse the repository at this point in the history)

Kusto-phase3 : fix sort issues
  • Loading branch information
kashwy authored Mar 14, 2023
2 parents fc581bc + fa3d497 commit 5c651f3
Show file tree
Hide file tree
Showing 8 changed files with 412 additions and 60 deletions.
114 changes: 84 additions & 30 deletions src/Parsers/Kusto/ParserKQLSort.cpp
Original file line number Diff line number Diff line change
@@ -1,58 +1,112 @@
#include <format>
#include <Parsers/ASTLiteral.h>
#include <Parsers/IParserBase.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLSort.h>

namespace DB
{

namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
}

// Parses the item list of a KQL sort clause and installs the result as the
// ORDER BY expression of the ASTSelectQuery held in `node`.
//
// The visible flow is: split the token stream at top-level commas, rewrite each
// item into the text "<column> <direction> <nulls position>", join the items with
// ',' into order_list_str, re-tokenize that string and parse it with
// ParserOrderByExpressionList.
//
// Returns false when the final ORDER BY parse fails; throws
// Exception(ErrorCodes::SYNTAX_ERROR) for the malformed items detected below.
//
// NOTE(review): this text comes from a diff view (hunk "@@ -1,58 +1,112 @@") and
// appears to interleave statements of the previous and the new implementation
// without +/- markers: `tokens`, `new_pos` and `tmp` are declared more than once
// and a `while` header is directly followed by a lambda definition. Confirm
// against the real file before relying on statement order.
bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
bool has_dir = false;
std::vector <bool> has_directions;
// Accumulates the comma-separated, rewritten sort items.
String order_list_str;
ParserOrderByExpressionList order_list;
ASTPtr order_expression_list;

auto expr = getExprFromToken(pos);

Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
IParser::Pos new_pos(tokens, pos.max_depth);
// Returns the source text spanning from the first token (pos1) to the last
// token (pos2) of a column expression. Throws SYNTAX_ERROR when pos2 is a bare
// word that is not the same token as pos1.
auto validate_column = [&](Pos & pos1, Pos & pos2)
{
if (pos2->type == TokenType::BareWord && pos1 != pos2)
throw Exception(
ErrorCodes::SYNTAX_ERROR,
"{} does not refer to any known column, table, variable or function",
String(pos2->begin, pos2->end));

auto pos_backup = new_pos;
if (!order_list.parse(pos_backup, order_expression_list, expected))
return false;
return String(pos1->begin, pos2->end);
};

while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon)
// Rewrites one sort item, given as the token range [pos1, pos2), into
// "<column> <asc|desc> nulls <first|last>".
auto format_sort_expr = [&](const Pos & pos1, const Pos & pos2)
{
String tmp(new_pos->begin, new_pos->end);
if (tmp == "desc" or tmp == "asc")
has_dir = true;

if (new_pos->type == TokenType::Comma)
auto start_pos = pos1;
auto end_pos = pos2;
String column_expr, sort_dir, nulls_position;
auto tmp_pos = start_pos;
while (tmp_pos < end_pos)
{
has_directions.push_back(has_dir);
has_dir = false;
String tmp(tmp_pos->begin, tmp_pos->end);
// Direction keyword: compared by exact match against lowercase "desc"/"asc".
// It must come before any "nulls ..." part and may appear only once.
if (tmp == "desc" || tmp == "asc")
{
if (!sort_dir.empty() || !nulls_position.empty())
throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");
// The column expression ends at the token just before the keyword.
// NOTE(review): if the keyword is the first token of the item, this steps
// to a position before start_pos — confirm that input cannot reach here.
--tmp_pos;
column_expr = validate_column(start_pos, tmp_pos);
sort_dir = tmp;
++tmp_pos;
}
// "nulls" must be followed immediately by "first" or "last".
if (tmp == "nulls")
{
if (!nulls_position.empty())
throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");
auto nulls_pos = tmp_pos;
++tmp_pos;
tmp = String(tmp_pos->begin, tmp_pos->end);
if (tmp_pos->isEnd() || (tmp != "first" && tmp != "last"))
throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid nulls position of sort operator");

nulls_position = "nulls " + tmp;
// No direction keyword was seen, so the column ends right before "nulls".
if (column_expr.empty())
{
--nulls_pos;
column_expr = validate_column(start_pos, nulls_pos);
}
}

++tmp_pos;
}
// Neither keyword was found: the whole range is the column expression.
--end_pos;
if (column_expr.empty())
column_expr = validate_column(start_pos, end_pos);

++new_pos;
}
has_directions.push_back(has_dir);
// Defaults: direction is "desc"; nulls go last for desc and first for asc.
if (sort_dir.empty())
sort_dir = "desc";
if (nulls_position.empty())
nulls_position = sort_dir == "desc" ? "nulls last" : "nulls first";
// The column text is passed through getExprFromToken before being formatted.
return std::format("{} {} {}", getExprFromToken(column_expr, pos.max_depth), sort_dir, nulls_position);
};

for (uint64_t i = 0; i < order_expression_list->children.size(); ++i)
// Walk the clause up to the next pipe, semicolon or end of input. paren_count
// tracks round-bracket nesting so that only commas at depth 0 separate items.
auto paren_count = 0;
auto begin = pos;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (!has_directions[i])
if (pos->type == TokenType::ClosingRoundBracket)
--paren_count;
if (pos->type == TokenType::OpeningRoundBracket)
++paren_count;
if (pos->type == TokenType::Comma && paren_count == 0)
{
auto *order_expr = order_expression_list->children[i]->as<ASTOrderByElement>();
order_expr->direction = -1; // default desc
if (!order_expr->nulls_direction_was_explicitly_specified)
order_expr->nulls_direction = -1;
else
order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1;
auto single_sort_expr = format_sort_expr(begin, pos);
order_list_str = order_list_str.empty() ? single_sort_expr : order_list_str + "," + single_sort_expr;
// The next item starts at the token after the comma.
begin = pos;
++begin;
}
++pos;
}

// Format the last (or only) item, which is not terminated by a comma.
auto single_sort_expr = format_sort_expr(begin, pos);
order_list_str = order_list_str.empty() ? single_sort_expr : order_list_str + "," + single_sort_expr;

// Re-tokenize the rewritten list and parse it as an ORDER BY expression list.
Tokens tokens(order_list_str.c_str(), order_list_str.c_str() + order_list_str.size());
IParser::Pos new_pos(tokens, pos.max_depth);

if (!order_list.parse(new_pos, order_expression_list, expected))
return false;

node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list));
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest,
},
{
"Customers |where Age <30 | order by Age| distinct Occupation, Education",
"SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC\n)"
"SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC NULLS LAST\n)"
},
{
"Customers | project a = (Age % 10) | distinct a;",
Expand Down
109 changes: 109 additions & 0 deletions src/Parsers/tests/KQL/gtest_KQL_Sort.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include <Parsers/tests/gtest_common.h>

#include <Parsers/Kusto/ParserKQLQuery.h>

// Parameterized cases for the KQL `order by` clause: each entry pairs a KQL query
// with the expected formatted SQL text, run through DB::ParserKQLQuery.
// Case order is kept as-is because it determines the generated test indices.
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Sort, ParserTest,
::testing::Combine(
::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
// Single column, direction only: no direction means DESC; the implied nulls
// position is NULLS LAST for DESC and NULLS FIRST for ASC.
{
"Customers | order by FirstName",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | order by FirstName asc",
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
},
{
"Customers | order by FirstName desc",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
// Single column with an explicit nulls position, with and without a direction.
{
"Customers | order by FirstName asc nulls first",
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
},
{
"Customers | order by FirstName asc nulls last",
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS LAST"
},
{
"Customers | order by FirstName desc nulls first",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST"
},
{
"Customers | order by FirstName desc nulls last",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | order by FirstName nulls first",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST"
},
{
"Customers | order by FirstName nulls last",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
// Multiple columns: every item gets its own direction and nulls defaults.
{
"Customers | order by FirstName, Age",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
},
{
"Customers | order by FirstName asc, Age desc",
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age DESC NULLS LAST"
},
{
"Customers | order by FirstName desc, Age asc",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS FIRST"
},
{
"Customers | order by FirstName asc nulls first, Age asc nulls first",
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age ASC NULLS FIRST"
},
{
"Customers | order by FirstName asc nulls last, Age asc nulls last",
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS LAST,\n Age ASC NULLS LAST"
},
{
"Customers | order by FirstName desc nulls first, Age desc nulls first",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST"
},
{
"Customers | order by FirstName desc nulls last, Age desc nulls last",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
},
{
"Customers | order by FirstName nulls first, Age nulls first",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST"
},
{
"Customers | order by FirstName nulls last, Age nulls last",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
},
{
"Customers | order by FirstName, Age asc nulls last, LastName nulls first",
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS LAST,\n LastName DESC NULLS FIRST"
},
// Malformed clauses: uppercase direction keywords, incomplete or invalid nulls
// positions, duplicate directions, and a direction after the nulls position.
// NOTE(review): presumably ParserTest treats the expected value "throws" as
// "parsing must raise" — confirm in gtest_common.
{
"Customers | order by FirstName ASC",
"throws"
},
{
"Customers | order by FirstName DESC",
"throws"
},
{
"Customers | order by FirstName nulls",
"throws"
},
{
"Customers | order by FirstName nulls middle",
"throws"
},
{
"Customers | order by FirstName asc desc",
"throws"
},
{
"Customers | order by FirstName nulls first desc",
"throws"
}
})));
2 changes: 1 addition & 1 deletion src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_String, ParserTest,
},
{
"Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName",
"SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC"
"SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC NULLS LAST"
},
{
"print idx1 = indexof('abcdefg','cde')",
Expand Down
32 changes: 16 additions & 16 deletions src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest,
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"Customers | top 5 by Age",
"SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5"
"SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5"
},
{
"Customers | top 5 by Age desc",
"SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5"
"SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5"
},
{
"Customers | top 5 by Age asc",
"SELECT *\nFROM Customers\nORDER BY Age ASC\nLIMIT 5"
"SELECT *\nFROM Customers\nORDER BY Age ASC NULLS FIRST\nLIMIT 5"
},
{
"Customers | top 5 by FirstName desc nulls first",
Expand All @@ -28,30 +28,30 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest,
},
{
"Customers | top 5 by Age | top 2 by FirstName",
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC\n LIMIT 5\n)\nORDER BY FirstName DESC\nLIMIT 2"
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC NULLS LAST\n LIMIT 5\n)\nORDER BY FirstName DESC NULLS LAST\nLIMIT 2"
},
{
"Customers| top-hitters a = 3 of Age by extra",
"SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC\nLIMIT 3 AS a"
"Customers| top-hitters a = 3 of Age by extra",
"SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC NULLS LAST\nLIMIT 3 AS a"
},
{
"Customers| top-hitters 3 of Age",
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 3"
"Customers| top-hitters 3 of Age",
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 3"
},
{
"Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age",
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 2"
"Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age",
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 2"
},
{
"Customers| top-hitters 3 of Age by extra | where Age > 30",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE Age > 30"
"Customers| top-hitters 3 of Age by extra | where Age > 30",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE Age > 30"
},
{
"Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200"
"Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200"
},
{
"Customers| top-hitters 3 of Age | where approximate_count_Age > 2",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC\n LIMIT 3\n)\nWHERE approximate_count_Age > 2"
"Customers| top-hitters 3 of Age | where approximate_count_Age > 2",
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_count_Age > 2"
}
})));
20 changes: 8 additions & 12 deletions src/Parsers/tests/gtest_Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,35 +273,31 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
},
{
"Customers | sort by FirstName desc",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | take 3 | order by FirstName desc",
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC"
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | sort by FirstName asc",
"SELECT *\nFROM Customers\nORDER BY FirstName ASC"
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
},
{
"Customers | sort by FirstName",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
},
{
"Customers | order by LastName",
"SELECT *\nFROM Customers\nORDER BY LastName DESC"
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | order by Age desc , FirstName asc ",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC"
"Customers | order by Age desc, FirstName asc",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName ASC NULLS FIRST"
},
{
"Customers | order by Age asc , FirstName desc",
"SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC"
"SELECT *\nFROM Customers\nORDER BY\n Age ASC NULLS FIRST,\n FirstName DESC NULLS LAST"
},
{
"Customers | sort by FirstName | order by Age ",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC"
"SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName DESC NULLS LAST"
},
{
"Customers | sort by FirstName nulls first",
Expand Down
Loading

0 comments on commit 5c651f3

Please sign in to comment.