Skip to content

Commit

Permalink
Implement raw argument extraction to support nesting
Browse files Browse the repository at this point in the history
  • Loading branch information
ltrk2 authored and kashwy committed Aug 26, 2023
1 parent 61877a4 commit 4a4d494
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 13 deletions.
73 changes: 65 additions & 8 deletions src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,42 @@

#include <format>

namespace DB
{
namespace ErrorCodes
namespace DB::ErrorCodes
{
extern const int SYNTAX_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SYNTAX_ERROR;
}

namespace
{
/// Returns the closing bracket token that terminates the scope opened by `token_type`.
/// Only the three opening bracket kinds (curly, round, square) are recognised;
/// any other token type results in a NOT_IMPLEMENTED exception naming the token.
constexpr DB::TokenType determineClosingPair(const DB::TokenType token_type)
{
    switch (token_type)
    {
        case DB::TokenType::OpeningCurlyBrace:
            return DB::TokenType::ClosingCurlyBrace;
        case DB::TokenType::OpeningRoundBracket:
            return DB::TokenType::ClosingRoundBracket;
        case DB::TokenType::OpeningSquareBracket:
            return DB::TokenType::ClosingSquareBracket;
        default:
            /// Not an opening bracket: fall through to the error below.
            break;
    }

    throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unhandled token: {}", magic_enum::enum_name(token_type));
}

/// Tells whether `token_type` is one of the closing bracket tokens: `}`, `)` or `]`.
constexpr bool isClosingBracket(const DB::TokenType token_type)
{
    switch (token_type)
    {
        case DB::TokenType::ClosingCurlyBrace:
        case DB::TokenType::ClosingRoundBracket:
        case DB::TokenType::ClosingSquareBracket:
            return true;
        default:
            return false;
    }
}

/// Tells whether `token_type` is one of the opening bracket tokens: `{`, `(` or `[`.
constexpr bool isOpeningBracket(const DB::TokenType token_type)
{
    switch (token_type)
    {
        case DB::TokenType::OpeningCurlyBrace:
        case DB::TokenType::OpeningRoundBracket:
        case DB::TokenType::OpeningSquareBracket:
            return true;
        default:
            return false;
    }
}
}

namespace DB
{
bool IParserKQLFunction::convert(String & out, IParser::Pos & pos)
{
return wrapConvertImpl(
Expand Down Expand Up @@ -88,9 +116,9 @@ String IParserKQLFunction::generateUniqueIdentifier()
return std::to_string(unique_random_generator());
}

/// Extracts the next argument of `function_name` starting at `pos`, treating it as mandatory.
/// The work is delegated to getOptionalArgument; `argument_state` is forwarded to it unchanged.
/// Returns the extracted argument text (moved out of the optional).
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when getOptionalArgument yields no value.
/// NOTE(review): this span comes from a diff view, so the signature and the `if` line each appear twice;
/// presumably the first of each pair is the pre-commit line and the second its replacement — confirm against the real file.
String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos)
String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state)
{
if (auto optionalArgument = getOptionalArgument(function_name, pos))
if (auto optionalArgument = getOptionalArgument(function_name, pos, argument_state))
return std::move(*optionalArgument);

/// No argument was found: report it as an argument-count mismatch for this function.
throw Exception(std::format("Required argument was not provided in {}", function_name), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Expand Down Expand Up @@ -142,13 +170,42 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser:
return converted_arg;
}

/// Extracts the next argument of `function_name` starting at `pos`, if one is present.
/// Returns an empty optional when the current token is neither a comma nor an opening round bracket.
/// With ArgumentState::Parsed the argument is produced by getConvertedArgument.
/// With ArgumentState::Raw the argument's tokens are copied as they appear in the input, honouring nested
/// (), [] and {} so that commas and closing round brackets inside a nested scope do not end the argument.
/// Throws NOT_IMPLEMENTED for any other ArgumentState value and SYNTAX_ERROR on a mismatched closing bracket.
/// NOTE(review): this span comes from a diff view; the first signature line below presumably is the
/// pre-commit signature and the two lines after it the replacement — confirm against the real file.
std::optional<String> IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos)
std::optional<String>
IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state)
{
/// An argument can only follow a comma or the opening round bracket of the call.
if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket)
return {};

/// Step over that separator token so `pos` points at the first token of the argument.
++pos;
/// NOTE(review): the unconditional return on the next line looks like the removed pre-commit line rendered
/// by the diff view; if it were live, everything after it would be unreachable.
return getConvertedArgument(function_name, pos);
if (argument_state == ArgumentState::Parsed)
return getConvertedArgument(function_name, pos);

/// Only Raw is handled below; reject any enumerator this function does not know about.
if (argument_state != ArgumentState::Raw)
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Argument extraction is not implemented for {}::{}",
magic_enum::enum_type_name<ArgumentState>(),
magic_enum::enum_name(argument_state));

String expression;
/// Stack of opening bracket tokens that have not been closed yet.
std::vector<DB::TokenType> scopes;
/// Consume tokens until the end of input, or until a comma / closing round bracket is met outside any nested scope.
/// NOTE(review): if the input ends while `scopes` is non-empty, the loop simply stops and the partial
/// expression is returned without an error.
while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket)))
{
if (const auto token_type = pos->type; isOpeningBracket(token_type))
scopes.push_back(token_type);
else if (isClosingBracket(token_type))
{
/// A closing bracket must match the most recently opened scope.
if (scopes.empty() || determineClosingPair(scopes.back()) != token_type)
throw Exception(DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name);

scopes.pop_back();
}

/// Only the token's own [begin, end) text is appended and nothing is inserted between tokens;
/// presumably whitespace is not part of any token and is therefore dropped — TODO confirm against the lexer.
expression.append(pos->begin, pos->end);
++pos;
}

/// `pos` is left on the token that ended the argument (or at the end of input).
return expression;
}

String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos)
Expand Down
13 changes: 10 additions & 3 deletions src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,21 @@ class IParserKQLFunction
static String getExpression(IParser::Pos & pos);

protected:
/// Selects how getArgument / getOptionalArgument produce the text of an argument.
enum class ArgumentState
{
/// The argument is obtained through getConvertedArgument.
Parsed,
/// The argument's tokens are copied as written in the input, up to the next top-level comma or closing round bracket.
Raw
};

virtual bool convertImpl(String & out, IParser::Pos & pos) = 0;

static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn);
static String generateUniqueIdentifier();
static String getArgument(const String & function_name, DB::IParser::Pos & pos);
static String getArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed);
static String getConvertedArgument(const String & fn_name, IParser::Pos & pos);
static std::optional<String> getOptionalArgument(const String & function_name, DB::IParser::Pos & pos);
static String kqlCallToExpression(std::string_view function_name, std::initializer_list<const std::string_view> params, uint32_t max_depth);
static std::optional<String> getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed);
static String
kqlCallToExpression(std::string_view function_name, std::initializer_list<const std::string_view> params, uint32_t max_depth);
static String kqlCallToExpression(std::string_view function_name, std::span<const std::string_view> params, uint32_t max_depth);
static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos);
static String getKQLFunctionName(IParser::Pos & pos);
Expand Down
4 changes: 2 additions & 2 deletions src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,8 @@ bool Trim::convertImpl(String & out, IParser::Pos & pos)
if (fn_name.empty())
return false;

const auto regex = getArgument(fn_name, pos);
const auto source = getArgument(fn_name, pos);
const auto regex = getArgument(fn_name, pos, ArgumentState::Raw);
const auto source = getArgument(fn_name, pos, ArgumentState::Raw);
out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth);

return true;
Expand Down
9 changes: 9 additions & 0 deletions tests/queries/0_stateless/02366_kql_func_string.reference
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,21 @@ kusto xxx
-- trim()
https://www.ibm.com
Te st1
asd
asd
sd
-- trim_start()
www.ibm.com
Te st1// $
asdw

asd
-- trim_end()
https
- Te st1
wasd

asd
-- replace_regex
Number was: 1
-- has_any_index()
Expand Down
9 changes: 9 additions & 0 deletions tests/queries/0_stateless/02366_kql_func_string.sql
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,21 @@ print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translat
print '-- trim()';
print trim("--", "--https://www.ibm.com--");
print trim("[^\w]+", strcat("- ","Te st", "1", "// $"));
print trim("", " asd ");
print trim("a$", "asd");
print trim("^a", "asd");
print '-- trim_start()';
print trim_start("https://", "https://www.ibm.com");
print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $"));
print trim_start("asd$", "asdw");
print trim_start("asd$", "asd");
print trim_start("d$", "asd");
print '-- trim_end()';
print trim_end("://www.ibm.com", "https://www.ibm.com");
print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $"));
print trim_end("^asd", "wasd");
print trim_end("^asd", "asd");
print trim_end("^a", "asd");
print '-- replace_regex';
print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1');
print '-- has_any_index()';
Expand Down

0 comments on commit 4a4d494

Please sign in to comment.