From 0ea7ed029fa013eb43d797c567ab38e0cac09322 Mon Sep 17 00:00:00 2001 From: Zhiyuan Liang <132966438+Ami11111@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:09:34 +0800 Subject: [PATCH] Support ltrim, rtrim, trim functions (#2064) ### What problem does this PR solve? Support ltrim, rtrim, trim functions ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- example/functions.py | 32 +++++++++---- example/http/functions.sh | 53 +++++++++++++++++++++ src/function/builtin_functions.cpp | 6 +++ src/function/scalar/ltrim.cpp | 62 +++++++++++++++++++++++++ src/function/scalar/ltrim.cppm | 13 ++++++ src/function/scalar/rtrim.cpp | 62 +++++++++++++++++++++++++ src/function/scalar/rtrim.cppm | 13 ++++++ src/function/scalar/trim.cpp | 74 ++++++++++++++++++++++++++++++ src/function/scalar/trim.cppm | 13 ++++++ test/sql/dql/type/varchar.slt | 54 +++++++++++++++------- 10 files changed, 356 insertions(+), 26 deletions(-) create mode 100644 src/function/scalar/ltrim.cpp create mode 100644 src/function/scalar/ltrim.cppm create mode 100644 src/function/scalar/rtrim.cpp create mode 100644 src/function/scalar/rtrim.cppm create mode 100644 src/function/scalar/trim.cpp create mode 100644 src/function/scalar/trim.cppm diff --git a/example/functions.py b/example/functions.py index 42a4e4ef34..a8d6086be5 100644 --- a/example/functions.py +++ b/example/functions.py @@ -20,42 +20,56 @@ [{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'}, {"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'}, {"c1": 'abcd', "c2": 'abc'}, - {"c1": 'test@gmail.com', "c2": 'email'}, {"c1": 'test@hotmail.com', "c2": 'email'}]) + {"c1": 'test@gmail.com', "c2": 'email'}, {"c1": 'test@hotmail.com', "c2": 'email'}, + {"c1": ' abc', "c2": 'abc'}, {"c1": 'abc ', "c2": 'abc'}, {"c1": ' abc ', "c2": 'abc'}]) #function char_length -res = 
table_obj.output(["*"]).filter("char_length(c1) = 1").to_df() +res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 1").to_df() print(res) -res = table_obj.output(["*"]).filter("char_length(c1) = 3").to_df() +res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 3").to_df() print(res) -res = table_obj.output(["*"]).filter("char_length(c1) = 4").to_df() +res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 4").to_df() print(res) -res = table_obj.output(["*"]).filter("char_length(c1) = char_length(c2)").to_df() +res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = char_length(c2)").to_df() print(res) #function regex -res = table_obj.output(["*"]).filter("regex(c1, 'bc')").to_df() +res = table_obj.output(["*", "regex(c1, 'bc')"]).filter("regex(c1, 'bc')").to_df() print(res) res = table_obj.output(["*"]).filter("regex(c1, '(\w+([-+.]\w+)*)@(\w+([-.]\w+)*)\.(\w+([-.]\w+)*)')").to_df() print(res) #function substring -res = table_obj.output(["*"]).filter("substring(c1, 0, 2) = 'ab'").to_df() +res = table_obj.output(["*", "substring(c1, 0, 2)"]).filter("substring(c1, 0, 2) = 'ab'").to_df() print(res) -res = table_obj.output(["*"]).filter("substring(c1, 0, 4) = 'test'").to_df() +res = table_obj.output(["*", "substring(c1, 0, 4)"]).filter("substring(c1, 0, 4) = 'test'").to_df() print(res) #function upper and lower -res = table_obj.output(["*"]).filter("upper(c1) = 'TEST@GMAIL.COM'").to_df() +res = table_obj.output(["*", "upper(c1)"]).filter("upper(c1) = 'TEST@GMAIL.COM'").to_df() print(res) res = table_obj.output(["*"]).filter("lower('ABC') = c1").to_df() print(res) +#function ltrim, rtrim, trim +res = table_obj.output(["*", "ltrim(c1)"]).filter("ltrim(c1) = 'abc'").to_df() +print(res) + +res = table_obj.output(["*", "rtrim(c1)"]).filter("rtrim(c1) = 'abc'").to_df() +print(res) + +res = table_obj.output(["*", "trim(c1)"]).filter("trim(c1) = 'abc'").to_df() +print(res) + +res = 
table_obj.output(["*"]).filter("trim(' abc ') = rtrim(ltrim(' abc '))").to_df() +print(res) + res = db_obj.drop_table("function_example") infinity_obj.disconnect() \ No newline at end of file diff --git a/example/http/functions.sh b/example/http/functions.sh index 9b633dd6fb..76bd4833fd 100644 --- a/example/http/functions.sh +++ b/example/http/functions.sh @@ -111,6 +111,14 @@ curl --request POST \ "sparse_column": {"20":7.7, "80":7.8, "90": 97.9}, "year": 2018, "tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]] + }, + { + "num": 8, + "body": " this is an example for trim ", + "vec": [4.0, 4.2, 4.3, 4.5], + "sparse_column": {"20":7.7, "80":7.8, "90": 97.9}, + "year": 2018, + "tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]] } ] ' @@ -248,6 +256,51 @@ curl --request GET \ "filter": "body = lower('\'TEST@GMAIL.COM\'')" } ' +# show rows of 'tbl1' where ltrim(body) is "this is an example for trim " +echo -e '\n\n-- show rows of 'tbl1' where ltrim(body) is "this is an example for trim "' +curl --request GET \ + --url http://localhost:23820/databases/default_db/tables/tbl1/docs \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --data ' + { + "output": + [ + "body" + ], + "filter": "ltrim(body) = '\''this is an example for trim '\''" + } ' + +# show rows of 'tbl1' where rtrim(body) is " this is an example for trim" +echo -e '\n\n-- show rows of 'tbl1' where rtrim(body) is " this is an example for trim"' +curl --request GET \ + --url http://localhost:23820/databases/default_db/tables/tbl1/docs \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --data ' + { + "output": + [ + "body" + ], + "filter": "rtrim(body) = '\'' this is an example for trim'\''" + } ' + +# show rows of 'tbl1' where trim(body) is "this is an example for trim" +echo -e '\n\n-- show rows of 'tbl1' where trim(body) is "this is an example for trim"' +curl --request GET \ + --url 
http://localhost:23820/databases/default_db/tables/tbl1/docs \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --data ' + { + "output": + [ + "body" + ], + "filter": "trim(body) = '\''this is an example for trim'\''" + } ' + # drop tbl1 echo -e '\n\n-- drop tbl1' curl --request DELETE \ diff --git a/src/function/builtin_functions.cpp b/src/function/builtin_functions.cpp index 2b94e7754c..9c9aaf6e30 100644 --- a/src/function/builtin_functions.cpp +++ b/src/function/builtin_functions.cpp @@ -51,6 +51,9 @@ import md5; import lower; import upper; import regex; +import ltrim; +import rtrim; +import trim; import default_values; import special_function; import internal_types; @@ -123,6 +126,9 @@ void BuiltinFunctions::RegisterScalarFunction() { RegisterLowerFunction(catalog_ptr_); RegisterUpperFunction(catalog_ptr_); RegisterRegexFunction(catalog_ptr_); + RegisterLtrimFunction(catalog_ptr_); + RegisterRtrimFunction(catalog_ptr_); + RegisterTrimFunction(catalog_ptr_); } void BuiltinFunctions::RegisterTableFunction() {} diff --git a/src/function/scalar/ltrim.cpp b/src/function/scalar/ltrim.cpp new file mode 100644 index 0000000000..012e3416ac --- /dev/null +++ b/src/function/scalar/ltrim.cpp @@ -0,0 +1,62 @@ +module; + +#include + +module ltrim; + +import stl; +import catalog; +import status; +import infinity_exception; +import scalar_function; +import scalar_function_set; + +import third_party; +import logical_type; +import internal_types; +import data_type; +import logger; +import column_vector; + +namespace infinity { + +struct LtrimFunction { + template + static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) { + Status status = Status::NotSupport("Not implemented"); + RecoverableError(status); + } +}; + +template <> +inline void LtrimFunction::Run(VarcharT &left, VarcharT &result, ColumnVector *left_ptr, ColumnVector *result_ptr) { + const char *input = nullptr; + SizeT input_len = 0; + Span left_v = 
left_ptr->GetVarcharInner(left); + input = left_v.data(); + input_len = left_v.size(); + SizeT pos = 0; + while (pos < input_len && std::isspace(static_cast(input[pos]))) { + pos++; + } + + Span res_span = Span(&input[pos], input_len-pos); + result_ptr->AppendVarcharInner(res_span, result); +} + + +void RegisterLtrimFunction(const UniquePtr &catalog_ptr){ + String func_name = "ltrim"; + + SharedPtr function_set_ptr = MakeShared(func_name); + + ScalarFunction ltrim_function(func_name, + {DataType(LogicalType::kVarchar)}, + {DataType(LogicalType::kVarchar)}, + &ScalarFunction::UnaryFunctionVarlenToVarlen); + function_set_ptr->AddFunction(ltrim_function); + + Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr); +} + +} \ No newline at end of file diff --git a/src/function/scalar/ltrim.cppm b/src/function/scalar/ltrim.cppm new file mode 100644 index 0000000000..f56ff25ebd --- /dev/null +++ b/src/function/scalar/ltrim.cppm @@ -0,0 +1,13 @@ +module; + +import stl; + +export module ltrim; + +namespace infinity { + +class Catalog; + +export void RegisterLtrimFunction(const UniquePtr &catalog_ptr); + +} \ No newline at end of file diff --git a/src/function/scalar/rtrim.cpp b/src/function/scalar/rtrim.cpp new file mode 100644 index 0000000000..56d3a5e84b --- /dev/null +++ b/src/function/scalar/rtrim.cpp @@ -0,0 +1,62 @@ +module; + +#include + +module rtrim; + +import stl; +import catalog; +import status; +import infinity_exception; +import scalar_function; +import scalar_function_set; + +import third_party; +import logical_type; +import internal_types; +import data_type; +import logger; +import column_vector; + +namespace infinity { + +struct RtrimFunction { + template + static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) { + Status status = Status::NotSupport("Not implemented"); + RecoverableError(status); + } +}; + +template <> +inline void RtrimFunction::Run(VarcharT &left, VarcharT &result, ColumnVector *left_ptr, ColumnVector *result_ptr) { 
+ const char *input = nullptr; + SizeT input_len = 0; + Span left_v = left_ptr->GetVarcharInner(left); + input = left_v.data(); + input_len = left_v.size(); + long pos = input_len-1; + while (pos > -1 && std::isspace(static_cast(input[pos]))) { + pos--; + } + + Span res_span = Span(input, pos+1); + result_ptr->AppendVarcharInner(res_span, result); +} + + +void RegisterRtrimFunction(const UniquePtr &catalog_ptr){ + String func_name = "rtrim"; + + SharedPtr function_set_ptr = MakeShared(func_name); + + ScalarFunction rtrim_function(func_name, + {DataType(LogicalType::kVarchar)}, + {DataType(LogicalType::kVarchar)}, + &ScalarFunction::UnaryFunctionVarlenToVarlen); + function_set_ptr->AddFunction(rtrim_function); + + Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr); +} + +} \ No newline at end of file diff --git a/src/function/scalar/rtrim.cppm b/src/function/scalar/rtrim.cppm new file mode 100644 index 0000000000..56ef209058 --- /dev/null +++ b/src/function/scalar/rtrim.cppm @@ -0,0 +1,13 @@ +module; + +import stl; + +export module rtrim; + +namespace infinity { + +class Catalog; + +export void RegisterRtrimFunction(const UniquePtr &catalog_ptr); + +} \ No newline at end of file diff --git a/src/function/scalar/trim.cpp b/src/function/scalar/trim.cpp new file mode 100644 index 0000000000..cf26c95698 --- /dev/null +++ b/src/function/scalar/trim.cpp @@ -0,0 +1,74 @@ +module; + +#include + +module trim; + +import stl; +import catalog; +import status; +import infinity_exception; +import scalar_function; +import scalar_function_set; + +import third_party; +import logical_type; +import internal_types; +import data_type; +import logger; +import column_vector; + +namespace infinity { + +struct TrimFunction { + template + static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) { + Status status = Status::NotSupport("Not implemented"); + RecoverableError(status); + } +}; + +template <> +inline void TrimFunction::Run(VarcharT &left, VarcharT &result, 
ColumnVector *left_ptr, ColumnVector *result_ptr) { + const char *input = nullptr; + SizeT input_len = 0; + Span left_v = left_ptr->GetVarcharInner(left); + input = left_v.data(); + input_len = left_v.size(); + SizeT lpos = 0; + while (lpos < input_len && std::isspace(static_cast(input[lpos]))) { + lpos++; + } + + if (lpos == input_len) { + // Construct empty varchar value; + Span substr_span = Span(input, 0); + result_ptr->AppendVarcharInner(substr_span, result); + return; + } + + long rpos = input_len-1; + while (rpos > -1 && std::isspace(static_cast(input[rpos]))) { + rpos--; + } + + Span res_span = Span(&input[lpos], rpos-lpos+1); + result_ptr->AppendVarcharInner(res_span, result); +} + + +void RegisterTrimFunction(const UniquePtr &catalog_ptr){ + String func_name = "trim"; + + SharedPtr function_set_ptr = MakeShared(func_name); + + ScalarFunction trim_function(func_name, + {DataType(LogicalType::kVarchar)}, + {DataType(LogicalType::kVarchar)}, + &ScalarFunction::UnaryFunctionVarlenToVarlen); + function_set_ptr->AddFunction(trim_function); + + Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr); +} + +} \ No newline at end of file diff --git a/src/function/scalar/trim.cppm b/src/function/scalar/trim.cppm new file mode 100644 index 0000000000..e3fe5fb9bc --- /dev/null +++ b/src/function/scalar/trim.cppm @@ -0,0 +1,13 @@ +module; + +import stl; + +export module trim; + +namespace infinity { + +class Catalog; + +export void RegisterTrimFunction(const UniquePtr &catalog_ptr); + +} \ No newline at end of file diff --git a/test/sql/dql/type/varchar.slt b/test/sql/dql/type/varchar.slt index a7afbffd22..15635699a6 100644 --- a/test/sql/dql/type/varchar.slt +++ b/test/sql/dql/type/varchar.slt @@ -44,12 +44,12 @@ statement ok INSERT INTO test_varchar_filter VALUES ('abc', 'abcd', 5); query VI -SELECT * FROM test_varchar_filter where char_length(c1) = 8; +SELECT *, char_length(c1) FROM test_varchar_filter where char_length(c1) = 8; ---- -abcddddd abcddddd 1 
-abcddddc abcddddd 2 -abcdddde abcddddd 3 -abcdddde abcdddde 4 +abcddddd abcddddd 1 8 +abcddddc abcddddd 2 8 +abcdddde abcddddd 3 8 +abcdddde abcdddde 4 8 query VII SELECT * FROM test_varchar_filter where char_length(c1) = 3; @@ -72,10 +72,10 @@ statement ok INSERT INTO test_varchar_filter VALUES ('regex@regex.com', 'gmail@gmail.com', 6); query X -SELECT * FROM test_varchar_filter where regex(c1, 'abc\w+e'); +SELECT *, regex(c1, 'abc\w+e') FROM test_varchar_filter where regex(c1, 'abc\w+e'); ---- -abcdddde abcddddd 3 -abcdddde abcdddde 4 +abcdddde abcddddd 3 true +abcdddde abcdddde 4 true query XI SELECT * FROM test_varchar_filter where regex(c1, 'ddddc'); @@ -88,12 +88,12 @@ SELECT * FROM test_varchar_filter where regex(c1, '(\w+([-+.]\w+)*)@(\w+([-.]\w+ regex@regex.com gmail@gmail.com 6 query XIII -SELECT * FROM test_varchar_filter where substring(c1, 0, 4) = 'abcd'; +SELECT *, substring(c1, 0, 4) FROM test_varchar_filter where substring(c1, 0, 4) = 'abcd'; ---- -abcddddd abcddddd 1 -abcddddc abcddddd 2 -abcdddde abcddddd 3 -abcdddde abcdddde 4 +abcddddd abcddddd 1 abcd +abcddddc abcddddd 2 abcd +abcdddde abcddddd 3 abcd +abcdddde abcdddde 4 abcd query XIV SELECT * FROM test_varchar_filter where substring(c1, 0, 0) = ''; @@ -106,14 +106,34 @@ abc abcd 5 regex@regex.com gmail@gmail.com 6 query XV -SELECT * FROM test_varchar_filter where upper(c1) = 'ABCDDDDD'; +SELECT *, upper(c1) FROM test_varchar_filter where upper(c1) = 'ABCDDDDD'; ---- -abcddddd abcddddd 1 +abcddddd abcddddd 1 ABCDDDDD query XVI -SELECT * FROM test_varchar_filter where lower('ABCDDDDD') = c1; +SELECT *, lower(c1) FROM test_varchar_filter where lower('ABCDDDDD') = c1; ---- -abcddddd abcddddd 1 +abcddddd abcddddd 1 abcddddd + +statement ok +INSERT INTO test_varchar_filter VALUES (' a b c', 'abc', 7), ('a b c ', 'abc', 8), (' a b c ', 'abc', 9); + +query XVII +SELECT *, ltrim(c1) FROM test_varchar_filter where ltrim(c1) = 'a b c'; +---- + a b c abc 7 a b c + +query XVIII +SELECT *, rtrim(c1) FROM 
test_varchar_filter where rtrim(c1) = 'a b c'; +---- +a b c abc 8 a b c + +query XIX +SELECT *, trim(c1) FROM test_varchar_filter where trim(c1) = 'a b c'; +---- + a b c abc 7 a b c +a b c abc 8 a b c + a b c abc 9 a b c statement ok DROP TABLE test_varchar_filter;