Skip to content

Commit

Permalink
Support ltrim, rtrim, trim functions (#2064)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
Support ltrim, rtrim, trim functions

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
  • Loading branch information
Ami11111 authored Oct 18, 2024
1 parent 4057850 commit 0ea7ed0
Show file tree
Hide file tree
Showing 10 changed files with 356 additions and 26 deletions.
32 changes: 23 additions & 9 deletions example/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,42 +20,56 @@
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'},
{"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'},
{"c1": 'abcd', "c2": 'abc'},
{"c1": '[email protected]', "c2": 'email'}, {"c1": '[email protected]', "c2": 'email'}])
{"c1": '[email protected]', "c2": 'email'}, {"c1": '[email protected]', "c2": 'email'},
{"c1": ' abc', "c2": 'abc'}, {"c1": 'abc ', "c2": 'abc'}, {"c1": ' abc ', "c2": 'abc'}])

#function char_length
res = table_obj.output(["*"]).filter("char_length(c1) = 1").to_df()
res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 1").to_df()
print(res)

res = table_obj.output(["*"]).filter("char_length(c1) = 3").to_df()
res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 3").to_df()
print(res)

res = table_obj.output(["*"]).filter("char_length(c1) = 4").to_df()
res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 4").to_df()
print(res)

res = table_obj.output(["*"]).filter("char_length(c1) = char_length(c2)").to_df()
res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = char_length(c2)").to_df()
print(res)

#function regex
res = table_obj.output(["*"]).filter("regex(c1, 'bc')").to_df()
res = table_obj.output(["*", "regex(c1, 'bc')"]).filter("regex(c1, 'bc')").to_df()
print(res)

res = table_obj.output(["*"]).filter("regex(c1, '(\w+([-+.]\w+)*)@(\w+([-.]\w+)*)\.(\w+([-.]\w+)*)')").to_df()
print(res)

#function substring
res = table_obj.output(["*"]).filter("substring(c1, 0, 2) = 'ab'").to_df()
res = table_obj.output(["*", "substring(c1, 0, 2)"]).filter("substring(c1, 0, 2) = 'ab'").to_df()
print(res)

res = table_obj.output(["*"]).filter("substring(c1, 0, 4) = 'test'").to_df()
res = table_obj.output(["*", "substring(c1, 0, 4)"]).filter("substring(c1, 0, 4) = 'test'").to_df()
print(res)

#function upper and lower
res = table_obj.output(["*"]).filter("upper(c1) = '[email protected]'").to_df()
res = table_obj.output(["*", "upper(c1)"]).filter("upper(c1) = '[email protected]'").to_df()
print(res)

res = table_obj.output(["*"]).filter("lower('ABC') = c1").to_df()
print(res)

#function ltrim, rtrim, trim
res = table_obj.output(["*", "ltrim(c1)"]).filter("ltrim(c1) = 'abc'").to_df()
print(res)

res = table_obj.output(["*", "rtrim(c1)"]).filter("rtrim(c1) = 'abc'").to_df()
print(res)

res = table_obj.output(["*", "trim(c1)"]).filter("trim(c1) = 'abc'").to_df()
print(res)

res = table_obj.output(["*"]).filter("trim(' abc ') = rtrim(ltrim(' abc '))").to_df()
print(res)

res = db_obj.drop_table("function_example")

infinity_obj.disconnect()
53 changes: 53 additions & 0 deletions example/http/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ curl --request POST \
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
},
{
"num": 8,
"body": " this is an example for trim ",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
}
] '

Expand Down Expand Up @@ -248,6 +256,51 @@ curl --request GET \
"filter": "body = lower('\'[email protected]\'')"
} '

# show rows of 'tbl1' where ltrim(body) is "this is an example for trim "
echo -e '\n\n-- show rows of 'tbl1' where ltrim(body) is "this is an example for trim "'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"body"
],
"filter": "ltrim(body) = '\''this is an example for trim '\''"
} '

# show rows of 'tbl1' where rtrim(body) is " this is an example for trim"
echo -e '\n\n-- show rows of 'tbl1' where rtrim(body) is " this is an example for trim"'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"body"
],
"filter": "rtrim(body) = '\'' this is an example for trim'\''"
} '

# show rows of 'tbl1' where trim(body) is "this is an example for trim"
echo -e '\n\n-- show rows of 'tbl1' where trim(body) is "this is an example for trim"'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"body"
],
"filter": "trim(body) = '\''this is an example for trim'\''"
} '

# drop tbl1
echo -e '\n\n-- drop tbl1'
curl --request DELETE \
Expand Down
6 changes: 6 additions & 0 deletions src/function/builtin_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ import md5;
import lower;
import upper;
import regex;
import ltrim;
import rtrim;
import trim;
import default_values;
import special_function;
import internal_types;
Expand Down Expand Up @@ -123,6 +126,9 @@ void BuiltinFunctions::RegisterScalarFunction() {
RegisterLowerFunction(catalog_ptr_);
RegisterUpperFunction(catalog_ptr_);
RegisterRegexFunction(catalog_ptr_);
RegisterLtrimFunction(catalog_ptr_);
RegisterRtrimFunction(catalog_ptr_);
RegisterTrimFunction(catalog_ptr_);
}

void BuiltinFunctions::RegisterTableFunction() {}
Expand Down
62 changes: 62 additions & 0 deletions src/function/scalar/ltrim.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
module;

#include<cctype>

module ltrim;

import stl;
import catalog;
import status;
import infinity_exception;
import scalar_function;
import scalar_function_set;

import third_party;
import logical_type;
import internal_types;
import data_type;
import logger;
import column_vector;

namespace infinity {

struct LtrimFunction {
template <typename TA, typename TB, typename TC, typename TD>
static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) {
Status status = Status::NotSupport("Not implemented");
RecoverableError(status);
}
};

// VarcharT specialization: strips leading whitespace (as classified by
// std::isspace) from `left` and appends the remainder to `result`.
//
// left       - input varchar value, resolved through `left_ptr`.
// result     - output varchar slot, filled through `result_ptr`.
// left_ptr   - column vector used to read the characters of `left`.
// result_ptr - column vector that receives the trimmed characters.
template <>
inline void LtrimFunction::Run(VarcharT &left, VarcharT &result, ColumnVector *left_ptr, ColumnVector *result_ptr) {
    Span<const char> source = left_ptr->GetVarcharInner(left);
    const char *cursor = source.data();
    const char *const stop = cursor + source.size();

    // Advance past every leading whitespace byte; the unsigned char cast keeps
    // std::isspace well-defined for bytes >= 0x80.
    while (cursor != stop && std::isspace(static_cast<unsigned char>(*cursor))) {
        ++cursor;
    }

    // An all-whitespace input leaves cursor == stop and yields an empty span.
    Span<const char> trimmed = Span<const char>(cursor, static_cast<SizeT>(stop - cursor));
    result_ptr->AppendVarcharInner(trimmed, result);
}


void RegisterLtrimFunction(const UniquePtr<Catalog> &catalog_ptr){
String func_name = "ltrim";

SharedPtr<ScalarFunctionSet> function_set_ptr = MakeShared<ScalarFunctionSet>(func_name);

ScalarFunction ltrim_function(func_name,
{DataType(LogicalType::kVarchar)},
{DataType(LogicalType::kVarchar)},
&ScalarFunction::UnaryFunctionVarlenToVarlen<VarcharT, VarcharT, LtrimFunction>);
function_set_ptr->AddFunction(ltrim_function);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
}

}
13 changes: 13 additions & 0 deletions src/function/scalar/ltrim.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module;

import stl;

export module ltrim;

namespace infinity {

class Catalog;

// Registers the "ltrim" scalar function set (varchar -> varchar) with the given catalog.
export void RegisterLtrimFunction(const UniquePtr<Catalog> &catalog_ptr);

}
62 changes: 62 additions & 0 deletions src/function/scalar/rtrim.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
module;

#include<cctype>

module rtrim;

import stl;
import catalog;
import status;
import infinity_exception;
import scalar_function;
import scalar_function_set;

import third_party;
import logical_type;
import internal_types;
import data_type;
import logger;
import column_vector;

namespace infinity {

struct RtrimFunction {
template <typename TA, typename TB, typename TC, typename TD>
static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) {
Status status = Status::NotSupport("Not implemented");
RecoverableError(status);
}
};

// VarcharT specialization: strips trailing whitespace (as classified by
// std::isspace) from `left` and appends the remaining prefix to `result`.
//
// left       - input varchar value, resolved through `left_ptr`.
// result     - output varchar slot, filled through `result_ptr`.
// left_ptr   - column vector used to read the characters of `left`.
// result_ptr - column vector that receives the trimmed characters.
template <>
inline void RtrimFunction::Run(VarcharT &left, VarcharT &result, ColumnVector *left_ptr, ColumnVector *result_ptr) {
    Span<const char> left_v = left_ptr->GetVarcharInner(left);
    const char *input = left_v.data();

    // `end` is the exclusive end of the kept prefix. Keeping it in SizeT avoids
    // narrowing the length into a signed `long` (32-bit on LLP64 targets) and
    // removes the need for a -1 sentinel.
    SizeT end = left_v.size();
    while (end > 0 && std::isspace(static_cast<unsigned char>(input[end - 1]))) {
        --end;
    }

    // Empty or all-whitespace input leaves end == 0 and yields an empty span.
    Span<const char> res_span = Span<const char>(input, end);
    result_ptr->AppendVarcharInner(res_span, result);
}


void RegisterRtrimFunction(const UniquePtr<Catalog> &catalog_ptr){
String func_name = "rtrim";

SharedPtr<ScalarFunctionSet> function_set_ptr = MakeShared<ScalarFunctionSet>(func_name);

ScalarFunction rtrim_function(func_name,
{DataType(LogicalType::kVarchar)},
{DataType(LogicalType::kVarchar)},
&ScalarFunction::UnaryFunctionVarlenToVarlen<VarcharT, VarcharT, RtrimFunction>);
function_set_ptr->AddFunction(rtrim_function);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
}

}
13 changes: 13 additions & 0 deletions src/function/scalar/rtrim.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module;

import stl;

export module rtrim;

namespace infinity {

class Catalog;

// Registers the "rtrim" scalar function set (varchar -> varchar) with the given catalog.
export void RegisterRtrimFunction(const UniquePtr<Catalog> &catalog_ptr);

}
74 changes: 74 additions & 0 deletions src/function/scalar/trim.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
module;

#include<cctype>

module trim;

import stl;
import catalog;
import status;
import infinity_exception;
import scalar_function;
import scalar_function_set;

import third_party;
import logical_type;
import internal_types;
import data_type;
import logger;
import column_vector;

namespace infinity {

struct TrimFunction {
template <typename TA, typename TB, typename TC, typename TD>
static inline void Run(TA &left, TB &result, TC left_ptr, TD result_ptr) {
Status status = Status::NotSupport("Not implemented");
RecoverableError(status);
}
};

// VarcharT specialization: strips both leading and trailing whitespace (as
// classified by std::isspace) from `left` and appends the rest to `result`.
//
// left       - input varchar value, resolved through `left_ptr`.
// result     - output varchar slot, filled through `result_ptr`.
// left_ptr   - column vector used to read the characters of `left`.
// result_ptr - column vector that receives the trimmed characters.
template <>
inline void TrimFunction::Run(VarcharT &left, VarcharT &result, ColumnVector *left_ptr, ColumnVector *result_ptr) {
    Span<const char> left_v = left_ptr->GetVarcharInner(left);
    const char *input = left_v.data();
    const SizeT input_len = left_v.size();

    // [begin, end) is the window that survives trimming. Both bounds stay in
    // SizeT, so there is no SizeT -> long narrowing and no signed/unsigned
    // mixing when the length is computed.
    SizeT begin = 0;
    while (begin < input_len && std::isspace(static_cast<unsigned char>(input[begin]))) {
        ++begin;
    }

    // Bounding the backward scan by `begin` makes the all-whitespace case fall
    // out naturally (end == begin, empty result) without a separate branch.
    SizeT end = input_len;
    while (end > begin && std::isspace(static_cast<unsigned char>(input[end - 1]))) {
        --end;
    }

    Span<const char> res_span = Span<const char>(input + begin, end - begin);
    result_ptr->AppendVarcharInner(res_span, result);
}


void RegisterTrimFunction(const UniquePtr<Catalog> &catalog_ptr){
String func_name = "trim";

SharedPtr<ScalarFunctionSet> function_set_ptr = MakeShared<ScalarFunctionSet>(func_name);

ScalarFunction trim_function(func_name,
{DataType(LogicalType::kVarchar)},
{DataType(LogicalType::kVarchar)},
&ScalarFunction::UnaryFunctionVarlenToVarlen<VarcharT, VarcharT, TrimFunction>);
function_set_ptr->AddFunction(trim_function);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
}

}
Loading

0 comments on commit 0ea7ed0

Please sign in to comment.