Skip to content

Commit

Permalink
Support Substring function (#2061)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Add support for the `substring(str, start, length)` scalar function on varchar columns.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Test cases
  • Loading branch information
Ami11111 authored Oct 17, 2024
1 parent ae34fad commit bd43665
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 52 deletions.
15 changes: 15 additions & 0 deletions example/http/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,21 @@ curl --request GET \
"filter": "regex(body, '\''('[0-9A-Za-z_]+'('[-+.][0-9A-Za-z_]+')''*'')'@'('[0-9A-Za-z_]+'('[-.][0-9A-Za-z_]+')''*'')''\\'.'('[0-9A-Za-z_]+'('[-.][0-9A-Za-z_]+')''*'')'\'')"
} '

# Show rows of 'tbl1' where the first 4 chars of body are 'test'.
# The filter calls substring(body, 0, 4): start offset 0, length 4.
# Only the "body" column is requested in the output.
echo -e '\n\n-- show rows of 'tbl1' where first 4 chars of body is 'test''
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"body"
],
"filter": "substring(body, 0, 4) = '\'test\''"
} '

# drop tbl1
echo -e '\n\n-- drop tbl1'
curl --request DELETE \
Expand Down
4 changes: 2 additions & 2 deletions src/function/scalar/regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ void RegisterRegexFunction(const UniquePtr<Catalog> &catalog_ptr){

SharedPtr<ScalarFunctionSet> function_set_ptr = MakeShared<ScalarFunctionSet>(func_name);

ScalarFunction Regex_function(func_name,
ScalarFunction regex_function(func_name,
{DataType(LogicalType::kVarchar), DataType(LogicalType::kVarchar)},
DataType(LogicalType::kBoolean),
&ScalarFunction::BinaryFunction<VarcharT, VarcharT, BooleanT, RegexFunction>);
function_set_ptr->AddFunction(Regex_function);
function_set_ptr->AddFunction(regex_function);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
}
Expand Down
81 changes: 35 additions & 46 deletions src/function/scalar/substring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,59 +33,48 @@ namespace infinity {

struct SubstrFunction {
template <typename TA, typename TB, typename TC, typename TD>
static inline bool Run(TA, TB, TC, TD &, ColumnVector *) {
static inline bool Run(TA &first, TB &second, TC &third, TD &result, ColumnVector *first_ptr, ColumnVector *result_ptr) {
String error_message = "Not implement: SubstrFunction::Run";
UnrecoverableError(error_message);
}
};

template <>
inline bool SubstrFunction::Run(VarcharT, BigIntT, BigIntT, VarcharT &, ColumnVector *) {
// Validate the input before slice the string
String error_message = "Not implement: SubstrFunction::Run";
UnrecoverableError(error_message);
inline bool SubstrFunction::Run(VarcharT &first, BigIntT &second, BigIntT &third, VarcharT &result, ColumnVector *first_ptr, ColumnVector * result_ptr) {
if (second < 0) {
UnrecoverableError(fmt::format("substring start offset should >= 0, currently it is {}", second));
}

if (third < 0) {
UnrecoverableError(fmt::format("substring length should >= 0, currently it is {}", second));
}

// if (second < 0) {
// Error<UnrecoverableException>(fmt::format("substring start offset should >= 0, currently it is {}", second));
// }
//
// if (third < 0) {
// Error<UnrecoverableException>(fmt::format("substring length should >= 0, currently it is {}", second));
// }
//
// if (third == 0) {
// // Construct empty varchar value;
// result.InitializeAsEmptyStr();
// return true;
// }
//
// SizeT source_len = first.GetDataLen();
// if (second >= source_len) {
// // Construct empty varchar value;
// result.InitializeAsEmptyStr();
// return true;
// }
//
// SizeT start_offset = second;
// SizeT end_offset = 0;
// if (start_offset + third >= source_len) {
// end_offset = source_len;
// } else {
// end_offset = start_offset + third;
// }
//
// SizeT copied_length = end_offset - start_offset;
// ptr_t source_ptr = first.GetDataPtr();
// if (copied_length <= VarcharT::INLINE_LENGTH) {
// // inline varchar
// std::memcpy(result.prefix, source_ptr + start_offset, copied_length);
// result.length = copied_length;
// } else {
// std::memcpy(result.prefix, source_ptr + start_offset, VarcharT::INLINE_LENGTH);
// result.ptr = column_vector_ptr->buffer_->fix_heap_mgr_->Allocate(copied_length);
// std::memcpy(result.ptr, source_ptr + start_offset, copied_length);
// }
Span<const char> first_v = first_ptr->GetVarcharInner(first);
if (third == 0) {
// Construct empty varchar value;
Span<const char> substr_span = Span<const char>(first_v.data(), 0);
result_ptr->AppendVarcharInner(substr_span, result);
return true;
}

SizeT source_len = first_v.size();
if ((SizeT)second >= source_len) {
// Construct empty varchar value;
Span<const char> substr_span = Span<const char>(first_v.data(), 0);;
result_ptr->AppendVarcharInner(substr_span, result);
return true;
}

SizeT start_offset = second;
SizeT end_offset = 0;
if (start_offset + third >= source_len) {
end_offset = source_len;
} else {
end_offset = start_offset + third;
}

Span<const char> substr_span = Span<const char>(first_v.data() + start_offset, end_offset - start_offset);
result_ptr->AppendVarcharInner(substr_span, result);

return true;
}
Expand All @@ -98,7 +87,7 @@ void RegisterSubstringFunction(const UniquePtr<Catalog> &catalog_ptr) {
ScalarFunction varchar_substr(func_name,
{DataType(LogicalType::kVarchar), DataType(LogicalType::kBigInt), DataType(LogicalType::kBigInt)},
{DataType(LogicalType::kVarchar)},
&ScalarFunction::TernaryFunctionToVarlenWithFailure<VarcharT, BigIntT, BigIntT, VarcharT, SubstrFunction>);
&ScalarFunction::TernaryFunctionVarlenToVarlenWithFailure<VarcharT, BigIntT, BigIntT, VarcharT, SubstrFunction>);
function_set_ptr->AddFunction(varchar_substr);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
Expand Down
56 changes: 52 additions & 4 deletions src/function/scalar_function.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct BinaryOpDirectWrapper {
// Wrapper that forwards the three inputs and the result slot straight to
// Operator::Run. The null bitmask, row index and both state pointers are
// accepted to match the ternary executor's callback shape but are not used.
template <typename Operator>
struct TernaryOpDirectWrapper {
    template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
    inline static void Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *, SizeT, void *, void *) {
        return Operator::template Run<FirstType, SecondType, ThirdType, ResultType>(first, second, third, result);
    }
};
Expand Down Expand Up @@ -94,7 +94,7 @@ struct BinaryTryOpWrapper {
template <typename Operator>
struct TernaryTryOpWrapper {
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
inline static void Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *nulls_ptr, SizeT idx, void *) {
inline static void Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *nulls_ptr, SizeT idx, void *, void *) {
if (Operator::template Run<FirstType, SecondType, ThirdType, ResultType>(first, second, third, result)) {
return;
}
Expand Down Expand Up @@ -125,7 +125,7 @@ struct BinaryOpDirectToVarlenWrapper {
template <typename Operator>
struct TernaryOpDirectToVarlenWrapper {
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
inline static void Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *, SizeT, void *state_ptr) {
inline static void Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *, SizeT, void *, void *state_ptr) {
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
return Operator::template Run<FirstType, SecondType, ThirdType, ResultType>(first,
second,
Expand Down Expand Up @@ -167,7 +167,7 @@ template <typename Operator>
struct TernaryTryOpToVarlenWrapper {
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
inline static void
Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *nulls_ptr, SizeT idx, void *state_ptr) {
Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *nulls_ptr, SizeT idx, void *, void *state_ptr) {
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
if (Operator::template Run<FirstType, SecondType, ThirdType, ResultType>(first,
second,
Expand All @@ -192,6 +192,27 @@ struct UnaryOpDirectVarlenToVarlenWrapper {
}
};

template <typename Operator>
struct TernaryTryOpVarlenToVarlenWrapper {
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
inline static void
Execute(FirstType first, SecondType second, ThirdType third, ResultType &result, Bitmask *nulls_ptr, SizeT idx, void *first_ptr, void *state_ptr) {
auto *function_data_ptr_first = (ScalarFunctionData *)(first_ptr);
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
if (Operator::template Run<FirstType, SecondType, ThirdType, ResultType>(first,
second,
third,
result,
function_data_ptr_first->column_vector_ptr_,
function_data_ptr->column_vector_ptr_)) {
return;
}

nulls_ptr->SetFalse(idx);
result = NullValue<ResultType>();
}
};


using ScalarFunctionType = std::function<void(const DataBlock &, SharedPtr<ColumnVector> &)>;

Expand Down Expand Up @@ -409,6 +430,7 @@ public:
output,
input.row_count(),
nullptr,
nullptr,
true);
}

Expand All @@ -429,6 +451,7 @@ public:
output,
input.row_count(),
nullptr,
nullptr,
true);
}

Expand All @@ -449,6 +472,7 @@ public:
input.column_vectors[2],
output,
input.row_count(),
nullptr,
&function_data,
true);
}
Expand All @@ -470,6 +494,30 @@ public:
input.column_vectors[2],
output,
input.row_count(),
nullptr,
&function_data,
true);
}

// Ternary scalar function that may fail per row, wired so the operator can
// reach both the first input's column vector and the output column vector.
//   input  - data block; must be finalized and hold exactly three columns
//   output - column vector that receives one result per input row
// Both preconditions are reported through UnrecoverableError when violated.
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType, typename Operation>
static inline void TernaryFunctionVarlenToVarlenWithFailure(const DataBlock &input, SharedPtr<ColumnVector> &output) {
    if (input.column_count() != 3) {
        String error_message = "Ternary function: input column count isn't three.";
        UnrecoverableError(error_message);
    }
    if (!input.Finalized()) {
        // This branch fires when the block is NOT finalized, so say so.
        String error_message = "Input data block isn't finalized";
        UnrecoverableError(error_message);
    }
    // One state object per column vector the operator needs: the first input
    // and the output.
    ScalarFunctionData function_data_first(input.column_vectors[0].get());
    ScalarFunctionData function_data(output.get());
    TernaryOperator::Execute<FirstType, SecondType, ThirdType, ResultType, TernaryTryOpVarlenToVarlenWrapper<Operation>>(input.column_vectors[0],
                                                                                                                       input.column_vectors[1],
                                                                                                                       input.column_vectors[2],
                                                                                                                       output,
                                                                                                                       input.row_count(),
                                                                                                                       &function_data_first,
                                                                                                                       &function_data,
                                                                                                                       true);
}
Expand Down
Loading

0 comments on commit bd43665

Please sign in to comment.