Skip to content

Commit

Permalink
[Enhancement](inverted index) improve expr evaluate_inverted_index pe…
Browse files Browse the repository at this point in the history
…rformance and remove useless code apache#40600 (apache#41122)

cherry pick from apache#40600
  • Loading branch information
airborne12 authored Sep 23, 2024
1 parent 6e8a67c commit fbd5e76
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 63 deletions.
30 changes: 0 additions & 30 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -921,36 +921,6 @@ bool SegmentIterator::_need_read_data(ColumnId cid) {
return true;
}

// Decides whether `expr` is the expression-tree counterpart of the MATCH column
// predicate `match_pred`.
//
// Returns true only when every one of these holds (checked in this order):
//   1. `expr` is non-null and its node type is TExprNodeType::MATCH_PRED;
//   2. it has exactly two children, the first a slot reference and the second a constant;
//   3. the first child really is a VSlotRef (a warning is logged otherwise);
//   4. the second child produces a constant column that is a ColumnConst;
//   5. `match_pred->column_id()` equals `schema->column_id(<slot's column id>)`;
//   6. the predicate's value equals the first datum of the constant column.
// Any failed step yields false. `match_pred` and `schema` are dereferenced without a
// null check once steps 1-4 have passed.
bool SegmentIterator::_is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
                                                      const MatchPredicate* match_pred,
                                                      const Schema* schema) {
    if (expr == nullptr) {
        return false;
    }
    if (expr->node_type() != TExprNodeType::MATCH_PRED) {
        return false;
    }

    const auto& operands = expr->children();
    if (operands.size() != 2) {
        return false;
    }
    const auto& column_side = operands[0];
    const auto& literal_side = operands[1];
    if (!column_side->is_slot_ref() || !literal_side->is_constant()) {
        return false;
    }

    auto* slot = dynamic_cast<vectorized::VSlotRef*>(column_side.get());
    if (slot == nullptr) {
        LOG(WARNING) << column_side->debug_string() << " should be SlotRef";
        return false;
    }

    // The second child is a VLiteral, so no expr context is needed to fetch its constant column.
    std::shared_ptr<ColumnPtrWrapper> literal_wrapper;
    auto status = literal_side->get_const_col(nullptr /* context */, &literal_wrapper);
    if (!status.ok()) {
        return false;
    }
    if (!literal_wrapper) {
        return false;
    }

    const auto literal_column =
            check_and_get_column<vectorized::ColumnConst>(literal_wrapper->column_ptr);
    if (!literal_column) {
        return false;
    }
    if (match_pred->column_id() != schema->column_id(slot->column_id())) {
        return false;
    }
    return StringRef(match_pred->get_value()) == literal_column->get_data_at(0);
}

Status SegmentIterator::_apply_inverted_index() {
std::vector<ColumnPredicate*> remaining_predicates;
std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;
Expand Down
16 changes: 0 additions & 16 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,22 +362,6 @@ class SegmentIterator : public RowwiseIterator {
return 0;
}

// Returns true iff `pred` is a MATCH predicate and the same pointer does not occur
// anywhere in `remaining_predicates`.
bool _is_match_predicate_and_not_remaining(
        ColumnPredicate* pred, const std::vector<ColumnPredicate*>& remaining_predicates) {
    if (pred->type() != PredicateType::MATCH) {
        return false;
    }
    // Pointer-identity scan: any hit means the predicate is still "remaining".
    for (const ColumnPredicate* candidate : remaining_predicates) {
        if (candidate == pred) {
            return false;
        }
    }
    return true;
}

// Removes every element of `conjunct_roots` that compares equal to `expr`.
//
// The handle is copied before the removal pass on purpose: std::remove receives the
// value to compare against by reference. If a caller passes a reference to an element
// that lives inside `conjunct_roots` itself, that element may be moved-from or
// overwritten while the range is being compacted, and later comparisons would then be
// made against the wrong value. Comparing against a local copy makes the helper safe
// for that call pattern and behaves identically for non-aliasing callers.
void _delete_expr_from_conjunct_roots(const vectorized::VExprSPtr& expr,
                                      vectorized::VExprSPtrs& conjunct_roots) {
    const vectorized::VExprSPtr target = expr;
    conjunct_roots.erase(std::remove(conjunct_roots.begin(), conjunct_roots.end(), target),
                         conjunct_roots.end());
}

// Reports whether `expr` is a MATCH_PRED expression with exactly two children
// (a slot reference followed by a constant) that corresponds to `match_pred`:
// the slot's column, mapped through `schema`, must equal the predicate's column id,
// and the constant's first datum must equal the predicate's value.
// Returns false for a null `expr` or any shape mismatch.
bool _is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
const MatchPredicate* match_pred, const Schema* schema);

// NOTE(review): the definition is not visible from here. Presumably converts the
// columns identified by `col_ids` to their expected types and reports failure via
// the returned Status — confirm against the implementation.
Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);

bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
Expand Down
45 changes: 28 additions & 17 deletions be/src/vec/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,15 +603,26 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string&

Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
uint32_t segment_num_rows) {
// Pre-allocate vectors based on an estimated or known size
std::vector<segment_v2::InvertedIndexIterator*> iterators;
std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
std::vector<int> column_ids;
vectorized::ColumnsWithTypeAndName arguments;
VExprSPtrs children_exprs;
for (auto child : children()) {
// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.

// Reserve space to avoid multiple reallocations
const size_t estimated_size = children().size();
iterators.reserve(estimated_size);
data_type_with_names.reserve(estimated_size);
column_ids.reserve(estimated_size);
children_exprs.reserve(estimated_size);

auto index_context = context->get_inverted_index_context();

// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.
for (const auto& child : children()) {
if (child->node_type() == TExprNodeType::CAST_EXPR) {
auto* cast_expr = assert_cast<VCastExpr*>(child.get());
DCHECK_EQ(cast_expr->children().size(), 1);
Expand Down Expand Up @@ -653,7 +664,11 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
}
}

for (auto child : children_exprs) {
if (children_exprs.empty()) {
return Status::OK(); // Early exit if no children to process
}

for (const auto& child : children_exprs) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
auto column_id = column_slot_ref->column_id();
Expand Down Expand Up @@ -684,25 +699,21 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
column_literal->get_data_type(), column_literal->expr_name());
}
}
auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
if (iterators.empty()) {
return Status::OK();
}
// If arguments are empty, it means the left value in the expression is not a literal.
if (arguments.empty()) {
return Status::OK();

if (iterators.empty() || arguments.empty()) {
return Status::OK(); // Nothing to evaluate or no literals to compare against
}

auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
segment_num_rows, result_bitmap);
if (!res.ok()) {
return res;
}
if (!result_bitmap.is_empty()) {
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
result_bitmap);
for (auto column_id : column_ids) {
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
column_id);
index_context->set_inverted_index_result_for_expr(this, result_bitmap);
for (int column_id : column_ids) {
index_context->set_true_for_inverted_index_status(this, column_id);
}
// set fast_execute when expr evaluated by inverted index correctly
_can_fast_execute = true;
Expand Down

0 comments on commit fbd5e76

Please sign in to comment.