From 4964f7584a0b9ae7220dd09fee6a500328eabb19 Mon Sep 17 00:00:00 2001 From: yzq <58433399+yangzq50@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:06:26 +0800 Subject: [PATCH] remove bmm --- .../search/blockmax_maxscore_iterator.cpp | 113 ------------------ .../search/blockmax_maxscore_iterator.cppm | 84 ------------- .../invertedindex/search/query_node.cpp | 1 - 3 files changed, 198 deletions(-) delete mode 100644 src/storage/invertedindex/search/blockmax_maxscore_iterator.cpp delete mode 100644 src/storage/invertedindex/search/blockmax_maxscore_iterator.cppm diff --git a/src/storage/invertedindex/search/blockmax_maxscore_iterator.cpp b/src/storage/invertedindex/search/blockmax_maxscore_iterator.cpp deleted file mode 100644 index 8c74837e0e..0000000000 --- a/src/storage/invertedindex/search/blockmax_maxscore_iterator.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -module; - -#include -#include -module blockmax_maxscore_iterator; -import stl; -import index_defines; -import term_doc_iterator; -import multi_doc_iterator; -import internal_types; -import logger; -import third_party; -import infinity_exception; - -namespace infinity { - -BlockMaxMaxscoreIterator::~BlockMaxMaxscoreIterator() { - OStringStream oss; - oss << "BlockMaxMaxscoreIterator: Debug Info:\n inner_pivot_loop_cnt: " << inner_pivot_loop_cnt - << " inner_must_have_loop_cnt: " << inner_must_have_loop_cnt_ << " use_prev_candidate_cnt: " << use_prev_candidate_cnt_ - << " not_use_prev_candidate_cnt: " << not_use_prev_candidate_cnt_ << "\n"; - oss << " pivot_history:\n"; - for (const auto &p : pivot_history_) { - oss << " pivot value: " << p.first << " at doc_id: " << p.second << '\n'; - } - oss << " must_have_history:\n"; - for (const auto &p : must_have_history_) { - oss << " must_have value: " << p.first << " at doc_id: " << p.second << '\n'; - } - LOG_DEBUG(std::move(oss).str()); -} - -BlockMaxMaxscoreIterator::BlockMaxMaxscoreIterator(Vector> &&iterators) : MultiDocIterator(std::move(iterators)) { - SizeT num_iterators = children_.size(); - for (SizeT i = 0; i < num_iterators; i++) { - TermDocIterator *tdi = dynamic_cast(children_[i].get()); - if (tdi == nullptr) - continue; - bm25_score_upper_bound_ += tdi->BM25ScoreUpperBound(); - doc_freq_ += tdi->GetDF(); - sorted_iterators_.push_back(tdi); - } - std::sort(sorted_iterators_.begin(), sorted_iterators_.end(), [](const auto &a, const auto &b) { - return a->BM25ScoreUpperBound() > b->BM25ScoreUpperBound(); - }); - Init(); -} - -void BlockMaxMaxscoreIterator::Init() { - common_block_max_bm25_score_parts_.resize(sorted_iterators_.size()); - leftover_scores_upper_bound_.resize(sorted_iterators_.size()); - for (u32 i = sorted_iterators_.size() - 1; i > 0; --i) { - leftover_scores_upper_bound_[i - 1] = leftover_scores_upper_bound_[i] + sorted_iterators_[i]->BM25ScoreUpperBound(); - } - bm25_score_upper_bound_ = leftover_scores_upper_bound_[0] + sorted_iterators_[0]->BM25ScoreUpperBound(); -} - -bool BlockMaxMaxscoreIterator::Next(RowID doc_id) { - assert(doc_id != INVALID_ROWID); - bm25_score_cached_ = false; - SizeT num_iterators = sorted_iterators_.size(); - if (doc_id_ == INVALID_ROWID) { - // Initialize children once. - for (SizeT i = 0; i < num_iterators; i++) { - sorted_iterators_[i]->Next(0); - } - } else { - assert(pivot_ < num_iterators); - assert(doc_id_ < doc_id); - // Move all pointers from lists[0] to lists[p] by calling Next(list, d + 1) - for (SizeT i = 0; i <= pivot_ && sorted_iterators_[i]->DocID() < doc_id; i++) { - sorted_iterators_[i]->Next(doc_id); - } - } - return true; -} - -float BlockMaxMaxscoreIterator::BM25Score() { - if (bm25_score_cached_) [[unlikely]] { - return bm25_score_cache_; - } - float sum_score = 0.0f; - bm25_score_cached_ = true; - bm25_score_cache_ = sum_score; - return sum_score; -} - -void BlockMaxMaxscoreIterator::UpdateScoreThreshold(const float threshold) { - if (threshold <= threshold_) - return; - threshold_ = threshold; -} - -u32 BlockMaxMaxscoreIterator::MatchCount() const { - UnrecoverableError("BMM not supported now"); - return 0; -} - -} // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/search/blockmax_maxscore_iterator.cppm b/src/storage/invertedindex/search/blockmax_maxscore_iterator.cppm deleted file mode 100644 index b84c447087..0000000000 --- a/src/storage/invertedindex/search/blockmax_maxscore_iterator.cppm +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -module; - -export module blockmax_maxscore_iterator; -import stl; -import doc_iterator; -import term_doc_iterator; -import multi_doc_iterator; -import internal_types; -import index_defines; - -namespace infinity { - -// equivalent to "OR" iterator -export class BlockMaxMaxscoreIterator final : public MultiDocIterator { -public: - explicit BlockMaxMaxscoreIterator(Vector> &&iterators); - - ~BlockMaxMaxscoreIterator() override; - - DocIteratorType GetType() const override { return DocIteratorType::kBMMIterator; } - - String Name() const override { return "BlockMaxMaxscoreIterator"; } - - void UpdateScoreThreshold(float threshold) override; - - bool Next(RowID doc_id) override; - - float BM25Score() override; - - u32 MatchCount() const override; - -private: - void Init(); - // won't change after initialization - Vector sorted_iterators_; // sort by BM25ScoreUpperBound, in descending order - Vector leftover_scores_upper_bound_; // value at i: upper bound of sum of BM25 scores for iter i + 1, i + 2, ..., n - 1 - // block max info - RowID common_block_min_possible_doc_id_ = INVALID_ROWID; - RowID common_block_last_doc_id_ = INVALID_ROWID; - float common_block_max_bm25_score_ = 0.0f; - Vector common_block_max_bm25_score_parts_; // value at i: blockmax of sum of BM25 scores for iter i + 1, i + 2, ..., n - 1 - // pivot and must_have info - // separate the iterators into two parts: - // 1. [0, pivot) : cannot add into part 2 - // 2. [pivot, n) : sum < threshold - u32 pivot_ = sorted_iterators_.size(); - // there is an "AND" requirement for the iterators 0, 1, ..., must_have_before_ - 1 - // we have must_have_before_ > 0 <=> pivot_ <= 1. - // so: case 1: must_have_before_ = 0, pivot_ > 1. - // case 2: must_have_before_ > 0, pivot_ = 1. - u32 must_have_before_ = 0; - float must_have_total_upper_bound_score_ = 0.0f; - Vector bool_need_score_; - // bm25 score cache - bool bm25_score_cached_ = false; - bool need_seek_after_must_ = false; - bool need_seek_after_pivot_ = false; - float bm25_score_cache_ = 0.0f; - RowID prev_next_candidate_ = INVALID_ROWID; - - // debug info - u32 inner_pivot_loop_cnt = 0; - u32 inner_must_have_loop_cnt_ = 0; - u32 use_prev_candidate_cnt_ = 0; - u32 not_use_prev_candidate_cnt_ = 0; - Vector> pivot_history_; - Vector> must_have_history_; -}; - -} // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/search/query_node.cpp b/src/storage/invertedindex/search/query_node.cpp index e7ae7cab21..1fa26e3015 100644 --- a/src/storage/invertedindex/search/query_node.cpp +++ b/src/storage/invertedindex/search/query_node.cpp @@ -20,7 +20,6 @@ import or_iterator; import term_doc_iterator; import phrase_doc_iterator; import blockmax_wand_iterator; -import blockmax_maxscore_iterator; import minimum_should_match_iterator; namespace infinity {