Skip to content

Commit

Permalink
Support scalar quantization for IVF index (#2090)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Add support for 4-bit and 8-bit scalar quantization to the IVF index.

Issue link: #2085

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
  • Loading branch information
yangzq50 authored Oct 23, 2024
1 parent 4e97924 commit d6a3ba4
Show file tree
Hide file tree
Showing 5 changed files with 458 additions and 79 deletions.
2 changes: 1 addition & 1 deletion src/storage/knn_index/emvb/emvb_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ EMVBQueryResultType EMVBIndex::query_token_num_helper(const f32 *query_ptr, u32
return query_token_num_helper<J...>(query_ptr, query_embedding_num, std::forward<decltype(query_args)>(query_args)...);
}

template <>
template <typename>
EMVBQueryResultType EMVBIndex::query_token_num_helper(const f32 *query_ptr, u32 query_embedding_num, auto &&...query_args) const {
auto error_msg = fmt::format("EMVBIndex::GetQueryResult: query_embedding_num max value: {}, got {} instead.",
current_max_query_token_num,
Expand Down
2 changes: 1 addition & 1 deletion src/storage/knn_index/emvb/emvb_index.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ private:
template <u32 I, u32... J>
EMVBQueryResultType query_token_num_helper(const f32 *query_ptr, u32 query_embedding_num, auto &&...query_args) const;

template <>
template <typename = void>
EMVBQueryResultType query_token_num_helper(const f32 *query_ptr, u32 query_embedding_num, auto &&...query_args) const;

template <u32 FIXED_QUERY_TOKEN_NUM>
Expand Down
4 changes: 1 addition & 3 deletions src/storage/knn_index/knn_ivf/ivf_index_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,7 @@ void IVF_Index_Storage::SearchIndex(const KnnDistanceBase1 *knn_distance,
const auto centroid_dists = MakeUniqueForOverwrite<f32[]>(nprobe);
search_top_k_with_dis(nprobe, dimension, 1, query_f32_ptr, centroids_num, centroids_data, nprobe_result.data(), centroid_dists.get(), false);
}
for (const auto part_id : nprobe_result) {
ivf_parts_storage_->SearchIndex(part_id, this, knn_distance, query_ptr, query_element_type, satisfy_filter_func, add_result_func);
}
ivf_parts_storage_->SearchIndex(nprobe_result, this, knn_distance, query_ptr, query_element_type, satisfy_filter_func, add_result_func);
}

} // namespace infinity
2 changes: 1 addition & 1 deletion src/storage/knn_index/knn_ivf/ivf_index_storage.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public:
virtual void
AppendOneEmbedding(u32 part_id, const void *embedding_ptr, SegmentOffset segment_offset, const IVF_Centroids_Storage *ivf_centroids_storage) = 0;

virtual void SearchIndex(u32 part_id,
virtual void SearchIndex(const Vector<u32> &part_ids,
const IVF_Index_Storage *ivf_index_storage,
const KnnDistanceBase1 *knn_distance,
const void *query_ptr,
Expand Down
Loading

0 comments on commit d6a3ba4

Please sign in to comment.