-
Notifications
You must be signed in to change notification settings - Fork 229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FilterStore: unifying filter specific logic #452
base: main
Are you sure you want to change the base?
Changes from 44 commits
c445d20
cf389f8
2c07ffc
497abca
b5dfd90
bc92303
77c5444
0fa86c2
1faa5b8
ff7955a
c86f085
275afc1
91eb6c0
a0cd607
9402f01
26aa806
0c73589
9bcedad
887e644
a9ab92f
119ee63
332de43
e471cf9
3992c97
b18ce98
8a5c700
eded185
392c0ec
8cfcd5f
f4b430b
21925ee
1cb4aae
7edc594
8ba9475
11f8be4
e978f98
ccb187c
9ab9445
3cfaf3e
4f1e81b
a7f6b44
6f0f8f5
68e1dbf
615247a
0b9118f
7462bd0
84174bd
86159e8
ce7db4d
82f7182
e669521
df38242
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#pragma once | ||
#include "common_includes.h" | ||
#include "utils.h" | ||
#include <any> | ||
|
||
namespace diskann | ||
{ | ||
|
||
// Selects how incoming labels are matched against stored labels; it is the type of the strategy parameter of | ||
// AbstractFilterStore::detect_common_filters, where SET_INTERSECTION (the only enumerator) is the default. | ||
enum class FilterMatchStrategy | ||
{ | ||
SET_INTERSECTION | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
// This class is responsible for filter actions in the index and should not be used outside of it. | ||
// Abstract interface of the filter (label) store, templated on the internal label type label_type. | ||
// Apart from the constructor and the defaulted virtual destructor, every member function declared here is | ||
// pure virtual, so a concrete store must implement all of them. | ||
template <typename label_type> class AbstractFilterStore | ||
NeelamMahapatro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
public: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also didn't find methods to expand and shrink the filter store. |
||
// num_points is presumably the number of points the store is sized for; the definition is not in this | ||
// header (TODO confirm). | ||
DISKANN_DLLEXPORT AbstractFilterStore(const size_t num_points); | ||
// Virtual so that a concrete store can be destroyed through a pointer to this base class. | ||
virtual ~AbstractFilterStore() = default; | ||
|
||
// needs some internal lock + abstract implementation | ||
// Returns a bool for the given point and incoming labels under the chosen strategy; presumably true when | ||
// the labels stored for point_id match incoming_labels (TODO confirm against the implementation). | ||
// NOTE(review): default arguments of virtual functions are chosen from the static type at the call site, | ||
// so overrides should repeat the same default for strategy. | ||
DISKANN_DLLEXPORT virtual bool detect_common_filters( | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
uint32_t point_id, bool search_invocation, const std::vector<label_type> &incoming_labels, | ||
const FilterMatchStrategy strategy = FilterMatchStrategy::SET_INTERSECTION) = 0; | ||
|
||
// Read access to the labels stored for one location; the result is a reference, so no copy is made. | ||
DISKANN_DLLEXPORT virtual const std::vector<label_type> &get_labels_by_location(const location_t point_id) = 0; | ||
// Takes the labels as strings rather than as label_type values, unlike add_label_to_location below. | ||
DISKANN_DLLEXPORT virtual void set_labels_to_location(const location_t location, | ||
const std::vector<std::string> &labels) = 0; | ||
// Presumably exchanges the labels stored at the two locations (TODO confirm). | ||
DISKANN_DLLEXPORT virtual void swap_labels(const location_t location_first, const location_t location_second) = 0; | ||
|
||
// Returns a const reference to a set of label_type values; presumably every label known to the store | ||
// (TODO confirm). | ||
DISKANN_DLLEXPORT virtual const tsl::robin_set<label_type> &get_all_label_set() = 0; | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// NOTE(review): label is taken by non-const reference, so only lvalues can be passed. | ||
DISKANN_DLLEXPORT virtual void add_to_label_set(label_type &label) = 0; | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Adds a single label, passed by value, to the given location. | ||
// Throws: out of range exception | ||
DISKANN_DLLEXPORT virtual void add_label_to_location(const location_t point_id, label_type label) = 0; | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// returns internal mapping for given raw_label | ||
DISKANN_DLLEXPORT virtual label_type get_numeric_label(const std::string &raw_label) = 0; | ||
|
||
// Per-label medoid bookkeeping: each label maps to a uint32_t medoid id (see the map type returned below). | ||
DISKANN_DLLEXPORT virtual void update_medoid_by_label(const label_type &label, const uint32_t new_medoid) = 0; | ||
DISKANN_DLLEXPORT virtual const uint32_t &get_medoid_by_label(const label_type &label) = 0; | ||
DISKANN_DLLEXPORT virtual const std::unordered_map<label_type, uint32_t> &get_labels_to_medoids() = 0; | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Presumably true when a medoid has been recorded for label (TODO confirm). | ||
DISKANN_DLLEXPORT virtual bool label_has_medoid(const label_type &label) = 0; | ||
|
||
// TODO: in future we may accept a set or vector of universal labels | ||
// DISKANN_DLLEXPORT virtual void set_universal_label(label_type universal_label) = 0; | ||
DISKANN_DLLEXPORT virtual void set_universal_labels(const std::string &universal_labels) = 0; | ||
// The bool in the returned pair presumably says whether a universal label is set (TODO confirm). | ||
DISKANN_DLLEXPORT virtual std::pair<bool, label_type> get_universal_label() = 0; | ||
|
||
// takes raw label file and then generates internal mapping file and keeps the info of mapping | ||
// Returns a size_t whose meaning is not visible in this header (presumably a count; TODO confirm). | ||
DISKANN_DLLEXPORT virtual size_t load_raw_labels(const std::string &raw_labels_file, | ||
const std::string &raw_universal_label) = 0; | ||
|
||
// Each save_* function below takes the destination path for one part of the store state | ||
// (labels, raw labels, medoids, label map, universal label). | ||
DISKANN_DLLEXPORT virtual void save_labels(const std::string &save_path, const size_t total_points) = 0; | ||
rakri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// For dynamic filtered build, we compact the data and hence location_to_labels, we need the compacted version of | ||
// raw labels to compute GT correctly. | ||
DISKANN_DLLEXPORT virtual void save_raw_labels(const std::string &save_path, const size_t total_points) = 0; | ||
DISKANN_DLLEXPORT virtual void save_medoids(const std::string &save_path) = 0; | ||
DISKANN_DLLEXPORT virtual void save_label_map(const std::string &save_path) = 0; | ||
DISKANN_DLLEXPORT virtual void save_universal_label(const std::string &save_path) = 0; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we also need a public load() which calls the protected load* methods? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, need to do this, and perhaps remove the "friend" relation between filter store and index class? |
||
protected: | ||
// This is for internal use and only loads already parsed file | ||
DISKANN_DLLEXPORT virtual size_t load_labels(const std::string &labels_file) = 0; | ||
DISKANN_DLLEXPORT virtual size_t load_medoids(const std::string &labels_to_medoid_file) = 0; | ||
DISKANN_DLLEXPORT virtual void load_label_map(const std::string &labels_map_file) = 0; | ||
DISKANN_DLLEXPORT virtual void load_universal_labels(const std::string &universal_labels_file) = 0; | ||
|
||
private: | ||
// NOTE(review): private, so derived stores cannot read this member directly. | ||
size_t _num_points; | ||
|
||
// populates pts_to labels and _labels from given label file | ||
// Private pure virtual: derived classes can still override it, but only this class and its friends can | ||
// call it through this interface. | ||
virtual size_t parse_label_file(const std::string &label_file) = 0; | ||
|
||
// mark Index as friend so it can access protected loads | ||
template <typename T, typename TagT, typename LabelT> friend class Index; | ||
}; | ||
|
||
} // namespace diskann |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If numeric labels are not exposed, why are we using numeric labels here?