From a642cf161be7acfc3e25ebee1d441e73f733a6c6 Mon Sep 17 00:00:00 2001 From: Paris Morgan Date: Tue, 15 Oct 2024 10:44:29 -0700 Subject: [PATCH] Rename nlist to partitions --- .../src/tiledb/vector_search/ivf_pq_index.py | 2 +- .../vector_search/type_erased_module.cc | 6 +- src/include/api/ivf_flat_index.h | 40 ++++++------- src/include/api/ivf_pq_index.h | 56 +++++++++---------- src/include/index/ivf_flat_index.h | 6 +- src/include/index/ivf_pq_index.h | 6 +- src/include/test/unit_api_ivf_pq_index.cc | 18 +++--- src/include/test/unit_ivf_flat_index.cc | 36 ++++++------ src/include/test/utils/query_common.h | 16 +++--- 9 files changed, 94 insertions(+), 92 deletions(-) diff --git a/apis/python/src/tiledb/vector_search/ivf_pq_index.py b/apis/python/src/tiledb/vector_search/ivf_pq_index.py index 78a04f7e6..4b7f4b86a 100644 --- a/apis/python/src/tiledb/vector_search/ivf_pq_index.py +++ b/apis/python/src/tiledb/vector_search/ivf_pq_index.py @@ -226,7 +226,7 @@ def create( id_type=np.dtype(np.uint64).name, partitioning_index_type=np.dtype(np.uint64).name, dimensions=dimensions, - n_list=partitions if (partitions is not None and partitions != -1) else 0, + partitions=partitions if (partitions is not None and partitions != -1) else 0, num_subspaces=num_subspaces, distance_metric=int(distance_metric), ) diff --git a/apis/python/src/tiledb/vector_search/type_erased_module.cc b/apis/python/src/tiledb/vector_search/type_erased_module.cc index e323ef979..7b0eab528 100644 --- a/apis/python/src/tiledb/vector_search/type_erased_module.cc +++ b/apis/python/src/tiledb/vector_search/type_erased_module.cc @@ -491,9 +491,11 @@ void init_type_erased_module(py::module_& m) { "train", [](IndexIVFPQ& index, const FeatureVectorArray& vectors, - std::optional nlist) { index.train(vectors, nlist); }, + std::optional partitions) { + index.train(vectors, partitions); + }, py::arg("vectors"), - py::arg("nlist") = std::nullopt) + py::arg("partitions") = std::nullopt) .def( "add", [](IndexIVFPQ& index, const FeatureVectorArray& vectors) { diff --git a/src/include/api/ivf_flat_index.h b/src/include/api/ivf_flat_index.h index b8414621f..98597378f 100644 --- a/src/include/api/ivf_flat_index.h +++ b/src/include/api/ivf_flat_index.h @@ -100,8 +100,8 @@ class IndexIVFFlat { for (auto&& c : *config) { auto key = c.first; auto value = c.second; - if (key == "nlist") { - nlist_ = std::stol(value); + if (key == "partitions") { + partitions_ = std::stol(value); } else if (key == "dimensions") { dimensions_ = std::stol(value); } else if (key == "max_iter") { @@ -255,12 +255,12 @@ class IndexIVFFlat { " != " + std::to_string(index_->dimensions())); } dimensions_ = index_->dimensions(); - if (nlist_ != 0 && nlist_ != index_->num_partitions()) { + if (partitions_ != 0 && partitions_ != index_->num_partitions()) { throw std::runtime_error( - "nlist mismatch: " + std::to_string(nlist_) + + "partitions mismatch: " + std::to_string(partitions_) + " != " + std::to_string(index_->num_partitions())); } - nlist_ = index_->num_partitions(); + partitions_ = index_->num_partitions(); } /** @@ -292,49 +292,49 @@ class IndexIVFFlat { px_datatype_ == TILEDB_UINT32) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_FLOAT32 && id_datatype_ == TILEDB_UINT32 && px_datatype_ == TILEDB_UINT32) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_UINT8 && id_datatype_ == TILEDB_UINT32 && px_datatype_ == TILEDB_UINT64) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_FLOAT32 && id_datatype_ == TILEDB_UINT32 && px_datatype_ == TILEDB_UINT64) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_UINT8 && id_datatype_ == TILEDB_UINT64 && px_datatype_ == TILEDB_UINT32) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_FLOAT32 && id_datatype_ == TILEDB_UINT64 && px_datatype_ == TILEDB_UINT32) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_UINT8 && id_datatype_ == TILEDB_UINT64 && px_datatype_ == TILEDB_UINT64) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } else if ( feature_datatype_ == TILEDB_FLOAT32 && id_datatype_ == TILEDB_UINT64 && px_datatype_ == TILEDB_UINT64) { index_ = std::make_unique< index_impl>>( - nlist_, max_iterations_, tolerance_, num_threads_); + partitions_, max_iterations_, tolerance_, num_threads_); } index_->train(training_set, init); @@ -346,12 +346,12 @@ class IndexIVFFlat { } dimensions_ = index_->dimensions(); - if (nlist_ != 0 && nlist_ != index_->num_partitions()) { + if (partitions_ != 0 && partitions_ != index_->num_partitions()) { throw std::runtime_error( - "nlist mismatch: " + std::to_string(nlist_) + + "partitions mismatch: " + std::to_string(partitions_) + " != " + std::to_string(index_->num_partitions())); } - nlist_ = index_->num_partitions(); + partitions_ = index_->num_partitions(); } /** @@ -440,7 +440,7 @@ class IndexIVFFlat { } constexpr auto num_partitions() const { - return nlist_; + return partitions_; } constexpr tiledb_datatype_t feature_type() const { @@ -523,11 +523,11 @@ class IndexIVFFlat { } index_impl( - size_t nlist, + size_t partitions, size_t max_iter, float tolerance, std::optional num_threads) - : impl_index_(nlist, max_iter, tolerance) { + : impl_index_(partitions, max_iter, tolerance) { } index_impl( @@ -717,7 +717,7 @@ class IndexIVFFlat { }; uint64_t dimensions_ = 0; - size_t nlist_ = 0; + size_t partitions_ = 0; uint32_t max_iterations_ = 2; float tolerance_ = 1e-4; std::optional num_threads_ = std::nullopt; diff --git a/src/include/api/ivf_pq_index.h b/src/include/api/ivf_pq_index.h index 5b437a4a6..129a9bddd 100644 --- a/src/include/api/ivf_pq_index.h +++ b/src/include/api/ivf_pq_index.h @@ -99,8 +99,8 @@ class IndexIVFPQ { auto value = c.second; if (key == "dimensions") { dimensions_ = std::stol(value); - } else if (key == "n_list") { - n_list_ = std::stol(value); + } else if (key == "partitions") { + partitions_ = std::stol(value); } else if (key == "num_subspaces") { num_subspaces_ = std::stol(value); } else if (key == "max_iterations") { @@ -161,7 +161,7 @@ class IndexIVFPQ { } index_ = uri_dispatch_table.at(type)( ctx, group_uri, index_load_strategy, upper_bound, temporal_policy); - n_list_ = index_->nlist(); + partitions_ = index_->partitions(); num_subspaces_ = index_->num_subspaces(); max_iterations_ = index_->max_iterations(); convergence_tolerance_ = index_->convergence_tolerance(); @@ -180,13 +180,13 @@ class IndexIVFPQ { /** * @brief Train the index based on the given training set. * @param training_set The training input vectors. - * @param n_list The number of clusters to use in the index. Can be passed to - * override the value we used when we first created the index. + * @param partitions The number of clusters to use in the index. Can be passed + * to override the value we used when we first created the index. */ // @todo -- infer feature type from input void train( const FeatureVectorArray& training_set, - std::optional n_list = std::nullopt) { + std::optional partitions = std::nullopt) { if (feature_datatype_ == TILEDB_ANY) { feature_datatype_ = training_set.feature_type(); } else if (feature_datatype_ != training_set.feature_type()) { @@ -202,8 +202,8 @@ class IndexIVFPQ { throw std::runtime_error("Unsupported datatype combination"); } - if (n_list.has_value()) { - n_list_ = *n_list; + if (partitions.has_value()) { + partitions_ = *partitions; } // Create a new index. Note that we may have already loaded an existing @@ -211,7 +211,7 @@ class IndexIVFPQ { // num_subspaces_, etc.), but we should also use the timestamp from that // already loaded index. index_ = dispatch_table.at(type)( - n_list_, + partitions_, num_subspaces_, max_iterations_, convergence_tolerance_, @@ -309,8 +309,8 @@ class IndexIVFPQ { return upper_bound_; } - constexpr auto n_list() const { - return n_list_; + constexpr auto partitions() const { + return partitions_; } constexpr uint32_t num_subspaces() const { @@ -415,7 +415,7 @@ class IndexIVFPQ { [[nodiscard]] virtual uint64_t dimensions() const = 0; [[nodiscard]] virtual size_t upper_bound() const = 0; [[nodiscard]] virtual TemporalPolicy temporal_policy() const = 0; - [[nodiscard]] virtual uint64_t nlist() const = 0; + [[nodiscard]] virtual uint64_t partitions() const = 0; [[nodiscard]] virtual uint32_t num_subspaces() const = 0; [[nodiscard]] virtual uint32_t max_iterations() const = 0; [[nodiscard]] virtual float convergence_tolerance() const = 0; @@ -434,7 +434,7 @@ class IndexIVFPQ { } index_impl( - size_t n_list, + size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, @@ -442,7 +442,7 @@ class IndexIVFPQ { std::optional temporal_policy, DistanceMetric distance_metric) : impl_index_( - n_list, + partitions, num_subspaces, max_iterations, convergence_tolerance, @@ -572,7 +572,7 @@ class IndexIVFPQ { return impl_index_.temporal_policy(); } - uint64_t nlist() const override { + uint64_t partitions() const override { return impl_index_.partitions(); } @@ -619,7 +619,7 @@ class IndexIVFPQ { uint64_t dimensions_{0}; size_t upper_bound_{0}; - size_t n_list_{0}; + size_t partitions_{0}; uint32_t num_subspaces_{16}; uint32_t max_iterations_{2}; float convergence_tolerance_{0.000025f}; @@ -633,18 +633,18 @@ class IndexIVFPQ { // clang-format off const IndexIVFPQ::table_type IndexIVFPQ::dispatch_table = { - {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, - {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](size_t nlist, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(nlist, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_INT8, TILEDB_UINT32, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_UINT8, TILEDB_UINT32, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_FLOAT32, TILEDB_UINT32, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT32}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_INT8, TILEDB_UINT64, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_UINT8, TILEDB_UINT64, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, + {{TILEDB_FLOAT32, TILEDB_UINT64, TILEDB_UINT64}, [](size_t partitions, uint32_t num_subspaces, uint32_t max_iterations, float convergence_tolerance, float reassign_ratio, std::optional temporal_policy, DistanceMetric distance_metric) { return std::make_unique>>(partitions, num_subspaces, max_iterations, convergence_tolerance, reassign_ratio, temporal_policy, distance_metric); }}, }; const IndexIVFPQ::uri_table_type IndexIVFPQ::uri_dispatch_table = { diff --git a/src/include/index/ivf_flat_index.h b/src/include/index/ivf_flat_index.h index 83ed265dd..45deba513 100644 --- a/src/include/index/ivf_flat_index.h +++ b/src/include/index/ivf_flat_index.h @@ -156,7 +156,7 @@ class ivf_flat_index { * * @param dimensions Dimensions of the vectors comprising the training set and * the data set. - * @param nlist Number of centroids / partitions to compute. + * @param partitions Number of centroids / partitions to compute. * @param max_iter Maximum number of iterations for kmans algorithm. * @param tol Convergence tolerance for kmeans algorithm. * @param timestamp Timestamp for the index. @@ -169,7 +169,7 @@ class ivf_flat_index { */ ivf_flat_index( // size_t dim, - size_t nlist = 0, + size_t partitions = 0, uint32_t max_iterations = 2, float tol = 0.000025, TemporalPolicy temporal_policy = TemporalPolicy{TimeTravel, 0}) @@ -183,7 +183,7 @@ class ivf_flat_index { std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count())}} - , num_partitions_(nlist) + , num_partitions_(partitions) , max_iterations_(max_iterations) , tol_(tol) { } diff --git a/src/include/index/ivf_pq_index.h b/src/include/index/ivf_pq_index.h index dd637c744..7bd160e54 100644 --- a/src/include/index/ivf_pq_index.h +++ b/src/include/index/ivf_pq_index.h @@ -283,7 +283,7 @@ class ivf_pq_index { * parameters to be used subsequently in training. To fully create an index * we will need to call `train()` and `add()`. * - * @param nlist Number of centroids / partitions to compute. + * @param partitions Number of centroids / partitions to compute. * @param num_subspaces Number of subspaces to use for pq compression. This is * the number of sections to divide the vector into. * @param max_iterations Maximum number of iterations for kmeans algorithm. @@ -304,7 +304,7 @@ class ivf_pq_index { * @todo -- May also want start/stop? Use a variant? TemporalPolicy? */ ivf_pq_index( - size_t nlist = 0, + size_t partitions = 0, uint32_t num_subspaces = 16, uint32_t max_iterations = 2, float convergence_tolerance = 0.000025f, @@ -316,7 +316,7 @@ class ivf_pq_index { : temporal_policy_{ temporal_policy.has_value() ? *temporal_policy : TemporalPolicy{TimeTravel, static_cast(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count())}} - , num_partitions_(nlist) + , num_partitions_(partitions) , num_subspaces_{num_subspaces} , max_iterations_(max_iterations) , convergence_tolerance_(convergence_tolerance) diff --git a/src/include/test/unit_api_ivf_pq_index.cc b/src/include/test/unit_api_ivf_pq_index.cc index 9936248d1..5d039c84c 100644 --- a/src/include/test/unit_api_ivf_pq_index.cc +++ b/src/include/test/unit_api_ivf_pq_index.cc @@ -382,7 +382,7 @@ TEST_CASE( "[api_ivf_pq_index]") { auto ctx = tiledb::Context{}; size_t k_nn = 10; - size_t n_list = 100; + size_t partitions = 100; auto feature_type = "float32"; auto id_type = "uint32"; auto partitioning_index_type = "uint32"; @@ -399,7 +399,7 @@ TEST_CASE( {{"feature_type", feature_type}, {"id_type", id_type}, {"partitioning_index_type", partitioning_index_type}, - {"n_list", std::to_string(n_list)}, + {"partitions", std::to_string(partitions)}, {"num_subspaces", std::to_string(siftsmall_dimensions / 4)}})); size_t num_vectors = 0; @@ -674,7 +674,7 @@ TEST_CASE("clear history with an open index", "[api_ivf_pq_index]") { auto id_type = "uint32"; auto partitioning_index_type = "uint32"; uint64_t dimensions = 3; - size_t n_list = 1; + size_t partitions = 1; uint32_t num_subspaces = 1; float convergence_tolerance = 0.00003f; uint32_t max_iterations = 3; @@ -690,7 +690,7 @@ TEST_CASE("clear history with an open index", "[api_ivf_pq_index]") { {{"feature_type", feature_type}, {"id_type", id_type}, {"partitioning_index_type", partitioning_index_type}, - {"n_list", std::to_string(n_list)}, + {"partitions", std::to_string(partitions)}, {"num_subspaces", std::to_string(num_subspaces)}, {"convergence_tolerance", std::to_string(convergence_tolerance)}, {"max_iterations", std::to_string(max_iterations)}})); @@ -724,7 +724,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { auto id_type = "uint32"; auto partitioning_index_type = "uint32"; uint64_t dimensions = 3; - size_t n_list = 1; + size_t partitions = 1; uint32_t num_subspaces = 1; uint32_t max_iterations = 3; float convergence_tolerance = 0.00003f; @@ -744,7 +744,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { {"feature_type", feature_type}, {"id_type", id_type}, {"partitioning_index_type", partitioning_index_type}, - {"n_list", std::to_string(n_list)}, + {"partitions", std::to_string(partitions)}, {"num_subspaces", std::to_string(num_subspaces)}, {"max_iterations", std::to_string(max_iterations)}, {"convergence_tolerance", std::to_string(convergence_tolerance)}, @@ -760,7 +760,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(index.temporal_policy().timestamp_end() == 0); CHECK(index.dimensions() == dimensions); - CHECK(index.n_list() == n_list); + CHECK(index.partitions() == partitions); CHECK(index.num_subspaces() == num_subspaces); CHECK(index.max_iterations() == max_iterations); CHECK(index.convergence_tolerance() == convergence_tolerance); @@ -784,7 +784,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { CHECK(typed_index.group().get_all_base_sizes().size() == 1); CHECK(typed_index.group().get_all_ingestion_timestamps().size() == 1); - CHECK(typed_index.group().get_all_num_partitions()[0] == n_list); + CHECK(typed_index.group().get_all_num_partitions()[0] == partitions); CHECK(typed_index.group().get_all_base_sizes()[0] == 0); CHECK(typed_index.group().get_all_ingestion_timestamps()[0] == 0); } @@ -800,7 +800,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") { index.temporal_policy().timestamp_end() == std::numeric_limits::max()); CHECK(index.dimensions() == dimensions); - CHECK(index.n_list() == n_list); + CHECK(index.partitions() == partitions); CHECK(index.num_subspaces() == num_subspaces); CHECK(index.max_iterations() == max_iterations); CHECK(index.convergence_tolerance() == convergence_tolerance); diff --git a/src/include/test/unit_ivf_flat_index.cc b/src/include/test/unit_ivf_flat_index.cc index 7486127c6..7e5cc7c7a 100644 --- a/src/include/test/unit_ivf_flat_index.cc +++ b/src/include/test/unit_ivf_flat_index.cc @@ -239,7 +239,7 @@ TEST_CASE("debug w/ sk", "[ivf_index]") { TEST_CASE("ivf_index write and read", "[ivf_index]") { size_t dimension = 128; - size_t nlist = 100; + size_t partitions = 100; size_t k_nn = 10; size_t nthreads = 1; @@ -253,8 +253,8 @@ TEST_CASE("ivf_index write and read", "[ivf_index]") { auto training_set = tdbColMajorMatrix(ctx, siftsmall_inputs_uri, 0); load(training_set); - auto idx = - ivf_flat_index(/*dimension,*/ nlist, nthreads); + auto idx = ivf_flat_index( + /*dimension,*/ partitions, nthreads); idx.train(training_set, kmeans_init::kmeanspp); idx.add(training_set); @@ -295,17 +295,17 @@ TEMPLATE_TEST_CASE( } // Test with just a single partition -- should match flat index - SECTION("nlist = 1") { + SECTION("partitions = 1") { size_t k_nn = 6; - size_t nlist = 1; + size_t partitions = 1; auto ivf_idx2 = ivf_flat_index( - /*128,*/ nlist, 4, 1.e-4); // dim nlist maxiter eps nthreads + /*128,*/ partitions, 4, 1.e-4); // dim partitions maxiter eps nthreads ivf_idx2.train(hypercube2); ivf_idx2.add(hypercube2); auto ivf_idx4 = ivf_flat_index( - /*128,*/ nlist, 4, 1.e-4); + /*128,*/ partitions, 4, 1.e-4); ivf_idx4.train(hypercube4); ivf_idx4.add(hypercube4); @@ -372,11 +372,11 @@ TEMPLATE_TEST_CASE( // @todo Use a fixed seed for initializing kmeans TEST_CASE("Build index and query in place, infinite", "[ivf_index]") { tiledb::Context ctx; - size_t nlist = GENERATE(1, 100); + size_t partitions = GENERATE(1, 100); using s = siftsmall_test_init_defaults; using index = ivf_flat_index; - auto init = siftsmall_test_init(ctx, nlist); + auto init = siftsmall_test_init(ctx, partitions); auto&& [nprobe, k_nn, nthreads, max_iterations, tolerance] = std::tie( init.nprobe, @@ -419,11 +419,11 @@ TEST_CASE("Build index and query in place, infinite", "[ivf_index]") { TEST_CASE("Build index, write, read and query, infinite", "[ivf_index]") { tiledb::Context ctx; - size_t nlist = GENERATE(/*1,*/ 100); + size_t partitions = GENERATE(/*1,*/ 100); using s = siftsmall_test_init_defaults; using index = ivf_flat_index; - auto init = siftsmall_test_init(ctx, nlist); + auto init = siftsmall_test_init(ctx, partitions); auto&& [nprobe, k_nn, nthreads, max_iterations, tolerance] = std::tie( init.nprobe, @@ -467,11 +467,11 @@ TEST_CASE("Build index, write, read and query, infinite", "[ivf_index]") { TEST_CASE("Build index, write, read and query, finite", "[ivf_index]") { tiledb::Context ctx; - size_t nlist = GENERATE(/*1,*/ 100); + size_t partitions = GENERATE(/*1,*/ 100); using s = siftsmall_test_init_defaults; using index = ivf_flat_index; - auto init = siftsmall_test_init(ctx, nlist); + auto init = siftsmall_test_init(ctx, partitions); auto&& [nprobe, k_nn, nthreads, max_iterations, tolerance] = std::tie( init.nprobe, @@ -510,12 +510,12 @@ TEST_CASE("Build index, write, read and query, finite", "[ivf_index]") { TEST_CASE( "Build index, write, read and query, finite, out of core", "[ivf_index]") { tiledb::Context ctx; - size_t nlist = 100; + size_t partitions = 100; size_t upper_bound = GENERATE(1000, 5000); using s = siftsmall_test_init_defaults; using index = ivf_flat_index; - auto init = siftsmall_test_init(ctx, nlist); + auto init = siftsmall_test_init(ctx, partitions); auto&& [nprobe, k_nn, nthreads, max_iterations, tolerance] = std::tie( init.nprobe, @@ -563,7 +563,7 @@ TEST_CASE("Read from externally written index", "[ivf_index]") { auto k_nn = 10; auto nprobe = 20; - auto nlist = 100; + auto partitions = 100; tiledb::Context ctx; auto query_set = tdbColMajorMatrix(ctx, siftsmall_query_uri); @@ -578,7 +578,7 @@ TEST_CASE("Read from externally written index", "[ivf_index]") { auto init = siftsmall_test_init>( - ctx, nlist); + ctx, partitions); std::string tmp_ivf_index_uri = (std::filesystem::temp_directory_path() / "tmp_ivf_index").string(); tiledb::VFS vfs(ctx); @@ -627,7 +627,7 @@ TEST_CASE("Read from externally written index", "[ivf_index]") { size_t intersections1 = count_intersections(top_k_ivf, groundtruth_set, k_nn); double recall1 = intersections1 / static_cast(top_k_ivf.num_cols() * k_nn); - if (nlist == 1) { + if (partitions == 1) { CHECK(intersections1 == num_vectors(top_k_ivf) * dimensions(top_k_ivf)); CHECK(recall1 == 1.0); } diff --git a/src/include/test/utils/query_common.h b/src/include/test/utils/query_common.h index 38cd23794..5dab45b4d 100644 --- a/src/include/test/utils/query_common.h +++ b/src/include/test/utils/query_common.h @@ -153,7 +153,7 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { using px_type = Base::px_type; tiledb::Context ctx_; - size_t nlist; + size_t partitions; size_t nprobe; siftsmall_test_init( @@ -162,8 +162,8 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { uint32_t num_subspaces = 0, size_t num_vectors = 0) : ctx_{ctx} - , nlist(nl) - , nprobe(std::min(10, nlist)) + , partitions(nl) + , nprobe(std::min(10, partitions)) , training_set(tdbColMajorMatrix( ctx_, siftsmall_inputs_uri, num_vectors)) , query_set(tdbColMajorMatrix(ctx_, siftsmall_query_uri)) @@ -172,12 +172,12 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { if constexpr (std::is_same_v< IndexType, ivf_flat_index>) { - idx = IndexType(nlist, max_iterations, convergence_tolerance); + idx = IndexType(partitions, max_iterations, convergence_tolerance); } else if constexpr (std::is_same_v< IndexType, ivf_pq_index>) { idx = IndexType( - nlist, num_subspaces, max_iterations, convergence_tolerance); + partitions, num_subspaces, max_iterations, convergence_tolerance); } else { std::cout << "Unsupported index type" << std::endl; } @@ -236,7 +236,7 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { size_t intersectionsm1 = count_intersections(top_k, groundtruth_set, k_nn); double recallm1 = intersectionsm1 / ((double)top_k.num_cols() * k_nn); - if (nlist == 1) { + if (partitions == 1) { CHECK( intersectionsm1 == (size_t)(num_vectors(top_k) * dimensions(top_k))); CHECK(recallm1 == 1.0); @@ -246,7 +246,7 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { // @todo There is randomness in initialization of kmeans, use a fixed seed size_t intersections0 = count_intersections(top_k_ivf, top_k, k_nn); double recall0 = intersections0 / ((double)top_k.num_cols() * k_nn); - if (nlist == 1) { + if (partitions == 1) { CHECK(intersections0 == (size_t)(num_vectors(top_k) * dimensions(top_k))); CHECK(recall0 == 1.0); } @@ -254,7 +254,7 @@ struct siftsmall_test_init : public siftsmall_test_init_defaults { size_t intersections1 = (long)count_intersections(top_k_ivf, groundtruth_set, k_nn); double recall1 = intersections1 / ((double)top_k_ivf.num_cols() * k_nn); - if (nlist == 1) { + if (partitions == 1) { CHECK(intersections1 == (size_t)(num_vectors(top_k) * dimensions(top_k))); CHECK(recall1 == 1.0); }