Revised inner product (#10)

* working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we don't sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * working towards inner product in memory indices * done with in-memory code * made the inner product distance function return std::float_max if negative * more changes for disk index support * on the way to disk index support for MIPS * works now, need to change the PQ generation for MIPS * now incorporated disk+memory search for inner product * support for mips and l2 * changed inner product to -IP rather than 1/IP * towards adding support for storing PQ vectors in disk index for very large data * towards adding support for storing PQ vectors in disk index for very large data * halfway through 
PQ-based disk search option * code compiles for disk index pq * fixed some bug * shards are written as and when necessary * sharding is now on demand * minor changes * fixed one malloc bug in parameters * added a vector analyzer util * added missing file * fixed a bug which used L2 instead of inner product in cached beam search * now setting up the normalizing approach * towards pre-processing data * working towards newer inner product * more changes to do MIPS by reducing to L2 with extra coordinate * cleaned up code a bit, need to test everything again * testing underway * added back saturate graph to create denser indices * now we don't sample a new test dataset every iteration for estimating sharding * now num_parts increases by 2 * cleaned up warnings in Debug mode compiler * added a normalizer to vector analysis * fixed one bug for MIPS * addressed all comments of PR * fixed minor typos. now running unit tests * ran clang-format as it doesn't run by default due to LINUX flag not set anywhere * clang introduced a bug in distance.h, fixed it * added unit tester partially * minor bugfix * finished unit tester * changed back training size to 100K for now, we can increase to 1M later if necessary * added comments for unit_tester.sh * added auto tuning parameters for unit tester * re-ran clang formatting * small change to unit tester * fixed minor bug in unit tester * fixed some formatting on unit tester * started code for range search support in pq_flash_index * added more code for range search in disk index * added range search support * tested range search on small dataset * Update memory_mapper.h * minor edits Co-authored-by: ravishankar <[email protected]>
microsoft · Aug 11, 2021 · bc39cd0 · bc39cd0
1 parent 4b01372
commit bc39cd0
Show file tree

Hide file tree

Showing 39 changed files with 1,905 additions and 410 deletions.
diff --git a/include/aligned_file_reader.h b/include/aligned_file_reader.h
@@ -18,7 +18,7 @@ typedef io_context_t IOContext;
 #include <minwinbase.h>
 
 #ifndef USE_BING_INFRA
-struct IOContext{
+struct IOContext {
   HANDLE                  fhandle = NULL;
   HANDLE                  iocp = NULL;
   std::vector<OVERLAPPED> reqs;
@@ -77,7 +77,7 @@ struct AlignedRead {
 class AlignedFileReader {
  protected:
   tsl::robin_map<std::thread::id, IOContext> ctx_map;
-  std::mutex ctx_mut;
+  std::mutex                                 ctx_mut;
 
  public:
   // returns the thread-specific context

diff --git a/include/aux_utils.h b/include/aux_utils.h
@@ -29,13 +29,15 @@ typedef int FileHandle;
 #include "common_includes.h"
 #include "utils.h"
 #include "windows_customizations.h"
+#include "gperftools/malloc_extension.h"
 
 namespace diskann {
-  const size_t   TRAINING_SET_SIZE = 1500000;
+  const size_t   TRAINING_SET_SIZE = 100000;
   const double   SPACE_FOR_CACHED_NODES_IN_GB = 0.25;
   const double   THRESHOLD_FOR_CACHING_IN_GB = 1.0;
   const uint32_t NUM_NODES_TO_CACHE = 250000;
   const uint32_t WARMUP_L = 20;
+  const uint32_t NUM_KMEANS_REPS = 12;
 
   template<typename T>
   class PQFlashIndex;
@@ -44,6 +46,9 @@ namespace diskann {
       unsigned num_queries, unsigned *gold_std, float *gs_dist, unsigned dim_gs,
       unsigned *our_results, unsigned dim_or, unsigned recall_at);
 
+DISKANN_DLLEXPORT double calculate_range_search_recall(unsigned num_queries, std::vector<std::vector<_u32>> &groundtruth,
+                          std::vector<std::vector<_u32>> &our_results);
+
   DISKANN_DLLEXPORT void read_idmap(const std::string &    fname,
                                     std::vector<unsigned> &ivecs);
 

diff --git a/include/distance.h b/include/distance.h
@@ -255,11 +255,16 @@ namespace diskann {
     virtual float compare(const int8_t *a, const int8_t *b,
                           unsigned int length) const {
 #ifndef _WINDOWS
-      std::cout << "AVX only supported in Windows build.";
-      return 0;
+      int32_t result = 0;
+#pragma omp simd reduction(+ : result) aligned(a, b : 8)
+      for (_s32 i = 0; i < (_s32) length; i++) {
+        result += ((int32_t)((int16_t) a[i] - (int16_t) b[i])) *
+                  ((int32_t)((int16_t) a[i] - (int16_t) b[i]));
+      }
+      return (float) result;
     }
 #else
-      __m128  r = _mm_setzero_ps();
+      __m128 r = _mm_setzero_ps();
       __m128i r1;
       while (length >= 16) {
         r1 = _mm_subs_epi8(_mm_load_si128((__m128i *) a),
@@ -273,7 +278,7 @@ namespace diskann {
       float res = r.m128_f32[0];
 
       if (length >= 8) {
-        __m128  r2 = _mm_setzero_ps();
+        __m128 r2 = _mm_setzero_ps();
         __m128i r3 = _mm_subs_epi8(_mm_load_si128((__m128i *) (a - 8)),
                                    _mm_load_si128((__m128i *) (b - 8)));
         r2 = _mm_add_ps(r2, _mm_mulhi_epi8(r3));
@@ -285,7 +290,7 @@ namespace diskann {
       }
 
       if (length >= 4) {
-        __m128  r2 = _mm_setzero_ps();
+        __m128 r2 = _mm_setzero_ps();
         __m128i r3 = _mm_subs_epi8(_mm_load_si128((__m128i *) (a - 12)),
                                    _mm_load_si128((__m128i *) (b - 12)));
         r2 = _mm_add_ps(r2, _mm_mulhi_epi8_shift32(r3));
@@ -302,8 +307,12 @@ namespace diskann {
     virtual float compare(const float *a, const float *b,
                           unsigned int length) const {
 #ifndef _WINDOWS
-      std::cout << "AVX only supported in Windows build.";
-      return 0;
+      float result = 0;
+#pragma omp simd reduction(+ : result) aligned(a, b : 8)
+      for (_s32 i = 0; i < (_s32) length; i++) {
+        result += (a[i] - b[i]) * (a[i] - b[i]);
+      }
+      return result;
     }
 #else
       __m128 diff, v1, v2;
@@ -328,7 +337,7 @@ namespace diskann {
   template<typename T>
   class DistanceInnerProduct : public Distance<T> {
    public:
-    float compare(const T *a, const T *b, unsigned size) const {
+    float inner_product(const T *a, const T *b, unsigned size) const {
       float result = 0;
 #ifdef __GNUC__
 #ifdef __AVX__
@@ -426,10 +435,21 @@ namespace diskann {
 #endif
       return result;
     }
+    float compare(const T *a, const T *b, unsigned size)
+        const {  // distance comparisons minimize, so a larger inner product
+                 // must yield a smaller value: return the negated product.
+      float result = inner_product(a, b, size);
+      //      if (result < 0)
+      //      return std::numeric_limits<float>::max();
+      //      else
+      return -result;
+    }
   };
 
   template<typename T>
-  class DistanceFastL2 : public DistanceInnerProduct<T> {
+  class DistanceFastL2
+      : public DistanceInnerProduct<T> {  // currently defined only for float.
+                                          // templated for future use.
    public:
     float norm(const T *a, unsigned size) const {
       float result = 0;
@@ -522,7 +542,7 @@ namespace diskann {
     using DistanceInnerProduct<T>::compare;
     float compare(const T *a, const T *b, float norm,
                   unsigned size) const {  // not implement
-      float result = -2 * DistanceInnerProduct<T>::compare(a, b, size);
+      float result = -2 * DistanceInnerProduct<T>::inner_product(a, b, size);
       result += norm;
       return result;
     }

diff --git a/include/exceptions.h b/include/exceptions.h
@@ -12,4 +12,4 @@ namespace diskann {
         : std::logic_error("Function not yet implemented.") {
     }
   };
-}
+}  // namespace diskann
diff --git a/include/index.h b/include/index.h
@@ -52,7 +52,7 @@ namespace diskann {
 
     // Gopal. Added search overload that takes L as parameter, so that we
     // can customize L on a per-query basis without tampering with "Parameters"
-    DISKANN_DLLEXPORT std::pair<uint32_t, uint32_t> search(const T *query,
+    DISKANN_DLLEXPORT std::pair<uint32_t, uint32_t> search(const T *      query,
                                                            const size_t   K,
                                                            const unsigned L,
                                                            unsigned *indices);
@@ -63,7 +63,7 @@ namespace diskann {
 
     DISKANN_DLLEXPORT std::pair<uint32_t, uint32_t> search_with_tags(
         const T *query, const size_t K, const unsigned L, TagT *tags,
-        unsigned frozen_pts, unsigned *indices_buffer = NULL);
+        unsigned *indices_buffer = NULL);
 
     // repositions frozen points to the end of _data - if they have been moved
     // during deletion
@@ -167,6 +167,7 @@ namespace diskann {
     size_t consolidate_deletes(const Parameters &parameters);
 
    private:
+    Metric       _metric = diskann::L2;
     size_t       _dim;
     size_t       _aligned_dim;
     T *          _data;

diff --git a/include/memory_mapper.h b/include/memory_mapper.h
@@ -38,4 +38,4 @@ namespace diskann {
 
     ~MemoryMapper();
   };
-}
+}  // namespace diskann
diff --git a/include/parameters.h b/include/parameters.h
@@ -19,6 +19,9 @@ namespace diskann {
     template<typename ParamType>
     inline void Set(const std::string &name, const ParamType &value) {
       //      ParamType *ptr = (ParamType *) malloc(sizeof(ParamType));
+      if (params.find(name) != params.end()) {
+        free(params[name]);
+      }
       ParamType *ptr = new ParamType;
       *ptr = value;
       params[name] = (void *) ptr;

diff --git a/include/partition_and_pq.h b/include/partition_and_pq.h
@@ -27,17 +27,27 @@ template<typename T>
 void gen_random_slice(const T *inputdata, size_t npts, size_t ndims,
                       double p_val, float *&sampled_data, size_t &slice_size);
 
-template<typename T>
-int estimate_cluster_sizes(const std::string data_file, float *pivots,
-                           const size_t num_centers, const size_t dim,
-                           const size_t         k_base,
+int estimate_cluster_sizes(float *test_data_float, size_t num_test,
+                           float *pivots, const size_t num_centers,
+                           const size_t dim, const size_t k_base,
                            std::vector<size_t> &cluster_sizes);
 
 template<typename T>
 int shard_data_into_clusters(const std::string data_file, float *pivots,
                              const size_t num_centers, const size_t dim,
                              const size_t k_base, std::string prefix_path);
 
+template<typename T>
+int shard_data_into_clusters_only_ids(const std::string data_file,
+                                      float *pivots, const size_t num_centers,
+                                      const size_t dim, const size_t k_base,
+                                      std::string prefix_path);
+
+template<typename T>
+int retrieve_shard_data_from_ids(const std::string data_file,
+                                 std::string       idmap_filename,
+                                 std::string       data_filename);
+
 template<typename T>
 int partition(const std::string data_file, const float sampling_rate,
               size_t num_centers, size_t max_k_means_reps,
@@ -49,12 +59,10 @@ int partition_with_ram_budget(const std::string data_file,
                               size_t            graph_degree,
                               const std::string prefix_path, size_t k_base);
 
-DISKANN_DLLEXPORT int generate_pq_pivots(const float *train_data,
-                                         size_t num_train, unsigned dim,
-                                         unsigned    num_centers,
-                                         unsigned    num_pq_chunks,
-                                         unsigned    max_k_means_reps,
-                                         std::string pq_pivots_path);
+DISKANN_DLLEXPORT int generate_pq_pivots(
+    const float *train_data, size_t num_train, unsigned dim,
+    unsigned num_centers, unsigned num_pq_chunks, unsigned max_k_means_reps,
+    std::string pq_pivots_path, bool make_zero_mean = false);
 
 template<typename T>
 int generate_pq_data_from_pivots(const std::string data_file,

diff --git a/include/percentile_stats.h b/include/percentile_stats.h
@@ -58,4 +58,4 @@ namespace diskann {
     }
     return avg / len;
   }
-}
+}  // namespace diskann
diff --git a/include/pq_flash_index.h b/include/pq_flash_index.h
@@ -70,7 +70,8 @@ namespace diskann {
     // Freeing the reader object is now the client's (DiskANNInterface's)
     // responsibility.
     DISKANN_DLLEXPORT PQFlashIndex(
-        std::shared_ptr<AlignedFileReader> &fileReader);
+        std::shared_ptr<AlignedFileReader> &fileReader,
+        diskann::Metric                     metric = diskann::Metric::L2);
     DISKANN_DLLEXPORT ~PQFlashIndex();
 
 #ifdef EXEC_ENV_OLS
@@ -79,8 +80,8 @@ namespace diskann {
                                const char *disk_index_file);
 #else
     // load compressed data, and obtains the handle to the disk-resident index
-    DISKANN_DLLEXPORT int load(uint32_t num_threads, const char *pq_prefix,
-                               const char *disk_index_file);
+    DISKANN_DLLEXPORT int  load(uint32_t num_threads, const char *pq_prefix,
+                                const char *disk_index_file);
 #endif
 
     DISKANN_DLLEXPORT void load_cache_list(std::vector<uint32_t> &node_list);
@@ -112,10 +113,15 @@ namespace diskann {
     // implemented
     DISKANN_DLLEXPORT void cached_beam_search(
         const T *query, const _u64 k_search, const _u64 l_search, _u64 *res_ids,
-        float *res_dists, const _u64 beam_width, QueryStats *stats = nullptr,
-        Distance<T> *output_dist_func = nullptr);
-    std::shared_ptr<AlignedFileReader> &reader;
+        float *res_dists, const _u64 beam_width, QueryStats *stats = nullptr);
+
 
+  DISKANN_DLLEXPORT _u32 range_search(const T *query1, const double range,
+                                           const _u64 l_search, _u64* indices, float* distances,
+                                           const _u64  beam_width,
+                                           QueryStats *stats = nullptr);
+
+    std::shared_ptr<AlignedFileReader> &reader;
    protected:
     DISKANN_DLLEXPORT void use_medoids_data_as_centroids();
     DISKANN_DLLEXPORT void setup_thread_data(_u64 nthreads);
@@ -129,28 +135,39 @@ namespace diskann {
     // nbrs of node `i`: ((unsigned*)buf) + 1
     _u64 max_node_len = 0, nnodes_per_sector = 0, max_degree = 0;
 
+    diskann::Metric metric = diskann::Metric::L2;
+    float           max_base_norm =
+        0;  // used only for inner product search to re-scale the result value
+            // (due to the pre-processing of base during index build)
     // data info
     _u64 num_points = 0;
     _u64 data_dim = 0;
+    _u64 disk_data_dim = 0;  // will be different from data_dim only if we use
+                             // PQ for disk data (very large dimensionality)
     _u64 aligned_dim = 0;
+    _u64 disk_bytes_per_point = 0;
 
-    std::string disk_index_file;
+    std::string                        disk_index_file;
     std::vector<std::pair<_u32, _u32>> node_visit_counter;
 
     // PQ data
     // n_chunks = # of chunks ndims is split into
     // data: _u8 * n_chunks
     // chunk_size = chunk size of each dimension chunk
     // pq_tables = float* [[2^8 * [chunk_size]] * n_chunks]
-    _u8 *                data = nullptr;
-    _u64                 chunk_size;
-    _u64                 n_chunks;
-    FixedChunkPQTable<T> pq_table;
+    _u8 *             data = nullptr;
+    _u64              n_chunks;
+    FixedChunkPQTable pq_table;
 
     // distance comparator
     Distance<T> *    dist_cmp = nullptr;
     Distance<float> *dist_cmp_float = nullptr;
 
+    // for very large datasets: we use PQ even for the disk resident index
+    bool              use_disk_index_pq = false;
+    _u64              disk_pq_n_chunks;
+    FixedChunkPQTable disk_pq_table;
+
     // medoid/start info
     uint32_t *medoids =
         nullptr;         // by default it is just one entry point of graph, we
@@ -162,11 +179,11 @@ namespace diskann {
                   // closest centroid as the starting point of search
 
     // nhood_cache
-    unsigned *nhood_cache_buf = nullptr;
+    unsigned *                                    nhood_cache_buf = nullptr;
     tsl::robin_map<_u32, std::pair<_u32, _u32 *>> nhood_cache;
 
     // coord_cache
-    T *coord_cache_buf = nullptr;
+    T *                       coord_cache_buf = nullptr;
     tsl::robin_map<_u32, T *> coord_cache;
 
     // thread-specific scratch
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,4 +12,4 @@ namespace diskann { @@
             : std::logic_error("Function not yet implemented.") {
         }
       };
-    }
+    }  // namespace diskann