diff --git a/examples/cpp/todo_examples/401_persistent.cpp b/examples/cpp/todo_examples/401_persistent.cpp index 7633d478..dd8dcb90 100644 --- a/examples/cpp/todo_examples/401_persistent.cpp +++ b/examples/cpp/todo_examples/401_persistent.cpp @@ -110,7 +110,23 @@ class LocalKvStore { void example_serialize_with_kv_store() { - // create index + /******************* Prepare Base Dataset *****************/ + uint32_t num_vectors = 1000; + uint32_t dim = 128; + auto ids = new int64_t[num_vectors]; + auto vectors = new float[dim * num_vectors]; + + std::mt19937 rng; + rng.seed(47); + std::uniform_real_distribution<> distrib_real; + for (uint32_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + for (uint64_t i = 0; i < dim * num_vectors; ++i) { + vectors[i] = distrib_real(rng); + } + + /******************* Create an Index *****************/ vsag::Engine engine; auto index_paramesters = R"( { @@ -132,8 +148,83 @@ example_serialize_with_kv_store() { index = *create_index; } - // generate some vectors - uint32_t num_vectors = 10000; + auto base = vsag::Dataset::Make(); + base->NumElements(num_vectors)->Dim(dim)->Ids(ids)->Float32Vectors(vectors)->Owner(false); + if (auto build_index = index->Build(base); not build_index.has_value()) { + std::cerr << "build index failed: " << build_index.error().message << std::endl; + abort(); + } + std::cout << "index contains vectors: " << index->GetNumElements() << std::endl; + + /******************* Save Index to KVStore *****************/ + auto serialize_result = index->Serialize(); + if (not serialize_result.has_value()) { + std::cerr << std::endl; + abort(); + } + + LocalKvStore store("/tmp/vsag-persistent-example"); + for (const auto& key : serialize_result.value().GetKeys()) { + auto binary = serialize_result.value().Get(key); + std::string value((const char*)binary.data.get(), binary.size); + store.Put(key, value); + } + + /******************* Load Index from KVStore *****************/ + index = nullptr; + if (auto create_index = engine.CreateIndex("hnsw", index_paramesters); + not create_index.has_value()) { + std::cout << "create index failed: " << create_index.error().message << std::endl; + abort(); + } else { + index = *create_index; + } + auto keys = store.GetKeys(); + vsag::BinarySet bs; + for (const auto& key : keys) { + auto value = store.Get(key); + vsag::Binary binary; + binary.data = std::shared_ptr(new int8_t[value.size()]); + memcpy(binary.data.get(), value.data(), value.size()); + binary.size = value.size(); + bs.Set(key, binary); + } + if (auto deserialize = index->Deserialize(bs); not deserialize.has_value()) { + std::cerr << "load index failed: " << deserialize.error().message << std::endl; + abort(); + } + + /******************* Search and Print Results *****************/ + auto topk = 10; + auto query_vector = new float[dim]; + for (uint64_t i = 0; i < dim; ++i) { + query_vector[i] = distrib_real(rng); + } + auto query = vsag::Dataset::Make(); + query->NumElements(1)->Dim(dim)->Float32Vectors(query_vector)->Owner(false); + auto search_parameters = R"( + { + "hnsw": { + "ef_search": 100 + } + } + )"; + if (auto knn_search = index->KnnSearch(query, topk, search_parameters); + not knn_search.has_value()) { + std::cerr << "search knn failed: " << knn_search.error().message << std::endl; + abort(); + } else { + auto result = *knn_search; + for (int64_t i = 0; i < result->GetDim(); ++i) { + std::cout << result->GetIds()[i] << " " << result->GetDistances()[i] << std::endl; + } + } +} + +void +example_serialize_with_stream() { + /******************* Prepare Base Dataset *****************/ + uint32_t num_vectors = 1000; uint32_t dim = 128; auto ids = new int64_t[num_vectors]; auto vectors = new float[dim * num_vectors]; @@ -148,7 +239,28 @@ example_serialize_with_kv_store() { vectors[i] = distrib_real(rng); } - // insert vectors into the index + /******************* Create an Index *****************/ + vsag::Engine engine; + auto index_paramesters = R"( + { + "dtype": "float32", + "metric_type": "l2", + "dim": 128, + "hnsw": { + "max_degree": 16, + "ef_construction": 100 + } + } + )"; + vsag::IndexPtr index = nullptr; + if (auto create_index = engine.CreateIndex("hnsw", index_paramesters); + not create_index.has_value()) { + std::cout << "create index failed: " << create_index.error().message << std::endl; + abort(); + } else { + index = *create_index; + } + auto base = vsag::Dataset::Make(); base->NumElements(num_vectors)->Dim(dim)->Ids(ids)->Float32Vectors(vectors)->Owner(false); if (auto build_index = index->Build(base); not build_index.has_value()) { @@ -157,7 +269,7 @@ example_serialize_with_kv_store() { } std::cout << "index contains vectors: " << index->GetNumElements() << std::endl; - // save index to kv store + /******************* Save Index to KVStore *****************/ auto serialize_result = index->Serialize(); if (not serialize_result.has_value()) { std::cerr << std::endl; @@ -171,7 +283,7 @@ example_serialize_with_kv_store() { store.Put(key, value); } - // load index from kv store + /******************* Load Index from KVStore *****************/ index = nullptr; if (auto create_index = engine.CreateIndex("hnsw", index_paramesters); not create_index.has_value()) { @@ -195,7 +307,7 @@ example_serialize_with_kv_store() { abort(); } - // search top 10 + /******************* Search and Print Results *****************/ auto topk = 10; auto query_vector = new float[dim]; for (uint64_t i = 0; i < dim; ++i) { @@ -226,5 +338,7 @@ int main(int32_t argc, char** argv) { example_serialize_with_kv_store(); + example_serialize_with_stream(); + return 0; }