Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ljcui committed Oct 25, 2024
1 parent cca4e13 commit 148ba9c
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 55 deletions.
3 changes: 2 additions & 1 deletion src/core/index_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ class IndexManager {
is.type = ent.type;
indexes.emplace_back(std::move(is));
} else if (index_name.size() > ve_index_len &&
index_name.substr(index_name.size() - ve_index_len) == _detail::VERTEX_VECTOR_INDEX) {
index_name.substr(index_name.size() - ve_index_len)
== _detail::VERTEX_VECTOR_INDEX) {
_detail::VectorIndexEntry ent = LoadVectorIndex(it->GetValue());
VectorIndexSpec vis;
vis.label = ent.label;
Expand Down
6 changes: 4 additions & 2 deletions src/core/lightning_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ void LightningGraph::DropAllVertex() {
Transaction txn = CreateWriteTxn(false);
ScopedRef<SchemaInfo> curr_schema = schema_.GetScopedRef();
// clear indexes
auto [indexes, composite_indexes, vector_indexes] = index_manager_->ListAllIndexes(txn.GetTxn());
auto [indexes, composite_indexes, vector_indexes]
= index_manager_->ListAllIndexes(txn.GetTxn());
for (auto& idx : indexes) {
auto v_schema = curr_schema->v_schema_manager.GetSchema(idx.label);
auto e_schema = curr_schema->e_schema_manager.GetSchema(idx.label);
Expand Down Expand Up @@ -2816,7 +2817,8 @@ void LightningGraph::DropAllIndex() {
ScopedRef<SchemaInfo> curr_schema = schema_.GetScopedRef();
std::unique_ptr<SchemaInfo> new_schema(new SchemaInfo(*curr_schema.Get()));
std::unique_ptr<SchemaInfo> backup_schema(new SchemaInfo(*curr_schema.Get()));
auto [indexes, composite_indexes, vector_indexes] = index_manager_->ListAllIndexes(txn.GetTxn());
auto [indexes, composite_indexes, vector_indexes]
= index_manager_->ListAllIndexes(txn.GetTxn());

bool success = true;
for (auto& idx : indexes) {
Expand Down
1 change: 1 addition & 0 deletions src/core/vector_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class VectorIndex {

virtual int64_t GetElementsNum() = 0;
virtual int64_t GetMemoryUsage() = 0;
virtual int64_t GetDeletedIdsNum() = 0;
};

struct VectorIndexEntry {
Expand Down
46 changes: 32 additions & 14 deletions src/core/vsag_hnsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ void HNSW::Add(const std::vector<std::vector<float>>& vectors,
std::copy(vectors[i].begin(), vectors[i].end(), &index_vectors[i * vec_dimension_]);
}
for (size_t i = 0; i < num_vectors; i++) {
ids[i] = vids[i];
vectorid_++;
ids[i] = vectorid_;
if (vid_vectorid_.count(vids[i])) {
THROW_CODE(VectorIndexException, "[HNSW Add] vid {} already exists", vids[i]);
}
vid_vectorid_[vids[i]] = vectorid_;
vectorid_vid_[vectorid_] = {false, vids[i]};
}
auto dataset = vsag::Dataset::Make();
dataset->Dim(vec_dimension_)->NumElements(num_vectors)
Expand All @@ -67,21 +73,23 @@ void HNSW::Add(const std::vector<std::vector<float>>& vectors,
}

void HNSW::Clear() {
vectorid_ = 0;
index_ = nullptr;
deleted_vectorid_ = 0;
std::unordered_map<int64_t, int64_t>().swap(vid_vectorid_);
std::unordered_map<int64_t, std::pair<bool, int64_t>>().swap(vectorid_vid_);
Build();
}

void HNSW::Remove(const std::vector<int64_t>& vids) {
for (auto vid : vids) {
auto result = index_->Remove(vid);
if (result.has_value()) {
if (!result.value()) {
THROW_CODE(InputError, "failed to remove vector from index, vid:{}", vid);
}
} else {
THROW_CODE(InputError, "failed to remove vector from index, vid:{}, error:{}",
vid, result.error().message);
auto iter = vid_vectorid_.find(vid);
if (iter == vid_vectorid_.end()) {
THROW_CODE(VectorIndexException, "[HNSW Remove] vid {} does not exist", vid);
}
vectorid_vid_.at(iter->second) = {true, -1};
deleted_vectorid_++;
vid_vectorid_.erase(iter);
}
}

Expand All @@ -96,7 +104,7 @@ void HNSW::Build() {
{"dim", vec_dimension_},
{"hnsw", hnsw_parameters}
};
auto temp = vsag::Factory::CreateIndex("fresh_hnsw", index_parameters.dump());
auto temp = vsag::Factory::CreateIndex("hnsw", index_parameters.dump());
if (temp.has_value()) {
index_ = std::move(temp.value());
} else {
Expand Down Expand Up @@ -204,10 +212,13 @@ HNSW::KnnSearch(const std::vector<float>& query, int64_t top_k, int ef_search) {
{"hnsw", {{"ef_search", ef_search}}},
};
std::vector<std::pair<int64_t, float>> ret;
auto result = index_->KnnSearch(dataset, top_k, parameters.dump());
auto result = index_->KnnSearch(dataset, top_k, parameters.dump(), [this](int64_t id)->bool {
return vectorid_vid_.at(id).first;
});
if (result.has_value()) {
for (int64_t i = 0; i < result.value()->GetDim(); ++i) {
ret.emplace_back(result.value()->GetIds()[i], result.value()->GetDistances()[i]);
auto vector_id = result.value()->GetIds()[i];
ret.emplace_back(vectorid_vid_.at(vector_id).second, result.value()->GetDistances()[i]);
}
} else {
THROW_CODE(VectorIndexException, result.error().message);
Expand All @@ -227,10 +238,13 @@ HNSW::RangeSearch(const std::vector<float>& query, float radius, int ef_search,
{"hnsw", {{"ef_search", ef_search}}},
};
std::vector<std::pair<int64_t, float>> ret;
auto result = index_->RangeSearch(dataset, radius, parameters.dump(), limit);
auto result = index_->RangeSearch(dataset, radius, parameters.dump(), [this](int64_t id)->bool {
return vectorid_vid_.at(id).first;
}, limit);
if (result.has_value()) {
for (int64_t i = 0; i < result.value()->GetDim(); ++i) {
ret.emplace_back(result.value()->GetIds()[i], result.value()->GetDistances()[i]);
int64_t vector_id = result.value()->GetIds()[i];
ret.emplace_back(vectorid_vid_.at(vector_id).second, result.value()->GetDistances()[i]);
}
} else {
THROW_CODE(VectorIndexException, result.error().message);
Expand All @@ -246,4 +260,8 @@ int64_t HNSW::GetMemoryUsage() {
return index_->GetMemoryUsage();
}

// Returns the current value of the deleted_vectorid_ counter.
// In the code visible here the counter starts at 0, is incremented once per
// vid processed by Remove(), and is reset to 0 by Clear().
// NOTE(review): other writers (e.g. load/restore paths) are not visible in
// this view — confirm before relying on this as an exact tombstone count.
int64_t HNSW::GetDeletedIdsNum() {
return deleted_vectorid_;
}

} // namespace lgraph
6 changes: 6 additions & 0 deletions src/core/vsag_hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@ class HNSW : public VectorIndex {
friend class LightningGraph;
friend class Transaction;
friend class IndexManager;
int64_t vectorid_ = 0;
int64_t deleted_vectorid_ = 0;
std::unordered_map<int64_t, int64_t> vid_vectorid_;
std::unordered_map<int64_t, std::pair<bool, int64_t>> vectorid_vid_;
std::shared_ptr<vsag::Index> index_;

// build index
void Build();

public:
HNSW(const std::string& label, const std::string& name,
const std::string& distance_type, const std::string& index_type,
Expand Down Expand Up @@ -72,6 +77,7 @@ class HNSW : public VectorIndex {

int64_t GetElementsNum() override;
int64_t GetMemoryUsage() override;
int64_t GetDeletedIdsNum() override;

template <typename T>
static void writeBinaryPOD(std::ostream& out, const T& podRef) {
Expand Down
1 change: 1 addition & 0 deletions src/cypher/procedure/procedure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4257,6 +4257,7 @@ void VectorFunc::ShowVertexVectorIndex(RTContext *ctx, const cypher::Record *rec
auto index = ctx->txn_->GetTxn()->GetVertexVectorIndex(item.label, item.field);
r.AddConstant(lgraph::FieldData(index->GetElementsNum()));
r.AddConstant(lgraph::FieldData(index->GetMemoryUsage()));
r.AddConstant(lgraph::FieldData(index->GetDeletedIdsNum()));
records->emplace_back(r.Snapshot());
}
FillProcedureYieldItem("db.showVertexVectorIndex", yield_items, records);
Expand Down
1 change: 1 addition & 0 deletions src/cypher/procedure/procedure.h
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,7 @@ static std::vector<Procedure> global_procedures = {
{"hnsw.ef_construction", {6, lgraph_api::LGraphType::INTEGER}},
{"elements_num", {7, lgraph_api::LGraphType::INTEGER}},
{"memory_usage", {8, lgraph_api::LGraphType::INTEGER}},
{"deleted_ids_num", {9, lgraph_api::LGraphType::INTEGER}},
}),

Procedure("db.vertexVectorKnnSearch", VectorFunc::VertexVectorKnnSearch,
Expand Down
57 changes: 33 additions & 24 deletions test/resource/unit_test/vector_index/cypher/vector_index.result
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CALL db.addVertexVectorIndex('person','embedding2', {dimension:4});
CALL db.addVertexVectorIndex('person','name', {dimension:4});
[VectorIndexException] Only FLOAT_VECTOR type supports vector index
CALL db.showVertexVectorIndex();
[{"dimension":4,"distance_type":"l2","elements_num":0,"field_name":"embedding1","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":96},{"dimension":4,"distance_type":"l2","elements_num":0,"field_name":"embedding2","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":96}]
[{"deleted_ids_num":0,"dimension":4,"distance_type":"l2","elements_num":0,"field_name":"embedding1","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":96},{"deleted_ids_num":0,"dimension":4,"distance_type":"l2","elements_num":0,"field_name":"embedding2","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":96}]
CREATE (n:person {id:1, name:'name1', embedding1: [1.0,1.0,1.0,1.0], embedding2: [11.0,11.0,11.0,11.0]});
[{"<SUMMARY>":"created 1 vertices, created 0 edges."}]
CREATE (n:person {id:2, name:'name2', embedding1: [2.0,2.0,2.0,2.0], embedding2: [12.0,12.0,12.0,12.0]});
Expand Down Expand Up @@ -35,7 +35,7 @@ CALL db.vertexVectorRangeSearch('person','embedding1', [1.0,2.0,3.0,4.0], {radiu
CALL db.alterLabelDelFields('vertex', 'person', ['embedding1']);
[{"record_affected":3}]
CALL db.showVertexVectorIndex();
[{"dimension":4,"distance_type":"l2","elements_num":3,"field_name":"embedding2","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":576}]
[{"deleted_ids_num":1,"dimension":4,"distance_type":"l2","elements_num":4,"field_name":"embedding2","hnsw.ef_construction":100,"hnsw.m":16,"index_type":"hnsw","label_name":"person","memory_usage":736}]
CALL db.vertexVectorKnnSearch('person','embedding1',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id;
[FieldNotFound] Field [embedding1] does not exist.
CALL db.vertexVectorKnnSearch('person','embedding2',[1,2,3,4], {top_k:2, hnsw_ef_search:10}) yield node return node.id;
Expand All @@ -62,56 +62,65 @@ CALL db.createVertexLabelByJson('{"label":"Chunk","primary":"id","type":"VERTEX"
[]
CALL db.addVertexVectorIndex('Chunk','embedding', {dimension:4});
[]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":0,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"label_name":"Chunk"}]
CALL db.upsertVertex('Chunk', [{id:1},{id:2}]);
[{"data_error":0,"index_conflict":0,"insert":2,"total":2,"update":0}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":0,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"label_name":"Chunk"}]
CALL db.upsertVertex('Chunk', [{id:1, embedding:[0.1,0.1,0.1,0.1]},{id:2, embedding:[-0.2,-0.2,-0.2,-0.2]}]);
[{"data_error":0,"index_conflict":0,"insert":0,"total":2,"update":2}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":2,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":2,"label_name":"Chunk"}]
CALL db.vertexVectorKnnSearch('Chunk','embedding',[1,2,3,4], {top_k:10, hnsw_ef_search:10});
[{"distance":28.04,"node":{"identity":7,"label":"Chunk","properties":{"embedding":[0.10000000149011612,0.10000000149011612,0.10000000149011612,0.10000000149011612],"id":1}}},{"distance":34.16,"node":{"identity":8,"label":"Chunk","properties":{"embedding":[-0.20000000298023224,-0.20000000298023224,-0.20000000298023224,-0.20000000298023224],"id":2}}}]
CALL db.upsertVertex('Chunk', [{id:1, embedding:[0.1,0.1,0.1,0.1]},{id:2, embedding:[-0.2,-0.2,-0.2,-0.2]}, {id:3, embedding:[0.3,0.3,0.3,0.3]}]);
[{"data_error":0,"index_conflict":0,"insert":1,"total":3,"update":2}]
CALL db.vertexVectorKnnSearch('Chunk','embedding',[1,2,3,4], {top_k:10, hnsw_ef_search:10});
[{"distance":24.36,"node":{"identity":9,"label":"Chunk","properties":{"embedding":[0.30000001192092896,0.30000001192092896,0.30000001192092896,0.30000001192092896],"id":3}}},{"distance":28.04,"node":{"identity":7,"label":"Chunk","properties":{"embedding":[0.10000000149011612,0.10000000149011612,0.10000000149011612,0.10000000149011612],"id":1}}},{"distance":34.16,"node":{"identity":8,"label":"Chunk","properties":{"embedding":[-0.20000000298023224,-0.20000000298023224,-0.20000000298023224,-0.20000000298023224],"id":2}}}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":3,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":3,"label_name":"Chunk"}]
CALL db.upsertVertex('Chunk', [{id:1, embedding:[1.1,1.1,1.1,1.1]},{id:2, embedding:[-1.2,-1.2,-1.2,-1.2]}, {id:3, embedding:[1.3,1.3,1.3,1.3]}]);
[{"data_error":0,"index_conflict":0,"insert":0,"total":3,"update":3}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":3,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":3,"elements_num":6,"label_name":"Chunk"}]
CALL db.vertexVectorKnnSearch('Chunk','embedding',[1,2,3,4], {top_k:10, hnsw_ef_search:10});
[{"distance":10.76,"node":{"identity":9,"label":"Chunk","properties":{"embedding":[1.2999999523162842,1.2999999523162842,1.2999999523162842,1.2999999523162842],"id":3}}},{"distance":12.84,"node":{"identity":7,"label":"Chunk","properties":{"embedding":[1.100000023841858,1.100000023841858,1.100000023841858,1.100000023841858],"id":1}}},{"distance":59.75999,"node":{"identity":8,"label":"Chunk","properties":{"embedding":[-1.2000000476837158,-1.2000000476837158,-1.2000000476837158,-1.2000000476837158],"id":2}}}]
CALL db.upsertVertex('Chunk', [{id:1, embedding:null},{id:2, embedding:[-1.2,-1.2,-1.2,-1.2]}, {id:3, embedding:null}]);
[{"data_error":0,"index_conflict":0,"insert":0,"total":3,"update":3}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":1,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":5,"elements_num":6,"label_name":"Chunk"}]
CALL db.vertexVectorKnnSearch('Chunk','embedding',[1,2,3,4], {top_k:10, hnsw_ef_search:10});
[{"distance":59.75999,"node":{"identity":8,"label":"Chunk","properties":{"embedding":[-1.2000000476837158,-1.2000000476837158,-1.2000000476837158,-1.2000000476837158],"id":2}}}]
CALL db.vertexVectorRangeSearch('Chunk','embedding', [1,2,3,4], {radius:100.0, hnsw_ef_search:10}) yield node,distance return node.id, distance;
[{"distance":59.75999,"node.id":2}]

CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num return label_name, field_name, elements_num;
[{"elements_num":1,"field_name":"embedding","label_name":"Chunk"},{"elements_num":3,"field_name":"embedding2","label_name":"person"},{"elements_num":3,"field_name":"embedding","label_name":"student"}]
CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num, deleted_ids_num return label_name, field_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":5,"elements_num":6,"field_name":"embedding","label_name":"Chunk"},{"deleted_ids_num":1,"elements_num":4,"field_name":"embedding2","label_name":"person"},{"deleted_ids_num":0,"elements_num":3,"field_name":"embedding","label_name":"student"}]
CALL db.dropAllVertex();
[]
CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num return label_name, field_name, elements_num;
[{"elements_num":0,"field_name":"embedding","label_name":"Chunk"},{"elements_num":0,"field_name":"embedding2","label_name":"person"},{"elements_num":0,"field_name":"embedding","label_name":"student"}]
CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num, deleted_ids_num return label_name, field_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding","label_name":"Chunk"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding2","label_name":"person"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding","label_name":"student"}]
CALL db.createVertexLabelByJson('{"label":"Chunk","primary":"id","type":"VERTEX","detach_property":true,"properties":[{"name":"id","type":"INT32","optional":false},{"name":"embedding","type":"FLOAT_VECTOR","optional":true}]}');
[LabelExist] Vertex label [Chunk] already exists.
CALL db.addVertexVectorIndex('Chunk','embedding', {dimension:4});
[IndexExist] VertexIndex [Chunk:embedding] already exist.
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":0,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"label_name":"Chunk"}]
CALL db.upsertVertex('Chunk', [{id:1},{id:2}]);
[{"data_error":0,"index_conflict":0,"insert":2,"total":2,"update":0}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":0,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"label_name":"Chunk"}]
CALL db.upsertVertex('Chunk', [{id:1, embedding:[0.1,0.1,0.1,0.1]},{id:2, embedding:[-0.2,-0.2,-0.2,-0.2]}]);
[{"data_error":0,"index_conflict":0,"insert":0,"total":2,"update":2}]
CALL db.showVertexVectorIndex() yield label_name, elements_num where label_name = 'Chunk' return label_name, elements_num;
[{"elements_num":2,"label_name":"Chunk"}]
CALL db.showVertexVectorIndex() yield label_name, elements_num, deleted_ids_num where label_name = 'Chunk' return label_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":2,"label_name":"Chunk"}]

CALL db.dropAllVertex();
[]
CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num, deleted_ids_num return label_name, field_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding","label_name":"Chunk"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding2","label_name":"person"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding","label_name":"student"}]
Create(n1:Chunk {id:1, embedding:[0.1,0.1,0.1,0.1]}) Create(n2:Chunk {id:1, embedding:[0.2,0.2,0.2,0.2]});
[InputError] Failed to index vertex [13] with field value [id:1]: index value already exists.
CALL db.showVertexVectorIndex() yield label_name, field_name, elements_num, deleted_ids_num return label_name, field_name, elements_num, deleted_ids_num;
[{"deleted_ids_num":0,"elements_num":1,"field_name":"embedding","label_name":"Chunk"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding2","label_name":"person"},{"deleted_ids_num":0,"elements_num":0,"field_name":"embedding","label_name":"student"}]
Loading

0 comments on commit 148ba9c

Please sign in to comment.