From b54d97b530751e0104a43f5bc4288362b07e25b0 Mon Sep 17 00:00:00 2001 From: Masajiro Iwasaki Date: Tue, 27 Dec 2022 13:10:55 +0900 Subject: [PATCH] fix the bug of the cosine similarity --- VERSION | 2 +- lib/NGT/Command.cpp | 2 +- lib/NGT/Graph.cpp | 7 ++++++- lib/NGT/Graph.h | 8 +++++++- lib/NGT/Index.cpp | 21 +++++++++++++++++---- lib/NGT/Node.cpp | 13 +++++++++++++ lib/NGT/PrimitiveComparator.h | 7 ++++--- samples/qg-l2-float/qg-l2-float.cpp | 3 ++- 8 files changed, 51 insertions(+), 12 deletions(-) diff --git a/VERSION b/VERSION index f1547e6..815e68d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.7 +2.0.8 diff --git a/lib/NGT/Command.cpp b/lib/NGT/Command.cpp index 797f6fc..c83f815 100644 --- a/lib/NGT/Command.cpp +++ b/lib/NGT/Command.cpp @@ -861,7 +861,7 @@ using namespace std; void NGT::Command::repair(Args &args) { - const string usage = "Usage: ng[ [-m c|r|R] repair index \n" + const string usage = "Usage: ngt [-m c|r|R] repair index \n" "\t-m mode\n" "\t\tc: Check. (default)\n" "\t\tr: Repair and save it as [index].repair.\n" diff --git a/lib/NGT/Graph.cpp b/lib/NGT/Graph.cpp index abcda0e..96eab3d 100644 --- a/lib/NGT/Graph.cpp +++ b/lib/NGT/Graph.cpp @@ -925,7 +925,12 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, } if (insertionA != insertionB) { stringstream msg; - msg << "Graph::removeEdgeReliably:Warning. Lost connectivity! Isn't this ANNG? ID=" << id << "."; + msg << "Graph::removeEdgeReliably:Warning. Lost connectivity! Isn't this ANNG? ID=" << id +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + << ". (" << node.at(i, repository.allocator).id << ":" << node.at(minj, repository.allocator).id << ")"; +#else + << ". (" << node[i].id << ":" << node[minj].id << ")"; +#endif #ifdef NGT_FORCED_REMOVE msg << " Anyway continue..."; cerr << msg.str() << endl; diff --git a/lib/NGT/Graph.h b/lib/NGT/Graph.h index 9336313..146437a 100644 --- a/lib/NGT/Graph.h +++ b/lib/NGT/Graph.h @@ -938,7 +938,13 @@ namespace NGT { bool addEdge(ObjectID target, ObjectID addID, Distance addDistance, bool identityCheck = true) { size_t minsize = 0; GraphNode &node = property.truncationThreshold == 0 ? *getNode(target) : *getNode(target, minsize); - addEdge(node, addID, addDistance, identityCheck); + try { + addEdge(node, addID, addDistance, identityCheck); + } catch(NGT::Exception &err) { + std::stringstream msg; + msg << " Cannot add the edge. " << target << "->" << addID << ". " << err.what(); + NGTThrowException(msg); + } if ((size_t)property.truncationThreshold != 0 && node.size() - minsize > (size_t)property.truncationThreshold) { return true; diff --git a/lib/NGT/Index.cpp b/lib/NGT/Index.cpp index 0b3f179..8fdbdcd 100644 --- a/lib/NGT/Index.cpp +++ b/lib/NGT/Index.cpp @@ -423,8 +423,9 @@ CreateIndexThread::run() { graphIndex.searchForNNGInsertion(obj, *rs); } } catch(NGT::Exception &err) { - cerr << "CreateIndex::search:Fatal error! ID=" << job.id << " " << err.what() << endl; - abort(); + stringstream msg; + msg << "CreateIndex::search:Fatal error! ID=" << job.id << " " << err.what(); + NGTThrowException(msg); } job.results = rs; poolThread.getOutputJobQueue().pushBack(job); @@ -730,7 +731,13 @@ insertMultipleSearchResults(GraphIndex &neighborhoodGraph, cerr << " The number of edges for the node=" << gr.results->size() << endl; cerr << " The pruned parameter (edgeSizeForSearch [-S])=" << neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForSearch << endl; } - neighborhoodGraph.insertNode(gr.id, *gr.results); + try { + neighborhoodGraph.insertNode(gr.id, *gr.results); + } catch(NGT::Exception &err) { + std::stringstream msg; + msg << " Cannot insert the node. " << gr.id << ". " << err.what(); + NGTThrowException(msg); + } } } @@ -1403,7 +1410,13 @@ GraphAndTreeIndex::createIndex(const vector > &object if (((*job.results).size() == 0) && (job.id != 1)) { cerr << "insert warning!! No searched nodes!. If the first time, no problem. " << job.id << endl; } - GraphIndex::insertNode(job.id, *job.results); + try { + GraphIndex::insertNode(job.id, *job.results); + } catch(NGT::Exception &err) { + std::stringstream msg; + msg << " Cannot insert the node. " << job.id << ". " << err.what(); + NGTThrowException(msg); + } } if (job.results != 0) { delete job.results; diff --git a/lib/NGT/Node.cpp b/lib/NGT/Node.cpp index e0a7996..6bd2913 100644 --- a/lib/NGT/Node.cpp +++ b/lib/NGT/Node.cpp @@ -235,6 +235,19 @@ LeafNode::removeObject(size_t id, size_t replaceId) { size_t fsize = getObjectSize(); size_t idx; + if (replaceId != 0) { + for (idx = 0; idx < fsize; idx++) { +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + if (getObjectIDs(allocator)[idx].id == replaceId) { +#else + if (getObjectIDs()[idx].id == replaceId) { +#endif + std::cerr << " Warning. found the same ID as the replaced ID." << std::endl; + replaceId = 0; + break; + } + } + } for (idx = 0; idx < fsize; idx++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) if (getObjectIDs(allocator)[idx].id == id) { diff --git a/lib/NGT/PrimitiveComparator.h b/lib/NGT/PrimitiveComparator.h index a2d3ae4..fc41ef5 100644 --- a/lib/NGT/PrimitiveComparator.h +++ b/lib/NGT/PrimitiveComparator.h @@ -837,13 +837,14 @@ namespace NGT { template inline static double compareCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { - return 1.0 - compareCosine(a, b, size); + auto v = 1.0 - compareCosine(a, b, size); + return v < 0.0 ? -v : v; } template inline static double compareNormalizedCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { - double v = 1.0 - compareDotProduct(a, b, size); - return v < 0.0 ? 0.0 : v; + auto v = 1.0 - compareDotProduct(a, b, size); + return v < 0.0 ? -v : v; } class L1Uint8 { diff --git a/samples/qg-l2-float/qg-l2-float.cpp b/samples/qg-l2-float/qg-l2-float.cpp index ee98ed7..4ce5ca7 100644 --- a/samples/qg-l2-float/qg-l2-float.cpp +++ b/samples/qg-l2-float/qg-l2-float.cpp @@ -4,6 +4,7 @@ int main(int argc, char **argv) { +#ifdef NGTQ_QBG string indexPath = "index"; string objectFile = "./data/sift-dataset-5k.tsv"; string queryFile = "./data/sift-query-3.tsv"; @@ -115,7 +116,7 @@ main(int argc, char **argv) cerr << "Error" << endl; return 1; } - +#endif return 0; }