diff --git a/README.md b/README.md
index 67d174b..8dee54b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,7 @@
-NGT
+
+
+
+
===
Neighborhood Graph and Tree for Indexing High-dimensional Data
diff --git a/VERSION b/VERSION
index de28578..91c74a5 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.7.6
+1.7.7
diff --git a/lib/NGT/Capi.cpp b/lib/NGT/Capi.cpp
index df27bb2..cdf5b52 100644
--- a/lib/NGT/Capi.cpp
+++ b/lib/NGT/Capi.cpp
@@ -511,6 +511,27 @@ bool ngt_batch_append_index(NGTIndex index, float *obj, uint32_t data_count, NGT
}
}
+/**
+ * Inserts data_count objects, stored back to back in obj, into the index.
+ *
+ * Each object spans as many consecutive floats as the dimension reported by
+ * the index's object space. The value returned by NGT::Index::insert() for the
+ * i-th object is written to ids[i]. When inserting an object throws, ids[i] is
+ * set to 0, the exception message is handed to operate_error_string_(), and
+ * the remaining objects are still processed.
+ *
+ * @param index      handle of the target index; must not be NULL.
+ * @param obj        data_count * dimension floats; may be NULL only if data_count is 0.
+ * @param data_count number of objects stored in obj.
+ * @param ids        output array with room for data_count entries; may be NULL only if data_count is 0.
+ * @param error      error object passed to operate_error_string_() on failure.
+ * @return true if every object was inserted, false if an argument was invalid
+ *         or at least one insertion failed.
+ */
+bool ngt_batch_insert_index(NGTIndex index, float *obj, uint32_t data_count, uint32_t *ids, NGTError error) {
+  // Reject NULL arguments up front instead of dereferencing them below.
+  if (index == NULL || (data_count != 0 && (obj == NULL || ids == NULL))) {
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : parameter error: index = " << index
+       << " obj = " << static_cast<const void*>(obj)
+       << " ids = " << static_cast<const void*>(ids);
+    operate_error_string_(ss, error);
+    return false;
+  }
+
+  NGT::Index *pindex = static_cast<NGT::Index*>(index);
+  size_t dim = 0;
+  try {
+    // Looked up inside a try block so that a failure is reported through
+    // error rather than propagating out of this function as an exception.
+    dim = pindex->getObjectSpace().getDimension();
+  } catch (const std::exception &err) {
+    std::stringstream ss;
+    ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
+    operate_error_string_(ss, error);
+    return false;
+  }
+
+  bool status = true;
+  const float *objptr = obj;
+  for (size_t idx = 0; idx < data_count; idx++, objptr += dim) {
+    try {
+      std::vector<float> vobj(objptr, objptr + dim);
+      ids[idx] = pindex->insert(vobj);
+    } catch (const std::exception &err) {
+      // A failed object does not abort the batch: mark it with ID 0 and go on.
+      status = false;
+      ids[idx] = 0;
+      std::stringstream ss;
+      ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
+      operate_error_string_(ss, error);
+    }
+  }
+  return status;
+}
+
bool ngt_create_index(NGTIndex index, uint32_t pool_size, NGTError error) {
if(index == NULL){
std::stringstream ss;
diff --git a/lib/NGT/Capi.h b/lib/NGT/Capi.h
index 1c9d8ff..d39222b 100644
--- a/lib/NGT/Capi.h
+++ b/lib/NGT/Capi.h
@@ -102,6 +102,8 @@ ObjectID ngt_append_index_as_float(NGTIndex, float*, uint32_t, NGTError);
bool ngt_batch_append_index(NGTIndex, float*, uint32_t, NGTError);
+bool ngt_batch_insert_index(NGTIndex, float*, uint32_t, uint32_t *, NGTError);
+
bool ngt_create_index(NGTIndex, uint32_t, NGTError);
bool ngt_remove_index(NGTIndex, ObjectID, NGTError);
diff --git a/lib/NGT/Command.cpp b/lib/NGT/Command.cpp
index c7d2522..1adfc50 100644
--- a/lib/NGT/Command.cpp
+++ b/lib/NGT/Command.cpp
@@ -716,35 +716,10 @@
#endif
cerr << "ngt::reconstructGraph: Extract the graph data." << endl;
// extract only edges from the index to reduce the memory usage.
- NGT::GraphIndex &outGraph = (NGT::GraphIndex&)outIndex.getIndex();
Timer timer;
timer.start();
vector graph;
- graph.reserve(outGraph.repository.size());
- for (size_t id = 1; id < outGraph.repository.size(); id++) {
- if (id % 1000000 == 0) {
- cerr << "Processed " << id << " objects." << endl;
- }
- try {
- NGT::GraphNode &node = *outGraph.getNode(id);
-#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
- NGT::ObjectDistances nd;
- nd.reserve(node.size());
- for (auto n = node.begin(outGraph.repository.allocator); n != node.end(outGraph.repository.allocator); ++n) {
- nd.push_back(ObjectDistance((*n).id, (*n).distance));
- }
- graph.push_back(nd);
-#else
- graph.push_back(node);
-#endif
- if (graph.back().size() != graph.back().capacity()) {
- cerr << "ngt::reconstructGraph: Warning! The graph size must be the same as the capacity. " << id << endl;
- }
- } catch(NGT::Exception &err) {
- cerr << "ngt::reconstructGraph: Warning! Cannot get the node. ID=" << id << ":" << err.what() << endl;
- continue;
- }
- }
+ GraphReconstructor::extractGraph(graph, outIndex);
char mode = args.getChar("m", 's');
char pamode = args.getChar("P", 'a');
@@ -794,7 +769,8 @@
double gtEpsilon = 0.1;
double mergin = 0.2;
- NGT::Optimizer optimizer(outIndex);
+ NGT::Optimizer optimizer(outIndex);
+ NGT::GraphIndex &outGraph = (NGT::GraphIndex&)outIndex.getIndex();
try {
auto param = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, querySize, gtEpsilon, mergin);
NeighborhoodGraph::Property &prop = outGraph.getGraphProperty();
diff --git a/lib/NGT/GraphReconstructor.h b/lib/NGT/GraphReconstructor.h
index f3ebe42..c6c5620 100644
--- a/lib/NGT/GraphReconstructor.h
+++ b/lib/NGT/GraphReconstructor.h
@@ -30,6 +30,36 @@ namespace NGT {
class GraphReconstructor {
public:
+    /**
+     * Copies the edge list of every node of the graph index behind index
+     * into graph, one entry per node ID starting at ID 1.
+     *
+     * Exactly one entry is appended per ID, so an initially empty graph ends
+     * up with the edges of node ID at graph[ID - 1]. A node that cannot be
+     * fetched is reported on stderr and represented by an empty edge list.
+     *
+     * @param graph receives the extracted edge lists (appended to).
+     * @param index index whose underlying graph index is read.
+     */
+    static void extractGraph(vector<NGT::ObjectDistances> &graph, NGT::Index &index) {
+      NGT::GraphIndex &graphIndex = static_cast<NGT::GraphIndex&>(index.getIndex());
+      graph.reserve(graphIndex.repository.size());
+      for (size_t id = 1; id < graphIndex.repository.size(); id++) {
+        if (id % 1000000 == 0) {
+          cerr << "GraphReconstructor::extractGraph: Processed " << id << " objects." << endl;
+        }
+        const size_t sizeBefore = graph.size();
+        try {
+          NGT::GraphNode &node = *graphIndex.getNode(id);
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+          // Rebuild the edge list as a plain ObjectDistances by walking the
+          // node with the repository's allocator.
+          NGT::ObjectDistances nd;
+          nd.reserve(node.size());
+          for (auto n = node.begin(graphIndex.repository.allocator); n != node.end(graphIndex.repository.allocator); ++n) {
+            nd.push_back(ObjectDistance((*n).id, (*n).distance));
+          }
+          graph.push_back(nd);
+#else
+          graph.push_back(node);
+#endif
+          if (graph.back().size() != graph.back().capacity()) {
+            cerr << "GraphReconstructor::extractGraph: Warning! The graph size must be the same as the capacity. " << id << endl;
+          }
+        } catch(NGT::Exception &err) {
+          cerr << "GraphReconstructor::extractGraph: Warning! Cannot get the node. ID=" << id << ":" << err.what() << endl;
+          // Keep one slot per ID: without a placeholder every later node
+          // would shift down by one and be read back under the wrong ID.
+          if (graph.size() == sizeBefore) {
+            graph.push_back(NGT::ObjectDistances());
+          }
+          continue;
+        }
+      }
+
+    }
+
@@ -382,12 +412,14 @@ class GraphReconstructor {
}
originalEdgeTimer.stop();
- reverseEdgeTimer.start();
+ reverseEdgeTimer.start();
+ int insufficientNodeCount = 0;
for (size_t id = 1; id <= graph.size(); ++id) {
try {
NGT::ObjectDistances &node = graph[id - 1];
size_t rsize = reverseEdgeSize;
if (rsize > node.size()) {
+ insufficientNodeCount++;
rsize = node.size();
}
for (size_t i = 0; i < rsize; ++i) {
@@ -408,6 +440,9 @@ class GraphReconstructor {
}
}
reverseEdgeTimer.stop();
+ if (insufficientNodeCount != 0) {
+ cerr << "# of the nodes edges of which are in short = " << insufficientNodeCount << endl;
+ }
normalizeEdgeTimer.start();
for (size_t id = 1; id < outGraph.repository.size(); id++) {
diff --git a/lib/NGT/Optimizer.h b/lib/NGT/Optimizer.h
index cecfa6c..bd88a38 100644
--- a/lib/NGT/Optimizer.h
+++ b/lib/NGT/Optimizer.h
@@ -16,6 +16,8 @@
#pragma once
+#include "Command.h"
+
#define NGT_LOG_BASED_OPTIMIZATION
namespace NGT {
@@ -525,9 +527,6 @@ namespace NGT {
toOver = fromOver;
toOverEpsilon = fromOverEpsilon;
}
- if (fromOverEpsilon == toOverEpsilon) {
- cerr << "Warning!! fromOverEpsilon equals toOverEpsilon " << fromOverEpsilon << ". This might cause some problems." << endl;
- }
fromUnderEpsilon = fromOverEpsilon - epsilonStep;
}
sp.beginOfEpsilon = sp.endOfEpsilon = fromUnderEpsilon;
@@ -742,7 +741,7 @@ namespace NGT {
cerr << "adjustRateSearchEdgeSize::explore for the mergin " << mergin << ", " << rateStart << "..." << endl;
for (size_t rateStep = 16; rateStep != 1; rateStep /= 2) {
double prevTime = DBL_MAX;
- for (size_t rate = rateStart; rate < 200; rate += rateStep) {
+ for (size_t rate = rateStart; rate < 2000; rate += rateStep) {
if (rate > 1000) {
stringstream msg;
msg << "rate is too large! " << rate;
diff --git a/python/setup.py b/python/setup.py
index f36c645..40e9b77 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -21,7 +21,7 @@
# for pip >= 10.0
from pip._internal import locations
-version = '1.4.0'
+version = '1.5.0'
if static_library:
with open('../VERSION', 'r') as fh:
diff --git a/python/src/ngtpy.cpp b/python/src/ngtpy.cpp
index ea79184..e696a50 100644
--- a/python/src/ngtpy.cpp
+++ b/python/src/ngtpy.cpp
@@ -15,6 +15,8 @@
//
#include "NGT/Index.h"
+#include "NGT/GraphReconstructor.h"
+#include "NGT/Optimizer.h"
#include
#include
@@ -207,6 +209,8 @@ class Index : public NGT::Index {
NGT::SearchContainer sc(*ngtquery);
sc.setSize(size); // the number of resultant objects.
+ NGT::ObjectDistances rs;
+ sc.setResults(&rs);
NGT::Index::linearSearch(sc);
@@ -214,34 +218,27 @@ class Index : public NGT::Index {
NGT::Index::deleteObject(ngtquery);
if (!withDistance) {
- NGT::ResultPriorityQueue &r = sc.getWorkingResult();
- py::array_t ids(r.size());
+ py::array_t ids(rs.size());
py::buffer_info idsinfo = ids.request();
- int *endptr = reinterpret_cast(idsinfo.ptr);
- int *ptr = endptr + (r.size() - 1);
+ int *ptr = reinterpret_cast(idsinfo.ptr);
if (zeroNumbering) {
- while (ptr >= endptr) {
- *ptr-- = r.top().id - 1;
- r.pop();
+ for (auto ri = rs.begin(); ri != rs.end(); ++ri) {
+ *ptr++ = (*ri).id - 1;
}
} else {
- while (ptr >= endptr) {
- *ptr-- = r.top().id;
- r.pop();
+ for (auto ri = rs.begin(); ri != rs.end(); ++ri) {
+ *ptr++ = (*ri).id;
}
}
-
return ids;
}
py::list results;
- NGT::ObjectDistances r;
- r.moveFrom(sc.getWorkingResult());
if (zeroNumbering) {
- for (auto ri = r.begin(); ri != r.end(); ++ri) {
+ for (auto ri = rs.begin(); ri != rs.end(); ++ri) {
results.append(py::make_tuple((*ri).id - 1, (*ri).distance));
}
} else {
- for (auto ri = r.begin(); ri != r.end(); ++ri) {
+ for (auto ri = rs.begin(); ri != rs.end(); ++ri) {
results.append(py::make_tuple((*ri).id, (*ri).distance));
}
}
@@ -287,6 +284,139 @@ class Index : public NGT::Index {
size_t numOfDistanceComputations;
};
+/**
+ * Helper exposed to Python that reconstructs the graph of an existing index
+ * and tunes its dynamic search edge size coefficients.
+ *
+ * The parameters are initialised with defaults by the constructor and can be
+ * overridden selectively with set().
+ */
+class Optimizer {
+public:
+  Optimizer():
+    numOfOutgoingEdges(10),
+    numOfIncomingEdges(120),
+    baseAccuracyRange(0.30, 0.50),
+    rateAccuracyRange(0.80, 0.90),
+    numOfQueries(100),
+    gtEpsilon(0.1),
+    mergin(0.2) {
+  }
+
+  /**
+   * Opens the index at indexPath, recomputes dynamicEdgeSizeBase and
+   * dynamicEdgeSizeRate with NGT::Optimizer::adjustSearchEdgeSize(), and
+   * saves the index back to the same path.
+   *
+   * Throws (via NGTThrowException) when the adjustment fails.
+   */
+  void adjustSearchCoefficients(const string indexPath){
+    NGT::Index index(indexPath);
+    NGT::GraphIndex &graph = static_cast<NGT::GraphIndex&>(index.getIndex());
+    NGT::Optimizer optimizer(index);
+    try {
+      auto coefficients = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, numOfQueries, gtEpsilon, mergin);
+      NGT::NeighborhoodGraph::Property &prop = graph.getGraphProperty();
+      prop.dynamicEdgeSizeBase = coefficients.first;
+      prop.dynamicEdgeSizeRate = coefficients.second;
+    } catch(NGT::Exception &err) {
+      stringstream msg;
+      msg << "Optimizer::adjustSearchCoefficients: Cannot adjust the search coefficients. " << err.what();
+      NGTThrowException(msg);
+    }
+    graph.saveIndex(indexPath);
+  }
+
+  /**
+   * Reconstructs the graph of the index at inIndexPath, adjusts its paths
+   * and search coefficients, and saves the result to outIndexPath.
+   *
+   * With NGT_SHARED_MEMORY_ALLOCATOR the input index is first copied to
+   * outIndexPath (which must not exist yet) and the copy is modified.
+   *
+   * Throws (via NGTThrowException) when the edge counts are inconsistent,
+   * the output already exists or cannot be created (shared memory build),
+   * or the search coefficients cannot be adjusted.
+   */
+  void execute(
+	       const string inIndexPath,
+	       const string outIndexPath
+	       ){
+    // The two edge counts must be either both set or both unset (negative).
+    if ((numOfOutgoingEdges < 0 && numOfIncomingEdges >= 0) ||
+	(numOfOutgoingEdges >= 0 && numOfIncomingEdges < 0)) {
+      NGTThrowException("Optimizer::execute: Specified any of the number of edges is invalid.");
+    }
+#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
+    if (access(outIndexPath.c_str(), 0) == 0) {
+      stringstream msg;
+      msg << "Optimizer::execute: The specified index exists. " << outIndexPath;
+      NGTThrowException(msg);
+    }
+    const string com = "cp -r " + inIndexPath + " " + outIndexPath;
+    // Fail here with a clear message rather than when opening a missing copy.
+    if (system(com.c_str()) != 0) {
+      stringstream msg;
+      msg << "Optimizer::execute: Cannot copy the index. " << com;
+      NGTThrowException(msg);
+    }
+    NGT::Index outIndex(outIndexPath);
+#else
+    NGT::Index outIndex(inIndexPath);
+#endif
+    cerr << "Optimizer::execute: Extract the graph data." << endl;
+    // extract only edges from the index to reduce the memory usage.
+    NGT::GraphIndex &outGraph = static_cast<NGT::GraphIndex&>(outIndex.getIndex());
+    NGT::Timer timer;
+    timer.start();
+    vector<NGT::ObjectDistances> graph;
+    NGT::GraphReconstructor::extractGraph(graph, outIndex);
+
+    if (numOfOutgoingEdges >= 0) {
+      NGT::GraphReconstructor::convertToANNG(graph);
+      NGT::GraphReconstructor::reconstructGraph(graph, outIndex, numOfOutgoingEdges, numOfIncomingEdges);
+    }
+    timer.stop();
+    cerr << "Optimizer::execute: Graph reconstruction time=" << timer.time << " (sec) " << endl;
+    timer.reset();
+    timer.start();
+    NGT::GraphReconstructor::adjustPathsEffectively(outIndex);
+    timer.stop();
+    cerr << "Optimizer::execute: Path adjustment time=" << timer.time << " (sec) " << endl;
+
+    NGT::Optimizer optimizer(outIndex);
+    try {
+      auto coefficients = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, numOfQueries, gtEpsilon, mergin);
+      NGT::NeighborhoodGraph::Property &prop = outGraph.getGraphProperty();
+      prop.dynamicEdgeSizeBase = coefficients.first;
+      prop.dynamicEdgeSizeRate = coefficients.second;
+    } catch(NGT::Exception &err) {
+      stringstream msg;
+      msg << "Optimizer::execute: Cannot adjust the search coefficients. " << err.what();
+      NGTThrowException(msg);
+    }
+
+    outGraph.saveIndex(outIndexPath);
+
+  }
+
+  /**
+   * Overrides individual parameters; an argument holding its "unset" value
+   * leaves the corresponding parameter as it is.
+   *
+   * Unset values: negative for outgoing and incoming, non-positive for
+   * nofqs, the accuracy bounds and m, and exactly DBL_MIN for qte.
+   */
+  void set(int outgoing, int incoming, int nofqs,
+	   float baseAccuracyFrom, float baseAccuracyTo,
+	   float rateAccuracyFrom, float rateAccuracyTo,
+	   double qte, double m
+	   ) {
+    if (outgoing >= 0) {
+      numOfOutgoingEdges = outgoing;
+    }
+    if (incoming >= 0) {
+      numOfIncomingEdges = incoming;
+    }
+    if (nofqs > 0) {
+      numOfQueries = nofqs;
+    }
+    auto range = baseAccuracyRange;
+    if (baseAccuracyFrom > 0.0) {
+      range.first = baseAccuracyFrom;
+    }
+    if (baseAccuracyTo > 0.0) {
+      range.second = baseAccuracyTo;
+    }
+    baseAccuracyRange = range;
+    range = rateAccuracyRange;
+    if (rateAccuracyFrom > 0.0) {
+      range.first = rateAccuracyFrom;
+    }
+    if (rateAccuracyTo > 0.0) {
+      range.second = rateAccuracyTo;
+    }
+    rateAccuracyRange = range;
+    if (qte != DBL_MIN) {
+      gtEpsilon = qte;
+    }
+    if (m > 0.0) {
+      mergin = m;
+    }
+  }
+
+  // Signed on purpose: execute() compares both edge counts against zero,
+  // which is meaningless for an unsigned type.
+  int numOfOutgoingEdges;
+  int numOfIncomingEdges;
+  pair<float, float> baseAccuracyRange;
+  pair<float, float> rateAccuracyRange;
+  size_t numOfQueries;
+  double gtEpsilon;
+  double mergin;
+
+};
+
PYBIND11_MODULE(ngtpy, m) {
m.doc() = "ngt python";
@@ -330,5 +460,24 @@ PYBIND11_MODULE(ngtpy, m) {
.def("insert", &::Index::insert,
py::arg("object"),
py::arg("debug") = false);
+
+ py::class_(m, "Optimizer")
+ .def(py::init<>())
+ .def("execute", &::Optimizer::execute,
+ py::arg("in_index_path"),
+ py::arg("out_index_path"))
+ .def("adjust_search_coefficients", &::Optimizer::adjustSearchCoefficients,
+ py::arg("index_path"))
+ .def("set", &::Optimizer::set,
+ py::arg("num_of_outgoings") = -1,
+ py::arg("num_of_incomings") = -1,
+ py::arg("num_of_queries") = -1,
+ py::arg("low_accuracy_from") = -1.0,
+ py::arg("low_accuracy_to") = -1.0,
+ py::arg("high_accuracy_from") = -1.0,
+ py::arg("high_accuracy_to") = -1.0,
+ py::arg("gt_epsilon") = DBL_MIN,
+ py::arg("merge") = -1.0
+ );
}