Skip to content

Commit

Permalink
v1.7.7 add search adjustment to ngtpy
Browse files Browse the repository at this point in the history
  • Loading branch information
masajiro committed Jul 29, 2019
1 parent 6cd35d6 commit cccc6b3
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 50 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
NGT
<div align="center">
<img src="./assets/logo.svg" width="50%">
</div>

===

Neighborhood Graph and Tree for Indexing High-dimensional Data
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.6
1.7.7
21 changes: 21 additions & 0 deletions lib/NGT/Capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,27 @@ bool ngt_batch_append_index(NGTIndex index, float *obj, uint32_t data_count, NGT
}
}

/**
 * Inserts a batch of objects into the index and reports the ID assigned to each.
 *
 * @param index       Index handle; it is cast to NGT::Index*.
 * @param obj         Buffer of data_count consecutive vectors, each holding as many
 *                    floats as the dimension reported by the index's object space.
 * @param data_count  Number of vectors in obj and number of slots in ids.
 * @param ids         Output array: ids[i] receives the value returned by insert() for
 *                    the i-th vector, or 0 when that insertion threw.
 * @param error       Error handle handed to operate_error_string_ on failure.
 * @return true when every vector was inserted; false when a required argument is
 *         NULL or at least one insertion threw (the remaining vectors are still tried).
 */
bool ngt_batch_insert_index(NGTIndex index, float *obj, uint32_t data_count, uint32_t *ids, NGTError error) {
  // Reject NULL handles up front instead of dereferencing them. The buffers are
  // only required when there is at least one vector to process.
  if (index == NULL || (data_count != 0 && (obj == NULL || ids == NULL))) {
    std::stringstream ss;
    ss << "Capi : " << __FUNCTION__ << "() : parameter error: index = " << index
       << " obj = " << static_cast<const void*>(obj)
       << " ids = " << static_cast<const void*>(ids);
    operate_error_string_(ss, error);
    return false;
  }

  NGT::Index* pindex = static_cast<NGT::Index*>(index);
  int32_t dim = pindex->getObjectSpace().getDimension();

  bool status = true;
  float *objptr = obj;
  for (size_t idx = 0; idx < data_count; idx++, objptr += dim) {
    try {
      // insert() is fed a vector<double>, so widen the float values first.
      std::vector<double> vobj(objptr, objptr + dim);
      ids[idx] = pindex->insert(vobj);
    } catch (const std::exception &err) {
      // Record the failure but keep going so the other vectors are still inserted.
      // Only the message of the most recent failure is kept in the error handle.
      status = false;
      ids[idx] = 0;
      std::stringstream ss;
      ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
      operate_error_string_(ss, error);
    }
  }
  return status;
}

bool ngt_create_index(NGTIndex index, uint32_t pool_size, NGTError error) {
if(index == NULL){
std::stringstream ss;
Expand Down
2 changes: 2 additions & 0 deletions lib/NGT/Capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ ObjectID ngt_append_index_as_float(NGTIndex, float*, uint32_t, NGTError);

bool ngt_batch_append_index(NGTIndex, float*, uint32_t, NGTError);

// Inserts a batch of vectors stored back to back in the float buffer.
// Arguments: index handle, vector buffer, number of vectors, output ID array
// (one slot per vector; a slot is set to 0 when that vector's insertion threw),
// and the error handle that receives the failure message.
// Returns false if any insertion failed.
bool ngt_batch_insert_index(NGTIndex, float*, uint32_t, uint32_t *, NGTError);

bool ngt_create_index(NGTIndex, uint32_t, NGTError);

bool ngt_remove_index(NGTIndex, ObjectID, NGTError);
Expand Down
30 changes: 3 additions & 27 deletions lib/NGT/Command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,35 +716,10 @@
#endif
cerr << "ngt::reconstructGraph: Extract the graph data." << endl;
// extract only edges from the index to reduce the memory usage.
NGT::GraphIndex &outGraph = (NGT::GraphIndex&)outIndex.getIndex();
Timer timer;
timer.start();
vector<NGT::ObjectDistances> graph;
graph.reserve(outGraph.repository.size());
for (size_t id = 1; id < outGraph.repository.size(); id++) {
if (id % 1000000 == 0) {
cerr << "Processed " << id << " objects." << endl;
}
try {
NGT::GraphNode &node = *outGraph.getNode(id);
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
NGT::ObjectDistances nd;
nd.reserve(node.size());
for (auto n = node.begin(outGraph.repository.allocator); n != node.end(outGraph.repository.allocator); ++n) {
nd.push_back(ObjectDistance((*n).id, (*n).distance));
}
graph.push_back(nd);
#else
graph.push_back(node);
#endif
if (graph.back().size() != graph.back().capacity()) {
cerr << "ngt::reconstructGraph: Warning! The graph size must be the same as the capacity. " << id << endl;
}
} catch(NGT::Exception &err) {
cerr << "ngt::reconstructGraph: Warning! Cannot get the node. ID=" << id << ":" << err.what() << endl;
continue;
}
}
GraphReconstructor::extractGraph(graph, outIndex);

char mode = args.getChar("m", 's');
char pamode = args.getChar("P", 'a');
Expand Down Expand Up @@ -794,7 +769,8 @@
double gtEpsilon = 0.1;
double mergin = 0.2;

NGT::Optimizer optimizer(outIndex);
NGT::Optimizer optimizer(outIndex);
NGT::GraphIndex &outGraph = (NGT::GraphIndex&)outIndex.getIndex();
try {
auto param = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, querySize, gtEpsilon, mergin);
NeighborhoodGraph::Property &prop = outGraph.getGraphProperty();
Expand Down
37 changes: 36 additions & 1 deletion lib/NGT/GraphReconstructor.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,36 @@ namespace NGT {

class GraphReconstructor {
public:
// Copies the edge list of every node in the index's graph into `graph`.
//
// Node IDs start at 1; the edges of node `id` are appended in ID order, so when
// `graph` is empty on entry the edges of node `id` end up at graph[id - 1].
// A node that cannot be fetched (getNode throws NGT::Exception) is reported on
// cerr and represented by an empty edge list, so later entries keep their
// position and code that reads graph[id - 1] still sees the right node.
//
// graph: output vector the edge lists are appended to.
// index: index whose getIndex() result is treated as an NGT::GraphIndex.
static void extractGraph(vector<NGT::ObjectDistances> &graph, NGT::Index &index) {
  NGT::GraphIndex &graphIndex = static_cast<NGT::GraphIndex&>(index.getIndex());
  graph.reserve(graphIndex.repository.size());
  for (size_t id = 1; id < graphIndex.repository.size(); id++) {
    // Progress report once per million nodes.
    if (id % 1000000 == 0) {
      cerr << "GraphReconstructor::extractGraph: Processed " << id << " objects." << endl;
    }
    try {
      NGT::GraphNode &node = *graphIndex.getNode(id);
#if defined(NGT_SHARED_MEMORY_ALLOCATOR)
      // With the shared-memory allocator the node is iterated through the
      // repository's allocator, so its edges are copied one by one.
      NGT::ObjectDistances nd;
      nd.reserve(node.size());
      for (auto n = node.begin(graphIndex.repository.allocator); n != node.end(graphIndex.repository.allocator); ++n) {
        nd.push_back(ObjectDistance((*n).id, (*n).distance));
      }
      graph.push_back(nd);
#else
      graph.push_back(node);
#endif
      if (graph.back().size() != graph.back().capacity()) {
        cerr << "GraphReconstructor::extractGraph: Warning! The graph size must be the same as the capacity. " << id << endl;
      }
    } catch(NGT::Exception &err) {
      cerr << "GraphReconstructor::extractGraph: Warning! Cannot get the node. ID=" << id << ":" << err.what() << endl;
      // Keep positions aligned with IDs: without a placeholder every following
      // node would shift down by one slot.
      graph.push_back(NGT::ObjectDistances());
    }
  }

}




Expand Down Expand Up @@ -382,12 +412,14 @@ class GraphReconstructor {
}
originalEdgeTimer.stop();

reverseEdgeTimer.start();
reverseEdgeTimer.start();
int insufficientNodeCount = 0;
for (size_t id = 1; id <= graph.size(); ++id) {
try {
NGT::ObjectDistances &node = graph[id - 1];
size_t rsize = reverseEdgeSize;
if (rsize > node.size()) {
insufficientNodeCount++;
rsize = node.size();
}
for (size_t i = 0; i < rsize; ++i) {
Expand All @@ -408,6 +440,9 @@ class GraphReconstructor {
}
}
reverseEdgeTimer.stop();
if (insufficientNodeCount != 0) {
cerr << "# of the nodes edges of which are in short = " << insufficientNodeCount << endl;
}

normalizeEdgeTimer.start();
for (size_t id = 1; id < outGraph.repository.size(); id++) {
Expand Down
7 changes: 3 additions & 4 deletions lib/NGT/Optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#pragma once

#include "Command.h"

#define NGT_LOG_BASED_OPTIMIZATION

namespace NGT {
Expand Down Expand Up @@ -525,9 +527,6 @@ namespace NGT {
toOver = fromOver;
toOverEpsilon = fromOverEpsilon;
}
if (fromOverEpsilon == toOverEpsilon) {
cerr << "Warning!! fromOverEpsilon equals toOverEpsilon " << fromOverEpsilon << ". This might cause some problems." << endl;
}
fromUnderEpsilon = fromOverEpsilon - epsilonStep;
}
sp.beginOfEpsilon = sp.endOfEpsilon = fromUnderEpsilon;
Expand Down Expand Up @@ -742,7 +741,7 @@ namespace NGT {
cerr << "adjustRateSearchEdgeSize::explore for the mergin " << mergin << ", " << rateStart << "..." << endl;
for (size_t rateStep = 16; rateStep != 1; rateStep /= 2) {
double prevTime = DBL_MAX;
for (size_t rate = rateStart; rate < 200; rate += rateStep) {
for (size_t rate = rateStart; rate < 2000; rate += rateStep) {
if (rate > 1000) {
stringstream msg;
msg << "rate is too large! " << rate;
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# for pip >= 10.0
from pip._internal import locations

version = '1.4.0'
version = '1.5.0'

if static_library:
with open('../VERSION', 'r') as fh:
Expand Down
Loading

0 comments on commit cccc6b3

Please sign in to comment.