Skip to content

Commit

Permalink
v1.7.10 correct avx definitions, add a range search parameter and sup…
Browse files Browse the repository at this point in the history
…press messages for ngtpy
  • Loading branch information
masajiro committed Sep 25, 2019
1 parent d62ad3e commit d219f3d
Show file tree
Hide file tree
Showing 15 changed files with 467 additions and 178 deletions.
6 changes: 2 additions & 4 deletions README-jp.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,8 @@ Neighborhood Graph and Tree for Indexing High-dimensional Data
$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
$ brew install cmake
$ brew install gcc@9
$ ln -s ./gcc-9 /usr/local/bin/gcc
$ ln -s ./g++-9 /usr/local/bin/g++
$ export CXX=g++
$ export CC=gcc
$ export CXX=/usr/local/bin/g++
$ export CC=/usr/local/bin/gcc
$ unzip NGT-x.x.x.zip
$ cd NGT-x.x.x
$ mkdir build
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Neighborhood Graph and Tree for Indexing High-dimensional Data
**NGT** provides commands and a library for performing high-speed approximate nearest neighbor searches against a large volume of data (from several million to several tens of millions of items) in a high-dimensional vector space (from several tens to several thousand dimensions).

News
----

- 06/26/2019 Jaccard distance is available. (v1.7.6)
- 06/10/2019 PyPI NGT package v1.7.5 is now available.
- 01/17/2019 Python NGT can be installed via pip from PyPI. (v1.5.1)
Expand Down Expand Up @@ -40,10 +42,8 @@ Installation
$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
$ brew install cmake
$ brew install gcc@9
$ ln -s ./gcc-9 /usr/local/bin/gcc
$ ln -s ./g++-9 /usr/local/bin/g++
$ export CXX=g++
$ export CC=gcc
$ export CXX=/usr/local/bin/g++
$ export CC=/usr/local/bin/gcc
$ unzip NGT-x.x.x.zip
$ cd NGT-x.x.x
$ mkdir build
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.9
1.7.10
2 changes: 1 addition & 1 deletion bin/ngt/README-jp.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ ANNGやBKNNGを指定した場合には登録データ(ノード)からエ
グラフ生成時の各ノードの初期エッジ数を指定します。インデックス生成終了時にANNGやBKNNGでは指定されたエッジ数以上のエッジが付与されますが、KNNGでは指定されたエッジ数となります。

**-S** *no\_of\_edges\_at\_search\_time* (デフォルト=40)
インデックス生成に伴う検索時及び生成後の検索時に利用するエッジ数を指定します。searchコマンドによる検索時においてエッジ数を指定しない場合にこの値が利用されます。グラフ上の各ノードの実エッジ数よりも少ないエッジ数で検索する場合に指定します。ANNGやBKNNGでは大量のエッジが生成される場合があり、エッジ数を制限することで検索性能が向上する傾向があります。エッジ数を制限しない(実エッジをすべて利用する)場合には0を指定します。0を指定した場合にはインデックスの生成が比較的遅くなりますが、検索時には最も高い性能を得られます。
インデックス生成に伴う検索時及び生成後の検索時に利用するエッジ数を指定します。searchコマンドによる検索時においてエッジ数を指定しない場合にこの値が利用されます。グラフ上の各ノードの実エッジ数よりも少ないエッジ数で検索する場合に指定します。ANNGやBKNNGでは大量のエッジが生成される場合があり、エッジ数を制限することで検索性能が向上する傾向があります。エッジ数を制限しない(実エッジをすべて利用する)場合には0を指定します。

**-o** *object\_type*
データオブジェクトの型を指定します。
Expand Down
108 changes: 54 additions & 54 deletions bin/ngt/README.md

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion lib/NGT/Clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@

#include "NGT/Index.h"

// Choose the SIMD flavour used by the clustering code.
// NGT_CLUSTER_AVX2 is defined only when AVX has not been disabled through
// NGT_AVX_DISABLED and the compiler reports AVX2 support (__AVX2__);
// in every other case NGT_CLUSTER_NO_AVX is defined instead.
#if !defined(NGT_AVX_DISABLED) && defined(__AVX2__)
#define NGT_CLUSTER_AVX2
#else
#define NGT_CLUSTER_NO_AVX
#endif

// Pull in the intrinsics header only when SIMD is usable; otherwise emit a
// build-time warning so the missing SIMD support is visible in the build log.
#if !defined(NGT_CLUSTER_NO_AVX)
#include <immintrin.h>
#else
#warning "*** SIMD is *NOT* available! ***"
#endif

#include <omp.h>
#include <random>

Expand Down Expand Up @@ -161,7 +177,7 @@ namespace NGT {
exit(1);
}
}
#if !defined(NGT_AVX_DISABLED) && defined(__AVX__)
#if !defined(NGT_CLUSTER_NO_AVX)
static double
sumOfSquares(float *a, float *b, size_t size) {
__m256 sum = _mm256_setzero_ps();
Expand Down
49 changes: 49 additions & 0 deletions lib/NGT/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <typeinfo>

#include <sys/time.h>
#include <fcntl.h>

#include "NGT/defines.h"
#include "NGT/SharedMemoryAllocator.h"
Expand Down Expand Up @@ -223,6 +224,54 @@ namespace NGT {
static int getProcessVmRSS() { return strtol(getProcessStatus("VmRSS")); }
};

// Temporarily redirects a file descriptor (2, i.e. stderr, by default) to a
// log file between begin() and end().
//
// The redirector does nothing unless it has been enabled, either through the
// constructor's first argument or through enable(). The destructor calls
// end(), so an active redirection is undone when the object goes away.
//
// NOTE(review): the class owns raw file descriptors but is still copyable;
// copying an instance while a redirection is active would restore/close the
// same descriptors twice. Callers are expected to pair begin()/end() on a
// single instance.
class StdOstreamRedirector {
public:
  // e: whether redirection is enabled.
  // path: file that receives the redirected output.
  // m: permission bits used if the log file has to be created.
  // f: file descriptor to redirect.
  StdOstreamRedirector(bool e = false, const std::string path = "/dev/null", mode_t m = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, int f = 2) {
    logFilePath = path;
    mode = m;
    logFD = -1;
    savedFdNo = -1;  // was left uninitialised before
    fdNo = f;
    enabled = e;
  }
  ~StdOstreamRedirector() { end(); }

  void enable() { enabled = true; }
  void disable() { enabled = false; }

  // Starts redirecting fdNo to the log file. No-op when the redirector is
  // disabled or a redirection is already active.
  void begin() {
    if (!enabled) {
      return;
    }
    if (logFD >= 0) {
      // Already redirecting: a second begin() would overwrite savedFdNo with
      // a duplicate of the log descriptor and lose the original target.
      return;
    }
    if (logFilePath == "/dev/null") {
      // The null device always exists, so it must not be created.
      logFD = open(logFilePath.c_str(), O_WRONLY|O_APPEND, mode);
    } else {
      logFD = open(logFilePath.c_str(), O_CREAT|O_WRONLY|O_APPEND, mode);
    }
    if (logFD < 0) {
      std::cerr << "Logger: Cannot begin logging." << std::endl;
      logFD = -1;
      return;
    }
    savedFdNo = dup(fdNo);
    if (savedFdNo < 0) {
      // Without a saved copy the original descriptor could never be restored.
      std::cerr << "Logger: Cannot begin logging." << std::endl;
      close(logFD);
      logFD = -1;
      savedFdNo = -1;
      return;
    }
    // Flush pending output so it still reaches the original destination.
    std::cerr << std::flush;
    dup2(logFD, fdNo);
  }

  // Stops the redirection started by begin() and releases the descriptors it
  // opened. Safe to call repeatedly or without a preceding begin().
  void end() {
    if (logFD < 0) {
      return;
    }
    // Flush pending output so it lands in the log file, not the restored fd.
    std::cerr << std::flush;
    dup2(savedFdNo, fdNo);
    close(savedFdNo);
    close(logFD);
    savedFdNo = -1;
    logFD = -1;  // marks the redirection as finished so end() is idempotent
  }

  std::string logFilePath;  // destination of the redirected output
  mode_t mode;              // creation mode for the log file
  int logFD;                // descriptor of the open log file, -1 when inactive
  int savedFdNo;            // duplicate of the original fdNo, -1 when inactive
  int fdNo;                 // descriptor being redirected
  bool enabled;             // begin() is a no-op while this is false
};

template <class TYPE>
class CompactVector {
Expand Down
3 changes: 3 additions & 0 deletions lib/NGT/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ GraphAndTreeIndex::verify(vector<uint8_t> &status, bool info) {
}
// status: tree|graph|object
for (size_t id = 1; id < status.size(); id++) {
if (id % 100000 == 0) {
cerr << "The number of processed objects=" << id << endl;
}
if (status[id] != 0x00 && status[id] != 0x07) {
if (status[id] == 0x03) {
#ifdef NGT_SHARED_MEMORY_ALLOCATOR
Expand Down
44 changes: 37 additions & 7 deletions lib/NGT/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ namespace NGT {
NGTThrowException(msg);
}
}
static void createGraphAndTree(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize = 0);
static void createGraphAndTree(const string &database, NGT::Property &prop) { createGraphAndTree(database, prop, ""); }
static void createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize = 0);
static void createGraphAndTree(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize = 0, bool redirect = false);
// Convenience overload: builds the index without an input data file.
// dataSize (0) and redirect (false) are passed explicitly; previously `false`
// was passed in the size_t dataSize position and only worked through the
// implicit bool -> 0 conversion.
static void createGraphAndTree(const string &database, NGT::Property &prop) { createGraphAndTree(database, prop, "", 0, false); }
static void createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize = 0, bool redirect = false);
template<typename T> size_t insert(vector<T> &object);
template<typename T> size_t append(vector<T> &object);
static void append(const string &database, const string &dataFile, size_t threadSize, size_t dataSize);
Expand All @@ -305,9 +305,27 @@ namespace NGT {
virtual void load(const string &ifile, size_t dataSize) { getIndex().load(ifile, dataSize); }
virtual void append(const string &ifile, size_t dataSize) { getIndex().append(ifile, dataSize); }
virtual void append(const float *data, size_t dataSize) { getIndex().append(data, dataSize); }
virtual void append(const double *data, size_t dataSize) { getIndex().append(data, dataSize); }
// Forwards `data` and `dataSize` to the underlying index's append() while the
// redirector is active. The redirection is always undone before this function
// returns or propagates an exception.
virtual void append(const double *data, size_t dataSize) {
  redirector.begin();
  try {
    getIndex().append(data, dataSize);
  } catch(...) {
    // Restore the redirected descriptor for every exception type, then
    // rethrow the original exception object (a bare `throw;` neither copies
    // nor slices it, unlike `throw err;`).
    redirector.end();
    throw;
  }
  redirector.end();
}
virtual size_t getObjectRepositorySize() { return getIndex().getObjectRepositorySize(); }
virtual void createIndex(size_t threadNumber) { getIndex().createIndex(threadNumber); }
// Forwards `threadNumber` to the underlying index's createIndex() while the
// redirector is active. The redirection is always undone before this function
// returns or propagates an exception.
virtual void createIndex(size_t threadNumber) {
  redirector.begin();
  try {
    getIndex().createIndex(threadNumber);
  } catch(...) {
    // Restore the redirected descriptor for every exception type, then
    // rethrow the original exception object (a bare `throw;` neither copies
    // nor slices it, unlike `throw err;`).
    redirector.end();
    throw;
  }
  redirector.end();
}
virtual void saveIndex(const string &ofile) { getIndex().saveIndex(ofile); }
virtual void loadIndex(const string &ofile) { getIndex().loadIndex(ofile); }
virtual Object *allocateObject(const string &textLine, const string &sep) { return getIndex().allocateObject(textLine, sep); }
Expand Down Expand Up @@ -349,6 +367,8 @@ namespace NGT {
}
return *index;
}
// Turns message output back on. The inversion is intentional: the redirector
// diverts its descriptor (2 by default) away from its normal destination, so
// logging is enabled exactly when redirection is disabled.
void enableLog() { redirector.disable(); }
// Suppresses message output. The inversion is intentional: enabling the
// redirector makes its begin() divert the descriptor (2 by default) away from
// its normal destination, which is what silences the messages.
void disableLog() { redirector.enable(); }

static void destroy(const string &path) {
#ifdef NGT_SHARED_MEMORY_ALLOCATOR
Expand Down Expand Up @@ -378,6 +398,7 @@ namespace NGT {

Index *index;
string path;
StdOstreamRedirector redirector;
};

class GraphIndex : public Index,
Expand Down Expand Up @@ -1682,7 +1703,7 @@ NGT::Index::open(const string &database, bool rdOnly) {

inline void
NGT::Index::createGraphAndTree(const string &database, NGT::Property &prop, const string &dataFile,
size_t dataSize) {
size_t dataSize, bool redirect) {
if (prop.dimension == 0) {
NGTThrowException("Index::createGraphAndTree. Dimension is not specified.");
}
Expand All @@ -1695,14 +1716,19 @@ inline void
idx = new NGT::GraphAndTreeIndex(prop);
#endif
assert(idx != 0);
StdOstreamRedirector redirector(redirect);
redirector.begin();
try {
loadAndCreateIndex(*idx, database, dataFile, prop.threadPoolSize, dataSize);
} catch(Exception &err) {
delete idx;
redirector.end();
throw err;
}
delete idx;
redirector.end();
}

template<typename T>
size_t NGT::Index::append(vector<T> &object)
{
Expand All @@ -1728,7 +1754,7 @@ size_t NGT::Index::insert(vector<T> &object)
}

inline void
NGT::Index::createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize) {
NGT::Index::createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize, bool redirect) {
if (prop.dimension == 0) {
NGTThrowException("Index::createGraphAndTree. Dimension is not specified.");
}
Expand All @@ -1741,13 +1767,17 @@ inline void
idx = new NGT::GraphIndex(prop);
#endif
assert(idx != 0);
StdOstreamRedirector redirector(redirect);
redirector.begin();
try {
loadAndCreateIndex(*idx, database, dataFile, prop.threadPoolSize, dataSize);
} catch(Exception &err) {
delete idx;
redirector.end();
throw err;
}
delete idx;
redirector.end();
}

inline void
Expand Down
2 changes: 1 addition & 1 deletion lib/NGT/ObjectSpaceRepository.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ namespace NGT {
ObjectRepository &rep = *this;
for (size_t idx = 0; idx < rep.size(); idx++) {
#ifndef NGT_PREFETCH_DISABLED
if (idx + prefetchOffset < rep.size()) {
if (idx + prefetchOffset < rep.size() && rep[idx + prefetchOffset] != 0) {
MemoryCache::prefetch((unsigned char*)&(*static_cast<PersistentObject*>(rep[idx + prefetchOffset]))[0], byteSizeOfObject);
}
#endif
Expand Down
12 changes: 10 additions & 2 deletions lib/NGT/Optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ namespace NGT {
double meanVisitCount;
};

// Turns message output back on. The inversion is intentional: the redirector
// diverts its descriptor (2 by default) away from its normal destination, so
// logging is enabled exactly when redirection is disabled.
void enableLog() { redirector.disable(); }
// Suppresses message output. The inversion is intentional: enabling the
// redirector makes its begin() divert the descriptor (2 by default) away from
// its normal destination, which is what silences the messages.
void disableLog() { redirector.enable(); }

static void search(NGT::Index &index, istream &gtStream, Command::SearchParameter &sp, vector<MeasuredValue> &acc) {
ifstream is(sp.query);
if (!is) {
Expand Down Expand Up @@ -817,22 +820,26 @@ namespace NGT {
NGT::GraphIndex &graphIndex = static_cast<GraphIndex&>(index.getIndex());
NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty();
searchParameter.size = nOfResults;
redirector.begin();
try {
cerr << "adjustSearchEdgeSize::Extract queries for GT..." << endl;
extractQueries(querySize, queries);
cerr << "adjustSearchEdgeSize::create GT..." << endl;
createGroundTruth(index, epsilon, searchParameter, queries, gtStream);
} catch (NGT::Exception &err) {
cerr << "adjustSearchEdgeSize::Error!! Cannot adjust. " << err.what() << endl;
redirector.end();
return pair<size_t, size_t>(0, 0);
}
redirector.end();

auto prevBase = pair<size_t, double>(0, 0);
auto prevRate = pair<size_t, double>(0, 0);
auto base = pair<size_t, double>(0, 0);
auto rate = pair<size_t, double>(20, 0);

map<pair<size_t, size_t>, double> history;
redirector.begin();
for(;;) {
try {
prop.dynamicEdgeSizeRate = rate.first;
Expand Down Expand Up @@ -868,11 +875,12 @@ namespace NGT {
history.insert(std::make_pair(std::make_pair(base.first, rate.first), rate.second));
} catch (NGT::Exception &err) {
cerr << "adjustRateSearchEdgeSize::Error!! Cannot adjust. " << err.what() << endl;
redirector.end();
return pair<size_t, size_t>(0, 0);
}
}
redirector.end();
return std::make_pair(base.first, rate.first);

}

static void adjustSearchEdgeSize(Args &args)
Expand Down Expand Up @@ -1249,7 +1257,7 @@ namespace NGT {

NGT::Index &index;
size_t nOfResults;

StdOstreamRedirector redirector;
};

}; // NGT
Expand Down
Loading

0 comments on commit d219f3d

Please sign in to comment.