Skip to content

Commit

Permalink
v1.3.3 minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
masajiro committed May 11, 2018
1 parent eee2147 commit 61ace19
Show file tree
Hide file tree
Showing 12 changed files with 327 additions and 66 deletions.
54 changes: 28 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,46 +1,48 @@
cmake_minimum_required(VERSION 2.8)

project( ngt )
project(ngt)

set(ngt_VERSION_MAJOR 1 )
set(ngt_VERSION_MINOR 3 )
set(ngt_VERSION_PATCH 2 )
set(ngt_VERSION_MAJOR 1)
set(ngt_VERSION_MINOR 3)
set(ngt_VERSION_PATCH 3)

set( ngt_VERSION ${ngt_VERSION_MAJOR}.${ngt_VERSION_MINOR}.${ngt_VERSION_PATCH} )
set( ngt_SOVERSION ${ngt_VERSION_MAJOR} )
set(ngt_VERSION ${ngt_VERSION_MAJOR}.${ngt_VERSION_MINOR}.${ngt_VERSION_PATCH})
set(ngt_SOVERSION ${ngt_VERSION_MAJOR})

if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE "Release")
endif (NOT CMAKE_BUILD_TYPE)
string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER)
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

if( ${UNIX} )
if( CMAKE_VERSION VERSION_LESS 3.1 )
link_directories("/usr/lib64")

set(CMAKE_SKIP_BUILD_RPATH TRUE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)

set(BUILD_DATE_OPTION "-DBUILD_DATE=\"\\\"`date +'%Y/%m/%d %H:%M:%S'`\\\"\"")
set(GIT_HASH_OPTION "-DGIT_HASH=\"\\\"`git log -1 --format='%H'`\\\"\"")
set(GIT_DATE_OPTION "-DGIT_DATE=\"\\\"`git log -1 --format='%cd'`\\\"\"")
set(GIT_TAG_OPTION "-DGIT_TAG=\"\\\"`git describe --abbrev=0`\\\"\"")

message(STATUS "CMAKE_BUILD_TYPE_LOWER: ${CMAKE_BUILD_TYPE_LOWER}")

if(${UNIX})
set(BUILD_DATE_OPTION "-DBUILD_DATE=\"\\\"`date +'%Y/%m/%d %H:%M:%S'`\\\"\"")
set(GIT_HASH_OPTION "-DGIT_HASH=\"\\\"`git log -1 --format='%H'`\\\"\"")
set(GIT_DATE_OPTION "-DGIT_DATE=\"\\\"`git log -1 --format='%cd'`\\\"\"")
set(GIT_TAG_OPTION "-DGIT_TAG=\"\\\"`git describe --abbrev=0`\\\"\"")
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
if(CMAKE_VERSION VERSION_LESS 3.1)
set(BASE_OPTIONS "-Wall -std=gnu++0x -lrt ${BUILD_DATE_OPTION} ${GIT_HASH_OPTION} ${GIT_DATE_OPTION} ${GIT_TAG_OPTION}")
if( ${NGT_AVX_DISABLED} )
message(STATUS "AVX will not be used to compute distances.")
else()
set(BASE_OPTIONS "${BASE_OPTIONS} -mavx")
endif()
set(CMAKE_CXX_FLAGS_DEBUG "-g ${BASE_OPTIONS}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 ${BASE_OPTIONS}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native ${BASE_OPTIONS}")
else()
option(WALL "enable all warnings" ON)
if( ${WALL} )
add_compile_options(-Wall)
endif()

# CMAKE_CXX_STANDARD is supported from CMake 3.1
add_definitions(${BUILD_DATE_OPTION} ${GIT_HASH_OPTION} ${GIT_DATE_OPTION} ${GIT_TAG_OPTION})
if (CMAKE_BUILD_TYPE_LOWER STREQUAL "release")
set(CMAKE_CXX_FLAGS_RELEASE "")
add_compile_options(-Ofast -march=native -DNDEBUG)
endif()
add_compile_options(-Wall -lrt)
if(${NGT_AVX_DISABLED})
message(STATUS "AVX will not be used to compute distances.")
else()
add_compile_options(-mavx)
endif()
set(CMAKE_CXX_STANDARD 11) # for std::unordered_set, std::unique_ptr
set(CMAKE_CXX_STANDARD_REQUIRED ON)
endif()
Expand Down
2 changes: 1 addition & 1 deletion bin/ngt/Command.h
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ class Command {
void
prune(Args &args)
{
const string usage = "Usage: ngt prune -e #-of-forcedly-pruned-edges -s #-of-selecively-pruned-edge";
const string usage = "Usage: ngt prune -e #-of-forcedly-pruned-edges -s #-of-selecively-pruned-edge index(in/out)";
string indexName;
try {
indexName = args.get("#1");
Expand Down
2 changes: 2 additions & 0 deletions lib/NGT/Graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds,

std::sort(tmp.begin(), tmp.end());

#if 0
if (tmp.size() > (size_t)property.seedSize) {
tmp.resize(property.seedSize);
}
#endif

#ifdef NGT_GRAPH_UNCHECK_STACK
for (ObjectDistances::reverse_iterator ri = tmp.rbegin(); ri != tmp.rend(); ri++) {
Expand Down
3 changes: 2 additions & 1 deletion lib/NGT/Graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
#endif

#ifndef NGT_SEED_SIZE
#define NGT_SEED_SIZE 10
//#define NGT_SEED_SIZE 10
#define NGT_SEED_SIZE 50
#endif

#ifndef NGT_CREATION_EDGE_SIZE
Expand Down
17 changes: 9 additions & 8 deletions lib/NGT/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,14 @@ namespace NGT {
// Thin forwarding layer: every method below hands its arguments to the
// object returned by getIndex() and passes back whatever that call returns.

// Data loading / appending.
virtual void load(const string &ifile, size_t dataSize) {
  getIndex().load(ifile, dataSize);
}
virtual void append(const string &ifile, size_t dataSize) {
  getIndex().append(ifile, dataSize);
}
virtual void append(const float *data, size_t dataSize) {
  getIndex().append(data, dataSize);
}
virtual void append(const double *data, size_t dataSize) {
  getIndex().append(data, dataSize);
}
virtual size_t getObjectRepositorySize() {
  return getIndex().getObjectRepositorySize();
}

// Index construction and persistence.
virtual void createIndex(size_t threadNumber) {
  getIndex().createIndex(threadNumber);
}
virtual void saveIndex(const string &ofile) {
  getIndex().saveIndex(ofile);
}
virtual void loadIndex(const string &ofile) {
  getIndex().loadIndex(ofile);
}

// Object allocation from a text line or from a vector of values.
virtual Object *allocateObject(const string &textLine, const string &sep) {
  return getIndex().allocateObject(textLine, sep);
}
virtual Object *allocateObject(vector<double> &obj) {
  return getIndex().allocateObject(obj);
}
virtual Object *allocateObject(vector<float> &obj) {
  return getIndex().allocateObject(obj);
}
virtual size_t getSizeOfElement() {
  return getIndex().getSizeOfElement();
}

// Property accessors.
virtual void setProperty(NGT::Property &prop) {
  getIndex().setProperty(prop);
}
virtual void getProperty(NGT::Property &prop) {
  getIndex().getProperty(prop);
}
Expand Down Expand Up @@ -384,9 +386,8 @@ namespace NGT {
objectSpace->appendText(is, dataSize);
}

virtual void append(const float *data, size_t dataSize) {
objectSpace->append(data, dataSize);
}
// Append raw float data by delegating to the object space.
virtual void append(const float *data, size_t dataSize) {
  objectSpace->append(data, dataSize);
}
// Append raw double data by delegating to the object space.
virtual void append(const double *data, size_t dataSize) {
  objectSpace->append(data, dataSize);
}

virtual void saveIndex(const string &ofile) {
#ifndef NGT_SHARED_MEMORY_ALLOCATOR
Expand Down Expand Up @@ -982,9 +983,8 @@ namespace NGT {
return objectSpace->allocateObject(textLine, sep);
}

Object *allocateObject(vector<double> &obj) {
return objectSpace->allocateObject(obj);
}
// Allocate an object from a vector of doubles via the object space.
Object *allocateObject(vector<double> &obj) {
  return objectSpace->allocateObject(obj);
}
// Allocate an object from a vector of floats via the object space.
Object *allocateObject(vector<float> &obj) {
  return objectSpace->allocateObject(obj);
}

void deleteObject(Object *po) {
return objectSpace->deleteObject(po);
Expand Down Expand Up @@ -1216,6 +1216,7 @@ namespace NGT {
}
// if seedSize is zero, the result size of the query is used as seedSize.
size_t seedSize = NeighborhoodGraph::property.seedSize == 0 ? sc.size : NeighborhoodGraph::property.seedSize;
seedSize = seedSize > sc.size ? sc.size : seedSize;
if (seeds.size() > seedSize) {
srand(tso.nodeID.getID());
// to accelerate thinning data.
Expand Down Expand Up @@ -1402,7 +1403,7 @@ NGT::Index::append(const string &database, const string &dataFile, size_t thread
if (dataFile.size() != 0) {
index.append(dataFile, dataSize);
} else {
NGTThrowException("Index::create: No data file.");
NGTThrowException("Index::append: No data file.");
}
timer.stop();
cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl;
Expand All @@ -1424,7 +1425,7 @@ NGT::Index::append(const string &database, const float *data, size_t dataSize, s
if (data != 0 && dataSize != 0) {
index.append(data, dataSize);
} else {
NGTThrowException("Index::create: No data.");
NGTThrowException("Index::append: No data.");
}
timer.stop();
cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl;
Expand Down
6 changes: 3 additions & 3 deletions lib/NGT/MmapManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,13 @@ namespace MemoryManager{
return -1;
}

if(size > _impl->mmapCntlHead->base_size + sizeof(chunk_head_st)){
size_t alloc_size = getAlignSize(size);

if( (alloc_size + sizeof(chunk_head_st)) >= _impl->mmapCntlHead->base_size ){
std::cerr << "alloc size over. size=" << size << "." << std::endl;
return -1;
}

size_t alloc_size = getAlignSize(size);

if(!not_reuse_flag){
if( _impl->mmapCntlHead->reuse_type == REUSE_DATA_CLASSIFY
|| _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE
Expand Down
98 changes: 81 additions & 17 deletions lib/NGT/ObjectSpace.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@


// Include the AVX intrinsics header only when NGT_AVX_DISABLED is not
// defined and the compiler defines __AVX__. Whichever branch is taken, a
// compile-time #warning reports it, so every translation unit that includes
// this header prints one of the two messages below.
#if !defined(NGT_AVX_DISABLED) && defined(__AVX__)
#warning "***** AVX is available! ************************************************"
#include <immintrin.h>
#else
#warning "***** AVX is *NOT* available! ************************************************"
#endif

#include "Common.h"
Expand Down Expand Up @@ -243,6 +246,7 @@ namespace NGT {
virtual void readText(istream &is, size_t dataSize) = 0;
virtual void appendText(ifstream &is, size_t dataSize) = 0;
virtual void append(const float *data, size_t dataSize) = 0;
virtual void append(const double *data, size_t dataSize) = 0;
virtual void copy(Object &objecta, Object &objectb) = 0;

virtual void linearSearch(Object &query, double radius, size_t size,
Expand All @@ -255,6 +259,7 @@ namespace NGT {
virtual size_t getByteSizeOfObject() = 0;
virtual Object *allocateObject(const string &textLine, const string &sep) = 0;
virtual Object *allocateObject(vector<double> &obj) = 0;
virtual Object *allocateObject(vector<float> &obj) = 0;
virtual void deleteObject(Object *po) = 0;
virtual Object *allocateObject() = 0;
virtual void remove(size_t id) = 0;
Expand Down Expand Up @@ -851,7 +856,7 @@ namespace NGT {
#else
ComparatorCosineSimilarity(size_t d) : Comparator(d) {}
double operator()(Object &objecta, Object &objectb) {
return ObjectSpaceT::compareAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
return ObjectSpaceT::compareCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension);
}
#endif
};
Expand Down Expand Up @@ -1127,7 +1132,8 @@ namespace NGT {
return (double)count;
}

inline static double compareAngleDistance(OBJECT_TYPE *a, OBJECT_TYPE *b, size_t size) {
#if defined(NGT_AVX_DISABLED) || !defined(__AVX__)
inline static double compareCosine(OBJECT_TYPE *a, OBJECT_TYPE *b, size_t size) {
// Calculate the norm of A and B (the supplied vector).
double normA = 0.0F;
double normB = 0.0F;
Expand All @@ -1137,26 +1143,62 @@ namespace NGT {
normB += (double)b[loc] * (double)b[loc];
sum += (double)a[loc] * (double)b[loc];
}

assert(normA > 0.0F);
assert(normB > 0.0F);

// Compute the dot product of the two vectors.
double cosine = sum / (sqrt(normA) * sqrt(normB));
// Compute the vector angle from the cosine value, and return.
// Roundoff error could have put the cosine value out of range.
// Handle these cases explicitly.
if (cosine >= 1.0F) {
return 0.0F;
} else if (cosine <= -1.0F) {
return acos (-1.0F);
} else {
return acos (cosine);
double cosine = sum / sqrt(normA * normB);

return cosine;
}
#else
inline static double compareCosine(float *a, float *b, size_t size) {
// Calculate the norm of A and B (the supplied vector).

__m256 normA = _mm256_setzero_ps();
__m256 normB = _mm256_setzero_ps();
__m256 sum = _mm256_setzero_ps();
float *last = a + size;
float *lastgroup = last - 7;
while (a < lastgroup) {
__m256 am = _mm256_loadu_ps(a);
__m256 bm = _mm256_loadu_ps(b);
normA = _mm256_add_ps(normA, _mm256_mul_ps(am, am));
normB = _mm256_add_ps(normB, _mm256_mul_ps(bm, bm));
sum = _mm256_add_ps(sum, _mm256_mul_ps(am, bm));
a += 8;
b += 8;
}

__attribute__((aligned(32))) float f[8];

_mm256_store_ps(f, normA);
double na = f[0] + f[1] + f[2] + f[3] + f[4] + f[5] + f[6] + f[7];
_mm256_store_ps(f, normB);
double nb = f[0] + f[1] + f[2] + f[3] + f[4] + f[5] + f[6] + f[7];
_mm256_store_ps(f, sum);
double s = f[0] + f[1] + f[2] + f[3] + f[4] + f[5] + f[6] + f[7];

while (a < last) {
double av = *a;
double bv = *b;
na += av * av;
nb += bv * bv;
s += av * bv;
a++;
b++;
}


assert(na > 0.0F);
assert(nb > 0.0F);

double cosine = s / sqrt(na * nb);

return cosine;
}

inline static double compareCosineSimilarity(OBJECT_TYPE *a, OBJECT_TYPE *b, size_t size) {
inline static double compareCosine(unsigned char *a, unsigned char *b, size_t size) {
// Calculate the norm of A and B (the supplied vector).
double normA = 0.0F;
double normB = 0.0F;
Expand All @@ -1166,14 +1208,32 @@ namespace NGT {
normB += (double)b[loc] * (double)b[loc];
sum += (double)a[loc] * (double)b[loc];
}

assert(normA > 0.0F);
assert(normB > 0.0F);

// Compute the dot product of the two vectors.
double cosine = sum / (sqrt(normA) * sqrt(normB));
double cosine = sum / sqrt(normA * normB);

return cosine;
}
#endif // #if defined(NGT_AVX_DISABLED) || !defined(__AVX__)

// Angle, in radians, between vectors a and b.
//
// a, b : pointers to the first element of each vector
// size : number of elements read from each vector
//
// Returns 0 when the cosine is >= 1, acos(-1) when it is <= -1, and
// acos(cosine) otherwise.
inline static double compareAngleDistance(OBJECT_TYPE *a, OBJECT_TYPE *b, size_t size) {
  // The cosine comes from compareCosine(); calling compareAngleDistance()
  // here would recurse forever and never reach the acos() below.
  double cosine = compareCosine(a, b, size);
  // Compute the vector angle from the cosine value, and return.
  // Roundoff error could have put the cosine value out of range.
  // Handle these cases explicitly.
  if (cosine >= 1.0F) {
    return 0.0F;
  } else if (cosine <= -1.0F) {
    return acos(-1.0F);
  } else {
    return acos(cosine);
  }
}

return 1.0 - cosine;
// Returns one minus the cosine of the two vectors as computed by compareCosine().
inline static double compareCosineSimilarity(OBJECT_TYPE *a, OBJECT_TYPE *b, size_t size) {
  const double cosine = compareCosine(a, b, size);
  return 1.0 - cosine;
}

// Serialize the repository to ofile, passing this object space along.
void serialize(const string &ofile) {
  ObjectRepository::serialize(ofile, this);
}
Expand All @@ -1183,6 +1243,7 @@ namespace NGT {
// Text and raw-array ingestion; each call is delegated to ObjectRepository.
void readText(istream &is, size_t dataSize) {
  ObjectRepository::readText(is, dataSize);
}
void appendText(ifstream &is, size_t dataSize) {
  ObjectRepository::appendText(is, dataSize);
}
void append(const float *data, size_t dataSize) {
  ObjectRepository::append(data, dataSize);
}
void append(const double *data, size_t dataSize) {
  ObjectRepository::append(data, dataSize);
}



Expand Down Expand Up @@ -1245,6 +1306,9 @@ namespace NGT {
// Allocate an object from a vector of doubles via ObjectRepository.
Object *allocateObject(vector<double> &obj) {
  Object *allocated = ObjectRepository::allocateObject(obj);
  return allocated;
}
// Allocate an object from a vector of floats via ObjectRepository.
Object *allocateObject(vector<float> &obj) {
  Object *allocated = ObjectRepository::allocateObject(obj);
  return allocated;
}

// Value reported by ObjectRepository::size().
size_t getSize() {
  return ObjectRepository::size();
}
// Size in bytes of one element of OBJECT_TYPE.
size_t getSizeOfElement() {
  return sizeof(OBJECT_TYPE);
}
Expand Down
4 changes: 4 additions & 0 deletions lib/NGT/defines.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@


#define NGT_COMPACT_VECTOR

// Unless NGT_GRAPH_CHECK_VECTOR has already been defined before this point,
// select the boolean-set checker.
#ifndef NGT_GRAPH_CHECK_VECTOR
#define NGT_GRAPH_CHECK_BOOLEANSET // use original booleanset to check whether nodes were accessed.
#endif

// Selecting either the boolean-set or the bit-set checker also defines
// NGT_GRAPH_CHECK_VECTOR.
#if defined(NGT_GRAPH_CHECK_BOOLEANSET) || defined(NGT_GRAPH_CHECK_BITSET)
#define NGT_GRAPH_CHECK_VECTOR // use vector to check whether nodes were accessed.
#endif
Expand Down
2 changes: 1 addition & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ You **MUST** install the NGT library according to the [README](../README.md#buil
```
cd NGT_ROOT/python
python setup.py sdist
pip install dist/ngt-1.0.0.tar.gz
pip install dist/ngt-1.1.0.tar.gz
```

## Usage
Expand Down
2 changes: 1 addition & 1 deletion python/ngt/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class Index(object):
objects.append(vector)
query = objects[0]
index = ngt.Index.create("tmp", dim)
index = ngt.Index.create(b"tmp", dim)
index.insert(objects)
# You can also insert objects from a file like this.
# index.insert_from_tsv('list.dat')
Expand Down
Loading

0 comments on commit 61ace19

Please sign in to comment.