Skip to content

Commit

Permalink
fix the issue #141
Browse files Browse the repository at this point in the history
  • Loading branch information
masajiro committed Jul 25, 2023
1 parent 5a5a32b commit 491e88f
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 42 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.0
2.1.1
4 changes: 2 additions & 2 deletions lib/NGT/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,8 @@ namespace NGT {
}
return *index;
}
// NOTE(review): this hunk is rendered without +/- markers. The hunk header
// reports "2 additions & 2 deletions", so the first enableLog/disableLog pair
// is presumably the pre-change code and the second pair the post-change code
// - confirm against the commit.
// First pair: enableLog() sets redirect to true, disableLog() sets it to false.
void enableLog() { redirect = true; }
void disableLog() { redirect = false; }
// Second pair: the assigned values are swapped, so enableLog() clears redirect
// and disableLog() sets it.
void enableLog() { redirect = false; }
void disableLog() { redirect = true; }

static void destroy(const std::string &path) {
#ifdef NGT_SHARED_MEMORY_ALLOCATOR
Expand Down
3 changes: 1 addition & 2 deletions lib/NGT/NGTQ/Capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ void ngtqg_initialize_quantization_parameters(NGTQGQuantizationParameters *param

bool ngtqg_quantize(const char *indexPath, NGTQGQuantizationParameters parameters, NGTError error) {
try{
NGTQG::Index::quantize(indexPath, parameters.dimension_of_subvector, parameters.max_number_of_edges, true);
NGTQG::Index::quantize(indexPath, parameters.dimension_of_subvector, parameters.max_number_of_edges, false);
return true;
}catch(std::exception &err){
std::stringstream ss;
Expand Down Expand Up @@ -165,7 +165,6 @@ bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, NG
{

try {
cerr << "qbgcapi: Create" << endl;
std::vector<float> r;
NGTQ::Property property;
NGT::Property globalProperty;
Expand Down
30 changes: 18 additions & 12 deletions lib/NGT/NGTQ/HierarchicalKmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,8 @@ namespace QBG {

lowerClusters.resize(upperClusters.size());
std::vector<size_t> counters(nthreads, 0);
size_t progressStep = upperClusters.size() / 20;;
progressStep = progressStep < 20 ? 20 : progressStep;
#pragma omp parallel for schedule(dynamic)
for (size_t idx = 0; idx < upperClusters.size(); idx++) {
std::vector<std::vector<float>> partialVectors;
Expand Down Expand Up @@ -798,12 +800,13 @@ namespace QBG {
for (auto c : counters) {
cnt += c;
}
if (cnt % ((upperClusters.size() < 20 ? 20 : upperClusters.size()) / 20) == 0) {
timer.stop();
std::cerr << "subclustering: " << cnt << " clusters ("
<< (cnt * 100 / upperClusters.size()) << "%) have been processed. time=" << timer << std::endl;
timer.restart();
}
if (cnt % progressStep == 0) {
timer.stop();
float progress = (cnt * 100 / upperClusters.size());
std::cerr << "subclustering: " << cnt << " clusters ("
<< progress << "%) have been processed. time=" << timer << std::endl;
timer.restart();
}
}
}
size_t nc = 0;
Expand Down Expand Up @@ -1156,6 +1159,8 @@ namespace QBG {
timer.stop();
std::cerr << "assignWithNGT: exploring epsilon. time=" << timer << " epsilon=" << epsilon << std::endl;
timer.start();
size_t progressStep = (endID - beginID) / 20;;
progressStep = progressStep < 20 ? 20 : progressStep;
#pragma omp parallel for
for (size_t id = beginID; id < endID; id++) {
std::vector<float> obj;
Expand All @@ -1179,12 +1184,13 @@ namespace QBG {
for (auto d : distances) {
cnt += d.first;
}
if (cnt % ((endID - beginID) / 100) == 0) {
timer.stop();
std::cerr << "assignWithNGT: " << cnt << " objects ("
<< (cnt * 100 / (endID - beginID)) << "%) have been assigned. time=" << timer << std::endl;
timer.restart();
}
if (cnt % progressStep == 0) {
timer.stop();
float progress = cnt * 100 / (endID - beginID);
std::cerr << "assignWithNGT: " << cnt << " objects ("
<< progress << "%) have been assigned. time=" << timer << std::endl;
timer.restart();
}
}
}
std::cerr << "pushing..." << std::endl;
Expand Down
21 changes: 11 additions & 10 deletions lib/NGT/NGTQ/Optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,18 @@ void QBG::Optimizer::optimize(const std::string indexPath, size_t threadSize) {
std::vector<std::vector<float>> global(1);
global[0].resize(index.getQuantizer().property.dimension, 0.0);
NGT::Clustering::saveVectors(QBG::Index::getQuantizerCodebookFile(indexPath), global);

ifstream ifs(QBG::Index::getCodebookIndexFile(indexPath));
if (!ifs) {
std::stringstream msg;
msg << "Cannot open the file. " << QBG::Index::getCodebookIndexFile(indexPath);
NGTThrowException(msg);
}
size_t id;
size_t count = 0;
while (ifs >> id) {
count++;
{
ifstream ifs(QBG::Index::getCodebookIndexFile(indexPath));
if (!ifs) {
count = 1;
std::cerr << "the codebook index file is missing. this index must be QG." << std::endl;
} else {
size_t id;
while (ifs >> id) {
count++;
}
}
}
ofstream ofs(QBG::Index::getCodebookIndexFile(indexPath));
if (!ofs) {
Expand Down
8 changes: 4 additions & 4 deletions lib/NGT/NGTQ/QbgCli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,12 @@ QBG::CLI::buildQG(NGT::Args &args)
if (phase == 0 || phase == 2) {
std::cerr << "building the inverted index..." << std::endl;
bool verbose = false;
QBG::Index::buildNGTQ(qgPath, !verbose);
QBG::Index::buildNGTQ(qgPath, verbose);
}
if (phase == 0 || phase == 3) {
std::cerr << "building the quantized graph... " << std::endl;
bool verbose = false;
NGTQG::Index::realign(indexPath, maxNumOfEdges, !verbose);
NGTQG::Index::realign(indexPath, maxNumOfEdges, verbose);
}
}

Expand Down Expand Up @@ -1002,9 +1002,9 @@ QBG::CLI::append(NGT::Args &args)
NGT::Timer timer;
timer.start();
if (mode.find_first_of('b') != std::string::npos) {
QBG::Index::appendBinary(indexPath, data, dataSize, !verbose);
QBG::Index::appendBinary(indexPath, data, dataSize, verbose);
} else {
QBG::Index::append(indexPath, data, dataSize, !verbose);
QBG::Index::append(indexPath, data, dataSize, verbose);
}
timer.stop();
std::cerr << "qbg: appending time=" << timer << std::endl;
Expand Down
7 changes: 5 additions & 2 deletions lib/NGT/NGTQ/QuantizedBlobGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1270,7 +1270,7 @@ namespace QBG {
}

static void buildNGTQ(const std::string &indexPath, bool verbose = false) {
load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", "");
load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", "", "", verbose);
buildNGTQ(indexPath, "", "-", "-", 1, 0, verbose);
if (verbose) {
std::cerr << "NGTQ and NGTQBG indices are completed." << std::endl;
Expand Down Expand Up @@ -1593,8 +1593,10 @@ namespace QBG {


static void
load(std::string indexPath, std::string blobs = "", std::string localCodebooks = "", std::string quantizerCodebook = "", std::string rotationPath = "", int threadSize = 0)
load(std::string indexPath, std::string blobs = "", std::string localCodebooks = "", std::string quantizerCodebook = "", std::string rotationPath = "", bool verbose = false, int threadSize = 0)
{
NGT::StdOstreamRedirector redirector(!verbose);
redirector.begin();
if (blobs.empty()) {
blobs = QBG::Index::getBlobFile(indexPath);
}
Expand Down Expand Up @@ -1709,6 +1711,7 @@ namespace QBG {
QBG::Index::load(indexPath, qCodebook, rotation);
}
#endif
redirector.end();
}

static const std::string getSubvectorPrefix() { return "sv"; }
Expand Down
2 changes: 1 addition & 1 deletion lib/NGT/NGTQ/QuantizedGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void NGTQG::Index::quantize(const std::string indexPath, size_t dimensionOfSubve
#endif

if (optimizer.globalType == QBG::Optimizer::GlobalTypeNone) {
std::cerr << "build-qg: Warning! None is unavailable for the global type. Zero is set to the global type." << std::endl;
if (verbose) std::cerr << "build-qg: Warning! None is unavailable for the global type. Zero is set to the global type." << std::endl;
optimizer.globalType = QBG::Optimizer::GlobalTypeZero;
}

Expand Down
10 changes: 6 additions & 4 deletions lib/NGT/NGTQ/QuantizedGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@ namespace NGTQG {
PARENT::resize(graphRepository.size());

for (size_t id = 1; id < graphRepository.size(); id++) {
if (id % 100000 == 0) {
std::cerr << "# of processed objects=" << id << "/" << graphRepository.size() << std::endl;
if (id % ((graphRepository.size() - 1) / 100) == 0) {
std::cerr << "# of processed objects=" << id << "/" << (graphRepository.size() - 1)
<< "(" << id * 100 / (graphRepository.size() - 1) << "%)" << std::endl;
}
NGT::GraphNode &node = *graphRepository.VECTOR::get(id);
size_t numOfEdges = node.size() < maxNoOfEdges ? node.size() : maxNoOfEdges;
Expand Down Expand Up @@ -118,8 +119,9 @@ namespace NGTQG {
abort();
#else
if (invertedIndexObjects[(*i).id].localID[idx] < 1 || invertedIndexObjects[(*i).id].localID[idx] > 16) {
std::cerr << "Fatal inner error! Invalid local centroid ID. ID=" << (*i).id << ":" << invertedIndexObjects[(*i).id].localID[idx] << std::endl;
abort();
std::stringstream msg;
msg << "Fatal inner error! Invalid local centroid ID. ID=" << (*i).id << ":" << invertedIndexObjects[(*i).id].localID[idx];
NGTThrowException(msg);
}
quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[(*i).id].localID[idx] - 1);
#endif
Expand Down
16 changes: 12 additions & 4 deletions lib/NGT/ObjectSpace.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,16 @@ namespace NGT {
public:
virtual uint8_t &operator[](size_t idx) const = 0;
// Writes this object's raw bytes to `os`. The number of bytes written is
// obtained from objectspace->getByteSizeOfObject().
//
// os          : output stream handed to NGT::Serializer::write.
// objectspace : defaults to 0, but a null value is rejected below.
//
// NOTE(review): this hunk is rendered without +/- markers. Given the file
// header ("12 additions & 4 deletions"), the assert line is presumably the
// removed pre-change check and the if/throw after it is its replacement
// - confirm against the commit.
void serialize(std::ostream &os, ObjectSpace *objectspace = 0) {
assert(objectspace != 0);
if (objectspace == 0) {
// A null object space is reported through NGTThrowException.
NGTThrowException("Object: objectspace is null");
}
// Size in bytes of one object, as reported by the object space.
size_t byteSize = objectspace->getByteSizeOfObject();
// Write byteSize bytes starting at the address of element 0 of this object.
NGT::Serializer::write(os, (uint8_t*)&(*this)[0], byteSize);
}
void deserialize(std::istream &is, ObjectSpace *objectspace = 0) {
assert(objectspace != 0);
if (objectspace == 0) {
NGTThrowException("Object: objectspace is null");
}
size_t byteSize = objectspace->getByteSizeOfObject();
assert(&(*this)[0] != 0);
NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize);
Expand All @@ -318,7 +322,9 @@ namespace NGT {
}
}
void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) {
assert(objectspace != 0);
if (objectspace == 0) {
NGTThrowException("Object: objectspace is null");
}
const std::type_info &t = objectspace->getObjectType();
size_t dimension = objectspace->getDimension();
void *ref = (void*)&(*this)[0];
Expand All @@ -342,7 +348,9 @@ namespace NGT {
}
}
void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) {
assert(objectspace != 0);
if (objectspace == 0) {
NGTThrowException("Object: objectspace is null");
}
const std::type_info &t = objectspace->getObjectType();
size_t dimension = objectspace->getDimension();
void *ref = (void*)&(*this)[0];
Expand Down
24 changes: 24 additions & 0 deletions python/src/ngtpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ class Index : public NGT::Index {
if (debug) {
std::cerr << info.shape.size() << ":" << info.shape[0] << ":" << info.shape[1] << std::endl;
}
if ((objects.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) {
std::stringstream msg;
msg << "ngtpy::batchInsert: Error! The array order is not C type. " << static_cast<int>(objects.flags())
<< ":" << static_cast<int>(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_);
NGTThrowException(msg);
}
auto ptr = static_cast<double *>(info.ptr);
assert(info.shape.size() == 2);
NGT::Property prop;
Expand Down Expand Up @@ -653,6 +659,12 @@ class QuantizedBlobIndex : public QBG::Index {
if (debug) {
std::cerr << info.shape.size() << ":" << info.shape[0] << ":" << info.shape[1] << std::endl;
}
if ((objects.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) {
std::stringstream msg;
msg << "ngtpy::batchInsert: Error! The array order is not C type. " << static_cast<int>(objects.flags())
<< ":" << static_cast<int>(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_);
NGTThrowException(msg);
}
auto ptr = static_cast<double *>(info.ptr);
assert(info.shape.size() == 2);
for (int idx = 0; idx < info.shape[0]; idx++) {
Expand Down Expand Up @@ -799,6 +811,12 @@ class QuantizedBlobIndex : public QBG::Index {
BatchResults &results,
size_t size
) {
if ((queries.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) {
std::stringstream msg;
msg << "ngtpy::batchSearch: Error! The array order is not C type. " << static_cast<int>(queries.flags())
<< ":" << static_cast<int>(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_);
NGTThrowException(msg);
}
if (defaultNumOfProbes == 0) {
batchSearchInOneStep(queries, results, size);
} else {
Expand All @@ -812,6 +830,12 @@ class QuantizedBlobIndex : public QBG::Index {
BatchResults &results,
float radius
) {
if ((queries.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) {
std::stringstream msg;
msg << "ngtpy::batchRangeSearch: Error! The array order is not C type. " << static_cast<int>(queries.flags())
<< ":" << static_cast<int>(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_);
NGTThrowException(msg);
}
const py::buffer_info &qinfo = queries.request();
const std::vector<long int> &qshape = qinfo.shape;
auto nOfQueries = qshape[0];
Expand Down

0 comments on commit 491e88f

Please sign in to comment.