From 491e88fb1dc24eee342bbeb7a88e363fbe027b54 Mon Sep 17 00:00:00 2001 From: Masajiro Iwasaki Date: Tue, 25 Jul 2023 13:20:19 +0900 Subject: [PATCH] fix the issue #141 --- VERSION | 2 +- lib/NGT/Index.h | 4 ++-- lib/NGT/NGTQ/Capi.cpp | 3 +-- lib/NGT/NGTQ/HierarchicalKmeans.h | 30 ++++++++++++++++++------------ lib/NGT/NGTQ/Optimizer.cpp | 21 +++++++++++---------- lib/NGT/NGTQ/QbgCli.cpp | 8 ++++---- lib/NGT/NGTQ/QuantizedBlobGraph.h | 7 +++++-- lib/NGT/NGTQ/QuantizedGraph.cpp | 2 +- lib/NGT/NGTQ/QuantizedGraph.h | 10 ++++++---- lib/NGT/ObjectSpace.h | 16 ++++++++++++---- python/src/ngtpy.cpp | 24 ++++++++++++++++++++++++ 11 files changed, 85 insertions(+), 42 deletions(-) diff --git a/VERSION b/VERSION index 7ec1d6d..3e3c2f1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 +2.1.1 diff --git a/lib/NGT/Index.h b/lib/NGT/Index.h index 815cad9..2972916 100644 --- a/lib/NGT/Index.h +++ b/lib/NGT/Index.h @@ -526,8 +526,8 @@ namespace NGT { } return *index; } - void enableLog() { redirect = true; } - void disableLog() { redirect = false; } + void enableLog() { redirect = false; } + void disableLog() { redirect = true; } static void destroy(const std::string &path) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR diff --git a/lib/NGT/NGTQ/Capi.cpp b/lib/NGT/NGTQ/Capi.cpp index 83bf174..ecb3af4 100644 --- a/lib/NGT/NGTQ/Capi.cpp +++ b/lib/NGT/NGTQ/Capi.cpp @@ -125,7 +125,7 @@ void ngtqg_initialize_quantization_parameters(NGTQGQuantizationParameters *param bool ngtqg_quantize(const char *indexPath, NGTQGQuantizationParameters parameters, NGTError error) { try{ - NGTQG::Index::quantize(indexPath, parameters.dimension_of_subvector, parameters.max_number_of_edges, true); + NGTQG::Index::quantize(indexPath, parameters.dimension_of_subvector, parameters.max_number_of_edges, false); return true; }catch(std::exception &err){ std::stringstream ss; @@ -165,7 +165,6 @@ bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, NG { try { - cerr << "qbgcapi: Create" << endl; std::vector r; NGTQ::Property property; NGT::Property globalProperty; diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.h b/lib/NGT/NGTQ/HierarchicalKmeans.h index de63735..6636330 100644 --- a/lib/NGT/NGTQ/HierarchicalKmeans.h +++ b/lib/NGT/NGTQ/HierarchicalKmeans.h @@ -761,6 +761,8 @@ namespace QBG { lowerClusters.resize(upperClusters.size()); std::vector counters(nthreads, 0); + size_t progressStep = upperClusters.size() / 20;; + progressStep = progressStep < 20 ? 20 : progressStep; #pragma omp parallel for schedule(dynamic) for (size_t idx = 0; idx < upperClusters.size(); idx++) { std::vector> partialVectors; @@ -798,12 +800,13 @@ namespace QBG { for (auto c : counters) { cnt += c; } - if (cnt % ((upperClusters.size() < 20 ? 20 : upperClusters.size()) / 20) == 0) { - timer.stop(); - std::cerr << "subclustering: " << cnt << " clusters (" - << (cnt * 100 / upperClusters.size()) << "%) have been processed. time=" << timer << std::endl; - timer.restart(); - } + if (cnt % progressStep == 0) { + timer.stop(); + float progress = (cnt * 100 / upperClusters.size()); + std::cerr << "subclustering: " << cnt << " clusters (" + << progress << "%) have been processed. time=" << timer << std::endl; + timer.restart(); + } } } size_t nc = 0; @@ -1156,6 +1159,8 @@ namespace QBG { timer.stop(); std::cerr << "assignWithNGT: exploring epsilon. time=" << timer << " epsilon=" << epsilon << std::endl; timer.start(); + size_t progressStep = (endID - beginID) / 20;; + progressStep = progressStep < 20 ? 20 : progressStep; #pragma omp parallel for for (size_t id = beginID; id < endID; id++) { std::vector obj; @@ -1179,12 +1184,13 @@ namespace QBG { for (auto d : distances) { cnt += d.first; } - if (cnt % ((endID - beginID) / 100) == 0) { - timer.stop(); - std::cerr << "assignWithNGT: " << cnt << " objects (" - << (cnt * 100 / (endID - beginID)) << "%) have been assigned. time=" << timer << std::endl; - timer.restart(); - } + if (cnt % progressStep == 0) { + timer.stop(); + float progress = cnt * 100 / (endID - beginID); + std::cerr << "assignWithNGT: " << cnt << " objects (" + << progress << "%) have been assigned. time=" << timer << std::endl; + timer.restart(); + } } } std::cerr << "pushing..." << std::endl; diff --git a/lib/NGT/NGTQ/Optimizer.cpp b/lib/NGT/NGTQ/Optimizer.cpp index 1a02db2..3dc407c 100644 --- a/lib/NGT/NGTQ/Optimizer.cpp +++ b/lib/NGT/NGTQ/Optimizer.cpp @@ -299,17 +299,18 @@ void QBG::Optimizer::optimize(const std::string indexPath, size_t threadSize) { std::vector> global(1); global[0].resize(index.getQuantizer().property.dimension, 0.0); NGT::Clustering::saveVectors(QBG::Index::getQuantizerCodebookFile(indexPath), global); - - ifstream ifs(QBG::Index::getCodebookIndexFile(indexPath)); - if (!ifs) { - std::stringstream msg; - msg << "Cannot open the file. " << QBG::Index::getCodebookIndexFile(indexPath); - NGTThrowException(msg); - } - size_t id; size_t count = 0; - while (ifs >> id) { - count++; + { + ifstream ifs(QBG::Index::getCodebookIndexFile(indexPath)); + if (!ifs) { + count = 1; + std::cerr << "the codebook index file is missing. this index must be QG." << std::endl; + } else { + size_t id; + while (ifs >> id) { + count++; + } + } } ofstream ofs(QBG::Index::getCodebookIndexFile(indexPath)); if (!ofs) { diff --git a/lib/NGT/NGTQ/QbgCli.cpp b/lib/NGT/NGTQ/QbgCli.cpp index ff75897..d68d8c4 100644 --- a/lib/NGT/NGTQ/QbgCli.cpp +++ b/lib/NGT/NGTQ/QbgCli.cpp @@ -419,12 +419,12 @@ QBG::CLI::buildQG(NGT::Args &args) if (phase == 0 || phase == 2) { std::cerr << "building the inverted index..." << std::endl; bool verbose = false; - QBG::Index::buildNGTQ(qgPath, !verbose); + QBG::Index::buildNGTQ(qgPath, verbose); } if (phase == 0 || phase == 3) { std::cerr << "building the quantized graph... " << std::endl; bool verbose = false; - NGTQG::Index::realign(indexPath, maxNumOfEdges, !verbose); + NGTQG::Index::realign(indexPath, maxNumOfEdges, verbose); } } @@ -1002,9 +1002,9 @@ QBG::CLI::append(NGT::Args &args) NGT::Timer timer; timer.start(); if (mode.find_first_of('b') != std::string::npos) { - QBG::Index::appendBinary(indexPath, data, dataSize, !verbose); + QBG::Index::appendBinary(indexPath, data, dataSize, verbose); } else { - QBG::Index::append(indexPath, data, dataSize, !verbose); + QBG::Index::append(indexPath, data, dataSize, verbose); } timer.stop(); std::cerr << "qbg: appending time=" << timer << std::endl; diff --git a/lib/NGT/NGTQ/QuantizedBlobGraph.h b/lib/NGT/NGTQ/QuantizedBlobGraph.h index 0491547..06a9620 100644 --- a/lib/NGT/NGTQ/QuantizedBlobGraph.h +++ b/lib/NGT/NGTQ/QuantizedBlobGraph.h @@ -1270,7 +1270,7 @@ namespace QBG { } static void buildNGTQ(const std::string &indexPath, bool verbose = false) { - load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", ""); + load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", "", "", verbose); buildNGTQ(indexPath, "", "-", "-", 1, 0, verbose); if (verbose) { std::cerr << "NGTQ and NGTQBG indices are completed." << std::endl; @@ -1593,8 +1593,10 @@ namespace QBG { static void - load(std::string indexPath, std::string blobs = "", std::string localCodebooks = "", std::string quantizerCodebook = "", std::string rotationPath = "", int threadSize = 0) + load(std::string indexPath, std::string blobs = "", std::string localCodebooks = "", std::string quantizerCodebook = "", std::string rotationPath = "", bool verbose = false, int threadSize = 0) { + NGT::StdOstreamRedirector redirector(!verbose); + redirector.begin(); if (blobs.empty()) { blobs = QBG::Index::getBlobFile(indexPath); } @@ -1709,6 +1711,7 @@ namespace QBG { QBG::Index::load(indexPath, qCodebook, rotation); } #endif + redirector.end(); } static const std::string getSubvectorPrefix() { return "sv"; } diff --git a/lib/NGT/NGTQ/QuantizedGraph.cpp b/lib/NGT/NGTQ/QuantizedGraph.cpp index 7bcb91b..ff1d60f 100644 --- a/lib/NGT/NGTQ/QuantizedGraph.cpp +++ b/lib/NGT/NGTQ/QuantizedGraph.cpp @@ -45,7 +45,7 @@ void NGTQG::Index::quantize(const std::string indexPath, size_t dimensionOfSubve #endif if (optimizer.globalType == QBG::Optimizer::GlobalTypeNone) { - std::cerr << "build-qg: Warning! None is unavailable for the global type. Zero is set to the global type." << std::endl; + if (verbose) std::cerr << "build-qg: Warning! None is unavailable for the global type. Zero is set to the global type." << std::endl; optimizer.globalType = QBG::Optimizer::GlobalTypeZero; } diff --git a/lib/NGT/NGTQ/QuantizedGraph.h b/lib/NGT/NGTQ/QuantizedGraph.h index a855da2..bf221fc 100644 --- a/lib/NGT/NGTQ/QuantizedGraph.h +++ b/lib/NGT/NGTQ/QuantizedGraph.h @@ -87,8 +87,9 @@ namespace NGTQG { PARENT::resize(graphRepository.size()); for (size_t id = 1; id < graphRepository.size(); id++) { - if (id % 100000 == 0) { - std::cerr << "# of processed objects=" << id << "/" << graphRepository.size() << std::endl; + if (id % ((graphRepository.size() - 1) / 100) == 0) { + std::cerr << "# of processed objects=" << id << "/" << (graphRepository.size() - 1) + << "(" << id * 100 / (graphRepository.size() - 1) << "%)" << std::endl; } NGT::GraphNode &node = *graphRepository.VECTOR::get(id); size_t numOfEdges = node.size() < maxNoOfEdges ? node.size() : maxNoOfEdges; @@ -118,8 +119,9 @@ namespace NGTQG { abort(); #else if (invertedIndexObjects[(*i).id].localID[idx] < 1 || invertedIndexObjects[(*i).id].localID[idx] > 16) { - std::cerr << "Fatal inner error! Invalid local centroid ID. ID=" << (*i).id << ":" << invertedIndexObjects[(*i).id].localID[idx] << std::endl; - abort(); + std::stringstream msg; + msg << "Fatal inner error! Invalid local centroid ID. ID=" << (*i).id << ":" << invertedIndexObjects[(*i).id].localID[idx]; + NGTThrowException(msg); } quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[(*i).id].localID[idx] - 1); #endif diff --git a/lib/NGT/ObjectSpace.h b/lib/NGT/ObjectSpace.h index 2dee133..b72a869 100644 --- a/lib/NGT/ObjectSpace.h +++ b/lib/NGT/ObjectSpace.h @@ -302,12 +302,16 @@ namespace NGT { public: virtual uint8_t &operator[](size_t idx) const = 0; void serialize(std::ostream &os, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); + if (objectspace == 0) { + NGTThrowException("Object: objectspace is null"); + } size_t byteSize = objectspace->getByteSizeOfObject(); NGT::Serializer::write(os, (uint8_t*)&(*this)[0], byteSize); } void deserialize(std::istream &is, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); + if (objectspace == 0) { + NGTThrowException("Object: objectspace is null"); + } size_t byteSize = objectspace->getByteSizeOfObject(); assert(&(*this)[0] != 0); NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize); @@ -318,7 +322,9 @@ namespace NGT { } } void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); + if (objectspace == 0) { + NGTThrowException("Object: objectspace is null"); + } const std::type_info &t = objectspace->getObjectType(); size_t dimension = objectspace->getDimension(); void *ref = (void*)&(*this)[0]; @@ -342,7 +348,9 @@ namespace NGT { } } void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); + if (objectspace == 0) { + NGTThrowException("Object: objectspace is null"); + } const std::type_info &t = objectspace->getObjectType(); size_t dimension = objectspace->getDimension(); void *ref = (void*)&(*this)[0]; diff --git a/python/src/ngtpy.cpp b/python/src/ngtpy.cpp index 023115c..a9ffb34 100644 --- a/python/src/ngtpy.cpp +++ b/python/src/ngtpy.cpp @@ -120,6 +120,12 @@ class Index : public NGT::Index { if (debug) { std::cerr << info.shape.size() << ":" << info.shape[0] << ":" << info.shape[1] << std::endl; } + if ((objects.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) { + std::stringstream msg; + msg << "ngtpy::batchInsert: Error! The array order is not C type. " << static_cast(objects.flags()) + << ":" << static_cast(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_); + NGTThrowException(msg); + } auto ptr = static_cast(info.ptr); assert(info.shape.size() == 2); NGT::Property prop; @@ -653,6 +659,12 @@ class QuantizedBlobIndex : public QBG::Index { if (debug) { std::cerr << info.shape.size() << ":" << info.shape[0] << ":" << info.shape[1] << std::endl; } + if ((objects.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) { + std::stringstream msg; + msg << "ngtpy::batchInsert: Error! The array order is not C type. " << static_cast(objects.flags()) + << ":" << static_cast(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_); + NGTThrowException(msg); + } auto ptr = static_cast(info.ptr); assert(info.shape.size() == 2); for (int idx = 0; idx < info.shape[0]; idx++) { @@ -799,6 +811,12 @@ class QuantizedBlobIndex : public QBG::Index { BatchResults &results, size_t size ) { + if ((queries.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) { + std::stringstream msg; + msg << "ngtpy::batchSearch: Error! The array order is not C type. " << static_cast(queries.flags()) + << ":" << static_cast(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_); + NGTThrowException(msg); + } if (defaultNumOfProbes == 0) { batchSearchInOneStep(queries, results, size); } else { @@ -812,6 +830,12 @@ class QuantizedBlobIndex : public QBG::Index { BatchResults &results, float radius ) { + if ((queries.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) == 0) { + std::stringstream msg; + msg << "ngtpy::batchRangeSearch: Error! The array order is not C type. " << static_cast(queries.flags()) + << ":" << static_cast(py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_); + NGTThrowException(msg); + } const py::buffer_info &qinfo = queries.request(); const std::vector &qshape = qinfo.shape; auto nOfQueries = qshape[0];