From 0416078aa28552093ae122c5dfe75be831be486b Mon Sep 17 00:00:00 2001 From: Masajiro Iwasaki Date: Tue, 29 Oct 2024 08:52:17 +0900 Subject: [PATCH] implement scalar quantization as data type for NGT-graph and QBG --- VERSION | 2 +- bin/ngt/ngt.cpp | 2 + bin/qbg/qbg.cpp | 11 +- lib/NGT/Capi.cpp | 57 +- lib/NGT/Capi.h | 6 + lib/NGT/Clustering.h | 22 +- lib/NGT/Command.cpp | 474 +++-- lib/NGT/Command.h | 5 +- lib/NGT/Common.h | 98 +- lib/NGT/Graph.cpp | 149 +- lib/NGT/Graph.h | 38 +- lib/NGT/GraphOptimizer.h | 18 +- lib/NGT/GraphReconstructor.h | 24 +- lib/NGT/HashBasedBooleanSet.h | 22 +- lib/NGT/Index.cpp | 599 +++++- lib/NGT/Index.h | 358 ++-- lib/NGT/NGTQ/HierarchicalKmeans.cpp | 79 +- lib/NGT/NGTQ/HierarchicalKmeans.h | 93 +- lib/NGT/NGTQ/ObjectFile.h | 25 +- lib/NGT/NGTQ/Optimizer.cpp | 210 +++ lib/NGT/NGTQ/Optimizer.h | 23 +- lib/NGT/NGTQ/QbgCli.cpp | 775 ++++++-- lib/NGT/NGTQ/QbgCli.h | 107 +- lib/NGT/NGTQ/QuantizedBlobGraph.h | 1200 +++++++++--- lib/NGT/NGTQ/QuantizedGraph.h | 28 +- lib/NGT/NGTQ/Quantizer.h | 2683 +++++++++++++++++--------- lib/NGT/ObjectRepository.h | 30 +- lib/NGT/ObjectSpace.cpp | 7 +- lib/NGT/ObjectSpace.h | 204 +- lib/NGT/ObjectSpaceRepository.h | 364 +++- lib/NGT/Optimizer.h | 100 +- lib/NGT/PrimitiveComparator.h | 383 +++- lib/NGT/SharedMemoryAllocator.h | 2 +- lib/NGT/defines.h.in | 6 +- lib/NGT/half.hpp | 2712 +++++++++++++-------------- python/src/ngtpy.cpp | 146 +- samples/qbg-capi/qbg-capi.cpp | 2 +- 37 files changed, 7722 insertions(+), 3342 deletions(-) diff --git a/VERSION b/VERSION index 530cdd9..276cbf9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2.4 +2.3.0 diff --git a/bin/ngt/ngt.cpp b/bin/ngt/ngt.cpp index 1403c8b..8572721 100644 --- a/bin/ngt/ngt.cpp +++ b/bin/ngt/ngt.cpp @@ -122,6 +122,8 @@ main(int argc, char **argv) } } catch(NGT::Exception &err) { cerr << "ngt: Error: " << err.what() << endl; + return 1; } + return 0; } diff --git a/bin/qbg/qbg.cpp b/bin/qbg/qbg.cpp index 92281eb..a6c8fc5 100644 --- 
a/bin/qbg/qbg.cpp +++ b/bin/qbg/qbg.cpp @@ -23,7 +23,16 @@ main(int argc, char **argv) QBG::CLI ngt; - ngt.execute(args); + try { + ngt.execute(args); + } catch(NGT::Exception &err) { + cerr << "qbg: Error: " << err.what() << endl; + return 1; + } catch(...) { + cerr << "qbg: Error: " << endl; + return 1; + } + return 0; } diff --git a/lib/NGT/Capi.cpp b/lib/NGT/Capi.cpp index 4e1f9ae..c7a6960 100644 --- a/lib/NGT/Capi.cpp +++ b/lib/NGT/Capi.cpp @@ -331,6 +331,18 @@ bool ngt_set_property_object_type_integer(NGTProperty prop, NGTError error) { return true; } +bool ngt_set_property_object_type_qsint8(NGTProperty prop, NGTError error) { + if(prop == NULL){ + std::stringstream ss; + ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; + operate_error_string_(ss, error); + return false; + } + + (*static_cast(prop)).objectType = NGT::ObjectSpace::ObjectType::Qsuint8; + return true; +} + bool ngt_set_property_distance_type(NGTProperty prop, NGT::Index::Property::DistanceType type, NGTError error) { if(prop == NULL){ std::stringstream ss; @@ -428,11 +440,7 @@ NGTPropertyInfo ngt_get_property_info(NGTIndex index, NGTError error) { prop.prefetchSize, prop.accuracyTable.c_str(), prop.searchType.c_str(), -#ifdef NGT_INNER_PRODUCT prop.maxMagnitude, -#else - -1, -#endif prop.nOfNeighborsForInsertionOrder, prop.epsilonForInsertionOrder, #ifdef NGT_REFINEMENT @@ -1055,6 +1063,47 @@ ObjectID ngt_append_index_as_float16(NGTIndex index, NGTFloat16 *obj, uint32_t o } } +ObjectID ngt_insert_to_refinement_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) { + if(index == NULL || obj == NULL || obj_dim == 0){ + std::stringstream ss; + ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; + operate_error_string_(ss, error); + return 0; + } + + try{ + NGT::Index* pindex = static_cast(index); + std::vector vobj(&obj[0], &obj[obj_dim]); + return 
pindex->insertToRefinement(vobj); + }catch(std::exception &err) { + std::stringstream ss; + ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); + operate_error_string_(ss, error); + return 0; + } +} + +ObjectID ngt_append_to_refinement_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) { + if(index == NULL || obj == NULL || obj_dim == 0){ + std::stringstream ss; + ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; + operate_error_string_(ss, error); + return 0; + } + + try{ + NGT::Index* pindex = static_cast(index); + std::vector vobj(&obj[0], &obj[obj_dim]); + return pindex->appendToRefinement(vobj); + }catch(std::exception &err) { + std::stringstream ss; + ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); + operate_error_string_(ss, error); + return 0; + } +} + + bool ngt_batch_append_index(NGTIndex index, float *obj, uint32_t data_count, NGTError error) { try{ NGT::Index* pindex = static_cast(index); diff --git a/lib/NGT/Capi.h b/lib/NGT/Capi.h index b4aabae..4f08df3 100644 --- a/lib/NGT/Capi.h +++ b/lib/NGT/Capi.h @@ -196,6 +196,8 @@ bool ngt_set_property_object_type_float16(NGTProperty, NGTError); bool ngt_set_property_object_type_integer(NGTProperty, NGTError); +bool ngt_set_property_object_type_qsint8(NGTProperty, NGTError); + bool ngt_set_property_distance_type_l1(NGTProperty, NGTError); bool ngt_set_property_distance_type_l2(NGTProperty, NGTError); @@ -272,6 +274,10 @@ ObjectID ngt_insert_index_as_float16(NGTIndex, NGTFloat16*, uint32_t, NGTError); ObjectID ngt_append_index_as_float16(NGTIndex, NGTFloat16*, uint32_t, NGTError); +ObjectID ngt_append_to_refinement_as_float(NGTIndex, float*, uint32_t, NGTError); + +ObjectID ngt_insert_to_refinement_as_float(NGTIndex, float*, uint32_t, NGTError); + bool ngt_batch_append_index(NGTIndex, float*, uint32_t, NGTError); bool ngt_batch_insert_index(NGTIndex, float*, uint32_t, uint32_t *, 
NGTError); diff --git a/lib/NGT/Clustering.h b/lib/NGT/Clustering.h index 046d526..bfa650b 100644 --- a/lib/NGT/Clustering.h +++ b/lib/NGT/Clustering.h @@ -125,7 +125,7 @@ namespace NGT { } static void - loadVectors(const std::string &file, std::vector > &vectors) + loadVectors(const std::string &file, std::vector> &vectors) { std::ifstream is(file); if (!is) { @@ -152,7 +152,7 @@ namespace NGT { } static void - saveVectors(const std::string &file, std::vector > &vectors) + saveVectors(const std::string &file, std::vector> &vectors) { std::ofstream os(file); for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) { @@ -167,6 +167,22 @@ namespace NGT { } } + static void + saveVectors(const std::string &file, std::vector> &vectors) + { + std::ofstream os(file); + for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) { + std::vector &v = *vit; + for (auto it = v.begin(); it != v.end(); ++it) { + os << (*it); + if (it + 1 != v.end()) { + os << "\t"; + } + } + os << std::endl; + } + } + static void loadVector(const std::string &file, std::vector &vectors) { @@ -403,7 +419,7 @@ namespace NGT { } } - std::vector sortedObjects(vectors.size()); + std::vector sortedObjects(vectors.size()); #pragma omp parallel for for (size_t vi = 0; vi < vectors.size(); vi++) { auto vit = vectors.begin() + vi; diff --git a/lib/NGT/Command.cpp b/lib/NGT/Command.cpp index b057edc..1d1b75f 100644 --- a/lib/NGT/Command.cpp +++ b/lib/NGT/Command.cpp @@ -24,8 +24,16 @@ using namespace std; +#define NGT_APPENDING_BINARY +#ifdef NGT_APPENDING_BINARY +#include "NGT/ArrayFile.h" +#include "NGT/ObjectSpace.h" +#include "NGTQ/ObjectFile.h" +#endif + NGT::Command::CreateParameters::CreateParameters(Args &args) { + args.parse("v"); try { index = args.get("#1"); } catch (...) 
{ @@ -104,7 +112,7 @@ using namespace std; case '-': property.seedType = NGT::Property::SeedType::SeedTypeNone; break; } - char objectType = args.getChar("o", 'f'); + auto objectType = args.getString("o", "f"); char distanceType = args.getChar("D", '2'); #ifdef NGT_REFINEMENT char refinementObjectType = args.getChar("R", 'f'); @@ -113,24 +121,21 @@ using namespace std; numOfObjects = args.getl("n", 0); indexType = args.getChar("i", 't'); - switch (objectType) { - case 'f': + if (objectType == "f") { property.objectType = NGT::Index::Property::ObjectType::Float; - break; - case 'c': + } else if (objectType == "c") { property.objectType = NGT::Index::Property::ObjectType::Uint8; - break; #ifdef NGT_HALF_FLOAT - case 'h': + } else if (objectType == "h") { property.objectType = NGT::Index::Property::ObjectType::Float16; - break; #endif + } else if (objectType == "s8" || objectType == "sqsu8") { + property.objectType = NGT::Index::Property::ObjectType::Qsuint8; #ifdef NGT_BFLOAT - case 'H': + } else if (objectType == "H") { property.objectType = NGT::Index::Property::ObjectType::Bfloat16; - break; #endif - default: + } else { std::stringstream msg; msg << "Command::CreateParameter: Error: Invalid object type. 
" << objectType; NGTThrowException(msg); @@ -193,11 +198,9 @@ using namespace std; case 'E': property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedL2; break; -#ifdef NGT_INNER_PRODUCT case 'i': property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeInnerProduct; break; -#endif case 'p': // added by Nyapicom property.distanceType = NGT::Index::Property::DistanceType::DistanceTypePoincare; break; @@ -219,6 +222,8 @@ using namespace std; } #endif + property.clippingRate = args.getf("c", 0.0); + { string str = args.getString("l", "-"); if (str != "-") { @@ -257,6 +262,7 @@ using namespace std; "[-N maximum-#-of-inserted-objects] " #endif "[-l #-of-neighbors-for-insertion-order[:epsilon-for-insertion-order]] " + "[-c scalar-quantization-clipping-rate] " "index(output) [data.tsv(input)]"; try { @@ -283,108 +289,49 @@ using namespace std; break; } } catch(NGT::Exception &err) { - std::cerr << err.what() << std::endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage; + NGTThrowException(msg); } } - void appendTextVectors(NGT::Index &index, const std::string &data, size_t dataSize, char destination) { - NGT::Property prop; - index.getProperty(prop); - - size_t id = index.getObjectRepositorySize(); - vector> objects; - NGT::Timer timer; - timer.start(); - ifstream is(data); - if (!is) { - cerr << "Cannot open the specified data file. 
" << data << endl; - return; - } - std::string line; - size_t counter = 0; - float maxMag = 0.0; - while (getline(is, line)) { - if (is.eof()) break; - if (dataSize > 0 && counter > dataSize) break; - vector object; - vector tokens; - NGT::Common::tokenize(line, tokens, "\t, "); - for (auto &v : tokens) object.push_back(NGT::Common::strtod(v)); -#ifdef NGT_INNER_PRODUCT - if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { - double mag = 0.0; - for (auto &v : object) { - mag += v * v; - } - if (mag > maxMag) { - maxMag = mag; - } - //object.emplace_back(sqrt(maxMag - mag)); - object.emplace_back(mag); - } -#endif -#ifdef NGT_REFINEMENT - if (destination == 'r') { - index.appendToRefinement(object); - } else { - index.append(object); - } -#else - index.append(object); -#endif - counter++; - id++; - if (counter % 1000000 == 0) { - timer.stop(); - std::cerr << "appended " << static_cast(counter) / 1000000.0 << "M objects."; - if (counter != id) { - std::cerr << " # of the total objects=" << static_cast(id) / 1000000.0 << "M"; - } - cerr << " peak vm size=" << NGT::Common::getProcessVmPeakStr() - << " time=" << timer << std::endl; - timer.restart(); - } - } -#ifdef NGT_INNER_PRODUCT - if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { - NGT::ObjectSpace *rep = 0; -#ifdef NGT_REFINEMENT - if (destination == 'r') { - rep = &index.getRefinementObjectSpace(); + void appendTextVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, + std::string &destination, size_t ioSearchSize, float ioEpsilon) { + NGT::StdOstreamRedirector redirector(false); + redirector.begin(); + NGT::Index index(indexPath); + index.enableLog(); + auto append = destination.find('n') == std::string::npos; + auto refinement = destination.find('r') != std::string::npos; + index.appendFromTextObjectFile(data, dataSize, append, refinement); + + if (appendMode == 't') { + if (ioSearchSize > 0) { + 
NGT::Index::InsertionOrder insertionOrder; + insertionOrder.nOfNeighboringNodes = ioSearchSize; + insertionOrder.epsilon = ioEpsilon; + std::cerr << "append: insertion order optimization is enabled. " + << ioSearchSize << ":" << ioEpsilon << std::endl; + index.extractInsertionOrder(insertionOrder); + index.createIndexWithInsertionOrder(insertionOrder); } else { - rep = &index.getObjectSpace(); - } -#else - rep = &index.getObjectSpace(); -#endif - for (size_t idx = 1; idx < rep->getRepository().size(); idx++) { - std::vector object; - rep->getObject(idx, object); - //object.emplace_back(sqrt(maxMag - mag)); - object.back() = sqrt(maxMag - object.back()); -#ifdef NGT_REFINEMENT - if (destination == 'r') { - index.updateToRefinement(idx, object); - } else { - index.update(idx, object); - } -#else - index.update(idx, object); -#endif + index.createIndex(); } } -#endif + index.save(); + index.close(); + redirector.end(); } - void appendTextVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, char destination, size_t ioSearchSize, float ioEpsilon, float cutRate) { + void appendRefinementVectors(std::string &indexPath, char appendMode, size_t ioSearchSize, float ioEpsilon) { NGT::StdOstreamRedirector redirector(false); redirector.begin(); NGT::Index index(indexPath); index.enableLog(); - appendTextVectors(index, data, dataSize, destination); - if (appendMode == 't') { + index.appendFromRefinementObjectFile(); + if (appendMode == 'r') { if (ioSearchSize > 0) { NGT::Index::InsertionOrder insertionOrder; insertionOrder.nOfNeighboringNodes = ioSearchSize; @@ -402,6 +349,68 @@ using namespace std; redirector.end(); } + void appendTextVectorsInMemory(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, + size_t ioSearchSize, float ioEpsilon) { + NGT::Index index(indexPath); + index.enableLog(); + { + ifstream is(data); + if (!is) { + std::stringstream msg; + msg << "Cannot open the specified data file. 
" << data; + NGTThrowException(msg); + } + std::string line; + size_t counter = 0; + std::vector objects; + while (getline(is, line)) { + if (is.eof()) break; + vector tokens; + NGT::Common::tokenize(line, tokens, "\t, "); + for (auto &v : tokens) objects.emplace_back(NGT::Common::strtod(v)); + counter++; + } + index.append(objects.data(), counter); + index.save(); + } + if (appendMode == 'm') { + if (ioSearchSize > 0) { + NGT::Index::InsertionOrder insertionOrder; + insertionOrder.nOfNeighboringNodes = ioSearchSize; + insertionOrder.epsilon = ioEpsilon; + std::cerr << "append: insertion order optimization is enabled. " + << ioSearchSize << ":" << ioEpsilon << std::endl; + index.extractInsertionOrder(insertionOrder); + index.createIndexWithInsertionOrder(insertionOrder); + } else { + index.createIndex(); + } + } + index.save(); + index.close(); + } + +#ifdef NGT_APPENDING_BINARY + +void appendBinaryVectors(std::string &indexPath, std::string &data, size_t dataSize, char appendMode, std::string &destination) { + NGT::StdOstreamRedirector redirector(false); + redirector.begin(); + NGT::Index index(indexPath); + index.enableLog(); + std::vector tokens; + NGT::Common::tokenize(data, tokens, "."); + auto append = destination.find('n') == std::string::npos; + auto refinement = destination.find('r') != std::string::npos; + index.appendFromBinaryObjectFile(data, dataSize, append, refinement); + + if (appendMode == 'b') { + index.createIndex(32); + } + index.save(); + index.close(); + redirector.end(); + } +#endif void NGT::Command::append(Args &args) @@ -413,9 +422,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "ngt: Error: DB is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified." 
<< endl; + msg << usage << endl; + NGTThrowException(msg); } string data; try { @@ -430,7 +440,6 @@ using namespace std; size_t ioSearchSize = args.getl("S", 0); float ioEpsilon = args.getf("E", 0.1); - float cutRate = args.getf("c", 0.02); if (debugLevel >= 1) { cerr << "thread size=" << threadSize << endl; @@ -438,21 +447,34 @@ using namespace std; } - char appendMode = args.getChar("m", '-'); - char destination = args.getChar("D", '-'); - if (appendMode == '-') { + char appendMode = args.getChar("m", 't'); + auto destination = args.getString("D", "o"); + if (appendMode == 'n') { try { NGT::Index::append(indexPath, data, threadSize, dataSize); } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage; + NGTThrowException(msg); } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << usage; + NGTThrowException(msg); } } else if (appendMode == 't' || appendMode == 'T') { - appendTextVectors(indexPath, data, dataSize, appendMode, destination, ioSearchSize, ioEpsilon, cutRate); + appendTextVectors(indexPath, data, dataSize, appendMode, destination, ioSearchSize, ioEpsilon); + } else if (appendMode == 'r' || appendMode == 'R') { + appendRefinementVectors(indexPath, appendMode, ioSearchSize, ioEpsilon); + } else if (appendMode == 'm' || appendMode == 'M') { + appendTextVectorsInMemory(indexPath, data, dataSize, appendMode, ioSearchSize, ioEpsilon); +#ifdef NGT_APPENDING_BINARY + } else if (appendMode == 'b' || appendMode == 'B') { + appendBinaryVectors(indexPath, data, dataSize, appendMode, destination); + } +#else } +#endif } void @@ -550,8 +572,11 @@ using namespace std; stream << "Rank\tID\tDistance" << endl; } for (size_t i = 0; i < objects.size(); i++) { - stream << i + 1 << "\t" << objects[i].id << "\t"; - stream << objects[i].distance << endl; + if (searchParameters.outputMode == "e-") { + 
stream << i + 1 << "\t" << objects[i].id << "\t" << 0.0 << std::endl; + } else { + stream << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << std::endl; + } } if (searchParameters.outputMode[0] == 'e') { stream << "# End of Search" << endl; @@ -625,9 +650,10 @@ using namespace std; try { database = args.get("#1"); } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } SearchParameters searchParameters(args); @@ -647,11 +673,14 @@ using namespace std; cerr << "Peak VM size=" << NGT::Common::getProcessVmPeakStr() << std::endl; } } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage; + NGTThrowException(msg); } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << usage; + NGTThrowException(msg); } } @@ -665,16 +694,18 @@ using namespace std; try { database = args.get("#1"); } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } try { args.get("#2"); } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ID is not specified" << endl; + msg << usage; + NGTThrowException(msg); } char dataType = args.getChar("d", 'f'); char mode = args.getChar("m", '-'); @@ -693,15 +724,15 @@ using namespace std; try { ids = args.get("#2"); } catch (...) 
{ - cerr << "ngt: Error: Data file is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Data file is not specified" << endl; + NGTThrowException(msg); } ifstream is(ids); if (!is) { - cerr << "ngt: Error: Cannot open the specified file. " << ids << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Cannot open the specified file. " << ids << endl; + NGTThrowException(msg); } string line; int count = 0; @@ -732,11 +763,14 @@ using namespace std; } NGT::Index::remove(database, objects, force); } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage; + NGTThrowException(msg); } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << usage; + NGTThrowException(msg); } } @@ -748,26 +782,31 @@ using namespace std; try { database = args.get("#1"); } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified." << endl; + msg << usage; + NGTThrowException(msg); } string exportFile; try { exportFile = args.get("#2"); } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ID is not specified" << endl; + msg << usage; + NGTThrowException(msg); } try { NGT::Index::exportIndex(database, exportFile); } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage; + NGTThrowException(msg); } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << usage; + NGTThrowException(msg); } } @@ -779,26 +818,27 @@ using namespace std; try { database = args.get("#1"); } catch (...) 
{ - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } string importFile; try { importFile = args.get("#2"); } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ID is not specified" << endl; + msg << usage; + NGTThrowException(msg); } try { NGT::Index::importIndex(database, importFile); } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; + cerr << err.what() << endl; cerr << usage << endl; } catch (...) { - cerr << "ngt: Error" << endl; cerr << usage << endl; } @@ -812,9 +852,10 @@ using namespace std; try { indexName = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage << endl; + NGTThrowException(msg); } // the number of forcedly pruned edges @@ -826,15 +867,17 @@ using namespace std; cerr << "selectively pruned edge size=" << selectivelyPrunedEdgeSize << endl; if (selectivelyPrunedEdgeSize == 0 && forcedlyPrunedEdgeSize == 0) { - cerr << "prune: Error! Either of selective edge size or remaining edge size should be specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "prune: Error! Either of selective edge size or remaining edge size should be specified." << endl; + msg << usage << endl; + NGTThrowException(msg); } if (forcedlyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize >= forcedlyPrunedEdgeSize) { - cerr << "prune: Error! selective edge size is less than remaining edge size." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "prune: Error! selective edge size is less than remaining edge size." 
<< endl; + msg << usage << endl; + NGTThrowException(msg); } NGT::Index index(indexName); @@ -923,17 +966,19 @@ using namespace std; try { inIndexPath = args.get("#1"); } catch (...) { - cerr << "ngt::reconstructGraph: Input index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ngt::reconstructGraph: Input index is not specified." << endl; + msg << usage << endl; + NGTThrowException(msg); } string outIndexPath; try { outIndexPath = args.get("#2"); } catch (...) { - cerr << "ngt::reconstructGraph: Output index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ngt::reconstructGraph: Output index is not specified." << endl; + msg << usage << endl; + NGTThrowException(msg); } char mode = args.getChar("m", 'S'); @@ -944,6 +989,7 @@ using namespace std; double margin = args.getf("M", 0.2); char smode = args.getChar("s", '-'); bool verbose = args.getBool("v"); + char graphConversion = args.getChar("C", '-'); // the number (rank) of original edges int numOfOutgoingEdges = args.getl("o", -1); @@ -972,6 +1018,7 @@ using namespace std; #else graphOptimizer.shortcutReductionRange = args.getf("R", 18.0); #endif + graphOptimizer.undirectedGraphConversion = graphConversion == '-' ? false : true; graphOptimizer.logDisabled = !verbose; graphOptimizer.set(numOfOutgoingEdges, numOfIncomingEdges, nOfQueries, nOfResults); @@ -993,9 +1040,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage << endl; + NGTThrowException(msg); } char mode = args.getChar("m", '-'); @@ -1018,7 +1066,7 @@ using namespace std; std::cout << "Successfully completed." << std::endl; } catch (NGT::Exception &err) { - cerr << "ngt: Error. 
" << err.what() << endl; + cerr << err.what() << endl; cerr << usage << endl; } @@ -1037,18 +1085,20 @@ using namespace std; try { inIndexPath = args.get("#1"); } catch (...) { - cerr << "Input index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Input index is not specified" << endl; + msg << usage << endl; + NGTThrowException(msg); } string outIndexPath; try { outIndexPath = args.get("#2"); } catch (...) { - cerr << "Output index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Output index is not specified" << endl; + msg << usage << endl; + NGTThrowException(msg); } NGT::Index index(inIndexPath); @@ -1062,8 +1112,10 @@ using namespace std; try { GraphReconstructor::refineANNG(index, epsilon, expectedAccuracy, noOfEdges, exploreEdgeSize, batchSize); } catch (NGT::Exception &err) { - std::cerr << "Error!! Cannot refine the index. " << err.what() << std::endl; - return; + std::stringstream msg; + msg << "Error!! Cannot refine the index. " << err.what() << std::endl; + msg << usage << endl; + NGTThrowException(msg); } index.saveIndex(outIndexPath); #endif @@ -1082,9 +1134,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified." << endl; + msg << usage << endl; + NGTThrowException(msg); } char mode = args.getChar("m", 'c'); @@ -1099,9 +1152,10 @@ using namespace std; const string com = "cp -r " + indexPath + " " + path; int stat = system(com.c_str()); if (stat != 0) { - std::cerr << "ngt::repair: Cannot create the specified index. " << path << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "ngt::repair: Cannot create the specified index. " << path << std::endl; + msg << usage; + NGTThrowException(msg); } } @@ -1240,9 +1294,10 @@ using namespace std; std::cerr << "Saving index." 
<< std::endl; index.saveIndex(path); } catch (NGT::Exception &err) { - cerr << "ngt: Error. " << err.what() << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << err.what() << endl; + msg << usage; + NGTThrowException(msg); } } } @@ -1259,9 +1314,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified." << endl; + msg << usage; + NGTThrowException(msg); } GraphOptimizer::ANNGEdgeOptimizationParameter parameter; @@ -1295,9 +1351,10 @@ using namespace std; try { database = args.get("#1"); } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } size_t edgeSize = args.getl("E", UINT_MAX); @@ -1319,7 +1376,6 @@ using namespace std; cerr << "ngt: NGT Error. " << err.what() << endl; cerr << usage << endl; } catch (...) { - cerr << "ngt: Error" << endl; cerr << usage << endl; } } @@ -1327,7 +1383,7 @@ using namespace std; void NGT::Command::exportGraph(Args &args) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "ngt: Error: exportGraph is not implemented." << std::endl; + std::cerr << "exportGraph is not implemented." << std::endl; abort(); #else std::string usage = "ngt export-graph [-k #-of-edges] index"; @@ -1335,9 +1391,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "ngt::exportGraph: Index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } int k = args.getl("k", 0); @@ -1371,7 +1428,7 @@ using namespace std; void NGT::Command::exportObjects(Args &args) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "ngt: Error: exportObjects is not implemented." 
<< std::endl; + std::cerr << "exportObjects is not implemented." << std::endl; abort(); #else std::string usage = "ngt export-objects index"; @@ -1379,9 +1436,10 @@ using namespace std; try { indexPath = args.get("#1"); } catch (...) { - cerr << "ngt::exportGraph: Index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Index is not specified" << endl; + msg << usage; + NGTThrowException(msg); } NGT::Index index(indexPath); diff --git a/lib/NGT/Command.h b/lib/NGT/Command.h index 0279adb..b2d90ef 100644 --- a/lib/NGT/Command.h +++ b/lib/NGT/Command.h @@ -119,8 +119,9 @@ class Command { { std::ifstream is(searchParameters.query); if (!is) { - std::cerr << "Cannot open the specified file. " << searchParameters.query << std::endl; - return; + std::stringstream msg; + msg << "Cannot open the specified query file. " << searchParameters.query; + NGTThrowException(msg); } search(index, searchParameters, is, stream); } diff --git a/lib/NGT/Common.h b/lib/NGT/Common.h index e449aaf..e45fc14 100644 --- a/lib/NGT/Common.h +++ b/lib/NGT/Common.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,23 @@ namespace NGT { typedef half_float::half float16; #endif + class quint8 { + public: + quint8(uint8_t v):value(v){} + quint8 &operator=(uint8_t v) { value = v; return *this; } + operator uint8_t() const { return value; } + uint8_t get() { return value; } + uint8_t value; + }; + class qsint8 { + public: + qsint8(int8_t v):value(v){} + qsint8 &operator=(int8_t v) { value = v; return *this; } + operator int8_t() const { return value; } + int8_t get() { return value; } + int8_t value; + }; + #ifdef NGT_BFLOAT class bfloat16 { public: @@ -1201,6 +1219,7 @@ namespace NGT { vectorSize--; } } + iterator insert(iterator &i, const TYPE &data, SharedMemoryAllocator &allocator) { if (size() == 0) { push_back(data, allocator); @@ -1766,7 +1785,6 @@ namespace NGT { removedList->pop_back(); return idx; } - void 
removedListPush(size_t id) { if (removedList->size() == 0) { removedList->push_back(id, allocator); @@ -1780,6 +1798,16 @@ namespace NGT { } removedList->insert(rmi, id, allocator); } + void removedListRemove(size_t id) { + if (removedList->size() == 0) { + return; + } + Vector::iterator rmi + = std::lower_bound(removedList->begin(allocator), removedList->end(allocator), id, std::greater()); + if ((rmi != removedList->end(allocator)) && ((*rmi) == id)) { + removedList->erase(rmi, allocator); + } + } #else void *construct() { SharedMemoryAllocator &allocator = getAllocator(); @@ -1812,6 +1840,14 @@ namespace NGT { return push(n); } + size_t insert(size_t idx, TYPE *n) { +#ifdef ADVANCED_USE_REMOVED_LIST + removedListRemove(idx); +#endif + put(idx, n); + return idx; + } + bool isEmpty(size_t idx) { if (idx < size()) { return (*array).at(idx, allocator) == 0; @@ -2055,6 +2091,29 @@ namespace NGT { return std::vector::size() - 1; } +#ifdef ADVANCED_USE_REMOVED_LIST + void removedListRemove(size_t id) { + if (!removedList.empty()) { + std::priority_queue, std::greater> rl = removedList; + std::priority_queue, std::greater> newrl; + while (rl.size() != 0) { + if (rl.top() == id) { + rl.pop(); + while (rl.size() != 0) { + newrl.push(rl.top()); + rl.pop(); + } + removedList = newrl; + break; + } + if (rl.top() > id) break; + newrl.push(rl.top()); + rl.pop(); + } + } + } +#endif + size_t insert(TYPE *n) { #ifdef ADVANCED_USE_REMOVED_LIST if (!removedList.empty()) { @@ -2067,6 +2126,14 @@ namespace NGT { return push(n); } + size_t insert(size_t idx, TYPE *n) { +#ifdef ADVANCED_USE_REMOVED_LIST + removedListRemove(idx); +#endif + put(idx, n); + return idx; + } + bool isEmpty(size_t idx) { if (idx < std::vector::size()) { return (*this)[idx] == 0; @@ -2271,7 +2338,7 @@ namespace NGT { #ifdef ADVANCED_USE_REMOVED_LIST size_t count() { return std::vector::size() == 0 ? 
0 : std::vector::size() - removedList.size() - 1; } protected: - std::priority_queue, std::greater > removedList; + std::priority_queue, std::greater> removedList; #endif }; @@ -2344,7 +2411,7 @@ namespace NGT { ObjectID id; }; - typedef std::priority_queue, std::less > ResultPriorityQueue; + typedef std::priority_queue, std::less> ResultPriorityQueue; class SearchContainer : public NGT::Container { public: @@ -2365,6 +2432,7 @@ namespace NGT { useAllNodesInLeaf = sc.useAllNodesInLeaf; expectedAccuracy = sc.expectedAccuracy; visitCount = sc.visitCount; + insertion = sc.insertion; return *this; } virtual ~SearchContainer() {} @@ -2376,6 +2444,7 @@ namespace NGT { edgeSize = -1; // dynamically prune the edges during search. -1 means following the index property. 0 means using all edges. useAllNodesInLeaf = false; expectedAccuracy = -1.0; + insertion = false; } void setSize(size_t s) { size = s; } void setResults(ObjectDistances *r) { result = r; } @@ -2385,6 +2454,7 @@ namespace NGT { void setExpectedAccuracy(float a) { expectedAccuracy = a; } inline bool resultIsAvailable() { return result != 0; } + float getEpsilon() { return explorationCoefficient - 1.0; } ObjectDistances &getResult() { if (result == 0) { NGTThrowException("Inner error: results is not set"); @@ -2406,6 +2476,8 @@ namespace NGT { float expectedAccuracy; private: ObjectDistances *result; + public: + bool insertion; }; @@ -2497,3 +2569,23 @@ namespace NGT { } // namespace NGT +namespace std { + template<> + class numeric_limits { + public: + static NGT::qsint8 max() { return NGT::qsint8(127); } + static NGT::qsint8 min() { return NGT::qsint8(-128); } + static bool is_specialized() { return true; } + }; +} + +namespace std { + template<> + class numeric_limits { + public: + static NGT::quint8 max() { return NGT::quint8(255); } + static NGT::quint8 min() { return NGT::quint8(0); } + static bool is_specialized() { return true; } + }; +} + diff --git a/lib/NGT/Graph.cpp b/lib/NGT/Graph.cpp index 
52d0dbb..44b9eeb 100644 --- a/lib/NGT/Graph.cpp +++ b/lib/NGT/Graph.cpp @@ -213,6 +213,21 @@ NeighborhoodGraph::Search::lorentzFloat16(NeighborhoodGraph &graph, NGT::SearchC graph.searchReadOnlyGraph(sc, seeds); } #endif +void +NeighborhoodGraph::Search::l2Qsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} +void +NeighborhoodGraph::Search::innerProductQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} +void +NeighborhoodGraph::Search::normalizedCosineSimilarityQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} //// void @@ -360,14 +375,34 @@ NeighborhoodGraph::Search::lorentzFloat16ForLargeDataset(NeighborhoodGraph &grap graph.searchReadOnlyGraph(sc, seeds); } #endif - +void +NeighborhoodGraph::Search::l2Qsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} +void +NeighborhoodGraph::Search::innerProductQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} +void +NeighborhoodGraph::Search::normalizedCosineSimilarityQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) +{ + graph.searchReadOnlyGraph(sc, seeds); +} #endif void NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds) { - ObjectRepository &objectRepository = getObjectRepository(); NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator(); + setupDistances(sc, seeds, comparator); +} + +void +NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, NGT::ObjectSpace::Comparator &comp) +{ + ObjectRepository &objectRepository = getObjectRepository(); ObjectDistances tmp; 
tmp.reserve(seeds.size()); size_t seedSize = seeds.size(); @@ -401,9 +436,9 @@ NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &see continue; } #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - seeds[i].distance = comparator(sc.object, *objectRepository.get(seeds[i].id)); + seeds[i].distance = comp(sc.object, *objectRepository.get(seeds[i].id)); #else - seeds[i].distance = comparator(sc.object, *objects[seeds[i].id]); + seeds[i].distance = comp(sc.object, *objects[seeds[i].id]); #endif } @@ -519,6 +554,98 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, #ifdef NGT_GRAPH_READ_ONLY_GRAPH +#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH + template + void + NeighborhoodGraph::searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds) + { + + if (sc.explorationCoefficient == 0.0) { + sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; + } + + // setup edgeSize + size_t edgeSize = getEdgeSize(sc); + + UncheckedSet unchecked; + + CHECK_LIST distanceChecked(searchRepository.size()); + + ResultSet results; + + setupDistances(sc, seeds, COMPARATOR::compare); + setupSeeds(sc, seeds, results, unchecked, distanceChecked); + + Distance explorationRadius = sc.explorationCoefficient * sc.radius; + const size_t dimension = objectSpace->getPaddedDimension(); + ReadOnlyGraphNode *nodes = &searchRepository.front(); + ObjectDistance result; + ObjectDistance target; + const size_t prefetchSize = objectSpace->getPrefetchSize(); + const size_t prefetchOffset = objectSpace->getPrefetchOffset(); + while (!unchecked.empty()) { + target = unchecked.top(); + unchecked.pop(); + if (target.distance > explorationRadius) { + break; + } + auto *neighbors = &nodes[target.id]; + auto *neighborptr = &(*neighbors)[0]; + size_t neighborSize = neighbors->size() < edgeSize ? 
neighbors->size() : edgeSize; + auto *neighborendptr = neighborptr + neighborSize; + ObjectRepository &objectRepository = getObjectRepository(); + pair nsPtrs[neighborSize]; + size_t nsPtrsSize = 0; + for (; neighborptr < neighborendptr; ++neighborptr) { +#ifdef NGT_VISIT_COUNT + sc.visitCount++; +#endif + if (!distanceChecked[*neighborptr]) { + distanceChecked.insert(*neighborptr); + nsPtrs[nsPtrsSize].first = *neighborptr; + nsPtrs[nsPtrsSize].second = objectRepository.get(*neighborptr); + if (nsPtrsSize < prefetchOffset) { + unsigned char *ptr = reinterpret_cast(objectRepository.get(*neighborptr)); + MemoryCache::prefetch(ptr, prefetchSize); + } + nsPtrsSize++; + } + } + for (size_t idx = 0; idx < nsPtrsSize; idx++) { + auto *neighborptr = &nsPtrs[idx]; + if (idx + prefetchOffset < nsPtrsSize) { + unsigned char *ptr = reinterpret_cast((nsPtrs[idx + prefetchOffset]).second); + MemoryCache::prefetch(ptr, prefetchSize); + } + +#ifdef NGT_DISTANCE_COMPUTATION_COUNT + sc.distanceComputationCount++; +#endif + Distance distance = COMPARATOR::compare((void*)&sc.object[0], + (void*)&(*static_cast(neighborptr->second))[0], dimension); + if (distance <= explorationRadius) { + result.set(neighborptr->first, distance); + unchecked.push(result); + if (distance <= sc.radius) { + results.push(result); + if (results.size() > sc.size) { + results.pop(); + sc.radius = results.top().distance; + explorationRadius = sc.explorationCoefficient * sc.radius; + } + } + } + } + } + if (sc.resultIsAvailable()) { + ObjectDistances &qresults = sc.getResult(); + qresults.moveFrom(results); + } else { + sc.workingResult = std::move(results); + } + + } +#else // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH template void NeighborhoodGraph::searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds) @@ -607,6 +734,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, } } +#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH #endif @@ -616,7 +744,6 @@ 
NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, if (sc.explorationCoefficient == 0.0) { sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; } - // setup edgeSize size_t edgeSize = getEdgeSize(sc); @@ -634,10 +761,16 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, #endif ResultSet results; - setupDistances(sc, seeds); + NGT::ObjectSpace::Comparator *comparatorPtr = 0; + if (sc.insertion) { + comparatorPtr = &objectSpace->getComparator(); + } else { + comparatorPtr = &objectSpace->getComparatorForSearch(); + } + NGT::ObjectSpace::Comparator &comparator = *comparatorPtr; + setupDistances(sc, seeds, comparator); setupSeeds(sc, seeds, results, unchecked, distanceChecked); Distance explorationRadius = sc.explorationCoefficient * sc.radius; - NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator(); ObjectRepository &objectRepository = getObjectRepository(); const size_t prefetchSize = objectSpace->getPrefetchSize(); ObjectDistance result; @@ -801,7 +934,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, continue; } objtbl.push_back(getObjectRepository().get((*i).id)); - GraphNode *n = 0; + GraphNode *n = 0; try { n = getNode((*i).id); } catch (Exception &err) { diff --git a/lib/NGT/Graph.h b/lib/NGT/Graph.h index be2b651..b0dbaa2 100644 --- a/lib/NGT/Graph.h +++ b/lib/NGT/Graph.h @@ -189,6 +189,9 @@ namespace NGT { }; #ifdef NGT_GRAPH_READ_ONLY_GRAPH +#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH + typedef std::vector ReadOnlyGraphNode; +#else class ReadOnlyGraphNode : public std::vector> { typedef std::vector> PARENT; public: @@ -219,6 +222,7 @@ namespace NGT { size_t reservedSize; size_t usedSize; }; +#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH class SearchGraphRepository : public std::vector { public: @@ -252,7 +256,11 @@ namespace NGT { } #else for (auto ni = node.begin(); ni != node.end(); ni++) { +#ifdef NGT_GRAPH_COMPACT_READ_ONLY_GRAPH + 
searchNode.push_back((*ni).id); +#else searchNode.push_back(std::pair((*ni).id, objectRepository.get((*ni).id))); +#endif // NGT_GRAPH_COMPACT_READ_ONLY_GRAPH } #endif } @@ -339,6 +347,14 @@ namespace NGT { } break; #endif + case NGT::ObjectSpace::Qsuint8: + switch (dtype) { + case NGT::ObjectSpace::DistanceTypeL2 : return l2Qsint8; + case NGT::ObjectSpace::DistanceTypeInnerProduct : return innerProductQsint8; + case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityQsint8; + default : return l2Qsint8; + } + break; default: NGTThrowException("NGT::Graph::Search: Not supported object type."); break; @@ -386,6 +402,14 @@ namespace NGT { default: return l2Float16ForLargeDataset; } #endif + case NGT::ObjectSpace::Qsuint8: + switch (dtype) { + case NGT::ObjectSpace::DistanceTypeL2 : return l2Qsint8ForLargeDataset; + case NGT::ObjectSpace::DistanceTypeInnerProduct : return innerProductQsint8ForLargeDataset; + case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityQsint8ForLargeDataset; + default : return l2Qsint8ForLargeDataset; + } + break; default: NGTThrowException("NGT::Graph::Search: Not supported object type."); break; @@ -419,7 +443,9 @@ namespace NGT { static void poincareFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); // added by Nyapicom static void lorentzFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); // added by Nyapicom #endif - + static void l2Qsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); + static void innerProductQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); + static void normalizedCosineSimilarityQsint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); static void l1Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); static void l2Uint8ForLargeDataset(NeighborhoodGraph 
&graph, NGT::SearchContainer &sc, ObjectDistances &seeds); static void l1FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); @@ -446,6 +472,9 @@ namespace NGT { static void poincareFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); static void lorentzFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); #endif + static void l2Qsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); + static void innerProductQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); + static void normalizedCosineSimilarityQsint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); }; #endif @@ -599,7 +628,7 @@ namespace NGT { NeighborhoodGraph(): objectSpace(0) { property.truncationThreshold = NGT_TRUNCATION_THRESHOLD; // initialize random to generate random seeds -#ifdef NGT_DISABLE_SRAND_FOR_RANDOM +#ifdef NGT_ENABLE_TIME_SEED_FOR_RANDOM struct timeval randTime; gettimeofday(&randTime, 0); srand(randTime.tv_usec); @@ -920,7 +949,7 @@ namespace NGT { #elif defined(NGT_GRAPH_CHECK_VECTOR) typedef BooleanVector DistanceCheckedSet; #elif defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - typedef HashBasedBooleanSet DistanceCheckedSet; + typedef HashBasedBooleanSet DistanceCheckedSet; #else class DistanceCheckedSet : public unordered_set { public: @@ -928,7 +957,7 @@ namespace NGT { }; #endif - typedef HashBasedBooleanSet DistanceCheckedSetForLargeDataset; + typedef HashBasedBooleanSet DistanceCheckedSetForLargeDataset; class NodeWithPosition : public ObjectDistance { public: @@ -953,6 +982,7 @@ namespace NGT { #endif #endif void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds); + void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, NGT::ObjectSpace::Comparator &comp); void 
setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, double (&comparator)(const void*, const void*, size_t)); void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, diff --git a/lib/NGT/GraphOptimizer.h b/lib/NGT/GraphOptimizer.h index d3ea102..9f73a37 100644 --- a/lib/NGT/GraphOptimizer.h +++ b/lib/NGT/GraphOptimizer.h @@ -79,6 +79,7 @@ namespace NGT { prefetchParameterOptimization = true; accuracyTableGeneration = true; shortcutReductionWithLessMemory = false; + undirectedGraphConversion = false; numOfThreads = 0; } @@ -109,10 +110,18 @@ namespace NGT { NGT::ObjectSpace &objectSpace = index.getObjectSpace(); NGT::ObjectRepository &objectRepository = objectSpace.getRepository(); size_t nQueries = 200; + if (objectRepository.size() == 0) { + std::stringstream msg; + msg << "The object repository is empty. " << objectRepository.size(); + NGTThrowException(msg); + } nQueries = objectRepository.size() - 1 < nQueries ? objectRepository.size() - 1 : nQueries; - + if (nQueries == 0) { + std::stringstream msg; + msg << "# of the queries is unexpected zero value. " << nQueries << ":" << objectRepository.size(); + NGTThrowException(msg); + } size_t step = objectRepository.size() / nQueries; - assert(step != 0); std::vector ids; for (size_t startID = start; startID < step; startID++) { for (size_t id = startID; id < objectRepository.size(); id += step) { @@ -311,7 +320,7 @@ namespace NGT { // extract only edges from the index to reduce the memory usage. 
NGT::GraphReconstructor::extractGraph(graph, *graphIndex); NeighborhoodGraph::Property &prop = graphIndex->getGraphProperty(); - if (prop.graphType == NGT::NeighborhoodGraph::GraphTypeONNG) { + if (undirectedGraphConversion) { NGT::GraphReconstructor::convertToANNG(graph); } NGT::GraphReconstructor::reconstructGraph(graph, *graphIndex, numOfOutgoingEdges, numOfIncomingEdges, maxNumOfEdges); @@ -390,7 +399,7 @@ namespace NGT { if (searchParameterOptimization || prefetchParameterOptimization || accuracyTableGeneration) { NGT::StdOstreamRedirector redirector(logDisabled); redirector.begin(); - NGT::Index outIndex(outIndexPath, true); + NGT::Index outIndex(outIndexPath, true); NGT::GraphIndex &outGraph = static_cast(outIndex.getIndex()); if (prefetchParameterOptimization) { if (!logDisabled) { @@ -704,6 +713,7 @@ namespace NGT { bool prefetchParameterOptimization; bool accuracyTableGeneration; bool shortcutReductionWithLessMemory; + bool undirectedGraphConversion; float shortcutReductionRange; size_t numOfThreads; }; diff --git a/lib/NGT/GraphReconstructor.h b/lib/NGT/GraphReconstructor.h index da48895..a78f810 100644 --- a/lib/NGT/GraphReconstructor.h +++ b/lib/NGT/GraphReconstructor.h @@ -26,7 +26,7 @@ #warning "*** OMP is *NOT* available! ***" #endif -//#define NGT_SHORTCUT_REDUCTION_WITH_ANGLE +#define NGT_SHORTCUT_REDUCTION_WITH_ANGLE //#define NGT_SHORTCUT_REDUCTION_WITH_ADDITIONAL_CONDITION namespace NGT { @@ -248,7 +248,7 @@ class GraphReconstructor { auto it = tmpGraph.begin() + idx; size_t id = idx + 1; try { - NGT::GraphNode &srcNode = *it; + NGT::GraphNode &srcNode = *it; std::unordered_map> neighbors; for (uint32_t sni = 0; sni < srcNode.size(); ++sni) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -559,11 +559,11 @@ class GraphReconstructor { for (size_t id = 1; id < outGraph.repository.size(); id++) { try { - NGT::GraphNode &node = *outGraph.getNode(id); #if defined(NGT_SHARED_MEMORY_ALLOCATOR) std::cerr << "Not implemented yet." 
<< std::endl; abort(); #else + NGT::GraphNode &node = *outGraph.getNode(id); node.erase(std::remove_if(node.begin(), node.end(), [](NGT::ObjectDistance &n){ return (n.id & 0x80000000) != 0; }), node.end()); #endif } catch(...) {} @@ -833,7 +833,7 @@ class GraphReconstructor { } NGT::GraphIndex::showStatisticsOfGraph(outGraph); - std::vector reverse(graph.size() + 1); + std::vector reverse(graph.size() + 1); for (size_t id = 1; id <= graph.size(); ++id) { try { NGT::GraphNode &node = graph[id - 1]; @@ -849,15 +849,15 @@ class GraphReconstructor { } } - std::vector > reverseSize(graph.size() + 1); + std::vector > reverseSize(graph.size() + 1); reverseSize[0] = std::pair(0, 0); for (size_t rid = 1; rid <= graph.size(); ++rid) { reverseSize[rid] = std::pair(reverse[rid].size(), rid); } - std::sort(reverseSize.begin(), reverseSize.end()); + std::sort(reverseSize.begin(), reverseSize.end()); - std::vector indegreeCount(graph.size(), 0); + std::vector indegreeCount(graph.size(), 0); size_t zeroCount = 0; for (size_t sizerank = 0; sizerank <= reverseSize.size(); sizerank++) { @@ -865,17 +865,17 @@ class GraphReconstructor { zeroCount++; continue; } - size_t rid = reverseSize[sizerank].second; - ObjectDistances &rnode = reverse[rid]; + size_t rid = reverseSize[sizerank].second; + ObjectDistances &rnode = reverse[rid]; for (auto rni = rnode.begin(); rni != rnode.end(); ++rni) { - if (indegreeCount[(*rni).id] >= reverseEdgeSize) { + if (indegreeCount[(*rni).id] >= reverseEdgeSize) { continue; } - NGT::GraphNode &node = *outGraph.getNode(rid); + NGT::GraphNode &node = *outGraph.getNode(rid); if (indegreeCount[(*rni).id] > 0 && node.size() >= originalEdgeSize) { continue; } - + node.push_back(NGT::ObjectDistance((*rni).id, (*rni).distance)); indegreeCount[(*rni).id]++; } diff --git a/lib/NGT/HashBasedBooleanSet.h b/lib/NGT/HashBasedBooleanSet.h index 2495824..077f8c4 100644 --- a/lib/NGT/HashBasedBooleanSet.h +++ b/lib/NGT/HashBasedBooleanSet.h @@ -22,16 +22,16 @@ #include 
#include -class HashBasedBooleanSet{ +template class HashBasedBooleanSet{ private: - uint32_t *_table; + TYPE *_table; uint32_t _tableSize; uint32_t _mask; - std::unordered_set _stlHash; + std::unordered_set _stlHash; - inline uint32_t _hash1(const uint32_t value){ + inline uint32_t _hash1(const TYPE value){ return value & _mask; } @@ -56,8 +56,8 @@ class HashBasedBooleanSet{ std::cerr << "[WARN] table size is not 2^N : " << tableSize << std::endl; } - _table = new uint32_t[tableSize]; - memset(_table, 0, tableSize * sizeof(uint32_t)); + _table = new TYPE[tableSize]; + memset(_table, 0, tableSize * sizeof(TYPE)); } ~HashBasedBooleanSet(){ @@ -65,7 +65,7 @@ class HashBasedBooleanSet{ _stlHash.clear(); } - inline bool operator[](const uint32_t num){ + inline bool operator[](const TYPE num){ const uint32_t hashValue = _hash1(num); auto v = _table[hashValue]; @@ -81,8 +81,8 @@ class HashBasedBooleanSet{ return true; } - inline void set(const uint32_t num){ - uint32_t &value = _table[_hash1(num)]; + inline void set(const TYPE num){ + TYPE &value = _table[_hash1(num)]; if(value == 0){ value = num; }else{ @@ -92,11 +92,11 @@ class HashBasedBooleanSet{ } } - inline void insert(const uint32_t num){ + inline void insert(const TYPE num){ set(num); } - inline void reset(const uint32_t num){ + inline void reset(const TYPE num){ const uint32_t hashValue = _hash1(num); if(_table[hashValue] != 0){ if(_table[hashValue] != num){ diff --git a/lib/NGT/Index.cpp b/lib/NGT/Index.cpp index 4402760..a09f51d 100644 --- a/lib/NGT/Index.cpp +++ b/lib/NGT/Index.cpp @@ -23,6 +23,7 @@ #include "NGT/Thread.h" #include "NGT/GraphReconstructor.h" #include "NGT/Version.h" +#include "NGT/NGTQ/ObjectFile.h" using namespace std; using namespace NGT; @@ -246,6 +247,494 @@ NGT::Index::append(const string &database, const float *data, size_t dataSize, s return; } +void +NGT::Index::appendFromRefinementObjectFile(const std::string &indexPath) { + NGT::Index index(indexPath); + 
index.appendFromRefinementObjectFile(); + index.createIndex(); + index.save(); + index.close(); +} + + +void +NGT::Index::appendFromRefinementObjectFile() { + NGT::Property prop; + getProperty(prop); + float maxMag = prop.maxMagnitude; + bool maxMagSkip = false; + if (maxMag > 0.0) maxMagSkip = true; + auto &ros = getRefinementObjectSpace(); + auto &rrepo = ros.getRepository(); + size_t dim = getDimension(); + auto dataSize = rrepo.size(); + std::vector addedElement(dataSize); + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + NGT::Timer timer; + timer.start(); + for (size_t idx = 1; idx < rrepo.size(); idx++) { + if (rrepo[idx] == 0) { + continue; + } + std::vector object; + ros.getObject(idx, object); + if (object.size() != dim) { + if (object.size() == dim + 1) { + object.resize(dim); + } else { + std::stringstream msg; + msg << "Fatal inner error! iInvalid dimension. " << dim << ":" << object.size();; + NGTThrowException(msg); + } + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + double mag = 0.0; + for (auto &v : object) { + mag += v * v; + } + if (!maxMagSkip && mag > maxMag) { + maxMag = mag; + } + addedElement[idx] = mag; + } + if (idx % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(idx) / 1000000.0 << "M objects." 
+ << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + timer.stop(); + std::cerr << "time=" << timer << std::endl; + std::cerr << "maxMag=" << maxMag << std::endl; + std::cerr << "dataSize=" << dataSize << std::endl; + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + if (static_cast(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) { + static_cast(getIndex()).property.maxMagnitude = maxMag; + } + } + } + + if (getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) { + std::priority_queue min; + std::priority_queue, std::greater> max; + { + NGT::Timer timer; + timer.start(); + auto clippingSize = static_cast(dataSize * dim) * prop.clippingRate; + clippingSize = clippingSize == 0 ? 1 : clippingSize; + size_t counter = 0; + for (size_t idx = 1; idx < rrepo.size(); idx++) { + if (rrepo[idx] == 0) continue; + std::vector object; + ros.getObject(idx, object); + if (object.size() != dim) object.resize(dim); + if (getObjectSpace().isNormalizedDistance()) { + ObjectSpace::normalize(object); + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + float v = maxMag - addedElement[idx]; + object.emplace_back(sqrt(v >= 0.0 ? 
v : 0.0)); + } + for (auto &v : object) { + if (max.size() < clippingSize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < clippingSize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + counter++; + } + std::cerr << "time=" << timer << std::endl; + if (counter != 0) { + std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl; + setQuantizationFromMaxMin(max.top(), min.top()); + } + } + } + + { + + for (size_t idx = 1; idx < rrepo.size(); idx++) { + if (rrepo[idx] == 0) continue; + std::vector object; + ros.getObject(idx, object); + if (object.size() != dim) object.resize(dim); + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + object.emplace_back(sqrt(maxMag - addedElement[idx])); + } + append(object); + if (idx + 1 != getObjectRepositorySize()) { + std::stringstream msg; + msg << "The object repository and refinement repository are inconsistent. " << idx + 1 << ":" << getObjectRepositorySize(); + NGTThrowException(msg); + } + } + } + +} + +void +NGT::Index::insertFromRefinementObjectFile() { + NGT::Property prop; + getProperty(prop); + float maxMag = prop.maxMagnitude; + if (prop.maxMagnitude <= 0.0) { + std::stringstream msg; + msg << "Max magnitude is not set yet. " << maxMag; + NGTThrowException(msg); + } + auto &ros = getRefinementObjectSpace(); + auto &rrepo = ros.getRepository(); + auto &repo = getObjectSpace().getRepository(); + size_t dim = getDimension(); + auto dataSize = rrepo.size(); + std::vector addedElement(dataSize); + + for (size_t idx = 1; idx < rrepo.size(); idx++) { + if (rrepo[idx] == 0) continue; + if (repo.size() > idx && repo[idx] != 0) continue; + std::vector object; + ros.getObject(idx, object); + if (object.size() != dim) { + if (object.size() == dim + 1) { + object.resize(dim); + } else { + std::stringstream msg; + msg << "Fatal inner error! iInvalid dimension. 
" << dim << ":" << object.size();; + NGTThrowException(msg); + } + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + double mag = 0.0; + for (auto &v : object) { + mag += v * v; + } + if (mag > maxMag) { + maxMag = mag; + } + object.emplace_back(sqrt(maxMag - mag)); + } + try { + insert(idx, object); + } catch(NGT::Exception &err) { + std::stringstream msg; + msg << "Cannot insert. " << idx << " " << err.what(); + NGTThrowException(msg); + } + if (idx + 1 > getObjectRepositorySize()) { + std::stringstream msg; + msg << "The object repository and refinement repository are inconsistent. " << idx + 1 << ":" << getObjectRepositorySize(); + NGTThrowException(msg); + } + } +} + +void +NGT::Index::appendFromTextObjectFile(const std::string &indexPath, const std::string &data, size_t dataSize, + bool append, bool refinement) { +//#define APPEND_TEST + + NGT::Index index(indexPath); + index.appendFromTextObjectFile(data, dataSize, append, refinement); + index.createIndex(); + index.save(); + index.close(); +} + +void +NGT::Index::appendFromTextObjectFile(const std::string &data, size_t dataSize, bool append, bool refinement) { + NGT::Property prop; + getProperty(prop); + float maxMag = prop.maxMagnitude; + bool maxMagSkip = false; + if (maxMag > 0.0) maxMagSkip = true; + std::vector addedElement; + size_t dim = 0; + if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + NGT::Timer timer; + timer.start(); + ifstream is(data); + if (!is) { + std::stringstream msg; + msg << "Cannot open the specified data file. 
" << data; + NGTThrowException(msg); + } + std::string line; + size_t counter = 0; + while (getline(is, line)) { + if (is.eof()) break; + if (dataSize > 0 && counter > dataSize) break; + vector object; + vector tokens; + NGT::Common::tokenize(line, tokens, "\t, "); + if (tokens.back() == "") tokens.pop_back(); + if (dim == 0) { + dim = tokens.size(); + } else if (dim != tokens.size()) { + std::stringstream msg; + msg << "The dimensions are not inconsist. " << counter << ":" << dim << "x" << tokens.size() << data; + NGTThrowException(msg); + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + double mag = 0.0; + for (auto &vstr : tokens) { + auto v = NGT::Common::strtof(vstr); + mag += v * v; + } + if (!maxMagSkip && mag > maxMag) { + maxMag = mag; + } + addedElement.emplace_back(mag); + } + counter++; + if (counter % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(counter) / 1000000.0 << "M objects." + << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + timer.stop(); + dataSize = counter; + std::cerr << "time=" << timer << std::endl; + std::cerr << "maxMag=" << maxMag << std::endl; + std::cerr << "dataSize=" << dataSize << std::endl; + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + if (static_cast(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) { + static_cast(getIndex()).property.maxMagnitude = maxMag; + } + } + } + if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) { + std::priority_queue min; + std::priority_queue, std::greater> max; + { + NGT::Timer timer; + timer.start(); + ifstream is(data); + if (!is) { + std::stringstream msg; + msg << "Cannot open the specified data file. " << data; + NGTThrowException(msg); + } + auto clippingSize = static_cast(dataSize * dim) * prop.clippingRate; + clippingSize = clippingSize == 0 ? 
1 : clippingSize; + std::string line; + size_t counter = 0; + while (getline(is, line)) { + if (is.eof()) break; + if (dataSize > 0 && counter > dataSize) break; + vector object; + vector tokens; + NGT::Common::tokenize(line, tokens, "\t, "); + if (tokens.back() == "") tokens.pop_back(); + for (auto &vstr : tokens) { + auto v = NGT::Common::strtof(vstr); + object.emplace_back(v); + } + if (getObjectSpace().isNormalizedDistance()) { + ObjectSpace::normalize(object); + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + float v = maxMag - addedElement[counter]; + object.emplace_back(sqrt(v >= 0.0 ? v : 0.0)); + } + for (auto &v : object) { + if (max.size() < clippingSize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < clippingSize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + counter++; + } + std::cerr << "time=" << timer << std::endl; + if (counter != 0) { + std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl; + setQuantizationFromMaxMin(max.top(), min.top()); + } + } + } + if (append || refinement) { + + ifstream is(data); + if (!is) { + std::stringstream msg; + msg << "Cannot open the specified data file. " << data; + NGTThrowException(msg); + } + std::string line; + size_t counter = 0; + while (getline(is, line)) { + if (is.eof()) break; + if (dataSize > 0 && counter > dataSize) break; + vector object; + vector tokens; + NGT::Common::tokenize(line, tokens, "\t, "); + if (tokens.back() == "") tokens.pop_back(); + for (auto &vstr : tokens) { + auto v = NGT::Common::strtof(vstr); + object.emplace_back(v); + } +#ifdef NGT_REFINEMENT + if (refinement) { + appendToRefinement(object); + } +#endif + if (append) { + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct && maxMag > 0.0) { + float v = maxMag - addedElement[counter]; + object.emplace_back(sqrt(v >= 0.0 ? 
v : 0.0)); + } + NGT::Index::append(object); + } + counter++; + } + } + +} + + +void +NGT::Index::appendFromBinaryObjectFile(const std::string &indexPath, const std::string &data, + size_t dataSize, bool append, bool refinement) { + NGT::Index index(indexPath); + index.appendFromBinaryObjectFile(data, dataSize, append, refinement); + index.createIndex(); + index.save(); + index.close(); +} + +void +NGT::Index::appendFromBinaryObjectFile(const std::string &data, size_t dataSize, bool append, bool refinement) { /* Reads the binary object file in up to three passes: (1) squared magnitudes when appending with the inner product distance, (2) clipped max/min for scalar quantization, (3) appending to the index and/or the refinement repository; dataSize > 0 limits the number of objects read */ + NGT::Property prop; + getProperty(prop); + float maxMag = prop.maxMagnitude; + bool maxMagSkip = false; + if (maxMag > 0.0) maxMagSkip = true; + std::vector addedElement; + size_t dim = 0; + if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + NGT::Timer timer; + timer.start(); + StaticObjectFileLoader loader(data); + size_t counter = 0; + while (!loader.isEmpty()) { + if (dataSize > 0 && counter >= dataSize) break; /* stop after exactly dataSize objects */ + auto object = loader.getObject(); + if (dim == 0) { + dim = object.size(); + } else if (dim != object.size()) { + std::stringstream msg; + msg << "The dimensions are inconsistent. " << counter << ":" << dim << "x" << object.size() << data; + NGTThrowException(msg); + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + double mag = 0.0; + for (auto &v : object) { + mag += v * v; + } + if (!maxMagSkip && mag > maxMag) { + maxMag = mag; + } + addedElement.emplace_back(mag); + } + counter++; + if (counter % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(counter) / 1000000.0 << "M objects." 
+ << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + timer.stop(); + dataSize = counter; + std::cerr << "time=" << timer << std::endl; + std::cerr << "maxMag=" << maxMag << std::endl; + std::cerr << "dataSize=" << dataSize << std::endl; + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + if (static_cast(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) { + static_cast(getIndex()).property.maxMagnitude = maxMag; + } + } + } + if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) { + std::priority_queue min; + std::priority_queue, std::greater> max; + { + NGT::Timer timer; + timer.start(); + auto clippingSize = static_cast(dataSize * dim) * prop.clippingRate; + clippingSize = clippingSize == 0 ? 1 : clippingSize; + StaticObjectFileLoader loader(data); + size_t counter = 0; + while (!loader.isEmpty()) { + if (dataSize > 0 && counter >= dataSize) break; /* never read past the objects measured in the magnitude pass */ + auto object = loader.getObject(); + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + float v = maxMag - addedElement[counter]; + object.emplace_back(sqrt(v >= 0.0 ? 
v : 0.0)); + } + for (auto &v : object) { + if (max.size() < clippingSize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < clippingSize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + counter++; + } + std::cerr << "time=" << timer << std::endl; + if (counter != 0) { + std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl; + setQuantizationFromMaxMin(max.top(), min.top()); + } + } + } + if (append || refinement) { + StaticObjectFileLoader loader(data); + size_t counter = 0; + while (!loader.isEmpty()) { + if (dataSize > 0 && counter >= dataSize) break; /* stop after exactly dataSize objects */ + auto object = loader.getObject(); +#ifdef NGT_REFINEMENT + if (refinement) { + appendToRefinement(object); + } +#endif + if (append) { + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + object.emplace_back(sqrt(fmax(maxMag - addedElement[counter], 0.0))); /* clamp as in the quantization pass: a preset maxMagnitude may be smaller than this object's squared norm */ + } + NGT::Index::append(object); + } + counter++; + } + } +} + void NGT::Index::remove(const string &database, vector &objects, bool force) { NGT::Index index(database); @@ -344,6 +833,27 @@ NGT::Index::makeSparseObject(std::vector &object) return obj; } +void +NGT::Index::setQuantizationFromMaxMin(float max, float min) { + + float offset; + float scale; + if (getObjectSpace().getObjectType() == typeid(NGT::qsint8)) { + offset = 0.0; + scale = std::max(fabs(max), fabs(min)); + } else { + offset = min; + scale = max - offset; + } + setQuantization(scale, offset); +} + +void +NGT::Index::setQuantization(float scale, float offset) { + static_cast(getIndex()).property.quantizationScale = scale; + static_cast(getIndex()).property.quantizationOffset = offset; + getObjectSpace().setQuantization(scale, offset); +} void NGT::Index::extractInsertionOrder(InsertionOrder &insertionOrder) { 
@@ -358,26 +868,45 @@ NGT::Index::createIndex(size_t threadNumber, size_t sizeOfRepository) { InsertionOrder insertionOrder; NGT::Property prop; 
getProperty(prop); -#ifdef NGT_INNER_PRODUCT - if (prop.distanceType == ObjectSpace::DistanceTypeInnerProduct) { - size_t beginId = 1; - NGT::GraphRepository &graphRepository = static_cast(getIndex()).repository; + if (prop.objectType == NGT::ObjectSpace::ObjectType::Qsuint8 + ) { + auto &ros = getRefinementObjectSpace(); + auto &os = getObjectSpace(); + if (&ros != 0 && ros.getRepository().size() > os.getRepository().size()) { + if (os.getRepository().size() <= 1) { + if (ros.getRepository().size() < 100) { + std::cerr << "Warning! # of refinement objects is too small. " << ros.getRepository().size() << std::endl; + } + appendFromRefinementObjectFile(); + } else { + if (prop.quantizationScale <= 0.0) { + stringstream msg; + msg << "Fatal inner error! Scalar quantization parameters are not set yet. " << prop.quantizationScale << ":" << prop.quantizationOffset; + NGTThrowException(msg); + } + insertFromRefinementObjectFile(); + } + } + } else { + if (prop.distanceType == ObjectSpace::DistanceTypeInnerProduct) { + size_t beginId = 1; + NGT::GraphRepository &graphRepository = static_cast(getIndex()).repository; #ifdef NGT_SHARED_MEMORY_ALLOCATOR - auto &graphNodes = static_cast&>(graphRepository); - auto &graphNodeVector = reinterpret_cast&>(graphNodes); + auto &graphNodes = static_cast&>(graphRepository); + auto &graphNodeVectors = reinterpret_cast&>(graphNodes); #else - auto &graphNodes = static_cast&>(graphRepository); - auto &graphNodeVector = reinterpret_cast&>(graphNodes); + auto &graphNodes = static_cast&>(graphRepository); + auto &graphNodeVectors = reinterpret_cast&>(graphNodes); #endif - if (prop.maxMagnitude != 0.0) { - getObjectSpace().setMagnitude(prop.maxMagnitude, graphNodeVector, beginId); - } else { - auto maxMag = getObjectSpace().computeMaxMagnitude(beginId); - static_cast(getIndex()).property.maxMagnitude = maxMag; - getObjectSpace().setMagnitude(maxMag, graphNodeVector, beginId); + if (prop.maxMagnitude > 0.0) { /* a positive 
value was already determined; the default is now -1.0 (unset), which must fall through to the recomputation below */ + 
getObjectSpace().setMagnitude(prop.maxMagnitude, graphNodeVectors, beginId); + } else { + auto maxMag = getObjectSpace().computeMaxMagnitude(beginId); + static_cast(getIndex()).property.maxMagnitude = maxMag; + getObjectSpace().setMagnitude(maxMag, graphNodeVectors, beginId); + } } } -#endif if (prop.nOfNeighborsForInsertionOrder != 0) { insertionOrder.nOfNeighboringNodes = prop.nOfNeighborsForInsertionOrder; insertionOrder.epsilon = prop.epsilonForInsertionOrder; @@ -412,9 +941,10 @@ NGT::Index::Property::set(NGT::Property &prop) { if (prop.prefetchOffset != -1) prefetchOffset = prop.prefetchOffset; if (prop.prefetchSize != -1) prefetchSize = prop.prefetchSize; if (prop.accuracyTable != "") accuracyTable = prop.accuracyTable; -#ifdef NGT_INNER_PRODUCT if (prop.maxMagnitude != -1) maxMagnitude = prop.maxMagnitude; -#endif + if (prop.quantizationScale != -1) quantizationScale = prop.quantizationScale; + if (prop.quantizationOffset != -1) quantizationOffset = prop.quantizationOffset; + if (prop.clippingRate != -1) clippingRate = prop.clippingRate; if (prop.nOfNeighborsForInsertionOrder != -1) nOfNeighborsForInsertionOrder = prop.nOfNeighborsForInsertionOrder; if (prop.epsilonForInsertionOrder != -1) epsilonForInsertionOrder = prop.epsilonForInsertionOrder; } @@ -439,9 +969,10 @@ NGT::Index::Property::get(NGT::Property &prop) { prop.prefetchOffset = prefetchOffset; prop.prefetchSize = prefetchSize; prop.accuracyTable = accuracyTable; -#ifdef NGT_INNER_PRODUCT prop.maxMagnitude = maxMagnitude; -#endif + prop.quantizationScale = quantizationScale; + prop.quantizationOffset = quantizationOffset; + prop.clippingRate = clippingRate; prop.nOfNeighborsForInsertionOrder = nOfNeighborsForInsertionOrder; prop.epsilonForInsertionOrder = epsilonForInsertionOrder; } @@ -574,16 +1105,10 @@ void NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) { assert(prop.dimension != 0); size_t dimension = prop.dimension; -#ifdef NGT_INNER_PRODUCT if (prop.distanceType == 
NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard || prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { dimension++; } -#else - if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard) { - dimension++; - } -#endif switch (prop.objectType) { case NGT::ObjectSpace::ObjectType::Float : @@ -597,27 +1122,33 @@ NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) { objectSpace = new ObjectSpaceRepository(dimension, typeid(float16), prop.distanceType); break; #endif + case NGT::ObjectSpace::ObjectType::Qsuint8 : + objectSpace = new ObjectSpaceRepository(dimension, typeid(qsint8), prop.distanceType); + break; default: stringstream msg; msg << "Invalid Object Type in the property. " << prop.objectType; NGTThrowException(msg); } + objectSpace->setQuantization(prop.quantizationScale, prop.quantizationOffset); #ifdef NGT_REFINEMENT + auto dtype = prop.distanceType; + dtype = dtype == ObjectSpace::DistanceTypeInnerProduct ? ObjectSpace::DistanceTypeDotProduct : prop.distanceType; switch (prop.refinementObjectType) { case NGT::ObjectSpace::ObjectType::Float : - refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(float), prop.distanceType); + refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(float), dtype); break; case NGT::ObjectSpace::ObjectType::Uint8 : - refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(uint8_t), prop.distanceType); + refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(uint8_t), dtype); break; #ifdef NGT_HALF_FLOAT case NGT::ObjectSpace::ObjectType::Float16 : - refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(float16), prop.distanceType); + refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(float16), dtype); break; #endif #ifdef NGT_BFLOAT case NGT::ObjectSpace::ObjectType::Bfloat16 : - refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(bfloat16), prop.distanceType); + 
refinementObjectSpace = new ObjectSpaceRepository(dimension, typeid(bfloat16), dtype); break; #endif default: @@ -1852,7 +2383,7 @@ GraphAndTreeIndex::createIndexWithInsertionOrder(InsertionOrder &insertionOrder, CreateIndexThreadPool::OutputJobQueue &output = threads.getOutputJobQueue(); BuildTimeController buildTimeController(*this, NeighborhoodGraph::property); - + try { CreateIndexJob job; NGT::ObjectID id = 1; @@ -1990,7 +2521,7 @@ GraphAndTreeIndex::createIndex(const vector > &object } { size_t size = NeighborhoodGraph::property.edgeSizeForCreation; - sort(output.begin(), output.end()); + sort(output.begin(), output.end()); for (size_t idxi = 0; idxi < cnt; idxi++) { // add distances ObjectDistances &objs = *output[idxi].results; @@ -2004,7 +2535,7 @@ GraphAndTreeIndex::createIndex(const vector > &object ObjectDistance r; r.distance = GraphIndex::objectSpace->getComparator()(*output[idxi].object, *output[idxj].object); r.id = output[idxj].id; - objs.push_back(r); + objs.emplace_back(r); } std::sort(objs.begin(), objs.end()); if (objs.size() > size) { @@ -2017,7 +2548,7 @@ GraphAndTreeIndex::createIndex(const vector > &object ids[output[idxi].batchIdx].identical = true; ids[output[idxi].batchIdx].id = objs[0].id; ids[output[idxi].batchIdx].distance = objs[0].distance; - output[idxi].id = 0; + output[idxi].id = 0; } else { assert(output[idxi].id == 0); #ifdef NGT_SHARED_MEMORY_ALLOCATOR @@ -2074,7 +2605,7 @@ GraphAndTreeIndex::createIndex(const vector > &object } output.pop_front(); } - + count += cnt; if (timerCount <= count) { timer.stop(); diff --git a/lib/NGT/Index.h b/lib/NGT/Index.h index 257dce1..df73076 100644 --- a/lib/NGT/Index.h +++ b/lib/NGT/Index.h @@ -95,9 +95,10 @@ namespace NGT { #endif prefetchOffset = 0; prefetchSize = 0; -#ifdef NGT_INNER_PRODUCT - maxMagnitude = 0.0; -#endif + maxMagnitude = -1.0; + quantizationScale = 0.0; + quantizationOffset = 0.0; + clippingRate = 0.0; nOfNeighborsForInsertionOrder = 0; epsilonForInsertionOrder = 0.1; } 
@@ -121,9 +122,10 @@ namespace NGT { prefetchOffset = -1; prefetchSize = -1; accuracyTable = ""; -#ifdef NGT_INNER_PRODUCT maxMagnitude = -1; -#endif + quantizationScale = -1.0; + quantizationOffset = -1.0; + clippingRate = -1.0; nOfNeighborsForInsertionOrder = -1; epsilonForInsertionOrder = -1; } @@ -137,6 +139,7 @@ namespace NGT { #ifdef NGT_HALF_FLOAT case ObjectSpace::ObjectType::Float16: p.set("ObjectType", "Float-2"); break; #endif + case ObjectSpace::ObjectType::Qsuint8: p.set("ObjectType", "QSUInteger-8B"); break; #ifdef NGT_BFLOAT case ObjectSpace::ObjectType::Bfloat16: p.set("ObjectType", "Bfloat-2"); break; #endif @@ -167,9 +170,7 @@ namespace NGT { case DistanceType::DistanceTypeNormalizedAngle: p.set("DistanceType", "NormalizedAngle"); break; case DistanceType::DistanceTypeNormalizedCosine: p.set("DistanceType", "NormalizedCosine"); break; case DistanceType::DistanceTypeNormalizedL2: p.set("DistanceType", "NormalizedL2"); break; -#ifdef NGT_INNER_PRODUCT case DistanceType::DistanceTypeInnerProduct: p.set("DistanceType", "InnerProduct"); break; -#endif case DistanceType::DistanceTypePoincare: p.set("DistanceType", "Poincare"); break; // added by Nyapicom case DistanceType::DistanceTypeLorentz: p.set("DistanceType", "Lorentz"); break; // added by Nyapicom default : std::cerr << "Fatal error. Invalid distance type. 
" << distanceType << std::endl; abort(); @@ -199,9 +200,10 @@ namespace NGT { p.set("PrefetchOffset", prefetchOffset); p.set("PrefetchSize", prefetchSize); p.set("AccuracyTable", accuracyTable); -#ifdef NGT_INNER_PRODUCT p.set("MaxMagnitude", maxMagnitude); -#endif + p.set("QuantizationScale", quantizationScale); + p.set("QuantizationOffset", quantizationOffset); + p.set("QuantizationClippingRate", clippingRate); p.set("NumberOfNeighborsForInsertionOrder", nOfNeighborsForInsertionOrder); p.set("EpsilonForInsertionOrder", epsilonForInsertionOrder); } @@ -220,6 +222,8 @@ namespace NGT { } else if (it->second == "Float-2") { objectType = ObjectSpace::ObjectType::Float16; #endif + } else if (it->second == "QSUInteger-8B") { + objectType = ObjectSpace::ObjectType::Qsuint8; #ifdef NGT_BFLOAT } else if (it->second == "Bfloat-2") { objectType = ObjectSpace::ObjectType::Bfloat16; @@ -282,10 +286,8 @@ namespace NGT { distanceType = DistanceType::DistanceTypeNormalizedCosine; } else if (it->second == "NormalizedL2") { distanceType = DistanceType::DistanceTypeNormalizedL2; -#ifdef NGT_INNER_PRODUCT } else if (it->second == "InnerProduct") { distanceType = DistanceType::DistanceTypeInnerProduct; -#endif } else { std::cerr << "Invalid Distance Type in the property. 
" << it->first << ":" << it->second << std::endl; } @@ -347,9 +349,10 @@ namespace NGT { if (it != p.end()) { searchType = it->second; } -#ifdef NGT_INNER_PRODUCT maxMagnitude = p.getf("MaxMagnitude", maxMagnitude); -#endif + quantizationScale = p.getf("QuantizationScale", quantizationScale); + quantizationOffset = p.getf("QuantizationOffset", quantizationOffset); + clippingRate = p.getf("QuantizationClippingRate", clippingRate); nOfNeighborsForInsertionOrder = p.getl("NumberOfNeighborsForInsertionOrder", nOfNeighborsForInsertionOrder); epsilonForInsertionOrder = p.getf("EpsilonForInsertionOrder", epsilonForInsertionOrder); } @@ -373,9 +376,10 @@ namespace NGT { int prefetchSize; std::string accuracyTable; std::string searchType; // test -#ifdef NGT_INNER_PRODUCT float maxMagnitude; -#endif + float quantizationScale; + float quantizationOffset; + float clippingRate; int nOfNeighborsForInsertionOrder; float epsilonForInsertionOrder; #ifdef NGT_REFINEMENT @@ -538,65 +542,33 @@ namespace NGT { static void createGraphAndTree(const std::string &database, NGT::Property &prop, bool redirect = false) { createGraphAndTree(database, prop, "", redirect); } static void createGraph(const std::string &database, NGT::Property &prop, const std::string &dataFile, size_t dataSize = 0, bool redirect = false); template size_t insert(const std::vector &object); + template size_t insert(ObjectID id, const std::vector &object); template size_t append(const std::vector &object); template void update(ObjectID id, const std::vector &object); #ifdef NGT_REFINEMENT template size_t appendToRefinement(const std::vector &object); + template size_t insertToRefinement(const std::vector &object); template void updateToRefinement(ObjectID id, const std::vector &object); #endif - static void append(const std::string &database, const std::string &dataFile, size_t threadSize, size_t dataSize); - static void append(const std::string &database, const float *data, size_t dataSize, size_t threadSize); + 
static void append(const std::string &index, const std::string &dataFile, size_t threadSize, size_t dataSize); + static void append(const std::string &index, const float *data, size_t dataSize, size_t threadSize); + static void appendFromRefinementObjectFile(const std::string &index); + void appendFromRefinementObjectFile(); + void insertFromRefinementObjectFile(); + static void appendFromTextObjectFile(const std::string &index, const std::string &data, + size_t dataSize, bool append = true, bool refinement = false); + void appendFromTextObjectFile(const std::string &data, size_t dataSize, bool append = true, bool refinement = false); + static void appendFromBinaryObjectFile(const std::string &index, const std::string &data, + size_t dataSize, bool append = true, bool refinement = false); + void appendFromBinaryObjectFile(const std::string &data, size_t dataSize, bool apend = true, bool refinement = false); static void remove(const std::string &database, std::vector &objects, bool force = false); static void exportIndex(const std::string &database, const std::string &file); static void importIndex(const std::string &database, const std::string &file); virtual void load(const std::string &ifile, size_t dataSize) { getIndex().load(ifile, dataSize); } virtual void append(const std::string &ifile, size_t dataSize) { getIndex().append(ifile, dataSize); } - virtual void append(const float *data, size_t dataSize) { - StdOstreamRedirector redirector(redirect); - redirector.begin(); - try { - getIndex().append(data, dataSize); - } catch(Exception &err) { - redirector.end(); - throw err; - } - redirector.end(); - } - virtual void append(const double *data, size_t dataSize) { - StdOstreamRedirector redirector(redirect); - redirector.begin(); - try { - getIndex().append(data, dataSize); - } catch(Exception &err) { - redirector.end(); - throw err; - } - redirector.end(); - } - virtual void append(const uint8_t *data, size_t dataSize) { - StdOstreamRedirector 
redirector(redirect); - redirector.begin(); - try { - getIndex().append(data, dataSize); - } catch(Exception &err) { - redirector.end(); - throw err; - } - redirector.end(); - } -#ifdef NGT_HALF_FLOAT - virtual void append(const float16 *data, size_t dataSize) { - StdOstreamRedirector redirector(redirect); - redirector.begin(); - try { - getIndex().append(data, dataSize); - } catch(Exception &err) { - redirector.end(); - throw err; - } - redirector.end(); - } -#endif + template void appendWithPreprocessing(const T *data, size_t dataSize, + bool append = true, bool refinement = false); + template void append(const T *data, size_t dataSize); virtual size_t getNumberOfObjects() { return getIndex().getNumberOfObjects(); } virtual size_t getNumberOfIndexedObjects() { return getIndex().getNumberOfIndexedObjects(); } virtual size_t getObjectRepositorySize() { return getIndex().getObjectRepositorySize(); } @@ -633,7 +605,12 @@ namespace NGT { virtual void search(NGT::SearchQuery &sc) { getIndex().search(sc); } virtual void search(NGT::SearchContainer &sc, ObjectDistances &seeds) { getIndex().search(sc, seeds); } virtual void getSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, size_t n) { getIndex().getSeeds(sc, seeds, n); } - virtual void remove(ObjectID id, bool force = false) { getIndex().remove(id, force); } + virtual void remove(ObjectID id, bool force = false) { + try { + getRefinementObjectSpace().remove(id); + } catch(...) 
{} + getIndex().remove(id, force); + } virtual void exportIndex(const std::string &file) { getIndex().exportIndex(file); } virtual void importIndex(const std::string &file) { getIndex().importIndex(file); } virtual bool verify(std::vector &status, bool info = false, char mode = '-') { return getIndex().verify(status, info, mode); } @@ -665,6 +642,8 @@ namespace NGT { void disableLog() { redirect = true; } void extractInsertionOrder(InsertionOrder &insertionOrder); + void setQuantizationFromMaxMin(float max, float min); + void setQuantization(float scale, float offset); static void destroy(const std::string &path) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR std::remove(std::string(path + "/grp").c_str()); @@ -924,42 +903,20 @@ namespace NGT { void destructObjectSpace() { #ifdef NGT_REFINEMENT if (refinementObjectSpace != 0) { - auto *os = (ObjectSpaceRepository*)refinementObjectSpace; #ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); + refinementObjectSpace->deleteAll(); #endif - delete os; + delete refinementObjectSpace; refinementObjectSpace = 0; } #endif - if (objectSpace == 0) { - return; - } - if (property.objectType == NGT::ObjectSpace::ObjectType::Float) { - ObjectSpaceRepository *os = (ObjectSpaceRepository*)objectSpace; -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); -#endif - delete os; - } else if (property.objectType == NGT::ObjectSpace::ObjectType::Uint8) { - ObjectSpaceRepository *os = (ObjectSpaceRepository*)objectSpace; -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); -#endif - delete os; -#ifdef NGT_HALF_FLOAT - } else if (property.objectType == NGT::ObjectSpace::ObjectType::Float16) { - ObjectSpaceRepository *os = (ObjectSpaceRepository*)objectSpace; + if (objectSpace != 0) { #ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); + objectSpace->deleteAll(); #endif - delete os; -#endif - } else { - std::cerr << "Cannot find Object Type in the property. 
" << property.objectType << std::endl; - return; + delete objectSpace; + objectSpace = 0; } - objectSpace = 0; } virtual void load(const std::string &ifile, size_t dataSize = 0) { @@ -1141,7 +1098,9 @@ namespace NGT { searchQuery.workingResult = std::move(sc.workingResult); } else { size_t poffset = 12; +#ifndef NGT_SHARED_MEMORY_ALLOCATOR size_t psize = 64; +#endif auto size = sc.size; sc.size *= expansion; try { @@ -1238,6 +1197,11 @@ namespace NGT { seedSize = seedSize > repositorySize ? repositorySize : seedSize; std::vector deteted; size_t emptyCount = 0; +#ifndef NGT_ENABLE_TIME_SEED_FOR_RANDOM + if (seeds.size() != 0) { + srand(seeds[0].id); + } +#endif while (seedSize > seeds.size()) { double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); size_t idx = floor(repositorySize * random) + 1; @@ -1274,6 +1238,7 @@ namespace NGT { sc.size = NeighborhoodGraph::property.edgeSizeForCreation; sc.radius = FLT_MAX; sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient; + sc.insertion = true; try { GraphIndex::search(sc); } catch(Exception &err) { @@ -1311,9 +1276,7 @@ namespace NGT { } } - virtual void insert( - ObjectID id - ) { + virtual void insert(ObjectID id) { ObjectRepository &fr = objectSpace->getRepository(); if (fr[id] == 0) { std::cerr << "NGTIndex::insert empty " << id << std::endl; @@ -1910,20 +1873,21 @@ namespace NGT { so.size = 2; so.radius = 0.0; so.explorationCoefficient = 1.1; + so.insertion = true; ObjectDistances seeds; seeds.push_back(ObjectDistance(id, 0.0)); GraphIndex::search(so, seeds); if (results.size() == 0) { - if (!GraphIndex::objectSpace->isNormalizedDistance()) { + if (!GraphIndex::objectSpace->isNormalizedDistance() && !GraphIndex::objectSpace->isQintObjectType()) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR GraphIndex::objectSpace->deleteObject(obj); #endif std::stringstream msg; - msg << "Not found the specified id. ID=" << id; + msg << "Not found the specified id. 
(1) ID=" << id; NGTThrowException(msg); } so.radius = FLT_MAX; - so.size = 10; + so.size = 50; results.clear(); GraphIndex::search(so, seeds); for (size_t i = 0; i < results.size(); i++) { @@ -1952,7 +1916,7 @@ namespace NGT { GraphIndex::objectSpace->deleteObject(obj); #endif std::stringstream msg; - msg << "Not found the specified id. ID=" << id; + msg << "Not found the specified id. (2) ID=" << id; NGTThrowException(msg); } } @@ -1984,6 +1948,7 @@ namespace NGT { sc.radius = FLT_MAX; sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient; sc.useAllNodesInLeaf = true; + sc.insertion = true; try { GraphAndTreeIndex::search(sc); } catch(Exception &err) { @@ -2110,7 +2075,9 @@ namespace NGT { size_t seedSize = NeighborhoodGraph::property.seedSize == 0 ? sc.size : NeighborhoodGraph::property.seedSize; seedSize = seedSize > sc.size ? sc.size : seedSize; if (seeds.size() > seedSize) { +#ifndef NGT_ENABLE_TIME_SEED_FOR_RANDOM srand(tso.nodeID.getID()); +#endif // to accelerate thinning data. 
for (size_t i = seeds.size(); i > seedSize; i--) { double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); @@ -2146,7 +2113,9 @@ namespace NGT { searchQuery.workingResult = std::move(sc.workingResult); } else { size_t poffset = 12; +#ifndef NGT_SHARED_MEMORY_ALLOCATOR size_t psize = 64; +#endif auto size = sc.size; sc.size *= expansion; try { @@ -2283,6 +2252,7 @@ namespace NGT { } }; + } // namespace NGT template @@ -2293,32 +2263,28 @@ size_t NGT::Index::append(const std::vector &object) if (repo.size() == 0) { repo.initialize(); } - auto *o = repo.allocateNormalizedPersistentObject(object); repo.push_back(dynamic_cast(o)); size_t oid = repo.size() - 1; return oid; } -#ifdef NGT_REFINEMENT template -size_t NGT::Index::appendToRefinement(const std::vector &object) +size_t NGT::Index::insert(const std::vector &object) { - auto &os = getRefinementObjectSpace(); + auto &os = getObjectSpace(); auto &repo = os.getRepository(); if (repo.size() == 0) { repo.initialize(); } auto *o = repo.allocateNormalizedPersistentObject(object); - repo.push_back(dynamic_cast(o)); - size_t oid = repo.size() - 1; + size_t oid = repo.insert(dynamic_cast(o)); return oid; } -#endif template -size_t NGT::Index::insert(const std::vector &object) +size_t NGT::Index::insert(ObjectID id, const std::vector &object) { auto &os = getObjectSpace(); auto &repo = os.getRepository(); @@ -2327,7 +2293,7 @@ size_t NGT::Index::insert(const std::vector &object) } auto *o = repo.allocateNormalizedPersistentObject(object); - size_t oid = repo.insert(dynamic_cast(o)); + size_t oid = repo.insert(id, dynamic_cast(o)); return oid; } @@ -2357,6 +2323,35 @@ template } #ifdef NGT_REFINEMENT +template +size_t NGT::Index::appendToRefinement(const std::vector &object) +{ + auto &os = getRefinementObjectSpace(); + auto &repo = os.getRepository(); + if (repo.size() == 0) { + repo.initialize(); + } + + auto *o = repo.allocateNormalizedPersistentObject(object); + repo.push_back(dynamic_cast(o)); + size_t oid = 
repo.size() - 1; + return oid; +} + +template +size_t NGT::Index::insertToRefinement(const std::vector &object) +{ + auto &os = getRefinementObjectSpace(); + auto &repo = os.getRepository(); + if (repo.size() == 0) { + repo.initialize(); + } + + auto *o = repo.allocateNormalizedPersistentObject(object); + size_t oid = repo.insert(dynamic_cast(o)); + return oid; +} + template void NGT::Index::updateToRefinement(ObjectID id, const std::vector &object) { @@ -2382,3 +2377,150 @@ template return; } #endif + +template + void NGT::Index::appendWithPreprocessing(const T *data, size_t dataSize, bool append, bool refinement) { + if (dataSize == 0) { + return; + } + NGT::Property prop; + getProperty(prop); + float maxMag = prop.maxMagnitude; + bool maxMagSkip = false; + if (maxMag > 0.0) maxMagSkip = true; + std::vector addedElement; + auto *obj = data; + size_t dim = prop.dimension; + if (append && prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + NGT::Timer timer; + timer.start(); + size_t counter = 0; + for (size_t idx = 0; idx < dataSize; idx++, obj += dim) { + std::vector object; + object.reserve(dim); + for (size_t dataidx = 0; dataidx < dim; dataidx++) { + object.push_back(obj[dataidx]); + } + double mag = 0.0; + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + for (auto &v : object) { + mag += static_cast(v) * v; + } + if (!maxMagSkip && mag > maxMag) { + maxMag = mag; + } + addedElement.emplace_back(mag); + } + counter++; + if (counter % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(counter) / 1000000.0 << "M objects." 
+ << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + timer.stop(); + std::cerr << "time=" << timer << std::endl; + std::cerr << "maxMag=" << maxMag << std::endl; + std::cerr << "dataSize=" << dataSize << std::endl; + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + if (static_cast(getIndex()).property.maxMagnitude <= 0.0 && maxMag > 0.0) { + static_cast(getIndex()).property.maxMagnitude = maxMag; + } + } + } + + if (append && getObjectSpace().isQintObjectType() && prop.clippingRate >= 0.0) { + std::priority_queue min; + std::priority_queue, std::greater> max; + { + NGT::Timer timer; + timer.start(); + auto clippingSize = static_cast(dataSize * dim) * prop.clippingRate; + clippingSize = clippingSize == 0 ? 1 : clippingSize; + std::string line; + size_t counter = 0; + obj = data; + for (size_t idx = 0; idx < dataSize; idx++, obj += dim) { + std::vector object; + object.reserve(dim); + for (size_t dataidx = 0; dataidx < dim; dataidx++) { + object.push_back(obj[dataidx]); + } + if (getObjectSpace().isNormalizedDistance()) { + ObjectSpace::normalize(object); + } + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + float v = maxMag - addedElement[counter]; + object.emplace_back(sqrt(v >= 0.0 ? 
v : 0.0)); + } + for (auto &v : object) { + if (max.size() < clippingSize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < clippingSize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + counter++; + } + std::cerr << "time=" << timer << std::endl; + if (counter != 0) { + std::cerr << "max:min=" << max.top() << ":" << min.top() << std::endl; + setQuantizationFromMaxMin(max.top(), min.top()); + } + } + } + if (append || refinement) { + + size_t counter = 0; + obj = data; + for (size_t idx = 0; idx < dataSize; idx++, obj += dim) { + std::vector object; + object.reserve(dim); + for (size_t dataidx = 0; dataidx < dim; dataidx++) { + object.push_back(obj[dataidx]); + } +#ifdef NGT_REFINEMENT + if (refinement) { + appendToRefinement(object); + } +#endif + if (append) { + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct) { + object.emplace_back(sqrt(fmax(maxMag - addedElement[counter], 0.0))); /* clamp as in the quantization pass above: a preset maxMagnitude may be smaller than this object's squared norm, and sqrt of a negative value would store NaN */ + } + NGT::Index::append(object); + } + counter++; + } + } + +} + +template +void NGT::Index::append(const T *data, size_t dataSize) { + StdOstreamRedirector redirector(redirect); + redirector.begin(); + try { + NGT::Property prop; + getProperty(prop); + if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeInnerProduct || + getObjectSpace().isQintObjectType()) { + appendWithPreprocessing(data, dataSize); + } else { + auto &index = static_cast(getIndex()); + index.append(data, dataSize); + } + } catch(Exception &err) { + redirector.end(); + throw err; + } + redirector.end(); +} diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.cpp b/lib/NGT/NGTQ/HierarchicalKmeans.cpp index 444c8bc..4b16629 100644 --- a/lib/NGT/NGTQ/HierarchicalKmeans.cpp +++ b/lib/NGT/NGTQ/HierarchicalKmeans.cpp @@ -158,9 +158,6 @@ void 
QBG::HierarchicalKmeans::threeLayerClustering(std::string prefix, QBG::Inde numOfThirdClusters = index.getQuantizer().property.globalCentroidLimit; } } - if 
(numOfThirdClusters != 0 && index.getQuantizer().property.globalCentroidLimit != 0 && - numOfThirdClusters != index.getQuantizer().property.globalCentroidLimit) { - } auto &quantizer = static_cast&>(index.getQuantizer()); QBGObjectList &objectList = quantizer.objectList; if (numOfObjects == 0) { @@ -287,7 +284,7 @@ void QBG::HierarchicalKmeans::threeLayerClustering(std::string prefix, QBG::Inde if (thirdFlatClusters[idx].members.size() == 0) { std::cerr << "warning. found an empty cluster in thirdFlatClusters. " << idx << std::endl; } else { - bqindex.push_back(idx1); + bqindex.emplace_back(idx1); } } } @@ -586,6 +583,8 @@ void QBG::HierarchicalKmeans::clustering(std::string indexPath, std::string pref NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); + QBG::Index::setupObjects(indexPath, 0, verbose); + std::cerr << "The specified params=FC:" << numOfFirstClusters << ":FO:" << numOfFirstObjects << ",SC:" << numOfSecondClusters << ":SO:" << numOfSecondObjects << ",TC:" << numOfThirdClusters << ":TO:" << numOfThirdObjects << ",O:" << numOfObjects << std::endl; @@ -637,5 +636,77 @@ void QBG::HierarchicalKmeans::clustering(std::string indexPath, std::string pref redirector.end(); } +void QBG::HierarchicalKmeans::assignAll(std::string indexPath, int64_t lowerBoundOfNoOfObjects, size_t noOfNearestNeighbors) { + + std::cerr << "assignAll " << lowerBoundOfNoOfObjects << ":" << noOfNearestNeighbors << std::endl; + bool readOnly = false; + QBG::Index index(indexPath, readOnly); + if (index.getQuantizer().objectList.size() <= 1) { + NGTThrowException("No objects in the index."); + } + + auto &quantizer = static_cast&>(index.getQuantizer()); + QBGObjectList &objectList = quantizer.objectList; + if (numOfObjects == 0) { + numOfObjects = objectList.size() - 1; + } + auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace(); + std::vector thirdFlatClusters; + std::string prefix = indexPath + "/" + QBG::Index::getWorkspaceName(); + prefix +="/" + 
QBG::Index::getHierarchicalClusteringPrefix(); + NGT::Clustering::loadClusters(prefix + QBG::Index::getThirdCentroidSuffix(), thirdFlatClusters); + + assignWithNGT(thirdFlatClusters, 1, numOfObjects, objectSpace, objectList, epsilonExplorationSize, expectedRecall, noOfNearestNeighbors); + size_t remove = 0; + size_t max = 0; + for (auto it = thirdFlatClusters.begin(); it != thirdFlatClusters.end();) { + if ((*it).members.size() > max) { + max = (*it).members.size(); + } + if (static_cast((*it).members.size()) <= lowerBoundOfNoOfObjects) { + remove++; + (*it) = std::move(thirdFlatClusters.back()); + thirdFlatClusters.pop_back(); + } else { + ++it; + } + } + std::cerr << "max=" << max << " removed=" << remove << std::endl; + if (lowerBoundOfNoOfObjects > 0 && remove > 0) { + NGT::Clustering::clearMembers(thirdFlatClusters); + std::cerr << "the second assignWithNGT." << std::endl; + assignWithNGT(thirdFlatClusters, 1, numOfObjects, objectSpace, objectList, epsilonExplorationSize, expectedRecall, noOfNearestNeighbors); + } + + { + std::vector> cindex(numOfObjects); + for (auto it = thirdFlatClusters.begin(); it != thirdFlatClusters.end(); ++it) { + size_t idx = distance(thirdFlatClusters.begin(), it); + if (lowerBoundOfNoOfObjects >= 0 && (*it).members.empty()) { + std::stringstream msg; + msg << "Fatal error! Found empty cluster. " << idx; + NGTThrowException(msg); + } + for (auto mit = (*it).members.begin(); mit != (*it).members.end(); ++mit) { + size_t vid = (*mit).vectorID; + cindex[vid].emplace_back(idx); + } + } + std::cerr << "save index... " << cindex.size() << std::endl; + NGT::Clustering::saveVectors(prefix + QBG::Index::getObjTo3rdSuffix(), cindex); + } + if (remove > 0) { + std::cerr << "found empty clusters. 
" << remove << std::endl; + NGT::Clustering::saveClusters(prefix + QBG::Index::getThirdCentroidSuffix(), thirdFlatClusters); + NGT::Clustering::saveClusters(prefix + QBG::Index::getSecondCentroidSuffix(), thirdFlatClusters); + std::vector bqindex; + for (size_t idx = 0; idx < thirdFlatClusters.size(); idx++) { + bqindex.emplace_back(idx); + } + std::cerr << "save the 3rd to the 2nd index..." << std::endl; + NGT::Clustering::saveVector(prefix + QBG::Index::get3rdTo2ndSuffix(), bqindex); + } + +} #endif diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.h b/lib/NGT/NGTQ/HierarchicalKmeans.h index f7f2284..93eccff 100644 --- a/lib/NGT/NGTQ/HierarchicalKmeans.h +++ b/lib/NGT/NGTQ/HierarchicalKmeans.h @@ -241,8 +241,8 @@ namespace QBG { size_t rootID = 0; HKInternalNode &root = static_cast(*nodes[rootID]); std::cerr << "first=" << root.children.size() << std::endl; - size_t secondCount = 0; - size_t thirdCount = 0; + size_t secondCount = 0; + size_t thirdCount = 0; size_t objectCount = 0; size_t leafID = 0; size_t qID = 0; @@ -1020,7 +1020,7 @@ namespace QBG { index.linearSearch(sc); } - float startEpsilon = 0.12; + float startEpsilon = 0.02; float epsilon; std::vector recall(endID - beginID, 0.0); for (epsilon = startEpsilon; epsilon < 1.0; epsilon += 0.01) { @@ -1073,7 +1073,8 @@ namespace QBG { static void assignWithNGT(std::vector &clusters, size_t beginID, size_t endID, NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, size_t epsilonExplorationSize = 100, - float expectedRecall = 0.98) { + float expectedRecall = 0.98, + size_t noOfNearestNeighbors = 1) { if (beginID > endID) { std::cerr << "assignWithNGT::Warning. 
beginID:" << beginID << " > endID:" << endID << std::endl; return; @@ -1122,7 +1123,7 @@ namespace QBG { int numOfOutgoingEdges = 10; int numOfIncomingEdges = 120; int numOfQueries = 200; - int numOfResultantObjects = 20; + int numOfResultantObjects = noOfNearestNeighbors + 19; graphOptimizer.set(numOfOutgoingEdges, numOfIncomingEdges, numOfQueries, numOfResultantObjects); graphOptimizer.execute(anng, onng); } @@ -1147,11 +1148,10 @@ namespace QBG { abort(); } #endif - std::vector> clusterIDs(endID - beginID); std::vector> distances(omp_get_max_threads(), std::make_pair(0, 0.0)); size_t endOfEval = beginID + epsilonExplorationSize; endOfEval = endOfEval > endID ? endID : endOfEval; - size_t nOfObjects = 20; + size_t nOfObjects = noOfNearestNeighbors + 19;; NGT::Timer timer; timer.start(); auto epsilon = optimizeEpsilon(index, beginID, endOfEval, nOfObjects, @@ -1161,43 +1161,58 @@ namespace QBG { timer.start(); size_t progressStep = (endID - beginID) / 20;; progressStep = progressStep < 20 ? 
20 : progressStep; + size_t step = 1000000; + for (size_t bid = beginID; bid < endID; bid += step) { + std::vector>> clusterIDs(step); + auto eid = std::min(endID, bid + step); #pragma omp parallel for - for (size_t id = beginID; id < endID; id++) { - std::vector obj; + for (size_t id = bid; id < eid; id++) { + std::vector obj; #ifdef MULTIPLE_OBJECT_LISTS - objectList.get(omp_get_thread_num(), id, obj, &objectSpace); + objectList.get(omp_get_thread_num(), id, obj, &objectSpace); #else - objectList.get(id, obj, &objectSpace); + objectList.get(id, obj, &objectSpace); #endif - NGT::SearchQuery sc(obj); - NGT::ObjectDistances objects; - sc.setResults(&objects); - sc.setSize(nOfObjects); - sc.setEpsilon(epsilon); - index.search(sc); - clusterIDs[id - beginID] = make_pair(objects[0].id - 1, objects[0].distance); - auto threadID = omp_get_thread_num(); - distances[threadID].first++; - distances[threadID].second += objects[0].distance; - { - size_t cnt = 0; - for (auto d : distances) { - cnt += d.first; + NGT::SearchQuery sc(obj); + NGT::ObjectDistances objects; + sc.setResults(&objects); + sc.setSize(nOfObjects); + sc.setEpsilon(epsilon); + index.search(sc); + if (clusterIDs[id - bid].capacity() < noOfNearestNeighbors) { + clusterIDs[id - bid].reserve(noOfNearestNeighbors); + } + for (size_t i = 0; i < objects.size(); i++) { /* bound by the result list: the search may return fewer than noOfNearestNeighbors hits */ + if (clusterIDs[id - bid].size() == noOfNearestNeighbors) { + break; + } + clusterIDs[id - bid].emplace_back(make_pair(objects[i].id - 1, objects[i].distance)); /* each assignment keeps the distance to its own centroid */ + } + auto threadID = omp_get_thread_num(); + distances[threadID].first++; + distances[threadID].second += objects[0].distance; + { + size_t cnt = 0; + for (auto d : distances) { + cnt += d.first; + } + if (cnt % progressStep == 0) { + timer.stop(); + float progress = cnt * 100 / (endID - beginID); + std::cerr << "assignWithNGT: " << cnt << " objects (" + << progress << "%) have been assigned. time=" << timer << std::endl; + timer.restart(); + } + } + } + std::cerr << "pushing..." 
<< std::endl; + for (size_t id = bid; id < eid; id++) { + for (auto &e : clusterIDs[id - bid]) { + auto cid = e.first; + auto cdistance = e.second; + clusters[cid].members.emplace_back(NGT::Clustering::Entry(id - 1, cid, cdistance)); } - if (cnt % progressStep == 0) { - timer.stop(); - float progress = cnt * 100 / (endID - beginID); - std::cerr << "assignWithNGT: " << cnt << " objects (" - << progress << "%) have been assigned. time=" << timer << std::endl; - timer.restart(); - } } - } - std::cerr << "pushing..." << std::endl; - for (size_t id = beginID; id < endID; id++) { - auto cid = clusterIDs[id - beginID].first; - auto cdistance = clusterIDs[id - beginID].second; - clusters[cid].members.push_back(NGT::Clustering::Entry(id - 1, cid, cdistance)); } { size_t n = 0; @@ -1286,6 +1301,8 @@ namespace QBG { void clustering(std::string indexPath, std::string prefix = "", std::string objectIDsFile = ""); #endif + void assignAll(std::string indexPath, int64_t lowerBoundOfNoOfObjects, size_t noOfNearestNeighbors = 1); + size_t maxSize; size_t numOfObjects; size_t numOfClusters; diff --git a/lib/NGT/NGTQ/ObjectFile.h b/lib/NGT/NGTQ/ObjectFile.h index 3d48949..c98af84 100644 --- a/lib/NGT/NGTQ/ObjectFile.h +++ b/lib/NGT/NGTQ/ObjectFile.h @@ -24,6 +24,7 @@ #include #include #include +#include "NGT/ArrayFile.h" namespace NGT { class ObjectSpace; @@ -34,7 +35,8 @@ class ObjectFile : public ArrayFile { enum DataType { DataTypeUint8 = 0, DataTypeFloat = 1, - DataTypeFloat16 = 2 + DataTypeFloat16 = 2, + DataTypeNone = 99 }; ObjectFile():objectSpace(0) {} @@ -52,6 +54,9 @@ class ObjectFile : public ArrayFile { bool open() { if (!ArrayFile::open(fileName)) { + std::stringstream msg; + msg << "ObjectFile::Cannot open the specified file. 
" << fileName; + NGTThrowException(msg); return false; } switch (dataType) { @@ -70,7 +75,7 @@ class ObjectFile : public ArrayFile { break; #endif default: - stringstream msg; + std::stringstream msg; msg << "ObjectFile::Invalid Object Type in the property. " << dataType; NGTThrowException(msg); break; @@ -129,7 +134,7 @@ class ObjectFile : public ArrayFile { template bool get(const size_t id, std::vector &data, NGT::ObjectSpace *os = 0) { if (objectSpace == 0) { - stringstream msg; + std::stringstream msg; msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl; NGTThrowException(msg); } @@ -169,13 +174,13 @@ class ObjectFile : public ArrayFile { void put(const size_t id, std::vector &data, NGT::ObjectSpace *os = 0) { if (objectSpace == 0) { - stringstream msg; + std::stringstream msg; msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl; NGTThrowException(msg); } if (objectSpace->getDimension() != data.size()) { - stringstream msg; - msg << "ObjectFile::Dimensions are inconsistency. " << objectSpace->getDimension() << ":" << data.size(); + std::stringstream msg; + msg << "ObjectFile::Dimensions are inconsistent. 
" << objectSpace->getDimension() << ":" << data.size(); NGTThrowException(msg); } NGT::Object *object = objectSpace->allocateObject(); @@ -304,13 +309,13 @@ class StaticObjectFileLoader { id = noOfObjects - 1; } size_t headerSize = sizeof(noOfObjects) + sizeof(noOfDimensions); - stream.seekg(id * sizeOfObject + headerSize, ios_base::beg); + stream.seekg(id * sizeOfObject + headerSize, std::ios_base::beg); counter = id; return; } std::vector getObject() { - vector object; + std::vector object; if (isEmpty()) { return object; } @@ -448,12 +453,12 @@ bool StaticObjectFile::open(const std::string &file, size_t pseudoDimensio bool ret = _readFileHead(); if (_fileHead.noOfObjects != noOfObjects) { - stringstream msg; + std::stringstream msg; msg << "Invalid # of objects=" << _fileHead.noOfObjects << ":" << noOfObjects; NGTThrowException(msg); } if (_fileHead.noOfDimensions != noOfDimensions) { - stringstream msg; + std::stringstream msg; msg << "Invalid # of dimensions=" << _fileHead.noOfDimensions << ":" << noOfDimensions; NGTThrowException(msg); } diff --git a/lib/NGT/NGTQ/Optimizer.cpp b/lib/NGT/NGTQ/Optimizer.cpp index 3dc407c..7d9e5ce 100644 --- a/lib/NGT/NGTQ/Optimizer.cpp +++ b/lib/NGT/NGTQ/Optimizer.cpp @@ -593,4 +593,214 @@ void QBG::Optimizer::optimize(std::string invector, std::string ofile, std::stri } #endif } + +size_t QBG::Optimizer::extractScaleAndOffset(const std::string indexPath, float clippingRate, + int32_t nOfObjects, bool verbose) { + NGT::StdOstreamRedirector redirector(!verbose); + redirector.begin(); + + int32_t n = 0; + try { + QBG::Index index(indexPath); + auto &quantizer = index.getQuantizer(); + auto dim = quantizer.property.dimension; + if (clippingRate < 0.0) { + clippingRate = quantizer.property.scalarQuantizationClippingRate; + } + if (nOfObjects < 0) { + nOfObjects = quantizer.property.scalarQuantizationNoOfSamples; + } + if (clippingRate < 0.0) { + std::stringstream msg; + msg << "Clipping rate is invalid. 
" << clippingRate; + NGTThrowException(msg); + } + if (nOfObjects < 0) { + redirector.end(); return 0; /* nothing to sample; end the redirection here too, as every other exit path of this function does */ + } + auto &objectList = quantizer.objectList; + if (objectList.size() <= 1) { + NGTThrowException("optimize: No objects"); + } + std::priority_queue min; + std::priority_queue, std::greater> max; + if (nOfObjects == 0 || nOfObjects >= static_cast(objectList.size())) { + nOfObjects = objectList.size() - 1; + } + auto cutsize = static_cast(nOfObjects * dim) * clippingRate; + cutsize = cutsize == 0 ? 1 : cutsize; + for (size_t id = 1; id < objectList.size(); id++) { + if (n == nOfObjects) break; + auto p = static_cast(nOfObjects - n) / (objectList.size() - id); + double random = (rand() + 1.0) / (RAND_MAX + 2.0); + if (random > p) continue; + std::vector object; + objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + if (!quantizer.rotation.empty()) { + quantizer.rotation.mul(object); + } + for (auto &v : object) { + if (max.size() < cutsize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < cutsize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + n++; + } + index.setQuantizationFromMaxMin(max.top(), min.top()); + index.saveProperty(); + } catch(NGT::Exception &err) { + redirector.end(); + throw err; + } + redirector.end(); + return n; +} + +size_t QBG::Optimizer::convertObjectsFromInnerProductToL2(const std::string indexPath, size_t nOfObjects, bool verbose) { + NGT::StdOstreamRedirector redirector(!verbose); + redirector.begin(); + NGT::Timer timer; + timer.start(); + + size_t count = 0; + try { + QBG::Index index(indexPath); + auto &quantizer = index.getQuantizer(); + auto dim = quantizer.property.genuineDimension; + auto &objectList = quantizer.objectList; + if (objectList.size() <= 1) { + NGTThrowException("optimize: No objects"); + } + if (dim != objectList.genuineDimension) { + std::stringstream msg; + msg << "Inner fatal error! The dimensions are inconsitent. 
" + << dim << ":" << objectList.genuineDimension; + NGTThrowException(msg); + } + std::priority_queue min; + std::priority_queue, std::greater> max; + if (nOfObjects == 0 || nOfObjects >= objectList.size()) { + nOfObjects = objectList.size() - 1; + } + std::vector mags(objectList.size(), -1.0); /* -1 marks "squared magnitude not computed yet"; entries the sampling pass never reaches must carry it too */ + + float maxMag; + if (quantizer.property.maxMagnitude > 0.0) { + maxMag = quantizer.property.maxMagnitude; + } else { + maxMag = 0.0; + for (size_t id = 1; id < objectList.size(); id++) { + if (count == nOfObjects) break; + auto p = static_cast(nOfObjects - count) / (objectList.size() - id); + double random = (rand() + 1.0) / (RAND_MAX + 2.0); + if (random > p) { + mags[id] = -1.0; + continue; + } + std::vector object; + objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + float mag = 0.0; + for (size_t i = 0; i < dim - 1; i++) { + mag += object[i] * object[i]; + } + if (mag > maxMag) { + maxMag = mag; + } + mags[id] = mag; + count++; + if (count % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(count) / 1000000.0 << "M objects." + << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + index.setMaxMagnitude(maxMag); + index.saveProperty(); + } + for (size_t id = 1; id < objectList.size(); id++) { + std::vector object; + objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + float mag = mags[id]; + if (mag < 0.0) { + mag = 0.0; + for (size_t i = 0; i < dim - 1; i++) { + mag += object[i] * object[i]; + } + } + object[dim - 1] = sqrt(maxMag > mag ? maxMag - mag : 0.0); /* an object outside the sample (or a preset maxMagnitude) can exceed maxMag; clamp instead of storing NaN */ + object.resize(dim); + objectList.put(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + if (id % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(id) / 1000000.0 << "M objects." 
+ << " maxMag=" << maxMag << " time=" << timer << std::endl; + timer.restart(); + } + } + } catch(NGT::Exception &err) { + redirector.end(); + throw err; + } + redirector.end(); + return count; +} + +size_t QBG::Optimizer::normalizeObjectsForCosine(const std::string indexPath, size_t nOfObjects, bool verbose) { + NGT::StdOstreamRedirector redirector(!verbose); + redirector.begin(); + NGT::Timer timer; + timer.start(); + + size_t count = 0; + try { + QBG::Index index(indexPath); + auto &quantizer = index.getQuantizer(); + auto dim = quantizer.property.genuineDimension; + auto &objectList = quantizer.objectList; + if (objectList.size() <= 1) { + NGTThrowException("optimize: No objects"); + } + if (dim != objectList.genuineDimension) { + std::stringstream msg; + msg << "Inner fatal error! The dimensions are inconsitent. " + << dim << ":" << objectList.genuineDimension; + NGTThrowException(msg); + } + for (size_t id = 1; id < objectList.size(); id++) { + std::vector object; + objectList.get(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + object.resize(dim); + float mag = 0.0; + for (size_t i = 0; i < dim; i++) { + mag += object[i] * object[i]; + } + mag = mag > 0.0 ? sqrt(mag) : 1.0; /* a zero vector cannot be normalised; divide by 1 so it stays all-zero instead of becoming NaN */ + for (size_t i = 0; i < dim; i++) { + object[i] /= mag; + } + objectList.put(id, object, &quantizer.globalCodebookIndex.getObjectSpace()); + if (id % 2000000 == 0) { + timer.stop(); + std::cerr << "processed " << static_cast(id) / 1000000.0 << "M objects." 
<< std::endl; + timer.restart(); + } + } + } catch(NGT::Exception &err) { + redirector.end(); + throw err; + } + redirector.end(); + return count; +} + #endif diff --git a/lib/NGT/NGTQ/Optimizer.h b/lib/NGT/NGTQ/Optimizer.h index 51abad9..8d7e587 100644 --- a/lib/NGT/NGTQ/Optimizer.h +++ b/lib/NGT/NGTQ/Optimizer.h @@ -337,18 +337,18 @@ namespace QBG { timer.start(); Matrix optr; optimizeRotation( - iteration, - vectors, - xt, - rs[ri], - optr, + iteration, + vectors, + xt, + rs[ri], + optr, localClusters[ri], clusteringType, imode, numberOfClusters, - numberOfSubvectors, - subvectorSize, - clusterIteration, + numberOfSubvectors, + subvectorSize, + clusterIteration, clusterSizeConstraint, clusterSizeConstraintCoefficient, convergenceLimitTimes, @@ -366,6 +366,9 @@ namespace QBG { void optimizeWithinIndex(std::string indexPath); void optimize(std::string invector, std::string ofile, std::string global); void optimize(vector> &vectors, vector> &globalCentroid, Matrix &r, vector> &localClusters, vector &errors); + static size_t extractScaleAndOffset(const std::string indexPath, float clippingRate, int32_t nOfObjects, bool verbose); + static size_t convertObjectsFromInnerProductToL2(const std::string indexPath, size_t nOfObjects, bool verbose); + static size_t normalizeObjectsForCosine(const std::string indexPath, size_t nOfObjects, bool verbose); #endif NGT::Timer timelimitTimer; size_t subvectorSize; @@ -373,10 +376,10 @@ namespace QBG { NGT::Clustering::ClusteringType clusteringType; NGT::Clustering::InitializationMode initMode; size_t iteration; - size_t clusterIteration; + size_t clusterIteration; bool clusterSizeConstraint; float clusterSizeConstraintCoefficient; - size_t convergenceLimitTimes; + size_t convergenceLimitTimes; size_t numberOfObjects; size_t numberOfClusters; size_t numberOfSubvectors; diff --git a/lib/NGT/NGTQ/QbgCli.cpp b/lib/NGT/NGTQ/QbgCli.cpp index d737275..a2239e9 100644 --- a/lib/NGT/NGTQ/QbgCli.cpp +++ b/lib/NGT/NGTQ/QbgCli.cpp @@ -37,12 
+37,12 @@ class QbgCliBuildParameters : public QBG::BuildParameters { } void getCreationParameters() { - char objectType = args.getChar("o", 'f'); - char distanceType = args.getChar("D", '2'); creation.numOfObjects = args.getl("n", 0); - creation.threadSize = args.getl("p", 24); creation.dimension = args.getl("d", 0); + auto clusterDataType = args.getString("C", "-"); + creation.scalarQuantizationClippingRate = args.getf("r", 0.0); + creation.scalarQuantizationNoOfSamples = args.getl("V", 0); #ifdef NGTQ_QBG creation.numOfLocalClusters = args.getl("c", 16); #else @@ -94,39 +94,84 @@ class QbgCliBuildParameters : public QBG::BuildParameters { creation.localInsertionRadiusCoefficient = creation.globalInsertionRadiusCoefficient; - switch (objectType) { - case 'f': creation.dataType = NGTQ::DataTypeFloat; break; + transform(clusterDataType.begin(), clusterDataType.end(), clusterDataType.begin(), ::tolower); + if (clusterDataType == "-" || clusterDataType == "pq4") { + creation.localClusterDataType = NGTQ::ClusterDataTypePQ4; + } else if (clusterDataType == "sqsu8" || clusterDataType == "sq8") { + creation.localClusterDataType = NGTQ::ClusterDataTypeSQSU8; + } else if (clusterDataType == "nq") { + creation.localClusterDataType = NGTQ::ClusterDataTypeNQ; + } + + char objectType = args.getChar("o", 'f'); + { + switch (objectType) { + case 'f': creation.dataType = NGTQ::DataTypeFloat; break; #ifdef NGT_HALF_FLOAT - case 'h': creation.dataType = NGTQ::DataTypeFloat16; break; + case 'h': creation.dataType = NGTQ::DataTypeFloat16; break; #endif - case 'c': creation.dataType = NGTQ::DataTypeUint8; break; - default: - std::stringstream msg; - msg << "Command::CreateParameters: Error: Invalid object type. " << objectType; - NGTThrowException(msg); + case 'c': creation.dataType = NGTQ::DataTypeUint8; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid object type. 
" << objectType; + NGTThrowException(msg); + } + } + { + std::string globalObjectType = args.getString("K", "-"); + std::string objectType; + if (globalObjectType == "-") { + if (clusterDataType == "-" || clusterDataType == "pq4") { + objectType = "f"; + } else { + objectType = clusterDataType; + } + } else { + objectType = globalObjectType; + } + if (objectType == "f") { + creation.globalObjectType = NGT::ObjectSpace::ObjectType::Float; + } else if (objectType == "h") { + creation.globalObjectType = NGT::ObjectSpace::ObjectType::Float16; + } else if (objectType == "sqsu8" || objectType == "sq8") { + creation.globalObjectType = NGT::ObjectSpace::ObjectType::Qsuint8; + } else { + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid global object type. " << objectType; + NGTThrowException(msg); + } } - switch (distanceType) { - case '2': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break; - case '1': creation.distanceType = NGTQ::DistanceType::DistanceTypeL1; break; - case 'a': creation.distanceType = NGTQ::DistanceType::DistanceTypeAngle; break; - case 'C': creation.distanceType = NGTQ::DistanceType::DistanceTypeNormalizedCosine; break; - case 'E': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break; -#ifdef NGT_INNER_PRODUCT - case 'i': creation.distanceType = NGTQ::DistanceType::DistanceTypeInnerProduct; break; -#endif - default: - std::stringstream msg; - msg << "Command::CreateParameters: Error: Invalid distance type. 
" << distanceType; - NGTThrowException(msg); + { + char distanceType = args.getChar("D", '2'); + switch (distanceType) { + case '2': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break; + case '1': creation.distanceType = NGTQ::DistanceType::DistanceTypeL1; break; + case 'a': creation.distanceType = NGTQ::DistanceType::DistanceTypeAngle; break; + case 'C': creation.distanceType = NGTQ::DistanceType::DistanceTypeNormalizedCosine; break; + case 'E': creation.distanceType = NGTQ::DistanceType::DistanceTypeL2; break; + case 'i': creation.distanceType = NGTQ::DistanceType::DistanceTypeInnerProduct; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid distance type. " << distanceType; + NGTThrowException(msg); + } } #ifdef NGTQ_QBG creation.genuineDimension = creation.dimension; - creation.dimension = args.getl("P", creation.genuineDimension); + creation.dimension = args.getl("P", 0); + if (creation.dimension == 0) { + creation.dimension = ((creation.genuineDimension + 15) / 16) * 16; + } creation.dimensionOfSubvector = args.getl("Q", 0); + if (creation.numOfSubvectors == 0 && + (creation.localClusterDataType == NGTQ::ClusterDataTypeSQSU8 + )) { + creation.numOfSubvectors = creation.dimension; + } { - char objectType = args.getChar("O", 'f'); - switch (objectType) { + char oType = args.getChar("O", objectType); + switch (oType) { case 'f': creation.genuineDataType = ObjectFile::DataTypeFloat; break; #ifdef NGT_HALF_FLOAT case 'h': creation.genuineDataType = ObjectFile::DataTypeFloat16; break; @@ -134,14 +179,24 @@ class QbgCliBuildParameters : public QBG::BuildParameters { case 'c': creation.genuineDataType = ObjectFile::DataTypeUint8; break; default: std::stringstream msg; - msg << "Command::CreateParameters: Error: Invalid genuine object type. " << objectType; + msg << "Command::CreateParameters: Error: Invalid genuine data type. 
" << oType; /* report the -O value that was actually switched on, not the -o object type */ NGTThrowException(msg); } } #endif { - char objectListOnMemory = args.getChar("R", 'f'); - creation.objectListOnMemory = (objectListOnMemory == 't' || objectListOnMemory == 'T'); + char refinementDataType = args.getChar("R", '-'); + switch (refinementDataType) { + case 'f': creation.refinementDataType = NGTQ::DataTypeFloat; break; +#ifdef NGT_HALF_FLOAT + case 'h': creation.refinementDataType = NGTQ::DataTypeFloat16; break; +#endif + case '-': creation.refinementDataType = NGTQ::DataTypeNone; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid refinement data type. " << refinementDataType; + NGTThrowException(msg); + } } } @@ -364,7 +419,6 @@ class QbgCliBuildParameters : public QBG::BuildParameters { NGT::Args &args; }; - class SearchParameters : public NGT::Command::SearchParameters { public: SearchParameters(NGT::Args &args): NGT::Command::SearchParameters(args, "0.02") { @@ -394,9 +448,10 @@ QBG::CLI::buildQG(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } size_t phase = args.getl("p", 0); @@ -419,7 +474,7 @@ QBG::CLI::buildQG(NGT::Args &args) std::cerr << "optimizing..." << std::endl; optimizer.optimize(qgPath); } - bool verbose = false; + auto verbose = buildParameters.optimization.verbose; if (phase == 0 || phase == 2) { std::cerr << "building the inverted index..." << std::endl; QBG::Index::buildNGTQ(qgPath, verbose); @@ -557,9 +612,10 @@ QBG::CLI::searchQG(NGT::Args &args) { try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." 
<< std::endl;; + msg << usage << endl; + NGTThrowException(msg); } SearchParameters searchParameters(args); @@ -580,14 +636,20 @@ QBG::CLI::searchQG(NGT::Args &args) { try { ::searchQG(index, searchParameters, std::cout); } catch (NGT::Exception &err) { - cerr << "qbg: Error " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << "qbg: Error " << err.what() << std::endl; + msg << usage << endl; + NGTThrowException(msg); } catch (std::exception &err) { - cerr << "qbg: Error " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << "qbg: Error " << err.what() << std::endl; + msg << usage << endl; + NGTThrowException(msg); } catch (...) { - cerr << "qbg: Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << "qbg: Error "; + msg << usage << endl; + NGTThrowException(msg); } } @@ -605,12 +667,14 @@ QBG::CLI::createQG(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << endl; + NGTThrowException(msg); } std::cerr << "creating..." << std::endl; NGTQG::Index::create(indexPath, buildParameters); + std::cerr << "appending..." << std::endl; NGTQG::Index::append(indexPath, buildParameters); } @@ -622,14 +686,16 @@ QBG::CLI::appendQG(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << endl; + NGTThrowException(msg); } QBG::Index::appendFromObjectRepository(indexPath, indexPath + "/qg", false); } + void QBG::CLI::info(NGT::Args &args) { @@ -639,9 +705,10 @@ QBG::CLI::info(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) 
{ - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << endl; + NGTThrowException(msg); } try { @@ -663,8 +730,10 @@ QBG::CLI::info(NGT::Args &args) NGTQG::Index index(indexPath, 128, readOnly); std::cout << "The index type: QG" << std::endl; } catch (...) { - cerr << "qbg: The specified index is neither QBG nor QG." << std::endl; - cerr << usage << endl; + std::stringstream msg; + msg << "qbg: The specified index is neither QBG nor QG." << std::endl; + msg << usage << endl; + NGTThrowException(msg); } } @@ -674,9 +743,11 @@ void QBG::CLI::create(NGT::Args &args) { const string usage = "Usage: qbg create " - " -d dimension [-o object-type (f:float|c:unsigned char)] [-D distance-function] [-n data-size] " + " -d dimension [-o object-type (object-list-data-type)] " + "[-O genuine-data-type (=object-type)] [-C cluster-data-type] [-K graph-data-type] " + "[-D distance-function] [-n data-size] " "[-p #-of-thread] [-R global-codebook-range] [-r local-codebook-range] " - "[-C global-codebook-size-limit] [-c local-codebook-size-limit] [-N local-division-no] " + "[-c local-codebook-size-limit] [-N local-division-no] " "[-T single-local-centroid (t|f)] [-e epsilon] [-i index-type (t:Tree|g:Graph)] " "[-M global-centroid-creation-mode (d|s)] [-L local-centroid-creation-mode (d|k|s)] " "[-s local-sample-coefficient] " @@ -695,9 +766,10 @@ QBG::CLI::create(NGT::Args &args) cerr << "rotation is " << rotationPath << "." << endl; std::ifstream stream(rotationPath); if (!stream) { - std::cerr << "Cannot open the rotation. " << rotationPath << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Cannot open the rotation. 
" << rotationPath << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::string line; while (getline(stream, line)) { @@ -720,8 +792,10 @@ QBG::CLI::create(NGT::Args &args) QBG::Index::create(indexPath, buildParameters, rotation, objectPath); } catch(NGT::Exception &err) { - std::cerr << err.what() << std::endl; - cerr << usage << endl; + std::stringstream msg; + msg << err.what() << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } } @@ -737,9 +811,10 @@ QBG::CLI::load(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - std::cerr << "Not specified the index." << std::endl; - std::cerr << usage << std::endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::cerr << "qbg: loading the specified blobs..." << std::endl; @@ -770,7 +845,6 @@ QBG::CLI::load(NGT::Args &args) void QBG::CLI::search(NGT::Args &args) { - const string usage = "Usage: qbg search [-i g|t|s] [-n result-size] [-e epsilon] [-m mode(r|l|c|a)] " "[-E edge-size] [-o output-mode] [-b result expansion(begin:end:[x]step)] " "index(input) query.tsv(input)"; @@ -779,18 +853,20 @@ QBG::CLI::search(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } string query; try { query = args.get("#2"); } catch (...) 
{ - cerr << "Query is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Query is not specified" << endl; + msg << usage << std::endl; + NGTThrowException(msg); } bool verbose = args.getBool("v"); @@ -799,6 +875,22 @@ QBG::CLI::search(NGT::Args &args) float epsilon = 0.1; char searchMode = args.getChar("M", 'n'); + NGTQ::DataType refinementDataType = NGTQ::DataTypeAny; + { + char refinement = args.getChar("R", '-'); + switch (refinement) { + case 'f': refinementDataType = NGTQ::DataTypeFloat; break; +#ifdef NGT_HALF_FLOAT + case 'h': refinementDataType = NGTQ::DataTypeFloat16; break; +#endif + case 'x': refinementDataType = NGTQ::DataTypeNone; break; + case '-': refinementDataType = NGTQ::DataTypeAny; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid refinement data type. " << refinement; + NGTThrowException(msg); + } + } if (args.getString("e", "none") == "-") { // linear search epsilon = FLT_MAX; @@ -860,11 +952,14 @@ QBG::CLI::search(NGT::Args &args) cerr << "parameter=" << beginOfParameter << "->" << endOfParameter << "," << stepOfParameter << endl; } - QBG::Index index(indexPath, true, verbose); + auto prebuilt = true; + QBG::Index index(indexPath, prebuilt, verbose, refinementDataType); std::cerr << "qbg::The index is open." << std::endl; std::cerr << " vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl; std::cerr << " peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl; - auto dimension = index.getQuantizer().globalCodebookIndex.getObjectSpace().getDimension(); + if (outputMode == 'e') { + std::cout << "# Beginning of Evaluation" << endl; + } try { for (size_t trial = 0; trial < nOfTrials; trial++) { ifstream is(query); @@ -872,7 +967,6 @@ QBG::CLI::search(NGT::Args &args) cerr << "Cannot open the specified file. 
" << query << endl; return; } - if (outputMode == 's') { cout << "# Beginning of Evaluation" << endl; } string line; double totalTime = 0; int queryCount = 0; @@ -884,7 +978,6 @@ QBG::CLI::search(NGT::Args &args) linestream >> value; queryVector.push_back(value); } - queryVector.resize(dimension); queryCount++; for (auto parameter = beginOfParameter; parameter <= endOfParameter; @@ -897,16 +990,11 @@ QBG::CLI::search(NGT::Args &args) searchContainer.setResults(&objects); auto re = resultExpansion; if (re < 0.0) re = parameter; - if (re >= 1.0) { - searchContainer.setSize(static_cast(size) * re); - searchContainer.setExactResultSize(size); - } else { - searchContainer.setSize(size); - searchContainer.setExactResultSize(0); - } + searchContainer.setRefinementExpansion(re); auto np = nOfProbes; if (np == 0) np = parameter; searchContainer.setNumOfProbes(np); + searchContainer.setSize(size); searchContainer.setEpsilon(epsilon); searchContainer.setBlobEpsilon(blobEpsilon); searchContainer.setEdgeSize(edgeSize); @@ -934,7 +1022,7 @@ QBG::CLI::search(NGT::Args &args) cout << "# Index Type=" << "----" << endl; cout << "# Size=" << size << endl; cout << "# Epsilon=" << epsilon << endl; - cout << "# Result expansion=" << re << endl; + cout << "# Refinement expansion=" << re << endl; cout << "# # of probes=" << np << endl; if (nOfProbes == 0) { cout << "# Factor=" << np << endl; @@ -943,19 +1031,34 @@ QBG::CLI::search(NGT::Args &args) } cout << "# Distance Computation=" << index.getQuantizer().distanceComputationCount << endl; cout << "# Query Time (msec)=" << timer.time * 1000.0 << endl; + } else if (outputMode == 't' || outputMode =='T') { + cout << queryCount << "\t"; } else { cout << "Query No." 
<< queryCount << endl; cout << "Rank\tIN-ID\tID\tDistance" << endl; } - for (size_t i = 0; i < objects.size(); i++) { - cout << i + 1 << "\t" << objects[i].id << "\t"; - cout << objects[i].distance << endl; + if (outputMode == 't' || outputMode =='T') { + for (size_t i = 0; i < objects.size(); i++) { + cout << objects[i].id; + if (outputMode == 'T') { + cout << "\t" << objects[i].distance; + } + if (i + 1 != objects.size()) { + std::cout << "\t"; + } + } + std::cout << std::endl; + } else { + for (size_t i = 0; i < objects.size(); i++) { + cout << i + 1 << "\t" << objects[i].id << "\t"; + cout << objects[i].distance << endl; + } } - + if (outputMode == 'e' || outputMode == 'E') { cout << "# End of Search" << endl; - } else { + } else if (!(outputMode == 't' || outputMode =='T')) { cout << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; } } @@ -968,18 +1071,22 @@ QBG::CLI::search(NGT::Args &args) cout << "# Average Query Time (msec)=" << queryTimes.back() << endl; cout << "# Number of queries=" << queryCount << endl; cout << "# End of Evaluation" << endl; - } else { + } else if (!(outputMode == 't' || outputMode =='T')) { cout << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" << endl; } } } catch (NGT::Exception &err) { - cerr << "Error " << err.what() << endl; - cerr << usage << endl; + std::stringstream msg; + msg << "Error " << err.what() << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } catch (...) 
{ - cerr << "Error" << endl; - cerr << usage << endl; + std::stringstream msg; + msg << "Error" << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } if (outputMode == 'e' || outputMode == 'E') { if (nOfTrials >= 1) { @@ -993,6 +1100,227 @@ QBG::CLI::search(NGT::Args &args) index.close(); } +void +QBG::CLI::batchSearch(NGT::Args &args) +{ + const string usage = "Usage: qbg search [-i g|t|s] [-n result-size] [-e epsilon] [-m mode(r|l|c|a)] " + "[-E edge-size] [-o output-mode] [-b result expansion(begin:end:[x]step)] " + "index(input) query.tsv(input)"; + args.parse("v"); + string indexPath; + try { + indexPath = args.get("#1"); + } catch (...) { + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); + } + + string query; + try { + query = args.get("#2"); + } catch (...) { + std::stringstream msg; + msg << "Query is not specified" << endl; + msg << usage << std::endl; + NGTThrowException(msg); + } + + bool verbose = args.getBool("v"); + size_t size = args.getl("n", 20); + char outputMode = args.getChar("o", '-'); + float epsilon = 0.1; + + NGTQ::DataType refinementDataType = NGTQ::DataTypeAny; + { + char refinement = args.getChar("R", '-'); + switch (refinement) { + case 'f': refinementDataType = NGTQ::DataTypeFloat; break; +#ifdef NGT_HALF_FLOAT + case 'h': refinementDataType = NGTQ::DataTypeFloat16; break; +#endif + case 'x': refinementDataType = NGTQ::DataTypeNone; break; + case '-': refinementDataType = NGTQ::DataTypeAny; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid refinement data type. 
" << refinement; + NGTThrowException(msg); + } + } + if (args.getString("e", "none") == "-") { + // linear search + epsilon = FLT_MAX; + } else { + epsilon = args.getf("e", 0.1); + } + float blobEpsilon = args.getf("B", 0.05); + size_t edgeSize = args.getl("E", 0); + float cutback = args.getf("C", 0.0); + size_t explorationSize = args.getf("N", 256); + size_t nOfProbes = 0; + float resultExpansion = -1; + size_t nOfTrials = args.getl("T", 1); + if (nOfTrials != 1) { + std::cerr << "# of trials=" << nOfTrials << std::endl; + } + std::vector queryTimes; + + float beginOfParameter, endOfParameter, stepOfParameter; + //-/bool mulStep = false; + { + beginOfParameter = 0.0; + endOfParameter = 0.0; + stepOfParameter = 1; + vector tokens; + if (args.getString("p", "-").find_first_of(':') == std::string::npos) { + resultExpansion = args.getf("p", 0.0); + } + if (args.getString("P", "-").find_first_of(':') == std::string::npos) { + nOfProbes = args.getl("P", 10); + } + if (resultExpansion < 0 && nOfProbes == 0) { + std::cerr << "Cannot specify both -p and -P as a fluctuating value. -P is prioritized." 
<< std::endl; + NGT::Common::tokenize(args.getString("p", "-"), tokens, ":"); + resultExpansion = NGT::Common::strtod(tokens[0]); + tokens.clear(); + } + if (resultExpansion < 0) { + NGT::Common::tokenize(args.getString("p", "-"), tokens, ":"); + } else if (nOfProbes == 0) { + NGT::Common::tokenize(args.getString("P", "-"), tokens, ":"); + } + if (tokens.size() >= 2) { + beginOfParameter = NGT::Common::strtod(tokens[0]); + endOfParameter = beginOfParameter; + if (tokens.size() >= 2) { endOfParameter = NGT::Common::strtod(tokens[1]); } + if (tokens.size() >= 3) { + if (tokens[2][0] == 'x') { + //-/mulStep = true; + stepOfParameter = NGT::Common::strtod(tokens[2].substr(1)); + } else { + stepOfParameter = NGT::Common::strtod(tokens[2]); + } + } + } + } + if (debugLevel >= 1) { + cerr << "size=" << size << endl; + cerr << "parameter=" << beginOfParameter << "->" << endOfParameter << "," << stepOfParameter << endl; + } + + QBG::Index index(indexPath, true, verbose, refinementDataType); + std::cerr << "qbg::The index is open." << std::endl; + std::cerr << " vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl; + std::cerr << " peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl; + if (outputMode == 'e') { + std::cout << "# Beginning of Evaluation" << endl; + } + try { + for (size_t trial = 0; trial < nOfTrials; trial++) { + auto pseudoDimension = index.getQuantizer().property.dimension; + std::ifstream is(query); + if (!is) { + std::cerr << "Cannot open the specified file. 
" << query << std::endl; + return; + } + string line; + std::vector> queries; + while(getline(is, line)) { + std::stringstream linestream(line); + { + vector queryVector; + while (!linestream.eof()) { + float value; + linestream >> value; + queryVector.emplace_back(value); + } + queryVector.resize(pseudoDimension); + queries.emplace_back(queryVector); + } + } + //auto qs(new float[queries.size() * pseudoDimension]); + // 上記のようにautoで書くとメモリリークが発生する + std::unique_ptr qs(new float[queries.size() * pseudoDimension]); + for (size_t i = 0; i < queries.size(); i++) { + memcpy(&qs[i * pseudoDimension], queries[i].data(), pseudoDimension * sizeof(float)); + } + std::cerr << "# of queries=" << queries.size() << std::endl; + QBG::BatchSearchContainer searchContainer; + searchContainer.setObjectVectors(&qs[0], queries.size(), pseudoDimension); + auto re = resultExpansion; + //if (re < 0.0) re = parameter; + if (re < 0.0) abort(); + searchContainer.setRefinementExpansion(re); + auto np = nOfProbes; + //if (np == 0) np = parameter; + if (np == 0) abort(); + searchContainer.setNumOfProbes(np); + searchContainer.setEpsilon(epsilon); + searchContainer.setBlobEpsilon(blobEpsilon); + searchContainer.setEdgeSize(edgeSize); + searchContainer.setCutback(cutback); + searchContainer.setGraphExplorationSize(explorationSize); + index.searchInTwoSteps(searchContainer); + + auto &result = searchContainer.getBatchResult(); + for (auto it = result.begin(); it != result.end(); ++it) { + if (outputMode == 't' || outputMode == 'T') { + auto no = distance(result.begin(), it) + 1; + std::cout << no << "\t"; + for (auto r = (*it).begin(); r != (*it).end(); ++r) { + std::cout << (*r).id; + if (outputMode == 'T') { + std::cout << "\t" << (*r).distance; + } + if (r + 1 != (*it).end()) { + std::cout << "\t"; + } + } + std::cout << std::endl;; + } else { + auto no = distance(result.begin(), it) + 1; + if (outputMode == 'e') { + std::cout << "# Query No.=" << no << endl; + std::cout << "# Epsilon=" << 
epsilon << endl; + } else { + std::cout << "Query No." << no << std::endl; + std::cout << "Rank\tIN-ID\tDistance" << std::endl; + } + for (auto r = (*it).begin(); r != (*it).end(); ++r) { + auto rank = distance((*it).begin(), r); + std::cout << rank + 1 << "\t" << (*r).id << "\t"; + std::cout << (*r).distance << std::endl; + } + if (outputMode == 'e') { + std::cout << "# End of Search" << endl; + } + } + if (outputMode == 'e') { + std::cout << "# End of Query" << endl; + } + } + } + } catch (NGT::Exception &err) { + std::stringstream msg; + msg << "Error " << err.what() << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); + } catch (...) { + std::stringstream msg; + msg << "Error" << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); + } + if (outputMode == 'e' || outputMode == 'E') { + if (nOfTrials >= 1) { + } + std::cout << "# End of Evaluation" << endl; + std::cout << "# vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl; + std::cout << "# peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl; + } + index.close(); +} void QBG::CLI::append(NGT::Args &args) @@ -1003,16 +1331,16 @@ QBG::CLI::append(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } string data; try { data = args.get("#2"); } catch (...) { - cerr << usage << endl; - cerr << "Data is not specified." << endl; + std::cerr << "Data is not specified." 
<< std::endl; } size_t dataSize = args.getl("n", 0); @@ -1047,29 +1375,30 @@ QBG::CLI::append(NGT::Args &args) void QBG::CLI::insert(NGT::Args &args) { - const string usage = "Usage: qbg append [-n data-size] [-m b|e] [-v] index(output) data.tsv(input)"; + const string usage = "Usage: qbg insert [-n data-size] [-m b|e] [-v] index(output) data.tsv(input)"; args.parse("v"); string indexPath; try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } string data; try { data = args.get("#2"); } catch (...) { - cerr << usage << endl; - cerr << "Data is not specified." << endl; + std::cerr << "Data is not specified." << std::endl; } std::ifstream stream(data); if (!stream) { - std::cerr << "Cannot open the data file. " << data << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Cannot open the data file. " << data << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } bool verbose = args.getBool("v"); @@ -1108,16 +1437,19 @@ QBG::CLI::remove(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } uint32_t ids; try { ids = args.getl("#2", 0); } catch (...) { - cerr << usage << endl; - cerr << "Data is not specified." << endl; + std::stringstream msg; + msg << "Data is not specified." 
<< std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } auto verbose = args.getBool("v"); @@ -1133,6 +1465,56 @@ QBG::CLI::remove(NGT::Args &args) } +void +QBG::CLI::expandBlob(NGT::Args &args) +{ + const string usage = "Usage: qbg expand-blob index [centroids-file]"; + args.parse("v"); + std::string indexPath; + try { + indexPath = args.get("#1"); + } catch (...) { + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); + } + std::string clusterCentroidsPath; + try { + clusterCentroidsPath = args.get("#2"); + } catch (...) {} + + NGTQ::DataType refinementDataType = NGTQ::DataTypeAny; + { + char dataType = args.getChar("R", '-'); + switch (dataType) { + case 'f': refinementDataType = NGTQ::DataTypeFloat; break; +#ifdef NGT_HALF_FLOAT + case 'h': refinementDataType = NGTQ::DataTypeFloat16; break; +#endif + case 'x': refinementDataType = NGTQ::DataTypeNone; break; + case '-': refinementDataType = NGTQ::DataTypeAny; break; + default: + std::stringstream msg; + msg << "Command::CreateParameters: Error: Invalid refinement data type. " << dataType; + NGTThrowException(msg); + } + } + + auto verbose = args.getBool("v"); + + NGT::SearchContainer ngtSearchContainer; + ngtSearchContainer.setSize(50); + QBG::SearchContainer qbgSearchContainer; + qbgSearchContainer.setSize(args.getl("n", 20)); + qbgSearchContainer.setBlobEpsilon(args.getl("b", 0.1)); + float rate = args.getf("r", -1.0); + + QBG::Index::expandBlob(indexPath, clusterCentroidsPath, ngtSearchContainer, + qbgSearchContainer, rate, refinementDataType, verbose); + +} + void QBG::CLI::buildIndex(NGT::Args &args) @@ -1142,9 +1524,10 @@ QBG::CLI::buildIndex(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." 
<< std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } char mode = args.getChar("m", '-'); @@ -1154,7 +1537,7 @@ QBG::CLI::buildIndex(NGT::Args &args) std::vector> quantizerCodebook; std::vector codebookIndex; - std::vector objectIndex; + std::vector> objectIndex; if (mode == 'q' || mode == '-') { { @@ -1167,9 +1550,10 @@ QBG::CLI::buildIndex(NGT::Args &args) } std::ifstream stream(codebookPath); if (!stream) { - std::cerr << "Cannot open the codebook. " << codebookPath << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Cannot open the codebook. " << codebookPath << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::string line; while (getline(stream, line)) { @@ -1180,10 +1564,11 @@ QBG::CLI::buildIndex(NGT::Args &args) object.push_back(NGT::Common::strtof(token)); } if (!quantizerCodebook.empty() && quantizerCodebook[0].size() != object.size()) { - cerr << "The specified quantizer codebook is invalid. " << quantizerCodebook[0].size() - << ":" << object.size() << ":" << quantizerCodebook.size() << ":" << line << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "The specified quantizer codebook is invalid. " << quantizerCodebook[0].size() + << ":" << object.size() << ":" << quantizerCodebook.size() << ":" << line << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } if (!object.empty()) { quantizerCodebook.push_back(object); @@ -1204,18 +1589,20 @@ QBG::CLI::buildIndex(NGT::Args &args) cerr << "codebook index is " << codebookIndexPath << "." << endl; std::ifstream stream(codebookIndexPath); if (!stream) { - std::cerr << "Cannot open the codebook index. " << codebookIndexPath << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "Cannot open the codebook index. 
" << codebookIndexPath << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::string line; while (getline(stream, line)) { std::vector tokens; NGT::Common::tokenize(line, tokens, " \t"); if (tokens.size() != 1) { - cerr << "The specified codebook index is invalid. " << line << std::endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "The specified codebook index is invalid. " << line << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } codebookIndex.push_back(NGT::Common::strtol(tokens[0])); } @@ -1231,25 +1618,42 @@ QBG::CLI::buildIndex(NGT::Args &args) } catch (...) { objectIndexPath = indexPath + "/ws/kmeans-cluster_index.tsv"; } - std::ifstream stream(objectIndexPath); - if (!stream) { - std::cerr << "Cannot open the codebook index. " << objectIndexPath << std::endl; - cerr << usage << endl; - return; + { + std::ifstream stream(objectIndexPath); + if (!stream) { + std::stringstream msg; + msg << "Cannot open the codebook index. " << objectIndexPath; + NGTThrowException(msg); + } + size_t nOfObjs = 0; + std::string line; + while (getline(stream, line)) nOfObjs++; + objectIndex.resize(nOfObjs); } - std::string line; - while (getline(stream, line)) { - std::vector tokens; - NGT::Common::tokenize(line, tokens, " \t"); - std::vector object; - if (tokens.size() != 1) { - cerr << "The specified codebook index is invalid. " << line << std::endl; - cerr << usage << endl; - return; + { + std::ifstream stream(objectIndexPath); + if (!stream) { + std::stringstream msg; + msg << "Cannot open the codebook index. 
" << objectIndexPath << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); + } + { + std::string line; + size_t idx = 0; + while (getline(stream, line)) { + std::vector tokens; + NGT::Common::tokenize(line, tokens, " \t"); + if (tokens.size() > 0) { + objectIndex[idx].reserve(tokens.size()); + for (auto &token : tokens) { + objectIndex[idx].emplace_back(NGT::Common::strtol(token)); + } + } + idx++; + } } - objectIndex.push_back(NGT::Common::strtol(tokens[0])); } - } catch (...) {} } @@ -1284,13 +1688,29 @@ QBG::CLI::build(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } - std::string phaseString = args.getString("p", "1-3"); + if (phaseString.substr(0, 10) == "assign-all") { + std::vector tokens; + NGT::Common::tokenize(phaseString, tokens, ":"); + int64_t lowerBoundOfObjects = 0; + size_t noOfNearestNeighbors = 1; + if (tokens.size() >= 2) { + lowerBoundOfObjects = NGT::Common::strtod(tokens[1]); + } + if (tokens.size() >= 3) { + noOfNearestNeighbors = NGT::Common::strtod(tokens[2]); + } + std::cerr << "qbg: assign-all" << std::endl; + HierarchicalKmeans hierarchicalKmeans(buildParameters); + hierarchicalKmeans.assignAll(indexPath, lowerBoundOfObjects, noOfNearestNeighbors); + return; + } bool phase[3]; if (phaseString.empty()) { phase[0] = phase[1] = phase[2] = true; @@ -1307,9 +1727,10 @@ QBG::CLI::build(NGT::Args &args) } if (tokens.size() >= 2) { endOfPhase = NGT::Common::strtod(tokens[1]) - 1;} if (tokens.size() >= 3 || tokens.size() == 0) { - cerr << "The specified phases are invalid! " << phaseString << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "The specified phases are invalid! 
" << phaseString << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } phase[0] = phase[1] = phase[2] = false; for (int p = beginOfPhase; p <= endOfPhase; p++) { @@ -1377,14 +1798,17 @@ QBG::CLI::rebuild(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "An index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } auto start = args.getl("s", 0); if (start == 0) { - std::cerr << "Start ID(-s) should be set. The ID is the smallest ID of the objects that are appended but not indexed." << std::endl; - std::cerr << usage << std::endl; + std::stringstream msg; + msg << "Start ID(-s) should be set. The ID is the smallest ID of the objects that are appended but not indexed." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } NGT::Timer timer; @@ -1416,9 +1840,10 @@ QBG::CLI::hierarchicalKmeans(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::string prefix; @@ -1459,9 +1884,10 @@ QBG::CLI::assign(NGT::Args &args) try { indexPath = args.get("#1"); } catch (...) { - cerr << "Any index is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::string queryPath; @@ -1469,9 +1895,10 @@ QBG::CLI::assign(NGT::Args &args) try { queryPath = args.get("#2"); } catch (...) { - cerr << "Any query is not specified" << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No query is specified." 
<< std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } auto epsilon = args.getf("e", 0.1); @@ -1538,9 +1965,10 @@ QBG::CLI::extract(NGT::Args &args) try { objectPath = args.get("#1"); } catch (...) { - std::cerr << "Object file is not specified." << std::endl; - std::cerr << usage << std::endl; - return; + std::stringstream msg; + msg << "Object file is not specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } std::ostream *os; @@ -1776,9 +2204,10 @@ QBG::CLI::optimize(NGT::Args &args) try { indexPath = args.get("#1"); } catch(...) { - cerr << "qbg: index is not specified." << endl; - cerr << usage << endl; - return; + std::stringstream msg; + msg << "No index is specified." << std::endl; + msg << usage << std::endl; + NGTThrowException(msg); } string invector; diff --git a/lib/NGT/NGTQ/QbgCli.h b/lib/NGT/NGTQ/QbgCli.h index 8603e4b..ff12cf5 100644 --- a/lib/NGT/NGTQ/QbgCli.h +++ b/lib/NGT/NGTQ/QbgCli.h @@ -32,8 +32,10 @@ namespace QBG { void append(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void insert(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void remove(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; + void expandBlob(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void buildIndex(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void hierarchicalKmeans(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; + void batchSearch(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void search(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void assign(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; void extract(NGT::Args &args) { std::cerr << "not implemented." 
<< std::endl; }; @@ -53,8 +55,10 @@ namespace QBG { void append(NGT::Args &args); void insert(NGT::Args &args); void remove(NGT::Args &args); + void expandBlob(NGT::Args &args); void buildIndex(NGT::Args &args); void hierarchicalKmeans(NGT::Args &args); + void batchSearch(NGT::Args &args); void search(NGT::Args &args); void assign(NGT::Args &args); void extract(NGT::Args &args); @@ -89,55 +93,60 @@ namespace QBG { debugLevel = args.getl("X", 0); - try { - if (debugLevel >= 1) { - cerr << "ngt::command=" << command << endl; - } - if (command == "search") { - search(args); - } else if (command == "create") { - create(args); - } else if (command == "load") { - load(args); - } else if (command == "append") { - append(args); - } else if (command == "insert") { - insert(args); - } else if (command == "remove") { - remove(args); - } else if (command == "build-index") { - buildIndex(args); - } else if (command == "kmeans") { - hierarchicalKmeans(args); - } else if (command == "assign") { - assign(args); - } else if (command == "extract") { - extract(args); - } else if (command == "gt") { - gt(args); - } else if (command == "gt-range") { - gtRange(args); - } else if (command == "optimize") { - optimize(args); - } else if (command == "build") { - build(args); - } else if (command == "rebuild") { - rebuild(args); - } else if (command == "create-qg") { - createQG(args); - } else if (command == "build-qg") { - buildQG(args); - } else if (command == "append-qg") { - appendQG(args); - } else if (command == "search-qg") { - searchQG(args); - } else if (command == "info") { - info(args); - } else { - cerr << "qbg: Illegal command. 
" << command << endl; - } - } catch(NGT::Exception &err) { - cerr << "qbg: Error: " << err.what() << endl; + if (debugLevel >= 1) { + cerr << "ngt::command=" << command << endl; + } + if (command == "search") { + search(args); + } else if (command == "batch-search") { + batchSearch(args); + } else if (command == "create") { + create(args); + } else if (command == "load") { + load(args); + } else if (command == "append") { + append(args); + } else if (command == "insert") { + insert(args); + } else if (command == "remove") { + remove(args); + } else if (command == "expand-blob") { + expandBlob(args); + } else if (command == "build-index") { + buildIndex(args); + } else if (command == "kmeans") { + hierarchicalKmeans(args); + } else if (command == "assign") { + assign(args); + } else if (command == "extract") { + extract(args); + } else if (command == "gt") { + gt(args); + } else if (command == "gt-range") { + gtRange(args); + } else if (command == "optimize") { + optimize(args); + } else if (command == "build") { + build(args); + } else if (command == "rebuild") { + rebuild(args); + } else if (command == "create-qg") { + createQG(args); + } else if (command == "build-qg") { + buildQG(args); + } else if (command == "append-qg") { + appendQG(args); + } else if (command == "search-qg") { + searchQG(args); + } else if (command == "info") { + info(args); + } else if (command == "-h") { + help(); + } else { + help(); + std::stringstream msg; + msg << "qbg: Illegal command. 
" << command << endl; + NGTThrowException(msg); } } diff --git a/lib/NGT/NGTQ/QuantizedBlobGraph.h b/lib/NGT/NGTQ/QuantizedBlobGraph.h index 6de5c5c..faba5e8 100644 --- a/lib/NGT/NGTQ/QuantizedBlobGraph.h +++ b/lib/NGT/NGTQ/QuantizedBlobGraph.h @@ -27,6 +27,7 @@ #include + namespace QBG { class CreationParameters { @@ -51,13 +52,17 @@ namespace QBG { localCentroidCreationMode = NGTQ::CentroidCreationModeStatic; localIDByteSize = 1; localClusteringSampleCoefficient = 10; - objectListOnMemory = false; + refinementDataType = NGTQ::DataTypeNone; + localClusterDataType = NGTQ::ClusterDataTypePQ4; + scalarQuantizationClippingRate = 0.01; + scalarQuantizationNoOfSamples = 0; globalEdgeSizeForCreation = 10; globalEdgeSizeForSearch = 40; globalIndexType = NGT::Property::GraphAndTree; globalInsertionRadiusCoefficient = 1.1; globalGraphType = NGT::NeighborhoodGraph::GraphTypeANNG; + globalObjectType = NGT::ObjectSpace::ObjectType::Float; localIndexType = NGT::Property::GraphAndTree; localInsertionRadiusCoefficient = 1.1; @@ -89,12 +94,17 @@ namespace QBG { property.localCentroidCreationMode = creation.localCentroidCreationMode; property.localIDByteSize = creation.localIDByteSize; property.localClusteringSampleCoefficient = creation.localClusteringSampleCoefficient; - property.objectListOnMemory = creation.objectListOnMemory; + property.localClusterDataType = creation.localClusterDataType; + property.scalarQuantizationClippingRate = creation.scalarQuantizationClippingRate; + property.scalarQuantizationNoOfSamples = creation.scalarQuantizationNoOfSamples; + property.refinementDataType = creation.refinementDataType; globalProperty.edgeSizeForCreation = creation.globalEdgeSizeForCreation; globalProperty.edgeSizeForSearch = creation.globalEdgeSizeForSearch; globalProperty.indexType = creation.globalIndexType; globalProperty.insertionRadiusCoefficient = creation.globalInsertionRadiusCoefficient; globalProperty.graphType = creation.globalGraphType; + globalProperty.objectType = 
creation.globalObjectType; + globalProperty.seedSize = 0; localProperty.indexType = creation.localIndexType; localProperty.insertionRadiusCoefficient = creation.localInsertionRadiusCoefficient; localProperty.graphType = creation.localGraphType; @@ -127,13 +137,17 @@ namespace QBG { NGTQ::CentroidCreationMode localCentroidCreationMode; size_t localIDByteSize; size_t localClusteringSampleCoefficient; - bool objectListOnMemory; - + NGTQ::DataType refinementDataType; + NGTQ::ClusterDataType localClusterDataType; + float scalarQuantizationClippingRate; + size_t scalarQuantizationNoOfSamples; + size_t globalEdgeSizeForCreation; size_t globalEdgeSizeForSearch; NGT::Property::IndexType globalIndexType; float globalInsertionRadiusCoefficient; NGT::Property::GraphType globalGraphType; + NGT::ObjectSpace::ObjectType globalObjectType; NGT::Property::IndexType localIndexType; float localInsertionRadiusCoefficient; @@ -226,10 +240,10 @@ namespace QBG { float timelimit; size_t iteration; - size_t clusterIteration; + size_t clusterIteration; bool clusterSizeConstraint; float clusterSizeConstraintCoefficient; - size_t convergenceLimitTimes; + size_t convergenceLimitTimes; size_t numOfObjects; size_t numOfClusters; size_t numOfSubvectors; @@ -275,15 +289,14 @@ namespace QBG { bool verbose; }; - class SearchContainer : public NGT::SearchContainer { public: SearchContainer(NGT::Object &q): NGT::SearchContainer(q), cutback(0.0), graphExplorationSize(50), exactResultSize(0), - blobExplorationCoefficient(0.0), numOfProbes(0) {} + blobExplorationCoefficient(1.0), numOfProbes(5), refinementExpansion(0.0) {} SearchContainer(): NGT::SearchContainer(*reinterpret_cast(0)), cutback(0.0), graphExplorationSize(50), exactResultSize(0), - blobExplorationCoefficient(0.0), numOfProbes(0) {} + blobExplorationCoefficient(1.0), numOfProbes(5), refinementExpansion(0.0) {} SearchContainer(SearchContainer &sc, NGT::Object &q): NGT::SearchContainer(q) { QBG::SearchContainer::operator=(sc); } @@ -294,6 
+307,7 @@ namespace QBG { exactResultSize = sc.exactResultSize; blobExplorationCoefficient = sc.blobExplorationCoefficient; numOfProbes = sc.numOfProbes; + refinementExpansion = sc.refinementExpansion; objectVector = sc.objectVector; return *this; } @@ -302,15 +316,38 @@ namespace QBG { void setExactResultSize(size_t esize) { exactResultSize = esize; } void setBlobEpsilon(float c) { blobExplorationCoefficient = c + 1.0; } void setNumOfProbes(size_t p) { numOfProbes = p; } - void setObjectVector(std::vector &query) { objectVector = std::move(query); } + void setObjectVector(std::vector &query) { objectVector = query; } + void setRefinementExpansion(float re) { refinementExpansion = re; } float cutback; size_t graphExplorationSize; size_t exactResultSize; float blobExplorationCoefficient; size_t numOfProbes; + float refinementExpansion; std::vector objectVector; }; + class BatchSearchContainer : public SearchContainer { + public: + BatchSearchContainer(NGT::Object &q): SearchContainer(q), objectVectors(0), numOfQueries(0) {} + BatchSearchContainer(): objectVectors(0), numOfQueries(0) {} + BatchSearchContainer(SearchContainer &sc, NGT::Object &q): SearchContainer(sc, q), objectVectors(0), numOfQueries(0) {} + + void setObjectVectors(void *qs, size_t nq, size_t dim) { + objectVectors = reinterpret_cast(qs); + numOfQueries = nq; + dimension = dim; + } + void *getQuery(size_t idx) { return objectVectors + dimension * idx; } + NGT::ObjectDistances &getBatchResult(size_t i) { return batchResult[i]; } + std::vector &getBatchResult() { return batchResult; } + + float *objectVectors; + size_t numOfQueries; + size_t dimension; + std::vector batchResult; + }; + class QuantizedBlobGraphRepository : public NGTQG::QuantizedGraphRepository { public: QuantizedBlobGraphRepository(NGTQ::Index &quantizedIndex): NGTQG::QuantizedGraphRepository(quantizedIndex){ @@ -326,7 +363,7 @@ namespace QBG { NGT::Timer timer; timer.start(); for (size_t gid = 1; gid < 
quantizedIndex.getInvertedIndexSize(); gid++) { - if (gid % 100000 == 0) { + if (gid % 10000 == 0) { timer.stop(); std::cerr << "The number of processed blobs=" << gid << " VmSize=" << NGT::Common::getProcessVmSizeStr() << " Elapsed time=" << timer << std::endl; timer.restart(); @@ -345,40 +382,27 @@ namespace QBG { continue; } } - NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedIndex.getQuantizer().divisionNo, invertedIndexObjects.size()); - rearrange(invertedIndexObjects, (*this)[gid], quantizedStream); + rearrange(invertedIndexObjects, (*this)[gid], quantizedIndex.getQuantizer()); } #endif } - static void rearrange(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::QuantizedObjectProcessingStream &quantizedStream) { - rearrangedObjects.clear(); - rearrangedObjects.ids.reserve(invertedIndexObjects.size()); - for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { - rearrangedObjects.ids.push_back(invertedIndexObjects[oidx].id); - for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) { -#ifdef NGTQ_UINT8_LUT -#ifdef NGTQ_SIMD_BLOCK_SIZE - size_t dataNo = oidx; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - abort(); -#else - quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[oidx].localID[idx] - 1); -#endif -#else - objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx] - 1; -#endif -#else - objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx]; -#endif - } - } + static void rearrangeObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::Quantizer &quantizer) { + rearrangedObjects.subspaceID = invertedIndexObjects.subspaceID; + auto &quantizedObjectDistance = quantizer.getQuantizedObjectDistance(); + rearrangedObjects.objects = quantizedObjectDistance.generateRearrangedObjects(invertedIndexObjects); + //rearrangedObjects.objects = quantizedStream.compressIntoUint4(); + 
} + static void rearrangeObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) { + NGTQ::QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size()); + quantizedStream.arrange(invertedIndexObjects); rearrangedObjects.subspaceID = invertedIndexObjects.subspaceID; rearrangedObjects.objects = quantizedStream.compressIntoUint4(); } - static void rearrange(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) { + // static void rearrange(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects) { + static void rearrange(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::Quantizer &quantizer) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) std::cerr << "construct: Not implemented" << std::endl; abort(); @@ -386,17 +410,19 @@ namespace QBG { if (invertedIndexObjects.numOfSubvectors == 0) { NGTThrowException("# of subvectors is zero."); } - - //(*this).resize(quantizedIndex.getInvertedIndexSize()); NGT::Timer timer; timer.start(); { + rearrangedObjects.clear(); + rearrangedObjects.ids.reserve(invertedIndexObjects.size()); + for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { + rearrangedObjects.ids.emplace_back(invertedIndexObjects[oidx].id); + } //NGTQ::InvertedIndexEntry invertedIndexObjects(numOfSubspaces); //quantizedIndex.getQuantizer().extractInvertedIndexObject(invertedIndexObjects, gid); //quantizedIndex.getQuantizer().eraseInvertedIndexObject(gid); - NGTQ::QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size()); - - rearrange(invertedIndexObjects, rearrangedObjects, quantizedStream); + //rearrangeFloatObjects(invertedIndexObjects, rearrangedObjects, quantizer); + rearrangeObjects(invertedIndexObjects, rearrangedObjects, quantizer); } #endif } @@ -404,12 +430,13 @@ namespace QBG { static void 
rearrange(NGTQ::QuantizedObjectSet &quantizedObjects, NGTQG::QuantizedNode &rearrangedObjects) { NGTQ::InvertedIndexEntry iie; iie.set(quantizedObjects); - rearrange(iie, rearrangedObjects); + rearrange(iie, rearrangedObjects, *reinterpret_cast(0)); } void extractRemovedIdSet(size_t objectListSize, std::vector &removedIDs) { std::vector exist(objectListSize); size_t count = 0; + size_t duplicatedCount = 0; for (auto &blob : *this) { for (auto id : blob.ids) { if (id >= exist.size()) { @@ -418,13 +445,19 @@ namespace QBG { NGTThrowException(msg); } if (exist.at(id)) { - std::cerr << "Warning: the object is duplicated. " << id << std::endl; + if (duplicatedCount == 0) { + std::cerr << "Warning: the object is duplicated. " << id << std::endl; + } + duplicatedCount++; } else { count++; exist.at(id) = true; } } } + if (duplicatedCount > 0) { + std::cerr << "Warning: # of duplicated objects is " << duplicatedCount << "." << std::endl; + } { removedIDs.clear(); removedIDs.reserve(objectListSize - count); @@ -443,8 +476,9 @@ namespace QBG { class Index : public NGTQ::Index { public: - Index(const std::string &indexPath, bool prebuilt = false, bool verbose = false) : - NGTQ::Index(indexPath, prebuilt), path(indexPath), quantizedBlobGraph(*this) { + Index(const std::string &indexPath, bool prebuilt = false, bool verbose = false, + NGTQ::DataType refinementDataType = NGTQ::DataTypeAny) : + NGTQ::Index(indexPath, prebuilt, refinementDataType), path(indexPath), quantizedBlobGraph(*this) { searchable = false; NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); @@ -562,9 +596,19 @@ namespace QBG { std::vector object; NGT::Common::extractVector(line, " ,\t", object); if (object.empty()) { - cerr << "An empty line or invalid value: " << line << endl; + cerr << "Empty line or invalid value: " << line << endl; continue; } + if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) && + (object.size() + 1 == 
quantizer.objectList.genuineDimension)) { + object.emplace_back(0); + } + if (object.size() != quantizer.objectList.genuineDimension) { + std::stringstream msg; + msg << "The dimension of the specified object is inconsistent with the dimension of the index. " + << object.size() << ":" << quantizer.objectList.genuineDimension; + NGTThrowException(msg); + } index.insert(idx, object); if (count % 100000 == 0) { @@ -596,43 +640,9 @@ namespace QBG { NGT::Common::tokenize(data, tokens, "."); if (tokens.size() < 2) { std::stringstream msg; - msg << "Invalid file name format"; + msg << "Invalid file name format. " << data; NGTThrowException(msg); } -#ifdef NGT_INNER_PRODUCT - double maxMag = 0.0; - if (index.getQuantizer().property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) { - std::cerr << "Inner product." << std::endl; - NGT::Timer timer; - timer.start(); - StaticObjectFileLoader loader(data, tokens[tokens.size() - 1]); - size_t count = 0; - while (!loader.isEmpty()) { - if (dataSize > 0 && count > dataSize) { - break; - } - auto object = loader.getObject(); - double mag = 0.0; - for (auto &v : object) { - //std::cerr << v << ":" << mag << std::endl; - mag += v * v; - } - if (mag > maxMag) { - maxMag = mag; - } - count++; - if (count % 2000000 == 0) { - timer.stop(); - std::cerr << "processed " << static_cast(count) / 1000000.0 << "M objects." - << " maxMag=" << maxMag << " time=" << timer << std::endl; - timer.restart(); - } - } - timer.stop(); - std::cerr << "time=" << timer << std::endl; - } - std::cerr << "final maxMag=" << maxMag << std::endl; -#endif auto &quantizer = index.getQuantizer(); StaticObjectFileLoader loader(data, tokens[tokens.size() - 1]); size_t idx = quantizer.objectList.size() == 0 ? 
0 : quantizer.objectList.size() - 1; @@ -644,16 +654,16 @@ namespace QBG { break; } auto object = loader.getObject(); -#ifdef NGT_INNER_PRODUCT - if (index.getQuantizer().property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) { - double mag = 0.0; - for (auto &v : object) { - //std::cerr << v << ":" << mag << std::endl; - mag += v * v; - } - object.push_back(sqrt(maxMag - mag)); + if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) && + (object.size() + 1 == quantizer.objectList.genuineDimension)) { + object.emplace_back(0); + } + if (object.size() != quantizer.objectList.genuineDimension) { + std::stringstream msg; + msg << "The dimension of the specified object is inconsistent with the dimension of the index. " + << object.size() << ":" << quantizer.objectList.genuineDimension; + NGTThrowException(msg); } -#endif index.insert(idx, object); if (count % 1000000 == 0) { std::cerr << "appended " << static_cast(count) / 1000000.0 << "M objects."; @@ -699,7 +709,7 @@ namespace QBG { quantizer.objectList.get(id, object, &gcodebook.getObjectSpace()); objects.push_back(pair, size_t>(object, id)); } - vector gids; + vector gids; NGTQ::Quantizer::searchIndex(gcodebook, objects, gids); for (size_t bidx = 0; bidx < gids.size(); bidx++) { @@ -716,25 +726,47 @@ namespace QBG { msg << "remove: Not found the specified ID. 
" << ids[bidx]; NGTThrowException(msg); } - NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedBlobGraph.numOfSubspaces, - rearrangedObjects.ids.size()); - quantizedStream.uncompressFromUint4(static_cast(rearrangedObjects.objects)); NGTQ::InvertedIndexEntry invertedIndexObjects; - invertedIndexObjects.initialize(quantizedBlobGraph.numOfSubspaces); - quantizedStream.restoreToInvertedIndex(invertedIndexObjects); + quantizer.getQuantizedObjectDistance().restoreIntoInvertedIndex(invertedIndexObjects, quantizedBlobGraph.numOfSubspaces, rearrangedObjects.ids, rearrangedObjects.objects); + + ///-/ /////////////////////////////////////// invertedIndexObjects.erase(invertedIndexObjects.begin() + rmidx); - NGTQ::QuantizedObjectProcessingStream removedQuantizedStream(quantizedBlobGraph.numOfSubspaces, - rearrangedObjects.ids.size()); - removedQuantizedStream.arrange(invertedIndexObjects); + ///-/ /////////////////////////////////////// + auto ids = rearrangedObjects.ids; ids.erase(ids.begin() + rmidx); rearrangedObjects.ids.clear(); rearrangedObjects.clear(); - rearrangedObjects.objects = removedQuantizedStream.compressIntoUint4(); + rearrangedObjects.objects = quantizer.getQuantizedObjectDistance().generateRearrangedObjects(invertedIndexObjects); rearrangedObjects.ids = std::move(ids); } } + void insertObjectsToBlob(NGT::ObjectID blobID, std::vector, size_t>> &objects) { + auto &quantizer = getQuantizer(); + auto &rearrangedObjects = quantizedBlobGraph[blobID]; + ///-/ ///////////// + auto subspaceID = quantizedBlobGraph[blobID].subspaceID; + NGTQ::InvertedIndexEntry invertedIndexObjects; + quantizer.getQuantizedObjectDistance().restoreIntoInvertedIndex(invertedIndexObjects, quantizedBlobGraph.numOfSubspaces, rearrangedObjects.ids, rearrangedObjects.objects); + ///-/ /////////////////////////////////////// + auto idsback = rearrangedObjects.ids; + for (auto &b : objects) { + auto &object = b.first; + auto id = b.second; + NGTQ::Object tobject(object, id, 
subspaceID); + NGTQ::QuantizedObject quantizedObject; + quantizer.encode(subspaceID, tobject, quantizedObject); + invertedIndexObjects.pushBack(id, quantizedObject); + idsback.push_back(id); + } + ///-/ /////////////////////////////////////// + rearrangedObjects.ids.clear(); + rearrangedObjects.clear(); + rearrangedObjects.objects = quantizer.getQuantizedObjectDistance().generateRearrangedObjects(invertedIndexObjects); + rearrangedObjects.ids = std::move(idsback); + } + template NGT::ObjectID insert(std::vector &object) { std::vector> objects; @@ -780,6 +812,10 @@ namespace QBG { rmids.push_back(id); } ids.push_back(id); + if ((quantizer.property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) && + (obj.size() + 1 == quantizer.objectList.genuineDimension)) { + obj.emplace_back(0); + } if (obj.size() != quantizer.property.genuineDimension) { ids.clear(); std::stringstream msg; @@ -801,7 +837,7 @@ namespace QBG { } } auto &gcodebook = static_cast(quantizer.globalCodebookIndex.getIndex()); - vector gids; + vector gids; NGTQ::Quantizer::searchIndex(gcodebook, floatObjects, gids); if (gids.size() != floatObjects.size()) { @@ -828,31 +864,7 @@ namespace QBG { for (size_t idx = 0; idx < vbatchObjects.size(); idx++) { auto &it = vbatchObjects[idx]; auto blobID = (*it).first; - auto &rearrangedObjects = quantizedBlobGraph[blobID]; - NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedBlobGraph.numOfSubspaces, - rearrangedObjects.ids.size()); - quantizedStream.uncompressFromUint4(static_cast(rearrangedObjects.objects)); - NGTQ::InvertedIndexEntry invertedIndexObjects; - invertedIndexObjects.initialize(quantizedBlobGraph.numOfSubspaces); - quantizedStream.restoreToInvertedIndex(invertedIndexObjects); - auto subspaceID = quantizedBlobGraph[blobID].subspaceID; - auto idsback = rearrangedObjects.ids; - for (auto &b : (*it).second) { - auto &object = b.first; - auto id = b.second; - NGTQ::Object tobject(object, id, subspaceID); - NGTQ::QuantizedObject 
quantizedObject; - quantizer.encode(subspaceID, tobject, quantizedObject); - invertedIndexObjects.pushBack(id, quantizedObject); - idsback.push_back(id); - } - NGTQ::QuantizedObjectProcessingStream updatedQuantizedStream(quantizedBlobGraph.numOfSubspaces, - invertedIndexObjects.size()); - updatedQuantizedStream.arrange(invertedIndexObjects); - rearrangedObjects.ids.clear(); - rearrangedObjects.clear(); - rearrangedObjects.objects = updatedQuantizedStream.compressIntoUint4(); - rearrangedObjects.ids = std::move(idsback); + insertObjectsToBlob(blobID, (*it).second); } return; } @@ -949,6 +961,146 @@ namespace QBG { redirector.end(); } + static void expandBlob(std::string qbgIndexPath, std::string clusterCentroidsPath, + NGT::SearchContainer &ngtSearchContainer, + QBG::SearchContainer &qbgSearchContainer, + float rate, + NGTQ::DataType refinementDataType, + bool verbose = false) { + + auto extractNeighbors = [](std::vector> &objects, + std::vector &sizes, + QBG::Index &qbg, size_t &gidx, + NGT::SearchContainer &searchContainer, + QBG::SearchContainer qbgSearchContainer, + std::vector> &nearestNeighbors) { + NGT::Index &gcodebook = qbg.getQuantizer().globalCodebookIndex; +#pragma omp parallel for + for (size_t oidx = 0; oidx < objects.size(); oidx++) { + //-/std::cerr << "oidx=" << oidx << std::endl; + auto gtarget = gidx + oidx; + { + NGT::SearchQuery sq(objects[oidx]); + if (gtarget >= gcodebook.getObjectRepositorySize()) { + std::stringstream msg; + msg << "Cluster centroids file has more entries than global codebook. " + << gtarget << ":" << gcodebook.getObjectRepositorySize(); + NGTThrowException(msg); + } + static_cast(sq) = searchContainer; + NGT::ObjectDistances neighbors; + sq.setResults(&neighbors); + gcodebook.search(sq); + if (gtarget + 1 != neighbors[0].id) { + std::cerr << "extpandClusters: Warning! 
" << gtarget << ":" << neighbors[0].id << std::endl; + auto found = false; + for (size_t i = 1; i < neighbors.size(); i++) { + std::cerr << neighbors[i].id << ":" << neighbors[i].distance << std::endl; + if (gtarget + 1 == neighbors[i].id) { + found = true; + std::cerr << "Found" << std::endl; + break; + } + } + if (!found) { + std::cerr << "extpandClusters: Strong warning! " << gtarget << std::endl; + } + neighbors[0].id = gtarget + 1; + } + } + { + NGT::ObjectDistances neighbors; + QBG::SearchContainer sc(qbgSearchContainer); + sc.setObjectVector(objects[oidx]); + sc.setSize(sizes[oidx]); + sc.setResults(&neighbors); + qbg.searchInTwoSteps(sc); + for (auto &n : neighbors) { + nearestNeighbors[gtarget].emplace_back(n.id); + } + } + } + gidx += objects.size(); + objects.clear(); + sizes.clear(); + }; + + NGT::StdOstreamRedirector redirector(!verbose); + redirector.begin(); + + auto prebuilt = false; + QBG::Index qbg(qbgIndexPath, prebuilt, verbose, refinementDataType); + if (clusterCentroidsPath.empty()) { + clusterCentroidsPath = QBG::Index::getStoredBlobFile(qbgIndexPath); + } + std::ifstream stream(clusterCentroidsPath); + if (!stream) { + std::stringstream msg; + msg << "Cannot open the centroid list file. 
" << clusterCentroidsPath; + NGTThrowException(msg); + } + auto &quantizer = qbg.getQuantizer(); + auto &gcodebook = quantizer.globalCodebookIndex; + std::string line; + if (gcodebook.getObjectRepositorySize() == 0) { + NGTThrowException("Global codebook index is empty."); + } + + if (verbose) { + std::cerr << "qbg search container size=" << qbgSearchContainer.size << std::endl; + std::cerr << "repo size=" << gcodebook.getObjectRepositorySize() << std::endl; + } + + std::vector> nearestNeighbors(gcodebook.getObjectRepositorySize() - 1); + std::vector> objects; + std::vector sizes; + size_t gidx = 0; + while (getline(stream, line)) { + std::vector tokens; + NGT::Common::tokenize(line, tokens, " \t"); + std::vector object; + for (auto &token : tokens) { + object.emplace_back(NGT::Common::strtof(token)); + } + objects.emplace_back(object); + if (rate < 0.0) { + sizes.emplace_back(qbgSearchContainer.size); + } else { + sizes.emplace_back(qbg.quantizedBlobGraph[gidx + 1].ids.size() * (1.0 + rate)); + } + if (objects.size() == 10) { + extractNeighbors(objects, sizes, qbg, gidx, ngtSearchContainer, qbgSearchContainer, + nearestNeighbors); + } + } + if (objects.size() > 0) { + extractNeighbors(objects, sizes, qbg, gidx, ngtSearchContainer, qbgSearchContainer, + nearestNeighbors); + } + size_t nOfAddedObjects = 0; + for (size_t gidx = 0; gidx < nearestNeighbors.size(); gidx++) { + NGT::ObjectID blobID = gidx + 1; + auto &rearrangedObjects = qbg.quantizedBlobGraph[blobID]; + auto &ids = rearrangedObjects.ids; + std::unordered_set blob(ids.begin(), ids.end()); + std::vector, size_t>> objects; + for (auto &id : nearestNeighbors[gidx]) { + if (blob.find(id) == blob.end()) { + std::vector object; + qbg.getQuantizer().objectList.get(id, object); + objects.emplace_back(std::make_pair(object, id)); + } + } + nOfAddedObjects += objects.size(); + qbg.insertObjectsToBlob(blobID, objects); + } + if (verbose) { + std::cerr << "# of added objects=" << nOfAddedObjects << " the mean # of 
added objects=" << nOfAddedObjects / nearestNeighbors.size() << std::endl; + } + qbg.save(); + redirector.end(); + } + void getSeeds(NGT::Index &index, NGT::Object *object, NGT::ObjectDistances &seeds, size_t noOfSeeds) { auto &graph = static_cast(index.getIndex()); NGT::SearchContainer sc(*object); @@ -976,16 +1128,27 @@ namespace QBG { judge(NGTQG::QuantizedNode &ivi, size_t k, NGT::Distance radius, NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut, NGT::NeighborhoodGraph::ResultSet &result, size_t &foundCount + , void *query = 0, std::unique_ptr *checkedIDs = 0 ) { auto noOfObjects = ivi.ids.size(); - float distances[NGTQ::QuantizedObjectProcessingStream::getNumOfAlignedObjects(noOfObjects)]; auto &quantizedObjectDistance = getQuantizer().getQuantizedObjectDistance(); + std::vector distances(quantizedObjectDistance.getNumOfAlignedObjects(noOfObjects)); + if (checkedIDs != 0) { + for (size_t idx = 0; idx < ivi.ids.size(); idx++) { + auto id = ivi.ids[idx]; + if ((**checkedIDs)[id]) { + distances[idx] = 1.0; + } else { + //std::cerr << "non checked" << std::endl; + (**checkedIDs).set(id); + } + } + } #ifdef NGTQBG_MIN - float distance = quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut); + float distance = quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut, query); #else - quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut); + quantizedObjectDistance(ivi.objects, &distances[0], noOfObjects, lut, query); #endif - #ifdef NGTQBG_MIN if (distance >= radius) { return std::make_pair(distance, radius); @@ -1013,20 +1176,32 @@ namespace QBG { } - static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer, + static float refineDistances(NGTQ::Quantizer &quantizer, NGT::NeighborhoodGraph::ResultSet &result, - NGT::ObjectDistances &qresults) { - auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace(); + NGT::ObjectDistances &qresults, + size_t exactResultSize, + 
std::unique_ptr> &resizedQuery) { + float err; + NGT::ObjectSpace *objectSpace; + if (quantizer.refinementObjectSpace != 0) { + objectSpace = quantizer.refinementObjectSpace; + } else if (quantizer.refinementObjectSpaceForObjectList != 0) { + objectSpace = quantizer.refinementObjectSpaceForObjectList; + } else { + std::stringstream msg; + msg << "Fatal inner error! Any refinement object space is unavailable."; + NGTThrowException(msg); + } NGT::ResultPriorityQueue qres; - if (objectSpace.getObjectType() == typeid(float)) { - refineDistances(searchContainer, quantizer, result, qres); - } else if (objectSpace.getObjectType() == typeid(uint8_t)) { - refineDistances(searchContainer, quantizer, result, qres); - } else if (objectSpace.getObjectType() == typeid(NGT::float16)) { - refineDistances(searchContainer, quantizer, result, qres); + if (objectSpace->getObjectType() == typeid(float)) { + err = refineDistances(quantizer, result, qres, exactResultSize, resizedQuery); + } else if (objectSpace->getObjectType() == typeid(uint8_t)) { + err = refineDistances(quantizer, result, qres, exactResultSize, resizedQuery); + } else if (objectSpace->getObjectType() == typeid(NGT::float16)) { + err = refineDistances(quantizer, result, qres, exactResultSize, resizedQuery); } else { std::stringstream msg; - msg << "refineDistances: Fatal error! Invalid datatype. " << objectSpace.getObjectType().name() << std::endl; + msg << "refineDistances: Fatal error! Invalid datatype. 
" << objectSpace->getObjectType().name() << std::endl; NGTThrowException(msg); } qresults.resize(qres.size()); @@ -1034,18 +1209,21 @@ namespace QBG { qresults[i] = qres.top(); qres.pop(); } + return err; } - static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer, + static float refineDistances(NGTQ::Quantizer &quantizer, NGT::NeighborhoodGraph::ResultSet &result, - NGT::ResultPriorityQueue &qresults) { - auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace(); + NGT::ResultPriorityQueue &qresults, + size_t exactResultSize, + std::unique_ptr> &resizedQuery) { + auto &objectSpace = *quantizer.refinementObjectSpace; if (objectSpace.getObjectType() == typeid(float)) { - refineDistances(searchContainer, quantizer, result, qresults); + return refineDistances(quantizer, result, qresults, exactResultSize, resizedQuery); } else if (objectSpace.getObjectType() == typeid(uint8_t)) { - refineDistances(searchContainer, quantizer, result, qresults); + return refineDistances(quantizer, result, qresults, exactResultSize, resizedQuery); } else if (objectSpace.getObjectType() == typeid(NGT::float16)) { - refineDistances(searchContainer, quantizer, result, qresults); + return refineDistances(quantizer, result, qresults, exactResultSize, resizedQuery); } else { std::stringstream msg; msg << "refineDistances: Fatal error! Invalid datatype. 
" << objectSpace.getObjectType().name() << std::endl; @@ -1054,30 +1232,34 @@ namespace QBG { } template - static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer, + static float refineDistances(NGTQ::Quantizer &quantizer, NGT::NeighborhoodGraph::ResultSet &result, - NGT::ResultPriorityQueue &qresults) { + NGT::ResultPriorityQueue &qresults, + size_t exactResultSize, + std::unique_ptr> &resizedQuery) { qresults = NGT::ResultPriorityQueue(); - NGT::Object &query = searchContainer.object; - auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace(); - auto paddedDimension = objectSpace.getPaddedDimension(); - const size_t prefetchSize = objectSpace.getPrefetchSize(); #ifdef NGTQ_OBJECT_IN_MEMORY - if (quantizer.objectListOnMemory.size() != 0) { + if (quantizer.refinementObjectSpace != 0) { + auto &os = *quantizer.refinementObjectSpace; + auto &repo = os.getRepository(); + auto &comparator = os.getComparator(); + auto *q = os.allocateNormalizedObject(*resizedQuery); while (!result.empty()) { auto r = result.top(); result.pop(); - NGT::Object &object = *quantizer.objectListOnMemory.get(r.id); - if (!result.empty()) { - uint8_t *ptr = static_cast(quantizer.objectListOnMemory.get(result.top().id)->getPointer()); - NGT::MemoryCache::prefetch(ptr, prefetchSize); + { + r.distance = comparator(*q, *repo.get(r.id)); + //r.distance = comparator(*query, *repo.get(r.id)); + qresults.push(r); } - r.distance = objectSpace.getComparator()(query, object); - qresults.push(r); } - } else { + os.deleteObject(q); + } else if (quantizer.refinementObjectSpaceForObjectList != 0) { #endif auto threadid = omp_get_thread_num(); + auto &os = *quantizer.refinementObjectSpaceForObjectList; + auto &comparator = os.getComparator(); + auto *q = os.allocateNormalizedObject(*resizedQuery); while (!result.empty()) { auto r = result.top(); result.pop(); @@ -1087,71 +1269,559 @@ namespace QBG { #else quantizer.objectList.get(r.id, object); #endif - 
r.distance = NGT::PrimitiveComparator::compareL2(static_cast(query.getPointer()), - static_cast(object.data()), paddedDimension); - - + auto *o = os.allocateNormalizedObject(object); + r.distance = comparator(*q, *o); + os.deleteObject(o); qresults.push(r); } + os.deleteObject(q); #ifdef NGTQ_OBJECT_IN_MEMORY } #endif - while (qresults.size() > searchContainer.exactResultSize) { + while (qresults.size() > exactResultSize) { qresults.pop(); } + return 0.0; + } + + void searchInTwoSteps(QBG::BatchSearchContainer &searchContainer) { + if (searchContainer.numOfQueries == 0) { + NGTThrowException("search: object is null."); + } + auto parameterSize = searchContainer.size; + auto parameterExactResultSize = searchContainer.size; + if (searchContainer.refinementExpansion >= 1.0) { + parameterSize *= searchContainer.refinementExpansion; + } else { + parameterExactResultSize = 0; + } + NGT::Timer timer; + timer.start(); + auto &quantizer = getQuantizer(); + auto &globalIndex = quantizer.globalCodebookIndex; + std::vector nearestBlobs(searchContainer.numOfQueries); + + +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { +#if 1 + std::vector qobj(searchContainer.dimension); + memcpy(qobj.data(), searchContainer.getQuery(qi), searchContainer.dimension * sizeof(float)); + float mag = 0; + for (size_t i = 0; i < quantizer.property.genuineDimension; i++) { + mag += qobj[i] * qobj[i]; + } + if (quantizer.property.maxMagnitude - mag > 0.0) { + qobj[quantizer.property.genuineDimension] = sqrt(quantizer.property.maxMagnitude - mag); + } else { + qobj[quantizer.property.genuineDimension] = 0.0; + } + auto &globalGraph = static_cast(globalIndex.getIndex()); + NGT::ObjectDistances seeds; + { + NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj); + NGT::SearchContainer sc(*query); + sc.setSize(500); + try { + globalGraph.getSeedsFromTree(sc, seeds); + } catch(NGT::Exception &err) { + 
globalIndex.deleteObject(query); + throw err; + } + globalIndex.deleteObject(query); + } + { + qobj[quantizer.property.genuineDimension] = 0.0; + NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj); + NGT::SearchContainer sc(searchContainer, *query); + sc.setResults(&nearestBlobs[qi]); + sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0); + sc.setSize(searchContainer.numOfProbes); + try { + globalIndex.search(sc, seeds); + } catch(NGT::Exception &err) { + globalIndex.deleteObject(query); + throw err; + } + globalIndex.deleteObject(query); + } +#else + std::vector qobj(searchContainer.dimension); + memcpy(qobj.data(), searchContainer.getQuery(qi), searchContainer.dimension * sizeof(float)); + //NGT::Object *query = globalIndex.allocateObject(qobj); + NGT::Object *query = globalIndex.getObjectSpace().allocateNormalizedPersistentObject(qobj); + //NGT::Object *query = allocateObject(q); + NGT::SearchContainer sc(searchContainer, *query); + sc.setResults(&nearestBlobs[qi]); + sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0); + sc.setSize(searchContainer.numOfProbes); + globalIndex.search(sc); + globalIndex.deleteObject(query); +#endif + } + std::unordered_map> blobs; + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + for (size_t i = 0; i < nearestBlobs[qi].size(); i++) { + auto blobID = nearestBlobs[qi][i].id; + if (blobs.find(blobID) == blobs.end()) { + blobs.insert({blobID, std::vector()}); + } + blobs[blobID].emplace_back(qi); + } + } + //-/ ˙並列化のために形式変更 + std::vector>> blobList; + blobList.reserve(blobs.size()); + for (auto &v : blobs) { + blobList.emplace_back(v); + } + auto &quantizedObjectDistance = getQuantizer().getQuantizedObjectDistance(); + auto dimension = searchContainer.dimension; + auto *fqueries = new float[searchContainer.numOfQueries * dimension]; + auto *cqueries = new uint8_t[searchContainer.numOfQueries * dimension]; + void *transformedQueries = fqueries; + float 
offset = 0.0; + float scale = -1.0; + NGT::ObjectSpace::ObjectType objectType = NGT::ObjectSpace::ObjectTypeNone; + switch(quantizer.property.localClusterDataType) { + case NGTQ::ClusterDataTypeSQSU8: + objectType = NGT::ObjectSpace::ObjectType::Qsuint8; break; + default: break; + } + if (objectType != NGT::ObjectSpace::ObjectTypeNone) { + offset = getQuantizer().property.scalarQuantizationOffset; + scale = getQuantizer().property.scalarQuantizationScale; + transformedQueries = cqueries; + } +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + auto *fq = fqueries + dimension * qi; + memcpy(fq, searchContainer.getQuery(qi), dimension * sizeof(float)); +#if defined(NGTQG_ROTATION) + if (quantizedObjectDistance.rotation != 0) { + quantizedObjectDistance.rotation->mul(fq); + } +#endif + if (objectType != NGT::ObjectSpace::ObjectTypeNone) { + NGT::ObjectSpace::quantizeToQint8(fq, dimension, cqueries + dimension * qi, objectType, offset, scale); + } + } + std::vector> distances(blobList.size()); +#pragma omp parallel for + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto blobID = blobList[bi].first; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + auto *lut = reinterpret_cast(0); + std::vector &queryList = blobList[bi].second; + distances[bi].resize(noOfObjects * queryList.size()); + quantizedObjectDistance(quantizedBlobGraph[blobID].objects, distances[bi].data(), noOfObjects, *lut, + transformedQueries, queryList); + } + delete[] fqueries; + delete[] cqueries; +#define LOGIC6 +#if defined(LOGIC1) + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto blobID = blobList[bi].first; + //auto subspaceID = quantizedBlobGraph[blobID].subspaceID; + std::vector &queryList = blobList[bi].second; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + 
//std::cerr << "q=" << queryList[qi] << ":" << std::endl; + for (size_t di = 0; di < noOfObjects; di++) { + //std::cerr << "res=" << quantizedBlobGraph[blobID].ids[di] << " d=" << distances[bi][qi * noOfObjects + di] << std::endl; + searchContainer.batchResult[queryList[qi]].emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], + distances[bi][qi * noOfObjects + di] )); + } + } + } +#pragma omp parallel for + for (size_t i = 0; i < searchContainer.batchResult.size(); i++) { + std::sort(searchContainer.batchResult[i].begin(), + searchContainer.batchResult[i].end()); + searchContainer.batchResult[i].resize(parameterSize); + } +#elif defined(LOGIC2) + std::vector, + std::less>> resultSet(searchContainer.numOfQueries); + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto blobID = blobList[bi].first; + //auto subspaceID = quantizedBlobGraph[blobID].subspaceID; + std::vector &queryList = blobList[bi].second; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + //std::cerr << "q=" << queryList[qi] << ":" << std::endl; + for (size_t di = 0; di < noOfObjects; di++) { + resultSet[queryList[qi]].push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], + distances[bi][qi * noOfObjects + di])); + //searchContainer.batchResult[queryList[qi]].emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], + // distances[bi][qi * noOfObjects + di] )); + if (resultSet[queryList[qi]].size() > parameterSize) { + resultSet[queryList[qi]].pop(); + } + } + } + } + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); +#pragma omp parallel for + for (size_t qi = 0; qi < resultSet.size(); qi++) { + searchContainer.batchResult[qi].resize(resultSet[qi].size()); + while (!resultSet[qi].empty()) { + searchContainer.batchResult[qi][resultSet[qi].size() - 1] = std::move(resultSet[qi].top()); + resultSet[qi].pop(); + } + } +#elif defined(LOGIC3) + auto 
nOfThreads = omp_get_max_threads(); + std::vector, + std::less>> resultSet(searchContainer.numOfQueries * nOfThreads); +#pragma omp parallel for + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto thdID = omp_get_thread_num(); + auto thdIdx = thdID * searchContainer.numOfQueries; + auto blobID = blobList[bi].first; + std::vector &queryList = blobList[bi].second; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + for (size_t di = 0; di < noOfObjects; di++) { + auto &rset = resultSet[thdIdx + queryList[qi]]; + auto d = distances[bi][qi * noOfObjects + di]; + if (rset.size() < parameterSize) { + rset.push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + } else if (rset.top().distance >= d) { + rset.push(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + rset.pop(); + } + } + } + } +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + auto &rset = resultSet[qi]; + for (size_t ti = 1; ti < nOfThreads; ti++) { + auto thdIdx = ti * searchContainer.numOfQueries; + while (!resultSet[thdIdx + qi].empty()) { + if (rset.size() < parameterSize) { + rset.push(resultSet[thdIdx + qi].top()); + } else if (rset.top().distance >= resultSet[thdIdx + qi].top().distance) { + rset.push(resultSet[thdIdx + qi].top()); + rset.pop(); + } + resultSet[thdIdx + qi].pop(); + } + } + } + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + searchContainer.batchResult[qi].resize(resultSet[qi].size()); + while (!resultSet[qi].empty()) { + searchContainer.batchResult[qi][resultSet[qi].size() - 1] = std::move(resultSet[qi].top()); + resultSet[qi].pop(); + } + } +#elif defined(LOGIC4) + auto nOfThreads = omp_get_max_threads(); + std::vector resultSet(searchContainer.numOfQueries * nOfThreads); +#pragma omp parallel for + for 
(size_t bi = 0; bi < blobList.size(); bi++) { + auto thdID = omp_get_thread_num(); + auto thdIdx = thdID * searchContainer.numOfQueries; + auto blobID = blobList[bi].first; + //auto subspaceID = quantizedBlobGraph[blobID].subspaceID; + std::vector &queryList = blobList[bi].second; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + auto &rset = resultSet[thdIdx + queryList[qi]]; + //std::cerr << "q=" << queryList[qi] << ":" << std::endl; + for (size_t di = 0; di < noOfObjects; di++) { + auto d = distances[bi][qi * noOfObjects + di]; + //std::cerr << quantizedBlobGraph[blobID].ids[di] << ":" << d << std::endl; + rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + } + } + } + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + for (size_t ti = 0; ti < nOfThreads; ti++) { + auto thdIdx = ti * searchContainer.numOfQueries; + std::sort(resultSet[thdIdx + qi].begin(), resultSet[thdIdx + qi].end()); + if (resultSet[thdIdx + qi].size() > parameterSize) { + resultSet[thdIdx + qi].resize(parameterSize); + } + for (auto &obj : resultSet[thdIdx + qi]) { + searchContainer.batchResult[qi].emplace_back(obj); + } + } + std::sort(searchContainer.batchResult[qi].begin(), + searchContainer.batchResult[qi].end()); + searchContainer.batchResult[qi].resize(parameterSize); + } +#elif defined(LOGIC5) + auto nOfThreads = omp_get_max_threads(); + std::vector resultSet(searchContainer.numOfQueries * nOfThreads); + std::vector> max(searchContainer.numOfQueries * nOfThreads, + std::pair(FLT_MAX, 0)); +#pragma omp parallel for + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto thdID = omp_get_thread_num(); + auto thdIdx = thdID * searchContainer.numOfQueries; + auto blobID = blobList[bi].first; + std::vector &queryList = blobList[bi].second; + auto 
noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + auto &rset = resultSet[thdIdx + queryList[qi]]; + auto &mx = max[thdIdx + queryList[qi]]; + //std::cerr << "q=" << queryList[qi] << ":" << std::endl; + for (size_t di = 0; di < noOfObjects; di++) { + auto d = distances[bi][qi * noOfObjects + di]; + //std::cerr << quantizedBlobGraph[blobID].ids[di] << ":" << d << std::endl; + if (d < mx.first) { + if (rset.size() >= parameterSize) { + rset[mx.second] = std::move(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + mx.first = -1.0; + mx.second = -1; + for (auto it = rset.begin(); it != rset.end(); ++it) { + if ((*it).distance > mx.first) { + mx.first = (*it).distance; + mx.second = std::distance(rset.begin(), it); + } + } + } else { + rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + if (rset.size() >= parameterSize) { + mx.first = -1.0; + mx.second = -1; + for (auto it = rset.begin(); it != rset.end(); ++it) { + if ((*it).distance > mx.first) { + mx.first = (*it).distance; + mx.second = std::distance(rset.begin(), it); + } + } + } + } + } + } + } + } + + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + for (size_t ti = 0; ti < nOfThreads; ti++) { + auto thdIdx = ti * searchContainer.numOfQueries; + for (auto &obj : resultSet[thdIdx + qi]) { + searchContainer.batchResult[qi].emplace_back(obj); + } + } + std::sort(searchContainer.batchResult[qi].begin(), + searchContainer.batchResult[qi].end()); + if (searchContainer.batchResult[qi].size() > parameterSize) { + searchContainer.batchResult[qi].resize(parameterSize); + } + } +#elif defined(LOGIC6) + auto nOfThreads = omp_get_max_threads(); + std::vector resultSet(searchContainer.numOfQueries * nOfThreads); + std::vector max(searchContainer.numOfQueries * nOfThreads, FLT_MAX); 
+#pragma omp parallel for + for (size_t bi = 0; bi < blobList.size(); bi++) { + auto thdID = omp_get_thread_num(); + auto thdIdx = thdID * searchContainer.numOfQueries; + auto blobID = blobList[bi].first; + //auto subspaceID = quantizedBlobGraph[blobID].subspaceID; + std::vector &queryList = blobList[bi].second; + auto noOfObjects = quantizedBlobGraph[blobID].ids.size(); + for (size_t qi = 0; qi < queryList.size(); qi++) { + auto &rset = resultSet[thdIdx + queryList[qi]]; + auto &mx = max[thdIdx + queryList[qi]]; + for (size_t di = 0; di < noOfObjects; di++) { + auto d = distances[bi][qi * noOfObjects + di]; + if (d < mx) { + rset.emplace_back(NGT::ObjectDistance(quantizedBlobGraph[blobID].ids[di], d)); + if (rset.size() >= parameterSize * 2) { + std::sort(rset.begin(), rset.end()); + for (auto it = rset.begin(); it + 1 != rset.end();) { + if ((*it).id == (*(it + 1)).id) { + it = rset.erase(it); + } else { + ++it; + } + } + rset.resize(parameterSize); + mx = rset.back().distance; + } + } + } + if (rset.size() > 0) { + std::sort(rset.begin(), rset.end()); + for (auto it = rset.begin(); it + 1 != rset.end();) { + if ((*it).id == (*(it + 1)).id) { + it = rset.erase(it); + } else { + ++it; + } + } + if (rset.size() > parameterSize) { + rset.resize(searchContainer.size); + } + } + } + } + + searchContainer.batchResult.clear(); + searchContainer.batchResult.resize(searchContainer.numOfQueries); +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + for (int ti = 0; ti < nOfThreads; ti++) { + auto thdIdx = ti * searchContainer.numOfQueries; + for (auto &obj : resultSet[thdIdx + qi]) { + searchContainer.batchResult[qi].emplace_back(obj); + } + } + if (searchContainer.batchResult[qi].size() > 0) { + std::sort(searchContainer.batchResult[qi].begin(), + searchContainer.batchResult[qi].end()); + for (auto it = searchContainer.batchResult[qi].begin(); + it + 1 != searchContainer.batchResult[qi].end();) { + if ((*it).id == (*(it + 1)).id) { 
+ it = searchContainer.batchResult[qi].erase(it); + } else { + ++it; + } + } + if (searchContainer.batchResult[qi].size() > parameterSize) { + searchContainer.batchResult[qi].resize(parameterSize); + } + } + } +#endif + + if (parameterExactResultSize > 0 && quantizer.refinementObjectSpace != 0) { + auto &os = *quantizer.refinementObjectSpace; + auto &repo = os.getRepository(); + auto &comparator = os.getComparator(); +#pragma omp parallel for + for (size_t qi = 0; qi < searchContainer.numOfQueries; qi++) { + auto *fq = static_cast(searchContainer.getQuery(qi)); + auto *q = os.allocateNormalizedObject(fq, dimension); + for (auto &r : searchContainer.batchResult[qi]) { + r.distance = comparator(*q, *repo.get(r.id)); + } + os.deleteObject(q); + std::sort(searchContainer.batchResult[qi].begin(), + searchContainer.batchResult[qi].end()); + if (parameterExactResultSize > 0) { + if (searchContainer.batchResult[qi].size() > parameterExactResultSize ) { + searchContainer.batchResult[qi].resize(parameterExactResultSize); + } + } + } + } } void searchInTwoSteps(QBG::SearchContainer &searchContainer) { - if (searchContainer.isEmptyObject()) { - NGT::Object query(searchContainer.objectVector, getQuantizer().globalCodebookIndex.getObjectSpace()); - SearchContainer sc(searchContainer, query); - searchInTwoSteps(sc); - searchContainer.workingResult = std::move(sc.workingResult); - return; + auto parameterSize = searchContainer.size; + auto parameterExactResultSize = searchContainer.size; + if (searchContainer.refinementExpansion >= 1.0) { + parameterSize *= searchContainer.refinementExpansion; + } else { + parameterExactResultSize = 0; } NGT::ObjectDistances blobs; - NGT::SearchContainer sc(searchContainer); - sc.setResults(&blobs); - sc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0); - sc.setSize(searchContainer.numOfProbes); - auto &quantizer = getQuantizer(); auto &globalIndex = quantizer.globalCodebookIndex; - auto &quantizedObjectDistance = 
quantizer.getQuantizedObjectDistance(); if (searchContainer.objectVector.size() == 0) { NGTThrowException("search: object is null."); } + auto dimension = getQuantizer().globalCodebookIndex.getObjectSpace().getDimension(); std::vector rotatedQuery = searchContainer.objectVector; - { - NGT::Object *query = allocateObject(searchContainer.objectVector); - NGT::SearchContainer tsc(sc, *query); - tsc.setResults(&sc.getResult()); - globalIndex.search(tsc); - globalIndex.deleteObject(query); + if (rotatedQuery.size() < dimension) { + if (rotatedQuery.size() == quantizer.property.genuineDimension || + rotatedQuery.size() + 1 == quantizer.property.genuineDimension) { + rotatedQuery.resize(dimension); + } } - if (blobs.empty()) { - std::cerr << "Warning: No blobs can be searched." << std::endl; - std::cerr << " global index size=" << globalIndex.getObjectRepositorySize() << std::endl; - std::cerr << " size=" << sc.size << std::endl; - return; + std::unique_ptr> resizedQuery = nullptr; + if (parameterExactResultSize > 0) { + std::unique_ptr> tmp(new std::vector(rotatedQuery)); + resizedQuery = std::move(tmp); + } + NGT::Object *query = 0; + try { + query = allocateObject(rotatedQuery); + } catch(NGT::Exception &err) { + std::stringstream msg; + msg << "search : allocate query for global. dimension=" << searchContainer.objectVector.size() + << " " << err.what(); + NGTThrowException(msg); + } + { + NGT::SearchContainer gsc(*query); + gsc.setResults(&blobs); + gsc.setEpsilon(searchContainer.blobExplorationCoefficient - 1.0); + gsc.setSize(searchContainer.numOfProbes); + globalIndex.search(gsc); + if (blobs.empty()) { + std::stringstream msg; + msg << "Error! 
No blobs can be searched."; + msg << " global index size=" << globalIndex.getObjectRepositorySize(); + msg << " size=" << gsc.size << " # of probes=" << searchContainer.numOfProbes; + NGTThrowException(msg); + } } - #if defined(NGTQG_ROTATION) if (quantizedObjectDistance.rotation != 0) { quantizedObjectDistance.rotation->mul(rotatedQuery.data()); } #endif + void *selectiveQuery = rotatedQuery.data(); + NGT::ObjectSpace::ObjectType objectType = NGT::ObjectSpace::ObjectTypeNone; + switch(quantizer.property.localClusterDataType) { + case NGTQ::ClusterDataTypeSQSU8: + objectType = NGT::ObjectSpace::ObjectType::Qsuint8; break; + default: break; + } + uint8_t scalarQuantizedObject[rotatedQuery.size()]; + if (objectType != NGT::ObjectSpace::ObjectTypeNone) { + auto dimension = rotatedQuery.size(); + float sqobj[dimension]; + memcpy(sqobj, rotatedQuery.data(), dimension * sizeof(float)); + auto offset = getQuantizer().property.scalarQuantizationOffset; + auto scale = getQuantizer().property.scalarQuantizationScale; + NGT::ObjectSpace::quantizeToQint8(sqobj, dimension, scalarQuantizedObject, objectType, offset, scale); + selectiveQuery = scalarQuantizedObject; + } + std::unordered_map luts; size_t foundCount = 0; - size_t k = searchContainer.size; + size_t k = parameterSize; NGT::Distance radius = FLT_MAX; NGT::NeighborhoodGraph::ResultSet result; #ifdef NGTQBG_COARSE_BLOB NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 lookupTable; quantizedObjectDistance.initialize(lookupTable); #endif + //NGTQ::BooleanSet *checkedIDs = nullptr; + std::unique_ptr checkedIDs = nullptr; + if (quantizer.objectList.size() < 5000000) { + //checkedIDs = new NGTQ::BooleanVector(quantizer.objectList.size()); + std::unique_ptr tmp(new NGTQ::BooleanVector(quantizer.objectList.size())); + checkedIDs = std::move(tmp); + } else { + //checkedIDs = new NGTQ::BooleanHash(quantizer.objectList.size()); + std::unique_ptr tmp(new NGTQ::BooleanHash(quantizer.objectList.size())); + checkedIDs = 
std::move(tmp); + } for (size_t idx = 0; idx < blobs.size(); idx++) { #ifdef NGTQBG_COARSE_BLOB NGT::Distance blobDistance = std::numeric_limits::max(); @@ -1161,7 +1831,8 @@ namespace QBG { auto endIvtID = graphNodeToInvertedIndexEntries[graphNodeID] + 1; for (auto blobID = beginIvtID; blobID < endIvtID; blobID++) { auto subspaceID = quantizedBlobGraph[blobID].subspaceID; - quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, lookupTable); + //quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, lookupTable); + quantizedObjectDistance.createDistanceLookup(selectiveQuery, subspaceID, lookupTable); NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = lookupTable; #else { @@ -1176,9 +1847,8 @@ namespace QBG { } NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = (*luti).second; #endif - NGT::Distance bd; - std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], k, radius, lut, result, foundCount); + std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], k, radius, lut, result, foundCount, selectiveQuery, &checkedIDs); #ifdef NGTQBG_COARSE_BLOB if (bd < blobDistance) { blobDistance = bd; @@ -1190,19 +1860,20 @@ namespace QBG { #endif } if (searchContainer.resultIsAvailable()) { - if (searchContainer.exactResultSize > 0) { + if (parameterExactResultSize > 0) { NGT::ObjectDistances &qresults = searchContainer.getResult(); - refineDistances(searchContainer, quantizer, result, qresults); + refineDistances(quantizer, result, qresults, parameterExactResultSize, resizedQuery); } else { searchContainer.getResult().moveFrom(result); } } else { - if (searchContainer.exactResultSize > 0) { - refineDistances(searchContainer, quantizer, result, searchContainer.workingResult); + if (parameterExactResultSize > 0) { + refineDistances(quantizer, result, searchContainer.workingResult, parameterExactResultSize, resizedQuery); } else { searchContainer.workingResult = std::move(result); } } + deleteObject(query); } 
void searchInOneStep(QBG::SearchContainer &searchContainer) { @@ -1233,6 +1904,13 @@ namespace QBG { msg << "The specified index is not now searchable. "; NGTThrowException(msg); } + auto parameterSize = searchContainer.size; + auto parameterExactResultSize = searchContainer.size; + if (searchContainer.refinementExpansion >= 1.0) { + parameterSize *= searchContainer.refinementExpansion; + } else { + parameterExactResultSize = 0; + } auto &quantizer = getQuantizer(); auto &globalIndex = quantizer.globalCodebookIndex; @@ -1246,7 +1924,7 @@ namespace QBG { searchContainer.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; } - const auto requestedSize = searchContainer.size; + const auto requestedSize = parameterSize; searchContainer.size = std::numeric_limits::max(); // setup edgeSize @@ -1280,7 +1958,19 @@ namespace QBG { size_t explorationSize = 1; auto &quantizedObjectDistance = quantizer.getQuantizedObjectDistance(); std::unordered_map luts; + auto dimension = getQuantizer().globalCodebookIndex.getObjectSpace().getDimension(); std::vector rotatedQuery = searchContainer.objectVector; + if (rotatedQuery.size() < dimension) { + if (rotatedQuery.size() == quantizer.property.genuineDimension || + rotatedQuery.size() + 1 == quantizer.property.genuineDimension) { + rotatedQuery.resize(dimension); + } + } + std::unique_ptr> resizedQuery = nullptr; + if (parameterExactResultSize > 0) { + std::unique_ptr> tmp(new std::vector(rotatedQuery)); + resizedQuery = std::move(tmp); + } quantizedObjectDistance.rotation->mul(rotatedQuery.data()); NGT::Distance radius = searchContainer.radius; if (requestedSize >= std::numeric_limits::max()) { @@ -1435,15 +2125,15 @@ namespace QBG { } if (searchContainer.resultIsAvailable()) { - if (searchContainer.exactResultSize > 0) { + if (parameterExactResultSize > 0) { NGT::ObjectDistances &qresults = searchContainer.getResult(); - refineDistances(searchContainer, quantizer, results, qresults); + refineDistances(quantizer, results, qresults, 
parameterExactResultSize, resizedQuery); } else { searchContainer.getResult().moveFrom(results); } } else { - if (searchContainer.exactResultSize > 0) { - refineDistances(searchContainer, quantizer, results, searchContainer.workingResult); + if (parameterExactResultSize > 0) { + refineDistances(quantizer, results, searchContainer.workingResult, parameterExactResultSize, resizedQuery); } else { searchContainer.workingResult = std::move(results); } @@ -1505,7 +2195,7 @@ namespace QBG { static void build(const std::string &indexPath, std::vector> &quantizerCodebook, std::vector &codebookIndex, - std::vector &objectIndex, + std::vector> &objectIndex, size_t beginID = 1, size_t endID = 0) { buildNGTQ(indexPath, quantizerCodebook, codebookIndex, objectIndex, beginID, endID); buildQBG(indexPath); @@ -1521,7 +2211,7 @@ namespace QBG { size_t beginID = 1, size_t endID = 0, bool verbose = false) { std::vector> quantizerCodebook; std::vector codebookIndex; - std::vector objectIndex; + std::vector> objectIndex; { std::string codebookPath = quantizerCodebookFile; if (codebookPath.empty()) { @@ -1586,23 +2276,38 @@ namespace QBG { objectIndexPath = QBG::Index::getObjectIndexFile(indexPath); } if (objectIndexPath != "-") { - std::ifstream stream(objectIndexPath); - if (!stream) { - std::stringstream msg; - msg << "Cannot open the codebook index. " << objectIndexPath; - NGTThrowException(msg); + { + std::ifstream stream(objectIndexPath); + if (!stream) { + std::stringstream msg; + msg << "Cannot open the codebook index. 
" << objectIndexPath; + NGTThrowException(msg); + } + size_t nOfObjs = 0; + std::string line; + while (getline(stream, line)) nOfObjs++; + objectIndex.resize(nOfObjs); } - std::string line; - while (getline(stream, line)) { - std::vector tokens; - NGT::Common::tokenize(line, tokens, " \t"); - std::vector object; - if (tokens.size() != 1) { + { + std::ifstream stream(objectIndexPath); + if (!stream) { std::stringstream msg; - msg << "The specified object index is invalid. " << line; + msg << "Cannot open the codebook index. " << objectIndexPath; NGTThrowException(msg); } - objectIndex.push_back(NGT::Common::strtol(tokens[0])); + std::string line; + size_t idx = 0; + while (getline(stream, line)) { + std::vector tokens; + NGT::Common::tokenize(line, tokens, " \t"); + if (tokens.size() > 0) { + objectIndex[idx].reserve(tokens.size()); + for (auto &token : tokens) { + objectIndex[idx].emplace_back(NGT::Common::strtol(token)); + } + } + idx++; + } } } } @@ -1612,7 +2317,7 @@ namespace QBG { static void buildNGTQ(const std::string &indexPath, std::vector> &quantizerCodebook, std::vector &codebookIndex, - std::vector &objectIndex, + std::vector> &objectIndex, size_t beginID = 1, size_t endID = 0, bool verbose = false) { NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); @@ -1634,34 +2339,49 @@ namespace QBG { size_t size = index.getQuantizer().objectList.size(); size = size == 0 ? 0 : size - 1; objectIndex.resize(size); + for (auto &list : objectIndex) { + list.emplace_back(0); + } } - index.createIndex(quantizerCodebook, codebookIndex, objectIndex, beginID, endID); + index.createIndex(codebookIndex, objectIndex, beginID, endID); } { + { + const std::string comcp = "cp -f " + QBG::Index::getBlobFile(indexPath) + " " + QBG::Index::getStoredBlobFile(indexPath); + if (system(comcp.c_str()) == -1) { + std::cerr << "Warning. cannot remove the blob. 
" + << comcp << std::endl; + } + } char *s = getenv("NGT_NOT_REMOVE_WORKSPACE"); if (s == 0) { - const string comrmdir = "rm -rf " + indexPath + "/" + getWorkspaceName(); + const std::string comrmdir = "rm -rf " + indexPath + "/" + getWorkspaceName(); if (system(comrmdir.c_str()) == -1) { std::cerr << "Warning. cannot remove the workspace directory. " << comrmdir << std::endl; } } - const string comrm = "rm -f " + indexPath + "/" + NGTQ::Quantizer::getInvertedIndexFile(); - if (system(comrm.c_str()) == -1) { - std::cerr << "Warning. cannot remove the indeverted index. " - << comrm << std::endl; + { + const std::string comrm = "rm -f " + indexPath + "/" + NGTQ::Quantizer::getInvertedIndexFile(); + if (system(comrm.c_str()) == -1) { + std::cerr << "Warning. cannot remove the inverted index. " + << comrm << std::endl; + } } } timer.stop(); + index.save(); + + QBG::Optimizer::extractScaleAndOffset(indexPath, -1.0, -1, verbose); + + redirector.end(); std::cerr << "NGTQ index is completed." << std::endl; std::cerr << " time=" << timer << std::endl; std::cerr << " vmsize=" << NGT::Common::getProcessVmSizeStr() << std::endl; std::cerr << " peak vmsize=" << NGT::Common::getProcessVmPeakStr() << std::endl; std::cerr << "saving..." << std::endl; - index.save(); - redirector.end(); } static void buildQBG(const std::string &indexPath, bool verbose = false) { @@ -1814,6 +2534,8 @@ namespace QBG { assert(threadSize != 0); size_t dataSize = 0; + NGTQ::Property property; + property.load(indexPath); { const char *ngtDirString = "/tmp/ngt-XXXXXX"; char ngtDir[strlen(ngtDirString) + 1]; @@ -1826,9 +2548,8 @@ namespace QBG { msg << "Error! moving is failed. 
" << mvcom; NGTThrowException(msg); } - - NGT::Index::append(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(), blobs, threadSize, dataSize); - + NGT::Index::appendFromTextObjectFile(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(), + blobs, dataSize); auto unlog = false; NGT::GraphOptimizer graphOptimizer(unlog); graphOptimizer.searchParameterOptimization = false; @@ -1852,9 +2573,11 @@ namespace QBG { std::cerr << "Warning. remove is failed. " << rmcom << std::endl; } } - NGTQ::Property property; - property.load(indexPath); + if (property.centroidCreationMode != NGTQ::CentroidCreationModeStaticLayer && + property.centroidCreationMode != NGTQ::CentroidCreationModeStatic) { + std::cerr << "Warning. Inspite of not static mode, load the local codebook." << std::endl; + } std::vector tokens; NGT::Common::tokenize(localCodebooks, tokens, "@"); if (tokens.size() != 2) { @@ -1868,7 +2591,8 @@ namespace QBG { std::cerr << data.str() << "->" << localCodebook.str() << std::endl; NGT::Index::append(localCodebook.str(), data.str(), threadSize, dataSize); } - + property.localCodebookState = true; + property.save(indexPath); #ifdef NGTQ_QBG std::vector> qCodebook; { @@ -1921,6 +2645,17 @@ namespace QBG { redirector.end(); } + static void setupObjects(std::string indexPath, size_t nOfObjects, bool verbose) { + NGTQ::Property property; + property.load(indexPath); + if (property.distanceType == NGTQ::DistanceType::DistanceTypeInnerProduct) { + Optimizer::convertObjectsFromInnerProductToL2(indexPath, nOfObjects, verbose); + } + if (property.distanceType == NGTQ::DistanceType::DistanceTypeNormalizedCosine) { + Optimizer::normalizeObjectsForCosine(indexPath, nOfObjects, verbose); + } + } + static const std::string getSubvectorPrefix() { return "sv"; } static const std::string getHierarchicalClusteringPrefix() { return "hkc"; } static const std::string getSecondCentroidSuffix() { return "_2c"; } @@ -1946,6 +2681,7 @@ namespace QBG { static const std::string getObjectIndexFile(std::string 
indexPath) { return getPrefix(indexPath) + getObjTo3rdSuffix(); } static const std::string getRotationFile(std::string indexPath) { return getPQFile(indexPath) + "/" + getRotationFile (); } + static const std::string getStoredBlobFile(std::string indexPath) { return indexPath + "/blbc"; } static const std::string getWorkspaceName() { return "ws"; } const std::string path; diff --git a/lib/NGT/NGTQ/QuantizedGraph.h b/lib/NGT/NGTQ/QuantizedGraph.h index 8a30ceb..f7bac30 100644 --- a/lib/NGT/NGTQ/QuantizedGraph.h +++ b/lib/NGT/NGTQ/QuantizedGraph.h @@ -66,7 +66,9 @@ namespace NGTQG { class QuantizedGraphRepository : public std::vector { typedef std::vector PARENT; public: - QuantizedGraphRepository(NGTQ::Index &quantizedIndex): numOfSubspaces(quantizedIndex.getQuantizer().property.localDivisionNo) {} + QuantizedGraphRepository(NGTQ::Index &quantizedIndex) : + quantizer(quantizedIndex.getQuantizer()), + numOfSubspaces(quantizedIndex.getQuantizer().property.localDivisionNo) {} ~QuantizedGraphRepository() {} void *get(size_t id) { @@ -86,6 +88,7 @@ namespace NGTQG { void construct(NGT::GraphRepository &graphRepository, NGTQ::Index &quantizedIndex, size_t maxNoOfEdges) { NGTQ::InvertedIndexEntry invertedIndexObjects(numOfSubspaces); quantizedIndex.getQuantizer().extractInvertedIndexObject(invertedIndexObjects); + std::cerr << "inverted index object size=" << invertedIndexObjects.size() << std::endl; quantizedIndex.getQuantizer().eraseInvertedIndexObject(); @@ -101,6 +104,7 @@ namespace NGTQG { size_t numOfEdges = node.size() < maxNoOfEdges ? 
node.size() : maxNoOfEdges; (*this)[id].ids.reserve(numOfEdges); NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedIndex.getQuantizer().divisionNo, numOfEdges); + std::cerr << "pass XX " << node.size() << ":" << invertedIndexObjects.size() << std::endl; #ifdef NGT_SHARED_MEMORY_ALLOCATOR for (auto i = node.begin(graphRepository.allocator); i != node.end(graphRepository.allocator); ++i) { if (distance(node.begin(graphRepository.allocator), i) >= static_cast(numOfEdges)) { @@ -140,7 +144,10 @@ namespace NGTQG { } void serialize(std::ofstream &os, NGT::ObjectSpace *objspace = 0) { +#ifdef NGT_IVI +#else NGTQ::QuantizedObjectProcessingStream quantizedObjectProcessingStream(numOfSubspaces); +#endif uint64_t n = numOfSubspaces; NGT::Serializer::write(os, n); n = PARENT::size(); @@ -149,14 +156,21 @@ namespace NGTQG { uint32_t sid = (*i).subspaceID; NGT::Serializer::write(os, sid); NGT::Serializer::write(os, (*i).ids); +#ifdef NGT_IVI + size_t streamSize = quantizer.getQuantizedObjectDistance().getSizeOfCluster((*i).ids.size()); +#else size_t streamSize = quantizedObjectProcessingStream.getUint4StreamSize((*i).ids.size()); +#endif NGT::Serializer::write(os, static_cast((*i).objects), streamSize); } } void deserialize(std::ifstream &is, NGT::ObjectSpace *objectspace = 0) { try { +#ifdef NGT_IVI +#else NGTQ::QuantizedObjectProcessingStream quantizedObjectProcessingStream(numOfSubspaces); +#endif uint64_t n; NGT::Serializer::read(is, n); numOfSubspaces = n; @@ -167,7 +181,11 @@ namespace NGTQG { NGT::Serializer::read(is, sid); (*i).subspaceID = sid; NGT::Serializer::read(is, (*i).ids); +#ifdef NGT_IVI + size_t streamSize = quantizer.getQuantizedObjectDistance().getSizeOfCluster((*i).ids.size()); +#else size_t streamSize = quantizedObjectProcessingStream.getUint4StreamSize((*i).ids.size()); +#endif uint8_t *objectStream = new uint8_t[streamSize]; NGT::Serializer::read(is, objectStream, streamSize); (*i).objects = objectStream; @@ -200,6 +218,7 @@ namespace NGTQG 
// Abstract interface of a set of boolean flags addressed by index.
class BooleanSet {
 public:
  // Virtual so that an implementation can be destroyed safely through a
  // BooleanSet pointer or reference.
  virtual ~BooleanSet() {}
  // Returns the flag stored at idx.
  virtual bool get(size_t idx) = 0;
  // Turns the flag at idx on.
  virtual void set(size_t idx) = 0;
  // Subscript form of reading the flag at idx.
  virtual bool operator[](size_t idx) = 0;
};

// BooleanSet backed by a std::vector<bool> holding `s` flags, all initially false.
// Indexes are forwarded to std::vector<bool>::operator[] and are therefore not
// range-checked: idx must be smaller than the size given at construction.
class BooleanVector : public std::vector<bool>, public BooleanSet {
 public:
  BooleanVector(size_t s): std::vector<bool>(s, false) {}
  virtual ~BooleanVector() {}
  // Reads the flag from the underlying vector. The previous implementation
  // called BooleanVector::get() from itself, i.e. it recursed without end.
  bool get(size_t idx) { return std::vector<bool>::operator[](idx); }
  void set(size_t idx) { std::vector<bool>::operator[](idx) = true; }
  bool operator[](size_t idx) { return std::vector<bool>::operator[](idx); }
};
" << dim << ":" << dim * dim << ":" << v << std::endl; abort(); } - is.read(reinterpret_cast(PARENT::data()), PARENT::size() * sizeof(float)); - } bool isIdentity() { @@ -466,8 +493,8 @@ class InvertedIndexObject { localID[i] = 0; } } - uint32_t id; - T localID[1]; + uint32_t id; + T localID[1]; }; template @@ -672,7 +699,7 @@ class LocalDatam { #else LocalDatam(size_t iii, size_t iil) : iiIdx(iii), iiLocalIdx(iil) {} #endif - size_t iiIdx; + size_t iiIdx; size_t iiLocalIdx; #ifdef NGTQ_QBG uint32_t subspaceID; @@ -686,12 +713,13 @@ class SerializableObject : public NGT::Object { }; enum DataType { - DataTypeUint8 = 0, - DataTypeFloat = 1 + DataTypeUint8 = ObjectFile::DataTypeUint8, + DataTypeFloat = ObjectFile::DataTypeFloat, #ifdef NGT_HALF_FLOAT - , - DataTypeFloat16 = 2 + DataTypeFloat16 = ObjectFile::DataTypeFloat16, #endif + DataTypeNone = ObjectFile::DataTypeNone, + DataTypeAny = 100 }; typedef NGT::ObjectSpace::DistanceType DistanceType; @@ -701,7 +729,7 @@ class SerializableObject : public NGT::Object { CentroidCreationModeStatic = 1, CentroidCreationModeDynamicKmeans = 2, CentroidCreationModeStaticLayer = 3, - CentroidCreationModeNone = 9 + CentroidCreationModeNone = 9 }; enum AggregationMode { @@ -717,6 +745,15 @@ class SerializableObject : public NGT::Object { QuantizerTypeQG = 1, QuantizerTypeQBG = 2 }; + + enum ClusterDataType { + ClusterDataTypeNone = 0, + ClusterDataTypeNQ = 1, + ClusterDataTypePQ4 = 2, + ClusterDataTypeSQSU8 = 4, + ClusterDataTypeSQU7T = 7, + ClusterDataTypeSQS8T = 9 + }; class Property { public: @@ -742,14 +779,20 @@ class SerializableObject : public NGT::Object { localCentroidCreationMode = CentroidCreationModeDynamic; localIDByteSize = 0; // finally decided by localCentroidLimit localCodebookState = false; // not completed - localClusteringSampleCoefficient = 10; + localClusteringSampleCoefficient = 10; quantizerType = QuantizerTypeNone; #ifdef NGTQ_OBJECT_IN_MEMORY - objectListOnMemory = false; + refinementDataType = 
DataTypeNone; #endif + localClusterDataType = NGTQ::ClusterDataTypePQ4; #ifdef NGT_SHARED_MEMORY_ALLOCATOR invertedIndexSharedMemorySize = 512; // MB #endif + scalarQuantizationScale = 0.0; + scalarQuantizationOffset = 0.0; + scalarQuantizationClippingRate = 0.01; + scalarQuantizationNoOfSamples = 0; + maxMagnitude = -1.0; } void save(const string &path) { @@ -772,16 +815,22 @@ class SerializableObject : public NGT::Object { prop.set("BatchSize", (long)batchSize); prop.set("CentroidCreationMode", (long)centroidCreationMode); prop.set("LocalCentroidCreationMode", (long)localCentroidCreationMode); - prop.set("LocalIDByteSize", (long)localIDByteSize); + prop.set("LocalIDByteSize", (long)localIDByteSize); prop.set("LocalCodebookState", (long)localCodebookState); prop.set("LocalSampleCoefficient", (long)localClusteringSampleCoefficient); prop.set("QuantizerType", (long)quantizerType); #ifdef NGTQ_OBJECT_IN_MEMORY - prop.set("ObjectListOnMemory", (long)objectListOnMemory); + prop.set("RefinementDataType", (long)refinementDataType); #endif + prop.set("LocalClusterDataType", (long)localClusterDataType); #ifdef NGT_SHARED_MEMORY_ALLOCATOR prop.set("InvertedIndexSharedMemorySize", (long)invertedIndexSharedMemorySize); #endif + prop.set("ScalarQuantizationScale", scalarQuantizationScale); + prop.set("ScalarQuantizationOffset", scalarQuantizationOffset); + prop.set("ScalarQuantizationClippingRate", scalarQuantizationClippingRate); + prop.set("ScalarQuantizationNoOfSamples", scalarQuantizationNoOfSamples); + prop.set("MaxMagnitude", maxMagnitude); prop.save(path + "/prf"); } @@ -835,16 +884,22 @@ class SerializableObject : public NGT::Object { setupLocalIDByteSize(); quantizerType = (QuantizerType)prop.getl("QuantizerType", quantizerType); #ifdef NGTQ_OBJECT_IN_MEMORY - objectListOnMemory = prop.getl("ObjectListOnMemory", objectListOnMemory); + refinementDataType = (DataType)prop.getl("RefinementDataType", refinementDataType); #endif + localClusterDataType = 
(ClusterDataType)prop.getl("LocalClusterDataType", localClusterDataType); #ifdef NGT_SHARED_MEMORY_ALLOCATOR invertedIndexSharedMemorySize = prop.getl("InvertedIndexSharedMemorySize", invertedIndexSharedMemorySize); #endif + scalarQuantizationScale = prop.getf("ScalarQuantizationScale", scalarQuantizationScale); + scalarQuantizationOffset = prop.getf("ScalarQuantizationOffset", scalarQuantizationOffset); + scalarQuantizationClippingRate = prop.getf("ScalarQuantizationClippingRate", scalarQuantizationClippingRate); + scalarQuantizationNoOfSamples = prop.getl("ScalarQuantizationNoOfSamples", scalarQuantizationNoOfSamples); + maxMagnitude = prop.getf("MaxMagnitude", maxMagnitude); } - void setup(const Property &p) { - *this = p; + size_t getDataSize() { + size_t dataSize = 0; #ifdef NGTQ_QBG switch (genuineDataType) { #else @@ -877,8 +932,21 @@ class SerializableObject : public NGT::Object { NGTThrowException("Quantizer constructor: Inner error. Invalid data type."); break; } + return dataSize; + } + + void setup(const Property &p) { + *this = p; setupLocalIDByteSize(); localDivisionNo = getLocalCodebookNo(); +#ifdef NGTQ_QBG + if (dimension == 0) { + dimension = genuineDimension; + } + if (dimension % 4 != 0) { + dimension = ((dimension - 1) / 4 + 1) * 4; + } +#endif } inline size_t getLocalCodebookNo() { return singleLocalCodebook ? 
1 : localDivisionNo; } @@ -906,11 +974,17 @@ class SerializableObject : public NGT::Object { size_t localClusteringSampleCoefficient; QuantizerType quantizerType; #ifdef NGTQ_OBJECT_IN_MEMORY - bool objectListOnMemory; + DataType refinementDataType; #endif + ClusterDataType localClusterDataType; #ifdef NGT_SHARED_MEMORY_ALLOCATOR size_t invertedIndexSharedMemorySize; #endif + float scalarQuantizationScale; + float scalarQuantizationOffset; + float scalarQuantizationClippingRate; + size_t scalarQuantizationNoOfSamples; + float maxMagnitude; }; #ifdef NGTQ_DISTANCE_ANGLE @@ -924,230 +998,599 @@ class SerializableObject : public NGT::Object { }; #endif -class QuantizedObjectDistance { -public: - class DistanceLookupTable { - public: - DistanceLookupTable():localDistanceLookup(0) {} - ~DistanceLookupTable() { - if (localDistanceLookup != 0) { - delete[] localDistanceLookup; - localDistanceLookup = 0; - } - } - bool isValid(size_t idx) { -#ifdef NGTQ_QBG - std::cerr << "isValid() is not implemented" << std::endl; - abort(); -#else - return flag[idx]; -#endif - } -#ifndef NGTQ_DISTANCE_ANGLE - void set(size_t idx, double d) { -#ifndef NGTQ_QBG - flag[idx] = true; -#endif - localDistanceLookup[idx] = d; - } - double getDistance(size_t idx) { return localDistanceLookup[idx]; } -#endif - void initialize(size_t s) { - size = s; -#ifdef NGTQ_DISTANCE_ANGLE - localDistanceLookup = new LocalDistanceLookup[size]; -#else - localDistanceLookup = new float[size]; -#endif -#ifndef NGTQ_QBG - flag.resize(size, false); -#endif - } -#ifdef NGTQ_DISTANCE_ANGLE - LocalDistanceLookup *localDistanceLookup; -#else - float *localDistanceLookup; -#endif - size_t size; -#ifndef NGTQ_QBG - vector flag; -#endif - }; - - class DistanceLookupTableUint8 { - public: - DistanceLookupTableUint8():localDistanceLookup(0) {} - ~DistanceLookupTableUint8() { - if (localDistanceLookup != 0) { - delete[] localDistanceLookup; - localDistanceLookup = 0; - delete[] scales; - delete[] offsets; - } - } - void 
initialize(size_t numOfSubspaces, size_t localCodebookCentroidNo) { - size_t numOfAlignedSubvectors = ((numOfSubspaces - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; - size = numOfAlignedSubvectors * localCodebookCentroidNo; - localDistanceLookup = new uint8_t[size]; - scales = new float[numOfAlignedSubvectors]; - offsets = new float[numOfAlignedSubvectors]; - range512 = (numOfSubspaces >> 2) * step512; - range256 = (((numOfSubspaces - 1) >> 1) + 1) * step256; - } - - uint8_t *localDistanceLookup; - size_t size; - size_t aslignedNumOfSubspaces; - size_t localCodebookCentroidNo; - float *scales; - float *offsets; - float totalOffset; - size_t range512; - size_t range256; - static constexpr size_t step512 = 32; - static constexpr size_t step256 = 16; - }; - QuantizedObjectDistance(){} - virtual ~QuantizedObjectDistance() { - delete[] localCentroids; - delete[] localCentroidsForSIMD; +class QuantizedObjectProcessingStream { + public: + QuantizedObjectProcessingStream(size_t numOfSubspaces, size_t nOfObjects) { + initialize(numOfSubspaces); + numOfObjects = nOfObjects; + setStreamSize(); + stream = new uint8_t[streamSize](); } - virtual double operator()(NGT::Object &object, size_t objectID, void *localID) = 0; + QuantizedObjectProcessingStream(size_t numOfSubspaces): stream(0) { + initialize(numOfSubspaces); + } - virtual double operator()(void *localID, DistanceLookupTable &distanceLUT) = 0; + ~QuantizedObjectProcessingStream() { + delete[] stream; + } -#ifdef NGTQBG_MIN - virtual float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) = 0; -#else - virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) = 0; -#endif - virtual double operator()(NGT::Object &object, size_t objectID, void *localID, DistanceLookupTable &distanceLUT) = 0; + void initialize(size_t divisionNo) { + numOfAlignedSubvectors = ((divisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; + alignedBlockSize 
= NGTQ_SIMD_BLOCK_SIZE * numOfAlignedSubvectors; + } - template - inline double getAngleDistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebookIndex != 0); - NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); + static size_t getNumOfAlignedObjects(size_t numOfObjects) { + return (((numOfObjects - 1) / NGTQ_SIMD_BLOCK_SIZE + 1) * NGTQ_SIMD_BLOCK_SIZE); + } + + void setStreamSize() { + numOfAlignedObjects = getNumOfAlignedObjects(numOfObjects); + streamSize = numOfAlignedObjects * numOfAlignedSubvectors; + return; + } +#ifdef NGTQ_QBG + void arrangeQuantizedObject(size_t dataNo, size_t subvectorNo, uint8_t quantizedObject) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); + abort(); #else - float *lcptr = (float*)&lcentroid[0]; + size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE; + size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE; + stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft] = quantizedObject; #endif - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double a = *optr++; - double b = *gcptr++ + *lcptr++; - normA += a * a; - normB += b * b; 
- sum += a * b; - } - } - double cosine = sum / (sqrt(normA) * sqrt(normB)); - if (cosine >= 1.0F) { - return 0.0F; - } else if (cosine <= -1.0F) { - return acos(-1.0F); - } - return acos(cosine); } -#if defined(NGT_NO_AVX) - template - inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebookIndex != 0); - NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); + void arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { + for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) { +#ifdef NGTQ_UINT8_LUT +#ifdef NGTQ_SIMD_BLOCK_SIZE #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); + abort(); #else - float *lcptr = (float*)&lcentroid[0]; + arrangeQuantizedObject(oidx, idx, invertedIndexObjects[oidx].localID[idx] - 1); #endif - double d = 0.0; - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - } - return sqrt(distance); - } -#else - template - inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T 
localID[]) { - assert(globalCodebookIndex != 0); - NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); #else - unsigned char *gcptr = &gcentroid[0]; + objectData[idx * noobjs + oidx] = invertedIndexObjects[oidx].localID[idx] - 1; #endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); #else - float *lcptr = (float*)&lcentroid[0]; + objectData[idx * noobjs + oidx] = invertedIndexObjects[oidx].localID[idx]; #endif - - float *lcendptr = lcptr + localDataSize - 3; - __m128 sum = _mm_setzero_ps(); - while (lcptr < lcendptr) { - __m128i x1 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)optr)); - __m128i x2 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)gcptr)); - x1 = _mm_sub_epi32(x1, x2); - __m128 sub = _mm_sub_ps(_mm_cvtepi32_ps(x1), _mm_loadu_ps(lcptr)); - sum = _mm_add_ps(sum, _mm_mul_ps(sub, sub)); - optr += 4; - gcptr += 4; - lcptr += 4; - } - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum); - double d = f[0] + f[1] + f[2] + f[3]; - while (lcptr < lcendptr) { - double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; - d += sub * sub; } - distance += d; } - distance = sqrt(distance); - return distance; + } + + uint8_t getQuantizedObject(size_t dataNo, size_t subvectorNo) { + size_t blkNo = 
dataNo / NGTQ_SIMD_BLOCK_SIZE; + size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE; + return stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft]; } #endif - template + uint8_t* compressIntoUint4() { + size_t idx = 0; + size_t uint4StreamSize = streamSize / 2; + uint8_t *uint4Objects = new uint8_t[uint4StreamSize](); + while (idx < streamSize) { + for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { + for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) { + if (idx / 2 > uint4StreamSize) { + std::stringstream msg; + msg << "Quantizer::compressIntoUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize; + NGTThrowException(msg); + } + if (idx % 2 == 0) { + uint4Objects[idx / 2] = stream[idx]; + } else { + uint4Objects[idx / 2] |= (stream[idx] << 4); + } + idx++; + } + } + } + return uint4Objects; + } + + void uncompressFromUint4(uint8_t *uint4Objects) { + size_t idx = 0; + size_t uint4StreamSize = streamSize / 2; + while (idx < streamSize) { + for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { + for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) { + if (idx / 2 > uint4StreamSize) { + std::stringstream msg; + msg << "Quantizer::uncompressFromUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize; + NGTThrowException(msg); + } + if (idx % 2 == 0) { + stream[idx] = uint4Objects[idx / 2] & 0x0f; + } else { + stream[idx] = uint4Objects[idx / 2] >> 4; + } + idx++; + } + } + } + } + +#ifdef NGTQ_QBG + void restoreToInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects) { +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + std::cerr << "Not implemented." 
<< std::endl; + abort(); +#else + invertedIndexObjects.resize(numOfAlignedObjects); + for (size_t oidx = 0; oidx < numOfAlignedObjects; oidx++) { + for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { + invertedIndexObjects[oidx].localID[lidx] = getQuantizedObject(oidx, lidx) + 1; + } + } + invertedIndexObjects.resize(numOfObjects); +#endif + } +#endif + + uint8_t* getStream() { + auto s = stream; + stream = 0; + return s; + } + + size_t getUint4StreamSize(size_t nOfObjects) { + numOfObjects = nOfObjects; + setStreamSize(); + return streamSize / 2; + } + + size_t getStreamSize(size_t nOfObjects) { + numOfObjects = nOfObjects; + setStreamSize(); + return streamSize; + } + + uint8_t *stream; + size_t numOfAlignedSubvectors; + size_t alignedBlockSize; + size_t numOfAlignedObjects; + size_t numOfObjects; + size_t streamSize; +}; + +///// +class Quantizer; + +template +class ObjectProcessingStream { + public: + ObjectProcessingStream(size_t dimension, size_t nOfObjects, NGTQ::Quantizer &q): quantizer(q) { + initialize(dimension); + numOfObjects = nOfObjects; + setStreamSize(); + stream = new uint8_t[streamSize](); + } + + ObjectProcessingStream(size_t dim): quantizer(*reinterpret_cast(0)) { + initialize(dim); + stream = 0; + } + + ~ObjectProcessingStream() { + delete[] stream; + } + + void initialize(size_t dim) { + dimension = dim; + } + + void setStreamSize() { + streamSize = sizeof(TYPE) * dimension * numOfObjects; + return; + } + + void arrangeObject(size_t dataNo, void *object) { + if (dataNo >= numOfObjects) { + std::stringstream msg; + msg << "The data index is out of the range. 
" << dataNo << ":" << numOfObjects; + NGTThrowException(msg); + } +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + abort(); +#else + memcpy(stream + dataNo * dimension * sizeof(TYPE), object, dimension * sizeof(TYPE)); +#endif + } + + void arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects); + + uint8_t* getStream() { + auto s = stream; + stream = 0; + return s; + } + + size_t getStreamSize(size_t nOfObjects) { + numOfObjects = nOfObjects; + setStreamSize(); + return streamSize; + } + + uint8_t *stream; + Quantizer &quantizer; + size_t dimension; + size_t numOfObjects ; + size_t streamSize; +}; + +class ScalarQuantizedInt8ObjectProcessingStream { + public: + ScalarQuantizedInt8ObjectProcessingStream(NGTQ::Quantizer &q): quantizer(q) {} + + ScalarQuantizedInt8ObjectProcessingStream(size_t dimension, size_t nOfObjects, + const std::type_info *tinfo, NGTQ::Quantizer &q): quantizer(q) { + initialize(dimension, nOfObjects, tinfo); + stream = new uint8_t[streamSize](); + } + + ScalarQuantizedInt8ObjectProcessingStream(size_t dim): quantizer(*reinterpret_cast(0)) { + initialize(dim, 0); + stream = 0; + } + + ~ScalarQuantizedInt8ObjectProcessingStream() { + delete[] stream; + } + + void initialize(size_t dim, size_t nOfObjects, const std::type_info *typeInfo = 0) { + dimension = dim; + numOfObjects = nOfObjects; + setStreamSize(); + dataTypeInfo = typeInfo; + } + + virtual void arrangeObject(size_t dataNo, std::vector &object, float scale, float offset, bool shift) { + if (dataNo >= numOfObjects) { + std::stringstream msg; + msg << "The data index is out of the range. 
" << dataNo << ":" << numOfObjects; + NGTThrowException(msg); + } + NGT::ObjectSpace::quantizeToQint8(object, *dataTypeInfo, dimension, offset, scale, shift); + for (size_t i = 0; i < object.size(); i++) { + *(stream + dataNo * dimension * sizeof(uint8_t) + i) = static_cast(object[i]); + } + } + + void arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects); + + uint8_t* getStream() { + auto s = stream; + stream = 0; + return s; + } + + void setStreamSize() { + streamSize = sizeof(uint8_t) * dimension * numOfObjects; + return; + } + + size_t getStreamSize(size_t nOfObjects) { + numOfObjects = nOfObjects; + setStreamSize(); + return streamSize; + } + + uint8_t *stream; + Quantizer &quantizer; + size_t dimension; + size_t numOfObjects ; + size_t streamSize; + const std::type_info* dataTypeInfo; +}; + +class ScalarQuantizedUint8TransposedObjectProcessingStream : public ScalarQuantizedInt8ObjectProcessingStream { + public: + ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dimension, size_t nOfObjects, NGTQ::Quantizer &q): + ScalarQuantizedInt8ObjectProcessingStream(q) { + initialize(dimension, nOfObjects); + stream = new uint8_t[streamSize](); + } + + ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dim): + ScalarQuantizedInt8ObjectProcessingStream(*reinterpret_cast(0)) { + initialize(dim, 0); + stream = 0; + } + + void initialize(size_t dim, size_t nOfObjects) { + dimension = dim; + numOfObjects = nOfObjects; + smallBlockSize = 16; + blockSize = 64; + lineSize = blockSize * nOfObjects; + setStreamSize(); + } + + void arrangeObject(size_t dataNo, std::vector &object, float scale, float offset, bool shift) { + if (dataNo >= numOfObjects) { + std::stringstream msg; + msg << "The data index is out of the range. 
" << dataNo << ":" << numOfObjects; + NGTThrowException(msg); + } + NGT::ObjectSpace::quantizeToQint8(object, *dataTypeInfo, dimension, offset, scale, shift); + size_t endOfBlock = ((object.size() - 1) / blockSize) * blockSize; + for (size_t i = 0; i < endOfBlock; i++) { + size_t idx = lineSize * (i / blockSize) + blockSize * dataNo + i % blockSize; + if (idx >= streamSize) { + std::stringstream msg; + msg << "Fatal inner error! (1) " << idx << ":" << streamSize << " " + << dataNo << ":" << i << "/" << object.size(); + NGTThrowException(msg); + } + stream[idx] = static_cast(object[i]); + } + for (size_t i = endOfBlock; i < object.size(); i++) { + size_t idx = lineSize * (i / blockSize) + (i - endOfBlock) / smallBlockSize * (smallBlockSize * numOfObjects) + smallBlockSize * dataNo + (i - endOfBlock) % smallBlockSize; + if (idx >= streamSize) { + std::stringstream msg; + msg << "Fatal inner error! (2) " << idx << ":" << streamSize << " " + << dataNo << ":" << i << "/" << object.size(); + NGTThrowException(msg); + } + stream[idx] = static_cast(object[i]); + } + } + + void setStreamSize() { + streamSize = ((dimension - 1) / smallBlockSize + 1) * smallBlockSize * numOfObjects; + return; + } + + size_t getStreamSize(size_t nOfObjects) { + numOfObjects = nOfObjects; + setStreamSize(); + return streamSize; + } + size_t smallBlockSize; + size_t blockSize; + size_t lineSize; +}; +///// + +class QuantizedObjectDistance { +public: + class DistanceLookupTable { + public: + DistanceLookupTable():localDistanceLookup(0) {} + ~DistanceLookupTable() { + if (localDistanceLookup != 0) { + delete[] localDistanceLookup; + localDistanceLookup = 0; + } + } + bool isValid(size_t idx) { +#ifdef NGTQ_QBG + std::cerr << "isValid() is not implemented" << std::endl; + abort(); +#else + return flag[idx]; +#endif + } +#ifndef NGTQ_DISTANCE_ANGLE + void set(size_t idx, double d) { +#ifndef NGTQ_QBG + flag[idx] = true; +#endif + localDistanceLookup[idx] = d; + } + double getDistance(size_t idx) { 
return localDistanceLookup[idx]; } +#endif + void initialize(size_t s) { + size = s; +#ifdef NGTQ_DISTANCE_ANGLE + localDistanceLookup = new LocalDistanceLookup[size]; +#else + localDistanceLookup = new float[size]; +#endif +#ifndef NGTQ_QBG + flag.resize(size, false); +#endif + } + +#ifdef NGTQ_DISTANCE_ANGLE + LocalDistanceLookup *localDistanceLookup; +#else + float *localDistanceLookup; +#endif + size_t size; +#ifndef NGTQ_QBG + vector flag; +#endif + }; + + class DistanceLookupTableUint8 { + public: + DistanceLookupTableUint8():localDistanceLookup(0) {} + ~DistanceLookupTableUint8() { + if (localDistanceLookup != 0) { + delete[] localDistanceLookup; + localDistanceLookup = 0; + delete[] scales; + delete[] offsets; + } + } + void initialize(size_t numOfSubspaces, size_t localCodebookCentroidNo) { + size_t numOfAlignedSubvectors = ((numOfSubspaces - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; + size = numOfAlignedSubvectors * localCodebookCentroidNo; + localDistanceLookup = new uint8_t[size]; + scales = new float[numOfAlignedSubvectors]; + offsets = new float[numOfAlignedSubvectors]; + range512 = (numOfSubspaces >> 2) * step512; + range256 = (((numOfSubspaces - 1) >> 1) + 1) * step256; + } + + uint8_t *localDistanceLookup; + size_t size; + size_t aslignedNumOfSubspaces; + size_t localCodebookCentroidNo; + float *scales; + float *offsets; + float totalOffset; + size_t range512; + size_t range256; + static constexpr size_t step512 = 32; + static constexpr size_t step256 = 16; + }; +#ifdef NGT_IVI + QuantizedObjectDistance(Quantizer &q): quantizer(q) {} + QuantizedObjectDistance(): quantizer(*reinterpret_cast(0)){} +#else + QuantizedObjectDistance(){} +#endif + + virtual ~QuantizedObjectDistance() { + delete[] localCentroids; + delete[] localCentroidsForSIMD; + } + + virtual double operator()(NGT::Object &object, size_t objectID, void *localID) = 0; + + virtual double operator()(void *localID, DistanceLookupTable &distanceLUT) = 0; + +#ifdef NGTQBG_MIN + virtual 
float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) = 0; +#else + virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) = 0; +#endif +#ifdef NGTQBG_MIN + virtual float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) = 0; +#else + virtual void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) = 0; +#endif + virtual double operator()(NGT::Object &object, size_t objectID, void *localID, DistanceLookupTable &distanceLUT) = 0; + + template + inline double getAngleDistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { + assert(globalCodebookIndex != 0); + NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); + size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); + size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); +#else + unsigned char *gcptr = &gcentroid[0]; +#endif + unsigned char *optr = &((NGT::Object&)object)[0]; + double normA = 0.0F; + double normB = 0.0F; + double sum = 0.0F; + for (size_t li = 0; li < localDivisionNo; li++) { + size_t idx = localCodebookNo == 1 ? 
0 : li; + NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); +#else + float *lcptr = (float*)&lcentroid[0]; +#endif + float *lcendptr = lcptr + localDataSize; + while (lcptr != lcendptr) { + double a = *optr++; + double b = *gcptr++ + *lcptr++; + normA += a * a; + normB += b * b; + sum += a * b; + } + } + double cosine = sum / (sqrt(normA) * sqrt(normB)); + if (cosine >= 1.0F) { + return 0.0F; + } else if (cosine <= -1.0F) { + return acos(-1.0F); + } + return acos(cosine); + } + +#if defined(NGT_NO_AVX) + template + inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { + assert(globalCodebookIndex != 0); + NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); + size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); + size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); +#else + unsigned char *gcptr = &gcentroid[0]; +#endif + unsigned char *optr = &((NGT::Object&)object)[0]; + double distance = 0.0; + for (size_t li = 0; li < localDivisionNo; li++) { + size_t idx = localCodebookNo == 1 ? 
0 : li; + NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); +#else + float *lcptr = (float*)&lcentroid[0]; +#endif + double d = 0.0; + float *lcendptr = lcptr + localDataSize; + while (lcptr != lcendptr) { + double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; + d += sub * sub; + } + distance += d; + } + return sqrt(distance); + } +#else + template + inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { + assert(globalCodebookIndex != 0); + NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); + size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); + size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + unsigned char *gcptr = &gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); +#else + unsigned char *gcptr = &gcentroid[0]; +#endif + unsigned char *optr = &((NGT::Object&)object)[0]; + double distance = 0.0; + for (size_t li = 0; li < localDivisionNo; li++) { + size_t idx = localCodebookNo == 1 ? 
0 : li; + NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); +#else + float *lcptr = (float*)&lcentroid[0]; +#endif + + float *lcendptr = lcptr + localDataSize - 3; + __m128 sum = _mm_setzero_ps(); + while (lcptr < lcendptr) { + __m128i x1 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)optr)); + __m128i x2 = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i const*)gcptr)); + x1 = _mm_sub_epi32(x1, x2); + __m128 sub = _mm_sub_ps(_mm_cvtepi32_ps(x1), _mm_loadu_ps(lcptr)); + sum = _mm_add_ps(sum, _mm_mul_ps(sub, sub)); + optr += 4; + gcptr += 4; + lcptr += 4; + } + __attribute__((aligned(32))) float f[4]; + _mm_store_ps(f, sum); + double d = f[0] + f[1] + f[2] + f[3]; + while (lcptr < lcendptr) { + double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; + d += sub * sub; + } + distance += d; + } + distance = sqrt(distance); + return distance; + } +#endif + + template inline double getAngleDistanceFloat(NGT::Object &object, size_t objectID, T localID[]) { assert(globalCodebookIndex != 0); NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); @@ -1235,8 +1678,8 @@ class QuantizedObjectDistance { dlu++; for (size_t k = 1; k < localCodebookCentroidNo; k++) { NGT::Object &lcentroid = (NGT::Object&)*localCodebookIndexes[li].getObjectSpace().getRepository().get(k); - float *lcptr = (float*)&lcentroid[0]; - float *lcendptr = lcptr + localDataSize; + float *lcptr = (float*)&lcentroid[0]; + float *lcendptr = lcptr + localDataSize; float *toptr = optr + oft; float *tgcptr = gcptr + oft; double normA = 0.0F; @@ -1255,11 +1698,13 @@ class QuantizedObjectDistance { } } #else - inline void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) { + + virtual void 
createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) { void *objectPtr = &((NGT::Object&)object)[0]; createDistanceLookup(objectPtr, objectID, distanceLUT); } - inline void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) { + + virtual void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) { assert(globalCodebookIndex != 0); #ifdef NGTQ_QBG void *globalCentroid = quantizationCodebook->data(objectID); @@ -1288,7 +1733,7 @@ class QuantizedObjectDistance { lut++; lcptr += localDataSize; for (size_t k = 1; k < localCodebookCentroidNo; k++) { - float *lcendptr = lcptr + localDataSize; + float *lcendptr = lcptr + localDataSize; float *toptr = optr + oft; float *tgcptr = gcptr + oft; float d = 0.0; @@ -1563,7 +2008,7 @@ class QuantizedObjectDistance { *lut++ = 0; lcptr += localDataSize; for (size_t k = 1; k < localCodebookCentroidNo; k++) { - float *lcendptr = lcptr + localDataSize; + float *lcendptr = lcptr + localDataSize; float *toptr = optr + oft; #if !defined(NGTQG_ZERO_GLOBAL) float *tgcptr = gcptr + oft; @@ -1584,11 +2029,11 @@ class QuantizedObjectDistance { } - inline void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { + virtual void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { void *objectPtr = &((NGT::Object&)object)[0]; createDistanceLookup(objectPtr, objectID, distanceLUT); } - inline void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { + virtual void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { assert(globalCodebookIndex != 0); size_t sizeOfObject = dimension * sizeOfType; #ifdef NGTQG_DOT_PRODUCT @@ -1674,6 +2119,12 @@ class QuantizedObjectDistance { c.initialize(localCodebookNo, localCodebookCentroidNo); } + virtual uint8_t 
*generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) = 0; + virtual void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) = 0; + virtual size_t getNumOfAlignedObjects(size_t noOfObjects) = 0; + virtual size_t getSizeOfCluster(size_t noOfObjects) = 0; + NGT::Index *globalCodebookIndex; NGT::Index *localCodebookIndexes; size_t localDivisionNo; @@ -1686,12 +2137,16 @@ class QuantizedObjectDistance { vector globalCentroid; QuantizationCodebook *quantizationCodebook; - float *localCentroids; - float *localCentroidsForSIMD; + float *localCentroids; + float *localCentroidsForSIMD; size_t localCodebookCentroidNoSIMD; Rotation *rotation; + +#ifdef NGT_IVI + Quantizer &quantizer; +#endif }; template @@ -1733,7 +2188,7 @@ class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance { for (size_t li = 0; li < localDivisionNo; li++) { distance += distanceLUT.getDistance(li * localCodebookCentroidNo + localID[li]); } - return sqrt(distance); + return sqrt(distance); } inline double operator()(NGT::Object &object, size_t objectID, void *l) { @@ -1774,16 +2229,48 @@ class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance { distanceLUT.set(li * localCodebookCentroidNo + localID[li], d); } } - return sqrt(distance); + return sqrt(distance); + } +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#else + inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#endif + cerr << "operator is not implemented" << endl; + abort(); } + #ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) { + inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #else - 
inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) { + inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #endif cerr << "operator is not implemented" << endl; abort(); } + + uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + NGTThrowException("Not implemented"); +#ifdef NGTQ_QBG + QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size()); + quantizedStream.arrange(invertedIndexObjects); + return quantizedStream.compressIntoUint4(); +#else + return 0; +#endif + } + void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) { + NGTThrowException("Not implemented"); + } + size_t getNumOfAlignedObjects(size_t noOfObjects) { + abort(); + return 0; + } + size_t getSizeOfCluster(size_t noOfObjects) { + abort(); + return 0; + } #endif }; @@ -1831,7 +2318,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { localID++; lut += localCodebookCentroidNo; } - return sqrt(distance); + return sqrt(distance); } @@ -1849,24 +2336,158 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { b = _mm256_max_ps(b, mask); data = _mm256_min_ps(data, b); } - - data = _mm256_min_ps(data, (__m256)_mm256_permute4x64_epi64((__m256i)data, _MM_SHUFFLE(3, 2, 3, 2))); - data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 8)); - data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 4)); - - return data[0]; + + data = _mm256_min_ps(data, (__m256)_mm256_permute4x64_epi64((__m256i)data, _MM_SHUFFLE(3, 2, 3, 2))); + data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 8)); + data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 4)); + + return data[0]; + } +#endif + +#if defined(NGTQG_AVX512) || 
defined(NGTQG_AVX2) +#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION) +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#else + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#endif + + + uint8_t *localID = static_cast(inv); + float *d = distances; +#ifdef NGTQBG_MIN + float *lastd = distances + noOfObjects; + float min = std::numeric_limits::max(); +#endif +#if defined(NGTQG_AVX512) + const __m512i mask512x0F = _mm512_set1_epi16(0x000f); + const __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); + const size_t range512 = distanceLUT.range512; + auto step512 = distanceLUT.step512; +#endif + const __m256i mask256x0F = _mm256_set1_epi16(0x000f); + const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0); + const size_t range256 = distanceLUT.range256; + auto step256 = distanceLUT.step256; + auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects; + while (localID < last) { + uint8_t *lut = distanceLUT.localDistanceLookup; + auto *lastgroup256 = localID + range256; +#if defined(NGTQG_AVX512) + __m512i depu16 = _mm512_setzero_si512(); + auto *lastgroup512 = localID + range512; + while (localID < lastgroup512) { + __m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut); + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); + __m512i lo = _mm512_and_si512(packedobj, mask512x0F); + __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); + __m512i obj = _mm512_or_si512(lo, hi); + __m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); + depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 0))); + depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 1))); + lut += (localCodebookCentroidNo - 1) * 4; + 
localID += step512; + } +#else + __m256i depu16l = _mm256_setzero_si256(); + __m256i depu16h = _mm256_setzero_si256(); +#endif + while (localID < lastgroup256) { + __m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut); + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + __m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0])); + __m256i lo = _mm256_and_si256(packedobj, mask256x0F); + __m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4); + __m256i obj = _mm256_or_si256(lo, hi); + __m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj); + +#if defined(NGTQG_AVX512) + depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(vtmp)); +#else + depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0))); + depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1))); +#endif + lut += (localCodebookCentroidNo - 1) * 2; + localID += step256; + } +#if defined(NGTQG_AVX512) + __m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); + __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1)); + + __m512 distance = _mm512_cvtepi32_ps(_mm512_add_epi32(lo, hi)); + __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); + distance = _mm512_mul_ps(distance, scale); + distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset)); +#if defined(NGTQG_DOT_PRODUCT) + float one = 1.0; + float two = 2.0; + distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); +#endif + distance = _mm512_sqrt_ps(distance); + _mm512_storeu_ps(d, distance); +#ifdef NGTQBG_MIN + { + float tmpmin; + int rest = 16 - (lastd - d); + if (rest > 0) { + __mmask16 mask = 0xffff; + mask >>= rest; + tmpmin = _mm512_mask_reduce_min_ps(mask, distance); + } else { + tmpmin = _mm512_reduce_min_ps(distance); + } + 
//std::cerr << "tmpmin=" << tmpmin << std::endl; + if (min > tmpmin) min = tmpmin; + } +#endif +#else + __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0)); + __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1)); + __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0)); + __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1)); + __m256 distancel = _mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil)); + __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih)); + __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); + __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); + distancel = _mm256_mul_ps(distancel, scalel); + distancel = _mm256_add_ps(distancel, _mm256_set1_ps(distanceLUT.totalOffset)); + distanceh = _mm256_mul_ps(distanceh, scaleh); + distanceh = _mm256_add_ps(distanceh, _mm256_set1_ps(distanceLUT.totalOffset)); +#if defined(NGTQG_DOT_PRODUCT) + float one = 1.0; + float two = 2.0; + distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); + distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); +#endif + distancel = _mm256_sqrt_ps(distancel); + distanceh = _mm256_sqrt_ps(distanceh); + _mm256_storeu_ps(d, distancel); + _mm256_storeu_ps(d + 8, distanceh); +#ifdef NGTQBG_MIN + { + float tmpmin = horizontalMin(distancel, distanceh, lastd - d); + if (min > tmpmin) min = tmpmin; + } +#endif +#endif + d += 16; + } +#ifdef NGTQBG_MIN + return min; +#endif } +#else /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// +#ifndef NGT_AVX512 +#error "AVX512 is *NOT* defined. *INDIVIDUAL* scale offset compression is available only for AVX512!" 
#endif - -#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2) -#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION) #ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) { + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { #else - inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) { + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { #endif - uint8_t *localID = static_cast(inv); float *d = distances; #ifdef NGTQBG_MIN @@ -1874,8 +2495,8 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { float min = std::numeric_limits::max(); #endif #if defined(NGTQG_AVX512) - const __m512i mask512x0F = _mm512_set1_epi16(0x000f); - const __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); + __m512i mask512x0F = _mm512_set1_epi16(0x000f); + __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); const size_t range512 = distanceLUT.range512; auto step512 = distanceLUT.step512; #endif @@ -1886,9 +2507,11 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects; while (localID < last) { uint8_t *lut = distanceLUT.localDistanceLookup; + float *scales = distanceLUT.scales; auto *lastgroup256 = localID + range256; + __m512 distance = _mm512_setzero_ps(); #if defined(NGTQG_AVX512) - __m512i depu16 = _mm512_setzero_si512(); + //__m512i depu16 = _mm512_setzero_si512(); auto *lastgroup512 = localID + range512; while (localID < lastgroup512) { __m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut); @@ -1898,9 +2521,22 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); __m512i obj = _mm512_or_si512(lo, hi); 
__m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); - depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 0))); - depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 1))); + + __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0))); + __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1))); + scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2))); + scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[2])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3))); + scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[3])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + lut += (localCodebookCentroidNo - 1) * 4; + scales += 4; localID += step512; } #else @@ -1910,28 +2546,36 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { while (localID < lastgroup256) { __m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut); _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + //std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl; __m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0])); __m256i lo = _mm256_and_si256(packedobj, mask256x0F); __m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4); __m256i obj = _mm256_or_si256(lo, hi); + //std::cerr << "LUT=" << (int)*lut << "," << (int)*(lut+1) << std::endl; __m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj); #if defined(NGTQG_AVX512) - depu16 = 
_mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(vtmp)); + __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 0))); + __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 1))); + scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1])); + distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + //////////////////// #else depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0))); depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1))); #endif lut += (localCodebookCentroidNo - 1) * 2; + scales += 2; localID += step256; } -#if defined(NGTQG_AVX512) - __m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); - __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1)); - __m512 distance = _mm512_cvtepi32_ps(_mm512_add_epi32(lo, hi)); - __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); - distance = _mm512_mul_ps(distance, scale); +#if defined(NGTQG_AVX512) + //__m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); + //__m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1)); + //__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); + //distance = _mm512_mul_ps(distance, scale); distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset)); #if defined(NGTQG_DOT_PRODUCT) float one = 1.0; @@ -1962,6 +2606,9 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1)); __m256 distancel = _mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil)); __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih)); + __attribute__((aligned(32))) float v32[8]; + 
_mm256_storeu_ps((float*)&v32, distancel); + _mm256_storeu_ps((float*)&v32, distanceh); __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); distancel = _mm256_mul_ps(distancel, scalel); @@ -1974,295 +2621,584 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); #endif - distancel = _mm256_sqrt_ps(distancel); - distanceh = _mm256_sqrt_ps(distanceh); - _mm256_storeu_ps(d, distancel); - _mm256_storeu_ps(d + 8, distanceh); + distancel = _mm256_sqrt_ps(distancel); + distanceh = _mm256_sqrt_ps(distanceh); + _mm256_storeu_ps(d, distancel); + _mm256_storeu_ps(d + 8, distanceh); +#endif + d += 16; + } +#ifdef NGTQBG_MIN + return min; +#endif + } +#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// + +#else +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#else + inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#endif + uint8_t *localID = static_cast(inv); +#ifdef NGTQBG_MIN + float min = std::numeric_limits::max(); +#endif + size_t numOfAlignedSubvectors = ((localDivisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; + size_t alignedSize = ((size - 1) / 2 + 1) * 2; + uint32_t d[NGTQ_SIMD_BLOCK_SIZE]; + size_t didx = 0; + size_t byteSize = numOfAlignedSubvectors * alignedSize / 2; + auto *last = localID + byteSize; + while (localID < last) { + uint8_t *lut = distanceLUT.localDistanceLookup; + 
memset(d, 0, sizeof(uint32_t) * NGTQ_SIMD_BLOCK_SIZE); + for (size_t li = 0; li < numOfAlignedSubvectors; li++) { + for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) { + uint8_t obj = *localID; + if (i % 2 == 0) { + obj &= 0x0f; + } else { + obj >>= 4; + localID++; + } + d[i] += *(lut + obj); + } + lut += localCodebookCentroidNo - 1; + } + for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) { + distances[didx + i] = sqrt(static_cast(d[i]) * distanceLUT.scales[0] + distanceLUT.totalOffset); +#ifdef NGTQBG_MIN + if (min > distances[didx + i]) { + min = distances[didx + i]; + } +#endif + } + didx += NGTQ_SIMD_BLOCK_SIZE; + } +#ifdef NGTQBG_MIN + return min; +#endif + } +#endif + + +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { + return 0.0; +#else + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { +#endif + } + + inline double operator()(NGT::Object &object, size_t objectID, void *l) { + return getL2DistanceFloat(object, objectID, static_cast(l)); + } + inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { + T *localID = static_cast(l); + NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + float *gcptr = (float*)&gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); +#else + float *gcptr = (float*)&gcentroid[0]; +#endif + float *optr = (float*)&((NGT::Object&)object)[0]; + double distance = 0.0; + for (size_t li = 0; li < localDivisionNo; li++) { + size_t distanceLUTidx = li * localCodebookCentroidNo + localID[li]; + if (distanceLUT.isValid(distanceLUTidx)) { + distance += distanceLUT.getDistance(distanceLUTidx); + optr += localDataSize; + gcptr += localDataSize; + } else 
{ + size_t idx = li; + NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) + float *lcptr = (float*)&lcentroid.at(0, localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); +#else + float *lcptr = (float*)&lcentroid[0]; +#endif +#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2) + float *lcendptr = lcptr + localDataSize; + __m256 sum256 = _mm256_setzero_ps(); + __m256 v; + while (lcptr < lcendptr) { + v = _mm256_sub_ps(_mm256_sub_ps(_mm256_loadu_ps(optr), _mm256_loadu_ps(gcptr)), _mm256_loadu_ps(lcptr)); + sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); + optr += 8; + gcptr += 8; + lcptr += 8; + } + __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); + __attribute__((aligned(32))) float f[4]; + _mm_store_ps(f, sum128); + double d = f[0] + f[1] + f[2] + f[3]; +#else + float *lcendptr = lcptr + localDataSize; + double d = 0.0; + while (lcptr != lcendptr) { + double sub = (*optr++ - *gcptr++) - *lcptr++; + d += sub * sub; + } +#endif + distance += d; + } + } + return sqrt(distance); + } + + uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { + std::stringstream msg; + msg << "Internal fatal error. Invalid # of subvectos. 
" << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo; + NGTThrowException(msg); + } +#ifdef NGTQ_QBG + QuantizedObjectProcessingStream quantizedStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size()); + quantizedStream.arrange(invertedIndexObjects); + return quantizedStream.compressIntoUint4(); +#else + return 0; +#endif + } + void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) { +#ifdef NGTQ_QBG + NGTQ::QuantizedObjectProcessingStream quantizedStream(numOfSubspaces, ids.size()); + quantizedStream.uncompressFromUint4(static_cast(objects)); + invertedIndexObjects.initialize(numOfSubspaces); + quantizedStream.restoreToInvertedIndex(invertedIndexObjects); +#endif + } + size_t getNumOfAlignedObjects(size_t noOfObjects) { + return QuantizedObjectProcessingStream::getNumOfAlignedObjects(noOfObjects); + } + size_t getSizeOfCluster(size_t noOfObjects) { + QuantizedObjectProcessingStream quantizedStream(localDivisionNo); + return quantizedStream.getUint4StreamSize(noOfObjects);; + } +#endif + +}; + +class NonLocalQuantizedObjectDistance : public QuantizedObjectDistance { + public: + NonLocalQuantizedObjectDistance(Quantizer &q):QuantizedObjectDistance(q){} + virtual void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) { +#ifdef NGTQ_QBG + invertedIndexObjects.initialize(0); + for (auto &id : ids) { + NGTQ::QuantizedObject quantizedObject; + quantizedObject.objectID = id; + invertedIndexObjects.pushBack(id, quantizedObject); + } +#endif + } +}; + +#ifdef NGT_IVI + /////////////////////////////////////////////// +template +class NonQuantizedObjectDistance : public NonLocalQuantizedObjectDistance { +public: + NonQuantizedObjectDistance(Quantizer &q):NonLocalQuantizedObjectDistance(q){} + inline double operator()(void *l, DistanceLookupTable &distanceLUT) { + return 0.0; + } + + 
///-/ 近似距離計算 ///////////////////////////// +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#else + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query = 0) { +#endif #ifdef NGTQBG_MIN - { - float tmpmin = horizontalMin(distancel, distanceh, lastd - d); - if (min > tmpmin) min = tmpmin; - } + float min = std::numeric_limits::max(); #endif + for (size_t idx = 0; idx < noOfObjects; idx++) { + distances[idx] = NGT::PrimitiveComparator::compareL2(static_cast(query), + static_cast(inv) + (dimension * idx), + dimension); +#ifdef NGTQBG_MIN + if (distances[idx] < min) min = distances[idx]; #endif - d += 16; } #ifdef NGTQBG_MIN return min; #endif } -#else /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// -#ifndef NGT_AVX512 -#error "AVX512 is *NOT* defined. *INDIVIDUAL* scale offset compression is available only for AVX512!" 
-#endif #ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) { + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { + return 0.0; #else - inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) { + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #endif + } - uint8_t *localID = static_cast(inv); - float *d = distances; -#ifdef NGTQBG_MIN - float *lastd = distances + noOfObjects; - float min = std::numeric_limits::max(); -#endif -#if defined(NGTQG_AVX512) - __m512i mask512x0F = _mm512_set1_epi16(0x000f); - __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); - const size_t range512 = distanceLUT.range512; - auto step512 = distanceLUT.step512; -#endif - const __m256i mask256x0F = _mm256_set1_epi16(0x000f); - const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0); - const size_t range256 = distanceLUT.range256; - auto step256 = distanceLUT.step256; - auto *last = localID + range256 / NGTQ_SIMD_BLOCK_SIZE * noOfObjects; - while (localID < last) { - uint8_t *lut = distanceLUT.localDistanceLookup; - float *scales = distanceLUT.scales; - auto *lastgroup256 = localID + range256; - __m512 distance = _mm512_setzero_ps(); -#if defined(NGTQG_AVX512) - //__m512i depu16 = _mm512_setzero_si512(); - auto *lastgroup512 = localID + range512; - while (localID < lastgroup512) { - __m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); - __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); - __m512i lo = _mm512_and_si512(packedobj, mask512x0F); - __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); - __m512i obj = _mm512_or_si512(lo, hi); - 
__m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTable &distanceLUT) { + NGTThrowException("Not implemented."); + return 0.0; + } - __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0))); - __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1))); - scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2))); - scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[2])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3))); - scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[3])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); + inline double operator()(NGT::Object &object, size_t objectID, void *l) { + NGTThrowException("Not implemented."); + return getL2DistanceFloat(object, objectID, static_cast(l)); + } + inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { + NGTThrowException("Not implemented."); + return 0.0; + } + void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) { + void *objectPtr = &((NGT::Object&)object)[0]; + createDistanceLookup(objectPtr, objectID, distanceLUT); + } - lut += (localCodebookCentroidNo - 1) * 4; - scales += 4; - localID += step512; - } -#else - __m256i depu16l = _mm256_setzero_si256(); - __m256i depu16h = _mm256_setzero_si256(); + void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) { + } + + void createDistanceLookup(void 
*objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) { + } + + uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { + std::stringstream msg; + msg << "Internal fatal error. Invalid # of subvectos. " << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo; + NGTThrowException(msg); + } + ObjectProcessingStream processingStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size(), quantizer); + processingStream.arrange(invertedIndexObjects); + return processingStream.getStream(); + } + void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) { +#ifdef NGTQ_QBG + invertedIndexObjects.initialize(0); + for (auto id : ids) { + NGTQ::QuantizedObject quantizedObject; + quantizedObject.objectID = id; + invertedIndexObjects.pushBack(id, quantizedObject); + } #endif - while (localID < lastgroup256) { - __m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); - //std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl; - __m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0])); - __m256i lo = _mm256_and_si256(packedobj, mask256x0F); - __m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4); - __m256i obj = _mm256_or_si256(lo, hi); - //std::cerr << "LUT=" << (int)*lut << "," << (int)*(lut+1) << std::endl; - __m256i vtmp = _mm256_shuffle_epi8(lookupTable, obj); + } + size_t getNumOfAlignedObjects(size_t noOfObjects) { return noOfObjects; } + size_t getSizeOfCluster(size_t noOfObjects) { + ObjectProcessingStream processingStream(localDivisionNo); + return processingStream.getStreamSize(noOfObjects);; + } +}; -#if defined(NGTQG_AVX512) - __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 
0))); - __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm256_extracti32x4_epi32(vtmp, 1))); - scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1])); - distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - //////////////////// +template +class ScalarQuantizedInt8ObjectDistance : public NonLocalQuantizedObjectDistance { + public: + ScalarQuantizedInt8ObjectDistance(Quantizer &q, DistanceType dt):NonLocalQuantizedObjectDistance(q) { + setCompareFunction(dt); + } + ~ScalarQuantizedInt8ObjectDistance() {} + + inline double operator()(void *l, DistanceLookupTable &distanceLUT) { + return 0.0; + } + + ///-/ 近似距離計算 ///////////////////////////// +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { #else - depu16l = _mm256_adds_epu16(depu16l, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 0))); - depu16h = _mm256_adds_epu16(depu16h, _mm256_cvtepu8_epi16(_mm256_extractf128_si256(vtmp, 1))); + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { #endif - lut += (localCodebookCentroidNo - 1) * 2; - scales += 2; - localID += step256; + if (query == 0) { + NGTThrowException("Fatal inner error! 
The specified query is invalid.."); + } +#ifdef NGTQBG_MIN + float min = std::numeric_limits::max(); +#endif + for (size_t idx = 0; idx < noOfObjects; idx++) { + if (distances[idx] != 0.0) { + distances[idx] = std::numeric_limits::max(); + continue; } - -#if defined(NGTQG_AVX512) - //__m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); - //__m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1)); - //__m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); - //distance = _mm512_mul_ps(distance, scale); - distance = _mm512_add_ps(distance, _mm512_set1_ps(distanceLUT.totalOffset)); -#if defined(NGTQG_DOT_PRODUCT) - float one = 1.0; - float two = 2.0; - distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); + distances[idx] = compare(query, static_cast(inv) + (dimension * idx), dimension); +#ifdef NGTQBG_MIN + if (distances[idx] < min) min = distances[idx]; #endif - distance = _mm512_sqrt_ps(distance); - _mm512_storeu_ps(d, distance); + } #ifdef NGTQBG_MIN - { - float tmpmin; - int rest = 16 - (lastd - d); - if (rest > 0) { - __mmask16 mask = 0xffff; - mask >>= rest; - tmpmin = _mm512_mask_reduce_min_ps(mask, distance); - } else { - tmpmin = _mm512_reduce_min_ps(distance); - } - //std::cerr << "tmpmin=" << tmpmin << std::endl; - if (min > tmpmin) min = tmpmin; - } + return min; #endif + } + +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #else - __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0)); - __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1)); - __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0)); - __m256i hih = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 1)); - __m256 distancel = 
_mm256_cvtepi32_ps(_mm256_add_epi32(lol, hil)); - __m256 distanceh = _mm256_cvtepi32_ps(_mm256_add_epi32(loh, hih)); - __attribute__((aligned(32))) float v32[8]; - _mm256_storeu_ps((float*)&v32, distancel); - _mm256_storeu_ps((float*)&v32, distanceh); - __m256 scalel = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); - __m256 scaleh = _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&distanceLUT.scales[0])); - distancel = _mm256_mul_ps(distancel, scalel); - distancel = _mm256_add_ps(distancel, _mm256_set1_ps(distanceLUT.totalOffset)); - distanceh = _mm256_mul_ps(distanceh, scaleh); - distanceh = _mm256_add_ps(distanceh, _mm256_set1_ps(distanceLUT.totalOffset)); -#if defined(NGTQG_DOT_PRODUCT) - float one = 1.0; - float two = 2.0; - distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); - distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #endif - distancel = _mm256_sqrt_ps(distancel); - distanceh = _mm256_sqrt_ps(distanceh); - _mm256_storeu_ps(d, distancel); - _mm256_storeu_ps(d + 8, distanceh); + if (query == 0) { + NGTThrowException("Fatal inner error! 
The specified query is invalid.."); + } +#if 0 + std::cerr << "q size=" << queryList.size() << std::endl; + std::cerr << "qs="; + for (size_t i = 0; i < queryList.size(); i++) { + std::cerr << queryList[i] << " "; + } + std::cerr << std::endl; #endif - d += 16; +#ifdef NGTQBG_MIN + float min = std::numeric_limits::max(); +#endif +#define DIST1 +#if defined(DIST0) + for (size_t qi = 0; qi < queryList.size(); qi++) { + for (size_t idx = 0; idx < noOfObjects; idx++) { + auto *q = static_cast(query) + dimension * queryList[qi]; + auto *o = static_cast(inv) + dimension * idx; + auto d = NGT::PrimitiveComparator::compareL2(reinterpret_cast(q), + reinterpret_cast(o), + dimension); + distances[noOfObjects * qi + idx] = d; + } + } +#elif defined(DIST1) + size_t bsize = 192 * 1024 * 0.5 / dimension; + //size_t bsize = 20; + for (size_t bi = 0; bi < noOfObjects; bi += bsize) { + for (size_t qi = 0; qi < queryList.size(); qi++) { + if (qi + 1 < queryList.size()) { + NGT::MemoryCache::prefetch(static_cast(query) + dimension * queryList[qi + 1], 64); + } + for (size_t idx = bi; idx < std::min(noOfObjects, bi + bsize); idx++) { + auto *q = static_cast(query) + dimension * queryList[qi]; + auto *o = static_cast(inv) + dimension * idx; + distances[noOfObjects * qi + idx] = compare(q, o, dimension); + } + } } +#endif #ifdef NGTQBG_MIN return min; #endif } -#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// -#else + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTable &distanceLUT) { + NGTThrowException("Not implemented."); + return 0.0; + } + + inline double operator()(NGT::Object &object, size_t objectID, void *l) { + NGTThrowException("Not implemented."); + //return getL2DistanceFloat(object, objectID, static_cast(l)); + return 0.0; + } + inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { + NGTThrowException("Not implemented."); + return 0.0; + } + 
+ void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) {} + void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTable &distanceLUT) {} + void createDistanceLookup(void *objectPtr, size_t objectID, DistanceLookupTableUint8 &distanceLUT) {} + + uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + //ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo, invertedIndexObjects.size(), &typeid(OT), quantizer); + ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo, invertedIndexObjects.size(), &typeid(QT), quantizer); + processingStream.arrange(invertedIndexObjects); + return processingStream.getStream(); + } + + size_t getNumOfAlignedObjects(size_t noOfObjects) { return noOfObjects; } + size_t getSizeOfCluster(size_t noOfObjects) { + ScalarQuantizedInt8ObjectProcessingStream processingStream(localDivisionNo); + return processingStream.getStreamSize(noOfObjects);; + } + + float compareDotProduct(void *q, void *o, size_t dimension) { + double maxmag = 255.0 * 255.0 * dimension; + double d = NGT::PrimitiveComparator::compareDotProduct(reinterpret_cast(q), reinterpret_cast(o), dimension); + return maxmag - d; + } + + float compareL2(void *q, void *o, size_t dimension) { + return NGT::PrimitiveComparator::compareL2(reinterpret_cast(q), reinterpret_cast(o), dimension); + } + + float compare(void *q, void *o, size_t dimension) { + return (this->*comparePtr)(q, o, dimension); + } + + void setCompareFunction(DistanceType dtype) { + switch (dtype) { + case DistanceType::DistanceTypeInnerProduct: + case DistanceType::DistanceTypeNormalizedCosine: + comparePtr = &ScalarQuantizedInt8ObjectDistance::compareDotProduct; break; + case DistanceType::DistanceTypeL2: + comparePtr = &ScalarQuantizedInt8ObjectDistance::compareL2; break; + default: + { + std::stringstream msg; + msg << "Invalid distance type. 
" << dtype; + NGTThrowException(msg); + break; + } + } + } + + float (ScalarQuantizedInt8ObjectDistance::*comparePtr)(void *, void*, size_t); + //float (*comparePtr)(void *, void*, size_t); + +}; + +class ScalarQuantizedUint8TransposedObjectDistance : public ScalarQuantizedInt8ObjectDistance { + public: + ScalarQuantizedUint8TransposedObjectDistance(Quantizer &q, DistanceType dtype):ScalarQuantizedInt8ObjectDistance(q, dtype) {} + ~ScalarQuantizedUint8TransposedObjectDistance() {} + #ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) { + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { #else - inline void operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) { + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { #endif - uint8_t *localID = static_cast(inv); -#ifdef NGTQBG_MIN - float min = std::numeric_limits::max(); + if (query == 0) { + NGTThrowException("Fatal inner error! 
The specified query is invalid.."); + } + const uint8_t *object = static_cast(inv); + const uint8_t *qobject = static_cast(query); + const unsigned char *last = qobject + dimension; + __m256i sum256[noOfObjects]; +#if defined(NGT_AVX512) + { + __m512i sum512[noOfObjects]; + for (size_t oi = 0; oi < noOfObjects; oi++) { + sum512[oi] = _mm512_setzero_si512(); + } + const unsigned char *lastgroup = last - 63; + while (qobject < lastgroup) { + __m512i q = _mm512_loadu_si512(reinterpret_cast(qobject)); + for (size_t oi = 0; oi < noOfObjects; oi++) { + __m512i o = _mm512_loadu_si512(reinterpret_cast(object)); + __mmask64 m = _mm512_cmplt_epu8_mask(q, o); + __m512i x = _mm512_add_epi8(_mm512_maskz_subs_epu8(m, o, q), + _mm512_maskz_subs_epu8(~m, q, o)); + __m512i xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,0)); + sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16)); + xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,1)); + sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16)); + object += 64; + } + qobject += 64; + } + for (size_t oi = 0; oi < noOfObjects; oi++) { + sum256[oi] = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512[oi], 0), + _mm512_extracti32x8_epi32(sum512[oi], 1)); + } + } +#elif defined(NGT_AVX2) + { + for (size_t oi = 0; oi < noOfObjects; oi++) { + sum256[oi] = _mm256_setzero_si256(); + } + //__attribute__((aligned(32))) uint16_t iv[16]; + const unsigned char *lastgroup = last - 63; + while (qobject < lastgroup) { + //std::cerr << "qobj=" << (size_t)qobject << ":" << (size_t)lastgroup << std::endl; + __m256i q[4]; + q[0] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); + qobject += 16; + q[1] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); + qobject += 16; + q[2] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); + qobject += 16; + q[3] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); + qobject += 16; + for (size_t oi = 0; oi < 
noOfObjects; oi++) { + __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); + __m256i sub16 = _mm256_subs_epi16(q[0], o); + sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); + object += 16; + o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); + sub16 = _mm256_subs_epi16(q[1], o); + sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); + object += 16; + o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); + sub16 = _mm256_subs_epi16(q[2], o); + sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); + object += 16; + o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); + sub16 = _mm256_subs_epi16(q[3], o); + sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); + object += 16; + } + } + } #endif - size_t numOfAlignedSubvectors = ((localDivisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; - size_t alignedSize = ((size - 1) / 2 + 1) * 2; - uint32_t d[NGTQ_SIMD_BLOCK_SIZE]; - size_t didx = 0; - size_t byteSize = numOfAlignedSubvectors * alignedSize / 2; - auto *last = localID + byteSize; - while (localID < last) { - uint8_t *lut = distanceLUT.localDistanceLookup; - memset(d, 0, sizeof(uint32_t) * NGTQ_SIMD_BLOCK_SIZE); - for (size_t li = 0; li < numOfAlignedSubvectors; li++) { - for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) { - uint8_t obj = *localID; - if (i % 2 == 0) { - obj &= 0x0f; - } else { - obj >>= 4; - localID++; - } - d[i] += *(lut + obj); + { + //__attribute__((aligned(32))) uint16_t iv[16]; + const unsigned char *lastgroup = last - 15; + while (qobject < lastgroup) { + __m256i q = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); + for (size_t oi = 0; oi < noOfObjects; oi++) { + __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); + __m256i sub16 = _mm256_subs_epi16(q, o); + sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); + object 
+= 16; } - lut += localCodebookCentroidNo - 1; + qobject += 16; } - for (size_t i = 0; i < NGTQ_SIMD_BLOCK_SIZE; i++) { - distances[didx + i] = sqrt(static_cast(d[i]) * distanceLUT.scales[0] + distanceLUT.totalOffset); + } #ifdef NGTQBG_MIN - if (min > distances[didx + i]) { - min = distances[didx + i]; - } + float min = std::numeric_limits::max(); #endif + const __m256i value0 = _mm256_set1_epi32(0); + for (size_t oi = 0; oi < noOfObjects; oi++) { + __m256i tmp1 = _mm256_hadd_epi32(sum256[oi], value0); + __m256i tmp2 = _mm256_hadd_epi32(tmp1, value0); + distances[oi] = _mm256_extract_epi32(tmp2, 0) + _mm256_extract_epi32(tmp2, 4); + if (distances[oi] < min) { + min = distances[oi]; } - didx += NGTQ_SIMD_BLOCK_SIZE; } #ifdef NGTQBG_MIN return min; #endif } -#endif - - inline double operator()(NGT::Object &object, size_t objectID, void *l) { - return getL2DistanceFloat(object, objectID, static_cast(l)); - } - inline double operator()(NGT::Object &object, size_t objectID, void *l, DistanceLookupTable &distanceLUT) { - T *localID = static_cast(l); - NGT::PersistentObject &gcentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(objectID); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *gcptr = (float*)&gcentroid.at(0, globalCodebookIndex->getObjectSpace().getRepository().allocator); -#else - float *gcptr = (float*)&gcentroid[0]; -#endif - float *optr = (float*)&((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t distanceLUTidx = li * localCodebookCentroidNo + localID[li]; - if (distanceLUT.isValid(distanceLUTidx)) { - distance += distanceLUT.getDistance(distanceLUTidx); - optr += localDataSize; - gcptr += localDataSize; - } else { - size_t idx = li; - NGT::PersistentObject &lcentroid = *localCodebookIndexes[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, 
localCodebookIndexes[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif -#if defined(NGTQG_AVX512) || defined(NGTQG_AVX2) - float *lcendptr = lcptr + localDataSize; - __m256 sum256 = _mm256_setzero_ps(); - __m256 v; - while (lcptr < lcendptr) { - v = _mm256_sub_ps(_mm256_sub_ps(_mm256_loadu_ps(optr), _mm256_loadu_ps(gcptr)), _mm256_loadu_ps(lcptr)); - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); - optr += 8; - gcptr += 8; - lcptr += 8; - } - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum128); - double d = f[0] + f[1] + f[2] + f[3]; +#ifdef NGTQBG_MIN + inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { + return 0.0; #else - float *lcendptr = lcptr + localDataSize; - double d = 0.0; - while (lcptr != lcendptr) { - double sub = (*optr++ - *gcptr++) - *lcptr++; - d += sub * sub; - } + inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { #endif - distance += d; - } + } + + uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { + if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { + std::stringstream msg; + msg << "Internal fatal error. Invalid # of subvectos. 
" << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo; + NGTThrowException(msg); } - return sqrt(distance); + ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size(), quantizer); + processingStream.arrange(invertedIndexObjects); + return processingStream.getStream(); + } + void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, + size_t numOfSubspaces, std::vector &ids, void *objects) { + NGTThrowException("not implemented"); + } + size_t getSizeOfCluster(size_t noOfObjects) { + ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(localDivisionNo); + return processingStream.getStreamSize(noOfObjects);; } -#endif }; +#endif class Quantizer { public: #ifdef NGTQ_STATIC_OBJECT_FILE - typedef StaticObjectFile ObjectList; + typedef StaticObjectFile ObjectList; #else - typedef ObjectFile ObjectList; + typedef ObjectFile ObjectList; #endif @@ -2290,16 +3226,16 @@ class Quantizer { #ifdef NGTQ_QBG virtual void createIndex(size_t beginID, size_t endID) = 0; #endif - virtual void setupInvertedIndex(std::vector> &quantizationCodebook, - std::vector &codebookIndex, - std::vector &objectIndex) = 0; + virtual void setupInvertedIndex(std::vector &codebookIndex, + std::vector> &objectIndex) = 0; #ifndef NGTQ_QBG virtual void rebuildIndex() = 0; #endif virtual void save() = 0; virtual void loadQuantizationCodebookAndRotation(const std::vector> &quantizationCodebook, const std::vector &rotation) = 0; - virtual void open(const string &index, NGT::Property &globalProperty, bool readOnly) = 0; - virtual void open(const string &index, bool readOnly) = 0; + virtual void open(const string &index, NGT::Property &globalProperty, bool readOnly, + DataType refinementDataType = DataTypeAny) = 0; + virtual void open(const string &index, bool readOnly, DataType refinementDataType = DataTypeAny) = 0; virtual void close() = 0; virtual void closeCodebooks() = 0; 
#ifdef NGTQ_SHARED_INVERTED_INDEX @@ -2380,7 +3316,6 @@ class Quantizer { #endif virtual size_t getInvertedIndexSize() = 0; - //void searchIndex(NGT::GraphAndTreeIndex &codebook, static void searchIndex(NGT::GraphAndTreeIndex &globalCodebookIndex, #ifdef NGTQ_VECTOR_OBJECT const vector, size_t>> &objects, @@ -2391,216 +3326,73 @@ class Quantizer { { ids.clear(); ids.resize(objects.size()); -#pragma omp parallel for - for (size_t idx = 0; idx < objects.size(); idx++) { -#ifdef NGTQ_VECTOR_OBJECT - auto *object = globalCodebookIndex.allocateObject(objects[idx].first); - globalCodebookIndex.deleteObject(object); -#else -#endif - NGT::ObjectDistances result; -#define QID_WEIGHT 100 - { -#ifdef NGTQ_VECTOR_OBJECT - auto *object = globalCodebookIndex.allocateObject(objects[idx].first); - NGT::SearchContainer sc(*object); -#else - NGT::SearchContainer sc(*objects[idx].first); -#endif - sc.setResults(&result); - sc.setSize(10); - sc.radius = FLT_MAX; - sc.setEpsilon(0.1); - globalCodebookIndex.search(sc); -#ifdef NGTQ_VECTOR_OBJECT - globalCodebookIndex.deleteObject(object); -#endif - } - ids[idx].id = result[0].id; - ids[idx].distance = result[0].distance; - ids[idx].identical = true; - } - return; - } - - static const std::string getInvertedIndexFile() { return "ivt"; } - static const std::string getGlobalFile() { return "global"; } - static const std::string getLocalPrefix() { return "local-"; } - static const std::string getRotationFile() { return "qr"; } - static const std::string getGlobalToInvertedIndexFile() { return "g2i"; } - - ObjectList objectList; - - string rootDirectory; - - Property property; - - NGT::Index globalCodebookIndex; - - size_t distanceComputationCount; - - size_t localIDByteSize; - NGT::ObjectSpace::ObjectType objectType; - size_t divisionNo; - - std::vector localCodebookIndexes; - - QuantizationCodebook quantizationCodebook; - std::vector objectToBlobIndex; - Rotation rotation; - -#ifdef NGTQ_OBJECT_IN_MEMORY - NGT::Repository 
objectListOnMemory; -#endif -}; - -class QuantizedObjectProcessingStream { - public: - QuantizedObjectProcessingStream(size_t divisionNo, size_t nOfObjects): stream(0) { - initialize(divisionNo); - numOfObjects = nOfObjects; - setStreamSize(); - stream = new uint8_t[streamSize](); - } - - QuantizedObjectProcessingStream(size_t numOfSubspaces): stream(0) { - initialize(numOfSubspaces); - } - - ~QuantizedObjectProcessingStream() { - delete[] stream; - } - - void initialize(size_t divisionNo) { - numOfAlignedSubvectors = ((divisionNo - 1) / NGTQ_BATCH_SIZE + 1) * NGTQ_BATCH_SIZE; - alignedBlockSize = NGTQ_SIMD_BLOCK_SIZE * numOfAlignedSubvectors; - } - - static size_t getNumOfAlignedObjects(size_t numOfObjects) { - return (((numOfObjects - 1) / NGTQ_SIMD_BLOCK_SIZE + 1) * NGTQ_SIMD_BLOCK_SIZE); - } - - void setStreamSize() { - numOfAlignedObjects = getNumOfAlignedObjects(numOfObjects); - streamSize = numOfAlignedObjects * numOfAlignedSubvectors; - return; - } - -#ifdef NGTQ_QBG - void arrangeQuantizedObject(size_t dataNo, size_t subvectorNo, uint8_t quantizedObject) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - abort(); -#else - size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE; - size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE; - stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft] = quantizedObject; -#endif - } - - void arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects) { - for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { - for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) { - arrangeQuantizedObject(oidx, idx, invertedIndexObjects[oidx].localID[idx] - 1); - } - } - } - - uint8_t getQuantizedObject(size_t dataNo, size_t subvectorNo) { - size_t blkNo = dataNo / NGTQ_SIMD_BLOCK_SIZE; - size_t oft = dataNo - blkNo * NGTQ_SIMD_BLOCK_SIZE; - return stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft]; - } -#endif - - uint8_t* compressIntoUint4() { - size_t idx = 0; - size_t 
uint4StreamSize = streamSize / 2; - uint8_t *uint4Objects = new uint8_t[uint4StreamSize](); - while (idx < streamSize) { - for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { - for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) { - if (idx / 2 > uint4StreamSize) { - std::stringstream msg; - msg << "Quantizer::compressIntoUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize; - NGTThrowException(msg); - } - if (idx % 2 == 0) { - uint4Objects[idx / 2] = stream[idx]; - } else { - uint4Objects[idx / 2] |= (stream[idx] << 4); - } - idx++; - } - } - } - return uint4Objects; - } - - void uncompressFromUint4(uint8_t *uint4Objects) { - size_t idx = 0; - size_t uint4StreamSize = streamSize / 2; - while (idx < streamSize) { - for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { - for (size_t bidx = 0; bidx < NGTQ_SIMD_BLOCK_SIZE; bidx++) { - if (idx / 2 > uint4StreamSize) { - std::stringstream msg; - msg << "Quantizer::uncompressFromUint4: Fatal inner error! " << (idx / 2) << ":" << uint4StreamSize; - NGTThrowException(msg); - } - if (idx % 2 == 0) { - stream[idx] = uint4Objects[idx / 2] & 0x0f; - } else { - stream[idx] = uint4Objects[idx / 2] >> 4; - } - idx++; - } - } - } - } - -#ifdef NGTQ_QBG - void restoreToInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "Not implemented." 
<< std::endl; - abort(); +#pragma omp parallel for + for (size_t idx = 0; idx < objects.size(); idx++) { +#ifdef NGTQ_VECTOR_OBJECT + auto *object = globalCodebookIndex.allocateObject(objects[idx].first); + globalCodebookIndex.deleteObject(object); #else - invertedIndexObjects.resize(numOfAlignedObjects); - for (size_t oidx = 0; oidx < numOfAlignedObjects; oidx++) { - for (size_t lidx = 0; lidx < numOfAlignedSubvectors; lidx++) { - invertedIndexObjects[oidx].localID[lidx] = getQuantizedObject(oidx, lidx) + 1; +#endif + NGT::ObjectDistances result; +#define QID_WEIGHT 100 + { +#ifdef NGTQ_VECTOR_OBJECT + auto *object = globalCodebookIndex.allocateObject(objects[idx].first); + NGT::SearchContainer sc(*object); +#else + NGT::SearchContainer sc(*objects[idx].first); +#endif + sc.setResults(&result); + sc.setSize(10); + sc.radius = FLT_MAX; + sc.setEpsilon(0.1); + globalCodebookIndex.search(sc); +#ifdef NGTQ_VECTOR_OBJECT + globalCodebookIndex.deleteObject(object); +#endif } + ids[idx].id = result[0].id; + ids[idx].distance = result[0].distance; + ids[idx].identical = true; } - invertedIndexObjects.resize(numOfObjects); -#endif + return; } -#endif - uint8_t* getStream() { - auto s = stream; - stream = 0; - return s; + static const std::string getInvertedIndexFile() { return "ivt"; } + static const std::string getGlobalFile() { return "global"; } + static const std::string getLocalPrefix() { return "local-"; } + static const std::string getRotationFile() { return "qr"; } + static const std::string getGlobalToInvertedIndexFile() { return "g2i"; } + void saveProperty() { + property.save(rootDirectory); } - size_t getUint4StreamSize(size_t nOfObjects) { - numOfObjects = nOfObjects; - setStreamSize(); - return streamSize / 2; - } + ObjectList objectList; - size_t getStreamSize(size_t nOfObjects) { - numOfObjects = nOfObjects; - setStreamSize(); - return streamSize; - } + string rootDirectory; + + Property property; + + NGT::Index globalCodebookIndex; + + size_t 
distanceComputationCount; + + size_t localIDByteSize; + NGT::ObjectSpace::ObjectType objectType; + size_t divisionNo; + + std::vector localCodebookIndexes; + + QuantizationCodebook quantizationCodebook; + std::vector> objectToBlobIndex; + Rotation rotation; + +#ifdef NGTQ_OBJECT_IN_MEMORY + NGT::ObjectSpace *refinementObjectSpace; +#endif + NGT::ObjectSpace *refinementObjectSpaceForObjectList; + }; - uint8_t *stream; - size_t numOfAlignedSubvectors; - size_t alignedBlockSize; - size_t numOfAlignedObjects ; - size_t numOfObjects ; - size_t streamSize; -}; - class GenerateResidualObject { public: GenerateResidualObject():globalCodebookIndex(0), objectList(0), quantizationCodebook(0) {} @@ -2783,6 +3575,8 @@ class QuantizerInstance : public Quantizer { generateResidualObject = 0; localCodebooks = 0; verbose = false; + refinementObjectSpace = 0; + refinementObjectSpaceForObjectList = 0; } virtual ~QuantizerInstance() { close(); } @@ -2797,6 +3591,7 @@ class QuantizerInstance : public Quantizer { NGT::Property &localProperty) #endif { + rootDirectory = index; NGT::Index::mkdir(rootDirectory); string global = rootDirectory + "/" + getGlobalFile(); @@ -2839,17 +3634,6 @@ class QuantizerInstance : public Quantizer { invertedIndex.serialize(of); #endif string fname = rootDirectory + "/obj"; - if (property.dataSize == 0) { - std::stringstream msg; -#ifdef NGTQ_QBG - msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension - << ":" << property.dataType << ":" << property.genuineDataType; -#else - msg << "Quantizer: data size of the object is zero. 
" << property.dataSize << ":" << property.dimension - << ":" << property.dataType; -#endif - NGTThrowException(msg); - } #ifdef NGTQ_STATIC_OBJECT_FILE if (!objectList.create(fname, objectFile)) { std::stringstream msg; @@ -2861,7 +3645,7 @@ class QuantizerInstance : public Quantizer { objectList.openMultipleStreams(omp_get_max_threads()); #endif #else - objectList.create(fname, property.dataSize); + objectList.create(fname, property.getDataSize()); #endif #ifdef NGTQ_QBG if (rotation != 0) { @@ -2887,7 +3671,7 @@ class QuantizerInstance : public Quantizer { qCodebook.serialize(ofs); } - void loadQuantizationCodebookAndRotation(const std::vector> &qCodebook, const std::vector &rotation) { + void loadQuantizationCodebookAndRotation(const std::vector> &qCodebook, const std::vector &rotation) { QuantizationCodebook qc; qc.setPaddedDimension(globalCodebookIndex.getObjectSpace().getPaddedDimension()); qc = qCodebook; @@ -2903,12 +3687,12 @@ class QuantizerInstance : public Quantizer { saveQuantizationCodebook(qc); } - void open(const string &index, NGT::Property &globalProperty, bool readOnly) { - open(index, readOnly); + void open(const string &index, NGT::Property &globalProperty, bool readOnly, DataType refinementDataType) { + open(index, readOnly, refinementDataType); globalCodebookIndex.setProperty(globalProperty); } - void open(const string &index, bool readOnly) { + void open(const string &index, bool readOnly, DataType refinementDataType) { NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); rootDirectory = index; @@ -2967,63 +3751,161 @@ class QuantizerInstance : public Quantizer { objectList.openMultipleStreams(omp_get_max_threads()); #endif #ifdef NGTQ_OBJECT_IN_MEMORY - if (property.objectListOnMemory) { - objectListOnMemory.resize(objectList.size()); - for (size_t id = 1; id < objectList.size(); id++) { - std::vector object; - objectList.get(id, object, &globalCodebookIndex.getObjectSpace()); - NGT::Object *ngtObject = 
globalCodebookIndex.allocateObject(object); - objectListOnMemory.put(id, ngtObject); + refinementObjectSpace = 0; + refinementDataType = refinementDataType == DataTypeAny ? property. refinementDataType : refinementDataType; + if (refinementDataType != DataTypeNone) { + auto distanceType = property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct + ? NGT::ObjectSpace::DistanceTypeDotProduct : property.distanceType; + try { + switch (refinementDataType) { + case DataTypeFloat: + refinementObjectSpace = new NGT::ObjectSpaceRepository(objectList.pseudoDimension, + typeid(float), + distanceType, + property.maxMagnitude); + break; + case DataTypeFloat16: + refinementObjectSpace = new NGT::ObjectSpaceRepository(objectList.pseudoDimension, + typeid(NGT::float16), + distanceType, + property.maxMagnitude); + break; + default: + stringstream msg; + msg << "Invalid refinement data type. " << refinementDataType; + NGTThrowException(msg); + break; + } + + auto &repo = refinementObjectSpace->getRepository(); + repo.initialize(); + for (size_t id = 1; id < objectList.size(); id++) { + std::vector object; + objectList.get(id, object, refinementObjectSpace); + auto *o = repo.allocateNormalizedPersistentObject(object); + repo.push_back(dynamic_cast(o)); + } + } catch(NGT::Exception &err) { + stringstream msg; + msg << "Fatal inner error. Cannot set up the refinmentObjectSpace. " << err.what(); + NGTThrowException(msg); + } + } +#endif + refinementObjectSpaceForObjectList = 0; +#ifdef NGTQ_QBG + if (property.genuineDataType != ObjectFile::DataTypeNone) { + auto distanceType = property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct + ? 
NGT::ObjectSpace::DistanceTypeDotProduct : property.distanceType; + try { + switch (property.genuineDataType) { + case DataTypeFloat: + refinementObjectSpaceForObjectList = new NGT::ObjectSpaceRepository(objectList.pseudoDimension, + typeid(float), + distanceType, + property.maxMagnitude); + break; + case DataTypeFloat16: + refinementObjectSpaceForObjectList = new NGT::ObjectSpaceRepository(objectList.pseudoDimension, + typeid(NGT::float16), + distanceType, + property.maxMagnitude); + break; + default: + stringstream msg; + msg << "Invalid refinement data type for the object list. " << property.genuineDataType; + NGTThrowException(msg); + break; + } + } catch(NGT::Exception &err) { + stringstream msg; + msg << "Fatal inner error. Cannot set up the refinmentObjectSpac for the object liste. " << err.what(); + NGTThrowException(msg); } } #endif NGT::Property globalProperty; globalCodebookIndex.getProperty(globalProperty); size_t sizeoftype = 0; + switch (property.localClusterDataType) { +#ifdef NGT_IVI + case ClusterDataTypeNQ: + { + quantizedObjectDistance = new NonQuantizedObjectDistance(*this); + generateResidualObject = new GenerateResidualObjectFloat; + sizeoftype = sizeof(float); + break; + } + case ClusterDataTypeSQSU8: + { + if (property.distanceType == NGT::ObjectSpace::DistanceTypeInnerProduct) { + quantizedObjectDistance = new ScalarQuantizedInt8ObjectDistance(*this, property.distanceType); + } else { + quantizedObjectDistance = new ScalarQuantizedInt8ObjectDistance(*this, property.distanceType); + } + generateResidualObject = new GenerateResidualObjectFloat; + sizeoftype = sizeof(float); + break; + } +#endif + default: + { #ifdef NGT_HALF_FLOAT - if (globalProperty.objectType == NGT::Property::ObjectType::Float || - globalProperty.objectType == NGT::Property::ObjectType::Float16) { + if (globalProperty.objectType == NGT::Property::ObjectType::Float || + globalProperty.objectType == NGT::Property::ObjectType::Float16) { #else - if (globalProperty.objectType 
== NGT::Property::ObjectType::Float) { + if (globalProperty.objectType == NGT::Property::ObjectType::Float) { #endif - if (property.localIDByteSize == 4) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; - } else if (property.localIDByteSize == 2) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; + if (property.localIDByteSize == 4) { + quantizedObjectDistance = new QuantizedObjectDistanceFloat; + } else if (property.localIDByteSize == 2) { + quantizedObjectDistance = new QuantizedObjectDistanceFloat; #ifdef NGTQ_QBG - } else if (property.localIDByteSize == 1) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; + } else if (property.localIDByteSize == 1) { + quantizedObjectDistance = new QuantizedObjectDistanceFloat; #endif - } else { - std::cerr << "Invalid localIDByteSize : " << property.localIDByteSize << std::endl; - abort(); - } - generateResidualObject = new GenerateResidualObjectFloat; - sizeoftype = sizeof(float); - } else if (globalProperty.objectType == NGT::Property::ObjectType::Uint8) { - if (property.localIDByteSize == 4) { - quantizedObjectDistance = new QuantizedObjectDistanceUint8; - } else if (property.localIDByteSize == 2) { - quantizedObjectDistance = new QuantizedObjectDistanceUint8; + } else { + std::cerr << "Invalid localIDByteSize : " << property.localIDByteSize << std::endl; + abort(); + } + generateResidualObject = new GenerateResidualObjectFloat; + sizeoftype = sizeof(float); + } else if (globalProperty.objectType == NGT::Property::ObjectType::Uint8) { + if (property.localIDByteSize == 4) { + quantizedObjectDistance = new QuantizedObjectDistanceUint8; + } else if (property.localIDByteSize == 2) { + quantizedObjectDistance = new QuantizedObjectDistanceUint8; #ifdef NGTQ_QBG - } else if (property.localIDByteSize == 1) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; + } else if (property.localIDByteSize == 1) { + quantizedObjectDistance = new QuantizedObjectDistanceFloat; #endif - } else 
{ - std::cerr << "Inconsistent localIDByteSize and ObjectType. " << property.localIDByteSize << ":" << globalProperty.objectType << std::endl; - abort(); - } + } else { + std::cerr << "Inconsistent localIDByteSize and ObjectType. " << property.localIDByteSize << ":" << globalProperty.objectType << std::endl; + abort(); + } #ifdef NGTQ_VECTOR_OBJECT - generateResidualObject = new GenerateResidualObjectFloat; - sizeoftype = sizeof(float); + generateResidualObject = new GenerateResidualObjectFloat; + sizeoftype = sizeof(float); #else - generateResidualObject = new GenerateResidualObjectUint8; - sizeoftype = sizeof(uint8_t); + generateResidualObject = new GenerateResidualObjectUint8; + sizeoftype = sizeof(uint8_t); #endif - } else { - cerr << "NGTQ::open: Fatal Inner Error: invalid object type. " << globalProperty.objectType << endl; - cerr << " check NGT version consistency between the caller and the library." << endl; - abort(); + } else { + cerr << "NGTQ::open: Fatal Inner Error: invalid object type. " << globalProperty.objectType << endl; + cerr << " check NGT version consistency between the caller and the library." << endl; + abort(); + } + break; + } + } + if (quantizedObjectDistance == 0) { + NGTThrowException("Inner fatal error! quantizeObjectDistance is invalid."); + } + if (generateResidualObject == 0) { + NGTThrowException("Inner fatal error! generateResidualObject is invalid."); + } + if (sizeoftype == 0) { + NGTThrowException("Inner fatal error! 
sizeoftype is invalid."); } assert(quantizedObjectDistance != 0); #ifdef NGTQ_QBG @@ -3113,10 +3995,18 @@ class QuantizerInstance : public Quantizer { void close() { objectList.close(); #ifdef NGTQ_OBJECT_IN_MEMORY - for (size_t i = 1; i < objectListOnMemory.size(); i++) { - globalCodebookIndex.deleteObject(objectListOnMemory.get(i)); + if (refinementObjectSpace != 0) { +#ifndef NGT_SHARED_MEMORY_ALLOCATOR + refinementObjectSpace->deleteAll(); +#endif + delete refinementObjectSpace; + refinementObjectSpace = 0; } #endif + if (refinementObjectSpaceForObjectList != 0) { + delete refinementObjectSpaceForObjectList; + refinementObjectSpaceForObjectList = 0; + } closeCodebooks(); if (quantizedObjectDistance != 0) { delete quantizedObjectDistance; @@ -3304,7 +4194,7 @@ class QuantizerInstance : public Quantizer { float lr = property.localRange; size_t localCentroidLimit = property.localCentroidLimit; if (property.localCodebookState) { - lr = FLT_MAX; + lr = FLT_MAX; localCentroidLimit = 0; } vector lids; @@ -3342,7 +4232,7 @@ class QuantizerInstance : public Quantizer { localCentroidLimit *= property.localClusteringSampleCoefficient; } if (property.localCodebookState) { - lr = FLT_MAX; + lr = FLT_MAX; localCentroidLimit = 0; } else { if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) { @@ -3390,7 +4280,7 @@ class QuantizerInstance : public Quantizer { localCentroidLimit *= property.localClusteringSampleCoefficient; } if (property.localCodebookState) { - lr = FLT_MAX; + lr = FLT_MAX; localCentroidLimit = 0; } else { if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) { @@ -3605,14 +4495,15 @@ class QuantizerInstance : public Quantizer { #ifdef NGTQ_VECTOR_OBJECT void getBlobIDFromObjectToBlobIndex(const vector, size_t>> &objects, - vector &ids) + vector &ids, + vector &ids2oidx) #else void getBlobIDFromObjectToBlobIndex(const vector> &objects, - vector &ids) + vector &ids, + vector &ids2oidx) #endif { ids.clear(); - 
ids.resize(objects.size()); #ifdef GET_BLOB_EVAL size_t identicalObjectCount = 0; #endif @@ -3624,25 +4515,26 @@ class QuantizerInstance : public Quantizer { << ":" << objects.size(); NGTThrowException(msg); } - ids[idx].id = objectToBlobIndex[objects[idx].second - 1] + 1; - ids[idx].distance = 0.0; - ids[idx].identical = true; + for (auto bid : objectToBlobIndex[objects[idx].second - 1]) { + ids.emplace_back(NGT::Index::InsertionResult(bid + 1, true, 0.0)); + ids2oidx.emplace_back(static_cast(idx)); #ifdef GET_BLOB_EVAL - { - NGT::ObjectDistances result; - NGT::SearchContainer sc(*objects[idx].first); - sc.setResults(&result); - sc.setSize(50); - sc.radius = FLT_MAX; - sc.setEpsilon(0.1); - globalCodebookIndex.search(sc); - //std::cerr << "insert:Eval: "; - if (result[0].id == ids[idx].id) { - identicalObjectCount++; - } else { + { + NGT::ObjectDistances result; + NGT::SearchContainer sc(*objects[idx].first); + sc.setResults(&result); + sc.setSize(50); + sc.radius = FLT_MAX; + sc.setEpsilon(0.1); + globalCodebookIndex.search(sc); + //std::cerr << "insert:Eval: "; + if (result[0].id == ids[idx].id) { + identicalObjectCount++; + } else { + } } - } #endif + } } #ifdef GET_BLOB_EVAL std::cerr << identicalObjectCount << "/" << objects.size() << std::endl; @@ -3692,7 +4584,6 @@ class QuantizerInstance : public Quantizer { stringstream msg; msg << "buildGlobalCodebookWithQIDIndex: fatal inner error. " << err.what() << " : ID=" << id << " size=" << invertedIndex.size(); NGTThrowException(msg); - NGTThrowException(msg); } } std::cerr << "creating the index..." 
<< std::endl; @@ -3842,7 +4733,8 @@ class QuantizerInstance : public Quantizer { for (size_t i = 0; i < localCodebookNo; i++) { lcodebook.push_back(&static_cast(localCodebookIndexes[i].getIndex())); } - vector ids; + std::vector ids; + std::vector ids2oidx; if (property.centroidCreationMode == CentroidCreationModeStaticLayer || property.centroidCreationMode == CentroidCreationModeStatic) { if (objectToBlobIndex.empty()) { @@ -3855,7 +4747,7 @@ class QuantizerInstance : public Quantizer { invertedIndex.reserve(invertedIndex.size() + objects.size()); #endif } else { - getBlobIDFromObjectToBlobIndex(objects, ids); + getBlobIDFromObjectToBlobIndex(objects, ids, ids2oidx); } } else { std::stringstream msg; @@ -3863,8 +4755,14 @@ class QuantizerInstance : public Quantizer { NGTThrowException(msg); } vector localData; - for (size_t i = 0; i < ids.size(); i++) { - setGlobalCodeToInvertedEntry(ids[i], objects[i], localData); + if (ids2oidx.empty()) { + for (size_t i = 0; i < ids.size(); i++) { + setGlobalCodeToInvertedEntry(ids[i], objects[i], localData); + } + } else { + for (size_t i = 0; i < ids.size(); i++) { + setGlobalCodeToInvertedEntry(ids[i], objects[ids2oidx[i]], localData); + } } float subspaceObjects[localData.size()][globalCodebookIndex.getObjectSpace().getPaddedDimension()]; bool error = false; @@ -3872,6 +4770,10 @@ class QuantizerInstance : public Quantizer { #pragma omp parallel for for (size_t i = 0; i < localData.size(); i++) { if (error) continue; + size_t objidx = i; + if (!ids2oidx.empty()) { + objidx = ids2oidx[i]; + } IIEntry &invertedIndexEntry = *invertedIndex.at(localData[i].iiIdx); #ifdef NGTQ_SHARED_INVERTED_INDEX #ifdef NGTQ_QBG @@ -3884,25 +4786,24 @@ class QuantizerInstance : public Quantizer { #endif #else #ifdef NGTQ_QBG - #ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS if (!rotation.empty()) { #ifdef NGTQ_VECTOR_OBJECT - rotation.mul(objects[i].first.data()); + rotation.mul(objects[objidx].first.data()); #else - 
rotation.mul(static_cast(objects[i].first->getPointer())); + rotation.mul(static_cast(objects[objidx].first->getPointer())); #endif } #endif try { #ifdef NGTQ_VECTOR_OBJECT - (*generateResidualObject)(objects[i].first, // object + (*generateResidualObject)(objects[objidx].first, // object invertedIndexEntry.subspaceID, - subspaceObjects[i]); // subspace objects + subspaceObjects[objidx]); // subspace objects #else - (*generateResidualObject)(*objects[i].first, // object + (*generateResidualObject)(*objects[objidx].first, // object invertedIndexEntry.subspaceID, - subspaceObjects[i]); // subspace objects + subspaceObjects[objidx]); // subspace objects #endif } catch(NGT::Exception &err) { if (errorMessage.empty()) { @@ -3912,7 +4813,7 @@ class QuantizerInstance : public Quantizer { continue; } #ifndef NGTQG_ROTATED_GLOBAL_CODEBOOKS - rotation.mul(subspaceObjects[i]); + rotation.mul(subspaceObjects[objidx]); #endif #else (*generateResidualObject)(invertedIndexEntry[localData[i].iiLocalIdx].id, @@ -3942,11 +4843,11 @@ class QuantizerInstance : public Quantizer { if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) { buildMultipleLocalCodebooks(localCodebookIndexes.data(), localCodebookNo, property.localCentroidLimit); (*generateResidualObject).set(localCodebookIndexes.data(), localCodebookNo); - property.localCodebookState = true; - localCodebookFull = false; + property.localCodebookState = true; + localCodebookFull = false; replaceInvertedIndexEntry(localCodebookNo); } else { - property.localCodebookState = true; + property.localCodebookState = true; localCodebookFull = false; } } @@ -4049,9 +4950,8 @@ class QuantizerInstance : public Quantizer { } #endif - void setupInvertedIndex(std::vector> &qCodebook, - std::vector &codebookIndex, - std::vector &objectIndex) { + void setupInvertedIndex(std::vector &codebookIndex, + std::vector> &objectIndex) { #if !defined(NGTQ_QBG) std::cerr << "setupInvertedIndex: Not implemented." 
<< std::endl; abort(); @@ -4088,7 +4988,9 @@ class QuantizerInstance : public Quantizer { objectIndex.clear(); std::vector invertedIndexCount(codebookIndex.size()); for (size_t idx = 0; idx < objectToBlobIndex.size(); idx++) { - invertedIndexCount[objectToBlobIndex[idx]]++; + for (auto bid : objectToBlobIndex[idx]) { + invertedIndexCount[bid]++; + } } for (size_t idx = 0; idx < codebookIndex.size(); idx++) { auto gid = idx + 1; @@ -4155,52 +5057,10 @@ class QuantizerInstance : public Quantizer { gp.set(globalProperty); lp.set(localProperty); - gp.edgeSizeForSearch = 40; - lp.edgeSizeForSearch = 40; + gp.edgeSizeForSearch = 40; + lp.edgeSizeForSearch = 40; lp.objectType = NGT::Index::Property::ObjectType::Float; -#ifdef NGTQ_QBG - if (property.genuineDimension > property.dimension) { - stringstream msg; - msg << "NGTQ::Quantizer::create: dimension must be larger than genuineDimension. " << property.dimension << ":" << property.genuineDimension << std::endl; - NGTThrowException(msg); - } -#endif - gp.dimension = property.dimension; - if (gp.dimension == 0) { - stringstream msg; - msg << "NGTQ::Quantizer::create: specified dimension is zero!"; - NGTThrowException(msg); - } - if (property.localDivisionNo == 0) { - NGTThrowException("NGTQ::Quantizer::create: # of subvectors is zero"); - } - if (property.localDivisionNo != 1 && property.dimension % property.localDivisionNo != 0) { - stringstream msg; - msg << "NGTQ::Quantizer::create: The combination of dimension and localDivisionNo is invalid. " - << "the localDivisionNo must be a divisor of the dimension. 
" - << property.dimension << ":" << property.localDivisionNo; - NGTThrowException(msg); - } - lp.dimension = property.dimension / property.localDivisionNo; - - switch (property.dataType) { - case DataTypeFloat: - gp.objectType = NGT::Index::Property::ObjectType::Float; - break; - case DataTypeFloat16: - gp.objectType = NGT::Index::Property::ObjectType::Float16; - break; - case DataTypeUint8: - gp.objectType = NGT::Index::Property::ObjectType::Uint8; - break; - default: - { - stringstream msg; - msg << "NGTQ::Quantizer::create: Inner error! Invalid data type."; - NGTThrowException(msg); - } - } switch (property.distanceType) { case DistanceType::DistanceTypeL1: @@ -4245,10 +5105,14 @@ class QuantizerInstance : public Quantizer { gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedL2; lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; break; -#ifdef NGT_INNER_PRODUCT +#ifdef NGTQ_QBG case DistanceType::DistanceTypeInnerProduct: gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; + if (property.dimension == property.genuineDimension) { + property.dimension++; + } + property.genuineDimension++; break; #endif default: @@ -4259,6 +5123,31 @@ class QuantizerInstance : public Quantizer { } } +#ifdef NGTQ_QBG + if (property.genuineDimension > property.dimension) { + stringstream msg; + msg << "NGTQ::Quantizer::create: dimension must be larger than genuineDimension. 
" << property.dimension << ":" << property.genuineDimension << std::endl; + NGTThrowException(msg); + } +#endif + gp.dimension = property.dimension; + if (gp.dimension == 0) { + stringstream msg; + msg << "NGTQ::Quantizer::create: specified dimension is zero!"; + NGTThrowException(msg); + } + if (property.localDivisionNo == 0) { + NGTThrowException("NGTQ::Quantizer::create: # of subvectors is zero"); + } + if (property.localDivisionNo != 1 && property.dimension % property.localDivisionNo != 0) { + stringstream msg; + msg << "NGTQ::Quantizer::create: The combination of dimension and localDivisionNo is invalid. " + << "the localDivisionNo must be a divisor of the dimension. " + << property.dimension << ":" << property.localDivisionNo; + NGTThrowException(msg); + } + lp.dimension = property.dimension / property.localDivisionNo; + #ifdef NGTQ_QBG createEmptyIndex(index, gp, lp, rotation, objectFile); #else @@ -4908,8 +5797,8 @@ class Quantization { class Index { public: Index():quantizer(0) {} - Index(const string& index, bool rdOnly = false):quantizer(0) { - open(index, rdOnly); + Index(const string& index, bool rdOnly = false, DataType refinementDataType = DataTypeAny):quantizer(0) { + open(index, rdOnly, refinementDataType); } ~Index() { close(); } @@ -4930,17 +5819,10 @@ class Quantization { property.setup(property); NGTQ::Quantizer *quantizer = NGTQ::Quantization::generate(property); try { -#ifdef NGTQ_QBG - if (property.dimension == 0) { - property.dimension = property.genuineDimension; - } - if (property.dimension % 4 != 0) { - property.dimension = ((property.dimension - 1) / 4 + 1) * 4; - } quantizer->property = property; +#ifdef NGTQ_QBG quantizer->create(index, globalProperty, localProperty, rotation, objectFile); #else - quantizer->property = property; quantizer->create(index, globalProperty, localProperty); #endif if (property.dimension == 0) { @@ -4970,8 +5852,8 @@ class Quantization { #endif #ifndef NGTQ_QBG - static void rebuild(const string &indexName, 
- const string &rebuiltIndexName + static void rebuild(const string &indexName, + const string &rebuiltIndexName ) { const string srcObjectList = indexName + "/obj"; @@ -4998,12 +5880,12 @@ class Quantization { } #endif - void open(const string &index, bool readOnly = false) { + void open(const string &index, bool readOnly = false, DataType refinementDataType = DataTypeAny) { close(); NGT::Property globalProperty; globalProperty.clear(); globalProperty.edgeSizeForSearch = 40; - quantizer = getQuantizer(index, globalProperty, readOnly); + quantizer = getQuantizer(index, globalProperty, readOnly, refinementDataType); if ((quantizer->property.quantizerType == NGTQ::QuantizerTypeQG) && readOnly) { quantizer->closeCodebooks(); } @@ -5036,19 +5918,17 @@ class Quantization { getQuantizer().createIndex(beginID, endID); } - void createIndex(std::vector> &quantizationCodebook, - std::vector &codebookIndex, - std::vector &objectIndex, + void createIndex(std::vector &codebookIndex, + std::vector> &objectIndex, size_t beginID = 1, size_t endID = 0) { - setupInvertedIndex(quantizationCodebook, codebookIndex, objectIndex); + setupInvertedIndex(codebookIndex, objectIndex); createIndex(beginID, endID); } #endif - void setupInvertedIndex(std::vector> &quantizationCodebook, - std::vector &codebookIndex, - std::vector &objectIndex) { - getQuantizer().setupInvertedIndex(quantizationCodebook, codebookIndex, objectIndex); + void setupInvertedIndex(std::vector &codebookIndex, + std::vector> &objectIndex) { + getQuantizer().setupInvertedIndex(codebookIndex, objectIndex); } @@ -5119,8 +5999,35 @@ class Quantization { return object; } + void setQuantizationFromMaxMin(float max, float min) { + float offset; + float scale; + if (getQuantizer().property.localClusterDataType == ClusterDataTypeSQSU8) { + offset = 0.0; + scale = std::max(fabs(max), fabs(min)); + } else { + offset = min; + scale = max - offset; + } + setQuantization(scale, offset); + } + + void setQuantization(float scale, float 
offset) { + getQuantizer().property.scalarQuantizationScale = scale; + getQuantizer().property.scalarQuantizationOffset = offset; + } + + void setMaxMagnitude(float maxMagnitude) { + getQuantizer().property.maxMagnitude = maxMagnitude; + } + + void saveProperty() { + getQuantizer().saveProperty(); + } + protected: - static NGTQ::Quantizer *getQuantizer(const string &index, NGT::Property &globalProperty, bool readOnly) { + static NGTQ::Quantizer *getQuantizer(const string &index, NGT::Property &globalProperty, bool readOnly, + DataType refinementDataType = DataTypeAny) { NGTQ::Property property; try { property.load(index); @@ -5134,7 +6041,8 @@ class Quantization { NGTThrowException("NGTQ::Index: Cannot get quantizer."); } try { - quantizer->open(index, globalProperty, property.quantizerType == NGTQ::QuantizerTypeQBG ? readOnly : false); + quantizer->open(index, globalProperty, property.quantizerType == NGTQ::QuantizerTypeQBG ? readOnly : false, + refinementDataType); } catch(NGT::Exception &err) { delete quantizer; throw err; @@ -5147,4 +6055,41 @@ class Quantization { bool verbose; }; + template + void NGTQ::ObjectProcessingStream::arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects) { +#ifdef NGTQ_QBG + if (&quantizer == 0) { + NGTThrowException("quantizer is invalid."); + } + for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { + std::vector object; + quantizer.objectList.get(invertedIndexObjects[oidx].id, object); +#ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS + quantizer.rotation.mul(object); +#endif + arrangeObject(oidx, object.data()); + } +#endif + } + + inline void NGTQ::ScalarQuantizedInt8ObjectProcessingStream::arrange(NGTQ::InvertedIndexEntry &invertedIndexObjects) { +#ifdef NGTQ_QBG + if (&quantizer == 0) { + NGTThrowException("quantizer is invalid."); + } + float scale = quantizer.property.scalarQuantizationScale; + float offset = quantizer.property.scalarQuantizationOffset; + auto shift = quantizer.property.distanceType == 
DistanceType::DistanceTypeInnerProduct && + *dataTypeInfo == typeid(NGT::qsint8); + for (size_t oidx = 0; oidx < invertedIndexObjects.size(); oidx++) { + std::vector object; + quantizer.objectList.get(invertedIndexObjects[oidx].id, object); +#ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS + quantizer.rotation.mul(object); +#endif + arrangeObject(oidx, object, scale, offset, shift); + } +#endif + } + } // namespace NGTQ diff --git a/lib/NGT/ObjectRepository.h b/lib/NGT/ObjectRepository.h index 39e71af..725cccf 100644 --- a/lib/NGT/ObjectRepository.h +++ b/lib/NGT/ObjectRepository.h @@ -126,6 +126,7 @@ namespace NGT { if (dataSize > 0) { reserve(size() + dataSize); } + size_t dim = innerProduct ? dimension - 1 : dimension; std::string line; size_t lineNo = 0; while (getline(is, line)) { @@ -136,6 +137,7 @@ namespace NGT { break; } std::vector object; + object.reserve(dim); try { extractObjectFromText(line, "\t, ", object); PersistentObject *obj = 0; @@ -179,7 +181,7 @@ namespace NGT { try { obj = allocateNormalizedPersistentObject(object); } catch (Exception &err) { - std::cerr << err.what() << " continue..." << std::endl; + std::cerr << err.what() << " " << typeid(T).name() << ". continue..." << std::endl; obj = allocatePersistentObject(object); } push_back(obj); @@ -250,6 +252,12 @@ namespace NGT { obj[i] = static_cast(o[i]); } #endif + } else if (type == typeid(qsint8)) { + uint8_t *obj = static_cast(object); + for (size_t i = 0; i < size; i++) { + auto i8 = static_cast(o[i]); + obj[i] = *reinterpret_cast(&i8); + } #ifdef NGT_BFLOAT } else if (type == typeid(bfloat16)) { bfloat16 *obj = static_cast(object); @@ -273,16 +281,25 @@ namespace NGT { template Object *allocateObject(T *o, size_t size) { size_t osize = paddedByteSize; + if (size == 0) { + NGTThrowException("ObjectSpace::allocateObject: Fatal error! The specified dimension is zero."); + } if (sparse) { size_t vsize = size * (type == typeid(float) ? 4 : 1); osize = osize < vsize ? 
vsize : osize; + } else if (innerProduct) { + if (dimension != size && (dimension - 1) != size) { + std::stringstream msg; + msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. " + << "The indexed objects=" << dimension << " The specified object=" << size + << " for Inner product!"; + NGTThrowException(msg); + } } else { - if (size != 0 && - ((innerProduct && dimension != size && (dimension - 1) != size) || - (!innerProduct && dimension != size))) { + if (dimension != size) { std::stringstream msg; - msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. The indexed objects=" - << dimension << " The specified object=" << size; + msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. " + << "The indexed objects=" << dimension << " The specified object=" << size; NGTThrowException(msg); } } @@ -447,6 +464,7 @@ namespace NGT { void setInnerProduct() { innerProduct = true; } size_t getByteSize() { return byteSize; } size_t insert(PersistentObject *obj) { return Parent::insert(obj); } + size_t insert(size_t id, PersistentObject *obj) { return Parent::insert(id, obj); } const size_t dimension; const std::type_info &type; protected: diff --git a/lib/NGT/ObjectSpace.cpp b/lib/NGT/ObjectSpace.cpp index dbbd7bd..35c58be 100644 --- a/lib/NGT/ObjectSpace.cpp +++ b/lib/NGT/ObjectSpace.cpp @@ -22,7 +22,9 @@ NGT::Distance NGT::ObjectSpace::compareWithL1(NGT::Object &o1, NGT::Object &o2) { auto dim = getPaddedDimension(); NGT::Distance d; - if (getObjectType() == typeid(uint8_t)) { + if (getObjectType() == typeid(uint8_t) || + getObjectType() == typeid(quint8) || + getObjectType() == typeid(qsint8)) { d = PrimitiveComparator::compareL1(reinterpret_cast(o1.getPointer()), reinterpret_cast(o2.getPointer()), dim); #ifdef NGT_HALF_FLOAT @@ -35,7 +37,8 @@ NGT::Distance NGT::ObjectSpace::compareWithL1(NGT::Object &o1, NGT::Object &o2) reinterpret_cast(o2.getPointer()), dim); } else { 
std::stringstream msg; - msg << "ObjectSpace::compareWithL1: Fatal Inner Error! Unexpected object type."; + msg << "ObjectSpace::compareWithL1: Fatal Inner Error! Unexpected object type. " + << getObjectType().name(); NGTThrowException(msg); } return d; diff --git a/lib/NGT/ObjectSpace.h b/lib/NGT/ObjectSpace.h index a2f7b98..ce3b916 100644 --- a/lib/NGT/ObjectSpace.h +++ b/lib/NGT/ObjectSpace.h @@ -175,9 +175,8 @@ namespace NGT { DistanceTypeJaccard = 7, DistanceTypeSparseJaccard = 8, DistanceTypeNormalizedL2 = 9, -#ifdef NGT_INNER_PRODUCT DistanceTypeInnerProduct = 10, -#endif + DistanceTypeDotProduct = 11, DistanceTypePoincare = 100, // added by Nyapicom DistanceTypeLorentz = 101 // added by Nyapicom }; @@ -190,6 +189,8 @@ namespace NGT { , Float16 = 3 #endif + , + Qsuint8 = 7 #ifdef NGT_BFLOAT , Bfloat16 = 5 @@ -198,10 +199,15 @@ namespace NGT { typedef std::priority_queue, std::less > ResultSet; - ObjectSpace(size_t d):dimension(d), distanceType(DistanceTypeNone), comparator(0), normalization(false), - prefetchOffset(-1), prefetchSize(-1) + ObjectSpace(size_t d):dimension(d), distanceType(DistanceTypeNone), comparator(0), comparatorForSearch(0), + normalization(false), + prefetchOffset(-1), prefetchSize(-1), quantizationScale(0.0), quantizationOffset(0.0), + magnitude(-1) {} - virtual ~ObjectSpace() { if (comparator != 0) { delete comparator; } } + virtual ~ObjectSpace() { + if (comparator != 0) { delete comparator; } + if (comparatorForSearch != 0) { delete comparatorForSearch; } + } #ifdef NGT_SHARED_MEMORY_ALLOCATOR virtual void open(const std::string &f, size_t shareMemorySize) = 0; @@ -214,9 +220,17 @@ namespace NGT { virtual size_t insert(PersistentObject *obj) = 0; #else virtual size_t insert(Object *obj) = 0; + virtual void deleteAll() = 0; #endif Comparator &getComparator() { return *comparator; } + Comparator &getComparatorForSearch() { + if (comparatorForSearch != 0) { + return *comparatorForSearch; + } else { + return *comparator; + } + } virtual 
void serialize(const std::string &of) = 0; virtual void deserialize(const std::string &ifile) = 0; @@ -235,6 +249,7 @@ namespace NGT { virtual void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results) = 0; + virtual std::pair getMaxMin(float cut = 0.01, size_t size = 0) = 0; virtual const std::type_info &getObjectType() = 0; virtual void show(std::ostream &os, Object &object) = 0; virtual size_t getSize() = 0; @@ -265,20 +280,18 @@ namespace NGT { #endif virtual std::vector getObject(Object &object) = 0; virtual void getObjects(const std::vector &idxs, std::vector> &vs) = 0; -#ifdef NGT_INNER_PRODUCT virtual float computeMaxMagnitude(ObjectID beginId) = 0; #ifdef NGT_SHARED_MEMORY_ALLOCATOR virtual void setMagnitude(float maxMag, NGT::PersistentRepository &graphNodes, NGT::ObjectID beginID) = 0; #else virtual void setMagnitude(float maxMag, NGT::Repository &graphNodes, ObjectID beginId) = 0; -#endif #endif DistanceType getDistanceType() { return distanceType; } size_t getDimension() { return dimension; } size_t getPaddedDimension() { return ((dimension - 1) / 16 + 1) * 16; } template - void normalize(T *data, size_t dim) { + static void normalize(T *data, size_t dim) { float sum = 0.0; for (size_t i = 0; i < dim; i++) { sum += static_cast(data[i]) * static_cast(data[i]); @@ -292,7 +305,8 @@ namespace NGT { } } std::stringstream msg; - msg << "ObjectSpace::normalize: Error! the object is an invalid zero vector for the cosine similarity."; + msg << "ObjectSpace::normalize: Error! the object is an invalid zero vector for the cosine similarity. 
" + << typeid(T).name() << "."; NGTThrowException(msg); } sum = sqrt(sum); @@ -300,6 +314,12 @@ namespace NGT { data[i] = static_cast(data[i]) / sum; } } + + template + static void normalize(std::vector &object) { + ObjectSpace::normalize(object.data(), object.size()); + } + int32_t getPrefetchOffset() { return prefetchOffset; } int32_t setPrefetchOffset(int offset) { if (offset > 0) { @@ -321,12 +341,154 @@ namespace NGT { return prefetchSize; } - bool isNormalizedDistance() { - return (getDistanceType() == ObjectSpace::DistanceTypeNormalizedAngle) || - (getDistanceType() == ObjectSpace::DistanceTypeNormalizedCosine) || - (getDistanceType() == ObjectSpace::DistanceTypeNormalizedL2); + bool quantizationIsEnabled() { return quantizationScale != 0.0; } + void setQuantization(float scale, float offset) { + quantizationScale = scale; + quantizationOffset = offset; + } + std::pair getQuantization() { + return std::make_pair(quantizationScale, quantizationOffset); + } + + template static void quantizeSymmetrically(T *vector, size_t dim, float max, float scale) { + auto fmax = max + 0.5; + for (size_t i = 0; i < dim; i++) { + float fv = static_cast(vector[i]); + fv = std::round(fv / scale * fmax); + fv = fv < -max ? -max : fv; + fv = fv > max ? 
max : fv; + vector[i] = static_cast(fv); + } + } + + template static void quantizeSymmetrically(std::vector &vector, float max, float scale) { + quantizeSymmetrically(vector.data(), vector.size(), max, scale); + } + + template static void dequantizeSymmetrically(T *vector, int8_t *cvector, size_t dimension, float max, float scale) { + auto fmax = max + 0.5; + for (size_t i = 0; i < dimension; i++) { + float fv = static_cast(cvector[i]); + fv = (fv / fmax) * scale; + vector[i] = static_cast(fv); + } + } + + template static void dequantizeSymmetrically(std::vector &vector, int8_t *cvector, size_t dimension, float max, float scale) { + vector.resize(dimension); + dequantizeSymmetrically(vector.data(), cvector, dimension, max, scale); + } + + template static void quantize(T *vector, size_t dim, float max, float offset, float scale) { + auto fmax = max + 1.0; + for (size_t i = 0; i < dim; i++) { + float fv = static_cast(vector[i]); + fv = floorf((fv - offset) / scale * fmax); + fv = fv < 0 ? 0 : fv; + fv = fv > max ? 
max : fv; + vector[i] = static_cast(fv); + } + } + + template static void quantize(std::vector &vector, float max, float offset, float scale) { + quantize(vector.data(), vector.size(), max, offset, scale); } + template static void dequantize(T *vector, uint8_t *cvector, size_t dimension, float max, float offset, float scale) { + auto fmax = max + 1.0; + for (size_t i = 0; i < dimension; i++) { + float fv = static_cast(cvector[i]) + 0.5; + fv = (fv / fmax) * scale + offset; + vector[i] = static_cast(fv); + } + } + + template static void dequantize(std::vector &vector, uint8_t *cvector, size_t dimension, float max, float offset, float scale) { + vector.resize(dimension); + dequantize(vector.data(), cvector, vector.size(), max, offset, scale); + } + + template void quantizeToQint8(std::vector &vector, float offset, float scale, bool shift = false) { + quantizeToQint8(vector, getObjectType(), getDimension(), offset, scale, shift); + } + + template static void quantizeToQint8(std::vector &vector, const std::type_info &t, size_t dimension, + float offset, float scale, bool shift = false) { + if (t == typeid(qsint8)) { + quantizeSymmetrically(vector, 127.0, scale); + if (shift) { + for (size_t i = 0; i < dimension; i++) { + vector[i] += 127; + } + } + } else { + std::stringstream msg; + msg << "not supported type. " << t.name(); + NGTThrowException(msg); + } + } + template void quantizeToQint8(std::vector &vector, bool shift = false) { + if (quantizationOffset == 0.0 && quantizationScale == 0.0) { + NGTThrowException("Error. 
Quantization parameters are not set yet."); + } + quantizeToQint8(vector, quantizationOffset, quantizationScale, shift); + } + static void quantizeToQint8(float *vector, size_t dimension, uint8_t *cvector, + ObjectType type, + float offset, float scale, bool shift = false) { + if (type == Qsuint8) { + quantizeSymmetrically(vector, dimension, 127.0, scale); + if (shift) { + auto *cv = reinterpret_cast(cvector); + for (size_t i = 0; i < dimension; i++) { + cv[i] = static_cast(vector[i] + 127); + } + } else { + auto *cv = reinterpret_cast(cvector); + for (size_t i = 0; i < dimension; i++) { + cv[i] = static_cast(vector[i]); + } + } + } else { + std::stringstream msg; + msg << "not supported type. " << type; + NGTThrowException(msg); + } + } + template void dequantizeFromQint8(std::vector &vector, uint8_t *cvector, + bool shift = false) { + dequantizeFromQint8(vector, cvector, dimension, getObjectType(), quantizationOffset, + quantizationScale, shift); + } + template static void dequantizeFromQint8(std::vector &vector, uint8_t *cvector, size_t dimension, + const std::type_info &t, + float offset, float scale, bool shift = false) { + if (t == typeid(qsint8)) { + dequantizeSymmetrically(vector, reinterpret_cast(cvector), dimension, 127.0, scale); + if (shift) { + auto *cv = reinterpret_cast(cvector); + for (size_t i = 0; i < dimension; i++) { + cv[i] = static_cast(vector[i] + 127); + } + } else { + auto *cv = reinterpret_cast(cvector); + for (size_t i = 0; i < dimension; i++) { + cv[i] = static_cast(vector[i]); + } + } + } else { + std::stringstream msg; + msg << "not supported type. 
" << t.name(); + NGTThrowException(msg); + } + } + bool isQintObjectType() { + const std::type_info &t = getObjectType(); + if (t == typeid(qsint8)) return true; + return false; + } + bool isNormalizedDistance() { return normalization; } + NGT::Distance compareWithL1(NGT::Object &o1, NGT::Object &o2); #ifdef NGT_SHARED_MEMORY_ALLOCATOR NGT::Distance compareWithL1(NGT::Object &o1, NGT::PersistentObject &o2); @@ -336,9 +498,13 @@ namespace NGT { const size_t dimension; DistanceType distanceType; Comparator *comparator; + Comparator *comparatorForSearch; bool normalization; int32_t prefetchOffset; int32_t prefetchSize; + float quantizationScale; + float quantizationOffset; + float magnitude; }; class BaseObject { @@ -360,7 +526,7 @@ namespace NGT { NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize); if (is.eof()) { std::stringstream msg; - msg << "ObjectSpace::BaseObject: Fatal Error! Read beyond the end of the object file. The object file is corrupted?" << byteSize; + msg << "ObjectSpace::BaseObject: Fatal Error! Read beyond the end of the object file. The object file is corrupted? 
" << byteSize; NGTThrowException(msg); } } @@ -373,6 +539,8 @@ namespace NGT { void *ref = (void*)&(*this)[0]; if (t == typeid(uint8_t)) { NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); + } else if (t == typeid(qsint8)) { + NGT::Serializer::writeAsText(os, (int8_t*)ref, dimension); } else if (t == typeid(float)) { NGT::Serializer::writeAsText(os, (float*)ref, dimension); #ifdef NGT_HALF_FLOAT @@ -429,6 +597,10 @@ namespace NGT { for (size_t d = 0; d < dimension; d++) { *(static_cast(ref) + d) = v[d]; } + } else if (t == typeid(qsint8)) { + for (size_t d = 0; d < dimension; d++) { + *(static_cast(ref) + d) = v[d]; + } } else if (t == typeid(float)) { for (size_t d = 0; d < dimension; d++) { *(static_cast(ref) + d) = v[d]; @@ -510,7 +682,7 @@ namespace NGT { void construct(size_t s) { assert(vector == 0); - size_t allocsize = ((s - 1) / 64 + 1) * 64; + size_t allocsize = ((s - 1) / 64 + 1) * 64; vector = static_cast(MemoryCache::alignedAlloc(allocsize)); memset(vector, 0, allocsize); } @@ -582,7 +754,7 @@ namespace NGT { void construct(size_t s, SharedMemoryAllocator &allocator) { assert(array == 0); assert(s != 0); - size_t allocsize = ((s - 1) / 64 + 1) * 64; + size_t allocsize = ((s - 1) / 64 + 1) * 64; array = allocator.getOffset(new(allocator) uint8_t[allocsize]); memset(getPointer(0, allocator), 0, allocsize); } diff --git a/lib/NGT/ObjectSpaceRepository.h b/lib/NGT/ObjectSpaceRepository.h index 9c636a6..575b493 100644 --- a/lib/NGT/ObjectSpaceRepository.h +++ b/lib/NGT/ObjectSpaceRepository.h @@ -288,30 +288,90 @@ namespace NGT { #endif }; -#ifdef NGT_INNER_PRODUCT class ComparatorInnerProduct : public Comparator { public: #ifdef NGT_SHARED_MEMORY_ALLOCATOR ComparatorInnerProduct(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); + return 
-PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); } double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); + return -PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); } double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); + return -PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); } #else ComparatorInnerProduct(size_t d) : Comparator(d) {} double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); + auto d = PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); + return -d; } #endif }; + class ComparatorInnerProductQsint8Quint8 : public Comparator { + public: +#ifdef NGT_SHARED_MEMORY_ALLOCATOR + ComparatorInnerProductQsint8Quint8(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} + double operator()(Object &objecta, Object &objectb) { + return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb[0], dimension); + } + double operator()(Object &objecta, PersistentObject &objectb) { + return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb.at(0, allocator), dimension); + } + double operator()(PersistentObject &objecta, PersistentObject &objectb) { + return PrimitiveComparator::InnerProductQsint8::compare(&objecta.at(0, allocator), &objectb.at(0, allocator), dimension); + } +#else + ComparatorInnerProductQsint8Quint8(size_t d) : Comparator(d) {} + double 
operator()(Object &objecta, Object &objectb) { + return PrimitiveComparator::InnerProductQsint8::compare(&objecta[0], &objectb[0], dimension); + } +#endif + }; + class ComparatorL2Quint8Quint8 : public Comparator { + public: +#ifdef NGT_SHARED_MEMORY_ALLOCATOR + ComparatorL2Quint8Quint8(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} + double operator()(Object &objecta, Object &objectb) { + return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb[0], dimension); + } + double operator()(Object &objecta, PersistentObject &objectb) { + return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb.at(0, allocator), dimension); + } + double operator()(PersistentObject &objecta, PersistentObject &objectb) { + return PrimitiveComparator::compareL2((quint8*)&objecta.at(0, allocator), (quint8*)&objectb.at(0, allocator), dimension); + } +#else + ComparatorL2Quint8Quint8(size_t d) : Comparator(d) {} + double operator()(Object &objecta, Object &objectb) { + return PrimitiveComparator::compareL2((quint8*)&objecta[0], (quint8*)&objectb[0], dimension); + } +#endif + }; + class ComparatorDotProduct : public Comparator { + public: +#ifdef NGT_SHARED_MEMORY_ALLOCATOR + ComparatorDotProduct(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} + double operator()(Object &objecta, Object &objectb) { + return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); + } + double operator()(Object &objecta, PersistentObject &objectb) { + return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); + } + double operator()(PersistentObject &objecta, PersistentObject &objectb) { + return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); + } +#else + ComparatorDotProduct(size_t d) : Comparator(d) {} + double 
operator()(Object &objecta, Object &objectb) { + return magnitude - PrimitiveComparator::compareDotProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); + } #endif + float magnitude; + }; - ObjectSpaceRepository(size_t d, const std::type_info &ot, DistanceType t) : ObjectSpace(d), ObjectRepository(d, ot) { + ObjectSpaceRepository(size_t d, const std::type_info &ot, DistanceType t, float mag = -1) : ObjectSpace(d), ObjectRepository(d, ot) { size_t objectSize = 0; if (ot == typeid(uint8_t)) { objectSize = sizeof(uint8_t); @@ -321,6 +381,8 @@ namespace NGT { } else if (ot == typeid(float16)) { objectSize = sizeof(float16); #endif + } else if (ot == typeid(qsint8)) { + objectSize = sizeof(qsint8); #ifdef NGT_BFLOAT } else if (ot == typeid(bfloat16)) { objectSize = sizeof(bfloat16); @@ -332,8 +394,9 @@ namespace NGT { } setLength(objectSize * d); setPaddedLength(objectSize * ObjectSpace::getPaddedDimension()); + magnitude = mag; setDistanceType(t); - } + } #ifdef NGT_SHARED_MEMORY_ALLOCATOR void open(const std::string &f, size_t sharedMemorySize) { ObjectRepository::open(f, sharedMemorySize); } @@ -390,7 +453,12 @@ namespace NGT { void setDistanceType(DistanceType t) { if (comparator != 0) { delete comparator; - } + comparator = 0; + } + if (comparatorForSearch != 0) { + delete comparatorForSearch; + comparatorForSearch = 0; + } assert(ObjectSpace::dimension != 0); distanceType = t; switch (distanceType) { @@ -435,12 +503,25 @@ namespace NGT { comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); normalization = true; break; -#ifdef NGT_INNER_PRODUCT case DistanceTypeInnerProduct: - comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - setInnerProduct(); + { + if (typeid(OBJECT_TYPE) == typeid(qsint8)) { + comparator = new 
ObjectSpaceRepository::ComparatorL2Quint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); + comparatorForSearch = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); + } else { + comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); + } + setInnerProduct(); + } + break; + case DistanceTypeDotProduct: + { + auto *comp = new ObjectSpaceRepository::ComparatorDotProduct(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); + comp->magnitude = magnitude; + comparator = comp; + setInnerProduct(); + } break; -#endif #else case DistanceTypeL1: comparator = new ObjectSpaceRepository::ComparatorL1(ObjectSpace::getPaddedDimension()); @@ -482,12 +563,25 @@ namespace NGT { comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension()); normalization = true; break; -#ifdef NGT_INNER_PRODUCT case DistanceTypeInnerProduct: - comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension()); - setInnerProduct(); + { + if (typeid(OBJECT_TYPE) == typeid(qsint8)) { + comparator = new ObjectSpaceRepository::ComparatorL2Quint8Quint8(ObjectSpace::getPaddedDimension()); + comparatorForSearch = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension()); + } else { + comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension()); + } + setInnerProduct(); + } + break; + case DistanceTypeDotProduct: + { + auto *comp = new ObjectSpaceRepository::ComparatorDotProduct(ObjectSpace::getPaddedDimension()); + comp->magnitude = magnitude; + comparator = comp; + setInnerProduct(); + } break; -#endif #endif default: std::stringstream msg; @@ -511,6 +605,7 @@ namespace NGT { void append(const float16 *data, size_t dataSize) { ObjectRepository::append(data, dataSize); } #endif + void deleteAll() { 
ObjectRepository::deleteAll(); } #ifdef NGT_SHARED_MEMORY_ALLOCATOR PersistentObject *allocatePersistentObject(Object &obj) { @@ -524,6 +619,34 @@ namespace NGT { void remove(size_t id) { ObjectRepository::remove(id); } void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results) { + if (distanceType == DistanceTypeInnerProduct) { + Comparator *comp; + if (typeid(OBJECT_TYPE) == typeid(qsint8)) { +#ifdef NGT_SHARED_MEMORY_ALLOCATOR + comp = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); +#else + comp = new ObjectSpaceRepository::ComparatorInnerProductQsint8Quint8(ObjectSpace::getPaddedDimension()); +#endif + } else { +#ifdef NGT_SHARED_MEMORY_ALLOCATOR + comp = new ObjectSpaceRepository::ComparatorInnerProduct(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); +#else + comp = new ObjectSpaceRepository::ComparatorInnerProduct(ObjectSpace::getPaddedDimension()); +#endif + } + try { + linearSearch(query, radius, size, results, *comp); + } catch(Exception &err) { + delete comp; + throw err; + } + delete comp; + } else { + linearSearch(query, radius, size, results, *comparator); + } + } + void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results, + Comparator &comparator) { if (!results.empty()) { NGTThrowException("lenearSearch: results is not empty"); } @@ -546,9 +669,9 @@ namespace NGT { continue; } #ifdef NGT_SHARED_MEMORY_ALLOCATOR - Distance d = (*comparator)((Object&)query, (PersistentObject&)*rep[idx]); + Distance d = comparator((Object&)query, (PersistentObject&)*rep[idx]); #else - Distance d = (*comparator)((Object&)query, (Object&)*rep[idx]); + Distance d = comparator((Object&)query, (Object&)*rep[idx]); #endif if (radius < 0.0 || d <= radius) { NGT::ObjectDistance obj(idx, d); @@ -561,7 +684,6 @@ namespace NGT { return; } -#ifdef NGT_INNER_PRODUCT float computeMaxMagnitude(NGT::ObjectID beginID = 1) 
{ float maxMag = 0.0; ObjectRepository &rep = *this; @@ -594,7 +716,6 @@ namespace NGT { return maxMag; } #ifdef NGT_SHARED_MEMORY_ALLOCATOR - //void setMagnitude(float maxMag, NGT::Vector &graphNodes, NGT::ObjectID beginID = 1) { void setMagnitude(float maxMag, NGT::PersistentRepository &graphNodes, NGT::ObjectID beginID = 1) { #else void setMagnitude(float maxMag, NGT::Repository &graphNodes, NGT::ObjectID beginID = 1) { @@ -630,8 +751,43 @@ namespace NGT { #endif } } -#endif + std::pair getMaxMin(float clippingRate = 0.02, size_t size = 0) { + ObjectRepository &rep = *this; + if (size == 0) { + size = rep.size(); + } else { + size = size > rep.size() ? size : rep.size(); + } + auto dim = getDimension(); + auto clippingSize = static_cast(size) * clippingRate; + clippingSize = clippingSize == 0 ? 1 : clippingSize; + std::priority_queue min; + std::priority_queue, std::greater> max; + std::cerr << "repo size=" << rep.size() << " " << clippingSize << std::endl; + for (size_t idx = 1; idx < rep.size(); idx++) { + try { + OBJECT_TYPE *obj = static_cast(getObject(idx)); + for (size_t i = 0; i < dim; i++) { + float v = static_cast(obj[i]); + if (max.size() < clippingSize) { + max.push(v); + } else if (max.top() <= v) { + max.push(v); + max.pop(); + } + if (min.size() < clippingSize) { + min.push(v); + } else if (min.top() >= v) { + min.push(v); + min.pop(); + } + } + } catch(...) 
{} + } + auto ret = std::make_pair(max.top(), min.top()); + return ret; + } void *getObject(size_t idx) { if (isEmpty(idx)) { @@ -688,12 +844,12 @@ namespace NGT { #ifdef NGT_SHARED_MEMORY_ALLOCATOR void normalize(PersistentObject &object) { - OBJECT_TYPE *obj = (OBJECT_TYPE*)&object.at(0, getRepository().getAllocator()); + auto *obj = reinterpret_cast(object.getPointer(getRepository().getAllocator())); ObjectSpace::normalize(obj, ObjectSpace::dimension); } #endif void normalize(Object &object) { - OBJECT_TYPE *obj = (OBJECT_TYPE*)&object[0]; + auto *obj = reinterpret_cast(object.getPointer()); ObjectSpace::normalize(obj, ObjectSpace::dimension); } @@ -709,71 +865,177 @@ namespace NGT { } Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); + Object *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + quantizeToQint8(qobj); + allocatedObject = ObjectRepository::allocateObject(qobj); + } else { + allocatedObject = ObjectRepository::allocateObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); + Object *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + quantizeToQint8(qobj); + allocatedObject = ObjectRepository::allocateObject(qobj); + } else { + allocatedObject = ObjectRepository::allocateObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } #ifdef NGT_HALF_FLOAT Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = 
ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); + Object *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + quantizeToQint8(qobj); + allocatedObject = ObjectRepository::allocateObject(qobj); + } else { + allocatedObject = ObjectRepository::allocateObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } #endif Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); + Object *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + quantizeToQint8(qobj); + allocatedObject = ObjectRepository::allocateObject(qobj); + } else { + allocatedObject = ObjectRepository::allocateObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } Object *allocateNormalizedObject(const float *obj, size_t size) { - Object *allocatedObject = ObjectRepository::allocateObject(obj, size); - if (normalization) { - normalize(*allocatedObject); + Object *allocatedObject = 0; + try { + if (quantizationIsEnabled()) { + std::vector qobj(obj, obj + size); + if (normalization) { + ObjectSpace::normalize(qobj); + } + quantizeToQint8(qobj); + allocatedObject = ObjectRepository::allocateObject(qobj); + } else { + allocatedObject = ObjectRepository::allocateObject(obj, size); + if (normalization) { + normalize(*allocatedObject); + } + } + } catch (Exception &err) { + std::stringstream msg; + msg << err.what() << " quantization=" << (quantizationIsEnabled() ? 
"True" : "False"); + NGTThrowException(msg); } return allocatedObject; } PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); + PersistentObject *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8); + quantizeToQint8(qobj, shift); + allocatedObject = ObjectRepository::allocatePersistentObject(qobj); + } else { + allocatedObject = ObjectRepository::allocatePersistentObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); + PersistentObject *allocatedObject = 0; + try { + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8); + quantizeToQint8(qobj, shift); + allocatedObject = ObjectRepository::allocatePersistentObject(qobj); + } else { + allocatedObject = ObjectRepository::allocatePersistentObject(obj); + if (normalization) { + normalize(*allocatedObject); + } + } + } catch (Exception &err) { + std::stringstream msg; + msg << err.what() << " quantization=" << (quantizationIsEnabled() ? 
"True" : "False"); + NGTThrowException(msg); } return allocatedObject; } #ifdef NGT_HALF_FLOAT PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); + PersistentObject *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8); + quantizeToQint8(qobj, shift); + allocatedObject = ObjectRepository::allocatePersistentObject(qobj); + } else { + allocatedObject = ObjectRepository::allocatePersistentObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } #endif PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); + PersistentObject *allocatedObject = 0; + if (quantizationIsEnabled()) { + std::vector qobj(obj.begin(), obj.end()); + if (normalization) { + ObjectSpace::normalize(qobj); + } + auto shift = distanceType == DistanceTypeInnerProduct && typeid(OBJECT_TYPE) == typeid(qsint8); + quantizeToQint8(qobj, shift); + allocatedObject = ObjectRepository::allocatePersistentObject(qobj); + } else { + allocatedObject = ObjectRepository::allocatePersistentObject(obj); + if (normalization) { + normalize(*allocatedObject); + } } return allocatedObject; } @@ -833,6 +1095,8 @@ namespace NGT { size_t dimension = objectspace->getDimension(); if (t == typeid(uint8_t)) { NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); + } else if (t == typeid(qsint8)) { + NGT::Serializer::writeAsText(os, (int8_t*)ref, dimension); } else if (t == typeid(float)) { NGT::Serializer::writeAsText(os, (float*)ref, dimension); #ifdef 
NGT_HALF_FLOAT diff --git a/lib/NGT/Optimizer.h b/lib/NGT/Optimizer.h index 4f1afe3..610e357 100644 --- a/lib/NGT/Optimizer.h +++ b/lib/NGT/Optimizer.h @@ -262,7 +262,7 @@ namespace NGT { NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { std::stringstream msg; - msg << "result format is wrong. "; + msg << "result format is wrong. [" << line << "]"; NGTThrowException(msg); } size_t id = NGT::Common::strtol(result[1]); @@ -320,7 +320,7 @@ namespace NGT { NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { std::stringstream msg; - msg << "result format is wrong. "; + msg << "result format is wrong. [" << line << "]"; NGTThrowException(msg); } size_t rank = NGT::Common::strtol(result[0]); @@ -398,7 +398,7 @@ namespace NGT { double key; if (fluctuation != "") { key = NGT::Common::strtod(fluctuation); - keyValue = "Factor (Epsilon or a fluctuating value)"; + keyValue = "Factor (Epsilon or any fluctuating value)"; } else { std::stringstream msg; msg << "check: inner error! " << fluctuation; @@ -480,8 +480,9 @@ namespace NGT { std::vector result; NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { - std::cerr << "result format is wrong. " << std::endl; - abort(); + std::stringstream msg; + msg << "result format is wrong. 
[" << line << "]"; + NGTThrowException(msg); } size_t rank = NGT::Common::strtol(result[0]); size_t id = NGT::Common::strtol(result[1]); @@ -996,41 +997,18 @@ namespace NGT { void outputObject(std::ostream &os, std::vector &v, NGT::Property &prop) { - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Uint8: - { - for (auto i = v.begin(); i != v.end(); ++i) { - int d = *i; - os << d; - if (i + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; + for (auto i = v.begin(); i != v.end(); ++i) { + os << *i; + if (i + 1 != v.end()) { + os << "\t"; } - break; - default: -#ifdef NGT_HALF_FLOAT - case NGT::ObjectSpace::ObjectType::Float16: -#endif - case NGT::ObjectSpace::ObjectType::Float: - { - for (auto i = v.begin(); i != v.end(); ++i) { - os << *i; - if (i + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; - } - break; } + os << std::endl; } void outputObjects(std::vector> &vs, std::ostream &os) { NGT::Property prop; index.getProperty(prop); - for (auto i = vs.begin(); i != vs.end(); ++i) { outputObject(os, *i, prop); } @@ -1059,7 +1037,6 @@ namespace NGT { } break; #endif - default: case NGT::ObjectSpace::ObjectType::Float: { auto *obj = static_cast(index.getObjectSpace().getObject(id)); @@ -1069,46 +1046,26 @@ namespace NGT { } } break; + case NGT::ObjectSpace::ObjectType::Qsuint8: + { + auto *obj = static_cast(index.getObjectSpace().getObject(id)); + index.getObjectSpace().dequantizeFromQint8(v, obj); + } + break; + default: + std::stringstream msg; + msg << "Fatal error! Invalid object type. 
(" << prop.objectType << ")" << std::endl; + NGTThrowException(msg); } return v; } std::vector meanObject(size_t id1, size_t id2, NGT::Property &prop) { std::vector v; - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Uint8: - { - auto *obj1 = static_cast(index.getObjectSpace().getObject(id1)); - auto *obj2 = static_cast(index.getObjectSpace().getObject(id2)); - for (int i = 0; i < prop.dimension; i++) { - int d = (*obj1++ + *obj2++) / 2; - v.push_back(d); - } - } - break; -#ifdef NGT_HALF_FLOAT - case NGT::ObjectSpace::ObjectType::Float16: - { - auto *obj1 = static_cast(index.getObjectSpace().getObject(id1)); - auto *obj2 = static_cast(index.getObjectSpace().getObject(id2)); - for (int i = 0; i < prop.dimension; i++) { - float d = (*obj1++ + *obj2++) / 2.0F; - v.push_back(d); - } - } - break; -#endif - default: - case NGT::ObjectSpace::ObjectType::Float: - { - auto *obj1 = static_cast(index.getObjectSpace().getObject(id1)); - auto *obj2 = static_cast(index.getObjectSpace().getObject(id2)); - for (int i = 0; i < prop.dimension; i++) { - float d = (*obj1++ + *obj2++) / 2.0F; - v.push_back(d); - } - } - break; + auto obj1 = extractObject(id1, prop); + auto obj2 = extractObject(id2, prop); + for (int i = 0; i < prop.dimension; i++) { + v.emplace_back((obj1[i] + obj2[i]) / 2.0); } return v; } @@ -1116,7 +1073,6 @@ namespace NGT { void extractQueries(std::vector> &queries, std::ostream &os) { NGT::Property prop; index.getProperty(prop); - for (auto i = queries.begin(); i != queries.end(); ++i) { outputObject(os, *i, prop); } @@ -1518,7 +1474,6 @@ namespace NGT { static std::vector> generateAccuracyTable(NGT::Index &index, size_t nOfResults = 50, size_t querySize = 100) { - NGT::Property prop; index.getProperty(prop); if (prop.edgeSizeForSearch != 0 && prop.edgeSizeForSearch != -2) { @@ -1528,13 +1483,10 @@ namespace NGT { } NGT::Optimizer optimizer(index, nOfResults); - float maxEpsilon = 0.0; std::stringstream queryStream; std::stringstream gtStream; - 
optimizer.generatePseudoGroundTruth(querySize, maxEpsilon, queryStream, gtStream); - std::map map; { float interval = 0.05; @@ -1563,7 +1515,7 @@ namespace NGT { if (accuracy - prev < 0.02) { interval *= 2.0; } else if (accuracy - prev > 0.05 && interval > 0.0001) { - + epsilon -= interval; interval /= 2.0; accuracy = prev; diff --git a/lib/NGT/PrimitiveComparator.h b/lib/NGT/PrimitiveComparator.h index 1c3bbf5..669fead 100644 --- a/lib/NGT/PrimitiveComparator.h +++ b/lib/NGT/PrimitiveComparator.h @@ -310,8 +310,105 @@ namespace NGT { return sqrt(s); } + inline static double compareL2(const quint8 *a, const quint8 *b, size_t size) { + auto *u8a = reinterpret_cast(a); + auto *u8b = reinterpret_cast(b); + + const unsigned char *last = u8a + size; +#if defined(NGT_AVX512) + __m512i sum512 = _mm512_setzero_si512(); + { + const unsigned char *lastgroup = last - 63; + while (u8a < lastgroup) { + __m512i mu8a = _mm512_loadu_si512(reinterpret_cast(u8a)); + __m512i mu8b = _mm512_loadu_si512(reinterpret_cast(u8b)); + __mmask64 m = _mm512_cmplt_epu8_mask(mu8a, mu8b); + __m512i x = _mm512_add_epi8(_mm512_maskz_subs_epu8(m, mu8b, mu8a), + _mm512_maskz_subs_epu8(~m, mu8a, mu8b)); + __m512i xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,0)); + sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16)); + xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,1)); + sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16)); + u8a += 64; + u8b += 64; + } + } + { + const unsigned char *lastgroup = last - 31; + while (u8a < lastgroup) { + __m256i mu8a = _mm256_loadu_si256(reinterpret_cast(u8a)); + __m256i mu8b = _mm256_loadu_si256(reinterpret_cast(u8b)); + __mmask32 m = _mm256_cmplt_epu8_mask(mu8a, mu8b); + __m256i x = _mm256_add_epi8(_mm256_maskz_subs_epu8(m, mu8b, mu8a), + _mm256_maskz_subs_epu8(~m, mu8a, mu8b)); + __m512i xi16 = _mm512_cvtepu8_epi16(x); + sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(xi16, xi16)); + u8a += 32; + u8b += 32; + } + } + 
__m256i sum256 = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512, 0), _mm512_extracti32x8_epi32(sum512, 1)); +#elif defined(NGT_AVX2) + __m256i sum256 = _mm256_setzero_si256(); + { + const unsigned char *lastgroup = last - 31; + while (u8a < lastgroup) { + __m256i x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a)); + __m256i x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b)); + __m256i xi16 = _mm256_subs_epi16(x1, x2); + sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16)); + u8a += 16; + u8b += 16; + x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a)); + x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b)); + xi16 = _mm256_subs_epi16(x1, x2); + sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16)); + u8a += 16; + u8b += 16; + } + } +#else + __m256i sum256 = _mm256_setzero_si256(); +#endif + { + const unsigned char *lastgroup = last - 15; + + while (u8a < lastgroup) { + __m256i x1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8a)); + __m256i x2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)u8b)); + __m256i xi16 = _mm256_subs_epi16(x1, x2); + sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(xi16, xi16)); + u8a += 16; + u8b += 16; + } + } + + const __m256i value0 = _mm256_set1_epi32(0); + __m256i tmp1 = _mm256_hadd_epi32(sum256, value0); + __m256i tmp2 = _mm256_hadd_epi32(tmp1, value0); + double s = _mm256_extract_epi32(tmp2, 0) + _mm256_extract_epi32(tmp2, 4); + return s; + + } #endif + inline static double compareL2(const qsint8 *a, const qsint8 *b, size_t size) { + auto *i8a = reinterpret_cast(a); + auto *i8b = reinterpret_cast(b); + double sum = 0.0; + for (size_t loc = 0; loc < size; loc++) { + auto sub = static_cast(*i8a) - static_cast(*i8b); + sum += sub * sub; + i8a++; + i8b++; + } + return sqrt(sum); + } + + inline static double compareL2(const qsint8 *a, const quint8 *b, size_t size) { + NGTThrowException("Not supported."); + return 0.00; + } template inline 
static double compareNormalizedL2(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { @@ -323,8 +420,6 @@ namespace NGT { } } - -#if defined(NGT_NO_AVX) template static double compareL1(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { const OBJECT_TYPE *last = a + size; @@ -347,10 +442,15 @@ namespace NGT { return d; } +#if defined(NGT_NO_AVX) inline static double compareL1(const uint8_t *a, const uint8_t *b, size_t size) { return compareL1(a, b, size); } + inline static double compareL1(const int8_t *a, const int8_t *b, size_t size) { + return compareL1(a, b, size); + } + inline static double compareL1(const float *a, const float *b, size_t size) { return compareL1(a, b, size); } @@ -421,8 +521,8 @@ namespace NGT { const unsigned char *lastgroup = last - 7; const __m128i zero = _mm_setzero_si128(); while (a < lastgroup) { - __m128i x1 = _mm_cvtepu8_epi16(*reinterpret_cast<__m128i const*>(a)); - __m128i x2 = _mm_cvtepu8_epi16(*reinterpret_cast<__m128i const*>(b)); + __m128i x1 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)a)); + __m128i x2 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)b)); x1 = _mm_subs_epi16(x1, x2); x1 = _mm_sign_epi16(x1, x1); sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpacklo_epi16(x1, zero))); @@ -439,6 +539,12 @@ namespace NGT { } return s; } + inline static double compareL1(const int8_t *a, const int8_t *b, size_t size) { + return compareL1(a, b, size); + } + inline static double compareL1(const qsint8 *a, const qsint8 *b, size_t size) { + return compareL1(reinterpret_cast(a), reinterpret_cast(b), size); + } #endif #if defined(NGT_NO_AVX) || !defined(__POPCNT__) @@ -592,6 +698,9 @@ namespace NGT { } #endif + inline static double compareSparseJaccardDistance(const qsint8 *a, const qsint8 *b, size_t size) { + NGTThrowException("Not supported."); + } inline static double compareSparseJaccardDistance(const float *a, const float *b, size_t size) { size_t loca = 0; size_t locb = 0; @@ -721,13 +830,158 @@ namespace NGT 
{ } #endif - inline static double compareDotProduct(const unsigned char *a, const unsigned char *b, size_t size) { + inline static double compareDotProduct(const uint8_t *a, const uint8_t *b, size_t size) { double sum = 0.0; for (size_t loc = 0; loc < size; loc++) { sum += static_cast(a[loc]) * static_cast(b[loc]); } return sum; } + inline static double compareDotProduct(const int8_t *a, const int8_t *b, size_t size) { +#if defined(NGT_NO_AVX) + double sum = 0.0; + for (size_t loc = 0; loc < size; loc++) { + sum += static_cast(a[loc]) * static_cast(b[loc]); + } + return sum; +#else + const auto *last = a + size; +#if defined(NGT_AVX512) || defined(NGT_AVX2) +#if defined(NGT_AVX512) + __m512i sum512 = _mm512_setzero_si512(); + { + const auto *lastgroup = last - 63; + while (a < lastgroup) { + __m512i ma = _mm512_loadu_si512(reinterpret_cast(a)); + __m512i mb = _mm512_loadu_si512(reinterpret_cast(b)); + __m512i malo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(ma, 0)); + __m512i mahi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(ma, 1)); + __m512i mblo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(mb, 0)); + __m512i mbhi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(mb, 1)); + sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(malo, mblo)); + sum512 = _mm512_add_epi32(sum512, _mm512_madd_epi16(mahi, mbhi)); + a += 64; + b += 64; + } + } + __m256i sum256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(sum512, 0), _mm512_extracti64x4_epi64(sum512, 1)); +#else + __m256i sum256 = _mm256_setzero_si256(); +#endif + { + const auto *lastgroup = last - 31; + while (a < lastgroup) { + __m256i ma = _mm256_loadu_si256(reinterpret_cast(a)); + __m256i mb = _mm256_loadu_si256(reinterpret_cast(b)); + __m256i malo = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(ma, 0)); + __m256i mahi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(ma, 1)); + __m256i mblo = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(mb, 0)); + __m256i mbhi = 
_mm256_cvtepi8_epi16(_mm256_extracti128_si256(mb, 1)); + sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(malo, mblo)); + sum256 = _mm256_add_epi32(sum256, _mm256_madd_epi16(mahi, mbhi)); + a += 32; + b += 32; + } + } + __m128i sum128 = _mm_add_epi32(_mm256_extracti128_si256(sum256, 0), _mm256_extracti128_si256(sum256, 1)); +#endif + //__m128i sum128 = _mm_setzero_si128(); + { + const auto *lastgroup = last - 15; + while (a < lastgroup) { + __m128i ma = _mm_loadu_si128(reinterpret_cast(a)); + __m128i mb = _mm_loadu_si128(reinterpret_cast(b)); + __m128i malo = _mm_cvtepi8_epi16(ma); + __m128i mahi = _mm_cvtepi8_epi16(_mm_bsrli_si128(ma, 8)); + __m128i mblo = _mm_cvtepi8_epi16(mb); + __m128i mbhi = _mm_cvtepi8_epi16(_mm_bsrli_si128(mb, 8)); + sum128 = _mm_add_epi32(sum128, _mm_madd_epi16(malo, mblo)); + sum128 = _mm_add_epi32(sum128, _mm_madd_epi16(mahi, mbhi)); + a += 16; + b += 16; + } + } + __m128i tmp = _mm_hadd_epi32(sum128, _mm_set1_epi32(0)); + double sum = _mm_extract_epi32(tmp, 0) + _mm_extract_epi32(tmp, 1); + return sum; +#endif + } + inline static double compareDotProduct(const int8_t *a, const uint8_t *b, size_t size) { +#if defined(__AVX512VNNI__) + const auto *last = a + size; +#if defined(NGT_AVX512) + __m512i sum512 = _mm512_setzero_si512(); + { + const auto *lastgroup = last - 191; + while (a < lastgroup) { + __m512i ma = _mm512_loadu_si512(reinterpret_cast(a)); + __m512i mb = _mm512_loadu_si512(reinterpret_cast(b)); + sum512 = _mm512_dpbusd_epi32(sum512, mb, ma); + a += 64; + b += 64; + ma = _mm512_loadu_si512(reinterpret_cast(a)); + mb = _mm512_loadu_si512(reinterpret_cast(b)); + sum512 = _mm512_dpbusd_epi32(sum512, mb, ma); + a += 64; + b += 64; + ma = _mm512_loadu_si512(reinterpret_cast(a)); + mb = _mm512_loadu_si512(reinterpret_cast(b)); + sum512 = _mm512_dpbusd_epi32(sum512, mb, ma); + a += 64; + b += 64; + } + } + __m256i sum256 = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512, 0), + _mm512_extracti32x8_epi32(sum512, 1)); + __m128i 
sum128 = _mm_add_epi32(_mm256_extracti32x4_epi32(sum256, 0), + _mm256_extracti32x4_epi32(sum256, 1)); +#elif defined(NGT_AVX2) + __m256i sum256 = _mm256_setzero_si256(); + { + const auto *lastgroup = last - 31; + while (a < lastgroup) { + __m256i ma = _mm256_loadu_si256(reinterpret_cast(a)); + __m256i mb = _mm256_loadu_si256(reinterpret_cast(b)); + sum256 = _mm256_dpbusd_epi32(sum256, mb, ma); + a += 32; + b += 32; + } + } + __m128i sum128 = _mm_add_epi32(_mm256_extracti32x4_epi32(sum256, 0), + _mm256_extracti32x4_epi32(sum256, 1)); +#else + __m128i sum128 = _mm_setzero_si128(); +#endif + { + const auto *lastgroup = last - 15; + while (a < lastgroup) { + __m128i ma = _mm_loadu_si128(reinterpret_cast(a)); + __m128i mb = _mm_loadu_si128(reinterpret_cast(b)); + sum128 = _mm_dpbusd_epi32(sum128, mb, ma); + a += 16; + b += 16; + } + } + __m128i tmp = _mm_hadd_epi32(sum128, _mm_set1_epi32(0)); + double sum = _mm_extract_epi32(tmp, 0) + _mm_extract_epi32(tmp, 1); +#else + double sum = 0.0; + for (size_t loc = 0; loc < size; loc++) { + sum += static_cast(a[loc]) * static_cast(b[loc]); + } +#endif + return sum; + } + inline static double compareDotProduct(const quint8 *a, const quint8 *b, size_t size) { + return compareDotProduct(reinterpret_cast(a), reinterpret_cast(b), size); + } + inline static double compareDotProduct(const qsint8 *a, const qsint8 *b, size_t size) { + auto d = compareDotProduct(reinterpret_cast(a), reinterpret_cast(b), size); + return d; + } + inline static double compareDotProduct(const qsint8 *a, const quint8 *b, size_t size) { + return compareDotProduct(reinterpret_cast(a), reinterpret_cast(b), size); + } inline static double compareCosine(const float *a, const float *b, size_t size) { const float *last = a + size; @@ -896,7 +1150,38 @@ namespace NGT { return cosine; } + inline static double compareCosine(const qsint8 *a, const qsint8 *b, size_t size) { + return compareCosine(reinterpret_cast(a), reinterpret_cast(b), size); + } + inline static double 
compareNormalizedCosineSimilarity(const float *a, const float *b, size_t size) { + auto v = 1.0 - compareDotProduct(a, b, size); + return v < 0.0 ? -v : v; + } + inline static double compareNormalizedCosineSimilarity(const float16 *a, const float16 *b, size_t size) { + auto v = 1.0 - compareDotProduct(a, b, size); + return v < 0.0 ? -v : v; + } +#ifdef NGT_BFLOAT + inline static double compareNormalizedCosineSimilarity(const bfloat16 *a, const bfloat16 *b, size_t size) { + auto v = 1.0 - compareDotProduct(a, b, size); + return v < 0.0 ? -v : v; + } +#endif + inline static double compareNormalizedCosineSimilarity(const uint8_t *a, const uint8_t *b, size_t size) { + auto v = 1.0 - compareDotProduct(a, b, size); + return v < 0.0 ? -v : v; + } + inline static double compareNormalizedCosineSimilarity(const qsint8 *a, const qsint8 *b, size_t size) { + float max = 127.0 * 127.0 * size; /* dot-product upper bound scales with size (|component| <= 127 assumed), matching NormalizedCosineSimilarityQsint8 */ + auto v = max - compareDotProduct(a, b, size); + return v; + } + inline static double compareNormalizedCosineSimilarity(const quint8 *a, const quint8 *b, size_t size) { + float max = 255.0 * 255.0 * size; + auto v = max - compareDotProduct(a, b, size); + return v; + } #endif // #if defined(NGT_NO_AVX) template @@ -954,12 +1239,6 @@ namespace NGT { return v < 0.0 ? -v : v; } - template - inline static double compareNormalizedCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { - auto v = 1.0 - compareDotProduct(a, b, size); - return v < 0.0 ?
-v : v; - } - class L1Uint8 { public: inline static double compare(const void *a, const void *b, size_t size) { @@ -1208,6 +1487,88 @@ namespace NGT { #endif + + class SparseJaccardQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + class L2Qsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + return PrimitiveComparator::compareL2((const qsint8*)a, (const qsint8*)b, size); + } + }; + + class NormalizedL2Qsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + return PrimitiveComparator::compareNormalizedL2((const qsint8*)a, (const qsint8*)b, size); + } + }; + + class L1Qsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + class CosineSimilarityQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + class AngleQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + class NormalizedAngleQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + // added by Nyapicom + class PoincareQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + // added by Nyapicom + class LorentzQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + NGTThrowException("Not supported."); + } + }; + + class InnerProductQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + auto d = PrimitiveComparator::compareDotProduct((const qsint8*)a, (const quint8*)b, size); + return 127.0 * 127.0 * size 
- d; + } + }; + + class NormalizedCosineSimilarityQsint8 { + public: + inline static double compare(const void *a, const void *b, size_t size) { + float max = 127.0 * 127.0 * size; + auto d = max - PrimitiveComparator::compareDotProduct((const qsint8*)a, (const qsint8*)b, size); + return d; + } + }; }; diff --git a/lib/NGT/SharedMemoryAllocator.h b/lib/NGT/SharedMemoryAllocator.h index ccc4b1d..e7441a3 100644 --- a/lib/NGT/SharedMemoryAllocator.h +++ b/lib/NGT/SharedMemoryAllocator.h @@ -62,7 +62,7 @@ class SharedMemoryAllocator { if(!isValid){ return NULL; } - off_t file_offset = mmanager->alloc(size, true); + off_t file_offset = mmanager->alloc(size, true); if (file_offset == -1) { std::cerr << "Fatal Error: Allocating memory size is too big for this settings." << std::endl; std::cerr << " Max allocation size should be enlarged." << std::endl; diff --git a/lib/NGT/defines.h.in b/lib/NGT/defines.h.in index 5ee481f..4bec75b 100644 --- a/lib/NGT/defines.h.in +++ b/lib/NGT/defines.h.in @@ -28,6 +28,8 @@ #cmakedefine NGTQG_NO_ROTATION #cmakedefine NGT_BFLOAT_DISABLED // not use bfloat #cmakedefine NGT_BFLOAT_ENABLED +#cmakedefine NGT_GRAPH_COMPACT_READ_ONLY_GRAPH +#cmakedefine NGT_ENABLE_TIME_SEED_FOR_RANDOM // End of cmake defines ////////////////////////////////////////////////////////////////////////// @@ -63,7 +65,7 @@ #define NGT_QBG_DISABLED #endif -#define NGT_INNER_PRODUCT +#define NGT_REFINEMENT #if defined(NGT_AVX_DISABLED) @@ -72,6 +74,8 @@ #undef NGT_AVX512 #else #if defined(__AVX512F__) && defined(__AVX512DQ__) +#if defined(__AVX512VNNI__) +#endif #define NGT_AVX512 #elif defined(__AVX2__) #define NGT_AVX2 diff --git a/lib/NGT/half.hpp b/lib/NGT/half.hpp index 4f4030f..bc5798d 100644 --- a/lib/NGT/half.hpp +++ b/lib/NGT/half.hpp @@ -16,8 +16,8 @@ // Version 2.2.0 -/// \file -/// Main header file for half-precision functionality. 
+ + #ifndef HALF_HALF_HPP #define HALF_HALF_HPP @@ -269,12 +269,12 @@ #ifndef HALF_ENABLE_F16C_INTRINSICS - /// Enable F16C intruction set intrinsics. - /// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between - /// half-precision and single-precision values which may result in improved performance. This will not perform additional checks - /// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature. - /// - /// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms. + + + + + + #define HALF_ENABLE_F16C_INTRINSICS __F16C__ #endif #if HALF_ENABLE_F16C_INTRINSICS @@ -282,117 +282,117 @@ #endif #ifdef HALF_DOXYGEN_ONLY -/// Type for internal floating-point computations. -/// This can be predefined to a built-in floating-point type (`float`, `double` or `long double`) to override the internal -/// half-precision implementation to use this type for computing arithmetic operations and mathematical function (if available). -/// This can result in improved performance for arithmetic operators and mathematical functions but might cause results to -/// deviate from the specified half-precision rounding mode and inhibits proper detection of half-precision exceptions. + + + + + #define HALF_ARITHMETIC_TYPE (undefined) -/// Enable internal exception flags. -/// Defining this to 1 causes operations on half-precision values to raise internal floating-point exception flags according to -/// the IEEE 754 standard. These can then be cleared and checked with clearexcept(), testexcept(). + + + #define HALF_ERRHANDLING_FLAGS 0 -/// Enable exception propagation to `errno`. -/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to -/// [errno](https://en.cppreference.com/w/cpp/error/errno) from ``. 
Specifically this will propagate domain errors as -/// [EDOM](https://en.cppreference.com/w/cpp/error/errno_macros) and pole, overflow and underflow errors as -/// [ERANGE](https://en.cppreference.com/w/cpp/error/errno_macros). Inexact errors won't be propagated. + + + + + #define HALF_ERRHANDLING_ERRNO 0 -/// Enable exception propagation to built-in floating-point platform. -/// Defining this to 1 causes operations on half-precision values to propagate floating-point exceptions to the built-in -/// single- and double-precision implementation's exception flags using the -/// [C++11 floating-point environment control](https://en.cppreference.com/w/cpp/numeric/fenv) from ``. However, this -/// does not work in reverse and single- or double-precision exceptions will not raise the corresponding half-precision -/// exception flags, nor will explicitly clearing flags clear the corresponding built-in flags. + + + + + + #define HALF_ERRHANDLING_FENV 0 -/// Throw C++ exception on domain errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on domain errors. + + + #define HALF_ERRHANDLING_THROW_INVALID (undefined) -/// Throw C++ exception on pole errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::domain_error](https://en.cppreference.com/w/cpp/error/domain_error) with the specified message on pole errors. + + + #define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined) -/// Throw C++ exception on overflow errors. -/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::overflow_error](https://en.cppreference.com/w/cpp/error/overflow_error) with the specified message on overflows. + + + #define HALF_ERRHANDLING_THROW_OVERFLOW (undefined) -/// Throw C++ exception on underflow errors. 
-/// Defining this to a string literal causes operations on half-precision values to throw a -/// [std::underflow_error](https://en.cppreference.com/w/cpp/error/underflow_error) with the specified message on underflows. + + + #define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined) -/// Throw C++ exception on rounding errors. -/// Defining this to 1 causes operations on half-precision values to throw a -/// [std::range_error](https://en.cppreference.com/w/cpp/error/range_error) with the specified message on general rounding errors. + + + #define HALF_ERRHANDLING_THROW_INEXACT (undefined) #endif #ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT -/// Raise INEXACT exception on overflow. -/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. -/// These will be raised after any possible handling of the underflow exception. + + + #define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 #endif #ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT -/// Raise INEXACT exception on underflow. -/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. -/// These will be raised after any possible handling of the underflow exception. -/// -/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result -/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result. + + + + + + #define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 #endif -/// Default rounding mode. -/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types -/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical -/// functions. 
It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective -/// constants or the equivalent values of -/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style): -/// -/// `std::float_round_style` | value | rounding -/// ---------------------------------|-------|------------------------- -/// `std::round_indeterminate` | -1 | fastest -/// `std::round_toward_zero` | 0 | toward zero -/// `std::round_to_nearest` | 1 | to nearest (default) -/// `std::round_toward_infinity` | 2 | toward positive infinity -/// `std::round_toward_neg_infinity` | 3 | toward negative infinity -/// -/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even -/// be set to [std::numeric_limits::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize -/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though). + + + + + + + + + + + + + + + + + + #ifndef HALF_ROUND_STYLE #define HALF_ROUND_STYLE 1 // = std::round_to_nearest #endif -/// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an -/// operation, in particular it just evaluates to positive infinity. -/// -/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL) + + + + + #define HUGE_VALH std::numeric_limits::infinity() -/// Fast half-precision fma function. -/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate -/// half-precision multiplication followed by an addition, which is always the case. -/// -/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma) + + + + + #define FP_FAST_FMAH 1 -/// Half rounding mode. 
-/// In correspondence with `FLT_ROUNDS` from `` this symbol expands to the rounding mode used for -/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE). -/// -/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS) + + + + + #define HLF_ROUNDS HALF_ROUND_STYLE #ifndef FP_ILOGB0 @@ -427,51 +427,51 @@ #endif -/// Main namespace for half-precision functionality. -/// This namespace contains all the functionality provided by the library. + + namespace half_float { class half; #if HALF_ENABLE_CPP11_USER_LITERALS - /// Library-defined half-precision literals. - /// Import this namespace to enable half-precision floating-point literals: - /// ~~~~{.cpp} - /// using namespace half_float::literal; - /// half_float::half = 4.2_h; - /// ~~~~ + + + + + + namespace literal { half operator "" _h(long double); } #endif - /// \internal - /// \brief Implementation details. + + namespace detail { #if HALF_ENABLE_CPP11_TYPE_TRAITS - /// Conditional type. + template struct conditional : std::conditional {}; - /// Helper for tag dispatching. + template struct bool_type : std::integral_constant {}; using std::true_type; using std::false_type; - /// Type traits for floating-point types. + template struct is_float : std::is_floating_point {}; #else - /// Conditional type. + template struct conditional { typedef T type; }; template struct conditional { typedef F type; }; - /// Helper for tag dispatching. + template struct bool_type {}; typedef bool_type true_type; typedef bool_type false_type; - /// Type traits for floating-point types. + template struct is_float : false_type {}; template struct is_float : is_float {}; template struct is_float : is_float {}; @@ -481,68 +481,68 @@ namespace half_float template<> struct is_float : true_type {}; #endif - /// Type traits for floating-point bits. 
+ template struct bits { typedef unsigned char type; }; template struct bits : bits {}; template struct bits : bits {}; template struct bits : bits {}; #if HALF_ENABLE_CPP11_CSTDINT - /// Unsigned integer of (at least) 16 bits width. + typedef std::uint_least16_t uint16; - /// Fastest unsigned integer of (at least) 32 bits width. + typedef std::uint_fast32_t uint32; - /// Fastest signed integer of (at least) 32 bits width. + typedef std::int_fast32_t int32; - /// Unsigned integer of (at least) 32 bits width. + template<> struct bits { typedef std::uint_least32_t type; }; - /// Unsigned integer of (at least) 64 bits width. + template<> struct bits { typedef std::uint_least64_t type; }; #else - /// Unsigned integer of (at least) 16 bits width. + typedef unsigned short uint16; - /// Fastest unsigned integer of (at least) 32 bits width. + typedef unsigned long uint32; - /// Fastest unsigned integer of (at least) 32 bits width. + typedef long int32; - /// Unsigned integer of (at least) 32 bits width. + template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; #if HALF_ENABLE_CPP11_LONG_LONG - /// Unsigned integer of (at least) 64 bits width. + template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; #else - /// Unsigned integer of (at least) 64 bits width. + template<> struct bits { typedef unsigned long type; }; #endif #endif #ifdef HALF_ARITHMETIC_TYPE - /// Type to use for arithmetic computations and mathematic functions internally. + typedef HALF_ARITHMETIC_TYPE internal_t; #endif - /// Tag type for binary construction. + struct binary_t {}; - /// Tag for binary construction. + HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - /// \name Implementation defined classification and arithmetic - /// \{ + + - /// Check for infinity. 
- /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if infinity - /// \retval false else + + + + + template bool builtin_isinf(T arg) { #if HALF_ENABLE_CPP11_CMATH @@ -554,11 +554,11 @@ namespace half_float #endif } - /// Check for NaN. - /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if not a number - /// \retval false else + + + + + template bool builtin_isnan(T arg) { #if HALF_ENABLE_CPP11_CMATH @@ -570,11 +570,11 @@ namespace half_float #endif } - /// Check sign. - /// \tparam T argument type (builtin floating-point type) - /// \param arg value to query - /// \retval true if signbit set - /// \retval false else + + + + + template bool builtin_signbit(T arg) { #if HALF_ENABLE_CPP11_CMATH @@ -584,10 +584,10 @@ namespace half_float #endif } - /// Platform-independent sign mask. - /// \param arg integer value in two's complement - /// \retval -1 if \a arg negative - /// \retval 0 if \a arg positive + + + + inline uint32 sign_mask(uint32 arg) { static const int N = std::numeric_limits::digits - 1; @@ -598,10 +598,10 @@ namespace half_float #endif } - /// Platform-independent arithmetic right shift. - /// \param arg integer value in two's complement - /// \param i shift amount (at most 31) - /// \return \a arg right shifted for \a i bits with possible sign extension + + + + inline uint32 arithmetic_shift(uint32 arg, int i) { #if HALF_TWOS_COMPLEMENT_INT @@ -611,17 +611,17 @@ namespace half_float #endif } - /// \} - /// \name Error handling - /// \{ + + + - /// Internal exception flags. - /// \return reference to global exception flags + + inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } - /// Raise floating-point exception. 
- /// \param flags exceptions to raise - /// \param cond condition to raise exceptions for + + + inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) { #if HALF_ERRHANDLING @@ -670,12 +670,12 @@ namespace half_float #endif } - /// Check and signal for any NaN. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \retval true if either \a x or \a y is NaN - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) { #if HALF_ERRHANDLING @@ -684,10 +684,10 @@ namespace half_float return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; } - /// Signal and silence signaling NaN. - /// \param nan half-precision NaN value - /// \return quiet NaN - /// \exception FE_INVALID if \a nan is signaling NaN + + + + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) { #if HALF_ERRHANDLING @@ -696,11 +696,11 @@ namespace half_float return nan | 0x200; } - /// Signal and silence signaling NaNs. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \return quiet NaN - /// \exception FE_INVALID if \a x or \a y is signaling NaN + + + + + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) { #if HALF_ERRHANDLING @@ -709,12 +709,12 @@ namespace half_float return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); } - /// Signal and silence signaling NaNs. - /// \param x first half-precision value to check - /// \param y second half-precision value to check - /// \param z third half-precision value to check - /// \return quiet NaN - /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN + + + + + + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) { #if HALF_ERRHANDLING @@ -723,11 +723,11 @@ namespace half_float return ((x&0x7FFF)>0x7C00) ? 
(x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); } - /// Select value or signaling NaN. - /// \param x preferred half-precision value - /// \param y ignored half-precision value except for signaling NaN - /// \return \a y if signaling NaN, \a x otherwise - /// \exception FE_INVALID if \a y is signaling NaN + + + + + inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) { #if HALF_ERRHANDLING @@ -737,9 +737,9 @@ namespace half_float #endif } - /// Raise domain error and return NaN. - /// return quiet NaN - /// \exception FE_INVALID + + + inline HALF_CONSTEXPR_NOERR unsigned int invalid() { #if HALF_ERRHANDLING @@ -748,10 +748,10 @@ namespace half_float return 0x7FFF; } - /// Raise pole error and return infinity. - /// \param sign half-precision value with sign bit only - /// \return half-precision infinity with sign of \a sign - /// \exception FE_DIVBYZERO + + + + inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) { #if HALF_ERRHANDLING @@ -760,10 +760,10 @@ namespace half_float return sign | 0x7C00; } - /// Check value for underflow. - /// \param arg non-zero half-precision value to check - /// \return \a arg - /// \exception FE_UNDERFLOW if arg is subnormal + + + + inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) { #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT @@ -772,15 +772,15 @@ namespace half_float return arg; } - /// \} - /// \name Conversion and rounding - /// \{ + + + - /// Half-precision overflow. - /// \tparam R rounding mode to use - /// \param sign half-precision value with sign bit only - /// \return rounded overflowing half-precision value - /// \exception FE_OVERFLOW + + + + + template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) { #if HALF_ERRHANDLING @@ -792,11 +792,11 @@ namespace half_float (sign|0x7C00); } - /// Half-precision underflow. 
- /// \tparam R rounding mode to use - /// \param sign half-precision value with sign bit only - /// \return rounded underflowing half-precision value - /// \exception FE_UNDERFLOW + + + + + template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) { #if HALF_ERRHANDLING @@ -807,16 +807,16 @@ namespace half_float sign; } - /// Round half-precision number. - /// \tparam R rounding mode to use - /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results - /// \param value finite half-precision number to round - /// \param g guard bit (most significant discarded bit) - /// \param s sticky bit (or of all but the most significant discarded bits) - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + + + + + + + + + + template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) { #if HALF_ERRHANDLING @@ -838,14 +838,14 @@ namespace half_float #endif } - /// Round half-precision number to nearest integer value. - /// \tparam R rounding mode to use - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it - /// \param value half-precision value to round - /// \return half-precision bits for nearest integral value - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded and \a I is `true` + + + + + + + + template unsigned int integral(unsigned int value) { unsigned int abs = value & 0x7FFF; @@ -867,20 +867,20 @@ namespace half_float 0) + value) & ~mask; } - /// Convert fixed point to half-precision floating-point. 
- /// \tparam R rounding mode to use - /// \tparam F number of fractional bits in [11,31] - /// \tparam S `true` for signed, `false` for unsigned - /// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F - /// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results - /// \param m mantissa in Q1.F fixed point format - /// \param exp biased exponent - 1 - /// \param sign half-precision value with sign bit only - /// \param s sticky bit (or of all but the most significant already discarded bits) - /// \return value converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded or \a I is `true` + + + + + + + + + + + + + + template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) { if(S) @@ -896,14 +896,14 @@ namespace half_float return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); } - /// Convert IEEE single-precision to half-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). - /// \tparam R rounding mode to use - /// \param value single-precision value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + + template unsigned int float2half_impl(float value, true_type) { #if HALF_ENABLE_F16C_INTRINSICS @@ -985,13 +985,13 @@ namespace half_float #endif } - /// Convert IEEE double-precision to half-precision. 
- /// \tparam R rounding mode to use - /// \param value double-precision value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + template unsigned int float2half_impl(double value, true_type) { #if HALF_ENABLE_F16C_INTRINSICS @@ -1020,14 +1020,14 @@ namespace half_float return sign; } - /// Convert non-IEEE floating-point to half-precision. - /// \tparam R rounding mode to use - /// \tparam T source type (builtin floating-point type) - /// \param value floating-point value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + + template unsigned int float2half_impl(T value, ...) { unsigned int hbits = static_cast(builtin_signbit(value)) << 15; @@ -1053,26 +1053,26 @@ namespace half_float return rounded(hbits+(m>>1), m&1, frac!=T()); } - /// Convert floating-point to half-precision. - /// \tparam R rounding mode to use - /// \tparam T source type (builtin floating-point type) - /// \param value floating-point value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + + template unsigned int float2half(T value) { return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); } - /// Convert integer to half-precision floating-point. 
- /// \tparam R rounding mode to use - /// \tparam T type to convert (builtin integer type) - /// \param value integral value to convert - /// \return rounded half-precision value - /// \exception FE_OVERFLOW on overflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + template unsigned int int2half(T value) { unsigned int bits = static_cast(value<0) << 15; @@ -1089,10 +1089,10 @@ namespace half_float return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; } - /// Convert half-precision to IEEE single-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). - /// \param value half-precision value to convert - /// \return single-precision value + + + + inline float half2float_impl(unsigned int value, float, true_type) { #if HALF_ENABLE_F16C_INTRINSICS @@ -1253,9 +1253,9 @@ namespace half_float #endif } - /// Convert half-precision to IEEE double-precision. - /// \param value half-precision value to convert - /// \return double-precision value + + + inline double half2float_impl(unsigned int value, double, true_type) { #if HALF_ENABLE_F16C_INTRINSICS @@ -1276,10 +1276,10 @@ namespace half_float #endif } - /// Convert half-precision to non-IEEE floating-point. - /// \tparam T type to convert to (builtin integer type) - /// \param value half-precision value to convert - /// \return floating-point value + + + + template T half2float_impl(unsigned int value, T, ...) { T out; @@ -1296,24 +1296,24 @@ namespace half_float return (value&0x8000) ? -out : out; } - /// Convert half-precision to floating-point. - /// \tparam T type to convert to (builtin integer type) - /// \param value half-precision value to convert - /// \return floating-point value + + + + template T half2float(unsigned int value) { return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); } - /// Convert half-precision floating-point to integer. 
- /// \tparam R rounding mode to use - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value half-precision value to convert - /// \return rounded integer value - /// \exception FE_INVALID if value is not representable in type \a T - /// \exception FE_INEXACT if value had to be rounded and \a I is `true` + + + + + + + + + template T half2int(unsigned int value) { unsigned int abs = value & 0x7FFF; @@ -1343,15 +1343,15 @@ namespace half_float return static_cast((value&0x8000) ? -i : i); } - /// \} - /// \name Mathematics - /// \{ + + + - /// upper part of 64-bit multiplication. - /// \tparam R rounding mode to use - /// \param x first factor - /// \param y second factor - /// \return upper 32 bit of \a x * \a y + + + + + template uint32 mulhi(uint32 x, uint32 y) { uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); @@ -1359,10 +1359,10 @@ namespace half_float ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); } - /// 64-bit multiplication. - /// \param x first factor - /// \param y second factor - /// \return upper 32 bit of \a x * \a y rounded to nearest + + + + inline uint32 multiply64(uint32 x, uint32 y) { #if HALF_ENABLE_CPP11_LONG_LONG @@ -1372,11 +1372,11 @@ namespace half_float #endif } - /// 64-bit division. - /// \param x upper 32 bit of dividend - /// \param y divisor - /// \param s variable to store sticky bit for rounding - /// \return (\a x << 32) / \a y + + + + + inline uint32 divide64(uint32 x, uint32 y, int &s) { #if HALF_ENABLE_CPP11_LONG_LONG @@ -1399,13 +1399,13 @@ namespace half_float #endif } - /// Half precision positive modulus. 
- /// \tparam Q `true` to compute full quotient, `false` else - /// \tparam R `true` to compute signed remainder, `false` for positive remainder - /// \param x first operand as positive finite half-precision value - /// \param y second operand as positive finite half-precision value - /// \param quo adress to store quotient at, `nullptr` if \a Q `false` - /// \return modulus of \a x / \a y + + + + + + + template unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) { unsigned int q = 0; @@ -1472,11 +1472,11 @@ namespace half_float return x; } - /// Fixed point square root. - /// \tparam F number of fractional bits - /// \param r radicand in Q1.F fixed point format - /// \param exp exponent - /// \return square root as Q1.F/2 + + + + + template uint32 sqrt(uint32 &r, int &exp) { int i = exp & 1; @@ -1496,11 +1496,11 @@ namespace half_float return m; } - /// Fixed point binary exponential. - /// This uses the BKM algorithm in E-mode. - /// \param m exponent in [0,1) as Q0.31 - /// \param n number of iterations (at most 32) - /// \return 2 ^ \a m as Q1.31 + + + + + inline uint32 exp2(uint32 m, unsigned int n = 32) { static const uint32 logs[] = { @@ -1523,11 +1523,11 @@ namespace half_float return mx; } - /// Fixed point binary logarithm. - /// This uses the BKM algorithm in L-mode. - /// \param m mantissa in [1,2) as Q1.30 - /// \param n number of iterations (at most 32) - /// \return log2(\a m) as Q0.31 + + + + + inline uint32 log2(uint32 m, unsigned int n = 32) { static const uint32 logs[] = { @@ -1550,11 +1550,11 @@ namespace half_float return my; } - /// Fixed point sine and cosine. - /// This uses the CORDIC algorithm in rotation mode. 
- /// \param mz angle in [-pi/2,pi/2] as Q1.30 - /// \param n number of iterations (at most 31) - /// \return sine and cosine of \a mz as Q1.30 + + + + + inline std::pair sincos(uint32 mz, unsigned int n = 31) { static const uint32 angles[] = { @@ -1573,12 +1573,12 @@ namespace half_float return std::make_pair(my, mx); } - /// Fixed point arc tangent. - /// This uses the CORDIC algorithm in vectoring mode. - /// \param my y coordinate as Q0.30 - /// \param mx x coordinate as Q0.30 - /// \param n number of iterations (at most 31) - /// \return arc tangent of \a my / \a mx as Q1.30 + + + + + + inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31) { static const uint32 angles[] = { @@ -1597,10 +1597,10 @@ namespace half_float return mz; } - /// Reduce argument for trigonometric functions. - /// \param abs half-precision floating-point value - /// \param k value to take quarter period - /// \return \a abs reduced to [-pi/4,pi/4] as Q0.30 + + + + inline uint32 angle_arg(unsigned int abs, int &k) { uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10); @@ -1621,9 +1621,9 @@ namespace half_float #endif } - /// Get arguments for atan2 function. - /// \param abs half-precision floating-point value - /// \return \a abs and sqrt(1 - \a abs^2) as Q0.30 + + + inline std::pair atan2_args(unsigned int abs) { int exp = -15; @@ -1642,11 +1642,11 @@ namespace half_float return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); } - /// Get exponentials for hyperbolic computation - /// \param abs half-precision floating-point value - /// \param exp variable to take unbiased exponent of larger result - /// \param n number of BKM iterations (at most 32) - /// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent + + + + + inline std::pair hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) { uint32 mx = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; @@ -1674,17 +1674,17 @@ namespace half_float return std::make_pair(mx, (d<31) ? 
((my>>d)|((my&((static_cast(1)< unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) { if(esign) @@ -1706,17 +1706,17 @@ namespace half_float return fixed2half(m, exp+14, sign, s); } - /// Postprocessing for binary logarithm. - /// \tparam R rounding mode to use - /// \tparam L logarithm for base transformation as Q1.31 - /// \param m fractional part of logarithm as Q0.31 - /// \param ilog signed integer part of logarithm - /// \param exp biased exponent of result - /// \param sign sign bit of result - /// \return value base-transformed and converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred + + + + + + + + + + + template unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) { uint32 msign = sign_mask(ilog); @@ -1734,14 +1734,14 @@ namespace half_float return fixed2half(m, exp, sign, 1); } - /// Hypotenuse square root and postprocessing. - /// \tparam R rounding mode to use - /// \param r mantissa as Q2.30 - /// \param exp biased exponent - /// \return square root converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if value had to be rounded + + + + + + + + template unsigned int hypot_post(uint32 r, int exp) { int i = r >> 31; @@ -1754,16 +1754,16 @@ namespace half_float return fixed2half(m, exp-1, 0, r!=0); } - /// Division and postprocessing for tangents. 
- /// \tparam R rounding mode to use - /// \param my dividend as Q1.31 - /// \param mx divisor as Q1.31 - /// \param exp biased exponent of result - /// \param sign sign bit of result - /// \return quotient converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred + + + + + + + + + + template unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) { int i = my >= mx, s; @@ -1776,15 +1776,15 @@ namespace half_float return fixed2half(m, exp, sign, s); } - /// Area function and postprocessing. - /// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`. - /// \tparam R rounding mode to use - /// \tparam S `true` for asinh, `false` for acosh - /// \param arg half-precision argument - /// \return asinh|acosh(\a arg) converted to half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred + + + + + + + + + template unsigned int area(unsigned int arg) { int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; @@ -1834,16 +1834,16 @@ namespace half_float return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast(S)<<15)); } - /// Class for 1.31 unsigned floating-point computation + struct f31 { - /// Constructor. - /// \param mant mantissa as 1.31 - /// \param e exponent + + + HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {} - /// Constructor. - /// \param abs unsigned half-precision value + + f31(unsigned int abs) : exp(-15) { for(; abs<0x400; abs<<=1,--exp) ; @@ -1851,10 +1851,10 @@ namespace half_float exp += (abs>>10); } - /// Addition operator. 
- /// \param a first operand - /// \param b second operand - /// \return \a a + \a b + + + + friend f31 operator+(f31 a, f31 b) { if(b.exp > a.exp) @@ -1865,10 +1865,10 @@ namespace half_float return f31(((m+i)>>i)|0x80000000, a.exp+i); } - /// Subtraction operator. - /// \param a first operand - /// \param b second operand - /// \return \a a - \a b + + + + friend f31 operator-(f31 a, f31 b) { int d = a.exp - b.exp, exp = a.exp; @@ -1879,10 +1879,10 @@ namespace half_float return f31(m, exp); } - /// Multiplication operator. - /// \param a first operand - /// \param b second operand - /// \return \a a * \a b + + + + friend f31 operator*(f31 a, f31 b) { uint32 m = multiply64(a.m, b.m); @@ -1890,10 +1890,10 @@ namespace half_float return f31(m<<(1-i), a.exp + b.exp + i); } - /// Division operator. - /// \param a first operand - /// \param b second operand - /// \return \a a / \a b + + + + friend f31 operator/(f31 a, f31 b) { int i = a.m >= b.m, s; @@ -1901,20 +1901,20 @@ namespace half_float return f31(m, a.exp - b.exp + i - 1); } - uint32 m; ///< mantissa as 1.31. - int exp; ///< exponent. + uint32 m; + int exp; }; - /// Error function and postprocessing. - /// This computes the value directly in Q1.31 using the approximations given - /// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions). - /// \tparam R rounding mode to use - /// \tparam C `true` for comlementary error function, `false` else - /// \param arg half-precision function argument - /// \return approximated value of error function in half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if no other exception occurred + + + + + + + + + + template unsigned int erf(unsigned int arg) { unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; @@ -1925,15 +1925,15 @@ namespace half_float (e.exp<-25) ? underflow() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1); } - /// Gamma function and postprocessing. 
- /// This approximates the value of either the gamma function or its logarithm directly in Q1.31. - /// \tparam R rounding mode to use - /// \tparam L `true` for lograithm of gamma function, `false` for gamma function - /// \param arg half-precision floating-point value - /// \return lgamma/tgamma(\a arg) in half-precision - /// \exception FE_OVERFLOW on overflows - /// \exception FE_UNDERFLOW on underflows - /// \exception FE_INEXACT if \a arg is not a positive integer + + + + + + + + + template unsigned int gamma(unsigned int arg) { /* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 }; @@ -2030,144 +2030,144 @@ namespace half_float } return fixed2half(s.m, s.exp+14, sign); } - /// \} + template struct half_caster; } - /// Half-precision floating-point type. - /// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic - /// operators and conversions. It is implicitly convertible to single-precision floating-point, which makes artihmetic - /// expressions and functions with mixed-type operands to be of the most precise operand type. - /// - /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and - /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which - /// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the - /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of - /// exactly 16-bits size. 
But on any reasonable implementation the actual binary representation of this type will most - /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit - /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if - /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on - /// nearly any reasonable platform. - /// - /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable - /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. + + + + + + + + + + + + + + + + + class half { public: - /// \name Construction and assignment - /// \{ + + - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. + + + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - /// Conversion constructor. - /// \param rhs float to convert - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + explicit half(float rhs) : data_(static_cast(detail::float2half(rhs))) {} - /// Conversion to single-precision. - /// \return single precision value representing expression value + + operator float() const { return detail::half2float(data_); } - /// Assignment operator. - /// \param rhs single-precision value to copy from - /// \return reference to this half - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + half& operator=(float rhs) { data_ = static_cast(detail::float2half(rhs)); return *this; } - /// \} - /// \name Arithmetic updates - /// \{ + + + - /// Arithmetic assignment. 
- /// \tparam T type of concrete half expression - /// \param rhs half expression to add - /// \return reference to this half - /// \exception FE_... according to operator+(half,half) + + + + + half& operator+=(half rhs) { return *this = *this + rhs; } - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to subtract - /// \return reference to this half - /// \exception FE_... according to operator-(half,half) + + + + + half& operator-=(half rhs) { return *this = *this - rhs; } - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to multiply with - /// \return reference to this half - /// \exception FE_... according to operator*(half,half) + + + + + half& operator*=(half rhs) { return *this = *this * rhs; } - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to divide by - /// \return reference to this half - /// \exception FE_... according to operator/(half,half) + + + + + half& operator/=(half rhs) { return *this = *this / rhs; } - /// Arithmetic assignment. - /// \param rhs single-precision value to add - /// \return reference to this half - /// \exception FE_... according to operator=() + + + + half& operator+=(float rhs) { return *this = *this + rhs; } - /// Arithmetic assignment. - /// \param rhs single-precision value to subtract - /// \return reference to this half - /// \exception FE_... according to operator=() + + + + half& operator-=(float rhs) { return *this = *this - rhs; } - /// Arithmetic assignment. - /// \param rhs single-precision value to multiply with - /// \return reference to this half - /// \exception FE_... according to operator=() + + + + half& operator*=(float rhs) { return *this = *this * rhs; } - /// Arithmetic assignment. - /// \param rhs single-precision value to divide by - /// \return reference to this half - /// \exception FE_... 
according to operator=() + + + + half& operator/=(float rhs) { return *this = *this / rhs; } - /// \} - /// \name Increment and decrement - /// \{ + + + - /// Prefix increment. - /// \return incremented half value - /// \exception FE_... according to operator+(half,half) + + + half& operator++() { return *this = *this + half(detail::binary, 0x3C00); } - /// Prefix decrement. - /// \return decremented half value - /// \exception FE_... according to operator-(half,half) + + + half& operator--() { return *this = *this + half(detail::binary, 0xBC00); } - /// Postfix increment. - /// \return non-incremented half value - /// \exception FE_... according to operator+(half,half) + + + half operator++(int) { half out(*this); ++*this; return out; } - /// Postfix decrement. - /// \return non-decremented half value - /// \exception FE_... according to operator-(half,half) + + + half operator--(int) { half out(*this); --*this; return out; } - /// \} + private: - /// Rounding mode to use + static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); - /// Constructor. - /// \param bits binary representation to set half to + + HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast(bits)) {} - /// Internal binary representation + detail::uint16 data_; #ifndef HALF_DOXYGEN_ONLY @@ -2269,25 +2269,25 @@ namespace half_float #if HALF_ENABLE_CPP11_USER_LITERALS namespace literal { - /// Half literal. - /// While this returns a properly rounded half-precision value, half literals can unfortunately not be constant - /// expressions due to rather involved conversions. So don't expect this to be a literal literal without involving - /// conversion operations at runtime. It is a convenience feature, not a performance optimization. 
- /// \param value literal value - /// \return half with of given value (possibly rounded) - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } } #endif namespace detail { - /// Helper class for half casts. - /// This class template has to be specialized for all valid cast arguments to define an appropriate static - /// `cast` member function and a corresponding `type` member denoting its return type. - /// \tparam T destination type - /// \tparam U source type - /// \tparam R rounding mode to use + + + + + + template struct half_caster {}; template struct half_caster { @@ -2320,132 +2320,132 @@ namespace half_float } } -/// Extensions to the C++ standard library. + namespace std { - /// Numeric limits for half-precision floats. - /// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits) + + template<> class numeric_limits { public: - /// Is template specialization. + static HALF_CONSTEXPR_CONST bool is_specialized = true; - /// Supports signed values. + static HALF_CONSTEXPR_CONST bool is_signed = true; - /// Is not an integer type. + static HALF_CONSTEXPR_CONST bool is_integer = false; - /// Is not exact. + static HALF_CONSTEXPR_CONST bool is_exact = false; - /// Doesn't provide modulo arithmetic. + static HALF_CONSTEXPR_CONST bool is_modulo = false; - /// Has a finite set of values. + static HALF_CONSTEXPR_CONST bool is_bounded = true; - /// IEEE conformant. + static HALF_CONSTEXPR_CONST bool is_iec559 = true; - /// Supports infinity. + static HALF_CONSTEXPR_CONST bool has_infinity = true; - /// Supports quiet NaNs. + static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; - /// Supports signaling NaNs. + static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true; - /// Supports subnormal values. 
+ static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; - /// Supports no denormalization detection. + static HALF_CONSTEXPR_CONST bool has_denorm_loss = false; #if HALF_ERRHANDLING_THROWS static HALF_CONSTEXPR_CONST bool traps = true; #else - /// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is acitvated. + static HALF_CONSTEXPR_CONST bool traps = false; #endif - /// Does not support no pre-rounding underflow detection. + static HALF_CONSTEXPR_CONST bool tinyness_before = false; - /// Rounding mode. + static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style; - /// Significant digits. + static HALF_CONSTEXPR_CONST int digits = 11; - /// Significant decimal digits. + static HALF_CONSTEXPR_CONST int digits10 = 3; - /// Required decimal digits to represent all possible values. + static HALF_CONSTEXPR_CONST int max_digits10 = 5; - /// Number base. + static HALF_CONSTEXPR_CONST int radix = 2; - /// One more than smallest exponent. + static HALF_CONSTEXPR_CONST int min_exponent = -13; - /// Smallest normalized representable power of 10. + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; - /// One more than largest exponent + static HALF_CONSTEXPR_CONST int max_exponent = 16; - /// Largest finitely representable power of 10. + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; - /// Smallest positive normal value. + static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } - /// Smallest finite value. + static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } - /// Largest finite value. + static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } - /// Difference between 1 and next representable value. 
+ static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } - /// Maximum rounding error in ULP (units in the last place). + static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } - /// Positive infinity. + static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } - /// Quiet NaN. + static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } - /// Signaling NaN. + static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } - /// Smallest positive subnormal value. + static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } }; #if HALF_ENABLE_CPP11_HASH - /// Hash function for half-precision floats. - /// This is only defined if C++11 `std::hash` is supported and enabled. - /// - /// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash) + + + + template<> struct hash { - /// Type of function argument. + typedef half_float::half argument_type; - /// Function return type. + typedef size_t result_type; - /// Compute hash function. - /// \param arg half to hash - /// \return hash value + + + result_type operator()(argument_type arg) const { return hash()(arg.data_&-static_cast(arg.data_!=0x8000)); } }; #endif @@ -2453,102 +2453,102 @@ namespace std namespace half_float { - /// \anchor compop - /// \name Comparison operators - /// \{ - - /// Comparison for equality. 
- /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + + + + + inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y) { return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)); } - /// Comparison for inequality. - /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y) { return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)); } - /// Comparison for less than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y) { return !detail::compsignal(x.data_, y.data_) && ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); } - /// Comparison for greater than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y) { return !detail::compsignal(x.data_, y.data_) && ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); } - /// Comparison for less equal. 
- /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y) { return !detail::compsignal(x.data_, y.data_) && ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); } - /// Comparison for greater equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else - /// \exception FE_INVALID if \a x or \a y is NaN + + + + + + inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y) { return !detail::compsignal(x.data_, y.data_) && ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); } - /// \} - /// \anchor arithmetics - /// \name Arithmetic operators - /// \{ + + + + - /// Identity. - /// \param arg operand - /// \return unchanged operand + + + inline HALF_CONSTEXPR half operator+(half arg) { return arg; } - /// Negation. - /// \param arg operand - /// \return negated operand + + + inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); } - /// Addition. - /// This operation is exact to rounding for all rounding modes. - /// \param x left operand - /// \param y right operand - /// \return sum of half expressions - /// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + inline half operator+(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -2592,13 +2592,13 @@ namespace half_float #endif } - /// Subtraction. - /// This operation is exact to rounding for all rounding modes. 
- /// \param x left operand - /// \param y right operand - /// \return difference of half expressions - /// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + inline half operator-(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -2608,13 +2608,13 @@ namespace half_float #endif } - /// Multiplication. - /// This operation is exact to rounding for all rounding modes. - /// \param x left operand - /// \param y right operand - /// \return product of half expressions - /// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + inline half operator*(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -2640,14 +2640,14 @@ namespace half_float #endif } - /// Division. - /// This operation is exact to rounding for all rounding modes. - /// \param x left operand - /// \param y right operand - /// \return quotient of half expressions - /// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN - /// \exception FE_DIVBYZERO if dividing finite value by 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half operator/(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -2677,16 +2677,16 @@ namespace half_float #endif } - /// \} - /// \anchor streaming - /// \name Input and output - /// \{ + + + + - /// Output operator. - /// This uses the built-in functionality for streaming out floating-point numbers. - /// \param out output stream to write into - /// \param arg half expression to write - /// \return reference to output stream + + + + + template std::basic_ostream& operator<<(std::basic_ostream &out, half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -2696,15 +2696,15 @@ namespace half_float #endif } - /// Input operator. 
- /// This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating - /// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first - /// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded - /// to half-precision using the library's half-precision rounding mode. - /// \param in input stream to read from - /// \param arg half to read into - /// \return reference to input stream - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + template std::basic_istream& operator>>(std::basic_istream &in, half &arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -2717,29 +2717,29 @@ namespace half_float return in; } - /// \} - /// \anchor basic - /// \name Basic mathematical operations - /// \{ + + + + - /// Absolute value. - /// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs). - /// \param arg operand - /// \return absolute value of \a arg + + + + inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); } - /// Absolute value. - /// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs). - /// \param arg operand - /// \return absolute value of \a arg + + + + inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); } - /// Remainder of division. - /// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod). - /// \param x first operand - /// \param y second operand - /// \return remainder of floating-point division. 
- /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN + + + + + + inline half fmod(half x, half y) { unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; @@ -2755,12 +2755,12 @@ namespace half_float return half(detail::binary, sign|detail::mod(absx, absy)); } - /// Remainder of division. - /// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder). - /// \param x first operand - /// \param y second operand - /// \return remainder of floating-point division. - /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN + + + + + + inline half remainder(half x, half y) { unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; @@ -2774,13 +2774,13 @@ namespace half_float return half(detail::binary, sign^detail::mod(absx, absy)); } - /// Remainder of division. - /// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo). - /// \param x first operand - /// \param y second operand - /// \param quo address to store some bits of quotient at - /// \return remainder of floating-point division. - /// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN + + + + + + + inline half remquo(half x, half y, int *quo) { unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000; @@ -2796,16 +2796,16 @@ namespace half_float return *quo = qsign ? -q : q, half(detail::binary, value); } - /// Fused multiply add. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma). - /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return ( \a x * \a y ) + \a z rounded as one operation. 
- /// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition + + + + + + + + + + inline half fma(half x, half y, half z) { #ifdef HALF_ARITHMETIC_TYPE @@ -2869,38 +2869,38 @@ namespace half_float #endif } - /// Maximum of half expressions. - /// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax). - /// \param x first operand - /// \param y second operand - /// \return maximum of operands, ignoring quiet NaNs - /// \exception FE_INVALID if \a x or \a y is signaling NaN + + + + + + inline HALF_CONSTEXPR_NOERR half fmax(half x, half y) { return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) < (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); } - /// Minimum of half expressions. - /// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin). - /// \param x first operand - /// \param y second operand - /// \return minimum of operands, ignoring quiet NaNs - /// \exception FE_INVALID if \a x or \a y is signaling NaN + + + + + + inline HALF_CONSTEXPR_NOERR half fmin(half x, half y) { return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) > (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); } - /// Positive difference. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim). - /// \param x first operand - /// \param y second operand - /// \return \a x - \a y or 0 if difference negative - /// \exception FE_... 
according to operator-(half,half) + + + + + + + + inline half fdim(half x, half y) { if(isnan(x) || isnan(y)) @@ -2908,10 +2908,10 @@ namespace half_float return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y); } - /// Get NaN value. - /// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan). - /// \param arg string code - /// \return quiet NaN + + + + inline half nanh(const char *arg) { unsigned int value = 0x7FFF; @@ -2920,19 +2920,19 @@ namespace half_float return half(detail::binary, value); } - /// \} - /// \anchor exponential - /// \name Exponential functions - /// \{ - - /// Exponential function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp). - /// \param arg function argument - /// \return e raised to \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + inline half exp(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -2960,14 +2960,14 @@ namespace half_float #endif } - /// Binary exponential. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2). - /// \param arg function argument - /// \return 2 raised to \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half exp2(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -2985,15 +2985,15 @@ namespace half_float #endif } - /// Exponential minus one. - /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` - /// and in <1% of inputs for any other rounding mode. 
- /// - /// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1). - /// \param arg function argument - /// \return e raised to \a arg and subtracted by 1 - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half expm1(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3038,15 +3038,15 @@ namespace half_float #endif } - /// Natural logarithm. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log). - /// \param arg function argument - /// \return logarithm of \a arg to base e - /// \exception FE_INVALID for signaling NaN or negative argument - /// \exception FE_DIVBYZERO for 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half log(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3066,15 +3066,15 @@ namespace half_float #endif } - /// Common logarithm. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10). - /// \param arg function argument - /// \return logarithm of \a arg to base 10 - /// \exception FE_INVALID for signaling NaN or negative argument - /// \exception FE_DIVBYZERO for 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half log10(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3101,15 +3101,15 @@ namespace half_float #endif } - /// Binary logarithm. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2). 
- /// \param arg function argument - /// \return logarithm of \a arg to base 2 - /// \exception FE_INVALID for signaling NaN or negative argument - /// \exception FE_DIVBYZERO for 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half log2(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3143,16 +3143,16 @@ namespace half_float #endif } - /// Natural logarithm plus one. - /// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` - /// and in ~1% of inputs for any other rounding mode. - /// - /// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p). - /// \param arg function argument - /// \return logarithm of \a arg plus 1 to base e - /// \exception FE_INVALID for signaling NaN or argument <-1 - /// \exception FE_DIVBYZERO for -1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + inline half log1p(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3190,19 +3190,19 @@ namespace half_float #endif } - /// \} - /// \anchor power - /// \name Power functions - /// \{ - - /// Square root. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt). - /// \param arg function argument - /// \return square root of \a arg - /// \exception FE_INVALID for signaling NaN and negative arguments - /// \exception FE_INEXACT according to rounding + + + + + + + + + + + + + inline half sqrt(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3217,13 +3217,13 @@ namespace half_float #endif } - /// Inverse square root. - /// This function is exact to rounding for all rounding modes and thus generally more accurate than directly computing - /// 1 / sqrt(\a arg) in half-precision, in addition to also being faster. 
- /// \param arg function argument - /// \return reciprocal of square root of \a arg - /// \exception FE_INVALID for signaling NaN and negative arguments - /// \exception FE_INEXACT according to rounding + + + + + + + inline half rsqrt(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3253,14 +3253,14 @@ namespace half_float #endif } - /// Cubic root. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt). - /// \param arg function argument - /// \return cubic root of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT according to rounding + + + + + + + + inline half cbrt(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3303,15 +3303,15 @@ namespace half_float #endif } - /// Hypotenuse function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot). - /// \param x first argument - /// \param y second argument - /// \return square root of sum of squares without internal over- or underflows - /// \exception FE_INVALID if \a x or \a y is signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root + + + + + + + + + inline half hypot(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -3348,16 +3348,16 @@ namespace half_float #endif } - /// Hypotenuse function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot). 
- /// \param x first argument - /// \param y second argument - /// \param z third argument - /// \return square root of sum of squares without internal over- or underflows - /// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root + + + + + + + + + + inline half hypot(half x, half y, half z) { #ifdef HALF_ARITHMETIC_TYPE @@ -3414,16 +3414,16 @@ namespace half_float #endif } - /// Power function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs. - /// - /// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow). - /// \param x base - /// \param y exponent - /// \return \a x raised to \a y - /// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite an negative and \a y is finite and not integral - /// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + inline half pow(half x, half y) { #ifdef HALF_ARITHMETIC_TYPE @@ -3474,20 +3474,20 @@ namespace half_float #endif } - /// \} - /// \anchor trigonometric - /// \name Trigonometric functions - /// \{ - - /// Compute sine and cosine simultaneously. - /// This returns the same results as sin() and cos() but is faster than calling each function individually. - /// - /// This function is exact to rounding for all rounding modes. - /// \param arg function argument - /// \param sin variable to take sine of \a arg - /// \param cos variable to take cosine of \a arg - /// \exception FE_INVALID for signaling NaN or infinity - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + + inline void sincos(half arg, half *sin, half *cos) { #ifdef HALF_ARITHMETIC_TYPE @@ -3545,14 +3545,14 @@ namespace half_float #endif } - /// Sine function. 
- /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin). - /// \param arg function argument - /// \return sine value of \a arg - /// \exception FE_INVALID for signaling NaN or infinity - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half sin(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3578,14 +3578,14 @@ namespace half_float #endif } - /// Cosine function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos). - /// \param arg function argument - /// \return cosine value of \a arg - /// \exception FE_INVALID for signaling NaN or infinity - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half cos(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3606,14 +3606,14 @@ namespace half_float #endif } - /// Tangent function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan). - /// \param arg function argument - /// \return tangent value of \a arg - /// \exception FE_INVALID for signaling NaN or infinity - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half tan(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3643,14 +3643,14 @@ namespace half_float #endif } - /// Arc sine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin). 
- /// \param arg function argument - /// \return arc sine value of \a arg - /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half asin(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3672,14 +3672,14 @@ namespace half_float #endif } - /// Arc cosine function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos). - /// \param arg function argument - /// \return arc cosine value of \a arg - /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half acos(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3697,14 +3697,14 @@ namespace half_float #endif } - /// Arc tangent function. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan). - /// \param arg function argument - /// \return arc tangent value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half atan(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3725,16 +3725,16 @@ namespace half_float #endif } - /// Arc tangent function. - /// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`, - /// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode. - /// - /// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2). 
- /// \param y numerator - /// \param x denominator - /// \return arc tangent value - /// \exception FE_INVALID if \a x or \a y is signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + inline half atan2(half y, half x) { #ifdef HALF_ARITHMETIC_TYPE @@ -3777,19 +3777,19 @@ namespace half_float #endif } - /// \} - /// \anchor hyperbolic - /// \name Hyperbolic functions - /// \{ - - /// Hyperbolic sine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh). - /// \param arg function argument - /// \return hyperbolic sine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + inline half sinh(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3810,14 +3810,14 @@ namespace half_float #endif } - /// Hyperbolic cosine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh). - /// \param arg function argument - /// \return hyperbolic cosine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half cosh(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3837,14 +3837,14 @@ namespace half_float #endif } - /// Hyperbolic tangent. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh). 
- /// \param arg function argument - /// \return hyperbolic tangent value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half tanh(half arg) { #ifdef HALF_ARITHMETIC_TYPE @@ -3869,14 +3869,14 @@ namespace half_float #endif } - /// Hyperbolic area sine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh). - /// \param arg function argument - /// \return area sine value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half asinh(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3897,14 +3897,14 @@ namespace half_float #endif } - /// Hyperbolic area cosine. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh). - /// \param arg function argument - /// \return area cosine value of \a arg - /// \exception FE_INVALID for signaling NaN or arguments <1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half acosh(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3921,15 +3921,15 @@ namespace half_float #endif } - /// Hyperbolic area tangent. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh). 
- /// \param arg function argument - /// \return area tangent value of \a arg - /// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1 - /// \exception FE_DIVBYZERO for +/-1 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half atanh(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3950,19 +3950,19 @@ namespace half_float #endif } - /// \} - /// \anchor special - /// \name Error and gamma functions - /// \{ - - /// Error function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. - /// - /// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf). - /// \param arg function argument - /// \return error function value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + inline half erf(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -3977,14 +3977,14 @@ namespace half_float #endif } - /// Complementary error function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs. - /// - /// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc). - /// \param arg function argument - /// \return 1 minus error function value of \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + inline half erfc(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -4001,15 +4001,15 @@ namespace half_float #endif } - /// Natural logarithm of gamma function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs. 
- /// - /// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma). - /// \param arg function argument - /// \return natural logarith of gamma function for \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_DIVBYZERO for 0 or negative integer arguments - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half lgamma(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -4026,15 +4026,15 @@ namespace half_float #endif } - /// Gamma function. - /// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs. - /// - /// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma). - /// \param arg function argument - /// \return gamma function value of \a arg - /// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments - /// \exception FE_DIVBYZERO for 0 - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half tgamma(half arg) { #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH @@ -4057,100 +4057,100 @@ namespace half_float #endif } - /// \} - /// \anchor rounding - /// \name Rounding - /// \{ - - /// Nearest integer not less than half value. - /// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil). - /// \param arg half to round - /// \return nearest integer not less than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded + + + + + + + + + + + inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - /// Nearest integer not greater than half value. - /// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor). 
- /// \param arg half to round - /// \return nearest integer not greater than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - /// Nearest integer not greater in magnitude than half value. - /// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc). - /// \param arg half to round - /// \return nearest integer not greater in magnitude than \a arg - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - /// Nearest integer. - /// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - /// Nearest integer. - /// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID if value is not representable as `long` + + + + + inline long lround(half arg) { return detail::half2int(arg.data_); } - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint). 
- /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID for signaling NaN - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID if value is not representable as `long` - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline long lrint(half arg) { return detail::half2int(arg.data_); } - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint). - /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID for signaling NaN + + + + + inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } #if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer. - /// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round). - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases - /// \exception FE_INVALID if value is not representable as `long long` + + + + + inline long long llround(half arg) { return detail::half2int(arg.data_); } - /// Nearest integer using half's internal rounding mode. - /// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint). 
- /// \param arg half expression to round - /// \return nearest integer using default rounding mode - /// \exception FE_INVALID if value is not representable as `long long` - /// \exception FE_INEXACT if value had to be rounded + + + + + + inline long long llrint(half arg) { return detail::half2int(arg.data_); } #endif - /// \} - /// \anchor float - /// \name Floating point manipulation - /// \{ - - /// Decompress floating-point number. - /// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp). - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return significant in range [0.5, 1) - /// \exception FE_INVALID for signaling NaN + + + + + + + + + + + inline half frexp(half arg, int *exp) { *exp = 0; @@ -4162,15 +4162,15 @@ namespace half_float return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF)); } - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn). - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half scalbln(half arg, long exp) { unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; @@ -4188,34 +4188,34 @@ namespace half_float return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0)); } - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn). 
- /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half scalbn(half arg, int exp) { return scalbln(arg, exp); } - /// Multiply by power of two. - /// This function is exact to rounding for all rounding modes. - /// - /// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp). - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + inline half ldexp(half arg, int exp) { return scalbln(arg, exp); } - /// Extract integer and fractional parts. - /// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf). - /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - /// \exception FE_INVALID for signaling NaN + + + + + + inline half modf(half arg, half *iptr) { unsigned int abs = arg.data_ & 0x7FFF; @@ -4236,14 +4236,14 @@ namespace half_float return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF)); } - /// Extract exponent. - /// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb). - /// \param arg number to query - /// \return floating-point exponent - /// \retval FP_ILOGB0 for zero - /// \retval FP_ILOGBNAN for NaN - /// \retval INT_MAX for infinity - /// \exception FE_INVALID for 0 or infinite values + + + + + + + + inline int ilogb(half arg) { int abs = arg.data_ & 0x7FFF, exp; @@ -4256,12 +4256,12 @@ namespace half_float return exp; } - /// Extract exponent. 
- /// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb). - /// \param arg number to query - /// \return floating-point exponent - /// \exception FE_INVALID for signaling NaN - /// \exception FE_DIVBYZERO for 0 + + + + + + inline half logb(half arg) { int abs = arg.data_ & 0x7FFF, exp; @@ -4280,14 +4280,14 @@ namespace half_float return half(detail::binary, value); } - /// Next representable value. - /// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter). - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW for infinite result from finite argument - /// \exception FE_UNDERFLOW for subnormal result + + + + + + + + inline half nextafter(half from, half to) { int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; @@ -4307,14 +4307,14 @@ namespace half_float return half(detail::binary, out); } - /// Next representable value. - /// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward). - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to - /// \exception FE_INVALID for signaling NaN - /// \exception FE_OVERFLOW for infinite result from finite argument - /// \exception FE_UNDERFLOW for subnormal result + + + + + + + + inline half nexttoward(half from, long double to) { int fabs = from.data_ & 0x7FFF; @@ -4334,26 +4334,26 @@ namespace half_float return half(detail::binary, out); } - /// Take sign. - /// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign). 
- /// \param x value to change sign for - /// \param y value to take sign from - /// \return value equal to \a x in magnitude and to \a y in sign + + + + + inline HALF_CONSTEXPR half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); } - /// \} - /// \anchor classification - /// \name Floating point classification - /// \{ - - /// Classify floating-point value. - /// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify). - /// \param arg number to classify - /// \retval FP_ZERO for positive and negative zero - /// \retval FP_SUBNORMAL for subnormal numbers - /// \retval FP_INFINITY for positive and negative infinity - /// \retval FP_NAN for NaNs - /// \retval FP_NORMAL for all other (normal) values + + + + + + + + + + + + + inline HALF_CONSTEXPR int fpclassify(half arg) { return !(arg.data_&0x7FFF) ? FP_ZERO : @@ -4363,212 +4363,212 @@ namespace half_float FP_NAN; } - /// Check if finite number. - /// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite). - /// \param arg number to check - /// \retval true if neither infinity nor NaN - /// \retval false else + + + + + inline HALF_CONSTEXPR bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; } - /// Check for infinity. - /// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf). - /// \param arg number to check - /// \retval true for positive or negative infinity - /// \retval false else + + + + + inline HALF_CONSTEXPR bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; } - /// Check for NaN. - /// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan). - /// \param arg number to check - /// \retval true for NaNs - /// \retval false else + + + + + inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } - /// Check if normal number. 
- /// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal). - /// \param arg number to check - /// \retval true if normal number - /// \retval false if either subnormal, zero, infinity or NaN + + + + + inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } - /// Check sign. - /// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit). - /// \param arg number to check - /// \retval true for negative number - /// \retval false for positive number + + + + + inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; } - /// \} - /// \anchor compfunc - /// \name Comparison - /// \{ - - /// Quiet comparison for greater than. - /// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else + + + + + + + + + + + inline HALF_CONSTEXPR bool isgreater(half x, half y) { return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); } - /// Quiet comparison for greater equal. - /// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else + + + + + + inline HALF_CONSTEXPR bool isgreaterequal(half x, half y) { return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); } - /// Quiet comparison for less than. - /// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless). 
- /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else + + + + + + inline HALF_CONSTEXPR bool isless(half x, half y) { return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); } - /// Quiet comparison for less equal. - /// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal). - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else + + + + + + inline HALF_CONSTEXPR bool islessequal(half x, half y) { return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); } - /// Quiet comarison for less or greater. - /// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater). - /// \param x first operand - /// \param y second operand - /// \retval true if either less or greater - /// \retval false else + + + + + + inline HALF_CONSTEXPR bool islessgreater(half x, half y) { return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y); } - /// Quiet check if unordered. - /// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered). - /// \param x first operand - /// \param y second operand - /// \retval true if unordered (one or two NaN operands) - /// \retval false else + + + + + + inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); } - /// \} - /// \anchor casting - /// \name Casting - /// \{ - - /// Cast to or from half-precision floating-point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. 
The values are converted - /// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s returns the argument unmodified. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + + + + + + template T half_cast(U arg) { return detail::half_caster::cast(arg); } - /// Cast to or from half-precision floating-point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s returns the argument unmodified. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam R rounding mode to use. 
- /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - /// \exception FE_INVALID if \a T is integer type and result is not representable as \a T - /// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding + + + + + + + + + + + + + + template T half_cast(U arg) { return detail::half_caster::cast(arg); } - /// \} - - /// \} - /// \anchor errors - /// \name Error handling - /// \{ - - /// Clear exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept). - /// \param excepts OR of exceptions to clear - /// \retval 0 all selected flags cleared successfully + + + + + + + + + + + + + + inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; } - /// Test exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept). - /// \param excepts OR of exceptions to test - /// \return OR of selected exceptions if raised + + + + + + + inline int fetestexcept(int excepts) { return detail::errflags() & excepts; } - /// Raise exception flags. - /// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as - /// configured with the [HALF_ERRHANDLIG_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols. 
- /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept). - /// \param excepts OR of exceptions to raise - /// \retval 0 all selected exceptions raised successfully + + + + + + + + + inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; } - /// Save exception flags. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag). - /// \param flagp adress to store flag state at - /// \param excepts OR of flags to save - /// \retval 0 for success + + + + + + + + inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; } - /// Restore exception flags. - /// This only copies the specified exception state (including unset flags) without incurring any additional exception handling. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// - /// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag). - /// \param flagp adress to take flag state from - /// \param excepts OR of flags to restore - /// \retval 0 for success + + + + + + + + + inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; } - /// Throw C++ exceptions based on set exception flags. 
- /// This function manually throws a corresponding C++ exception if one of the specified flags is set, - /// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not. - /// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, - /// but in that case manual flag management is the only way to raise flags. - /// \param excepts OR of exceptions to test - /// \param msg error message to use for exception description - /// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set - /// \throw std::overflow_error if `FE_OVERFLOW` is selected and set - /// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set - /// \throw std::range_error if `FE_INEXACT` is selected and set + + + + + + + + + + + inline void fethrowexcept(int excepts, const char *msg = "") { excepts &= detail::errflags(); @@ -4581,7 +4581,7 @@ namespace half_float if(excepts & FE_INEXACT) throw std::range_error(msg); } - /// \} + } diff --git a/python/src/ngtpy.cpp b/python/src/ngtpy.cpp index d320c5f..b5fbaa3 100644 --- a/python/src/ngtpy.cpp +++ b/python/src/ngtpy.cpp @@ -71,15 +71,18 @@ class BatchResults { } py::array_t getIDs() { convert(); - if (size == 0 || resultList[0].size() == 0) { - NGTThrowException("ngtpy::BatchResults::get: empty."); + if (size == 0 || resultList.size() == 0) { + std::stringstream msg; + msg << "ngtpy::BatchResults::getIDs: empty. " << size << ":" << resultList.size(); + NGTThrowException(msg); } size_t nobjects = resultList[0].size(); py::array_t r({size, nobjects}); auto wr = r.mutable_unchecked<2>(); for (size_t idx = 0; idx < size; idx++) { if (resultList[idx].size() != nobjects) { - NGTThrowException("ngtpy::BatchResults::get: not knn results."); + std::cerr << "ngtpy::BatchResults::getIDs: not knn results. 
" << resultList[idx].size() + << ":" << nobjects << std::endl; } for (auto ri = resultList[idx].begin(); ri != resultList[idx].end(); ++ri) { wr(idx, std::distance(resultList[idx].begin(), ri)) = (*ri).id - 1; @@ -590,7 +593,7 @@ class QuantizedIndex : public NGTQG::Index { size_t size, // the number of resultant objects float epsilon, // search parameter epsilon. the adequate range is from 0.0 to 0.05. float resultExpansion, // the number of inner resultant objects - int edgeSize // the number of used edges for each node during the exploration of the graph. + int edgeSize // the number of used edges for each node during the exploration of the graph. ) { py::array_t qobject(query); py::buffer_info qinfo = qobject.request(); @@ -602,7 +605,7 @@ class QuantizedIndex : public NGTQG::Index { resultExpansion = resultExpansion >= 0.0 ? resultExpansion : defaultResultExpansion; edgeSize = edgeSize >= -2 ? edgeSize : defaultEdgeSize; sc.setSize(size); // the number of resulting objects. - sc.setRadius(defaultRadius); // the radius of search. + sc.setRadius(defaultRadius); // the radius of search. sc.setEpsilon(epsilon); // set exploration coefficient. sc.setResultExpansion(resultExpansion); // set result expansion. sc.setEdgeSize(edgeSize); // if maxEdge is minus, the specified value in advance is used. @@ -628,7 +631,6 @@ class QuantizedIndex : public NGTQG::Index { r.pop(); } } - return ids; } py::list results; @@ -690,8 +692,9 @@ class QuantizedBlobIndex : public QBG::Index { bool zeroBasedNumbering, // object ID numbering. bool treeDisabled, // not use the tree index. bool logDisabled, // stderr log is disabled. - bool readOnly // open mode - ):QBG::Index(path, readOnly) { + bool readOnly, // open mode. + const std::string refinementObjectTypeString // object type for distance refinement. 
+ ):QBG::Index(path, readOnly, !logDisabled, refinementObjectType(refinementObjectTypeString)) { zeroNumbering = zeroBasedNumbering; numOfDistanceComputations = 0; treeIndex = !treeDisabled; @@ -706,6 +709,28 @@ class QuantizedBlobIndex : public QBG::Index { defaultNumOfProbes = 0; } + static NGTQ::DataType refinementObjectType(const std::string type) { + NGTQ::DataType objectType = NGTQ::DataTypeAny; + if (type == "Float" || type == "float") { + objectType = NGTQ::DataTypeFloat; + } else if (type == "Byte" || type == "byte") { + objectType = NGTQ::DataTypeUint8; +#ifdef NGT_HALF_FLOAT + } else if (type == "Float16" || type == "float16") { + objectType = NGTQ::DataTypeFloat16; +#endif + } else if (type == "Any" || type == "any") { + objectType = NGTQ::DataTypeAny; + } else if (type == "None" || type == "none") { + objectType = NGTQ::DataTypeNone; + } else { + std::stringstream msg; + msg << "ngtpy::create: invalid object type. " << objectType; + NGTThrowException(msg); + } + return objectType; + } + void batchInsert( py::array_t objects, bool debug = false @@ -737,7 +762,7 @@ class QuantizedBlobIndex : public QBG::Index { py::array_t batchSearchTmp( py::array_t queries, - size_t size + size_t size ) { const py::buffer_info &qinfo = queries.request(); const std::vector &qshape = qinfo.shape; @@ -776,7 +801,7 @@ class QuantizedBlobIndex : public QBG::Index { return results; } - void batchSearchInTwoSteps( + void parallelSearchInTwoSteps( py::array_t queries, BatchResults &results, size_t size @@ -785,7 +810,8 @@ class QuantizedBlobIndex : public QBG::Index { const std::vector &qshape = qinfo.shape; auto nOfQueries = qshape[0]; size_t dimension = qshape[1]; - size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); + //size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); + auto pseudoDimension = QBG::Index::getQuantizer().property.dimension; auto *queryPtr = 
static_cast(qinfo.ptr); size = size > 0 ? size : defaultNumOfSearchObjects; @@ -794,14 +820,22 @@ class QuantizedBlobIndex : public QBG::Index { results.resultList.clear(); results.results.resize(nOfQueries); + auto resultExpansion = defaultResultExpansion; + size_t exactResultSize = 0; + if (resultExpansion >= 1.0) { + exactResultSize = size; + size = static_cast(size) * resultExpansion; + } + #pragma omp parallel for schedule(dynamic) for (int idx = 0; idx < nOfQueries; idx++) { float *qptr = queryPtr + idx * dimension; - vector query(psedoDimension, 0); + vector query(pseudoDimension, 0); memcpy(query.data(), qptr, dimension * sizeof(float)); QBG::SearchContainer sc; sc.setObjectVector(query); sc.setSize(size); + sc.setExactResultSize(exactResultSize); sc.setEpsilon(defaultEpsilon); sc.setBlobEpsilon(defaultBlobEpsilon); sc.setEdgeSize(defaultEdgeSize); @@ -816,7 +850,50 @@ class QuantizedBlobIndex : public QBG::Index { return; } - void batchSearchInOneStep( + void batchSearchInTwoSteps( + py::array_t queries, + BatchResults &results, + size_t size + ) { + const py::buffer_info &qinfo = queries.request(); + const std::vector &qshape = qinfo.shape; + auto nOfQueries = qshape[0]; + size_t dimension = qshape[1]; + //size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); + auto pseudoDimension = QBG::Index::getQuantizer().property.dimension; + auto *queryPtr = static_cast(qinfo.ptr); + + size = size > 0 ? 
size : defaultNumOfSearchObjects; + + results.results.clear(); + results.resultList.clear(); + + std::unique_ptr qs(new float[queries.size() * pseudoDimension]); +#pragma omp parallel for + for (int idx = 0; idx < nOfQueries; idx++) { + float *qptr = queryPtr + idx * dimension; + float *qsptr = &qs[idx * pseudoDimension]; + memset(qsptr + dimension, 0, sizeof(float) * (pseudoDimension - dimension)); + memcpy(qsptr, qptr, dimension * sizeof(float)); + } + QBG::BatchSearchContainer sc; + sc.setObjectVectors(&qs[0], nOfQueries, pseudoDimension); + sc.setSize(size); + sc.setRefinementExpansion(defaultResultExpansion); + sc.setEpsilon(defaultEpsilon); + sc.setBlobEpsilon(defaultBlobEpsilon); + sc.setEdgeSize(defaultEdgeSize); + sc.setNumOfProbes(defaultNumOfProbes); +#ifdef NGTQBG_FUNCTION_SELECTOR + sc.functionSelector = defaultFunctionSelector; ///////////////// +#endif + QBG::Index::searchInTwoSteps(sc); + results.resultList = std::move(sc.getBatchResult()); + results.size = results.resultList.size(); + return; + } + + void parallelSearchInOneStep( py::array_t queries, BatchResults &results, size_t size @@ -825,7 +902,7 @@ class QuantizedBlobIndex : public QBG::Index { const std::vector &qshape = qinfo.shape; auto nOfQueries = qshape[0]; size_t dimension = qshape[1]; - size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); + size_t pseudoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); auto *queryPtr = static_cast(qinfo.ptr); size = size > 0 ? 
size : defaultNumOfSearchObjects; @@ -834,21 +911,20 @@ class QuantizedBlobIndex : public QBG::Index { results.resultList.clear(); results.results.resize(nOfQueries); - size_t searchSize = size; - size_t searchExactResultSize = 0; - if (defaultExactResultExpansion >= 1.0) { - searchSize = static_cast(size) * defaultExactResultExpansion; - searchExactResultSize = size; + size_t exactResultSize = 0; + if (defaultResultExpansion >= 1.0) { + size = static_cast(size) * defaultResultExpansion; + exactResultSize = size; } #pragma omp parallel for schedule(dynamic) for (int idx = 0; idx < nOfQueries; idx++) { float *qptr = queryPtr + idx * dimension; - vector query(psedoDimension, 0); + vector query(pseudoDimension, 0); memcpy(query.data(), qptr, dimension * sizeof(float)); QBG::SearchContainer sc; sc.setObjectVector(query); - sc.setSize(searchSize); - sc.setExactResultSize(searchExactResultSize); + sc.setSize(size); + sc.setExactResultSize(exactResultSize); sc.setEpsilon(defaultEpsilon); sc.setBlobEpsilon(defaultBlobEpsilon); sc.setEdgeSize(defaultEdgeSize); @@ -873,7 +949,7 @@ class QuantizedBlobIndex : public QBG::Index { NGTThrowException(msg); } if (defaultNumOfProbes == 0) { - batchSearchInOneStep(queries, results, size); + parallelSearchInOneStep(queries, results, size); } else { batchSearchInTwoSteps(queries, results, size); } @@ -895,7 +971,7 @@ class QuantizedBlobIndex : public QBG::Index { const std::vector &qshape = qinfo.shape; auto nOfQueries = qshape[0]; size_t dimension = qshape[1]; - size_t psedoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); + size_t pseudoDimension = QBG::Index::getQuantizer().globalCodebookIndex.getObjectSpace().getPaddedDimension(); auto *queryPtr = static_cast(qinfo.ptr); radius = radius >= 0 ? 
radius : defaultRadius; radius = sqrt(radius); @@ -907,7 +983,7 @@ class QuantizedBlobIndex : public QBG::Index { #pragma omp parallel for schedule(dynamic) for (int idx = 0; idx < nOfQueries; idx++) { float *qptr = queryPtr + idx * dimension; - vector query(psedoDimension, 0); + vector query(pseudoDimension, 0); memcpy(query.data(), qptr, dimension * sizeof(float)); QBG::SearchContainer sc; sc.setObjectVector(query); @@ -937,12 +1013,18 @@ class QuantizedBlobIndex : public QBG::Index { sc.setObjectVector(qvector); size = size > 0 ? size : defaultNumOfSearchObjects; epsilon = epsilon > -1.0 ? epsilon : defaultEpsilon; +#if 0 if (defaultExactResultExpansion >= 1.0) { - sc.setSize(static_cast(size) * defaultExactResultExpansion); - sc.setExactResultSize(size); + sc.setSize(static_cast(size) * defaultExactResultExpansion); + sc.setExactResultSize(size); } else { - sc.setSize(size); // the number of resulting objects. + sc.setSize(size); // the number of resulting objects. } +#else + sc.setSize(size); + //std::cerr << "pass defaultResultExpansion=" << defaultResultExpansion << std::endl; + sc.setRefinementExpansion(defaultResultExpansion); +#endif sc.setEpsilon(epsilon); // set exploration coefficient. 
sc.setBlobEpsilon(defaultBlobEpsilon); sc.setEdgeSize(defaultEdgeSize); @@ -973,7 +1055,6 @@ class QuantizedBlobIndex : public QBG::Index { r.pop(); } } - return ids; } py::list results; @@ -1201,13 +1282,14 @@ PYBIND11_MODULE(ngtpy, m) { py::class_(m, "QuantizedBlobIndex") - .def(py::init(), + .def(py::init(), py::arg("path"), py::arg("max_no_of_edges") = 128, py::arg("zero_based_numbering") = true, py::arg("tree_disabled") = false, - py::arg("log_disabled") = false, - py::arg("read_only") = true) + py::arg("log_disabled") = true, + py::arg("read_only") = true, + py::arg("refinement_object_type") = "Any") .def("save", (void (QBG::Index::*)()) &QBG::Index::save) .def("batch_insert", &::QuantizedBlobIndex::batchInsert, py::arg("objects"), diff --git a/samples/qbg-capi/qbg-capi.cpp b/samples/qbg-capi/qbg-capi.cpp index 3610bba..4f1db87 100644 --- a/samples/qbg-capi/qbg-capi.cpp +++ b/samples/qbg-capi/qbg-capi.cpp @@ -85,7 +85,7 @@ main(int argc, char **argv) std::cerr << "building the index..." << std::endl; QBGBuildParameters buildParameters; qbg_initialize_build_parameters(&buildParameters); - buildParameters.number_of_objects = 500; + buildParameters.number_of_objects = 500; auto status = qbg_build_index(indexPath.c_str(), &buildParameters, err); if (!status) { std::cerr << "Cannot build. " << ngt_get_error_string(err) << std::endl;