diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fc3796..5ee1b02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ if(${UNIX}) find_package(BLAS REQUIRED) find_package(LAPACK REQUIRED) endif() - endif() + endif() add_subdirectory("${PROJECT_SOURCE_DIR}/lib") add_subdirectory("${PROJECT_SOURCE_DIR}/bin") diff --git a/bin/ngt/CMakeLists.txt b/bin/ngt/CMakeLists.txt index 852421a..3f672f0 100644 --- a/bin/ngt/CMakeLists.txt +++ b/bin/ngt/CMakeLists.txt @@ -5,7 +5,7 @@ if( ${UNIX} ) add_executable(ngt_exe ngt.cpp) add_dependencies(ngt_exe ngt) - set_target_properties(ngt_exe PROPERTIES OUTPUT_NAME ngt) + set_target_properties(ngt_exe PROPERTIES OUTPUT_NAME ngt) if(CMAKE_VERSION VERSION_LESS 3.1) target_link_libraries(ngt_exe ngt pthread) else() diff --git a/lib/NGT/ArrayFile.h b/lib/NGT/ArrayFile.h index 5a1b68a..9500dae 100644 --- a/lib/NGT/ArrayFile.h +++ b/lib/NGT/ArrayFile.h @@ -39,10 +39,10 @@ class ArrayFile { struct RecordStruct { bool deleteFlag; - uint64_t extraData; // reserve + uint64_t extraData; // reserve }; - bool _isOpen; + bool _isOpen; std::fstream _stream; FileHeadStruct _fileHead; @@ -65,7 +65,7 @@ class ArrayFile { }; -// constructor +// constructor template ArrayFile::ArrayFile() : _isOpen(false), _mutex((pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER){ @@ -82,7 +82,7 @@ ArrayFile::~ArrayFile() { template bool ArrayFile::create(const std::string &file, size_t recordSize) { std::fstream tmpstream; - tmpstream.open(file.c_str()); + tmpstream.open(file.c_str()); if(tmpstream){ return false; } @@ -98,9 +98,9 @@ bool ArrayFile::create(const std::string &file, size_t recordSize) { template bool ArrayFile::open(const std::string &file) { - _stream.open(file.c_str(), std::ios::in | std::ios::out); + _stream.open(file.c_str(), std::ios::in | std::ios::out); if(!_stream){ - _isOpen = false; + _isOpen = false; return false; } _isOpen = true; @@ -112,7 +112,7 @@ bool ArrayFile::open(const std::string &file) { template void ArrayFile::close(){ _stream.close(); - _isOpen = false; + _isOpen = false; } template @@ -140,7 +140,7 @@ void ArrayFile::put(const size_t id, TYPE &data, NGT::ObjectSpace *objectS _stream.seekp(offset_pos, std::ios::beg); for(size_t i = 0; i < _fileHead.recordSize; i++) { _stream.write("", 1); } - _stream.seekp(offset_pos, std::ios::beg); + _stream.seekp(offset_pos, std::ios::beg); data.serialize(_stream, objectSpace); } @@ -149,12 +149,12 @@ bool ArrayFile::get(const size_t id, TYPE &data, NGT::ObjectSpace *objectS pthread_mutex_lock(&_mutex); if( size() <= id ){ - pthread_mutex_unlock(&_mutex); + pthread_mutex_unlock(&_mutex); return false; } uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); - offset_pos += sizeof(RecordStruct); + offset_pos += sizeof(RecordStruct); _stream.seekg(offset_pos, std::ios::beg); if (!_stream.fail()) { data.deserialize(_stream, objectSpace); @@ -185,7 +185,7 @@ bool ArrayFile::get(const size_t id, TYPE &data, NGT::ObjectSpace *objectS template void ArrayFile::remove(const size_t id) { - uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); + uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); _stream.seekp(offset_pos, std::ios::beg); RecordStruct recordHead = {1, 0}; _stream.write((char *)(&recordHead), sizeof(RecordStruct)); @@ -205,7 +205,7 @@ size_t ArrayFile::size() offset_pos -= sizeof(FileHeadStruct); size_t num = offset_pos / (sizeof(RecordStruct) + _fileHead.recordSize); - return num; + return num; } template diff --git a/lib/NGT/CMakeLists.txt b/lib/NGT/CMakeLists.txt index fd93bde..4e90aee 100644 --- a/lib/NGT/CMakeLists.txt +++ b/lib/NGT/CMakeLists.txt @@ -8,7 +8,7 @@ if( ${UNIX} ) file(GLOB NGTQ_HEADER_FILES NGTQ/*.h NGTQ/*.hpp) add_library(ngtstatic STATIC ${NGT_SOURCES}) - set_target_properties(ngtstatic PROPERTIES OUTPUT_NAME ngt) + set_target_properties(ngtstatic PROPERTIES OUTPUT_NAME ngt) set_target_properties(ngtstatic PROPERTIES COMPILE_FLAGS "-fPIC") target_link_libraries(ngtstatic) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") @@ -16,9 +16,9 @@ if( ${UNIX} ) endif() add_library(ngt SHARED ${NGT_SOURCES}) - set_target_properties(ngt PROPERTIES VERSION ${ngt_VERSION}) - set_target_properties(ngt PROPERTIES SOVERSION ${ngt_SOVERSION}) - add_dependencies(ngt ngtstatic) + set_target_properties(ngt PROPERTIES VERSION ${ngt_VERSION}) + set_target_properties(ngt PROPERTIES SOVERSION ${ngt_SOVERSION}) + add_dependencies(ngt ngtstatic) if(${APPLE}) if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") target_link_libraries(ngt lapack blas OpenMP::OpenMP_CXX) diff --git a/lib/NGT/Capi.cpp b/lib/NGT/Capi.cpp index dab9f78..edcd216 100644 --- a/lib/NGT/Capi.cpp +++ b/lib/NGT/Capi.cpp @@ -930,7 +930,7 @@ float* ngt_get_object_as_float(NGTObjectSpace object_space, ObjectID id, NGTErro auto os = static_cast(object_space); if (os->getObjectType() != typeid(float)) { std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " + ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " << os->getObjectType().name(); operate_error_string_(ss, error); return NULL; @@ -942,7 +942,7 @@ NGTFloat16* ngt_get_object_as_float16(NGTObjectSpace object_space, ObjectID id, auto os = static_cast(object_space); if (os->getObjectType() != typeid(NGT::float16)) { std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " + ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " << os->getObjectType().name(); operate_error_string_(ss, error); return NULL; @@ -954,7 +954,7 @@ uint8_t* ngt_get_object_as_integer(NGTObjectSpace object_space, ObjectID id, NGT auto os = static_cast(object_space); if (os->getObjectType() != typeid(uint8_t)) { std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " + ss << "Capi : " << __FUNCTION__ << "() : Error: Not available for the object type of the index. " << os->getObjectType().name(); operate_error_string_(ss, error); return NULL; @@ -974,7 +974,7 @@ float* ngt_get_allocated_object_as_float(NGTObjectSpace object_space, ObjectID i return NULL; } auto sizeOfObject = sizeof(float) * v.size(); - auto fv = static_cast(malloc(sizeOfObject)); + auto fv = static_cast(malloc(sizeOfObject)); if (fv == NULL) { std::stringstream ss; ss << "Capi : " << __FUNCTION__ << "() : Error: Cannot allocate a vector."; diff --git a/lib/NGT/Clustering.h b/lib/NGT/Clustering.h index ed05643..ac538ab 100644 --- a/lib/NGT/Clustering.h +++ b/lib/NGT/Clustering.h @@ -20,7 +20,7 @@ using namespace std; -#if defined(NGT_AVX_DISABLED) +#if defined(NGT_AVX_DISABLED) #define NGT_CLUSTER_NO_AVX #else #if defined(__AVX2__) @@ -120,7 +120,7 @@ namespace NGT { extractVector(const std::string &str, std::vector &vec) { std::vector tokens; - NGT::Common::tokenize(str, tokens, " \t"); + NGT::Common::tokenize(str, tokens, " \t"); convert(tokens, vec); } @@ -215,7 +215,7 @@ namespace NGT { } } #if !defined(NGT_CLUSTER_NO_AVX) - static double + static double sumOfSquares(float *a, float *b, size_t size) { __m256 sum = _mm256_setzero_ps(); float *last = a + size; @@ -236,7 +236,7 @@ namespace NGT { return s; } #else // !defined(NGT_AVX_DISABLED) && defined(__AVX__) - static double + static double sumOfSquares(float *a, float *b, size_t size) { double csum = 0.0; float *x = a; @@ -297,7 +297,7 @@ namespace NGT { return sumOfSquares(&vector1[0], &vector2[0], vector1.size()) / (double)vector1.size(); } - static void + static void subtract(std::vector &a, std::vector &b) { if (a.size() != b.size()) { std::stringstream msg; @@ -326,7 +326,7 @@ namespace NGT { size = size > vectors.size() ? vectors.size() : size; clusters.clear(); if (seed == 0) { - std::random_device rnd; + std::random_device rnd; seed = rnd(); } std::mt19937 mt(seed); @@ -345,7 +345,7 @@ namespace NGT { size = size > vectors.size() ? vectors.size() : size; clusters.clear(); if (seed == 0) { - std::random_device rnd; + std::random_device rnd; seed = rnd(); } std::mt19937 mt(seed); @@ -362,7 +362,7 @@ namespace NGT { for (size_t vi = 0; vi < vectors.size(); vi++) { auto vit = vectors.begin() + vi; double mind = DBL_MAX; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { double d = distanceL2(*vit, (*cit).centroid); d *= d; if (d < mind) { @@ -390,7 +390,7 @@ namespace NGT { static void - assign(std::vector> &vectors, std::vector &clusters, + assign(std::vector> &vectors, std::vector &clusters, size_t clusterSize = std::numeric_limits::max(), bool clear = true) { // compute distances to the nearest clusters, and construct heap by the distances. NGT::Timer timer; @@ -404,13 +404,13 @@ namespace NGT { } std::vector sortedObjects(vectors.size()); -#pragma omp parallel for +#pragma omp parallel for for (size_t vi = 0; vi < vectors.size(); vi++) { auto vit = vectors.begin() + vi; { double mind = DBL_MAX; int mincidx = -1; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { double d = distanceL2(*vit, (*cit).centroid); if (d < mind) { mind = d; @@ -429,7 +429,7 @@ namespace NGT { // clear if (clear) { - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { (*cit).members.clear(); } } @@ -448,7 +448,7 @@ namespace NGT { #if 0 double mind = DBL_MAX; size_t mincidx = -1; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { if ((*cit).members.size() >= clusterSize) { continue; } @@ -492,12 +492,12 @@ namespace NGT { static void moveFartherObjectsToEmptyClusters(std::vector &clusters) { size_t emptyClusterCount = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { if ((*cit).members.size() == 0) { emptyClusterCount++; double max = -DBL_MAX; auto maxit = clusters.begin(); - for (auto scit = clusters.begin(); scit != clusters.end(); ++scit) { + for (auto scit = clusters.begin(); scit != clusters.end(); ++scit) { if ((*scit).members.size() >= 2 && (*scit).members.back().distance > max) { maxit = scit; max = (*scit).members.back().distance; @@ -525,7 +525,7 @@ namespace NGT { } static void - assignWithNGT(NGT::Index &index, std::vector > &vectors, std::vector &clusters, + assignWithNGT(NGT::Index &index, std::vector > &vectors, std::vector &clusters, size_t &resultSize, float epsilon = 0.12, size_t clusterSize = std::numeric_limits::max()) { size_t dataSize = vectors.size(); @@ -563,7 +563,7 @@ namespace NGT { sort(sortedDistances.begin(), sortedDistances.end()); - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { (*cit).members.clear(); } @@ -593,7 +593,7 @@ namespace NGT { if (clusterSize < std::numeric_limits::max()) { do { vector> notAssignedObjects(notAssignedObjectIDs.size()); - size_t nOfClosestClusters = 1 * 1024 * 1024 * 1024 / 16 / (notAssignedObjectIDs.size() == 0 ? 1 : notAssignedObjectIDs.size()); + size_t nOfClosestClusters = 1 * 1024 * 1024 * 1024 / 16 / (notAssignedObjectIDs.size() == 0 ? 1 : notAssignedObjectIDs.size()); #pragma omp parallel for for (size_t vi = 0; vi < notAssignedObjectIDs.size(); vi++) { auto vit = notAssignedObjectIDs.begin() + vi; @@ -686,7 +686,7 @@ namespace NGT { abort(); } } - return distance; + return distance; } static void @@ -738,7 +738,7 @@ namespace NGT { } return diff; } - double kmeansWithoutNGT(std::vector > &vectors, size_t numberOfClusters, + double kmeansWithoutNGT(std::vector > &vectors, size_t numberOfClusters, std::vector &clusters) { size_t clusterSize = std::numeric_limits::max(); @@ -789,7 +789,7 @@ namespace NGT { // diff is distance between the current centroids and the previous centroids. double prevDiff = diff; std::vector prevClusters = clusters; - diff = calculateCentroid(vectors, clusters); + diff = calculateCentroid(vectors, clusters); if (prevDiff == diff) { std::cerr << "epsilon=" << epsilon << "->" << epsilon * 1.1 << std::endl; epsilon *= 1.1; @@ -922,7 +922,7 @@ namespace NGT { { double d = 0.0; size_t count = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { + for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { count += (*cit).members.size(); double localD = 0.0; for (auto mit = (*cit).members.begin(); mit != (*cit).members.end(); ++mit) { @@ -939,12 +939,12 @@ namespace NGT { } static double - calculateML2FromSpecifiedCentroids(std::vector > &vectors, std::vector &clusters, + calculateML2FromSpecifiedCentroids(std::vector > &vectors, std::vector &clusters, std::vector ¢roidIds) { double d = 0.0; size_t count = 0; - for (auto it = centroidIds.begin(); it != centroidIds.end(); ++it) { + for (auto it = centroidIds.begin(); it != centroidIds.end(); ++it) { Cluster &cluster = clusters[(*it)]; count += cluster.members.size(); for (auto mit = cluster.members.begin(); mit != cluster.members.end(); ++mit) { @@ -1014,11 +1014,11 @@ namespace NGT { setupInitialClusters(vectors, numberOfClusters, clusters); switch (clusteringType) { - case ClusteringTypeKmeansWithoutNGT: + case ClusteringTypeKmeansWithoutNGT: return kmeansWithoutNGT(vectors, numberOfClusters, clusters); break; #ifndef NGT_SHARED_MEMORY_ALLOCATOR - case ClusteringTypeKmeansWithNGT: + case ClusteringTypeKmeansWithNGT: return kmeansWithNGT(vectors, numberOfClusters, clusters); break; #endif @@ -1031,7 +1031,7 @@ namespace NGT { static void - evaluate(std::vector > &vectors, std::vector &clusters, char mode, + evaluate(std::vector > &vectors, std::vector &clusters, char mode, std::vector centroidIds = std::vector()) { size_t clusterSize = std::numeric_limits::max(); diff --git a/lib/NGT/Command.cpp b/lib/NGT/Command.cpp index 58bbad9..f5addff 100644 --- a/lib/NGT/Command.cpp +++ b/lib/NGT/Command.cpp @@ -67,7 +67,7 @@ using namespace std; std::stringstream msg; msg << "Command::CreateParameter: Error: Invalid graph type. " << graphType; NGTThrowException(msg); - } + } if (property.graphType == NGT::Property::GraphType::GraphTypeONNG) { property.outgoingEdge = 10; @@ -103,7 +103,7 @@ using namespace std; indexType = args.getChar("i", 't'); switch (objectType) { - case 'f': + case 'f': property.objectType = NGT::Index::Property::ObjectType::Float; break; case 'c': @@ -121,7 +121,7 @@ using namespace std; } switch (distanceType) { - case '1': + case '1': property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL1; break; case '2': @@ -167,14 +167,14 @@ using namespace std; #ifdef NGT_SHARED_MEMORY_ALLOCATOR size_t maxNoOfObjects = args.getl("N", 0); if (maxNoOfObjects > 0) { - property.graphSharedMemorySize + property.graphSharedMemorySize = property.treeSharedMemorySize = property.objectSharedMemorySize = 512 * ceil(maxNoOfObjects / 50000000); } #endif } - void + void NGT::Command::create(Args &args) { const string usage = "Usage: ngt create " @@ -223,7 +223,7 @@ using namespace std; } } - void + void NGT::Command::append(Args &args) { const string usage = "Usage: ngt append [-p #-of-thread] [-d dimension] [-n data-size] " @@ -254,7 +254,7 @@ using namespace std; try { - NGT::Index::append(database, data, threadSize, dataSize); + NGT::Index::append(database, data, threadSize, dataSize); } catch (NGT::Exception &err) { cerr << "ngt: Error " << err.what() << endl; cerr << usage << endl; @@ -269,8 +269,8 @@ using namespace std; NGT::Command::search(NGT::Index &index, NGT::Command::SearchParameters &searchParameters, istream &is, ostream &stream) { - if (searchParameters.outputMode[0] == 'e') { - stream << "# Beginning of Evaluation" << endl; + if (searchParameters.outputMode[0] == 'e') { + stream << "# Beginning of Evaluation" << endl; } string line; @@ -287,7 +287,7 @@ using namespace std; NGT::SearchContainer sc(*object); double epsilon; if (searchParameters.step != 0) { - epsilon = searchParameters.beginOfEpsilon + (searchParameters.endOfEpsilon - searchParameters.beginOfEpsilon) * n / step; + epsilon = searchParameters.beginOfEpsilon + (searchParameters.endOfEpsilon - searchParameters.beginOfEpsilon) * n / step; } else { epsilon = searchParameters.beginOfEpsilon + searchParameters.stepOfEpsilon * n; if (epsilon > searchParameters.endOfEpsilon) { @@ -416,8 +416,8 @@ using namespace std; stream << "# Average distance of edges=" << setprecision(10) << distance / (double)numberOfEdges << endl; } } else { - stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" + stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " + << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" << endl; } } @@ -443,7 +443,7 @@ using namespace std; cerr << "indexType=" << searchParameters.indexType << endl; cerr << "size=" << searchParameters.size << endl; cerr << "edgeSize=" << searchParameters.edgeSize << endl; - cerr << "epsilon=" << searchParameters.beginOfEpsilon << "<->" << searchParameters.endOfEpsilon << "," + cerr << "epsilon=" << searchParameters.beginOfEpsilon << "<->" << searchParameters.endOfEpsilon << "," << searchParameters.stepOfEpsilon << endl; } @@ -527,7 +527,7 @@ using namespace std; if (*e != 0) { cerr << "Illegal data. " << e << endl; } - cerr << "removed ID=" << id << endl; + cerr << "removed ID=" << id << endl; } } else { size_t id = args.getl("#2", 0); @@ -676,7 +676,7 @@ using namespace std; continue; } NGT::GraphNode &node2 = *graph.getNode(node[t1].id); - for (size_t t2 = 0; t2 < node2.size(); ++t2) { + for (size_t t2 = 0; t2 < node2.size(); ++t2) { if (t2 >= selectivelyPrunedEdgeSize) { break; } @@ -921,7 +921,7 @@ using namespace std; size_t uninsertedTreeObjectCount = 0; std::cerr << "remove invalid objects from the tree." << std::endl; size_t size = objSize > idsSize ? objSize : idsSize; - for (size_t id = 1; id < size; id++) { + for (size_t id = 1; id < size; id++) { if (ids.find(id) != ids.end()) { if (removedIDs.find(id) != removedIDs.end() || id >= objSize) { if (repair) { diff --git a/lib/NGT/Common.h b/lib/NGT/Common.h index 6b5b925..50f15b3 100644 --- a/lib/NGT/Common.h +++ b/lib/NGT/Common.h @@ -61,10 +61,10 @@ namespace NGT { Exception():message("No message") {} Exception(const std::string &file, const std::string &function, size_t line, std::stringstream &m) { set(file, function, line, m.str()); } Exception(const std::string &file, const std::string &function, size_t line, const std::string &m) { set(file, function, line, m); } - void set(const std::string &file, const std::string &function, size_t line, const std::string &m) { + void set(const std::string &file, const std::string &function, size_t line, const std::string &m) { std::stringstream ss; ss << file << ":" << function << ":" << line << ": " << m; - message = ss.str(); + message = ss.str(); } ~Exception() throw() {} Exception &operator=(const Exception &e) { @@ -207,7 +207,7 @@ namespace NGT { } if (*e != 0) { std::stringstream msg; - msg << "ARGS::getl: Illegal string. Option=-" << s << " Specified value=" << get(s) + msg << "ARGS::getl: Illegal string. Option=-" << s << " Specified value=" << get(s) << " Illegal string=" << e << std::endl; NGTThrowException(msg.str()); } @@ -223,7 +223,7 @@ namespace NGT { } if (*e != 0) { std::stringstream msg; - msg << "ARGS::getf: Illegal string. Option=-" << s << " Specified value=" << get(s) + msg << "ARGS::getf: Illegal string. Option=-" << s << " Specified value=" << get(s) << " Illegal string=" << e << std::endl; NGTThrowException(msg.str()); } @@ -271,11 +271,11 @@ namespace NGT { auto time = t.time; if (time < 1.0) { time *= 1000.0; - os << std::setprecision(6) << time << " (ms)"; + os << std::setprecision(6) << time << " (ms)"; return os; } if (time < 60.0) { - os << std::setprecision(6) << time << " (s)"; + os << std::setprecision(6) << time << " (s)"; return os; } time /= 60.0; @@ -516,9 +516,9 @@ namespace NGT { class StdOstreamRedirector { public: - StdOstreamRedirector(bool e = false, const std::string path = "/dev/null", mode_t m = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, int f = 2) { + StdOstreamRedirector(bool e = false, const std::string path = "/dev/null", mode_t m = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, int f = 2) { logFilePath = path; - mode = m; + mode = m; logFD = -1; fdNo = f; enabled = e; @@ -571,7 +571,7 @@ namespace NGT { bool enabled; }; - template + template class CompactVector { public: typedef TYPE * iterator; @@ -618,7 +618,7 @@ namespace NGT { if (idx >= vectorSize) { std::stringstream msg; msg << "CompactVector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); + NGTThrowException(msg); } return vector[idx]; } @@ -768,17 +768,17 @@ namespace NGT { char &at(size_t idx) const { if (idx >= size()) { - NGTThrowException("CompactString: beyond the range"); + NGTThrowException("CompactString: beyond the range"); } return vector[idx]; } char *c_str() { return vector; } - size_t size() const { + size_t size() const { if (vector == 0) { return 0; } else { - return (size_t)strlen(vector); + return (size_t)strlen(vector); } } @@ -790,7 +790,7 @@ namespace NGT { public: BooleanSet(size_t s) { size = (s >> 6) + 1; // 2^6=64 - size = ((size >> 2) << 2) + 4; + size = ((size >> 2) << 2) + 4; bitvec.resize(size); } inline uint64_t getBitString(size_t i) { return (uint64_t)1 << (i & (64 - 1)); } @@ -868,8 +868,8 @@ namespace NGT { } return defvalue; } - void load(const std::string &f) { - std::ifstream st(f); + void load(const std::string &f) { + std::ifstream st(f); if (!st) { std::stringstream msg; msg << "PropertySet::load: Cannot load the property file " << f << "."; @@ -877,15 +877,15 @@ namespace NGT { } load(st); } - void save(const std::string &f) { - std::ofstream st(f); + void save(const std::string &f) { + std::ofstream st(f); if (!st) { std::stringstream msg; msg << "PropertySet::save: Cannot save. " << f << std::endl; NGTThrowException(msg); } - save(st); - } + save(st); + } void save(std::ofstream &os) { for (std::map::iterator i = this->begin(); i != this->end(); i++) { os << i->first << "\t" << i->second << std::endl; @@ -943,7 +943,7 @@ namespace NGT { } } - template void write(std::ostream &os, const std::vector &v) { + template void write(std::ostream &os, const std::vector &v) { unsigned int s = v.size(); write(os, s); for (unsigned int i = 0; i < s; i++) { @@ -951,7 +951,7 @@ namespace NGT { } } - template void writeAsText(std::ostream &os, const std::vector &v) { + template void writeAsText(std::ostream &os, const std::vector &v) { unsigned int s = v.size(); os << s << " "; for (unsigned int i = 0; i < s; i++) { @@ -960,7 +960,7 @@ namespace NGT { } } - template void write(std::ostream &os, const CompactVector &v) { + template void write(std::ostream &os, const CompactVector &v) { unsigned int s = v.size(); write(os, s); for (unsigned int i = 0; i < s; i++) { @@ -968,7 +968,7 @@ namespace NGT { } } - template void writeAsText(std::ostream &os, const CompactVector &v) { + template void writeAsText(std::ostream &os, const CompactVector &v) { unsigned int s = v.size(); for (unsigned int i = 0; i < s; i++) { writeAsText(os, v[i]); @@ -976,7 +976,7 @@ namespace NGT { } } - template void writeAsText(std::ostream &os, TYPE *v, size_t s) { + template void writeAsText(std::ostream &os, TYPE *v, size_t s) { os << s << " "; for (unsigned int i = 0; i < s; i++) { writeAsText(os, v[i]); @@ -984,7 +984,7 @@ namespace NGT { } } - template void read(std::istream &is, std::vector &v) { + template void read(std::istream &is, std::vector &v) { v.clear(); unsigned int s; read(is, s); @@ -996,7 +996,7 @@ namespace NGT { } } - template void readAsText(std::istream &is, std::vector &v) { + template void readAsText(std::istream &is, std::vector &v) { v.clear(); unsigned int s; is >> s; @@ -1008,7 +1008,7 @@ namespace NGT { } - template void read(std::istream &is, CompactVector &v) { + template void read(std::istream &is, CompactVector &v) { v.clear(); unsigned int s; read(is, s); @@ -1020,7 +1020,7 @@ namespace NGT { } } - template void readAsText(std::istream &is, CompactVector &v) { + template void readAsText(std::istream &is, CompactVector &v) { v.clear(); unsigned int s; is >> s; @@ -1031,7 +1031,7 @@ namespace NGT { } } - template void readAsText(std::istream &is, TYPE *v, size_t s) { + template void readAsText(std::istream &is, TYPE *v, size_t s) { unsigned int size; is >> size; if (s != size) { @@ -1053,7 +1053,7 @@ namespace NGT { #ifdef NGT_SHARED_MEMORY_ALLOCATOR - template + template class Vector { public: typedef TYPE * iterator; @@ -1072,8 +1072,8 @@ namespace NGT { TYPE &front(SharedMemoryAllocator &allocator) { return (*this).at(0, allocator); } TYPE &back(SharedMemoryAllocator &allocator) { return (*this).at(vectorSize - 1, allocator); } bool empty() { return vectorSize == 0; } - iterator begin(SharedMemoryAllocator &allocator) { - return (TYPE*)allocator.getAddr((off_t)vector); + iterator begin(SharedMemoryAllocator &allocator) { + return (TYPE*)allocator.getAddr((off_t)vector); } iterator end(SharedMemoryAllocator &allocator) { return begin(allocator) + vectorSize; @@ -1102,7 +1102,7 @@ namespace NGT { if (idx >= vectorSize) { std::stringstream msg; msg << "Vector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); + NGTThrowException(msg); } return *(begin(allocator) + idx); } @@ -1192,7 +1192,7 @@ namespace NGT { vectorSize = s; } - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { + void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { unsigned int s = size(); os << s << " "; for (unsigned int i = 0; i < s; i++) { @@ -1200,7 +1200,7 @@ namespace NGT { os << " "; } } - void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { + void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { clear(); size_t s; Serializer::readAsText(is, s); @@ -1234,10 +1234,10 @@ namespace NGT { uint32_t vectorSize; uint32_t allocatedSize; }; -#endif +#endif #ifdef NGT_SHARED_MEMORY_ALLOCATOR - template + template class DynamicLengthVector { public: typedef TYPE * iterator; @@ -1257,8 +1257,8 @@ namespace NGT { TYPE &front(SharedMemoryAllocator &allocator) { return (*this).at(0, allocator); } TYPE &back(SharedMemoryAllocator &allocator) { return (*this).at(vectorSize - 1, allocator); } bool empty() { return vectorSize == 0; } - iterator begin(SharedMemoryAllocator &allocator) { - return (TYPE*)allocator.getAddr((off_t)vector); + iterator begin(SharedMemoryAllocator &allocator) { + return (TYPE*)allocator.getAddr((off_t)vector); } iterator end(SharedMemoryAllocator &allocator) { return begin(allocator) + vectorSize; @@ -1273,7 +1273,7 @@ namespace NGT { if (idx >= vectorSize) { std::stringstream msg; msg << "Vector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); + NGTThrowException(msg); } return *reinterpret_cast(reinterpret_cast(begin(allocator)) + idx * elementSize); } @@ -1361,7 +1361,7 @@ namespace NGT { vectorSize = s; } - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { + void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { unsigned int s = size(); os << s << " "; for (unsigned int i = 0; i < s; i++) { @@ -1371,7 +1371,7 @@ namespace NGT { } - void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { + void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { clear(); size_t s; Serializer::readAsText(is, s); @@ -1411,7 +1411,7 @@ namespace NGT { #else // NGT_SHARED_MEMORY_ALLOCATOR - template + template class DynamicLengthVector { public: typedef TYPE * iterator; @@ -1431,8 +1431,8 @@ namespace NGT { TYPE &front() { return (*this).at(0); } TYPE &back() { return (*this).at(vectorSize - 1); } bool empty() { return vectorSize == 0; } - iterator begin() { - return reinterpret_cast(vector); + iterator begin() { + return reinterpret_cast(vector); } iterator end(SharedMemoryAllocator &allocator) { return begin() + vectorSize; @@ -1447,7 +1447,7 @@ namespace NGT { if (idx >= vectorSize) { std::stringstream msg; msg << "Vector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); + NGTThrowException(msg); } return *reinterpret_cast(reinterpret_cast(begin()) + idx * elementSize); } @@ -1539,7 +1539,7 @@ namespace NGT { vectorSize = s; } - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { + void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { unsigned int s = size(); os << s << " "; for (unsigned int i = 0; i < s; i++) { @@ -1549,7 +1549,7 @@ namespace NGT { } - void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { + void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { clear(); size_t s; Serializer::readAsText(is, s); @@ -1562,7 +1562,7 @@ namespace NGT { void serialize(std::ofstream &os, NGT::ObjectSpace *objspace = 0) { uint32_t sz = size(); - NGT::Serializer::write(os, sz); + NGT::Serializer::write(os, sz); os.write(reinterpret_cast(vector), size() * elementSize); } @@ -1726,14 +1726,14 @@ namespace NGT { resize(idx + 1); } if ((*this)[idx] != 0) { - NGTThrowException("put: Not empty"); + NGTThrowException("put: Not empty"); } set(idx, n); } void erase(size_t idx) { if (isEmpty(idx)) { - NGTThrowException("erase: Not in-memory or invalid id"); + NGTThrowException("erase: Not in-memory or invalid id"); } (*this)[idx]->~TYPE(); allocator.free((*this)[idx]); @@ -1757,7 +1757,7 @@ namespace NGT { } void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { - NGT::Serializer::write(os, array->size()); + NGT::Serializer::write(os, array->size()); for (size_t idx = 0; idx < array->size(); idx++) { if ((*this)[idx] == 0) { NGT::Serializer::write(os, '-'); @@ -1916,8 +1916,8 @@ namespace NGT { TYPE *at(size_t idx) { return (TYPE*)allocator.getAddr(array->at(idx, allocator)); } - void push_back(TYPE *data) { - array->push_back(allocator.getOffset(data), allocator); + void push_back(TYPE *data) { + array->push_back(allocator.getOffset(data), allocator); } void reserve(size_t s) { array->reserve(s, allocator); } void resize(size_t s) { array->resize(s, allocator, (off_t)0); } @@ -1980,14 +1980,14 @@ namespace NGT { std::vector::resize(idx + 1, 0); } if ((*this)[idx] != 0) { - NGTThrowException("put: Not empty"); + NGTThrowException("put: Not empty"); } (*this)[idx] = n; } void erase(size_t idx) { if (isEmpty(idx)) { - NGTThrowException("erase: Not in-memory or invalid id"); + NGTThrowException("erase: Not in-memory or invalid id"); } delete (*this)[idx]; (*this)[idx] = 0; @@ -2017,7 +2017,7 @@ namespace NGT { if (!os.is_open()) { NGTThrowException("NGT::Common: Not open the specified stream yet."); } - NGT::Serializer::write(os, std::vector::size()); + NGT::Serializer::write(os, std::vector::size()); for (size_t idx = 0; idx < std::vector::size(); idx++) { if ((*this)[idx] == 0) { NGT::Serializer::write(os, '-'); @@ -2262,7 +2262,7 @@ namespace NGT { distanceComputationCount = sc.distanceComputationCount; edgeSize = sc.edgeSize; workingResult = sc.workingResult; - useAllNodesInLeaf = sc.useAllNodesInLeaf; + useAllNodesInLeaf = sc.useAllNodesInLeaf; expectedAccuracy = sc.expectedAccuracy; visitCount = sc.visitCount; return *this; diff --git a/lib/NGT/Graph.cpp b/lib/NGT/Graph.cpp index 96eab3d..6268f3c 100644 --- a/lib/NGT/Graph.cpp +++ b/lib/NGT/Graph.cpp @@ -24,7 +24,7 @@ using namespace std; using namespace NGT; -void +void NeighborhoodGraph::Property::set(NGT::Property &prop) { if (prop.truncationThreshold != -1) truncationThreshold = prop.truncationThreshold; if (prop.edgeSizeForCreation != -1) edgeSizeForCreation = prop.edgeSizeForCreation; @@ -43,7 +43,7 @@ NeighborhoodGraph::Property::set(NGT::Property &prop) { if (prop.graphType != GraphTypeNone) graphType = prop.graphType; } -void +void NeighborhoodGraph::Property::get(NGT::Property &prop) { prop.truncationThreshold = truncationThreshold; prop.edgeSizeForCreation = edgeSizeForCreation; @@ -64,298 +64,298 @@ NeighborhoodGraph::Property::get(NGT::Property &prop) { #ifdef NGT_GRAPH_READ_ONLY_GRAPH -void +void NeighborhoodGraph::Search::normalizedCosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::cosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedAngleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::angleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedL2Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::sparseJaccardFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } // added by Nyapicom -void +void NeighborhoodGraph::Search::poincareFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } // added by Nyapicom -void +void NeighborhoodGraph::Search::lorentzFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::hammingUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::jaccardUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } #ifdef NGT_HALF_FLOAT -void +void NeighborhoodGraph::Search::normalizedCosineSimilarityFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::cosineSimilarityFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedAngleFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::angleFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1Float16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2Float16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedL2Float16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::sparseJaccardFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } // added by Nyapicom -void +void NeighborhoodGraph::Search::poincareFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } // added by Nyapicom -void +void NeighborhoodGraph::Search::lorentzFloat16(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -#endif +#endif //// -void +void NeighborhoodGraph::Search::normalizedCosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::cosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedAngleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::angleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedL2FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::sparseJaccardFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::poincareFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::lorentzFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::hammingUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::jaccardUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } #ifdef NGT_HALF_FLOAT -void +void NeighborhoodGraph::Search::normalizedCosineSimilarityFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::cosineSimilarityFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedAngleFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::angleFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l1Float16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::l2Float16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::normalizedL2Float16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::sparseJaccardFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::poincareFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); } -void +void NeighborhoodGraph::Search::lorentzFloat16ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) { graph.searchReadOnlyGraph(sc, seeds); @@ -364,7 +364,7 @@ NeighborhoodGraph::Search::lorentzFloat16ForLargeDataset(NeighborhoodGraph &grap #endif -void +void NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds) { ObjectRepository &objectRepository = getObjectRepository(); @@ -401,7 +401,7 @@ NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &see cerr << "setupseeds:warning! unavailable object:" << seeds[i].id << "." << endl; continue; } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) seeds[i].distance = comparator(sc.object, *objectRepository.get(seeds[i].id)); #else seeds[i].distance = comparator(sc.object, *objects[seeds[i].id]); @@ -449,7 +449,7 @@ NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &see cerr << "setupseeds:warning! unavailable object:" << seeds[i].id << "." << endl; continue; } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) seeds[i].distance = comparator(static_cast(&sc.object[0]), static_cast(objectRepository.get(seeds[i].id)), dimension); #else seeds[i].distance = comparator(&sc.object[0], &(*objects[seeds[i].id])[0], dimension); @@ -463,8 +463,8 @@ NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &see } -void -NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, +void +NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, UncheckedSet &unchecked, DistanceCheckedSet &distanceChecked) { std::sort(seeds.begin(), seeds.end()); @@ -492,8 +492,8 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, } #if !defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) -void -NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, +void +NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, UncheckedSet &unchecked, DistanceCheckedSetForLargeDataset &distanceChecked) { std::sort(seeds.begin(), seeds.end()); @@ -579,7 +579,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, } } for (size_t idx = 0; idx < nsPtrsSize; idx++) { - neighborptr = nsPtrs[idx]; + neighborptr = nsPtrs[idx]; if (idx + prefetchOffset < nsPtrsSize) { unsigned char *ptr = reinterpret_cast((*(nsPtrs[idx + prefetchOffset])).second); MemoryCache::prefetch(ptr, prefetchSize); @@ -588,7 +588,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, #ifdef NGT_DISTANCE_COMPUTATION_COUNT sc.distanceComputationCount++; #endif - Distance distance = COMPARATOR::compare((void*)&sc.object[0], + Distance distance = COMPARATOR::compare((void*)&sc.object[0], (void*)&(*static_cast(neighborptr->second))[0], dimension); if (distance <= explorationRadius) { result.set(neighborptr->first, distance); @@ -599,13 +599,13 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, results.pop(); sc.radius = results.top().distance; explorationRadius = sc.explorationCoefficient * sc.radius; - } - } - } - } - } + } + } + } + } + } - if (sc.resultIsAvailable()) { + if (sc.resultIsAvailable()) { ObjectDistances &qresults = sc.getResult(); qresults.moveFrom(results); } else { @@ -635,7 +635,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, DistanceCheckedSet distanceChecked(repository.size()); #elif defined(NGT_GRAPH_CHECK_VECTOR) DistanceCheckedSet distanceChecked(repository.size()); -#else +#else DistanceCheckedSet distanceChecked; #endif @@ -736,7 +736,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, explorationRadius = sc.explorationCoefficient * sc.radius; } } - } + } #ifdef NGT_GRAPH_BETTER_FIRST_RESTORE if ((distance < target.distance) && (distance <= explorationRadius) && ((neighborptr + 2) < neighborendptr)) { target.position = position + 1; @@ -744,11 +744,11 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, break; } #endif - } - } + } + } - } - if (sc.resultIsAvailable()) { + } + if (sc.resultIsAvailable()) { ObjectDistances &qresults = sc.getResult(); qresults.clear(); qresults.moveFrom(results); @@ -967,7 +967,7 @@ NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, objtbl[minj] = tmpf; nodetbl[minj] = tmprs; } - } + } } catch(Exception &err) { stringstream msg; @@ -1008,7 +1008,7 @@ class TruncationSearchJob { class TruncationSearchSharedData { public: - TruncationSearchSharedData(NGT::NeighborhoodGraph &g, NGT::ObjectID id, size_t size, NGT::Distance lr) : + TruncationSearchSharedData(NGT::NeighborhoodGraph &g, NGT::ObjectID id, size_t size, NGT::Distance lr) : graphIndex(g), targetID(id), resultSize(size), explorationCoefficient(lr) {} NGT::NeighborhoodGraph &graphIndex; NGT::ObjectID targetID; @@ -1228,8 +1228,8 @@ NeighborhoodGraph::truncateEdgesOptimally( output.pop_front(); } - } - } + } + } int cnt = 0; for (size_t i = 0; i < delNodes.size(); i++) { diff --git a/lib/NGT/Graph.h b/lib/NGT/Graph.h index 02edec6..291b7a5 100644 --- a/lib/NGT/Graph.h +++ b/lib/NGT/Graph.h @@ -153,7 +153,7 @@ namespace NGT { Serializer::write(os, *prevsize); } void deserialize(std::ifstream &is) { - VECTOR::deserialize(is); + VECTOR::deserialize(is); Serializer::read(is, *prevsize); } void show() { @@ -290,7 +290,7 @@ namespace NGT { static void (*getMethod(NGT::ObjectSpace::DistanceType dtype, NGT::ObjectSpace::ObjectType otype, size_t size))(NGT::NeighborhoodGraph&, NGT::SearchContainer&, NGT::ObjectDistances&) { if (size < 5000000) { switch (otype) { - case NGT::ObjectSpace::Float: + case NGT::ObjectSpace::Float: switch (dtype) { case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityFloat; case NGT::ObjectSpace::DistanceTypeCosine : return cosineSimilarityFloat; @@ -338,7 +338,7 @@ namespace NGT { return l1Uint8; } else { switch (otype) { - case NGT::ObjectSpace::Float: + case NGT::ObjectSpace::Float: switch (dtype) { case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityFloatForLargeDataset; case NGT::ObjectSpace::DistanceTypeCosine : return cosineSimilarityFloatForLargeDataset; @@ -599,13 +599,13 @@ namespace NGT { void insertNode(ObjectID id, ObjectDistances &objects) { switch (property.graphType) { case GraphTypeANNG: - insertANNGNode(id, objects); + insertANNGNode(id, objects); break; case GraphTypeIANNG: - insertIANNGNode(id, objects); + insertIANNGNode(id, objects); break; case GraphTypeONNG: - insertONNGNode(id, objects); + insertONNGNode(id, objects); break; case GraphTypeKNNG: insertKNNGNode(id, objects); @@ -707,7 +707,7 @@ namespace NGT { if (count >= property.incomingEdge) { break; } - addEdge((*ri).id, id, (*ri).distance); + addEdge((*ri).id, id, (*ri).distance); } if (static_cast(results.size()) > property.outgoingEdge) { results.resize(property.outgoingEdge); @@ -861,11 +861,11 @@ namespace NGT { void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds); void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, double (&comparator)(const void*, const void*, size_t)); - void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, + void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, UncheckedSet &unchecked, DistanceCheckedSet &distanceChecked); #if !defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, + void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, UncheckedSet &unchecked, DistanceCheckedSetForLargeDataset &distanceChecked); #endif @@ -945,7 +945,7 @@ namespace NGT { msg << " Cannot add the edge. " << target << "->" << addID << ". " << err.what(); NGTThrowException(msg); } - if ((size_t)property.truncationThreshold != 0 && node.size() - minsize > + if ((size_t)property.truncationThreshold != 0 && node.size() - minsize > (size_t)property.truncationThreshold) { return true; } @@ -959,7 +959,7 @@ namespace NGT { if (node.size() > kEdge && node.at(kEdge, repository.allocator).distance >= addDistance) { GraphNode &linkedNode = *getNode(node.at(kEdge, repository.allocator).id); ObjectDistance linkedNodeEdge(target, node.at(kEdge, repository.allocator).distance); - if ((linkedNode.size() > kEdge) && node.at(kEdge, repository.allocator).distance >= + if ((linkedNode.size() > kEdge) && node.at(kEdge, repository.allocator).distance >= linkedNode.at(kEdge, repository.allocator).distance) { #else if (node.size() > kEdge && node[kEdge].distance >= addDistance) { @@ -1015,7 +1015,7 @@ namespace NGT { #ifdef NGT_GRAPH_READ_ONLY_GRAPH SearchGraphRepository searchRepository; -#endif +#endif NeighborhoodGraph::Property property; diff --git a/lib/NGT/GraphOptimizer.h b/lib/NGT/GraphOptimizer.h index 046e0a4..ce39d64 100644 --- a/lib/NGT/GraphOptimizer.h +++ b/lib/NGT/GraphOptimizer.h @@ -97,7 +97,7 @@ namespace NGT { } catch(NGT::Exception &err) { std::stringstream msg; msg << "Optimizer::adjustSearchCoefficients: Cannot adjust the search coefficients. " << err.what(); - NGTThrowException(msg); + NGTThrowException(msg); } graph.saveIndex(indexPath); } @@ -150,7 +150,7 @@ namespace NGT { return timer.time * 1000.0; } - static std::pair searchMinimumQueryTime(NGT::Index &index, size_t prefetchOffset, + static std::pair searchMinimumQueryTime(NGT::Index &index, size_t prefetchOffset, int maxPrefetchSize, size_t seedID) { NGT::ObjectSpace &objectSpace = index.getObjectSpace(); int step = 256; @@ -192,7 +192,7 @@ namespace NGT { std::vector> mins; NGT::ObjectSpace &objectSpace = index.getObjectSpace(); int maxSize = objectSpace.getByteSizeOfObject() * 4; - maxSize = maxSize < 64 * 28 ? maxSize : 64 * 28; + maxSize = maxSize < 64 * 28 ? maxSize : 64 * 28; for (int trial = 0; trial < 10; trial++) { size_t minps = 0; size_t minpo = 0; @@ -326,7 +326,7 @@ namespace NGT { } catch(NGT::Exception &err) { std::stringstream msg; msg << "Optimizer::execute: Cannot adjust the search coefficients. " << err.what(); - NGTThrowException(msg); + NGTThrowException(msg); } } @@ -404,7 +404,7 @@ namespace NGT { { std::vector graph; NGT::GraphReconstructor::extractGraph(graph, static_cast(index.getIndex())); - float epsilon = 0.0; + float epsilon = 0.0; for (size_t edgeSize = 5; edgeSize <= maxNoOfEdges; edgeSize += (edgeSize >= 10 ? 10 : 5) ) { NGT::GraphReconstructor::reconstructANNGFromANNG(graph, index, edgeSize); NGT::Command::SearchParameters searchParameters; @@ -461,7 +461,7 @@ namespace NGT { prop.edgeSizeForCreation = parameter.maxNoOfEdges; std::vector>> transition; size_t targetNo = 12500; - for (;targetNo <= objectRepository.size() && targetNo <= parameter.noOfSampleObjects; + for (;targetNo <= objectRepository.size() && targetNo <= parameter.noOfSampleObjects; targetNo *= 2) { ObjectID id = 0; size_t noOfObjects = 0; @@ -502,7 +502,7 @@ namespace NGT { if (estimatedEdge == 0) { std::stringstream msg; - msg << "Optimizer::optimizeNumberOfEdgesForANNG: Cannot optimize the number of edges. " + msg << "Optimizer::optimizeNumberOfEdgesForANNG: Cannot optimize the number of edges. " << estimatedEdge << ":" << estimatedAccuracy << " # of objects=" << objectRepository.size(); NGTThrowException(msg); } @@ -593,7 +593,7 @@ namespace NGT { } // obsolete because of a lack of a parameter - void set(int outgoing, int incoming, int nofqs, + void set(int outgoing, int incoming, int nofqs, float baseAccuracyFrom, float baseAccuracyTo, float rateAccuracyFrom, float rateAccuracyTo, double gte, double m @@ -627,7 +627,7 @@ namespace NGT { } } - void setProcessingModes(bool shortcut = true, bool searchParameter = true, bool prefetchParameter = true, + void setProcessingModes(bool shortcut = true, bool searchParameter = true, bool prefetchParameter = true, bool accuracyTable = true) { shortcutReduction = shortcut; searchParameterOptimization = searchParameter; diff --git a/lib/NGT/GraphReconstructor.h b/lib/NGT/GraphReconstructor.h index b5abcaa..b937ad1 100644 --- a/lib/NGT/GraphReconstructor.h +++ b/lib/NGT/GraphReconstructor.h @@ -62,7 +62,7 @@ class GraphReconstructor { - static void + static void adjustPaths(NGT::Index &outIndex) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -70,7 +70,7 @@ class GraphReconstructor { exit(1); #else NGT::GraphIndex &outGraph = dynamic_cast(outIndex.getIndex()); - size_t rStartRank = 0; + size_t rStartRank = 0; std::list > tmpGraph; for (size_t id = 1; id < outGraph.repository.size(); id++) { NGT::GraphNode &node = *outGraph.getNode(id); @@ -94,7 +94,7 @@ class GraphReconstructor { edge = true; if (rank >= 1 && node[rank - 1].distance > node[rank].distance) { std::cerr << "distance order is wrong!" << std::endl; - std::cerr << id << ":" << rank << ":" << node[rank - 1].id << ":" << node[rank].id << std::endl; + std::cerr << id << ":" << rank << ":" << node[rank - 1].id << ":" << node[rank].id << std::endl; } NGT::GraphNode &tn = *outGraph.getNode(id); volatile bool found = false; @@ -109,8 +109,8 @@ class GraphReconstructor { found = true; break; } - } - } + } + } } else { #ifdef _OPENMP #pragma omp parallel for num_threads(10) @@ -127,9 +127,9 @@ class GraphReconstructor { if ((dstNode[dni].id == node[rank].id) && (dstNode[dni].distance < node[rank].distance)) { found = true; } - } - } - } + } + } + } if (!found) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) outGraph.addEdge(id, node.at(i, outGraph.repository.allocator).id, @@ -146,15 +146,15 @@ class GraphReconstructor { continue; } it++; - } + } if (edge == false) { break; } - } + } #endif // NGT_SHARED_MEMORY_ALLOCATOR } - static void + static void adjustPathsEffectively(NGT::Index &outIndex, size_t minNoOfEdges = 0) { NGT::GraphIndex &outGraph = dynamic_cast(outIndex.getIndex()); @@ -172,7 +172,7 @@ class GraphReconstructor { node.insert(ni, edge, graph.repository.allocator); } - static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) + static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) { NGT::GraphNode &srcNode = *graph.getNode(srcNodeID); GraphNode::iterator ni = std::lower_bound(srcNode.begin(graph.repository.allocator), srcNode.end(graph.repository.allocator), ObjectDistance(dstNodeID, 0.0), edgeComp); @@ -185,7 +185,7 @@ class GraphReconstructor { node.insert(ni, edge); } - static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) + static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) { NGT::GraphNode &srcNode = *graph.getNode(srcNodeID); GraphNode::iterator ni = std::lower_bound(srcNode.begin(), srcNode.end(), ObjectDistance(dstNodeID, 0.0), edgeComp); @@ -194,9 +194,9 @@ class GraphReconstructor { #endif - static void + static void adjustPathsEffectively(NGT::GraphIndex &outGraph, - size_t minNoOfEdges) + size_t minNoOfEdges) { Timer timer; timer.start(); @@ -248,8 +248,8 @@ class GraphReconstructor { #endif } - std::vector > > candidates; - for (size_t sni = 0; sni < srcNode.size(); sni++) { + std::vector > > candidates; + for (size_t sni = 0; sni < srcNode.size(); sni++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) NGT::GraphNode &pathNode = tmpGraph[srcNode.at(sni, outGraph.repository.allocator).id - 1]; #else @@ -263,20 +263,20 @@ class GraphReconstructor { #endif auto dstNode = neighbors.find(dstNodeID); #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (dstNode != neighbors.end() - && srcNode.at(sni, outGraph.repository.allocator).distance < (*dstNode).second.second - && pathNode.at(pni, outGraph.repository.allocator).distance < (*dstNode).second.second + if (dstNode != neighbors.end() + && srcNode.at(sni, outGraph.repository.allocator).distance < (*dstNode).second.second + && pathNode.at(pni, outGraph.repository.allocator).distance < (*dstNode).second.second ) { #else - if (dstNode != neighbors.end() - && srcNode[sni].distance < (*dstNode).second.second - && pathNode[pni].distance < (*dstNode).second.second + if (dstNode != neighbors.end() + && srcNode[sni].distance < (*dstNode).second.second + && pathNode[pni].distance < (*dstNode).second.second ) { #endif #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode.at(sni, outGraph.repository.allocator).id, dstNodeID))); + candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode.at(sni, outGraph.repository.allocator).id, dstNodeID))); #else - candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode[sni].id, dstNodeID))); + candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode[sni].id, dstNodeID))); #endif removeCandidateCount++; } @@ -386,7 +386,7 @@ class GraphReconstructor { } - static + static void convertToANNG(std::vector &graph) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -422,8 +422,8 @@ class GraphReconstructor { #endif } - static - void reconstructGraph(std::vector &graph, NGT::GraphIndex &outGraph, size_t originalEdgeSize, size_t reverseEdgeSize) + static + void reconstructGraph(std::vector &graph, NGT::GraphIndex &outGraph, size_t originalEdgeSize, size_t reverseEdgeSize) { if (reverseEdgeSize > 10000) { std::cerr << "something wrong. Edge size=" << reverseEdgeSize << std::endl; @@ -506,13 +506,13 @@ class GraphReconstructor { std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; continue; } - } - reverseEdgeTimer.stop(); + } + reverseEdgeTimer.stop(); if (insufficientNodeCount != 0) { std::cerr << "# of the nodes edges of which are in short = " << insufficientNodeCount << std::endl; } - normalizeEdgeTimer.start(); + normalizeEdgeTimer.start(); for (size_t id = 1; id < outGraph.repository.size(); id++) { try { NGT::GraphNode &n = *outGraph.getNode(id); @@ -551,7 +551,7 @@ class GraphReconstructor { } } normalizeEdgeTimer.stop(); - std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time + std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time << ":" << normalizeEdgeTimer.time << std::endl; NGT::Property prop; @@ -562,15 +562,15 @@ class GraphReconstructor { - static - void reconstructGraphWithConstraint(std::vector &graph, NGT::GraphIndex &outGraph, + static + void reconstructGraphWithConstraint(std::vector &graph, NGT::GraphIndex &outGraph, size_t originalEdgeSize, size_t reverseEdgeSize, - char mode = 'a') + char mode = 'a') { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) std::cerr << "reconstructGraphWithConstraint is not implemented." << std::endl; abort(); -#else +#else NGT::Timer originalEdgeTimer, reverseEdgeTimer, normalizeEdgeTimer; @@ -625,7 +625,7 @@ class GraphReconstructor { std::vector indegreeCount(graph.size(), 0); size_t zeroCount = 0; for (size_t sizerank = 0; sizerank <= reverseSize.size(); sizerank++) { - + if (reverseSize[sizerank].first == 0) { zeroCount++; continue; @@ -645,11 +645,11 @@ class GraphReconstructor { indegreeCount[(*rni).id]++; } } - reverseEdgeTimer.stop(); + reverseEdgeTimer.stop(); std::cerr << "The number of nodes with zero outdegree by reverse edges=" << zeroCount << std::endl; NGT::GraphIndex::showStatisticsOfGraph(outGraph); - normalizeEdgeTimer.start(); + normalizeEdgeTimer.start(); for (size_t id = 1; id < outGraph.repository.size(); id++) { try { NGT::GraphNode &n = *outGraph.getNode(id); @@ -708,7 +708,7 @@ class GraphReconstructor { originalEdgeTimer.stop(); NGT::GraphIndex::showStatisticsOfGraph(outGraph); - std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time + std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time << ":" << normalizeEdgeTimer.time << std::endl; #endif @@ -717,13 +717,13 @@ class GraphReconstructor { // reconstruct a pseudo ANNG with a fewer edges from an actual ANNG with more edges. // graph is a source ANNG // index is an index with a reconstructed ANNG - static - void reconstructANNGFromANNG(std::vector &graph, NGT::Index &index, size_t edgeSize) + static + void reconstructANNGFromANNG(std::vector &graph, NGT::Index &index, size_t edgeSize) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) std::cerr << "reconstructANNGFromANNG is not implemented." << std::endl; abort(); -#else +#else NGT::GraphIndex &outGraph = dynamic_cast(index.getIndex()); @@ -777,7 +777,7 @@ class GraphReconstructor { } } catch(NGT::Exception &err) { } - } + } for (size_t id = 1; id < outGraph.repository.size(); id++) { try { diff --git a/lib/NGT/HashBasedBooleanSet.h b/lib/NGT/HashBasedBooleanSet.h index f015058..2495824 100644 --- a/lib/NGT/HashBasedBooleanSet.h +++ b/lib/NGT/HashBasedBooleanSet.h @@ -75,7 +75,7 @@ class HashBasedBooleanSet{ if (v == 0){ return false; } - if (_stlHash.count(num) <= 0) { + if (_stlHash.count(num) <= 0) { return false; } return true; diff --git a/lib/NGT/Index.cpp b/lib/NGT/Index.cpp index 8fdbdcd..76002f8 100644 --- a/lib/NGT/Index.cpp +++ b/lib/NGT/Index.cpp @@ -26,8 +26,8 @@ using namespace std; using namespace NGT; -void -Index::version(ostream &os) +void +Index::version(ostream &os) { os << "libngt:" << endl; Version::get(os); @@ -84,12 +84,12 @@ NGT::Index::Index(NGT::Property &prop):redirect(false) { } #endif -float -NGT::Index::getEpsilonFromExpectedAccuracy(double accuracy) { +float +NGT::Index::getEpsilonFromExpectedAccuracy(double accuracy) { return static_cast(getIndex()).getEpsilonFromExpectedAccuracy(accuracy); } -void +void NGT::Index::open(const string &database, bool rdOnly, bool graphDisabled) { NGT::Property prop; prop.load(database); @@ -114,7 +114,7 @@ NGT::Index::open(const string &database, bool rdOnly, bool graphDisabled) { path = database; } -void +void NGT::Index::createGraphAndTree(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize, bool redirect) { if (prop.dimension == 0) { @@ -142,7 +142,7 @@ NGT::Index::createGraphAndTree(const string &database, NGT::Property &prop, cons redirector.end(); } -void +void NGT::Index::createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize, bool redirect) { if (prop.dimension == 0) { NGTThrowException("Index::createGraphAndTree. Dimension is not specified."); @@ -169,7 +169,7 @@ NGT::Index::createGraph(const string &database, NGT::Property &prop, const strin redirector.end(); } -void +void NGT::Index::loadAndCreateIndex(Index &index, const string &database, const string &dataFile, size_t threadSize, size_t dataSize) { NGT::Timer timer; timer.start(); @@ -193,7 +193,7 @@ NGT::Index::loadAndCreateIndex(Index &index, const string &database, const strin cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; } -void +void NGT::Index::append(const string &database, const string &dataFile, size_t threadSize, size_t dataSize) { NGT::Index index(database); NGT::Timer timer; @@ -213,7 +213,7 @@ NGT::Index::append(const string &database, const string &dataFile, size_t thread return; } -void +void NGT::Index::append(const string &database, const float *data, size_t dataSize, size_t threadSize) { NGT::Index index(database); NGT::Timer timer; @@ -235,7 +235,7 @@ NGT::Index::append(const string &database, const float *data, size_t dataSize, s return; } -void +void NGT::Index::remove(const string &database, vector &objects, bool force) { NGT::Index index(database); NGT::Timer timer; @@ -255,7 +255,7 @@ NGT::Index::remove(const string &database, vector &objects, bool force return; } -void +void NGT::Index::importIndex(const string &database, const string &file) { Index *idx = 0; NGT::Property property; @@ -293,7 +293,7 @@ NGT::Index::importIndex(const string &database, const string &file) { delete idx; } -void +void NGT::Index::exportIndex(const string &database, const string &file) { NGT::Index idx(database); NGT::Timer timer; @@ -323,7 +323,7 @@ NGT::Index::makeSparseObject(std::vector &object) return obj; } -void +void NGT::Index::Property::set(NGT::Property &prop) { if (prop.dimension != -1) dimension = prop.dimension; if (prop.threadPoolSize != -1) threadPoolSize = prop.threadPoolSize; @@ -343,7 +343,7 @@ NGT::Index::Property::set(NGT::Property &prop) { if (prop.accuracyTable != "") accuracyTable = prop.accuracyTable; } -void +void NGT::Index::Property::get(NGT::Property &prop) { prop.dimension = dimension; prop.threadPoolSize = threadPoolSize; @@ -426,7 +426,7 @@ CreateIndexThread::run() { stringstream msg; msg << "CreateIndex::search:Fatal error! ID=" << job.id << " " << err.what(); NGTThrowException(msg); - } + } job.results = rs; poolThread.getOutputJobQueue().pushBack(job); } @@ -486,7 +486,7 @@ class BuildTimeController { NeighborhoodGraph::Property &property; }; -void +void NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) { assert(prop.dimension != 0); size_t dimension = prop.dimension; @@ -509,7 +509,7 @@ NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) { default: stringstream msg; msg << "Invalid Object Type in the property. " << prop.objectType; - NGTThrowException(msg); + NGTThrowException(msg); } } @@ -519,7 +519,7 @@ NGT::GraphIndex::loadGraph(const string &ifile, NGT::GraphRepository &graph) { graph.deserialize(isg); } -void +void NGT::GraphIndex::loadIndex(const string &ifile, bool readOnly, bool graphDisabled) { objectSpace->deserialize(ifile + "/obj"); if (graphDisabled) { @@ -536,12 +536,12 @@ NGT::GraphIndex::loadIndex(const string &ifile, bool readOnly, bool graphDisable #endif } -void +void NGT::GraphIndex::saveProperty(const std::string &file) { NGT::Property::save(*this, file); } -void +void NGT::GraphIndex::exportProperty(const std::string &file) { NGT::Property::exportProperty(*this, file); } @@ -564,8 +564,8 @@ NGT::GraphAndTreeIndex::GraphAndTreeIndex(const string &allocator, NGT::Property initialize(allocator, prop.treeSharedMemorySize); } -void -GraphAndTreeIndex::createTreeIndex() +void +GraphAndTreeIndex::createTreeIndex() { ObjectRepository &fr = GraphIndex::objectSpace->getRepository(); for (size_t id = 0; id < fr.size(); id++){ @@ -593,7 +593,7 @@ GraphAndTreeIndex::createTreeIndex() } } -void +void NGT::GraphIndex::initialize(const string &allocator, NGT::Property &prop) { constructObjectSpace(prop); repository.open(allocator + "/grp", prop.graphSharedMemorySize); @@ -646,9 +646,9 @@ GraphIndex::createIndex() } static size_t -searchMultipleQueryForCreation(GraphIndex &neighborhoodGraph, - NGT::ObjectID &id, - CreateIndexJob &job, +searchMultipleQueryForCreation(GraphIndex &neighborhoodGraph, + NGT::ObjectID &id, + CreateIndexJob &job, CreateIndexThreadPool &threads, size_t sizeOfRepository) { @@ -685,8 +685,8 @@ searchMultipleQueryForCreation(GraphIndex &neighborhoodGraph, } static void -insertMultipleSearchResults(GraphIndex &neighborhoodGraph, - CreateIndexThreadPool::OutputJobQueue &output, +insertMultipleSearchResults(GraphIndex &neighborhoodGraph, + CreateIndexThreadPool::OutputJobQueue &output, size_t dataSize) { // compute distances among all of the resultant objects @@ -712,7 +712,7 @@ insertMultipleSearchResults(GraphIndex &neighborhoodGraph, r.id = output[idxj].id; objs.push_back(r); } - // sort and cut excess edges + // sort and cut excess edges std::sort(objs.begin(), objs.end()); if (objs.size() > size) { objs.resize(size); @@ -741,8 +741,8 @@ insertMultipleSearchResults(GraphIndex &neighborhoodGraph, } } -void -GraphIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) +void +GraphIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) { if (NeighborhoodGraph::property.edgeSizeForCreation == 0) { return; @@ -820,7 +820,7 @@ void GraphIndex::setupPrefetch(NGT::Property &prop) { prop.prefetchSize = GraphIndex::objectSpace->setPrefetchSize(prop.prefetchSize); } -bool +bool NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, size_t edgeSize) { long double distance = 0.0; @@ -915,7 +915,7 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz continue; } NGT::GraphNode &node = *n; - for (size_t i = 0; i < node.size(); i++) { + for (size_t i = 0; i < node.size(); i++) { NGT::GraphNode *nn = 0; try { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -926,16 +926,16 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz } catch(NGT::Exception &err) { count++; #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no object. " - << node.at(i, graph.allocator).id << std::endl; + std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no object. " + << node.at(i, graph.allocator).id << std::endl; #else - std::cerr << "Directed edge! " << id << "->" << node[i].id << " no object. " << node[i].id << std::endl; + std::cerr << "Directed edge! " << id << "->" << node[i].id << " no object. " << node[i].id << std::endl; #endif continue; } NGT::GraphNode &nnode = *nn; bool found = false; - for (size_t i = 0; i < nnode.size(); i++) { + for (size_t i = 0; i < nnode.size(); i++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) if (nnode.at(i, graph.allocator).id == id) { #else @@ -947,10 +947,10 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz } if (!found) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no edge. " - << node.at(i, graph.allocator).id << "->" << id << std::endl; + std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no edge. " + << node.at(i, graph.allocator).id << "->" << id << std::endl; #else - std::cerr << "Directed edge! " << id << "->" << node[i].id << " no edge. " << node[i].id << "->" << id << std::endl; + std::cerr << "Directed edge! " << id << "->" << node[i].id << " no edge. " << node[i].id << "->" << id << std::endl; #endif count++; } @@ -980,7 +980,7 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz d10SkipCount++; continue; } - for (size_t i = 0; i < node.size(); i++) { + for (size_t i = 0; i < node.size(); i++) { if (i >= dcsize) { break; } @@ -1005,7 +1005,7 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz continue; } std::sort(node.begin(), node.end()); - for (size_t i = 0; i < node.size(); i++) { + for (size_t i = 0; i < node.size(); i++) { if (i > 0 && node[i - 1] > node[i]) { stringstream msg; msg << "Index::showStatisticsOfGraph: Fatal inner error! Wrong distance order " << node[i - 1] << ":" << node[i]; @@ -1037,7 +1037,7 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz } size_t esize = node->size(); sumOfSquareOfOutdegree += ((double)esize - averageNumberOfOutdegree) * ((double)esize - averageNumberOfOutdegree); - sumOfSquareOfIndegree += ((double)indegreeCount[id] - averageNumberOfOutdegree) * ((double)indegreeCount[id] - averageNumberOfOutdegree); + sumOfSquareOfIndegree += ((double)indegreeCount[id] - averageNumberOfOutdegree) * ((double)indegreeCount[id] - averageNumberOfOutdegree); } size_t numberOfNodesWithoutIndegree = 0; @@ -1157,13 +1157,13 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz std::cerr << "The minimum of the indegrees:\t" << minNumberOfIndegree << std::endl; } std::cerr << "#-nodes,#-edges,#-no-indegree,avg-edges,avg-dist,max-out,min-out,v-out,max-in,min-in,v-in,med-out," - "med-in,mode-out,mode-in,c95,c5,o-distance(10),o-skip,i-distance(10),i-skip:" - << numberOfNodes << ":" << numberOfOutdegree << ":" << numberOfNodesWithoutIndegree << ":" + "med-in,mode-out,mode-in,c95,c5,o-distance(10),o-skip,i-distance(10),i-skip:" + << numberOfNodes << ":" << numberOfOutdegree << ":" << numberOfNodesWithoutIndegree << ":" << std::setprecision(10) << (double)numberOfOutdegree / (double)numberOfNodes << ":" << distance / (double)numberOfOutdegree << ":" << maxNumberOfOutdegree << ":" << minNumberOfOutdegree << ":" << sumOfSquareOfOutdegree / (double)numberOfOutdegree<< ":" << maxNumberOfIndegree << ":" << minNumberOfIndegree << ":" << sumOfSquareOfIndegree / (double)numberOfOutdegree << ":" - << medianOutdegree << ":" << medianIndegree << ":" << modeOutdegree << ":" << modeIndegree + << medianOutdegree << ":" << medianIndegree << ":" << modeOutdegree << ":" << modeIndegree << ":" << c95 << ":" << c5 << ":" << c99 << ":" << c1 << ":" << distance10 << ":" << d10SkipCount << ":" << indegreeDistance10 << ":" << ind10SkipCount << std::endl; if (mode == 'h') { @@ -1183,8 +1183,8 @@ NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, siz } -void -GraphAndTreeIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) +void +GraphAndTreeIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) { assert(threadPoolSize > 0); @@ -1285,9 +1285,9 @@ GraphAndTreeIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) } -void -GraphAndTreeIndex::createIndex(const vector > &objects, - vector &ids, +void +GraphAndTreeIndex::createIndex(const vector > &objects, + vector &ids, float range, size_t threadPoolSize) { Timer timer; @@ -1327,7 +1327,7 @@ GraphAndTreeIndex::createIndex(const vector > &object idx++; break; } - } + } } if (cnt == 0) { break; @@ -1377,7 +1377,7 @@ GraphAndTreeIndex::createIndex(const vector > &object #endif ids[output[idxi].batchIdx].id = output[idxi].id; } - } + } } // insert resultant objects into the graph as edges for (size_t i = 0; i < cnt; i++) { @@ -1417,7 +1417,7 @@ GraphAndTreeIndex::createIndex(const vector > &object msg << " Cannot insert the node. " << job.id << ". " << err.what(); NGTThrowException(msg); } - } + } if (job.results != 0) { delete job.results; } @@ -1441,7 +1441,7 @@ GraphAndTreeIndex::createIndex(const vector > &object } } -static bool +static bool findPathAmongIdenticalObjects(GraphAndTreeIndex &graph, size_t srcid, size_t dstid) { stack nodes; unordered_set done; @@ -1470,7 +1470,7 @@ findPathAmongIdenticalObjects(GraphAndTreeIndex &graph, size_t srcid, size_t dst return false; } -bool +bool GraphAndTreeIndex::verify(vector &status, bool info, char mode) { bool valid = GraphIndex::verify(status, info); if (!valid) { @@ -1608,13 +1608,13 @@ GraphAndTreeIndex::verify(vector &status, bool info, char mode) { if (!fromFound || !toFound) { if (info) { if (!fromFound && !toFound) { - cerr << "Warning no undirected edge between " << id << "(" << fromNode.size() << ") and " + cerr << "Warning no undirected edge between " << id << "(" << fromNode.size() << ") and " << objects[n].id << "(" << toNode.size() << ")." << endl; } else if (!fromFound) { - cerr << "Warning no directed edge from " << id << "(" << fromNode.size() << ") to " + cerr << "Warning no directed edge from " << id << "(" << fromNode.size() << ") to " << objects[n].id << "(" << toNode.size() << ")." << endl; } else if (!toFound) { - cerr << "Warning no reverse directed edge from " << id << "(" << fromNode.size() << ") to " + cerr << "Warning no reverse directed edge from " << id << "(" << fromNode.size() << ") to " << objects[n].id << "(" << toNode.size() << ")." << endl; } } diff --git a/lib/NGT/Index.h b/lib/NGT/Index.h index 557783d..815cad9 100644 --- a/lib/NGT/Index.h +++ b/lib/NGT/Index.h @@ -132,7 +132,7 @@ namespace NGT { case DistanceType::DistanceTypeLorentz: p.set("DistanceType", "Lorentz"); break; // added by Nyapicom default : std::cerr << "Fatal error. Invalid distance type. " << distanceType << std::endl; abort(); } - switch (indexType) { + switch (indexType) { case IndexType::GraphAndTree: p.set("IndexType", "GraphAndTree"); break; case IndexType::Graph: p.set("IndexType", "Graph"); break; default : std::cerr << "Fatal error. Invalid index type. " << indexType << std::endl; abort(); @@ -402,15 +402,15 @@ namespace NGT { void open(const std::string &database, bool rdOnly, bool graphDisabled); void close() { - if (index != 0) { + if (index != 0) { delete index; index = 0; - } + } path.clear(); } void save() { if (path.empty()) { - NGTThrowException("NGT::Index::saveIndex: path is empty"); + NGTThrowException("NGT::Index::saveIndex: path is empty"); } saveIndex(path); } @@ -419,11 +419,11 @@ namespace NGT { saveIndex(indexPath); } #endif - static void mkdir(const std::string &dir) { + static void mkdir(const std::string &dir) { if (::mkdir(dir.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) { std::stringstream msg; msg << "NGT::Index::mkdir: Cannot make the specified directory. " << dir; - NGTThrowException(msg); + NGTThrowException(msg); } } static void create(const std::string &database, NGT::Property &prop, bool redirect = false) { createGraphAndTree(database, prop, redirect); } @@ -432,29 +432,29 @@ namespace NGT { static void createGraph(const std::string &database, NGT::Property &prop, const std::string &dataFile, size_t dataSize = 0, bool redirect = false); template size_t insert(const std::vector &object); template size_t append(const std::vector &object); - static void append(const std::string &database, const std::string &dataFile, size_t threadSize, size_t dataSize); + static void append(const std::string &database, const std::string &dataFile, size_t threadSize, size_t dataSize); static void append(const std::string &database, const float *data, size_t dataSize, size_t threadSize); static void remove(const std::string &database, std::vector &objects, bool force = false); static void exportIndex(const std::string &database, const std::string &file); static void importIndex(const std::string &database, const std::string &file); virtual void load(const std::string &ifile, size_t dataSize) { getIndex().load(ifile, dataSize); } virtual void append(const std::string &ifile, size_t dataSize) { getIndex().append(ifile, dataSize); } - virtual void append(const float *data, size_t dataSize) { + virtual void append(const float *data, size_t dataSize) { StdOstreamRedirector redirector(redirect); redirector.begin(); try { - getIndex().append(data, dataSize); + getIndex().append(data, dataSize); } catch(Exception &err) { redirector.end(); throw err; } redirector.end(); } - virtual void append(const double *data, size_t dataSize) { + virtual void append(const double *data, size_t dataSize) { StdOstreamRedirector redirector(redirect); redirector.begin(); try { - getIndex().append(data, dataSize); + getIndex().append(data, dataSize); } catch(Exception &err) { redirector.end(); throw err; @@ -469,7 +469,7 @@ namespace NGT { StdOstreamRedirector redirector(redirect); redirector.begin(); try { - getIndex().createIndex(threadNumber, sizeOfRepository); + getIndex().createIndex(threadNumber, sizeOfRepository); } catch(Exception &err) { redirector.end(); throw err; @@ -508,21 +508,21 @@ namespace NGT { size_t osize = 0; #endif os << "object=" << osize << std::endl; - size_t isize = getIndex().getSharedMemorySize(os, t); + size_t isize = getIndex().getSharedMemorySize(os, t); return osize + isize; } float getEpsilonFromExpectedAccuracy(double accuracy); - void searchUsingOnlyGraph(NGT::SearchContainer &sc) { + void searchUsingOnlyGraph(NGT::SearchContainer &sc) { sc.distanceComputationCount = 0; sc.visitCount = 0; - ObjectDistances seeds; - getIndex().search(sc, seeds); + ObjectDistances seeds; + getIndex().search(sc, seeds); } std::vector makeSparseObject(std::vector &object); Index &getIndex() { if (index == 0) { assert(index != 0); - NGTThrowException("NGT::Index::getIndex: Index is unavailable."); + NGTThrowException("NGT::Index::getIndex: Index is unavailable."); } return *index; } @@ -557,7 +557,7 @@ namespace NGT { if (vec == 0) { std::stringstream msg; msg << "NGT::Index::allocateObject: Object is not set. "; - NGTThrowException(msg); + NGTThrowException(msg); } Object *object = 0; if (objectType == typeid(float)) { @@ -573,7 +573,7 @@ namespace NGT { } else { std::stringstream msg; msg << "NGT::Index::allocateObject: Unavailable object type."; - NGTThrowException(msg); + NGTThrowException(msg); } return object; } @@ -586,7 +586,7 @@ namespace NGT { bool redirect; }; - class GraphIndex : public Index, + class GraphIndex : public Index, public NeighborhoodGraph { public: @@ -878,7 +878,7 @@ namespace NGT { } catch(Exception &err) { throw err; } - if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && + if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && result.size() < repository.size()) { if (sc.edgeSize != 0) { sc.edgeSize = 0; // not prune edges. @@ -901,7 +901,7 @@ namespace NGT { objectSpace->linearSearch(po, radius, size, rs); result.moveFrom(rs, id); if ((size_t)NeighborhoodGraph::property.edgeSizeForCreation != result.size()) { - std::cerr << "searchForKNNGInsert::Warning! inconsistency of the sizes. ID=" << id + std::cerr << "searchForKNNGInsert::Warning! inconsistency of the sizes. ID=" << id << " " << NeighborhoodGraph::property.edgeSizeForCreation << ":" << result.size() << std::endl; for (size_t i = 0; i < result.size(); i++) { std::cerr << result[i].id << ":" << result[i].distance << " "; @@ -968,7 +968,7 @@ namespace NGT { std::cerr << "Cannot get the specified number of the results. " << rs.size() << ":" << objects->size() << std::endl; } size_t count = 0; - ObjectDistances::iterator rsi = rs.begin(); + ObjectDistances::iterator rsi = rs.begin(); #if defined(NGT_SHARED_MEMORY_ALLOCATOR) for (GraphNode::iterator ri = objects->begin(repo.allocator); ri != objects->end(repo.allocator) && rsi != rs.end();) { @@ -1066,15 +1066,15 @@ namespace NGT { for (GraphNode::iterator rj = objects->begin(repo.allocator) + std::distance(objects->begin(repo.allocator), ri); rj != objects->end(repo.allocator); ++rj) { if ((*ri).id == (*rj).id && std::distance(objects->begin(repo.allocator), ri) != std::distance(objects->begin(repo.allocator), rj)) { - std::cerr << "Error! More than two identical objects! ID=" << (*rj).id << " idx=" - << std::distance(objects->begin(repo.allocator), ri) << ":" << std::distance(objects->begin(repo.allocator), rj) + std::cerr << "Error! More than two identical objects! ID=" << (*rj).id << " idx=" + << std::distance(objects->begin(repo.allocator), ri) << ":" << std::distance(objects->begin(repo.allocator), rj) << " disntace=" << (*ri).distance << ":" << (*rj).distance << std::endl; #else for (GraphNode::iterator rj = objects->begin() + std::distance(objects->begin(), ri); rj != objects->end(); ++rj) { if ((*ri).id == (*rj).id && std::distance(objects->begin(), ri) != std::distance(objects->begin(), rj)) { - std::cerr << "Error! More than two identical objects! ID=" << (*rj).id << " idx=" - << std::distance(objects->begin(), ri) << ":" << std::distance(objects->begin(), rj) + std::cerr << "Error! More than two identical objects! ID=" << (*rj).id << " idx=" + << std::distance(objects->begin(), ri) << ":" << std::distance(objects->begin(), rj) << " disntace=" << (*ri).distance << ":" << (*rj).distance << std::endl; #endif valid = false; @@ -1135,16 +1135,16 @@ namespace NGT { Object *allocateObject(const std::string &textLine, const std::string &sep) { return objectSpace->allocateNormalizedObject(textLine, sep); } - Object *allocateObject(const std::vector &obj) { + Object *allocateObject(const std::vector &obj) { return objectSpace->allocateNormalizedObject(obj); } Object *allocateObject(const std::vector &obj) { return objectSpace->allocateNormalizedObject(obj); } - Object *allocateObject(const std::vector &obj) { + Object *allocateObject(const std::vector &obj) { return objectSpace->allocateNormalizedObject(obj); } - Object *allocateObject(const float *obj, size_t size) { + Object *allocateObject(const float *obj, size_t size) { return objectSpace->allocateNormalizedObject(obj, size); } @@ -1188,7 +1188,7 @@ namespace NGT { template void getSeedsFromGraph(REPOSITORY &repo, ObjectDistances &seeds) { if (repo.size() != 0) { - size_t seedSize = repo.size() - 1 < (size_t)NeighborhoodGraph::property.seedSize ? + size_t seedSize = repo.size() - 1 < (size_t)NeighborhoodGraph::property.seedSize ? repo.size() - 1 : (size_t)NeighborhoodGraph::property.seedSize; if (NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeRandomNodes || NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeNone) { @@ -1291,7 +1291,7 @@ namespace NGT { void alignObjects() { } -#else +#else void alignObjects() { NGT::ObjectSpace &space = getObjectSpace(); @@ -1309,7 +1309,7 @@ namespace NGT { objectCount++; } } - std::multimap notexist; + std::multimap notexist; if (objectCount != repo.size()) { for (size_t id = 1; id < exist.size(); id++) { if (!exist[id]) { @@ -1332,7 +1332,7 @@ namespace NGT { assert(objectCount == repo.size() - 1); objectCount = 1; - std::vector > order; + std::vector > order; for (size_t i = 0; i < leafNodeIDs.size(); i++) { ObjectDistances objects; DVPTree::getObjectIDsFromLeaf(leafNodeIDs[i], objects); @@ -1362,7 +1362,7 @@ namespace NGT { } size_t id = startID; space.copy(*tmp, *object[id]); - uncopiedObjects.erase(id); + uncopiedObjects.erase(id); do { space.copy(*object[id], *object[order[id - 1].first]); copycount++; @@ -1392,7 +1392,7 @@ namespace NGT { } size_t id = startID; tmpPtr = object[id]; - uncopiedObjects.erase(id); + uncopiedObjects.erase(id); do { object[id] = object[order[id - 1].second]; copycount++; @@ -1516,7 +1516,7 @@ namespace NGT { } catch(Exception &err) { std::stringstream msg; msg << "remove:: cannot remove from tree. id=" << id << " " << err.what(); - NGTThrowException(msg); + NGTThrowException(msg); } } else { ObjectID replaceID = id == results[0].id ? results[1].id : results[0].id; @@ -1540,7 +1540,7 @@ namespace NGT { } catch(Exception &err) { throw err; } - if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && + if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && result.size() < repository.size()) { if (sc.edgeSize != 0) { try { @@ -1760,7 +1760,7 @@ namespace NGT { } // namespace NGT template -size_t NGT::Index::append(const std::vector &object) +size_t NGT::Index::append(const std::vector &object) { if (getObjectSpace().getRepository().size() == 0) { getObjectSpace().getRepository().initialize(); @@ -1773,7 +1773,7 @@ size_t NGT::Index::append(const std::vector &object) } template -size_t NGT::Index::insert(const std::vector &object) +size_t NGT::Index::insert(const std::vector &object) { if (getObjectSpace().getRepository().size() == 0) { getObjectSpace().getRepository().initialize(); diff --git a/lib/NGT/MmapManager.cpp b/lib/NGT/MmapManager.cpp index 1112777..9314743 100644 --- a/lib/NGT/MmapManager.cpp +++ b/lib/NGT/MmapManager.cpp @@ -17,7 +17,7 @@ #include "MmapManagerImpl.hpp" namespace MemoryManager{ - // static method --- + // static method --- void MmapManager::setDefaultOptionValue(init_option_st &optionst) { optionst.use_expand = MMAP_DEFAULT_ALLOW_EXPAND; @@ -28,10 +28,10 @@ namespace MemoryManager{ if((size % MMAP_MEMORY_ALIGN) == 0){ return size; }else{ - return ( (size >> MMAP_MEMORY_ALIGN_EXP ) + 1 ) * MMAP_MEMORY_ALIGN; + return ( (size >> MMAP_MEMORY_ALIGN_EXP ) + 1 ) * MMAP_MEMORY_ALIGN; } } - // static method --- + // static method --- MmapManager::MmapManager():_impl(new MmapManager::Impl(*this)) @@ -105,8 +105,8 @@ namespace MemoryManager{ boot_st bootStruct = {0}; control_st controlStruct = {0}; - _impl->initBootStruct(bootStruct, size); - _impl->initControlStruct(controlStruct, size); + _impl->initBootStruct(bootStruct, size); + _impl->initControlStruct(controlStruct, size); char *cntl_head = cntl_p; cntl_head += sizeof(boot_st); @@ -138,7 +138,7 @@ namespace MemoryManager{ } const std::string controlFile = filePath + MMAP_CNTL_FILE_SUFFIX; - _impl->filePath = filePath; + _impl->filePath = filePath; int32_t fd; @@ -161,7 +161,7 @@ namespace MemoryManager{ std::cerr << "[WARN] : version error" << std::endl; errno = 0; if(munmap(boot_p, MMAP_CNTL_FILE_SIZE) == -1) throw MmapManagerException("munmap error : " + getErrorStr(errno)); - throw MmapManagerException("MemoryManager version error"); + throw MmapManagerException("MemoryManager version error"); } errno = 0; @@ -172,7 +172,7 @@ namespace MemoryManager{ throw MmapManagerException("file open error = " + std::string(filePath.c_str()) + err_str); } - _impl->mmapCntlHead = (control_st*)( (char *)boot_p + sizeof(boot_st)); + _impl->mmapCntlHead = (control_st*)( (char *)boot_p + sizeof(boot_st)); _impl->mmapCntlAddr = (void *)boot_p; for(uint64_t i = 0; i < _impl->mmapCntlHead->unit_num; i++){ @@ -188,7 +188,7 @@ namespace MemoryManager{ } const std::string err_str = getErrorStr(errno); if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - closeMemory(true); + closeMemory(true); throw MmapManagerException(err_str); } } @@ -257,9 +257,9 @@ namespace MemoryManager{ } if(!not_reuse_flag){ - if( _impl->mmapCntlHead->reuse_type == REUSE_DATA_CLASSIFY + if( _impl->mmapCntlHead->reuse_type == REUSE_DATA_CLASSIFY || _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE - || _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE_PLUS){ + || _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE_PLUS){ off_t ret_offset; reuse_state_t reuse_state = REUSE_STATE_OK; ret_offset = reuse(alloc_size, reuse_state); @@ -338,7 +338,7 @@ namespace MemoryManager{ return ret_off; } - void *MmapManager::getAbsAddr(off_t p) const + void *MmapManager::getAbsAddr(off_t p) const { if(p < 0){ return NULL; @@ -350,7 +350,7 @@ namespace MemoryManager{ return ABS_ADDR(ret_p, _impl->mmapDataAddr[unit_id]); } - off_t MmapManager::getRelAddr(const void *p) const + off_t MmapManager::getRelAddr(const void *p) const { const chunk_head_st *chunk_head = (chunk_head_st *)((char *)p - sizeof(chunk_head_st)); const uint16_t unit_id = chunk_head->unit_id; diff --git a/lib/NGT/MmapManager.h b/lib/NGT/MmapManager.h index 4602d13..7f582d8 100644 --- a/lib/NGT/MmapManager.h +++ b/lib/NGT/MmapManager.h @@ -30,25 +30,25 @@ namespace MemoryManager{ typedef enum _option_reuse_t{ REUSE_DATA_CLASSIFY, REUSE_DATA_QUEUE, - REUSE_DATA_QUEUE_PLUS, + REUSE_DATA_QUEUE_PLUS, }option_reuse_t; typedef enum _reuse_state_t{ - REUSE_STATE_OK, - REUSE_STATE_FALSE, - REUSE_STATE_ALLOC, + REUSE_STATE_OK, + REUSE_STATE_FALSE, + REUSE_STATE_ALLOC, }reuse_state_t; typedef enum _check_statistics_t{ - CHECK_STATS_USE_SIZE, - CHECK_STATS_USE_NUM, - CHECK_STATS_FREE_SIZE, - CHECK_STATS_FREE_NUM, + CHECK_STATS_USE_SIZE, + CHECK_STATS_USE_NUM, + CHECK_STATS_FREE_SIZE, + CHECK_STATS_FREE_NUM, }check_statistics_t; typedef struct _init_option_st{ - bool use_expand; - option_reuse_t reuse_type; + bool use_expand; + option_reuse_t reuse_type; }init_option_st; @@ -66,15 +66,15 @@ namespace MemoryManager{ void *getAbsAddr(off_t p) const; off_t getRelAddr(const void *p) const; - size_t getTotalSize() const; - size_t getUseSize() const; - uint64_t getUseNum() const; - size_t getFreeSize() const; - uint64_t getFreeNum() const; - uint16_t getUnitNum() const; - size_t getQueueCapacity() const; - uint64_t getQueueNum() const; - uint64_t getLargeListNum() const; + size_t getTotalSize() const; + size_t getUseSize() const; + uint64_t getUseNum() const; + size_t getFreeSize() const; + uint64_t getFreeNum() const; + uint16_t getUnitNum() const; + size_t getQueueCapacity() const; + uint64_t getQueueNum() const; + uint64_t getLargeListNum() const; void dumpHeap() const; @@ -82,7 +82,7 @@ namespace MemoryManager{ void *getEntryHook() const; void setEntryHook(const void *entry_p); - // static method --- + // static method --- static void setDefaultOptionValue(init_option_st &optionst); static size_t getAlignSize(size_t size); diff --git a/lib/NGT/MmapManagerDefs.h b/lib/NGT/MmapManagerDefs.h index b1b06d7..538b3a8 100644 --- a/lib/NGT/MmapManagerDefs.h +++ b/lib/NGT/MmapManagerDefs.h @@ -23,36 +23,36 @@ namespace MemoryManager{ const uint64_t MMAP_MANAGER_VERSION = 5; - const bool MMAP_DEFAULT_ALLOW_EXPAND = false; - const uint64_t MMAP_CNTL_FILE_RANGE = 16; - const size_t MMAP_CNTL_FILE_SIZE = MMAP_CNTL_FILE_RANGE * sysconf(_SC_PAGESIZE); - const uint64_t MMAP_MAX_FILE_NAME_LENGTH = 1024; - const std::string MMAP_CNTL_FILE_SUFFIX = "c"; + const bool MMAP_DEFAULT_ALLOW_EXPAND = false; + const uint64_t MMAP_CNTL_FILE_RANGE = 16; + const size_t MMAP_CNTL_FILE_SIZE = MMAP_CNTL_FILE_RANGE * sysconf(_SC_PAGESIZE); + const uint64_t MMAP_MAX_FILE_NAME_LENGTH = 1024; + const std::string MMAP_CNTL_FILE_SUFFIX = "c"; - const size_t MMAP_LOWER_SIZE = 1; - const size_t MMAP_MEMORY_ALIGN = 8; + const size_t MMAP_LOWER_SIZE = 1; + const size_t MMAP_MEMORY_ALIGN = 8; const size_t MMAP_MEMORY_ALIGN_EXP = 3; #ifndef MMANAGER_TEST_MODE - const uint64_t MMAP_MAX_UNIT_NUM = 1024; + const uint64_t MMAP_MAX_UNIT_NUM = 1024; #else - const uint64_t MMAP_MAX_UNIT_NUM = 8; + const uint64_t MMAP_MAX_UNIT_NUM = 8; #endif const uint64_t MMAP_FREE_QUEUE_SIZE = 1024; - const uint64_t MMAP_FREE_LIST_NUM = 64; + const uint64_t MMAP_FREE_LIST_NUM = 64; typedef struct _boot_st{ - uint32_t version; - uint64_t reserve; - size_t size; + uint32_t version; + uint64_t reserve; + size_t size; }boot_st; typedef struct _head_st{ - off_t break_p; - uint64_t chunk_num; - uint64_t reserve; + off_t break_p; + uint64_t chunk_num; + uint64_t reserve; }head_st; @@ -77,22 +77,22 @@ namespace MemoryManager{ typedef struct _control_st{ - bool use_expand; - uint16_t unit_num; - uint16_t active_unit; - uint64_t reserve; - size_t base_size; - off_t entry_p; - option_reuse_t reuse_type; - free_st free_data; - free_queue_st free_queue; - head_st data_headers[MMAP_MAX_UNIT_NUM]; + bool use_expand; + uint16_t unit_num; + uint16_t active_unit; + uint64_t reserve; + size_t base_size; + off_t entry_p; + option_reuse_t reuse_type; + free_st free_data; + free_queue_st free_queue; + head_st data_headers[MMAP_MAX_UNIT_NUM]; }control_st; typedef struct _chunk_head_st{ - bool delete_flg; - uint16_t unit_id; - off_t free_next; - size_t size; + bool delete_flg; + uint16_t unit_id; + off_t free_next; + size_t size; }chunk_head_st; } diff --git a/lib/NGT/MmapManagerImpl.hpp b/lib/NGT/MmapManagerImpl.hpp index c43baa4..bfe9b62 100644 --- a/lib/NGT/MmapManagerImpl.hpp +++ b/lib/NGT/MmapManagerImpl.hpp @@ -40,7 +40,7 @@ namespace MemoryManager{ bool isOpen; void *mmapCntlAddr; control_st *mmapCntlHead; - std::string filePath; + std::string filePath; void *mmapDataAddr[MMAP_MAX_UNIT_NUM]; void initBootStruct(boot_st &bst, size_t size) const; @@ -53,7 +53,7 @@ namespace MemoryManager{ int32_t formatFile(const std::string &targetFile, size_t size) const; void clearChunk(const off_t chunk_off) const; - void free_data_classify(const off_t p, const bool force_large_list = false) const; + void free_data_classify(const off_t p, const bool force_large_list = false) const; off_t reuse_data_classify(const size_t size, reuse_state_t &reuse_state, const bool force_large_list = false) const; void free_data_queue(const off_t p); off_t reuse_data_queue(const size_t size, reuse_state_t &reuse_state); @@ -76,7 +76,7 @@ namespace MemoryManager{ MmapManager::Impl::Impl(MmapManager &ommanager):mmanager(ommanager), isOpen(false), mmapCntlAddr(NULL), mmapCntlHead(NULL){} - void MmapManager::Impl::initBootStruct(boot_st &bst, size_t size) const + void MmapManager::Impl::initBootStruct(boot_st &bst, size_t size) const { bst.version = MMAP_MANAGER_VERSION; bst.reserve = 0; @@ -86,10 +86,10 @@ namespace MemoryManager{ void MmapManager::Impl::initFreeStruct(free_st &fst) const { fst.large_list.free_p = -1; - fst.large_list.free_last_p = -1; + fst.large_list.free_last_p = -1; for(uint32_t i = 0; i < MMAP_FREE_LIST_NUM; ++i){ fst.free_lists[i].free_p = -1; - fst.free_lists[i].free_last_p = -1; + fst.free_lists[i].free_last_p = -1; } } @@ -216,7 +216,7 @@ namespace MemoryManager{ free_list_st *free_list; if(p_size <= border_size && force_large_list == false){ - uint32_t index = (p_size / MMAP_MEMORY_ALIGN) - 1; + uint32_t index = (p_size / MMAP_MEMORY_ALIGN) - 1; free_list = &mmapCntlHead->free_data.free_lists[index]; }else{ free_list = &mmapCntlHead->free_data.large_list; @@ -240,7 +240,7 @@ namespace MemoryManager{ free_list_st *free_list; if(size <= border_size && force_large_list == false){ - uint32_t index = (size / MMAP_MEMORY_ALIGN) - 1; + uint32_t index = (size / MMAP_MEMORY_ALIGN) - 1; free_list = &mmapCntlHead->free_data.free_lists[index]; }else{ free_list = &mmapCntlHead->free_data.large_list; @@ -327,7 +327,7 @@ namespace MemoryManager{ return free_data_classify(p, true); }else{ - const off_t alloc_offset = mmanager.alloc(new_size); + const off_t alloc_offset = mmanager.alloc(new_size); if(alloc_offset == -1){ return free_data_classify(p, true); @@ -356,8 +356,8 @@ namespace MemoryManager{ off_t MmapManager::Impl::reuse_data_queue(const size_t size, reuse_state_t &reuse_state) { - free_queue_st *free_queue = &mmapCntlHead->free_queue; - if(free_queue->data == -1){ + free_queue_st *free_queue = &mmapCntlHead->free_queue; + if(free_queue->data == -1){ reuse_state = REUSE_STATE_ALLOC; return -1; @@ -423,7 +423,7 @@ namespace MemoryManager{ if(reuse_state == REUSE_STATE_ALLOC){ reuse_state = REUSE_STATE_OK; - ret_off = reuse_data_queue(size, reuse_state); + ret_off = reuse_data_queue(size, reuse_state); } }else{ ret_off = reuse_data_queue(size, reuse_state); @@ -483,7 +483,7 @@ namespace MemoryManager{ return true; } - void MmapManager::Impl::upHeap(free_queue_st *free_queue, uint64_t index) const + void MmapManager::Impl::upHeap(free_queue_st *free_queue, uint64_t index) const { off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); @@ -493,7 +493,7 @@ namespace MemoryManager{ const off_t parent_chunk_offset = queue[parent]; const off_t index_chunk_offset = queue[index]; const chunk_head_st *parent_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(parent_chunk_offset); - const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); + const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); if(parent_chunk_head->size < index_chunk_head->size){ @@ -508,13 +508,13 @@ namespace MemoryManager{ void MmapManager::Impl::downHeap(free_queue_st *free_queue)const { off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - uint64_t index = 1; + uint64_t index = 1; while(index * 2 <= free_queue->tail){ uint64_t child = index * 2; const off_t index_chunk_offset = queue[index]; - const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); + const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); if(child + 1 < free_queue->tail){ const off_t left_chunk_offset = queue[child]; @@ -544,9 +544,9 @@ namespace MemoryManager{ } } - bool MmapManager::Impl::insertHeap(free_queue_st *free_queue, const off_t p) const + bool MmapManager::Impl::insertHeap(free_queue_st *free_queue, const off_t p) const { - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); + off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); uint64_t index; if(free_queue->capacity < free_queue->tail){ return false; @@ -564,11 +564,11 @@ namespace MemoryManager{ bool MmapManager::Impl::getHeap(free_queue_st *free_queue, off_t *p) const { - if( (free_queue->tail - 1) <= 0){ + if( (free_queue->tail - 1) <= 0){ return false; } - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); + off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); *p = queue[1]; free_queue->tail -= 1; queue[1] = queue[free_queue->tail]; @@ -596,7 +596,7 @@ namespace MemoryManager{ return; } - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); + off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); for(uint32_t i = 1; i < free_queue->tail; ++i){ const off_t chunk_offset = queue[i]; const off_t payload_offset = chunk_offset + sizeof(chunk_head_st); @@ -632,8 +632,8 @@ namespace MemoryManager{ setupChunkHead(new_chunk_head, true, chunk_head->unit_id, -1, new_size); - head_st *unit_header = &mmapCntlHead->data_headers[mmapCntlHead->active_unit]; - unit_header->chunk_num++; + head_st *unit_header = &mmapCntlHead->data_headers[mmapCntlHead->active_unit]; + unit_header->chunk_num++; const off_t payload_offset = new_chunk_offset + sizeof(chunk_head_st); diff --git a/lib/NGT/NGTQ/Capi.cpp b/lib/NGT/NGTQ/Capi.cpp index 84bbff2..83bf174 100644 --- a/lib/NGT/NGTQ/Capi.cpp +++ b/lib/NGT/NGTQ/Capi.cpp @@ -29,7 +29,7 @@ #ifdef NGTQ_QBG static bool operate_error_string_(const std::stringstream &ss, NGTError error){ - if(error != NULL){ + if(error != NULL){ try{ std::string *error_str = static_cast(error); *error_str = ss.str(); @@ -94,7 +94,7 @@ bool ngtqg_search_index(NGTQGIndex index, NGTQGQuery query, NGTObjectDistances r return false; } - NGTQG::Index* pindex = static_cast(index); + NGTQG::Index* pindex = static_cast(index); int32_t dim = pindex->getObjectSpace().getDimension(); NGT::Object *ngtquery = NULL; @@ -130,7 +130,7 @@ bool ngtqg_quantize(const char *indexPath, NGTQGQuantizationParameters parameter }catch(std::exception &err){ std::stringstream ss; ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); + operate_error_string_(ss, error); return false; } } @@ -170,13 +170,13 @@ bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, NG NGTQ::Property property; NGT::Property globalProperty; NGT::Property localProperty; - property.dimension = parameters->extended_dimension; + property.dimension = parameters->extended_dimension; if (property.dimension == 0) { property.dimension = parameters->dimension; } - property.genuineDimension = parameters->dimension; - property.globalRange = 0; - property.localRange = 0; + property.genuineDimension = parameters->dimension; + property.globalRange = 0; + property.localRange = 0; property.globalCentroidLimit = parameters->number_of_blobs; property.localCentroidLimit = 16; property.localDivisionNo = parameters->number_of_subvectors; @@ -184,8 +184,8 @@ bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, NG property.centroidCreationMode = NGTQ::CentroidCreationModeStaticLayer; property.localCentroidCreationMode = NGTQ::CentroidCreationModeStatic; property.localIDByteSize = 1; - property.dataType = static_cast(parameters->internal_data_type); - property.genuineDataType = static_cast(parameters->data_type); + property.dataType = static_cast(parameters->internal_data_type); + property.genuineDataType = static_cast(parameters->data_type); property.distanceType = static_cast(parameters->distance_type); globalProperty.edgeSizeForCreation = 10; @@ -202,7 +202,7 @@ bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, NG } catch(NGT::Exception &err) { std::stringstream ss; ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); + operate_error_string_(ss, error); return false; } @@ -304,10 +304,10 @@ bool qbg_build_index(const char *index_path, QBGBuildParameters *parameters, QBG QBG::HierarchicalKmeans hierarchicalKmeans; - hierarchicalKmeans.maxSize = 1000; - hierarchicalKmeans.numOfClusters = 2; + hierarchicalKmeans.maxSize = 1000; + hierarchicalKmeans.numOfClusters = 2; hierarchicalKmeans.numOfTotalClusters = 0; - hierarchicalKmeans.numOfTotalBlobs = 0; + hierarchicalKmeans.numOfTotalBlobs = 0; hierarchicalKmeans.clusterID = -1; hierarchicalKmeans.initMode = static_cast(parameters->hierarchical_clustering_init_mode); hierarchicalKmeans.numOfRandomObjects = 0; @@ -317,7 +317,7 @@ bool qbg_build_index(const char *index_path, QBGBuildParameters *parameters, QBG hierarchicalKmeans.numOfSecondObjects = parameters->number_of_second_objects; hierarchicalKmeans.numOfSecondClusters = parameters->number_of_second_clusters; hierarchicalKmeans.numOfThirdClusters = parameters->number_of_third_clusters; - hierarchicalKmeans.numOfObjects = 0; + hierarchicalKmeans.numOfObjects = 0; //-/hierarchicalKmeans.threeLayerClustering = true; hierarchicalKmeans.clusteringType = QBG::HierarchicalKmeans::ClusteringTypeThreeLayer; hierarchicalKmeans.verbose = false; @@ -346,8 +346,8 @@ bool qbg_build_index(const char *index_path, QBGBuildParameters *parameters, QBG optimizer.seedNumberOfSteps = 2; optimizer.seedStep = 10; optimizer.reject = 0.9; - optimizer.timelimit = 24 * 2; - optimizer.timelimit *= 60.0 * 60.0; + optimizer.timelimit = 24 * 2; + optimizer.timelimit *= 60.0 * 60.0; optimizer.rotation = parameters->rotation; optimizer.repositioning = parameters->repositioning; optimizer.globalType = QBG::Optimizer::GlobalTypeNone; @@ -507,4 +507,4 @@ size_t qbg_get_dimension(QBGIndex index, QBGError error) { return pindex->getQuantizer().property.genuineDimension; } -#endif +#endif diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.cpp b/lib/NGT/NGTQ/HierarchicalKmeans.cpp index 2f1dc88..444c8bc 100644 --- a/lib/NGT/NGTQ/HierarchicalKmeans.cpp +++ b/lib/NGT/NGTQ/HierarchicalKmeans.cpp @@ -82,7 +82,7 @@ void QBG::HierarchicalKmeans::treeBasedTopdownClustering(std::string prefix, QBG batch.push_back(id); if (batch.size() > 100000) { size_t kmeansBatchSize = nleaves < nOfThreads ? nleaves : nOfThreads; - hierarchicalKmeansBatch(batch, exceededLeaves, rootID, object, objectList, objectSpace, nodes, + hierarchicalKmeansBatch(batch, exceededLeaves, rootID, object, objectList, objectSpace, nodes, clustering, maxSize, nleaves, kmeansBatchSize); } @@ -102,7 +102,7 @@ void QBG::HierarchicalKmeans::treeBasedTopdownClustering(std::string prefix, QBG std::cerr << "# of nodes=" << nodes.size() << std::endl; std::cerr << "# of leaves=" << numOfLeaves << std::endl; std::cerr << "clustering for quantization." << std::endl; - hierarchicalKmeansWithNumberOfClustersInParallel(numOfTotalClusters, numOfObjects, numOfLeaves, + hierarchicalKmeansWithNumberOfClustersInParallel(numOfTotalClusters, numOfObjects, numOfLeaves, objectList, objectSpace, nodes, initMode); if (numOfTotalBlobs != 0) { NGT::Timer timer; @@ -129,14 +129,14 @@ void QBG::HierarchicalKmeans::treeBasedTopdownClustering(std::string prefix, QBG } } std::cerr << "clustering to make blobs." << std::endl; - hierarchicalKmeansWithNumberOfClustersInParallel(numOfTotalBlobs, numOfObjects, numOfTotalClusters, + hierarchicalKmeansWithNumberOfClustersInParallel(numOfTotalBlobs, numOfObjects, numOfTotalClusters, objectList, objectSpace, nodes, initMode); { std::ofstream of(prefix + QBG::Index::get3rdTo2ndSuffix()); extractBtoQIndex(of, nodes, qNodeIDs); } } - } + } } @@ -586,8 +586,8 @@ void QBG::HierarchicalKmeans::clustering(std::string indexPath, std::string pref NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); - std::cerr << "The specified params=FC:" << numOfFirstClusters << ":FO:" << numOfFirstObjects - << ",SC:" << numOfSecondClusters << ":SO:" << numOfSecondObjects + std::cerr << "The specified params=FC:" << numOfFirstClusters << ":FO:" << numOfFirstObjects + << ",SC:" << numOfSecondClusters << ":SO:" << numOfSecondObjects << ",TC:" << numOfThirdClusters << ":TO:" << numOfThirdObjects << ",O:" << numOfObjects << std::endl; bool readOnly = false; diff --git a/lib/NGT/NGTQ/HierarchicalKmeans.h b/lib/NGT/NGTQ/HierarchicalKmeans.h index c5d6007..de63735 100644 --- a/lib/NGT/NGTQ/HierarchicalKmeans.h +++ b/lib/NGT/NGTQ/HierarchicalKmeans.h @@ -69,7 +69,7 @@ namespace QBG { if (node->leaf) { return nodeID; } else { - HKInternalNode &internalNode = static_cast(*node); + HKInternalNode &internalNode = static_cast(*node); float min = std::numeric_limits::max(); int32_t minid = 0; for (auto &c : internalNode.children) { @@ -130,8 +130,8 @@ namespace QBG { } } } - delete nodes[leafNodeID]; - nodes[leafNodeID] = newNode; + delete nodes[leafNodeID]; + nodes[leafNodeID] = newNode; } static double computeError(std::vector &nodes, NGT::ObjectSpace &objectSpace, QBGObjectList &objectList) { @@ -157,7 +157,7 @@ namespace QBG { } } } - } + } } distance /= dcount; std::cout << "# of vectors=" << dcount << std::endl; @@ -196,7 +196,7 @@ namespace QBG { } } } - } + } } std::cerr << "# of clusters=" << clusterCount << std::endl; return objectCount; @@ -298,7 +298,7 @@ namespace QBG { std::cerr << "Internal error. " << idx << ":" << leafNode.members.size() << std::endl; abort(); } - randomObjectIDXs.push_back(leafNode.members[idx]); + randomObjectIDXs.push_back(leafNode.members[idx]); } } else { srand(leafNode.id); @@ -330,7 +330,7 @@ namespace QBG { } randomObjects[leafNode.id].push_back(object); } - } else { + } else { if (extractCentroid) { HKInternalNode &internalNode = static_cast(*node); for (auto &child : internalNode.children) { @@ -340,7 +340,7 @@ namespace QBG { } } } - } + } } for (size_t idx = 0; idx < centroids.size(); idx++) { auto &c = centroids[idx]; @@ -497,15 +497,15 @@ namespace QBG { } } nleaves += clusters[idx].size() - 1; - delete nodes[leafNodeID]; - nodes[leafNodeID] = newNode; + delete nodes[leafNodeID]; + nodes[leafNodeID] = newNode; } exceededLeaves.clear(); } - static void hierarchicalKmeansWithNumberOfClusters(size_t numOfTotalClusters, size_t numOfObjects, size_t numOfLeaves, - QBGObjectList &objectList, NGT::ObjectSpace &objectSpace, + static void hierarchicalKmeansWithNumberOfClusters(size_t numOfTotalClusters, size_t numOfObjects, size_t numOfLeaves, + QBGObjectList &objectList, NGT::ObjectSpace &objectSpace, std::vector &nodes, NGT::Clustering::InitializationMode initMode){ std::cerr << "numOfTotalClusters=" << numOfTotalClusters << std::endl; std::cerr << "numOfLeaves=" << numOfLeaves << std::endl; @@ -516,7 +516,7 @@ namespace QBG { auto numOfRemainingClusters = numOfTotalClusters; auto numOfRemainingVectors = numOfObjects; size_t leafCount = 0; - size_t nodeSize = nodes.size(); + size_t nodeSize = nodes.size(); for (size_t nidx = 0; nidx < nodeSize; nidx++) { if (nodes[nidx]->leaf) { leafCount++; @@ -540,11 +540,11 @@ namespace QBG { abort(); } } - } + } } - static void hierarchicalKmeansWithNumberOfClustersInParallel(size_t numOfTotalClusters, size_t numOfObjects, size_t numOfLeaves, - QBGObjectList &objectList, NGT::ObjectSpace &objectSpace, + static void hierarchicalKmeansWithNumberOfClustersInParallel(size_t numOfTotalClusters, size_t numOfObjects, size_t numOfLeaves, + QBGObjectList &objectList, NGT::ObjectSpace &objectSpace, std::vector &nodes, NGT::Clustering::InitializationMode initMode){ NGT::Timer timer; timer.start(); @@ -604,15 +604,15 @@ namespace QBG { cnode->members.push_back(leafNode.members[member.vectorID]); } } - delete nodes[leafNodeID]; - nodes[leafNodeID] = newNode; + delete nodes[leafNodeID]; + nodes[leafNodeID] = newNode; } timer.stop(); std::cerr << "hierarchicalKmeansWithNumberOfClustersInParallel: add nodes. Time=" << timer << std::endl; } - static void flattenClusters(std::vector &upperClusters, + static void flattenClusters(std::vector &upperClusters, std::vector> &lowerClusters, size_t numOfLowerClusters, std::vector &flatClusters) { @@ -634,7 +634,7 @@ namespace QBG { #ifndef MULTIPLE_OBJECT_LISTS void subclustering(std::vector &upperClusters, size_t numOfLowerClusters, size_t numOfObjects, - NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, + NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, NGT::Clustering::InitializationMode initMode, std::vector> &lowerClusters, size_t maximumIteration = 1000) { @@ -643,7 +643,7 @@ namespace QBG { auto numOfRemainingVectors = numOfObjects; size_t ts = 0; for (size_t idx = 0; idx < upperClusters.size(); idx++) { - size_t ncs = round(static_cast(upperClusters[idx].members.size()) / numOfRemainingVectors * + size_t ncs = round(static_cast(upperClusters[idx].members.size()) / numOfRemainingVectors * numOfRemainingClusters); ncs = ncs == 0 ? 1 : ncs; numOfRemainingVectors -= upperClusters[idx].members.size(); @@ -663,7 +663,7 @@ namespace QBG { } lowerClusters.resize(upperClusters.size()); -#pragma omp parallel for schedule(dynamic) +#pragma omp parallel for schedule(dynamic) for (size_t idx = 0; idx < upperClusters.size(); idx++) { std::vector> partialVectors; partialVectors.reserve(upperClusters[idx].members.size()); @@ -682,7 +682,7 @@ namespace QBG { NGT::Clustering lowerClustering(initMode, NGT::Clustering::ClusteringTypeKmeansWithoutNGT, maximumIteration); lowerClustering.kmeans(partialVectors, nPartialClusters[idx], lowerClusters[idx]); if (nPartialClusters[idx] != lowerClusters[idx].size()) { - std::cerr << "the sizes of cluster members are not consistent" << std::endl; + std::cerr << "the sizes of cluster members are not consistent" << std::endl; abort(); } } @@ -697,7 +697,7 @@ namespace QBG { std::cerr << "# of clusters=" << nc << " # of members=" << mc << std::endl; } void subclustering(std::vector &upperClusters, size_t numOfLowerClusters, size_t numOfObjects, - NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, + NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, NGT::Clustering::InitializationMode initMode, std::vector &flatLowerClusters, size_t maximumIteration = 1000) { @@ -708,9 +708,9 @@ namespace QBG { } -#else +#else static void subclustering(std::vector &upperClusters, size_t numOfLowerClusters, size_t numOfObjects, - NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, + NGT::ObjectSpace &objectSpace, QBGObjectList &objectList, NGT::Clustering::InitializationMode initMode, std::vector> &lowerClusters, size_t maximumIteration = 1000) { @@ -724,7 +724,7 @@ namespace QBG { float ncsf = static_cast(upperClusters[idx].members.size()) / numOfRemainingVectors * (numOfRemainingClusters - (upperClusters.size() - idx)); - ncsf += 1.0; + ncsf += 1.0; int ncs = round(ncsf); numOfRemainingVectors -= upperClusters[idx].members.size(); numOfRemainingClusters -= ncs; @@ -800,7 +800,7 @@ namespace QBG { } if (cnt % ((upperClusters.size() < 20 ? 20 : upperClusters.size()) / 20) == 0) { timer.stop(); - std::cerr << "subclustering: " << cnt << " clusters (" + std::cerr << "subclustering: " << cnt << " clusters (" << (cnt * 100 / upperClusters.size()) << "%) have been processed. time=" << timer << std::endl; timer.restart(); } @@ -829,7 +829,7 @@ namespace QBG { } -#endif +#endif static void subclustering(std::vector &upperClusters, size_t numOfLowerClusters, size_t numOfObjects, NGT::Clustering::InitializationMode initMode, @@ -844,7 +844,7 @@ namespace QBG { float ncsf = static_cast(upperClusters[idx].members.size()) / numOfRemainingVectors * (numOfRemainingClusters - (upperClusters.size() - idx)); - ncsf += 1.0; + ncsf += 1.0; int ncs = round(ncsf); numOfRemainingVectors -= upperClusters[idx].members.size(); numOfRemainingClusters -= ncs; @@ -998,7 +998,7 @@ namespace QBG { } static float optimizeEpsilon(NGT::Index &index, size_t beginID, size_t endID, - size_t nOfObjects, + size_t nOfObjects, QBGObjectList &objectList, float expectedRecall, NGT::ObjectSpace &objectSpace) { std::cerr << "optimizeEpsilon: expectedRecall=" << expectedRecall << std::endl; @@ -1058,7 +1058,7 @@ namespace QBG { totalRecall += recall[id - beginID]; } totalRecall /= endID - beginID; - std::cerr << "Info: # of not exact results=" << notExactResultCount << " start epsilon=" << startEpsilon + std::cerr << "Info: # of not exact results=" << notExactResultCount << " start epsilon=" << startEpsilon << " current epsilon=" << epsilon << " total recall=" << totalRecall << std::endl; if (totalRecall >= expectedRecall) { break; @@ -1151,7 +1151,7 @@ namespace QBG { size_t nOfObjects = 20; NGT::Timer timer; timer.start(); - auto epsilon = optimizeEpsilon(index, beginID, endOfEval, nOfObjects, + auto epsilon = optimizeEpsilon(index, beginID, endOfEval, nOfObjects, objectList, expectedRecall, objectSpace); timer.stop(); std::cerr << "assignWithNGT: exploring epsilon. time=" << timer << " epsilon=" << epsilon << std::endl; @@ -1181,12 +1181,12 @@ namespace QBG { } if (cnt % ((endID - beginID) / 100) == 0) { timer.stop(); - std::cerr << "assignWithNGT: " << cnt << " objects (" + std::cerr << "assignWithNGT: " << cnt << " objects (" << (cnt * 100 / (endID - beginID)) << "%) have been assigned. time=" << timer << std::endl; timer.restart(); } } - } + } std::cerr << "pushing..." << std::endl; for (size_t id = beginID; id < endID; id++) { auto cid = clusterIDs[id - beginID].first; @@ -1278,7 +1278,7 @@ namespace QBG { void multiLayerClustering(QBG::Index &index, std::string prefix, std::string objectIDsFile); void clustering(std::string indexPath, std::string prefix = "", std::string objectIDsFile = ""); -#endif +#endif size_t maxSize; size_t numOfObjects; @@ -1303,4 +1303,4 @@ namespace QBG { float expectedRecall; bool verbose; }; -} +} diff --git a/lib/NGT/NGTQ/Matrix.h b/lib/NGT/NGTQ/Matrix.h index 042968a..18095d4 100644 --- a/lib/NGT/NGTQ/Matrix.h +++ b/lib/NGT/NGTQ/Matrix.h @@ -58,8 +58,8 @@ template class Matrix { ~Matrix() { delete[] matrix; } - Matrix &operator=(const Matrix &m) { - allocate(m.row, m.col); + Matrix &operator=(const Matrix &m) { + allocate(m.row, m.col); std::memcpy(matrix, m.matrix, row * col * sizeof(T)); return *this; } @@ -98,7 +98,7 @@ template class Matrix { bool isEmpty() { return (col == 0) && (row == 0); } - static void + static void tokenize(const std::string &str, std::vector &token, const std::string seps) { std::string::size_type current = 0; std::string::size_type next; @@ -181,7 +181,7 @@ template class Matrix { } } put(0, col, m); - col = nc; + col = nc; delete[] matrix; matrix = mtx; } @@ -200,7 +200,7 @@ template class Matrix { } } put(row, 0, m); - row = nr; + row = nr; delete[] matrix; matrix = mtx; } @@ -276,12 +276,12 @@ template class Matrix { mulBlas(m, true); } - + void mul(const Matrix &mtx) { mulBlas(mtx); } - + void mulBlas(const Matrix &mtx, bool transpose = false) { char transa = 'N'; char transb = 'N'; @@ -294,7 +294,7 @@ template class Matrix { std::cerr << "mul:" << row << "x" << mtx.row << std::endl; } assert(row == mtx.row); - n = mtx.row; + n = mtx.row; row = m; col = mtx.row; } else { @@ -317,13 +317,13 @@ template class Matrix { delete[] matrix; matrix = tmpmtx; } -#else +#else void mul(const Matrix &mtx) { mulNaive(mtx); } -#endif +#endif + - void mulNaive(const Matrix &mtx) { #ifdef MATRIX_TRACE cerr << row << "x" << col << " mtx=" << mtx.row << "x" << mtx.col << std::endl; @@ -508,7 +508,7 @@ template class Matrix { int v1 = 3 * min + max; int v2 = 5 * min; int lwork = v1 > v2 ? v1 : v2; - T work[lwork]; + T work[lwork]; Matrix sd; sd.allocate(m, 1); @@ -559,10 +559,10 @@ template class Matrix { mtmp = matrix[i + j * col]; printf("%5.2e", mtmp); if (j < col - 1) printf(", "); - } + } if (i < row - 1) printf("]; "); else printf("] "); - } + } printf("]"); std::cout << std::endl; } @@ -600,23 +600,23 @@ template class Matrix { extractVector(const std::string &str, std::vector &vec) { std::vector tokens; - tokenize(str, tokens, " \t"); + tokenize(str, tokens, " \t"); convert(tokens, vec); } #if !defined(NGT_DISABLE_BLAS) - static - void load(const std::string &file, Matrix &m) + static + void load(const std::string &file, Matrix &m) { loadVectors(file, m); m.transpose(); } - static - void loadVectors(const std::string &file, Matrix &m) + static + void loadVectors(const std::string &file, Matrix &m) #else - static - void load(const std::string &file, Matrix &m) + static + void load(const std::string &file, Matrix &m) #endif { std::ifstream is(file); @@ -630,7 +630,7 @@ template class Matrix { std::vector tmpv; while (getline(is, line)) { std::vector v; - extractVector(line, v); + extractVector(line, v); #if !defined(NGT_DISABLE_BLAS) if (row == 0) { row = v.size(); diff --git a/lib/NGT/NGTQ/ObjectFile.h b/lib/NGT/NGTQ/ObjectFile.h index da37d1b..a7c7db9 100644 --- a/lib/NGT/NGTQ/ObjectFile.h +++ b/lib/NGT/NGTQ/ObjectFile.h @@ -72,7 +72,7 @@ class ObjectFile : public ArrayFile { default: stringstream msg; msg << "ObjectFile::Invalid Object Type in the property. " << dataType; - NGTThrowException(msg); + NGTThrowException(msg); break; } return true; @@ -131,7 +131,7 @@ class ObjectFile : public ArrayFile { if (objectSpace == 0) { stringstream msg; msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl; - NGTThrowException(msg); + NGTThrowException(msg); } NGT::Object *object = objectSpace->allocateObject(); if (!ArrayFile::get(id, *object, objectSpace)) { @@ -171,12 +171,12 @@ class ObjectFile : public ArrayFile { if (objectSpace == 0) { stringstream msg; msg << "ObjectFile::Fatal Error. objectSpace is not set." << std::endl; - NGTThrowException(msg); + NGTThrowException(msg); } if (objectSpace->getDimension() != data.size()) { stringstream msg; msg << "ObjectFile::Dimensions are inconsistency. " << objectSpace->getDimension() << ":" << data.size(); - NGTThrowException(msg); + NGTThrowException(msg); } NGT::Object *object = objectSpace->allocateObject(); const std::type_info &otype = objectSpace->getObjectType(); @@ -362,7 +362,7 @@ class StaticObjectFileLoader { }; -// constructor +// constructor template StaticObjectFile::StaticObjectFile() : _isOpen(false) { @@ -440,7 +440,7 @@ bool StaticObjectFile::open(const std::string &file, size_t pseudoDimensio } _stream.open(_objectPath, std::ios::in); if(!_stream){ - _isOpen = false; + _isOpen = false; return false; } _isOpen = true; @@ -449,12 +449,12 @@ bool StaticObjectFile::open(const std::string &file, size_t pseudoDimensio if (_fileHead.noOfObjects != noOfObjects) { stringstream msg; msg << "Invalid # of objects=" << _fileHead.noOfObjects << ":" << noOfObjects; - NGTThrowException(msg); + NGTThrowException(msg); } if (_fileHead.noOfDimensions != noOfDimensions) { stringstream msg; msg << "Invalid # of dimensions=" << _fileHead.noOfDimensions << ":" << noOfDimensions; - NGTThrowException(msg); + NGTThrowException(msg); } _recordSize = _sizeOfElement * _fileHead.noOfDimensions; return ret; @@ -539,7 +539,7 @@ bool StaticObjectFile::get(size_t id, std::vector &data, NGT::Objec } //uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); uint64_t offset_pos = id * _recordSize + sizeof(FileHeadStruct); - //offset_pos += sizeof(RecordStruct); + //offset_pos += sizeof(RecordStruct); _stream.seekg(offset_pos, std::ios::beg); if (!_stream.fail()) { switch (_type) { @@ -600,7 +600,7 @@ size_t StaticObjectFile::size() int64_t offset_pos = _stream.tellg(); offset_pos -= sizeof(FileHeadStruct); size_t num = offset_pos / _recordSize; - num++; + num++; return num; } diff --git a/lib/NGT/NGTQ/Optimizer.cpp b/lib/NGT/NGTQ/Optimizer.cpp index fd29e73..1a02db2 100644 --- a/lib/NGT/NGTQ/Optimizer.cpp +++ b/lib/NGT/NGTQ/Optimizer.cpp @@ -120,7 +120,7 @@ void QBG::Optimizer::evaluate(string global, vector> &vectors, cha } Matrix R; - Matrix::load(ofile + QBG::Index::getRotationFile(), R); + Matrix::load(ofile + QBG::Index::getRotationFile(), R); vector> qv(vectors.size()); // quantized vector vector> xp; // residual vector if (residualVectors.empty()) { @@ -191,7 +191,7 @@ void QBG::Optimizer::evaluate(string global, vector> &vectors, cha #endif cout << "distortion=" << distortion << endl; return; -#endif +#endif } #endif @@ -203,7 +203,7 @@ void QBG::Optimizer::evaluate(vector> &vectors, string &ofile, siz #else cerr << "Evaluate" << endl; Matrix R; - Matrix::load(ofile + QBG::Index::getRotationFile(), R); + Matrix::load(ofile + QBG::Index::getRotationFile(), R); vector> xp = vectors; Matrix::mulSquare(xp, R); for (size_t m = 0; m < numberOfSubvectors; m++) { @@ -234,7 +234,7 @@ void QBG::Optimizer::evaluate(vector> &vectors, string &ofile, siz } } return; -#endif +#endif } #endif @@ -262,7 +262,7 @@ void QBG::Optimizer::optimize(const std::string indexPath, size_t threadSize) { msg << "optimize: # of clusters is illegal. " << index.getQuantizer().property.localCentroidLimit << ":" << numberOfClusters; NGTThrowException(msg); } - if (index.getQuantizer().property.localCentroidLimit != 0 && numberOfClusters != 0 && + if (index.getQuantizer().property.localCentroidLimit != 0 && numberOfClusters != 0 && index.getQuantizer().property.localCentroidLimit != numberOfClusters) { std::stringstream msg; msg << "optimize: # of clusters is already specified. " << index.getQuantizer().property.localCentroidLimit << ":" << numberOfClusters; @@ -353,7 +353,7 @@ void QBG::Optimizer::optimize(const std::string indexPath, size_t threadSize) { redirector.end(); } -#endif +#endif #ifdef NGTQ_QBG void QBG::Optimizer::optimizeWithinIndex(std::string indexPath) { @@ -372,7 +372,7 @@ void QBG::Optimizer::optimizeWithinIndex(std::string indexPath) { pq += "/"; optimize(object, pq, global); } -#endif +#endif @@ -459,11 +459,11 @@ void QBG::Optimizer::optimize(vector> &vectors, vector> &qvectors, vector &clusters) + extractQuantizedVector(vector> &qvectors, vector &clusters) #else - extractQuantizedVector(vector> &qvectors, vector &clusters) + extractQuantizedVector(vector> &qvectors, vector &clusters) #endif { for (size_t cidx = 0; cidx < clusters.size(); ++cidx) { @@ -174,7 +174,7 @@ namespace QBG { abort(); } } -#endif +#endif } void @@ -232,11 +232,11 @@ namespace QBG { vector> localClusters(numberOfSubvectors); #endif vector> subQuantizedVectors[numberOfSubvectors]; -#define ERROR_CALCULATION +#define ERROR_CALCULATION #ifdef ERROR_CALCULATION vector subvectorDistances(numberOfSubvectors); #endif -#pragma omp parallel for +#pragma omp parallel for for (size_t m = 0; m < numberOfSubvectors; m++) { vector> subVectors; extractSubvector(xp, subVectors, m * subvectorSize, subvectorSize); @@ -310,7 +310,7 @@ namespace QBG { } } - void optimize(vector> &vectors, + void optimize(vector> &vectors, Matrix &reposition, vector> &rs, vector>> &localClusters, @@ -366,7 +366,7 @@ namespace QBG { void optimizeWithinIndex(std::string indexPath); void optimize(std::string invector, std::string ofile, std::string global); void optimize(vector> &vectors, vector> &globalCentroid, Matrix &r, vector> &localClusters, vector &errors); -#endif +#endif NGT::Timer timelimitTimer; size_t subvectorSize; diff --git a/lib/NGT/NGTQ/QbgCli.cpp b/lib/NGT/NGTQ/QbgCli.cpp index 81d03a8..ff75897 100644 --- a/lib/NGT/NGTQ/QbgCli.cpp +++ b/lib/NGT/NGTQ/QbgCli.cpp @@ -144,9 +144,9 @@ class QbgCliBuildParameters : public QBG::BuildParameters { } void getHierarchicalClustringParameters() { - hierarchicalClustering.maxSize = args.getl("r", 1000); + hierarchicalClustering.maxSize = args.getl("r", 1000); hierarchicalClustering.numOfObjects = args.getl("O", 0); - hierarchicalClustering.numOfClusters = args.getl("E", 2); + hierarchicalClustering.numOfClusters = args.getl("E", 2); try { hierarchicalClustering.numOfTotalClusters = args.getl("C", 0); } catch (...) { @@ -317,8 +317,8 @@ class QbgCliBuildParameters : public QBG::BuildParameters { optimization.seedNumberOfSteps = args.getf("S", 2); optimization.seedStep = args.getl("X", 10); optimization.reject = args.getf("R", 0.9); - optimization.timelimit = args.getf("L", 24 * 1); - optimization.timelimit *= 60.0 * 60.0; + optimization.timelimit = args.getf("L", 24 * 1); + optimization.timelimit *= 60.0 * 60.0; optimization.showClusterInfo = args.getBool("Z"); optimization.verbose = args.getBool("v"); @@ -379,7 +379,7 @@ class SearchParameters : public NGT::Command::SearchParameters { }; -void +void QBG::CLI::buildQG(NGT::Args &args) { const std::string usage = "Usage: qbg build-qg [-Q dimension-of-subvector] [-E max-number-of-edges] index"; @@ -438,8 +438,8 @@ searchQG(NGTQG::Index &index, SearchParameters &searchParameters, ostream &strea return; } - if (searchParameters.outputMode[0] == 'e') { - stream << "# Beginning of Evaluation" << endl; + if (searchParameters.outputMode[0] == 'e') { + stream << "# Beginning of Evaluation" << endl; } string line; @@ -527,7 +527,7 @@ searchQG(NGTQG::Index &index, SearchParameters &searchParameters, ostream &strea } else { stream << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; } - } + } if (searchParameters.outputMode[0] == 'e') { stream << "# End of Query" << endl; } @@ -537,8 +537,8 @@ searchQG(NGTQG::Index &index, SearchParameters &searchParameters, ostream &strea stream << "# Number of queries=" << queryCount << endl; stream << "# End of Evaluation" << endl; } else { - stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" + stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " + << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" << endl; } } @@ -561,14 +561,14 @@ QBG::CLI::searchQG(NGT::Args &args) { SearchParameters searchParameters(args); - bool readOnly = true; + bool readOnly = true; NGTQG::Index index(indexPath, 128, readOnly); if (debugLevel >= 1) { cerr << "indexType=" << searchParameters.indexType << endl; cerr << "size=" << searchParameters.size << endl; cerr << "edgeSize=" << searchParameters.edgeSize << endl; - cerr << "epsilon=" << searchParameters.beginOfEpsilon << "<->" << searchParameters.endOfEpsilon << "," + cerr << "epsilon=" << searchParameters.beginOfEpsilon << "<->" << searchParameters.endOfEpsilon << "," << searchParameters.stepOfEpsilon << endl; } @@ -588,7 +588,7 @@ QBG::CLI::searchQG(NGT::Args &args) { } -void +void QBG::CLI::createQG(NGT::Args &args) { const std::string usage = "Usage: qbg create-qg [-Q dimension-of-subvector] index"; @@ -609,7 +609,7 @@ QBG::CLI::createQG(NGT::Args &args) NGTQG::Index::append(indexPath, buildParameters); } -void +void QBG::CLI::appendQG(NGT::Args &args) { const std::string usage = "Usage: qbg append-qbg ngt-index"; @@ -625,7 +625,7 @@ QBG::CLI::appendQG(NGT::Args &args) } -void +void QBG::CLI::info(NGT::Args &args) { const string usage = "Usage: qbg index"; @@ -639,7 +639,7 @@ QBG::CLI::info(NGT::Args &args) return; } - try { + try { bool readOnly = true; try { QBG::Index index(indexPath, readOnly); @@ -665,7 +665,7 @@ QBG::CLI::info(NGT::Args &args) } -void +void QBG::CLI::create(NGT::Args &args) { const string usage = "Usage: qbg create " @@ -818,17 +818,17 @@ QBG::CLI::search(NGT::Args &args) string str = args.getString("p", "0.0"); vector tokens; NGT::Common::tokenize(str, tokens, ":"); - if (tokens.size() >= 1) { + if (tokens.size() >= 1) { beginOfResultExpansion = NGT::Common::strtod(tokens[0]); endOfResultExpansion = beginOfResultExpansion; } if (tokens.size() >= 2) { endOfResultExpansion = NGT::Common::strtod(tokens[1]); } - if (tokens.size() >= 3) { + if (tokens.size() >= 3) { if (tokens[2][0] == 'x') { mulStep = true; - stepOfResultExpansion = NGT::Common::strtod(tokens[2].substr(1)); + stepOfResultExpansion = NGT::Common::strtod(tokens[2].substr(1)); } else { - stepOfResultExpansion = NGT::Common::strtod(tokens[2]); + stepOfResultExpansion = NGT::Common::strtod(tokens[2]); } } } @@ -863,9 +863,9 @@ QBG::CLI::search(NGT::Args &args) } queryVector.resize(dimension); queryCount++; - for (auto resultExpansion = beginOfResultExpansion; - resultExpansion <= endOfResultExpansion; - resultExpansion = mulStep ? resultExpansion * stepOfResultExpansion : + for (auto resultExpansion = beginOfResultExpansion; + resultExpansion <= endOfResultExpansion; + resultExpansion = mulStep ? resultExpansion * stepOfResultExpansion : resultExpansion + stepOfResultExpansion) { NGT::ObjectDistances objects; QBG::SearchContainer searchContainer; @@ -925,19 +925,19 @@ QBG::CLI::search(NGT::Args &args) } else { cout << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; } - } + } if (outputMode == 'e') { cout << "# End of Query" << endl; } - } + } queryTimes.push_back(totalTime * 1000.0 / static_cast(queryCount)); if (outputMode == 'e') { cout << "# Average Query Time (msec)=" << queryTimes.back() << endl; cout << "# Number of queries=" << queryCount << endl; cout << "# End of Evaluation" << endl; } else { - cout << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" + cout << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " + << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" << endl; } } @@ -961,7 +961,7 @@ QBG::CLI::search(NGT::Args &args) } -void +void QBG::CLI::append(NGT::Args &args) { const string usage = "Usage: qbg append [-n data-size] [-m b|e] [-v] index(output) data.tsv(input)"; @@ -1012,7 +1012,7 @@ QBG::CLI::append(NGT::Args &args) } -void +void QBG::CLI::buildIndex(NGT::Args &args) { const std::string usage = "Usage: qbg build-index [-Q dimension-of-subvector] [-E max-number-of-edges] index"; @@ -1151,7 +1151,7 @@ QBG::CLI::buildIndex(NGT::Args &args) } -void +void QBG::CLI::build(NGT::Args &args) { const std::string usage = "Usage: qbg build [-Q dimension-of-subvector] [-E max-number-of-edges] index"; @@ -1177,7 +1177,7 @@ QBG::CLI::build(NGT::Args &args) vector tokens; NGT::Common::tokenize(phaseString, tokens, "-"); int beginOfPhase, endOfPhase; - if (tokens.size() >= 1) { + if (tokens.size() >= 1) { if (tokens[0].empty()) { beginOfPhase = endOfPhase = 0; } else { @@ -1370,7 +1370,7 @@ QBG::CLI::assign(NGT::Args &args) void QBG::CLI::extract(NGT::Args &args) -{ +{ const string usage = "Usage: qbg extract binary-file|index [output-file]"; @@ -1475,9 +1475,9 @@ QBG::CLI::extract(NGT::Args &args) return; } -void +void QBG::CLI::gt(NGT::Args &args) -{ +{ string path; try { @@ -1537,9 +1537,9 @@ QBG::CLI::gt(NGT::Args &args) } -void +void QBG::CLI::gtRange(NGT::Args &args) -{ +{ string path; try { @@ -1649,4 +1649,4 @@ QBG::CLI::optimize(NGT::Args &args) } -#endif +#endif diff --git a/lib/NGT/NGTQ/QuantizedBlobGraph.h b/lib/NGT/NGTQ/QuantizedBlobGraph.h index b5dd01d..0491547 100644 --- a/lib/NGT/NGTQ/QuantizedBlobGraph.h +++ b/lib/NGT/NGTQ/QuantizedBlobGraph.h @@ -66,7 +66,7 @@ namespace QBG { verbose = false; } - static void setProperties(CreationParameters &creation, NGTQ::Property &property, NGT::Property &globalProperty, + static void setProperties(CreationParameters &creation, NGTQ::Property &property, NGT::Property &globalProperty, NGT::Property &localProperty) { property.threadSize = creation.threadSize; property.globalCentroidLimit = 0; @@ -256,7 +256,7 @@ namespace QBG { optimization.setDefault(); } - void setProperties(NGTQ::Property &property, NGT::Property &globalProperty, + void setProperties(NGTQ::Property &property, NGT::Property &globalProperty, NGT::Property &localProperty) { CreationParameters::setProperties(creation, property, globalProperty, localProperty); } @@ -336,8 +336,8 @@ namespace QBG { quantizedIndex.getQuantizer().eraseInvertedIndexObject(gid); NGTQ::QuantizedObjectProcessingStream quantizedStream(quantizedIndex.getQuantizer().divisionNo, invertedIndexObjects.size()); rearrange(invertedIndexObjects, (*this)[gid], quantizedStream); - } -#endif + } +#endif } static void rearrange(NGTQ::InvertedIndexEntry &invertedIndexObjects, NGTQG::QuantizedNode &rearrangedObjects, NGTQ::QuantizedObjectProcessingStream &quantizedStream) { @@ -347,20 +347,20 @@ namespace QBG { for (size_t idx = 0; idx < invertedIndexObjects.numOfSubvectors; idx++) { #ifdef NGTQ_UINT8_LUT #ifdef NGTQ_SIMD_BLOCK_SIZE - size_t dataNo = oidx; + size_t dataNo = oidx; #if defined(NGT_SHARED_MEMORY_ALLOCATOR) abort(); #else quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[oidx].localID[idx] - 1); #endif -#else +#else objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx] - 1; -#endif -#else +#endif +#else objectData[idx * noobjs + dataNo] = invertedIndexObjects[oidx].localID[idx]; -#endif +#endif } - } + } rearrangedObjects.subspaceID = invertedIndexObjects.subspaceID; rearrangedObjects.objects = quantizedStream.compressIntoUint4(); @@ -386,7 +386,7 @@ namespace QBG { rearrange(invertedIndexObjects, rearrangedObjects, quantizedStream); } -#endif +#endif } static void rearrange(NGTQ::QuantizedObjectSet &quantizedObjects, NGTQG::QuantizedNode &rearrangedObjects) { @@ -422,12 +422,12 @@ namespace QBG { bool &getVerbose() { return verbose; } #ifdef NGTQ_QBG - static void create(const std::string &index, + static void create(const std::string &index, BuildParameters &buildParameters, std::vector *rotation = 0,const std::string objectFile = "") { create(index, buildParameters.creation, rotation, objectFile); } - static void create(const std::string &index, + static void create(const std::string &index, CreationParameters &creation, std::vector *rotation = 0,const std::string objectFile = "") { NGTQ::Property property; @@ -438,16 +438,16 @@ namespace QBG { NGTQ::Index::create(index, property, globalProperty, localProperty, rotation, objectFile); } #endif -#ifdef NGTQ_QBG +#ifdef NGTQ_QBG static void initialize(NGTQ::Property &property, NGT::Property &globalProperty,NGT::Property &localProperty) { QBG::CreationParameters params; QBG::CreationParameters::setProperties(params, property, globalProperty, localProperty); } #endif - static void create(const std::string &index, NGTQ::Property &property, + static void create(const std::string &index, NGTQ::Property &property, NGT::Property &globalProperty, -#ifdef NGTQ_QBG +#ifdef NGTQ_QBG NGT::Property &localProperty, std::vector *rotation, const std::string &objectFile) { @@ -578,7 +578,7 @@ namespace QBG { redirector.end(); } - float getApproximateDistances(std::vector &query, NGTQG::RearrangedQuantizedObjectSet &quantizedObjects, + float getApproximateDistances(std::vector &query, NGTQG::RearrangedQuantizedObjectSet &quantizedObjects, size_t subspaceID, std::vector &distances) { if (query.empty()) { NGTThrowException("The specified query is empty."); @@ -606,7 +606,7 @@ namespace QBG { return minDistance; } - void getApproximateDistances(std::vector &query, NGTQ::QuantizedObjectSet &quantizedObjects, + void getApproximateDistances(std::vector &query, NGTQ::QuantizedObjectSet &quantizedObjects, size_t subspaceID, std::vector &distances) { if (query.empty()) { NGTThrowException("The specified query is empty."); @@ -752,7 +752,7 @@ namespace QBG { #ifdef NGTQG_ZERO_GLOBAL NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 lut; quantizedObjectDistance.initialize(lut); - quantizedObjectDistance.createDistanceLookup(*query, 1, lut); + quantizedObjectDistance.createDistanceLookup(*query, 1, lut); #else #if defined(NGTQG_ROTATION) quantizedObjectDistance.rotation->mul(static_cast(query->getPointer())); @@ -817,7 +817,7 @@ namespace QBG { quantizedObjectDistance.createDistanceLookup(*query, subspaceID, (*luti).second); } std::tie(distance, radius) = judge(quantizedBlobGraph[neighborID], k, radius, (*luti).second, result, foundCount); -#endif +#endif if (static_cast(foundCount) / visitCount < searchContainer.cutback) { uncheckedBlobs = NGT::NeighborhoodGraph::UncheckedSet(); break; @@ -828,7 +828,7 @@ namespace QBG { } } - if (searchContainer.resultIsAvailable()) { + if (searchContainer.resultIsAvailable()) { searchContainer.getResult().clear(); searchContainer.getResult().moveFrom(result); } else { @@ -838,7 +838,7 @@ namespace QBG { } - static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer, + static void refineDistances(QBG::SearchContainer &searchContainer, NGTQ::Quantizer &quantizer, NGT::NeighborhoodGraph::ResultSet &result, NGT::ObjectDistances &qresults) { auto &objectSpace = quantizer.globalCodebookIndex.getObjectSpace(); @@ -988,7 +988,7 @@ namespace QBG { auto subspaceID = quantizedBlobGraph[blobID].subspaceID; quantizedObjectDistance.createDistanceLookup(rotatedQuery, subspaceID, lookupTable); NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = lookupTable; -#else +#else { auto blobID = blobs[idx].id; auto subspaceID = quantizedBlobGraph[blobID].subspaceID; @@ -1000,7 +1000,7 @@ namespace QBG { quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, (*luti).second); } NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = (*luti).second; -#endif +#endif NGT::Distance bd; std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], k, radius, lut, result, foundCount); @@ -1012,9 +1012,9 @@ namespace QBG { #endif } #ifdef NGTQBG_MIN -#endif +#endif } - if (searchContainer.resultIsAvailable()) { + if (searchContainer.resultIsAvailable()) { if (searchContainer.exactResultSize > 0) { NGT::ObjectDistances &qresults = searchContainer.getResult(); refineDistances(searchContainer, quantizer, result, qresults); @@ -1135,7 +1135,7 @@ namespace QBG { auto subspaceID = quantizedBlobGraph[blobID].subspaceID; quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, lookupTable); NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = lookupTable; -#else +#else { auto blobID = currentNearestBlob.id; auto subspaceID = quantizedBlobGraph[blobID].subspaceID; @@ -1147,10 +1147,10 @@ namespace QBG { quantizedObjectDistance.createDistanceLookup(rotatedQuery.data(), subspaceID, (*luti).second); } NGTQ::QuantizedObjectDistance::DistanceLookupTableUint8 &lut = (*luti).second; -#endif +#endif size_t foundCount; NGT::Distance bd; - std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], requestedSize, + std::tie(bd, radius) = judge(quantizedBlobGraph[blobID], requestedSize, radius, lut, results, foundCount); #ifdef NGTQBG_COARSE_BLOB if (bd < blobDistance) { @@ -1165,7 +1165,7 @@ namespace QBG { if (blobDistance > radius * searchContainer.explorationCoefficient) { break; } -#endif +#endif if (explorationSize > searchContainer.graphExplorationSize) { break; } @@ -1214,7 +1214,7 @@ namespace QBG { } distanceChecked.insert((*(neighborptr)).first); #else - neighborptr = nsPtrs[idx]; + neighborptr = nsPtrs[idx]; if (idx + prefetchOffset < nsPtrsSize) { unsigned char *ptr = reinterpret_cast((*(nsPtrs[idx + prefetchOffset])).second); NGT::MemoryCache::prefetch(ptr, prefetchSize); @@ -1234,10 +1234,10 @@ namespace QBG { } else { discardedObjects.push(r); } - } - } + } + } - if (searchContainer.resultIsAvailable()) { + if (searchContainer.resultIsAvailable()) { if (searchContainer.exactResultSize > 0) { NGT::ObjectDistances &qresults = searchContainer.getResult(); refineDistances(searchContainer, quantizer, results, qresults); @@ -1251,7 +1251,7 @@ namespace QBG { searchContainer.workingResult = std::move(results); } } -#endif +#endif } void search(QBG::SearchContainer &searchContainer) { @@ -1270,7 +1270,7 @@ namespace QBG { } static void buildNGTQ(const std::string &indexPath, bool verbose = false) { - load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", ""); + load(indexPath, QBG::Index::getQuantizerCodebookFile(indexPath), "", ""); buildNGTQ(indexPath, "", "-", "-", 1, 0, verbose); if (verbose) { std::cerr << "NGTQ and NGTQBG indices are completed." << std::endl; @@ -1280,7 +1280,7 @@ namespace QBG { } static void build(const std::string &indexPath, bool verbose = false) { - load(indexPath, "", "", ""); + load(indexPath, "", "", ""); buildNGTQ(indexPath, "", "", "", 1, 0, verbose); buildQBG(indexPath, verbose); if (verbose) { @@ -1443,13 +1443,13 @@ namespace QBG { if (s == 0) { const string comrmdir = "rm -rf " + indexPath + "/" + getWorkspaceName(); if (system(comrmdir.c_str()) == -1) { - std::cerr << "Warning. cannot remove the workspace directory. " + std::cerr << "Warning. cannot remove the workspace directory. " << comrmdir << std::endl; } } const string comrm = "rm -f " + indexPath + "/" + NGTQ::Quantizer::getInvertedIndexFile(); if (system(comrm.c_str()) == -1) { - std::cerr << "Warning. cannot remove the indeverted index. " + std::cerr << "Warning. cannot remove the indeverted index. " << comrm << std::endl; } } @@ -1617,7 +1617,7 @@ namespace QBG { char ngtDir[strlen(ngtDirString) + 1]; strcpy(ngtDir, ngtDirString); std::string tmpDir = mkdtemp(ngtDir); - const std::string mvcom = "mv " + indexPath + "/" + NGTQ::Quantizer::getGlobalFile() + const std::string mvcom = "mv " + indexPath + "/" + NGTQ::Quantizer::getGlobalFile() + " " + tmpDir + "/"; if (system(mvcom.c_str()) == -1) { std::stringstream msg; @@ -1625,7 +1625,7 @@ namespace QBG { NGTThrowException(msg); } - NGT::Index::append(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(), blobs, threadSize, dataSize); + NGT::Index::append(tmpDir + "/" + NGTQ::Quantizer::getGlobalFile(), blobs, threadSize, dataSize); auto unlog = false; NGT::GraphOptimizer graphOptimizer(unlog); @@ -1748,6 +1748,6 @@ namespace QBG { }; -} +} -#endif +#endif diff --git a/lib/NGT/NGTQ/QuantizedGraph.cpp b/lib/NGT/NGTQ/QuantizedGraph.cpp index 189399a..7bcb91b 100644 --- a/lib/NGT/NGTQ/QuantizedGraph.cpp +++ b/lib/NGT/NGTQ/QuantizedGraph.cpp @@ -77,4 +77,4 @@ void NGTQG::Index::create(const std::string indexPath, QBG::BuildParameters &bui void NGTQG::Index::append(const std::string indexPath, QBG::BuildParameters &buildParameters) { QBG::Index::appendFromObjectRepository(indexPath, indexPath + "/qg", buildParameters.verbose); } -#endif +#endif diff --git a/lib/NGT/NGTQ/QuantizedGraph.h b/lib/NGT/NGTQ/QuantizedGraph.h index 5fa8a0c..a855da2 100644 --- a/lib/NGT/NGTQ/QuantizedGraph.h +++ b/lib/NGT/NGTQ/QuantizedGraph.h @@ -112,7 +112,7 @@ namespace NGTQG { for (size_t idx = 0; idx < numOfSubspaces; idx++) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR #else - size_t dataNo = distance(node.begin(), i); + size_t dataNo = distance(node.begin(), i); #endif #if defined(NGT_SHARED_MEMORY_ALLOCATOR) abort(); @@ -124,7 +124,7 @@ namespace NGTQG { quantizedStream.arrangeQuantizedObject(dataNo, idx, invertedIndexObjects[(*i).id].localID[idx] - 1); #endif } - } + } (*this)[id].objects = quantizedStream.compressIntoUint4(); @@ -139,7 +139,7 @@ namespace NGTQG { NGT::Serializer::write(os, n); for (auto i = PARENT::begin(); i != PARENT::end(); ++i) { uint32_t sid = (*i).subspaceID; - NGT::Serializer::write(os, sid); + NGT::Serializer::write(os, sid); NGT::Serializer::write(os, (*i).ids); size_t streamSize = quantizedObjectProcessingStream.getUint4StreamSize((*i).ids.size()); NGT::Serializer::write(os, static_cast((*i).objects), streamSize); @@ -204,7 +204,7 @@ namespace NGTQG { readOnly(rdOnly), path(indexPath), quantizedIndex(indexPath + "/qg", rdOnly), - quantizedGraph(quantizedIndex) + quantizedGraph(quantizedIndex) { { struct stat st; @@ -284,11 +284,11 @@ namespace NGTQG { uint8_t *lid = static_cast(quantizedGraph.get(target.id)); size_t size = ((neighborSize - 1) / (NGTQ_SIMD_BLOCK_SIZE * NGTQ_BATCH_SIZE) + 1) * (NGTQ_SIMD_BLOCK_SIZE * NGTQ_BATCH_SIZE); size /= 2; - size *= quantizedIndex.getQuantizer().divisionNo; + size *= quantizedIndex.getQuantizer().divisionNo; NGT::MemoryCache::prefetch(lid, size); } -#endif +#endif #ifdef NGTQ_QBG quantizedObjectDistance(quantizedGraph.get(target.id), ds, neighborSize, cache[0]); #else @@ -317,12 +317,12 @@ namespace NGTQG { sc.radius = results.top().distance; explorationRadius = sc.explorationCoefficient * sc.radius; } - } - } + } + } } - } + } - if (sc.resultIsAvailable()) { + if (sc.resultIsAvailable()) { NGT::ObjectDistances &qresults = sc.getResult(); qresults.moveFrom(results); if (sc.resultExpansion >= 1.0) { @@ -332,7 +332,7 @@ namespace NGTQG { for (auto i = qresults.begin(); i != qresults.end(); ++i) { #ifdef NGTQG_PREFETCH if (static_cast(distance(qresults.begin(), i + 10)) < qresults.size()) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) NGT::PersistentObject &o = *objectRepository.get((*(i + 10)).id); #else NGT::Object &o = *objectRepository[(*(i + 10)).id]; @@ -507,7 +507,7 @@ namespace NGTQG { quantizedIndex.save(); quantizedIndex.close(); } -#endif +#endif #ifdef NGTQ_QBG static void createQuantizedGraphFrame(const std::string quantizedIndexPath, size_t dimension, size_t pseudoDimension, size_t dimensionOfSubvector) { @@ -525,16 +525,16 @@ namespace NGTQG { property.distanceType = NGTQ::DistanceType::DistanceTypeL2; property.singleLocalCodebook = false; property.batchSize = 1000; - property.centroidCreationMode = NGTQ::CentroidCreationModeStatic; + property.centroidCreationMode = NGTQ::CentroidCreationModeStatic; #ifdef NGTQ_QBG - property.localCentroidCreationMode = NGTQ::CentroidCreationModeStatic; + property.localCentroidCreationMode = NGTQ::CentroidCreationModeStatic; #else - property.localCentroidCreationMode = NGTQ::CentroidCreationModeDynamicKmeans; + property.localCentroidCreationMode = NGTQ::CentroidCreationModeDynamicKmeans; #endif - property.globalCentroidLimit = 1; - property.localCentroidLimit = 16; - property.localClusteringSampleCoefficient = 100; + property.globalCentroidLimit = 1; + property.localCentroidLimit = 16; + property.localClusteringSampleCoefficient = 100; #ifdef NGTQ_QBG property.genuineDimension = dimension; if (pseudoDimension == 0) { @@ -579,13 +579,13 @@ namespace NGTQG { } if (ngtProperty.dimension > static_cast(pseudoDimension)) { std::stringstream msg; - msg << "QuantizedGraph::quantize: the specified pseudo dimension is smaller than the genuine dimension. " + msg << "QuantizedGraph::quantize: the specified pseudo dimension is smaller than the genuine dimension. " << ngtProperty.dimension << ":" << pseudoDimension << std::endl; NGTThrowException(msg); } if (pseudoDimension % 4 != 0) { std::stringstream msg; - msg << "QuantizedGraph::quantize: the specified pseudo dimension should be a multiple of 4. " + msg << "QuantizedGraph::quantize: the specified pseudo dimension should be a multiple of 4. " << pseudoDimension << std::endl; NGTThrowException(msg); } @@ -619,7 +619,7 @@ namespace NGTQG { } redirector.end(); } -#else +#else static void quantize(const std::string indexPath, float dimensionOfSubvector, size_t maxNumOfEdges, bool verbose = false) { NGT::StdOstreamRedirector redirector(!verbose); redirector.begin(); @@ -640,7 +640,7 @@ namespace NGTQG { } redirector.end(); } -#endif +#endif const bool readOnly; const std::string path; @@ -649,8 +649,8 @@ namespace NGTQG { QuantizedGraphRepository quantizedGraph; - }; + }; -} +} -#endif +#endif diff --git a/lib/NGT/NGTQ/Quantizer.h b/lib/NGT/NGTQ/Quantizer.h index c30f6db..978cc9e 100644 --- a/lib/NGT/NGTQ/Quantizer.h +++ b/lib/NGT/NGTQ/Quantizer.h @@ -37,18 +37,18 @@ #define NGTQBG_MIN //#define NGTQBG_COARSE_BLOB -#define NGTQ_USING_ONNG +#define NGTQ_USING_ONNG #define MULTIPLE_OBJECT_LISTS #define NGTQG_ROTATION -#define NGTQ_BLAS_FOR_ROTATION -#define NGTQG_ROTATED_GLOBAL_CODEBOOKS +#define NGTQ_BLAS_FOR_ROTATION +#define NGTQG_ROTATED_GLOBAL_CODEBOOKS #define NGTQ_OBJECT_IN_MEMORY #define NGTQ_UINT8_LUT #define NGTQ_SIMD_BLOCK_SIZE 16 #define NGTQ_BATCH_SIZE 2 -#define NGTQ_UINT4_OBJECT +#define NGTQ_UINT4_OBJECT #define NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION #define NGTQG_PREFETCH #if defined(NGT_AVX512) @@ -255,7 +255,7 @@ class QuantizationCodebook : public std::vector { property.dimension = dimension; property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; #ifdef NGTQ_SHARED_INVERTED_INDEX - index = new NGT::Index("dummy", property); + index = new NGT::Index("dummy", property); std::cerr << "Not implemented" << std::endl; abort(); #else @@ -267,7 +267,7 @@ class QuantizationCodebook : public std::vector { if ((idx + 1) % 100000 == 0) { std::cerr << "QuantizationCodebook::buildIndex processed objects=" << idx << std::endl; } - index->append(data(idx), 1); + index->append(data(idx), 1); } index->createIndex(50); } @@ -485,11 +485,11 @@ class InvertedIndexEntry : public NGT::DynamicLengthVector(), allocator); - PARENT::back(allocator).clear(numOfSubvectors); + PARENT::back(allocator).clear(numOfSubvectors); } - void pushBack(size_t id, SharedMemoryAllocator &allocator) { + void pushBack(size_t id, SharedMemoryAllocator &allocator) { pushBack(allocator); PARENT::back(allocator).setID(id); } @@ -510,11 +510,11 @@ class InvertedIndexEntry : public NGT::DynamicLengthVector()); PARENT::back().clear(numOfSubvectors); } - void pushBack(size_t id) { + void pushBack(size_t id) { pushBack(); PARENT::back().setID(id); } @@ -558,7 +558,7 @@ class InvertedIndexEntry : public NGT::DynamicLengthVector(PARENT::vector), sz * PARENT::elementSize); } @@ -609,7 +609,7 @@ class InvertedIndexEntry : public NGT::DynamicLengthVector dummy; @@ -663,7 +663,7 @@ class LocalDatam { LocalDatam(size_t iii, size_t iil) : iiIdx(iii), iiLocalIdx(iil) {} #endif size_t iiIdx; - size_t iiLocalIdx; + size_t iiLocalIdx; #ifdef NGTQ_QBG uint32_t subspaceID; #endif @@ -776,7 +776,7 @@ class SerializableObject : public NGT::Object { } void setupLocalIDByteSize() { - if (localCentroidLimit > 0xffff - 1) { + if (localCentroidLimit > 0xffff - 1) { if (localIDByteSize == 2) { NGTThrowException("NGTQ::Property: The localIDByteSize is illegal for the localCentroidLimit."); } @@ -866,7 +866,7 @@ class SerializableObject : public NGT::Object { default: NGTThrowException("Quantizer constructor: Inner error. Invalid data type."); break; - } + } setupLocalIDByteSize(); localDivisionNo = getLocalCodebookNo(); } @@ -925,20 +925,20 @@ class QuantizedObjectDistance { localDistanceLookup = 0; } } - bool isValid(size_t idx) { + bool isValid(size_t idx) { #ifdef NGTQ_QBG std::cerr << "isValid() is not implemented" << std::endl; abort(); #else - return flag[idx]; + return flag[idx]; #endif } #ifndef NGTQ_DISTANCE_ANGLE - void set(size_t idx, double d) { + void set(size_t idx, double d) { #ifndef NGTQ_QBG flag[idx] = true; #endif - localDistanceLookup[idx] = d; + localDistanceLookup[idx] = d; } double getDistance(size_t idx) { return localDistanceLookup[idx]; } #endif @@ -1088,7 +1088,7 @@ class QuantizedObjectDistance { } return sqrt(distance); } -#else +#else template inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { assert(globalCodebookIndex != 0); @@ -1135,7 +1135,7 @@ class QuantizedObjectDistance { distance = sqrt(distance); return distance; } -#endif +#endif template inline double getAngleDistanceFloat(NGT::Object &object, size_t objectID, T localID[]) { @@ -1222,7 +1222,7 @@ class QuantizedObjectDistance { LocalDistanceLookup *dlu = distanceLUT.localDistanceLookup; size_t oft = 0; for (size_t li = 0; li < localCodebookNo; li++, oft += localDataSize) { - dlu++; + dlu++; for (size_t k = 1; k < localCodebookCentroidNo; k++) { NGT::Object &lcentroid = (NGT::Object&)*localCodebookIndexes[li].getObjectSpace().getRepository().get(k); float *lcptr = (float*)&lcentroid[0]; @@ -1244,7 +1244,7 @@ class QuantizedObjectDistance { } } } -#else +#else inline void createDistanceLookup(NGT::Object &object, size_t objectID, DistanceLookupTable &distanceLUT) { void *objectPtr = &((NGT::Object&)object)[0]; createDistanceLookup(objectPtr, objectID, distanceLUT); @@ -1260,11 +1260,11 @@ class QuantizedObjectDistance { size_t sizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); #endif - - createFloatL2DistanceLookup(objectPtr, sizeOfObject, globalCentroid, distanceLUT.localDistanceLookup); + + createFloatL2DistanceLookup(objectPtr, sizeOfObject, globalCentroid, distanceLUT.localDistanceLookup); } -#endif +#endif inline void createFloatDotProductLookup(void *object, size_t sizeOfObject, void *globalCentroid, float *lut) { size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); @@ -1275,7 +1275,7 @@ class QuantizedObjectDistance { size_t oft = 0; float *lcptr = static_cast(&localCentroids[0]); for (size_t li = 0; li < localCodebookNo; li++, oft += localDataSize) { - lut++; + lut++; lcptr += localDataSize; for (size_t k = 1; k < localCodebookCentroidNo; k++) { float *lcendptr = lcptr + localDataSize; @@ -1302,7 +1302,7 @@ class QuantizedObjectDistance { } size_t dim = sizeOfObject / sizeof(float); size_t localDim = dim / localDivisionNo; -#if defined(NGT_AVX512) +#if defined(NGT_AVX512) __m512 flut[localCodebookNo]; __m512 *flutptr = &flut[0]; #ifdef NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION @@ -1367,15 +1367,15 @@ class QuantizedObjectDistance { _mm_storeu_si128((__m128i_u*)blutptr, b); flutptr++; blutptr += 16; - distanceLUT.offsets[li] = offset; + distanceLUT.offsets[li] = offset; distanceLUT.scales[li] = scale; } -#else +#else distanceLUT.totalOffset = 0.0; auto *blutptr = distanceLUT.localDistanceLookup; flutptr = &flut[0]; for (size_t li = 0; li < localCodebookNo; li++) { - float offset = range[li].first; + float offset = range[li].first; float scale = (range[li].second - range[li].first) / 255.0; __m512 v = _mm512_div_ps(_mm512_sub_ps(*flutptr, _mm512_set1_ps(offset)), _mm512_set1_ps(scale)); __m512i b4v = _mm512_cvtps_epi32(_mm512_roundscale_ps(v, _MM_FROUND_TO_NEAREST_INT)); @@ -1383,13 +1383,13 @@ class QuantizedObjectDistance { _mm_storeu_si128((__m128i_u*)blutptr, b); flutptr++; blutptr += 16; - distanceLUT.offsets[li] = offset; + distanceLUT.offsets[li] = offset; distanceLUT.scales[li] = scale; - distanceLUT.totalOffset += offset; + distanceLUT.totalOffset += offset; } #endif -#elif defined(NGT_AVX2) +#elif defined(NGT_AVX2) __m256 flut[localCodebookNo * 2]; __m256 *flutptr = &flut[0]; __m256 mmin = _mm256_set1_ps(std::numeric_limits::max()); @@ -1479,7 +1479,7 @@ class QuantizedObjectDistance { distanceLUT.offsets[li] = offset; distanceLUT.scales[li] = scale; } -#else +#else float flut[localCodebookNo * localCodebookCentroidNoSIMD]; auto *flutptr = &flut[0]; memset(flutptr, 0, sizeof(float) * localCodebookNo * localCodebookCentroidNoSIMD); @@ -1533,7 +1533,7 @@ class QuantizedObjectDistance { distanceLUT.offsets[li] = offset; distanceLUT.scales[li] = scale; } -#endif +#endif @@ -1550,7 +1550,7 @@ class QuantizedObjectDistance { size_t oft = 0; float *lcptr = static_cast(&localCentroids[0]); for (size_t li = 0; li < localCodebookNo; li++, oft += localDataSize) { - *lut++ = 0; + *lut++ = 0; lcptr += localDataSize; for (size_t k = 1; k < localCodebookCentroidNo; k++) { float *lcendptr = lcptr + localDataSize; @@ -1682,7 +1682,7 @@ class QuantizedObjectDistance { size_t localCodebookCentroidNoSIMD; Rotation *rotation; -}; +}; template class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance { @@ -1774,7 +1774,7 @@ class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance { cerr << "operator is not implemented" << endl; abort(); } -#endif +#endif }; @@ -1810,7 +1810,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { abort(); return 0.0; } -#else +#else inline double operator()(void *l, DistanceLookupTable &distanceLUT) { T *localID = static_cast(l); float *lut = distanceLUT.localDistanceLookup; @@ -1843,13 +1843,13 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { data = _mm256_min_ps(data, (__m256)_mm256_permute4x64_epi64((__m256i)data, _MM_SHUFFLE(3, 2, 3, 2))); data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 8)); data = _mm256_min_ps(data, (__m256)_mm256_srli_si256((__m256i)data, 4)); - + return data[0]; } #endif #if defined(NGTQG_AVX512) || defined(NGTQG_AVX2) -#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION) +#if defined(NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION) #ifdef NGTQBG_MIN inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT) { #else @@ -1868,7 +1868,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { const __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); const size_t range512 = distanceLUT.range512; auto step512 = distanceLUT.step512; -#endif +#endif const __m256i mask256x0F = _mm256_set1_epi16(0x000f); const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0); const size_t range256 = distanceLUT.range256; @@ -1882,24 +1882,24 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { auto *lastgroup512 = localID + range512; while (localID < lastgroup512) { __m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); - __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); - __m512i lo = _mm512_and_si512(packedobj, mask512x0F); - __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); - __m512i obj = _mm512_or_si512(lo, hi); - __m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); + __m512i lo = _mm512_and_si512(packedobj, mask512x0F); + __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); + __m512i obj = _mm512_or_si512(lo, hi); + __m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 0))); depu16 = _mm512_adds_epu16(depu16, _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(vtmp, 1))); lut += (localCodebookCentroidNo - 1) * 4; localID += step512; - } -#else + } +#else __m256i depu16l = _mm256_setzero_si256(); __m256i depu16h = _mm256_setzero_si256(); -#endif +#endif while (localID < lastgroup256) { __m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); __m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0])); __m256i lo = _mm256_and_si256(packedobj, mask256x0F); __m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4); @@ -1914,7 +1914,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { #endif lut += (localCodebookCentroidNo - 1) * 2; localID += step256; - } + } #if defined(NGTQG_AVX512) __m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 1)); @@ -1928,7 +1928,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { float two = 2.0; distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); #endif - distance = _mm512_sqrt_ps(distance); + distance = _mm512_sqrt_ps(distance); _mm512_storeu_ps(d, distance); #ifdef NGTQBG_MIN { @@ -1945,7 +1945,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { if (min > tmpmin) min = tmpmin; } #endif -#else +#else __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0)); __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1)); __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0)); @@ -1964,8 +1964,8 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); #endif - distancel = _mm256_sqrt_ps(distancel); - distanceh = _mm256_sqrt_ps(distanceh); + distancel = _mm256_sqrt_ps(distancel); + distanceh = _mm256_sqrt_ps(distanceh); _mm256_storeu_ps(d, distancel); _mm256_storeu_ps(d + 8, distanceh); #ifdef NGTQBG_MIN @@ -1974,9 +1974,9 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { if (min > tmpmin) min = tmpmin; } #endif -#endif +#endif d += 16; - } + } #ifdef NGTQBG_MIN return min; #endif @@ -2003,7 +2003,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { __m512i mask512xF0 = _mm512_set1_epi16(0x00f0); const size_t range512 = distanceLUT.range512; auto step512 = distanceLUT.step512; -#endif +#endif const __m256i mask256x0F = _mm256_set1_epi16(0x000f); const __m256i mask256xF0 = _mm256_set1_epi16(0x00f0); const size_t range256 = distanceLUT.range256; @@ -2019,38 +2019,38 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { auto *lastgroup512 = localID + range512; while (localID < lastgroup512) { __m512i lookupTable = _mm512_loadu_si512((__m512i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); - __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); - __m512i lo = _mm512_and_si512(packedobj, mask512x0F); - __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); - __m512i obj = _mm512_or_si512(lo, hi); - __m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); - - __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0))); + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + __m512i packedobj = _mm512_cvtepu8_epi16(_mm256_loadu_si256((__m256i const*)&localID[0])); + __m512i lo = _mm512_and_si512(packedobj, mask512x0F); + __m512i hi = _mm512_slli_epi16(_mm512_and_si512(packedobj, mask512xF0), 4); + __m512i obj = _mm512_or_si512(lo, hi); + __m512i vtmp = _mm512_shuffle_epi8(lookupTable, obj); + + __m512 d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 0))); __m512 scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[0])); distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1))); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 1))); scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[1])); distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2))); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 2))); scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[2])); distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); - d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3))); + d = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(_mm512_extracti64x2_epi64(vtmp, 3))); scale = _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&scales[3])); distance = _mm512_add_ps(distance, _mm512_mul_ps(d, scale)); lut += (localCodebookCentroidNo - 1) * 4; scales += 4; localID += step512; - } -#else + } +#else __m256i depu16l = _mm256_setzero_si256(); __m256i depu16h = _mm256_setzero_si256(); -#endif +#endif while (localID < lastgroup256) { __m256i lookupTable = _mm256_loadu_si256((__m256i const*)lut); - _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); - //std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl; + _mm_prefetch(&localID[0] + 64 * 8, _MM_HINT_T0); + //std::cerr << "obj=" << (int)(localID[0] & 0x0f) << "," << (int)((localID[0] >> 4) & 0x0f) << std::endl; __m256i packedobj = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)&localID[0])); __m256i lo = _mm256_and_si256(packedobj, mask256x0F); __m256i hi = _mm256_slli_epi16(_mm256_and_si256(packedobj, mask256xF0), 4); @@ -2073,7 +2073,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { lut += (localCodebookCentroidNo - 1) * 2; scales += 2; localID += step256; - } + } #if defined(NGTQG_AVX512) //__m512i lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(depu16, 0)); @@ -2086,7 +2086,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { float two = 2.0; distance = _mm512_mul_ps(_mm512_sub_ps(_mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distance), _mm512_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); #endif - distance = _mm512_sqrt_ps(distance); + distance = _mm512_sqrt_ps(distance); _mm512_storeu_ps(d, distance); #ifdef NGTQBG_MIN { @@ -2103,7 +2103,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { if (min > tmpmin) min = tmpmin; } #endif -#else +#else __m256i lol = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 0)); __m256i loh = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16l, 1)); __m256i hil = _mm256_cvtepu16_epi32(_mm256_extractf128_si256(depu16h, 0)); @@ -2125,20 +2125,20 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { distancel = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distancel), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); distanceh = _mm256_mul_ps(_mm256_sub_ps(_mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&one)), distanceh), _mm256_broadcastss_ps(*reinterpret_cast<__m128*>(&two))); #endif - distancel = _mm256_sqrt_ps(distancel); - distanceh = _mm256_sqrt_ps(distanceh); + distancel = _mm256_sqrt_ps(distancel); + distanceh = _mm256_sqrt_ps(distanceh); _mm256_storeu_ps(d, distancel); _mm256_storeu_ps(d + 8, distanceh); -#endif +#endif d += 16; - } + } #ifdef NGTQBG_MIN return min; #endif } -#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// +#endif /// NGTQ_TOTAL_SCALE_OFFSET_COMPRESSION //////////////////////////////////////// -#else +#else #ifdef NGTQBG_MIN inline float operator()(void *inv, float *distances, size_t size, DistanceLookupTableUint8 &distanceLUT) { #else @@ -2184,7 +2184,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { return min; #endif } -#endif +#endif inline double operator()(NGT::Object &object, size_t objectID, void *l) { @@ -2242,7 +2242,7 @@ class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { } return sqrt(distance); } -#endif +#endif }; @@ -2306,7 +2306,7 @@ class Quantizer { virtual void eraseInvertedIndexObject(size_t id) = 0; virtual void eraseInvertedIndexObject() = 0; - virtual NGT::Distance getApproximateDistance(NGT::Object &query, uint32_t globalID, uint16_t *localID, QuantizedObjectDistance::DistanceLookupTable &distanceLUT) { + virtual NGT::Distance getApproximateDistance(NGT::Object &query, uint32_t globalID, uint16_t *localID, QuantizedObjectDistance::DistanceLookupTable &distanceLUT) { std::cerr << "getApproximateDistance() is not implemented." << std::endl; abort(); } @@ -2442,7 +2442,7 @@ class QuantizedObjectProcessingStream { stream[blkNo * alignedBlockSize + NGTQ_SIMD_BLOCK_SIZE * subvectorNo + oft] = quantizedObject; #endif } -#endif +#endif uint8_t* compressIntoUint4() { size_t idx = 0; @@ -2502,10 +2502,10 @@ class GenerateResidualObject { #else virtual void operator()(NGT::Object &object, size_t centroidID, float *subspaceObject) = 0; #endif - virtual void operator()(NGT::Object &object, size_t centroidID, + virtual void operator()(NGT::Object &object, size_t centroidID, vector>> &localObjs) = 0; #else - virtual void operator()(size_t objectID, size_t centroidID, + virtual void operator()(size_t objectID, size_t centroidID, vector>> &localObjs) = 0; #endif void set(NGT::Index &gc, NGT::Index lc[], size_t dn, size_t lcn, @@ -2543,7 +2543,7 @@ class GenerateResidualObjectUint8 : public GenerateResidualObject { void operator()(NGT::Object &xobject, size_t centroidID, vector>> &localObjs) { abort(); } #else - void operator()(size_t objectID, size_t centroidID, + void operator()(size_t objectID, size_t centroidID, vector>> &localObjs) { NGT::PersistentObject &globalCentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(centroidID); NGT::Object object(&globalCodebookIndex->getObjectSpace()); @@ -2555,7 +2555,7 @@ class GenerateResidualObjectUint8 : public GenerateResidualObject { subObject.resize(lsize); for (size_t d = 0; d < lsize; d++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - subObject[d] = (double)object[di * lsize + d] - + subObject[d] = (double)object[di * lsize + d] - (double)globalCentroid.at(di * lsize + d, globalCodebookIndex->getObjectSpace().getRepository().allocator); #else subObject[d] = (double)object[di * lsize + d] - (double)globalCentroid[di * lsize + d]; @@ -2601,7 +2601,7 @@ class GenerateResidualObjectFloat : public GenerateResidualObject { #endif } } - void operator()(NGT::Object &object, size_t centroidID, + void operator()(NGT::Object &object, size_t centroidID, vector>> &localObjs) { size_t byteSizeOfObject = globalCodebookIndex->getObjectSpace().getByteSizeOfObject(); size_t localByteSize = byteSizeOfObject / divisionNo; @@ -2629,10 +2629,10 @@ class GenerateResidualObjectFloat : public GenerateResidualObject { } }; -#else +#else class GenerateResidualObjectFloat : public GenerateResidualObject { public: - void operator()(size_t objectID, size_t centroidID, + void operator()(size_t objectID, size_t centroidID, vector>> &localObjs) { NGT::PersistentObject &globalCentroid = *globalCodebookIndex->getObjectSpace().getRepository().get(centroidID); NGT::Object object(&globalCodebookIndex->getObjectSpace()); @@ -2645,7 +2645,7 @@ class GenerateResidualObjectFloat : public GenerateResidualObject { subObject.resize(localDimension); float *subVector = static_cast(object.getPointer(di * localByteSize)); #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *globalCentroidSubVector = static_cast(globalCentroid.getPointer(di * localByteSize, + float *globalCentroidSubVector = static_cast(globalCentroid.getPointer(di * localByteSize, globalCodebookIndex->getObjectSpace().getRepository().allocator)); #else float *globalCentroidSubVector = static_cast(globalCentroid.getPointer(di * localByteSize)); @@ -2659,7 +2659,7 @@ class GenerateResidualObjectFloat : public GenerateResidualObject { } } }; -#endif +#endif template class QuantizerInstance : public Quantizer { @@ -2732,10 +2732,10 @@ class QuantizerInstance : public Quantizer { if (property.dataSize == 0) { std::stringstream msg; #ifdef NGTQ_QBG - msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension + msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension << ":" << property.dataType << ":" << property.genuineDataType; #else - msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension + msg << "Quantizer: data size of the object is zero. " << property.dataSize << ":" << property.dimension << ":" << property.dataType; #endif NGTThrowException(msg); @@ -2750,9 +2750,9 @@ class QuantizerInstance : public Quantizer { #ifdef MULTIPLE_OBJECT_LISTS objectList.openMultipleStreams(omp_get_max_threads()); #endif -#else +#else objectList.create(fname, property.dataSize); -#endif +#endif #ifdef NGTQ_QBG if (rotation != 0) { saveRotation(*rotation); @@ -2848,7 +2848,7 @@ class QuantizerInstance : public Quantizer { if (!objectList.open(index + "/obj", property.genuineDataType, property.distanceType, property.dimension)) { #else if (!objectList.open(index + "/obj", static_cast(property.dataType), property.distanceType, property.dimension)) { -#endif +#endif stringstream msg; msg << "NGTQ::Quantizer::open: cannot open the object file. " << index + "/obj" << std::endl; std::cerr << "Ignore. " << msg.str() << std::endl; @@ -2950,11 +2950,11 @@ class QuantizerInstance : public Quantizer { } else { quantizationCodebook.deserialize(irfs, readOnly); } -#else +#else std::string qcbName(rootDirectory + "/qcb"); ifstream ifs(qcbName); quantizationCodebook.deserialize(ifs, readOnly); -#endif +#endif } #endif redirector.end(); @@ -3085,8 +3085,8 @@ class QuantizerInstance : public Quantizer { void createIndex(NGT::GraphAndTreeIndex &codebook, size_t centroidLimit, - const vector> &objects, - vector &ids, + const vector> &objects, + vector &ids, float &range) { if (centroidLimit > 0) { @@ -3152,12 +3152,12 @@ class QuantizerInstance : public Quantizer { #endif if (property.centroidCreationMode == CentroidCreationModeStatic || property.centroidCreationMode == CentroidCreationModeStaticLayer) { - localData.push_back(LocalDatam(globalCentroidID, - invertedIndexEntry.size() - 1)); + localData.push_back(LocalDatam(globalCentroidID, + invertedIndexEntry.size() - 1)); } else { if (id.distance != 0.0) { - localData.push_back(LocalDatam(globalCentroidID, - invertedIndexEntry.size() - 1)); + localData.push_back(LocalDatam(globalCentroidID, + invertedIndexEntry.size() - 1)); } } } else { @@ -3184,8 +3184,8 @@ class QuantizerInstance : public Quantizer { #endif } if (property.quantizerType == QuantizerTypeQBG) { - localData.push_back(LocalDatam(globalCentroidID, - invertedIndexEntry.size() - 1)); + localData.push_back(LocalDatam(globalCentroidID, + invertedIndexEntry.size() - 1)); } } } @@ -3203,7 +3203,7 @@ class QuantizerInstance : public Quantizer { for (size_t i = 0; i < localData.size(); i++) { for (size_t di = 0; di < divisionNo; di++) { size_t id = lids[i * divisionNo + di].id; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); + assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); #ifdef NGTQ_SHARED_INVERTED_INDEX (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[di] = id; #else @@ -3223,7 +3223,7 @@ class QuantizerInstance : public Quantizer { #ifndef NGTQ_QBG bool setMultipleLocalCodeToInvertedIndexEntry(vector &lcodebook, vector &localData, vector>> &localObjs) { size_t localCodebookNo = property.getLocalCodebookNo(); - bool localCodebookFull = true; + bool localCodebookFull = true; #pragma omp parallel for for (size_t li = 0; li < localCodebookNo; ++li) { float lr = property.localRange; @@ -3241,13 +3241,13 @@ class QuantizerInstance : public Quantizer { } vector lids; createIndex(*lcodebook[li], localCentroidLimit, localObjs[li], lids, lr); - if (lr != FLT_MAX) { + if (lr != FLT_MAX) { localCodebookFull = false; } assert(localData.size() == lids.size()); for (size_t i = 0; i < localData.size(); i++) { size_t id = lids[i].id; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); + assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); #ifdef NGTQ_SHARED_INVERTED_INDEX (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[li] = id; #else @@ -3260,11 +3260,11 @@ class QuantizerInstance : public Quantizer { localCodebookIndexes[li].deleteObject(localObjs[li][i].first); } #endif - } - } + } + } return localCodebookFull; } -#endif +#endif #ifdef NGTQ_QBG bool setMultipleLocalCodeToInvertedIndexEntry(vector &lcodebook, @@ -3272,7 +3272,7 @@ class QuantizerInstance : public Quantizer { float *subspaceObjects) { size_t paddedDimension = globalCodebookIndex.getObjectSpace().getPaddedDimension(); size_t localCodebookNo = property.getLocalCodebookNo(); - bool localCodebookFull = true; + bool localCodebookFull = true; for (size_t li = 0; li < localCodebookNo; ++li) { float lr = property.localRange; size_t localCentroidLimit = property.localCentroidLimit; @@ -3295,13 +3295,13 @@ class QuantizerInstance : public Quantizer { localObjects[i].second = 0; } createIndex(*lcodebook[li], localCentroidLimit, localObjects, lids, lr); - if (lr != FLT_MAX) { + if (lr != FLT_MAX) { localCodebookFull = false; } assert(localData.size() == lids.size()); for (size_t i = 0; i < localData.size(); i++) { size_t id = lids[i].id; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); + assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); #ifdef NGTQ_SHARED_INVERTED_INDEX (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[li] = id; #else @@ -3314,11 +3314,11 @@ class QuantizerInstance : public Quantizer { lcodebook[li]->deleteObject(localObjects[i].first); } #endif - } - } + } + } return localCodebookFull; } -#endif +#endif void constructLocalCodebooks() { delete[] localCodebooks; @@ -3390,7 +3390,7 @@ class QuantizerInstance : public Quantizer { size_t d = svi * localDimension + ld; auto dist = subspaceObjects[idx * paddedDimension + d] - localCodebooks[cid * paddedDimension + d]; dist *= dist; - distance[idx * codebookSize * localCodebookNo + cid * localCodebookNo + svi] += dist; + distance[idx * codebookSize * localCodebookNo + cid * localCodebookNo + svi] += dist; localDistance = distance[idx * codebookSize * localCodebookNo + cid * localCodebookNo + svi]; } if (localDistance < min[idx * localCodebookNo + svi].first) { @@ -3406,7 +3406,7 @@ class QuantizerInstance : public Quantizer { for (size_t li = 0; li < localCodebookNo; ++li) { for (size_t i = 0; i < localData.size(); i++) { size_t id = min[i * localCodebookNo + li].second + 1; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); + assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); #ifdef NGTQ_SHARED_INVERTED_INDEX (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[li] = id; #else @@ -3416,7 +3416,7 @@ class QuantizerInstance : public Quantizer { } return; } -#endif +#endif void buildMultipleLocalCodebooks(NGT::Index *localCodebook, size_t localCodebookNo, size_t numberOfCentroids) { NGT::Clustering clustering; @@ -3424,7 +3424,7 @@ class QuantizerInstance : public Quantizer { clustering.epsilonTo = 0.50; clustering.epsilonStep = 0.05; clustering.maximumIteration = 20; - clustering.clusterSizeConstraint = false; + clustering.clusterSizeConstraint = false; for (size_t li = 0; li < localCodebookNo; ++li) { double diff = clustering.kmeansWithNGT(localCodebook[li], numberOfCentroids); if (diff > 0.0) { @@ -3449,9 +3449,9 @@ class QuantizerInstance : public Quantizer { property.quantizerType == QuantizerTypeQBG) ? 0 : 1; oi < invertedIndexEntry.size(); oi++) { #ifdef NGTQ_QBG - localData.push_back(LocalDatam(gidx, oi, invertedIndexEntry.subspaceID)); + localData.push_back(LocalDatam(gidx, oi, invertedIndexEntry.subspaceID)); #else - localData.push_back(LocalDatam(gidx, oi)); + localData.push_back(LocalDatam(gidx, oi)); #endif } } @@ -3468,38 +3468,38 @@ class QuantizerInstance : public Quantizer { (*generateResidualObject)(object, // object invertedIndexEntry.subspaceID, subspaceObjects[i]); // subspace objects -#endif +#endif #else #ifdef NGTQ_VECTOR_OBJECT std::vector object; objectList.get(invertedIndexEntry[localData[i].iiLocalIdx].id, object, &globalCodebookIndex.getObjectSpace()); #else - NGT::Object object(&globalCodebookIndex.getObjectSpace()); + NGT::Object object(&globalCodebookIndex.getObjectSpace()); #endif (*generateResidualObject)(object, // object invertedIndexEntry.subspaceID, subspaceObjects[i]); // subspace objects #endif } - setMultipleLocalCodeToInvertedIndexEntryFixed(localData, &subspaceObjects[0][0]); + setMultipleLocalCodeToInvertedIndexEntryFixed(localData, &subspaceObjects[0][0]); } -#endif +#endif #ifndef NGTQ_QBG void insert(vector> &objects) { std::cerr << "insert() is not implemented." << std::endl; abort(); } -#endif +#endif void searchIndex(NGT::GraphAndTreeIndex &codebook, size_t centroidLimit, #ifdef NGTQ_VECTOR_OBJECT - const vector, size_t>> &objects, + const vector, size_t>> &objects, #else - const vector> &objects, + const vector> &objects, #endif - vector &ids, + vector &ids, float &range, NGT::Index *gqindex) { if (quantizationCodebook.size() == 0) { @@ -3510,7 +3510,7 @@ class QuantizerInstance : public Quantizer { ids.resize(objects.size()); size_t foundCount = 0; double foundRank = 0.0; -#pragma omp parallel for +#pragma omp parallel for for (size_t idx = 0; idx < objects.size(); idx++) { #ifdef NGTQ_VECTOR_OBJECT auto *object = globalCodebookIndex.allocateObject(objects[idx].first); @@ -3576,10 +3576,10 @@ class QuantizerInstance : public Quantizer { } #ifdef NGTQ_VECTOR_OBJECT - void getBlobIDFromObjectToBlobIndex(const vector, size_t>> &objects, + void getBlobIDFromObjectToBlobIndex(const vector, size_t>> &objects, vector &ids) #else - void getBlobIDFromObjectToBlobIndex(const vector> &objects, + void getBlobIDFromObjectToBlobIndex(const vector> &objects, vector &ids) #endif { @@ -3591,7 +3591,7 @@ class QuantizerInstance : public Quantizer { for (size_t idx = 0; idx < objects.size(); idx++) { if (objects[idx].second - 1 >= objectToBlobIndex.size()) { std::stringstream msg; - msg << "Quantizer::insert: Fatal Error! Object ID is invalid. " + msg << "Quantizer::insert: Fatal Error! Object ID is invalid. " << idx << ":" << objects[idx].second - 1 << ":" << objectToBlobIndex.size() << ":" << objects.size(); NGTThrowException(msg); @@ -3629,11 +3629,11 @@ class QuantizerInstance : public Quantizer { abort(); #else NGT::Property property; - + property.dimension = globalCodebookIndex.getObjectSpace().getDimension() + 1; property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; #ifdef NGTQ_SHARED_INVERTED_INDEX - NGT::Index *index = new NGT::Index("dummy", property); + NGT::Index *index = new NGT::Index("dummy", property); std::cerr << "Not implemented" << std::endl; abort(); #else @@ -3670,9 +3670,9 @@ class QuantizerInstance : public Quantizer { std::cerr << "creating the index..." << std::endl; index->createIndex(50); return index; -#endif +#endif } -#endif +#endif #ifdef NGTQ_QBG void decode(QuantizedObject &qobj, Object &object) { @@ -3733,7 +3733,7 @@ class QuantizerInstance : public Quantizer { if (!rotation.empty()) { rotation.mul(object.object.data()); } -#endif +#endif (*generateResidualObject)(object.object, // object subspaceID, object.object.data()); // subspace objects @@ -3778,23 +3778,23 @@ class QuantizerInstance : public Quantizer { encode(subspaceID, objects, qobjs); invertedIndexEntry.set(qobjs); } -#endif +#endif #ifdef NGTQ_QBG void encode(uint32_t subspaceID, ObjectSet &objects, QuantizedObjectSet &qobjs) { #ifdef NGTQ_SHARED_INVERTED_INDEX std::cerr << "enode: Not implemented." << std::endl; abort(); -#else +#else qobjs.resize(objects.size()); -#pragma omp parallel for +#pragma omp parallel for for (size_t i = 0; i < objects.size(); i++) { // multiple local codebooks encode(subspaceID, objects[i], qobjs[i]); } -#endif +#endif } -#endif +#endif #ifdef NGTQ_QBG @@ -3838,9 +3838,9 @@ class QuantizerInstance : public Quantizer { vector localData; for (size_t i = 0; i < ids.size(); i++) { setGlobalCodeToInvertedEntry(ids[i], objects[i], localData); - } + } float subspaceObjects[localData.size()][globalCodebookIndex.getObjectSpace().getPaddedDimension()]; -#pragma omp parallel for +#pragma omp parallel for for (size_t i = 0; i < localData.size(); i++) { IIEntry &invertedIndexEntry = *invertedIndex.at(localData[i].iiIdx); #ifdef NGTQ_SHARED_INVERTED_INDEX @@ -3852,7 +3852,7 @@ class QuantizerInstance : public Quantizer { localData[i].iiIdx, // centroid:ID of global codebook localObjs); #endif -#else +#else #ifdef NGTQ_QBG #ifdef NGTQG_ROTATED_GLOBAL_CODEBOOKS @@ -3863,7 +3863,7 @@ class QuantizerInstance : public Quantizer { rotation.mul(static_cast(objects[i].first->getPointer())); #endif } -#endif +#endif #ifdef NGTQ_VECTOR_OBJECT (*generateResidualObject)(objects[i].first, // object invertedIndexEntry.subspaceID, @@ -3876,20 +3876,20 @@ class QuantizerInstance : public Quantizer { #ifndef NGTQG_ROTATED_GLOBAL_CODEBOOKS rotation.mul(subspaceObjects[i]); #endif -#else +#else (*generateResidualObject)(invertedIndexEntry[localData[i].iiLocalIdx].id, localData[i].iiIdx, // centroid:ID of global codebook localObjs); -#endif -#endif +#endif +#endif - } + } if (property.singleLocalCodebook) { // single local codebook std::cerr << "insert: Fatal Error. single local codebook isn't available." << std::endl; abort(); - } else { + } else { // multiple local codebooks bool localCodebookFull = true; if (property.localCodebookState) { @@ -3909,7 +3909,7 @@ class QuantizerInstance : public Quantizer { localCodebookFull = false; } } - } + } #pragma omp parallel for for (size_t i = 0; i < objects.size(); i++) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR @@ -3923,9 +3923,9 @@ class QuantizerInstance : public Quantizer { #endif } objects.clear(); -#endif +#endif } -#endif +#endif #ifndef NGTQ_QBG @@ -3945,7 +3945,7 @@ class QuantizerInstance : public Quantizer { insert(objects); // batch insert } } -#endif +#endif void insertIntoObjectRepository(vector &objvector, size_t count) { size_t id = count; @@ -3965,8 +3965,8 @@ class QuantizerInstance : public Quantizer { return; } NGT::Index *gqindex = 0; - if ((property.centroidCreationMode == CentroidCreationModeStaticLayer || - property.centroidCreationMode == CentroidCreationModeStatic) && + if ((property.centroidCreationMode == CentroidCreationModeStaticLayer || + property.centroidCreationMode == CentroidCreationModeStatic) && objectToBlobIndex.empty()) { gqindex = buildGlobalCodebookWithQIDIndex(); } @@ -4013,7 +4013,7 @@ class QuantizerInstance : public Quantizer { } delete gqindex; } -#endif +#endif void setupInvertedIndex(std::vector> &qCodebook, std::vector &codebookIndex, @@ -4065,7 +4065,7 @@ class QuantizerInstance : public Quantizer { #endif invertedIndexEntry.reserve(invertedIndexCount[idx]); } -#endif +#endif } @@ -4073,7 +4073,7 @@ class QuantizerInstance : public Quantizer { void rebuildIndex() { abort(); } -#endif +#endif void create(const string &index, NGT::Property &globalProperty, @@ -4301,7 +4301,7 @@ class QuantizerInstance : public Quantizer { bool lookuptable = false; double epsilon = 0.1; NGT::ObjectDistances objects; - search(gcentroidFromList, objects, resultSize, approximateSearchSize, codebookSearchSize, + search(gcentroidFromList, objects, resultSize, approximateSearchSize, codebookSearchSize, refine, lookuptable, epsilon); for (size_t resulti = 0; resulti < objects.size(); resulti++) { if (std::find(elements.begin(), elements.end(), objects[resulti].id) != elements.end()) { @@ -4329,7 +4329,7 @@ class QuantizerInstance : public Quantizer { void searchGlobalCodebook(NGT::Object *query, size_t size, NGT::ObjectDistances &objects, size_t &approximateSearchSize, - size_t codebookSearchSize, + size_t codebookSearchSize, double epsilon) { #ifdef NGTQ_TRACE std::cerr << "searchGlobalCodebook codebookSearchSize=" << codebookSearchSize << std::endl; @@ -4364,9 +4364,9 @@ class QuantizerInstance : public Quantizer { double distance; if (invertedIndexEntry.localID[0] == 0) { distance = globalCentroid.distance; - } else { + } else { distance = (*quantizedObjectDistance)(invertedIndexEntry.localID, distanceLUT); - } + } NGT::ObjectDistance obj; @@ -4375,7 +4375,7 @@ class QuantizerInstance : public Quantizer { assert(obj.id > 0); results.push(obj); - } + } } void eraseInvertedIndexObject(size_t id) { @@ -4435,9 +4435,9 @@ class QuantizerInstance : public Quantizer { #endif } } -#endif +#endif } -#endif +#endif #ifdef NGTQ_QBG void extractInvertedIndexObject(InvertedIndexEntry &invertedIndexObjects) { @@ -4480,12 +4480,12 @@ class QuantizerInstance : public Quantizer { } assert(invertedIndexObjects[entry.id].localID[0] == entry.localID[0]); } - } -#endif + } +#endif } -#endif +#endif - void extractInvertedIndex(std::vector> &ii) { + void extractInvertedIndex(std::vector> &ii) { ii.resize(invertedIndex.size()); for (size_t gid = 1; gid < invertedIndex.size(); gid++) { if (invertedIndex[gid] == 0 || invertedIndex[gid]->size() == 0) { @@ -4525,9 +4525,9 @@ class QuantizerInstance : public Quantizer { double distance; if (invertedIndexEntry.localID[0] == 0) { distance = globalCentroid.distance; - } else { + } else { distance = getApproximateDistance(*query, globalCentroid.id, invertedIndexEntry.localID, distanceLUT); - } + } NGT::ObjectDistance obj; obj.id = invertedIndexEntry.id; @@ -4535,7 +4535,7 @@ class QuantizerInstance : public Quantizer { assert(obj.id > 0); results.push(obj); - } + } } @@ -4549,9 +4549,9 @@ class QuantizerInstance : public Quantizer { double distance; if (invertedIndexEntry.localID[0] == 0) { distance = globalCentroid.distance; - } else { + } else { distance = (*quantizedObjectDistance)(*query, globalCentroid.id, invertedIndexEntry.localID); - } + } NGT::ObjectDistance obj; @@ -4563,7 +4563,7 @@ class QuantizerInstance : public Quantizer { return; } - } + } } @@ -4579,7 +4579,7 @@ class QuantizerInstance : public Quantizer { if (results.size() >= approximateSearchSize) { return; } - } + } } void refineDistance(NGT::Object *query, NGT::ObjectDistances &results) { @@ -4593,11 +4593,11 @@ class QuantizerInstance : public Quantizer { result.distance = distance; } std::sort(results.begin(), results.end()); -#endif +#endif } - void search(NGT::Object *query, NGT::ObjectDistances &objs, - size_t size, + void search(NGT::Object *query, NGT::ObjectDistances &objs, + size_t size, float expansion, AggregationMode aggregationMode, double epsilon = FLT_MAX) { @@ -4606,7 +4606,7 @@ class QuantizerInstance : public Quantizer { search(query, objs, size, approximateSearchSize, codebookSearchSize, aggregationMode, epsilon); } - void search(NGT::Object *query, NGT::ObjectDistances &objs, + void search(NGT::Object *query, NGT::ObjectDistances &objs, size_t size, size_t approximateSearchSize, size_t codebookSearchSize, bool resultRefinement, bool lookUpTable = false, @@ -4624,9 +4624,9 @@ class QuantizerInstance : public Quantizer { search(query, objs, size, approximateSearchSize, codebookSearchSize, aggregationMode, epsilon); } - void search(NGT::Object *query, NGT::ObjectDistances &objs, + void search(NGT::Object *query, NGT::ObjectDistances &objs, size_t size, size_t approximateSearchSize, - size_t codebookSearchSize, + size_t codebookSearchSize, AggregationMode aggregationMode, double epsilon = FLT_MAX) { if (aggregationMode == AggregationModeApproximateDistanceWithLookupTable) { @@ -4713,7 +4713,7 @@ class QuantizerInstance : public Quantizer { globalDistance /= count; std::cerr << distance << ":" << globalDistance << std::endl; return distance; -#endif +#endif } void info(ostream &os, char mode) { @@ -4875,7 +4875,7 @@ class Quantization { - static void create(const string &index, Property &property, + static void create(const string &index, Property &property, NGT::Property &globalProperty, #ifdef NGTQ_QBG NGT::Property &localProperty, @@ -5003,7 +5003,7 @@ class Quantization { setupInvertedIndex(quantizationCodebook, codebookIndex, objectIndex); createIndex(beginID, endID); } -#endif +#endif void setupInvertedIndex(std::vector> &quantizationCodebook, std::vector &codebookIndex, @@ -5032,19 +5032,19 @@ class Quantization { void deleteObject(NGT::Object *object) { getQuantizer().deleteObject(object); } - void search(NGT::Object *object, NGT::ObjectDistances &objs, + void search(NGT::Object *object, NGT::ObjectDistances &objs, size_t size, size_t approximateSearchSize, - size_t codebookSearchSize, bool resultRefinement, + size_t codebookSearchSize, bool resultRefinement, bool lookUpTable, double epsilon) { - getQuantizer().search(object, objs, size, approximateSearchSize, codebookSearchSize, + getQuantizer().search(object, objs, size, approximateSearchSize, codebookSearchSize, resultRefinement, lookUpTable, epsilon); } - void search(NGT::Object *object, NGT::ObjectDistances &objs, + void search(NGT::Object *object, NGT::ObjectDistances &objs, size_t size, float expansion, AggregationMode aggregationMode, double epsilon) { - getQuantizer().search(object, objs, size, expansion, + getQuantizer().search(object, objs, size, expansion, aggregationMode, epsilon); } @@ -5052,11 +5052,11 @@ class Quantization { void verify() { getQuantizer().verify(); } - NGTQ::Quantizer &getQuantizer() { + NGTQ::Quantizer &getQuantizer() { if (quantizer == 0) { NGTThrowException("NGTQ::Index: Not open."); } - return *quantizer; + return *quantizer; } size_t getGlobalCodebookSize() { return quantizer->globalCodebookIndex.getObjectRepositorySize(); } diff --git a/lib/NGT/Node.cpp b/lib/NGT/Node.cpp index 6bd2913..c11d116 100644 --- a/lib/NGT/Node.cpp +++ b/lib/NGT/Node.cpp @@ -293,7 +293,7 @@ LeafNode::removeObject(size_t id, size_t replaceId) { } #if defined(NGT_SHARED_MEMORY_ALLOCATOR) -bool InternalNode::verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, +bool InternalNode::verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, SharedMemoryAllocator &allocator) { #else bool InternalNode::verify(Repository &internalNodes, Repository &leafNodes) { diff --git a/lib/NGT/Node.h b/lib/NGT/Node.h index 260d149..624fbfa 100644 --- a/lib/NGT/Node.h +++ b/lib/NGT/Node.h @@ -115,8 +115,8 @@ namespace NGT { } getPivot(os).set(f, os); } - PersistentObject &getPivot(ObjectSpace &os) { - return *(PersistentObject*)os.getRepository().getAllocator().getAddr(pivot); + PersistentObject &getPivot(ObjectSpace &os) { + return *(PersistentObject*)os.getRepository().getAllocator().getAddr(pivot); } void deletePivot(ObjectSpace &os, SharedMemoryAllocator &allocator) { os.deleteObject(&getPivot(os)); @@ -149,7 +149,7 @@ namespace NGT { }; - + class InternalNode : public Node { public: #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -218,7 +218,7 @@ namespace NGT { Distance *getBorders() { return borders; } #endif // NGT_SHARED_MEMORY_ALLOCATOR -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) +#if defined(NGT_SHARED_MEMORY_ALLOCATOR) void serialize(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { #else void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { @@ -243,9 +243,9 @@ namespace NGT { } for (size_t i = 0; i < childrenSize - 1; i++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::Serializer::write(os, getBorders(allocator)[i]); + NGT::Serializer::write(os, getBorders(allocator)[i]); #else - NGT::Serializer::write(os, getBorders()[i]); + NGT::Serializer::write(os, getBorders()[i]); #endif } } @@ -313,9 +313,9 @@ namespace NGT { } for (size_t i = 0; i < childrenSize - 1; i++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::Serializer::writeAsText(os, getBorders(allocator)[i]); + NGT::Serializer::writeAsText(os, getBorders(allocator)[i]); #else - NGT::Serializer::writeAsText(os, getBorders()[i]); + NGT::Serializer::writeAsText(os, getBorders()[i]); #endif os << " "; } @@ -373,7 +373,7 @@ namespace NGT { } #if defined(NGT_SHARED_MEMORY_ALLOCATOR) - bool verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, + bool verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, SharedMemoryAllocator &allocator); #else bool verify(Repository &internalNodes, Repository &leafNodes); @@ -390,7 +390,7 @@ namespace NGT { #endif }; - + class LeafNode : public Node { public: #if defined(NGT_SHARED_MEMORY_ALLOCATOR) @@ -441,7 +441,7 @@ namespace NGT { #ifdef NGT_SHARED_MEMORY_ALLOCATOR #ifndef NGT_NODE_USE_VECTOR NGT::ObjectDistance *getObjectIDs(SharedMemoryAllocator &allocator) { - return (NGT::ObjectDistance *)allocator.getAddr(objectIDs); + return (NGT::ObjectDistance *)allocator.getAddr(objectIDs); } #endif #else // NGT_SHARED_MEMORY_ALLOCATOR diff --git a/lib/NGT/ObjectRepository.h b/lib/NGT/ObjectRepository.h index 64334c2..b5933d9 100644 --- a/lib/NGT/ObjectRepository.h +++ b/lib/NGT/ObjectRepository.h @@ -18,11 +18,11 @@ namespace NGT { #ifdef NGT_SHARED_MEMORY_ALLOCATOR - class ObjectRepository : + class ObjectRepository : public PersistentRepository { public: typedef PersistentRepository Parent; - void open(const std::string &smfile, size_t sharedMemorySize) { + void open(const std::string &smfile, size_t sharedMemorySize) { std::string file = smfile; file.append("po"); Parent::open(file, sharedMemorySize); @@ -39,17 +39,17 @@ namespace NGT { Parent::push_back((PersistentObject*)0); } - void serialize(const std::string &ofile, ObjectSpace *ospace) { + void serialize(const std::string &ofile, ObjectSpace *ospace) { std::ofstream objs(ofile); if (!objs.is_open()) { std::stringstream msg; msg << "NGT::ObjectSpace: Cannot open the specified file " << ofile << "."; NGTThrowException(msg); } - Parent::serialize(objs, ospace); + Parent::serialize(objs, ospace); } - void deserialize(const std::string &ifile, ObjectSpace *ospace) { + void deserialize(const std::string &ifile, ObjectSpace *ospace) { assert(ospace != 0); std::ifstream objs(ifile); if (!objs.is_open()) { @@ -60,24 +60,24 @@ namespace NGT { Parent::deserialize(objs, ospace); } - void serializeAsText(const std::string &ofile, ObjectSpace *ospace) { + void serializeAsText(const std::string &ofile, ObjectSpace *ospace) { std::ofstream objs(ofile); if (!objs.is_open()) { std::stringstream msg; msg << "NGT::ObjectSpace: Cannot open the specified file " << ofile << "."; NGTThrowException(msg); } - Parent::serializeAsText(objs, ospace); + Parent::serializeAsText(objs, ospace); } - void deserializeAsText(const std::string &ifile, ObjectSpace *ospace) { + void deserializeAsText(const std::string &ifile, ObjectSpace *ospace) { std::ifstream objs(ifile); if (!objs.is_open()) { std::stringstream msg; msg << "NGT::ObjectSpace: Cannot open the specified file " << ifile << "."; NGTThrowException(msg); } - Parent::deserializeAsText(objs, ospace); + Parent::deserializeAsText(objs, ospace); } void readText(std::istream &is, size_t dataSize = 0) { @@ -130,7 +130,7 @@ namespace NGT { while (getline(is, line)) { lineNo++; if (dataSize > 0 && (dataSize <= size() - prevDataSize)) { - std::cerr << "The size of data reached the specified size. The remaining data in the file are not inserted. " + std::cerr << "The size of data reached the specified size. The remaining data in the file are not inserted. " << dataSize << std::endl; break; } @@ -206,7 +206,7 @@ namespace NGT { NGT::Common::tokenize(textLine, tokens, sep); if (dimension > tokens.size()) { std::stringstream msg; - msg << "ObjectSpace::allocate: too few dimension. " << tokens.size() << ":" << dimension << ". " + msg << "ObjectSpace::allocate: too few dimension. " << tokens.size() << ":" << dimension << ". " << textLine; NGTThrowException(msg); } @@ -214,7 +214,7 @@ namespace NGT { for (idx = 0; idx < dimension; idx++) { if (tokens[idx].size() == 0) { std::stringstream msg; - msg << "ObjectSpace::allocate: an empty value string. " << idx << ":" << tokens.size() << ":" + msg << "ObjectSpace::allocate: an empty value string. " << idx << ":" << tokens.size() << ":" << dimension << ". " << textLine; NGTThrowException(msg); } @@ -236,7 +236,7 @@ namespace NGT { } else { if (dimension != size) { std::stringstream msg; - msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. The indexed objects=" + msg << "ObjectSpace::allocateObject: Fatal error! The specified dimension is invalid. The indexed objects=" << dimension << " The specified object=" << size; NGTThrowException(msg); } @@ -301,7 +301,7 @@ namespace NGT { PersistentObject *po = new (objectAllocator) PersistentObject(objectAllocator, paddedByteSize); if (size != 0 && dimension != size) { std::stringstream msg; - msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" + msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" << (sparse ? dimension - 1 : dimension) << ". The specified object=" << (sparse ? size - 1 : size) << "."; NGTThrowException(msg); } @@ -340,7 +340,7 @@ namespace NGT { PersistentObject *allocatePersistentObject(T *o, size_t size) { if (size != 0 && dimension != size) { std::stringstream msg; - msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" + msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" << (sparse ? dimension - 1 : dimension) << ". The specified object=" << (sparse ? size - 1 : size) << "."; NGTThrowException(msg); } diff --git a/lib/NGT/ObjectSpace.h b/lib/NGT/ObjectSpace.h index 1368ef1..2dee133 100644 --- a/lib/NGT/ObjectSpace.h +++ b/lib/NGT/ObjectSpace.h @@ -29,7 +29,7 @@ namespace NGT { void serialize(std::ofstream &os, ObjectSpace *objspace = 0) { NGT::Serializer::write(os, (std::vector&)*this);} void deserialize(std::ifstream &is, ObjectSpace *objspace = 0) { NGT::Serializer::read(is, (std::vector&)*this);} - void serializeAsText(std::ofstream &os, ObjectSpace *objspace = 0) { + void serializeAsText(std::ofstream &os, ObjectSpace *objspace = 0) { NGT::Serializer::writeAsText(os, size()); os << " "; for (size_t i = 0; i < size(); i++) { @@ -39,7 +39,7 @@ namespace NGT { } void deserializeAsText(std::ifstream &is, ObjectSpace *objspace = 0) { size_t s; - NGT::Serializer::readAsText(is, s); + NGT::Serializer::readAsText(is, s); resize(s); for (size_t i = 0; i < size(); i++) { (*this)[i].deserializeAsText(is); @@ -96,7 +96,7 @@ namespace NGT { PersistentObjectDistances(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0) {} void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { NGT::Serializer::write(os, (Vector&)*this); } void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { NGT::Serializer::read(is, (Vector&)*this); } - void serializeAsText(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objspace = 0) { + void serializeAsText(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objspace = 0) { NGT::Serializer::writeAsText(os, size()); os << " "; for (size_t i = 0; i < size(); i++) { @@ -221,7 +221,7 @@ namespace NGT { virtual void copy(Object &objecta, Object &objectb) = 0; - virtual void linearSearch(Object &query, double radius, size_t size, + virtual void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results) = 0; virtual const std::type_info &getObjectType() = 0; @@ -301,41 +301,41 @@ namespace NGT { class BaseObject { public: virtual uint8_t &operator[](size_t idx) const = 0; - void serialize(std::ostream &os, ObjectSpace *objectspace = 0) { + void serialize(std::ostream &os, ObjectSpace *objectspace = 0) { assert(objectspace != 0); size_t byteSize = objectspace->getByteSizeOfObject(); - NGT::Serializer::write(os, (uint8_t*)&(*this)[0], byteSize); + NGT::Serializer::write(os, (uint8_t*)&(*this)[0], byteSize); } - void deserialize(std::istream &is, ObjectSpace *objectspace = 0) { + void deserialize(std::istream &is, ObjectSpace *objectspace = 0) { assert(objectspace != 0); size_t byteSize = objectspace->getByteSizeOfObject(); assert(&(*this)[0] != 0); - NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize); + NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize); if (is.eof()) { std::stringstream msg; msg << "ObjectSpace::BaseObject: Fatal Error! Read beyond the end of the object file. The object file is corrupted?" << byteSize; NGTThrowException(msg); } } - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { + void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { assert(objectspace != 0); const std::type_info &t = objectspace->getObjectType(); size_t dimension = objectspace->getDimension(); void *ref = (void*)&(*this)[0]; if (t == typeid(uint8_t)) { - NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); } else if (t == typeid(float)) { - NGT::Serializer::writeAsText(os, (float*)ref, dimension); + NGT::Serializer::writeAsText(os, (float*)ref, dimension); #ifdef NGT_HALF_FLOAT } else if (t == typeid(float16)) { - NGT::Serializer::writeAsText(os, (float16*)ref, dimension); + NGT::Serializer::writeAsText(os, (float16*)ref, dimension); #endif } else if (t == typeid(double)) { - NGT::Serializer::writeAsText(os, (double*)ref, dimension); + NGT::Serializer::writeAsText(os, (double*)ref, dimension); } else if (t == typeid(uint16_t)) { - NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); } else if (t == typeid(uint32_t)) { - NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); } else { std::cerr << "Object::serializeAsText: not supported data type. [" << t.name() << "]" << std::endl; assert(0); @@ -348,19 +348,19 @@ namespace NGT { void *ref = (void*)&(*this)[0]; assert(ref != 0); if (t == typeid(uint8_t)) { - NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); } else if (t == typeid(float)) { - NGT::Serializer::readAsText(is, (float*)ref, dimension); + NGT::Serializer::readAsText(is, (float*)ref, dimension); #ifdef NGT_HALF_FLOAT } else if (t == typeid(float16)) { - NGT::Serializer::readAsText(is, (float16*)ref, dimension); + NGT::Serializer::readAsText(is, (float16*)ref, dimension); #endif } else if (t == typeid(double)) { - NGT::Serializer::readAsText(is, (double*)ref, dimension); + NGT::Serializer::readAsText(is, (double*)ref, dimension); } else if (t == typeid(uint16_t)) { - NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); } else if (t == typeid(uint32_t)) { - NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); } else { std::cerr << "Object::deserializeAsText: not supported data type. [" << t.name() << "]" << std::endl; assert(0); @@ -483,7 +483,7 @@ namespace NGT { ~PersistentObject() {} - uint8_t &at(size_t idx, SharedMemoryAllocator &allocator) const { + uint8_t &at(size_t idx, SharedMemoryAllocator &allocator) const { uint8_t *a = (uint8_t *)allocator.getAddr(array); return a[idx]; } @@ -496,7 +496,7 @@ namespace NGT { void *getPointer(size_t idx, SharedMemoryAllocator &allocator) { uint8_t *a = (uint8_t *)allocator.getAddr(array); - return a + idx; + return a + idx; } // set v in objectspace to this object using allocator. @@ -504,22 +504,22 @@ namespace NGT { static off_t allocate(ObjectSpace &objectspace); - void serializeAsText(std::ostream &os, SharedMemoryAllocator &allocator, - ObjectSpace *objectspace = 0) { + void serializeAsText(std::ostream &os, SharedMemoryAllocator &allocator, + ObjectSpace *objectspace = 0) { serializeAsText(os, objectspace); } void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0); - void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, + void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { deserializeAsText(is, objectspace); } void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0); - void serialize(std::ostream &os, SharedMemoryAllocator &allocator, - ObjectSpace *objectspace = 0) { + void serialize(std::ostream &os, SharedMemoryAllocator &allocator, + ObjectSpace *objectspace = 0) { std::cerr << "serialize is not implemented" << std::endl; assert(0); } diff --git a/lib/NGT/ObjectSpaceRepository.h b/lib/NGT/ObjectSpaceRepository.h index 09b01c1..dd00fef 100644 --- a/lib/NGT/ObjectSpaceRepository.h +++ b/lib/NGT/ObjectSpaceRepository.h @@ -26,7 +26,7 @@ class ObjectSpace; namespace NGT { - template + template class ObjectSpaceRepository : public ObjectSpace, public ObjectRepository { public: @@ -330,7 +330,7 @@ namespace NGT { } } - Object *allocateObject(Object &o) { + Object *allocateObject(Object &o) { Object *po = new Object(getByteSizeOfObject()); for (size_t i = 0; i < getByteSizeOfObject(); i++) { (*po)[i] = o[i]; @@ -359,7 +359,7 @@ namespace NGT { delete comparator; } assert(ObjectSpace::dimension != 0); - distanceType = t; + distanceType = t; switch (distanceType) { #ifdef NGT_SHARED_MEMORY_ALLOCATOR case DistanceTypeL1: @@ -691,26 +691,26 @@ namespace NGT { return allocator.getOffset(new(allocator) PersistentObject(allocator, &objectspace)); } - inline void PersistentObject::serializeAsText(std::ostream &os, ObjectSpace *objectspace) { + inline void PersistentObject::serializeAsText(std::ostream &os, ObjectSpace *objectspace) { assert(objectspace != 0); SharedMemoryAllocator &allocator = objectspace->getRepository().getAllocator(); const std::type_info &t = objectspace->getObjectType(); void *ref = &(*this).at(0, allocator); size_t dimension = objectspace->getDimension(); if (t == typeid(uint8_t)) { - NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); } else if (t == typeid(float)) { - NGT::Serializer::writeAsText(os, (float*)ref, dimension); + NGT::Serializer::writeAsText(os, (float*)ref, dimension); #ifdef NGT_HALF_FLOAT } else if (t == typeid(float16)) { - NGT::Serializer::writeAsText(os, (float16*)ref, dimension); + NGT::Serializer::writeAsText(os, (float16*)ref, dimension); #endif } else if (t == typeid(double)) { - NGT::Serializer::writeAsText(os, (double*)ref, dimension); + NGT::Serializer::writeAsText(os, (double*)ref, dimension); } else if (t == typeid(uint16_t)) { - NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); } else if (t == typeid(uint32_t)) { - NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); + NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); } else { std::cerr << "ObjectT::serializeAsText: not supported data type. [" << t.name() << "]" << std::endl; assert(0); @@ -725,19 +725,19 @@ namespace NGT { void *ref = &(*this).at(0, allocator); assert(ref != 0); if (t == typeid(uint8_t)) { - NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); } else if (t == typeid(float)) { - NGT::Serializer::readAsText(is, (float*)ref, dimension); + NGT::Serializer::readAsText(is, (float*)ref, dimension); #ifdef NGT_HALF_FLOAT } else if (t == typeid(float16)) { - NGT::Serializer::readAsText(is, (float16*)ref, dimension); + NGT::Serializer::readAsText(is, (float16*)ref, dimension); #endif } else if (t == typeid(double)) { - NGT::Serializer::readAsText(is, (double*)ref, dimension); + NGT::Serializer::readAsText(is, (double*)ref, dimension); } else if (t == typeid(uint16_t)) { - NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); } else if (t == typeid(uint32_t)) { - NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); + NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); } else { std::cerr << "Object::deserializeAsText: not supported data type. [" << t.name() << "]" << std::endl; assert(0); diff --git a/lib/NGT/Optimizer.h b/lib/NGT/Optimizer.h index f58a03b..e80a423 100644 --- a/lib/NGT/Optimizer.h +++ b/lib/NGT/Optimizer.h @@ -25,7 +25,7 @@ namespace NGT { class Optimizer { public: - Optimizer(NGT::Index &i, size_t n = 10):index(i), nOfResults(n) { + Optimizer(NGT::Index &i, size_t n = 10):index(i), nOfResults(n) { } ~Optimizer() {} @@ -113,12 +113,12 @@ namespace NGT { resultStream.seekg(0, std::ios_base::beg); std::string type; size_t actualResultSize = 0; - gtStream.seekg(0, std::ios_base::end); + gtStream.seekg(0, std::ios_base::end); auto pos = gtStream.tellg(); if (pos == 0) { acc = evaluate(resultStream, type, actualResultSize); } else { - SumupValues sumupValues(true); + SumupValues sumupValues(true); gtStream.clear(); gtStream.seekg(0, std::ios_base::beg); acc = evaluate(gtStream, resultStream, sumupValues, type, actualResultSize); @@ -128,7 +128,7 @@ namespace NGT { } static std::vector - evaluate(std::istream &resultStream, std::string &type, + evaluate(std::istream &resultStream, std::string &type, size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false) { @@ -165,7 +165,7 @@ namespace NGT { } static std::vector - evaluate(std::istream >Stream, std::istream &resultStream, std::string &type, + evaluate(std::istream >Stream, std::istream &resultStream, std::string &type, size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false, bool approximateDistance = false) { SumupValues sumupValues; @@ -173,7 +173,7 @@ namespace NGT { } static std::vector - evaluate(std::istream >Stream, std::istream &resultStream, SumupValues &sumupValues, std::string &type, + evaluate(std::istream >Stream, std::istream &resultStream, SumupValues &sumupValues, std::string &type, size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false, bool approximateDistance = false) { resultDataSize = 0; @@ -258,7 +258,7 @@ namespace NGT { if (dataCount > resultDataSize) { continue; } - std::vector result; + std::vector result; NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { std::stringstream msg; @@ -274,7 +274,7 @@ namespace NGT { msg << "Cannot insert id into the gt. " << id; NGTThrowException(msg); } - } + } } static void checkAndGetSize(std::istream &resultStream, size_t &resultDataSize) @@ -316,7 +316,7 @@ namespace NGT { continue; } lastDataLine = line; - std::vector result; + std::vector result; NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { std::stringstream msg; @@ -342,8 +342,8 @@ namespace NGT { } } - static void sumup(std::istream &resultStream, - size_t queryNo, + static void sumup(std::istream &resultStream, + size_t queryNo, SumupValues &sumupValues, std::unordered_set >, const size_t resultDataSize, @@ -364,7 +364,7 @@ namespace NGT { size_t relevantCount = 0; size_t dataCount = 0; std::string epsilon; - std::string expansion; + std::string expansion; double queryTime = 0.0; size_t distanceCount = 0; size_t visitCount = 0; @@ -480,10 +480,10 @@ namespace NGT { totalDistance = 0.0; relevantCount = 0; dataCount = 0; - } + } continue; - } - std::vector result; + } + std::vector result; NGT::Common::tokenize(line, result, " \t"); if (result.size() < 3) { std::cerr << "result format is wrong. " << std::endl; @@ -510,18 +510,18 @@ namespace NGT { std::cerr << "inner error! $rank $dataCount !!" << std::endl;; abort(); } - } - } else { + } + } else { std::cerr << "Fatal error! : Cannot find query No. " << queryNo << std::endl; abort(); - } - } - } - } + } + } + } + } } - static void exploreEpsilonForAccuracy(NGT::Index &index, std::istream &queries, std::istream >Stream, - Command::SearchParameters &sp, std::pair accuracyRange, double margin) + static void exploreEpsilonForAccuracy(NGT::Index &index, std::istream &queries, std::istream >Stream, + Command::SearchParameters &sp, std::pair accuracyRange, double margin) { double fromUnder = 0.0; double fromOver = 1.0; @@ -544,7 +544,7 @@ namespace NGT { float epsilonStep = 0.02; size_t count; for (count = 0;; count++) { - float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; + float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; if (epsilon > 0.25F) { std::stringstream msg; msg << "exploreEpsilonForAccuracy:" << std::endl; @@ -571,7 +571,7 @@ namespace NGT { if (fromOver < accuracyRangeTo) { startEpsilon = fromOverEpsilon; for (count = 0;; count++) { - float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; + float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; sp.beginOfEpsilon = sp.endOfEpsilon = toOverEpsilon = epsilon; if (epsilon > 0.25F) { std::stringstream msg; @@ -728,11 +728,11 @@ namespace NGT { err.getMessage().find("is too large") != std::string::npos) { std::cerr << "Warning: Cannot adjust the base edge size." << err.what() << std::endl; std::cerr << "Try again with the next base" << std::endl; - NGTThrowException("**Retry**"); + NGTThrowException("**Retry**"); } if (margin > 0.4) { std::cerr << "Warning: Cannot adjust the base even for the widest margin " << margin << ". " << err.what(); - NGTThrowException("**Retry**"); + NGTThrowException("**Retry**"); } else { std::cerr << "Warning: Cannot adjust the base edge size for margin " << margin << ". " << err.what() << std::endl; std::cerr << "Try again for the next margin." << std::endl; @@ -832,7 +832,7 @@ namespace NGT { } if (margin > 0.4) { std::cerr << "Error: Cannot adjust the rate even for the widest margin " << margin << ". " << err.what(); - NGTThrowException("**Retry**"); + NGTThrowException("**Retry**"); } else { std::cerr << "Warning: Cannot adjust the rate of edge size for margin " << margin << ". " << err.what() << std::endl; std::cerr << "Try again for the next margin." << std::endl; @@ -974,7 +974,7 @@ namespace NGT { size_t querySize = args.getl("q", 100); size_t nOfResults = args.getl("n", 10); - std::cerr << "adjustRateSearchEdgeSize: range= " << baseAccuracyRange.first << "-" << baseAccuracyRange.second + std::cerr << "adjustRateSearchEdgeSize: range= " << baseAccuracyRange.first << "-" << baseAccuracyRange.second << "," << rateAccuracyRange.first << "-" << rateAccuracyRange.second << std::endl; std::cerr << "adjustRateSearchEdgeSize: # of queries=" << querySize << std::endl; @@ -1254,8 +1254,8 @@ namespace NGT { NGT::Command::search(index, searchParameters, queries, gtStream); } - static int - calculateMeanValues(std::vector &accuracies, double accuracyRangeFrom, double accuracyRangeTo, + static int + calculateMeanValues(std::vector &accuracies, double accuracyRangeFrom, double accuracyRangeTo, size_t &size, double &meanDistanceCount, double &meanVisitCount, double &meanTime) { int stat = 0; size = 0; @@ -1302,11 +1302,11 @@ namespace NGT { } { size_t last = distance.size() - 1; - double xfrom = (distance[1].second * distance[0].first - distance[0].second * distance[1].first + - accuracyRangeFrom * (distance[1].first - distance[0].first)) / + double xfrom = (distance[1].second * distance[0].first - distance[0].second * distance[1].first + + accuracyRangeFrom * (distance[1].first - distance[0].first)) / (distance[1].second - distance[0].second); - double xto = (distance[last].second * distance[last - 1].first - distance[last - 1].second * distance[last].first + - accuracyRangeTo * (distance[last].first - distance[last - 1].first)) / + double xto = (distance[last].second * distance[last - 1].first - distance[last - 1].second * distance[last].first + + accuracyRangeTo * (distance[last].first - distance[last - 1].first)) / (distance[last].second - distance[last - 1].second); distance[0].first = xfrom; distance[0].second = accuracyRangeFrom; @@ -1320,11 +1320,11 @@ namespace NGT { } { size_t last = visit.size() - 1; - double xfrom = (visit[1].second * visit[0].first - visit[0].second * visit[1].first + - accuracyRangeFrom * (visit[1].first - visit[0].first)) / + double xfrom = (visit[1].second * visit[0].first - visit[0].second * visit[1].first + + accuracyRangeFrom * (visit[1].first - visit[0].first)) / (visit[1].second - visit[0].second); - double xto = (visit[last].second * visit[last - 1].first - visit[last - 1].second * visit[last].first + - accuracyRangeTo * (visit[last].first - visit[last - 1].first)) / + double xto = (visit[last].second * visit[last - 1].first - visit[last - 1].second * visit[last].first + + accuracyRangeTo * (visit[last].first - visit[last - 1].first)) / (visit[last].second - visit[last - 1].second); visit[0].first = xfrom; visit[0].second = accuracyRangeFrom; @@ -1338,11 +1338,11 @@ namespace NGT { } { size_t last = time.size() - 1; - double xfrom = (time[1].second * time[0].first - time[0].second * time[1].first + - accuracyRangeFrom * (time[1].first - time[0].first)) / + double xfrom = (time[1].second * time[0].first - time[0].second * time[1].first + + accuracyRangeFrom * (time[1].first - time[0].first)) / (time[1].second - time[0].second); - double xto = (time[last].second * time[last - 1].first - time[last - 1].second * time[last].first + - accuracyRangeTo * (time[last].first - time[last - 1].first)) / + double xto = (time[last].second * time[last - 1].first - time[last - 1].second * time[last].first + + accuracyRangeTo * (time[last].first - time[last - 1].first)) / (time[last].second - time[last - 1].second); time[0].first = xfrom; time[0].second = accuracyRangeFrom; @@ -1428,13 +1428,13 @@ namespace NGT { if (omode == 'd') { std::cout << "# of computations\t# of visted nodes" << std::endl; for (auto it = accuracies.begin(); it != accuracies.end(); ++it) { - std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" + std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" << (*it).meanDistanceCount << "\t" << (*it).meanVisitCount << std::endl; } } else { std::cout << "Time(msec)\t# of computations\t# of visted nodes" << std::endl; for (auto it = accuracies.begin(); it != accuracies.end(); ++it) { - std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" << (*it).meanTime << "\t" + std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" << (*it).meanTime << "\t" << (*it).meanDistanceCount << "\t" << (*it).meanVisitCount << std::endl; } } @@ -1488,21 +1488,21 @@ namespace NGT { if (e == 0.0) { time = timer.time; } - if (timer.time > time * 40.0) { + if (timer.time > time * 40.0) { maxEpsilon = e; break; } if (identity) { identityCount++; step *= 1.2; - if (identityCount > 5) { + if (identityCount > 5) { maxEpsilon = e; break; } } else { identityCount = 0; } - } + } for (auto i = queryObjects.begin(); i != queryObjects.end(); ++i) { index.deleteObject(*i); @@ -1522,7 +1522,7 @@ namespace NGT { } } - static std::vector> + static std::vector> generateAccuracyTable(NGT::Index &index, size_t nOfResults = 50, size_t querySize = 100) { NGT::Property prop; @@ -1569,7 +1569,7 @@ namespace NGT { if (accuracy - prev < 0.02) { interval *= 2.0; } else if (accuracy - prev > 0.05 && interval > 0.0001) { - + epsilon -= interval; interval /= 2.0; accuracy = prev; diff --git a/lib/NGT/PrimitiveComparator.h b/lib/NGT/PrimitiveComparator.h index fc41ef5..1a4c6cc 100644 --- a/lib/NGT/PrimitiveComparator.h +++ b/lib/NGT/PrimitiveComparator.h @@ -109,7 +109,7 @@ namespace NGT { static int absolute(int v) { return abs(v); } #if defined(NGT_NO_AVX) - template + template inline static double compareL2(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { const OBJECT_TYPE *last = a + size; const OBJECT_TYPE *lastgroup = last - 3; @@ -207,7 +207,7 @@ namespace NGT { #if defined(NGT_AVX512) __m512 sum512 = _mm512_setzero_ps(); while (a < last) { - __m512 v = _mm512_sub_ps(_mm512_cvtph_ps(_mm256_loadu_si256(reinterpret_cast(a))), + __m512 v = _mm512_sub_ps(_mm512_cvtph_ps(_mm256_loadu_si256(reinterpret_cast(a))), _mm512_cvtph_ps(_mm256_loadu_si256(reinterpret_cast(b)))); sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v, v)); a += 16; @@ -220,12 +220,12 @@ namespace NGT { __m256 sum256 = _mm256_setzero_ps(); __m256 v; while (a < last) { - v = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), + v = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(b)))); sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); a += 8; b += 8; - v = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), + v = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(b)))); sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); a += 8; @@ -254,7 +254,7 @@ namespace NGT { double s = f[0] + f[1] + f[2] + f[3]; return sqrt(s); } -#endif +#endif inline static double compareL2(const unsigned char *a, const unsigned char *b, size_t size) { __m128 sum = _mm_setzero_ps(); @@ -282,7 +282,7 @@ namespace NGT { } #endif - template + template inline static double compareNormalizedL2(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { double v = 2.0 - 2.0 * compareDotProduct(a, b, size); if (v < 0.0) { @@ -294,7 +294,7 @@ namespace NGT { #if defined(NGT_NO_AVX) - template + template static double compareL1(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { const OBJECT_TYPE *last = a + size; const OBJECT_TYPE *lastgroup = last - 3; @@ -356,7 +356,7 @@ namespace NGT { const float16 *last = a + size; const float16 *lastgroup = last - 7; while (a < lastgroup) { - __m256 x1 = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), + __m256 x1 = _mm256_sub_ps(_mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(a))), _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(b)))); const __m256 mask = _mm256_set1_ps(-0.0f); __m256 v = _mm256_andnot_ps(mask, x1); @@ -401,7 +401,7 @@ namespace NGT { } #endif -#if defined(NGT_NO_AVX) || !defined(__POPCNT__) +#if defined(NGT_NO_AVX) || !defined(__POPCNT__) inline static double popCount(uint32_t x) { x = (x & 0x55555555) + (x >> 1 & 0x55555555); x = (x & 0x33333333) + (x >> 2 & 0x33333333); @@ -411,7 +411,7 @@ namespace NGT { return x; } - template + template inline static double compareHammingDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { const uint32_t *last = reinterpret_cast(a + size); @@ -441,7 +441,7 @@ namespace NGT { } #endif -#if defined(NGT_NO_AVX) || !defined(__POPCNT__) +#if defined(NGT_NO_AVX) || !defined(__POPCNT__) template inline static double compareJaccardDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { const uint32_t *last = reinterpret_cast(a + size); @@ -510,11 +510,11 @@ namespace NGT { while (locb < size && bi[locb] != 0) { locb++; } - return 1.0 - static_cast(count) / static_cast(loca + locb - count); + return 1.0 - static_cast(count) / static_cast(loca + locb - count); } #if defined(NGT_NO_AVX) - template + template inline static double compareDotProduct(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { double sum = 0.0; for (size_t loc = 0; loc < size; loc++) { @@ -523,7 +523,7 @@ namespace NGT { return sum; } - template + template inline static double compareCosine(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { double normA = 0.0; double normB = 0.0; @@ -538,7 +538,7 @@ namespace NGT { return cosine; } -#else +#else inline static double compareDotProduct(const float *a, const float *b, size_t size) { const float *last = a + size; #if defined(NGT_AVX512) @@ -613,7 +613,7 @@ namespace NGT { double s = static_cast(f[0]) + static_cast(f[1]) + static_cast(f[2]) + static_cast(f[3]); return s; } -#endif +#endif inline static double compareDotProduct(const unsigned char *a, const unsigned char *b, size_t size) { double sum = 0.0; @@ -768,7 +768,7 @@ namespace NGT { double cosine = s / sqrt(na * nb); return cosine; } -#endif +#endif inline static double compareCosine(const unsigned char *a, const unsigned char *b, size_t size) { double normA = 0.0; @@ -786,7 +786,7 @@ namespace NGT { } #endif // #if defined(NGT_NO_AVX) - template + template inline static double compareAngleDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { double cosine = compareCosine(a, b, size); if (cosine >= 1.0) { @@ -798,7 +798,7 @@ namespace NGT { } } - template + template inline static double compareNormalizedAngleDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { double cosine = compareDotProduct(a, b, size); if (cosine >= 1.0) { @@ -811,7 +811,7 @@ namespace NGT { } // added by Nyapicom - template + template inline static double comparePoincareDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { // Unlike the other distance functions, this is not optimized... double a2 = 0.0; @@ -825,7 +825,7 @@ namespace NGT { } // added by Nyapicom - template + template inline static double compareLorentzDistance(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { // Unlike the other distance functions, this is not optimized... double sum = static_cast(a[0]) * static_cast(b[0]); @@ -835,13 +835,13 @@ namespace NGT { return std::acosh(sum); } - template + template inline static double compareCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { auto v = 1.0 - compareCosine(a, b, size); return v < 0.0 ? -v : v; } - template + template inline static double compareNormalizedCosineSimilarity(const OBJECT_TYPE *a, const OBJECT_TYPE *b, size_t size) { auto v = 1.0 - compareDotProduct(a, b, size); return v < 0.0 ? -v : v; @@ -1019,7 +1019,7 @@ namespace NGT { return PrimitiveComparator::compareLorentzDistance((const float16*)a, (const float16*)b, size); } }; -#endif +#endif }; diff --git a/lib/NGT/SharedMemoryAllocator.cpp b/lib/NGT/SharedMemoryAllocator.cpp index 8058923..f4d06c4 100644 --- a/lib/NGT/SharedMemoryAllocator.cpp +++ b/lib/NGT/SharedMemoryAllocator.cpp @@ -18,8 +18,8 @@ -void* operator -new(size_t size, SharedMemoryAllocator &allocator) +void* operator +new(size_t size, SharedMemoryAllocator &allocator) { void *addr = allocator.allocate(size); #ifdef MEMORY_ALLOCATOR_INFO @@ -28,8 +28,8 @@ new(size_t size, SharedMemoryAllocator &allocator) return addr; } -void* operator -new[](size_t size, SharedMemoryAllocator &allocator) +void* operator +new[](size_t size, SharedMemoryAllocator &allocator) { void *addr = allocator.allocate(size); diff --git a/lib/NGT/SharedMemoryAllocator.h b/lib/NGT/SharedMemoryAllocator.h index 9d838c9..e7441a3 100644 --- a/lib/NGT/SharedMemoryAllocator.h +++ b/lib/NGT/SharedMemoryAllocator.h @@ -41,13 +41,13 @@ class SharedMemoryAllocator { GetFreedMemorySize = 2 }; - SharedMemoryAllocator():isValid(false) { + SharedMemoryAllocator():isValid(false) { #ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocatorSiglton::constructor" << std::endl; + std::cerr << "SharedMemoryAllocatorSiglton::constructor" << std::endl; #endif } SharedMemoryAllocator(const SharedMemoryAllocator& a){} - SharedMemoryAllocator& operator=(const SharedMemoryAllocator& a){ return *this; } + SharedMemoryAllocator& operator=(const SharedMemoryAllocator& a){ return *this; } public: void* allocate(size_t size) { if (isValid == false) { @@ -142,8 +142,8 @@ class SharedMemoryAllocator { #endif isValid = true; #ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocator::construct: " << filePath << " total=" - << getTotalSize() << " allocated=" << getAllocatedSize() << " freed=" + std::cerr << "SharedMemoryAllocator::construct: " << filePath << " total=" + << getTotalSize() << " allocated=" << getAllocatedSize() << " freed=" << getFreedSize() << " (" << (double)getFreedSize() / (double)getTotalSize() << ") " << std::endl; #endif return hook; @@ -163,23 +163,23 @@ class SharedMemoryAllocator { mmanager->setEntryHook(entry); #endif } - void *getAddr(off_t oft) { + void *getAddr(off_t oft) { if (oft == 0) { return 0; } assert(oft > 0); #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) - return mmanager->getAbsAddr(oft); + return mmanager->getAbsAddr(oft); #else return (void*)oft; #endif } - off_t getOffset(void *adr) { + off_t getOffset(void *adr) { if (adr == 0) { return 0; } #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) - return mmanager->getRelAddr(adr); + return mmanager->getRelAddr(adr); #else return (off_t)adr; #endif diff --git a/lib/NGT/Tree.cpp b/lib/NGT/Tree.cpp index fa09620..2037393 100644 --- a/lib/NGT/Tree.cpp +++ b/lib/NGT/Tree.cpp @@ -43,7 +43,7 @@ DVPTree::insert(InsertContainer &iobj) { } void -DVPTree::insert(InsertContainer &iobj, LeafNode *leafNode) +DVPTree::insert(InsertContainer &iobj, LeafNode *leafNode) { LeafNode &leaf = *leafNode; size_t fsize = leaf.getObjectSize(); @@ -94,7 +94,7 @@ DVPTree::insert(InsertContainer &iobj, LeafNode *leafNode) return; } -Node::ID +Node::ID DVPTree::split(InsertContainer &iobj, LeafNode &leaf) { Node::Objects *fs = getObjects(leaf, iobj); @@ -527,7 +527,7 @@ DVPTree::search(SearchContainer &so, LeafNode &node, UncheckedNode &uncheckedNod } } -void +void DVPTree::search(SearchContainer &sc) { ((SearchContainer&)sc).vptree = this; Node *root = getRootNode(); diff --git a/lib/NGT/Tree.h b/lib/NGT/Tree.h index 5960428..5562dba 100644 --- a/lib/NGT/Tree.h +++ b/lib/NGT/Tree.h @@ -140,7 +140,7 @@ namespace NGT { void search(SearchContainer &so, LeafNode &node, UncheckedNode &uncheckedNode); bool searchObject(ObjectID id) { - LeafNode &ln = getLeaf(id); + LeafNode &ln = getLeaf(id); for (size_t i = 0; i < ln.getObjectSize(); i++) { #if defined(NGT_SHARED_MEMORY_ALLOCATOR) if (ln.getObjectIDs(leafNodes.allocator)[i].id == id) { @@ -250,7 +250,7 @@ namespace NGT { if (id.getType() == Node::ID::Leaf) { leafNodes.remove(idx); } else { - internalNodes.remove(idx); + internalNodes.remove(idx); } } @@ -292,7 +292,7 @@ namespace NGT { } void - insertNode(InternalNode *n) + insertNode(InternalNode *n) { size_t id = internalNodes.insert(n); n->id.setID(id); diff --git a/lib/NGT/Version.cpp b/lib/NGT/Version.cpp index d2e4453..137d900 100644 --- a/lib/NGT/Version.cpp +++ b/lib/NGT/Version.cpp @@ -17,7 +17,7 @@ #include "NGT/Version.h" void -NGT::Version::get(std::ostream &os) +NGT::Version::get(std::ostream &os) { os << " Version:" << NGT::Version::getVersion() << std::endl; os << " Built date:" << NGT::Version::getBuildDate() << std::endl; @@ -26,33 +26,33 @@ NGT::Version::get(std::ostream &os) os << " The last git commit date:" << Version::getGitDate() << std::endl; } -const std::string -NGT::Version::getVersion() +const std::string +NGT::Version::getVersion() { - return NGT_VERSION; + return NGT_VERSION; } -const std::string -NGT::Version::getBuildDate() +const std::string +NGT::Version::getBuildDate() { - return NGT_BUILD_DATE; + return NGT_BUILD_DATE; } -const std::string -NGT::Version::getGitHash() -{ - return NGT_GIT_HASH; +const std::string +NGT::Version::getGitHash() +{ + return NGT_GIT_HASH; } -const std::string -NGT::Version::getGitDate() -{ +const std::string +NGT::Version::getGitDate() +{ return NGT_GIT_DATE; } -const std::string +const std::string NGT::Version::getGitTag() -{ - return NGT_GIT_TAG; +{ + return NGT_GIT_TAG; } diff --git a/lib/NGT/half.hpp b/lib/NGT/half.hpp index 31321bb..bc5798d 100644 --- a/lib/NGT/half.hpp +++ b/lib/NGT/half.hpp @@ -1,305 +1,305 @@ -// half - IEEE 754-based half-precision floating-point library. -// -// Copyright (c) 2012-2021 Christian Rau -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -// Version 2.2.0 +// half - IEEE 754-based half-precision floating-point library. +// +// Copyright (c) 2012-2021 Christian Rau +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +// Version 2.2.0 -#ifndef HALF_HALF_HPP -#define HALF_HALF_HPP - -#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) - -#if defined(__INTEL_COMPILER) - #define HALF_ICC_VERSION __INTEL_COMPILER -#elif defined(__ICC) - #define HALF_ICC_VERSION __ICC -#elif defined(__ICL) - #define HALF_ICC_VERSION __ICL -#else - #define HALF_ICC_VERSION 0 -#endif - -// check C++11 language features -#if defined(__clang__) // clang - #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif -#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__) // Intel C++ - #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif -#elif defined(__GNUC__) // gcc - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L - #if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #endif - #define HALF_TWOS_COMPLEMENT_INT 1 -#elif defined(_MSC_VER) // Visual C++ - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) - #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #define HALF_TWOS_COMPLEMENT_INT 1 - #define HALF_POP_WARNINGS 1 - #pragma warning(push) - #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned -#endif - -// check C++11 library features -#include -#if defined(_LIBCPP_VERSION) // libc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #ifndef HALF_ENABLE_CPP11_CFENV - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #endif -#elif defined(__GLIBCXX__) // libstdc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifdef __clang__ - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #else - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif - #endif - #endif -#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ - #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV) - #define HALF_ENABLE_CPP11_CFENV 1 - #endif -#endif -#undef HALF_GCC_VERSION -#undef HALF_ICC_VERSION - -// any error throwing C++ exceptions? -#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT) -#define HALF_ERRHANDLING_THROWS 1 -#endif - -// any error handling enabled? -#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) - -#if HALF_ERRHANDLING - #define HALF_UNUSED_NOERR(name) name -#else - #define HALF_UNUSED_NOERR(name) -#endif - -// support constexpr -#if HALF_ENABLE_CPP11_CONSTEXPR - #define HALF_CONSTEXPR constexpr - #define HALF_CONSTEXPR_CONST constexpr - #if HALF_ERRHANDLING - #define HALF_CONSTEXPR_NOERR - #else - #define HALF_CONSTEXPR_NOERR constexpr - #endif -#else - #define HALF_CONSTEXPR - #define HALF_CONSTEXPR_CONST const - #define HALF_CONSTEXPR_NOERR -#endif - -// support noexcept -#if HALF_ENABLE_CPP11_NOEXCEPT - #define HALF_NOEXCEPT noexcept - #define HALF_NOTHROW noexcept -#else - #define HALF_NOEXCEPT - #define HALF_NOTHROW throw() -#endif - -// support thread storage -#if HALF_ENABLE_CPP11_THREAD_LOCAL - #define HALF_THREAD_LOCAL thread_local -#else - #define HALF_THREAD_LOCAL static -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if HALF_ENABLE_CPP11_TYPE_TRAITS - #include -#endif -#if HALF_ENABLE_CPP11_CSTDINT - #include -#endif -#if HALF_ERRHANDLING_ERRNO - #include -#endif -#if HALF_ENABLE_CPP11_CFENV - #include -#endif -#if HALF_ENABLE_CPP11_HASH - #include -#endif - - -#ifndef HALF_ENABLE_F16C_INTRINSICS - - - - - - - #define HALF_ENABLE_F16C_INTRINSICS __F16C__ -#endif -#if HALF_ENABLE_F16C_INTRINSICS - #include -#endif - -#ifdef HALF_DOXYGEN_ONLY +#ifndef HALF_HALF_HPP +#define HALF_HALF_HPP + +#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) + +#if defined(__INTEL_COMPILER) + #define HALF_ICC_VERSION __INTEL_COMPILER +#elif defined(__ICC) + #define HALF_ICC_VERSION __ICC +#elif defined(__ICL) + #define HALF_ICC_VERSION __ICL +#else + #define HALF_ICC_VERSION 0 +#endif + +// check C++11 language features +#if defined(__clang__) // clang + #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if __has_feature(cxx_thread_local) && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif HALF_ICC_VERSION && defined(__INTEL_CXX11_MODE__) // Intel C++ + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_ICC_VERSION >= 1500 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_ICC_VERSION >= 1400 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if HALF_ICC_VERSION >= 1110 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif +#elif defined(__GNUC__) // gcc + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L + #if HALF_GCC_VERSION >= 408 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if HALF_GCC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if HALF_GCC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 +#elif defined(_MSC_VER) // Visual C++ + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_THREAD_LOCAL) + #define HALF_ENABLE_CPP11_THREAD_LOCAL 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) + #define HALF_ENABLE_CPP11_USER_LITERALS 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif + #define HALF_TWOS_COMPLEMENT_INT 1 + #define HALF_POP_WARNINGS 1 + #pragma warning(push) + #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features +#include +#if defined(_LIBCPP_VERSION) // libc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #ifndef HALF_ENABLE_CPP11_CSTDINT + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #ifndef HALF_ENABLE_CPP11_CMATH + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #ifndef HALF_ENABLE_CPP11_HASH + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #ifndef HALF_ENABLE_CPP11_CFENV + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif +#elif defined(__GLIBCXX__) // libstdc++ + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 + #ifdef __clang__ + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #else + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if HALF_GCC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif + #endif + #endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) + #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 + #endif + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_CSTDINT) + #define HALF_ENABLE_CPP11_CSTDINT 1 + #endif + #if _CPPLIB_VER >= 520 && !defined(HALF_ENABLE_CPP11_HASH) + #define HALF_ENABLE_CPP11_HASH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CMATH) + #define HALF_ENABLE_CPP11_CMATH 1 + #endif + #if _CPPLIB_VER >= 610 && !defined(HALF_ENABLE_CPP11_CFENV) + #define HALF_ENABLE_CPP11_CFENV 1 + #endif +#endif +#undef HALF_GCC_VERSION +#undef HALF_ICC_VERSION + +// any error throwing C++ exceptions? +#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT) +#define HALF_ERRHANDLING_THROWS 1 +#endif + +// any error handling enabled? +#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) + +#if HALF_ERRHANDLING + #define HALF_UNUSED_NOERR(name) name +#else + #define HALF_UNUSED_NOERR(name) +#endif + +// support constexpr +#if HALF_ENABLE_CPP11_CONSTEXPR + #define HALF_CONSTEXPR constexpr + #define HALF_CONSTEXPR_CONST constexpr + #if HALF_ERRHANDLING + #define HALF_CONSTEXPR_NOERR + #else + #define HALF_CONSTEXPR_NOERR constexpr + #endif +#else + #define HALF_CONSTEXPR + #define HALF_CONSTEXPR_CONST const + #define HALF_CONSTEXPR_NOERR +#endif + +// support noexcept +#if HALF_ENABLE_CPP11_NOEXCEPT + #define HALF_NOEXCEPT noexcept + #define HALF_NOTHROW noexcept +#else + #define HALF_NOEXCEPT + #define HALF_NOTHROW throw() +#endif + +// support thread storage +#if HALF_ENABLE_CPP11_THREAD_LOCAL + #define HALF_THREAD_LOCAL thread_local +#else + #define HALF_THREAD_LOCAL static +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HALF_ENABLE_CPP11_TYPE_TRAITS + #include +#endif +#if HALF_ENABLE_CPP11_CSTDINT + #include +#endif +#if HALF_ERRHANDLING_ERRNO + #include +#endif +#if HALF_ENABLE_CPP11_CFENV + #include +#endif +#if HALF_ENABLE_CPP11_HASH + #include +#endif + + +#ifndef HALF_ENABLE_F16C_INTRINSICS + + + + + + + #define HALF_ENABLE_F16C_INTRINSICS __F16C__ +#endif +#if HALF_ENABLE_F16C_INTRINSICS + #include +#endif + +#ifdef HALF_DOXYGEN_ONLY -#define HALF_ARITHMETIC_TYPE (undefined) +#define HALF_ARITHMETIC_TYPE (undefined) -#define HALF_ERRHANDLING_FLAGS 0 +#define HALF_ERRHANDLING_FLAGS 0 -#define HALF_ERRHANDLING_ERRNO 0 +#define HALF_ERRHANDLING_ERRNO 0 @@ -307,50 +307,50 @@ -#define HALF_ERRHANDLING_FENV 0 +#define HALF_ERRHANDLING_FENV 0 -#define HALF_ERRHANDLING_THROW_INVALID (undefined) +#define HALF_ERRHANDLING_THROW_INVALID (undefined) -#define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined) +#define HALF_ERRHANDLING_THROW_DIVBYZERO (undefined) -#define HALF_ERRHANDLING_THROW_OVERFLOW (undefined) +#define HALF_ERRHANDLING_THROW_OVERFLOW (undefined) -#define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined) +#define HALF_ERRHANDLING_THROW_UNDERFLOW (undefined) -#define HALF_ERRHANDLING_THROW_INEXACT (undefined) -#endif +#define HALF_ERRHANDLING_THROW_INEXACT (undefined) +#endif -#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT +#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT -#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 -#endif +#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 +#endif -#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT +#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT -#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 -#endif +#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 +#endif @@ -370,170 +370,170 @@ -#ifndef HALF_ROUND_STYLE - #define HALF_ROUND_STYLE 1 // = std::round_to_nearest -#endif +#ifndef HALF_ROUND_STYLE + #define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#endif -#define HUGE_VALH std::numeric_limits::infinity() +#define HUGE_VALH std::numeric_limits::infinity() -#define FP_FAST_FMAH 1 +#define FP_FAST_FMAH 1 -#define HLF_ROUNDS HALF_ROUND_STYLE - -#ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN -#endif -#ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MAX -#endif -#ifndef FP_SUBNORMAL - #define FP_SUBNORMAL 0 -#endif -#ifndef FP_ZERO - #define FP_ZERO 1 -#endif -#ifndef FP_NAN - #define FP_NAN 2 -#endif -#ifndef FP_INFINITE - #define FP_INFINITE 3 -#endif -#ifndef FP_NORMAL - #define FP_NORMAL 4 -#endif - -#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT) - #define FE_INVALID 0x10 - #define FE_DIVBYZERO 0x08 - #define FE_OVERFLOW 0x04 - #define FE_UNDERFLOW 0x02 - #define FE_INEXACT 0x01 - #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) -#endif +#define HLF_ROUNDS HALF_ROUND_STYLE + +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif +#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MAX +#endif +#ifndef FP_SUBNORMAL + #define FP_SUBNORMAL 0 +#endif +#ifndef FP_ZERO + #define FP_ZERO 1 +#endif +#ifndef FP_NAN + #define FP_NAN 2 +#endif +#ifndef FP_INFINITE + #define FP_INFINITE 3 +#endif +#ifndef FP_NORMAL + #define FP_NORMAL 4 +#endif + +#if !HALF_ENABLE_CPP11_CFENV && !defined(FE_ALL_EXCEPT) + #define FE_INVALID 0x10 + #define FE_DIVBYZERO 0x08 + #define FE_OVERFLOW 0x04 + #define FE_UNDERFLOW 0x02 + #define FE_INEXACT 0x01 + #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) +#endif -namespace half_float -{ - class half; +namespace half_float +{ + class half; -#if HALF_ENABLE_CPP11_USER_LITERALS +#if HALF_ENABLE_CPP11_USER_LITERALS - namespace literal - { - half operator "" _h(long double); - } -#endif + namespace literal + { + half operator "" _h(long double); + } +#endif - namespace detail - { - #if HALF_ENABLE_CPP11_TYPE_TRAITS + namespace detail + { + #if HALF_ENABLE_CPP11_TYPE_TRAITS - template struct conditional : std::conditional {}; + template struct conditional : std::conditional {}; - template struct bool_type : std::integral_constant {}; - using std::true_type; - using std::false_type; + template struct bool_type : std::integral_constant {}; + using std::true_type; + using std::false_type; - template struct is_float : std::is_floating_point {}; - #else + template struct is_float : std::is_floating_point {}; + #else - template struct conditional { typedef T type; }; - template struct conditional { typedef F type; }; + template struct conditional { typedef T type; }; + template struct conditional { typedef F type; }; - template struct bool_type {}; - typedef bool_type true_type; - typedef bool_type false_type; + template struct bool_type {}; + typedef bool_type true_type; + typedef bool_type false_type; - template struct is_float : false_type {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - #endif + template struct is_float : false_type {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template struct is_float : is_float {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + template<> struct is_float : true_type {}; + #endif - template struct bits { typedef unsigned char type; }; - template struct bits : bits {}; - template struct bits : bits {}; - template struct bits : bits {}; + template struct bits { typedef unsigned char type; }; + template struct bits : bits {}; + template struct bits : bits {}; + template struct bits : bits {}; - #if HALF_ENABLE_CPP11_CSTDINT + #if HALF_ENABLE_CPP11_CSTDINT - typedef std::uint_least16_t uint16; + typedef std::uint_least16_t uint16; - typedef std::uint_fast32_t uint32; + typedef std::uint_fast32_t uint32; - typedef std::int_fast32_t int32; + typedef std::int_fast32_t int32; - template<> struct bits { typedef std::uint_least32_t type; }; + template<> struct bits { typedef std::uint_least32_t type; }; - template<> struct bits { typedef std::uint_least64_t type; }; - #else + template<> struct bits { typedef std::uint_least64_t type; }; + #else - typedef unsigned short uint16; + typedef unsigned short uint16; - typedef unsigned long uint32; + typedef unsigned long uint32; - typedef long int32; + typedef long int32; - template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; + template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; - #if HALF_ENABLE_CPP11_LONG_LONG + #if HALF_ENABLE_CPP11_LONG_LONG - template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; - #else + template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; + #else - template<> struct bits { typedef unsigned long type; }; - #endif - #endif + template<> struct bits { typedef unsigned long type; }; + #endif + #endif - #ifdef HALF_ARITHMETIC_TYPE + #ifdef HALF_ARITHMETIC_TYPE - typedef HALF_ARITHMETIC_TYPE internal_t; - #endif + typedef HALF_ARITHMETIC_TYPE internal_t; + #endif - struct binary_t {}; + struct binary_t {}; - HALF_CONSTEXPR_CONST binary_t binary = binary_t(); + HALF_CONSTEXPR_CONST binary_t binary = binary_t(); @@ -543,73 +543,73 @@ namespace half_float - template bool builtin_isinf(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); - #elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); - #else - return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); - #endif - } + template bool builtin_isinf(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); + #elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); + #else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); + #endif + } - template bool builtin_isnan(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); - #elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; - #else - return arg != arg; - #endif - } + template bool builtin_isnan(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); + #elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; + #else + return arg != arg; + #endif + } - template bool builtin_signbit(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); - #else - return arg < T() || (arg == T() && T(1)/arg < T()); - #endif - } + template bool builtin_signbit(T arg) + { + #if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); + #else + return arg < T() || (arg == T() && T(1)/arg < T()); + #endif + } - inline uint32 sign_mask(uint32 arg) - { - static const int N = std::numeric_limits::digits - 1; - #if HALF_TWOS_COMPLEMENT_INT - return static_cast(arg) >> N; - #else - return -((arg>>N)&1); - #endif - } + inline uint32 sign_mask(uint32 arg) + { + static const int N = std::numeric_limits::digits - 1; + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> N; + #else + return -((arg>>N)&1); + #endif + } - inline uint32 arithmetic_shift(uint32 arg, int i) - { - #if HALF_TWOS_COMPLEMENT_INT - return static_cast(arg) >> i; - #else - return static_cast(arg)/(static_cast(1)<>(std::numeric_limits::digits-1))&1); - #endif - } + inline uint32 arithmetic_shift(uint32 arg, int i) + { + #if HALF_TWOS_COMPLEMENT_INT + return static_cast(arg) >> i; + #else + return static_cast(arg)/(static_cast(1)<>(std::numeric_limits::digits-1))&1); + #endif + } @@ -617,58 +617,58 @@ namespace half_float - inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } + inline int& errflags() { HALF_THREAD_LOCAL int flags = 0; return flags; } - inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) - { - #if HALF_ERRHANDLING - if(!cond) - return; - #if HALF_ERRHANDLING_FLAGS - errflags() |= flags; - #endif - #if HALF_ERRHANDLING_ERRNO - if(flags & FE_INVALID) - errno = EDOM; - else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW)) - errno = ERANGE; - #endif - #if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV - std::feraiseexcept(flags); - #endif - #ifdef HALF_ERRHANDLING_THROW_INVALID - if(flags & FE_INVALID) - throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID); - #endif - #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO - if(flags & FE_DIVBYZERO) - throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO); - #endif - #ifdef HALF_ERRHANDLING_THROW_OVERFLOW - if(flags & FE_OVERFLOW) - throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW); - #endif - #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW - if(flags & FE_UNDERFLOW) - throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW); - #endif - #ifdef HALF_ERRHANDLING_THROW_INEXACT - if(flags & FE_INEXACT) - throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT); - #endif - #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT - if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT)) - raise(FE_INEXACT); - #endif - #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT - if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT)) - raise(FE_INEXACT); - #endif - #endif - } + inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) + { + #if HALF_ERRHANDLING + if(!cond) + return; + #if HALF_ERRHANDLING_FLAGS + errflags() |= flags; + #endif + #if HALF_ERRHANDLING_ERRNO + if(flags & FE_INVALID) + errno = EDOM; + else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW)) + errno = ERANGE; + #endif + #if HALF_ERRHANDLING_FENV && HALF_ENABLE_CPP11_CFENV + std::feraiseexcept(flags); + #endif + #ifdef HALF_ERRHANDLING_THROW_INVALID + if(flags & FE_INVALID) + throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID); + #endif + #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO + if(flags & FE_DIVBYZERO) + throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO); + #endif + #ifdef HALF_ERRHANDLING_THROW_OVERFLOW + if(flags & FE_OVERFLOW) + throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW + if(flags & FE_UNDERFLOW) + throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW); + #endif + #ifdef HALF_ERRHANDLING_THROW_INEXACT + if(flags & FE_INEXACT) + throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT); + #endif + #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT + if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT)) + raise(FE_INEXACT); + #endif + #endif + } @@ -676,38 +676,38 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00); - #endif - return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; - } + inline HALF_CONSTEXPR_NOERR bool compsignal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00); + #endif + return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00; + } - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, !(nan&0x200)); - #endif - return nan | 0x200; - } + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int nan) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, !(nan&0x200)); + #endif + return nan | 0x200; + } - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200))); - #endif - return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); - } + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200); + } @@ -715,62 +715,62 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) - { - #if HALF_ERRHANDLING - raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200))); - #endif - return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); - } + inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) + { + #if HALF_ERRHANDLING + raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200))); + #endif + return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200); + } - inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) - { - #if HALF_ERRHANDLING - return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x; - #else - return x; - #endif - } + inline HALF_CONSTEXPR_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) + { + #if HALF_ERRHANDLING + return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x; + #else + return x; + #endif + } - inline HALF_CONSTEXPR_NOERR unsigned int invalid() - { - #if HALF_ERRHANDLING - raise(FE_INVALID); - #endif - return 0x7FFF; - } + inline HALF_CONSTEXPR_NOERR unsigned int invalid() + { + #if HALF_ERRHANDLING + raise(FE_INVALID); + #endif + return 0x7FFF; + } - inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_DIVBYZERO); - #endif - return sign | 0x7C00; - } + inline HALF_CONSTEXPR_NOERR unsigned int pole(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_DIVBYZERO); + #endif + return sign | 0x7C00; + } - inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) - { - #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT - raise(FE_UNDERFLOW, !(arg&0x7C00)); - #endif - return arg; - } + inline HALF_CONSTEXPR_NOERR unsigned int check_underflow(unsigned int arg) + { + #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT + raise(FE_UNDERFLOW, !(arg&0x7C00)); + #endif + return arg; + } @@ -781,31 +781,31 @@ namespace half_float - template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_OVERFLOW); - #endif - return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) : - (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) : - (R==std::round_toward_zero) ? (sign|0x7BFF) : - (sign|0x7C00); - } + template HALF_CONSTEXPR_NOERR unsigned int overflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_OVERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) : + (R==std::round_toward_zero) ? (sign|0x7BFF) : + (sign|0x7C00); + } - template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) - { - #if HALF_ERRHANDLING - raise(FE_UNDERFLOW); - #endif - return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) : - (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) : - sign; - } + template HALF_CONSTEXPR_NOERR unsigned int underflow(unsigned int sign = 0) + { + #if HALF_ERRHANDLING + raise(FE_UNDERFLOW); + #endif + return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) : + (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) : + sign; + } @@ -817,26 +817,26 @@ namespace half_float - template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) - { - #if HALF_ERRHANDLING - value += (R==std::round_to_nearest) ? (g&(s|value)) : - (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) : - (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0; - if((value&0x7C00) == 0x7C00) - raise(FE_OVERFLOW); - else if(value & 0x7C00) - raise(FE_INEXACT, I || (g|s)!=0); - else - raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0); - return value; - #else - return (R==std::round_to_nearest) ? (value+(g&(s|value))) : - (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) : - (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) : - value; - #endif - } + template HALF_CONSTEXPR_NOERR unsigned int rounded(unsigned int value, int g, int s) + { + #if HALF_ERRHANDLING + value += (R==std::round_to_nearest) ? (g&(s|value)) : + (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) : + (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0; + if((value&0x7C00) == 0x7C00) + raise(FE_OVERFLOW); + else if(value & 0x7C00) + raise(FE_INEXACT, I || (g|s)!=0); + else + raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0); + return value; + #else + return (R==std::round_to_nearest) ? (value+(g&(s|value))) : + (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) : + (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) : + value; + #endif + } @@ -846,26 +846,26 @@ namespace half_float - template unsigned int integral(unsigned int value) - { - unsigned int abs = value & 0x7FFF; - if(abs < 0x3C00) - { - raise(FE_INEXACT, I); - return ((R==std::round_to_nearest) ? (0x3C00&-static_cast(abs>=(0x3800+E))) : - (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) : - (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast(value>0x8000)) : - 0) | (value&0x8000); - } - if(abs >= 0x6400) - return (abs>0x7C00) ? signal(value) : value; - unsigned int exp = 25 - (abs>>10), mask = (1<>exp)&E)) : - (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) : - (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) : - 0) + value) & ~mask; - } + template unsigned int integral(unsigned int value) + { + unsigned int abs = value & 0x7FFF; + if(abs < 0x3C00) + { + raise(FE_INEXACT, I); + return ((R==std::round_to_nearest) ? (0x3C00&-static_cast(abs>=(0x3800+E))) : + (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) : + (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast(value>0x8000)) : + 0) | (value&0x8000); + } + if(abs >= 0x6400) + return (abs>0x7C00) ? signal(value) : value; + unsigned int exp = 25 - (abs>>10), mask = (1<>exp)&E)) : + (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) : + (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) : + 0) + value) & ~mask; + } @@ -881,20 +881,20 @@ namespace half_float - template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) - { - if(S) - { - uint32 msign = sign_mask(m); - m = (m^msign) - msign; - sign = msign & 0x8000; - } - if(N) - for(; m<(static_cast(1)<(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast(1)<<(F-11-exp))-1))!=0)); - return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); - } + template unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) + { + if(S) + { + uint32 msign = sign_mask(m); + m = (m^msign) - msign; + sign = msign & 0x8000; + } + if(N) + for(; m<(static_cast(1)<(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast(1)<<(F-11-exp))-1))!=0)); + return rounded(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast(1)<<(F-11))-1))!=0)); + } @@ -904,406 +904,406 @@ namespace half_float - template unsigned int float2half_impl(float value, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), - (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT : - (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO : - (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF : - (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF : - _MM_FROUND_CUR_DIRECTION)); - #else - bits::type fbits; - std::memcpy(&fbits, &value, sizeof(float)); - #if 1 - unsigned int sign = (fbits>>16) & 0x8000; - fbits &= 0x7FFFFFFF; - if(fbits >= 0x7F800000) - return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0); - if(fbits >= 0x47800000) - return overflow(sign); - if(fbits >= 0x38800000) - return rounded(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0); - if(fbits >= 0x33000000) - { - int i = 125 - (fbits>>23); - fbits = (fbits&0x7FFFFF) | 0x800000; - return rounded(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast(1)<(sign); - return sign; - #else - static const uint16 base_table[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, - 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, - 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, - 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, - 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, - 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, - 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 }; - static const unsigned char shift_table[256] = { - 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; - int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp]; - fbits &= 0x7FFFFF; - uint32 m = (fbits|((exp!=0)<<23)) & -static_cast(exp!=0xFF); - return rounded(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast(1)<<(i-1))-1)&m)!=0); - #endif - #endif - } - - - - - - - - - template unsigned int float2half_impl(double value, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - if(R == std::round_indeterminate) - return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION)); - #endif - bits::type dbits; - std::memcpy(&dbits, &value, sizeof(double)); - uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF; - unsigned int sign = (hi>>16) & 0x8000; - hi &= 0x7FFFFFFF; - if(hi >= 0x7FF00000) - return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0); - if(hi >= 0x40F00000) - return overflow(sign); - if(hi >= 0x3F100000) - return rounded(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0); - if(hi >= 0x3E600000) - { - int i = 1018 - (hi>>20); - hi = (hi&0xFFFFF) | 0x100000; - return rounded(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast(1)<(sign); - return sign; - } - - - - - - - - - - template unsigned int float2half_impl(T value, ...) - { - unsigned int hbits = static_cast(builtin_signbit(value)) << 15; - if(value == T()) - return hbits; - if(builtin_isnan(value)) - return hbits | 0x7FFF; - if(builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if(exp > 16) - return overflow(hbits); - if(exp < -13) - value = std::ldexp(value, 25); - else - { - value = std::ldexp(value, 12-exp); - hbits |= ((exp+13)<<10); - } - T ival, frac = std::modf(value, &ival); - int m = std::abs(static_cast(ival)); - return rounded(hbits+(m>>1), m&1, frac!=T()); - } - - - - - - - - - - template unsigned int float2half(T value) - { - return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - - - - - - - - template unsigned int int2half(T value) - { - unsigned int bits = static_cast(value<0) << 15; - if(!value) - return bits; - if(bits) - value = -value; - if(value > 0xFFFF) - return overflow(bits); - unsigned int m = static_cast(value), exp = 24; - for(; m<0x400; m<<=1,--exp) ; - for(; m>0x7FF; m>>=1,++exp) ; - bits |= (exp<<10) + m; - return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; - } - - - - - - inline float half2float_impl(unsigned int value, float, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); - #else - #if 0 - bits::type fbits = static_cast::type>(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - fbits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,fbits-=0x800000) ; - fbits += static_cast::type>(abs) << 13; - } - #else - static const bits::type mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, - 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, - 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, - 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, - 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, - 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, - 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, - 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, - 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, - 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, - 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, - 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, - 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, - 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, - 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, - 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, - 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, - 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, - 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, - 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, - 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, - 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, - 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, - 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, - 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, - 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, - 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, - 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, - 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, - 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, - 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, - 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, - 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, - 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, - 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, - 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, - 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, - 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, - 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, - 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, - 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, - 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, - 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, - 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, - 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, - 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, - 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, - 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, - 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, - 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, - 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; - static const bits::type exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; - bits::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; - #endif - float out; - std::memcpy(&out, &fbits, sizeof(float)); - return out; - #endif - } - - - - - inline double half2float_impl(unsigned int value, double, true_type) - { - #if HALF_ENABLE_F16C_INTRINSICS - return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value)))); - #else - uint32 hi = static_cast(value&0x8000) << 16; - unsigned int abs = value & 0x7FFF; - if(abs) - { - hi |= 0x3F000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,hi-=0x100000) ; - hi += static_cast(abs) << 10; - } - bits::type dbits = static_cast::type>(hi) << 32; - double out; - std::memcpy(&out, &dbits, sizeof(double)); - return out; - #endif - } - - - - - - template T half2float_impl(unsigned int value, T, ...) - { - T out; - unsigned int abs = value & 0x7FFF; - if(abs > 0x7C00) - out = (std::numeric_limits::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits::signaling_NaN() : - std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); - else if(abs == 0x7C00) - out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); - else if(abs > 0x3FF) - out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); - else - out = std::ldexp(static_cast(abs), -24); - return (value&0x8000) ? -out : out; - } - - - - - - template T half2float(unsigned int value) - { - return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } + template unsigned int float2half_impl(float value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), + (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT : + (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO : + (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF : + (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF : + _MM_FROUND_CUR_DIRECTION)); + #else + bits::type fbits; + std::memcpy(&fbits, &value, sizeof(float)); + #if 1 + unsigned int sign = (fbits>>16) & 0x8000; + fbits &= 0x7FFFFFFF; + if(fbits >= 0x7F800000) + return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0); + if(fbits >= 0x47800000) + return overflow(sign); + if(fbits >= 0x38800000) + return rounded(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0); + if(fbits >= 0x33000000) + { + int i = 125 - (fbits>>23); + fbits = (fbits&0x7FFFFF) | 0x800000; + return rounded(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast(1)<(sign); + return sign; + #else + static const uint16 base_table[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, + 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, + 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, + 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, + 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, + 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, + 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 }; + static const unsigned char shift_table[256] = { + 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; + int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp]; + fbits &= 0x7FFFFF; + uint32 m = (fbits|((exp!=0)<<23)) & -static_cast(exp!=0xFF); + return rounded(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast(1)<<(i-1))-1)&m)!=0); + #endif + #endif + } + + + + + + + + + template unsigned int float2half_impl(double value, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + if(R == std::round_indeterminate) + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION)); + #endif + bits::type dbits; + std::memcpy(&dbits, &value, sizeof(double)); + uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF; + unsigned int sign = (hi>>16) & 0x8000; + hi &= 0x7FFFFFFF; + if(hi >= 0x7FF00000) + return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0); + if(hi >= 0x40F00000) + return overflow(sign); + if(hi >= 0x3F100000) + return rounded(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0); + if(hi >= 0x3E600000) + { + int i = 1018 - (hi>>20); + hi = (hi&0xFFFFF) | 0x100000; + return rounded(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast(1)<(sign); + return sign; + } + + + + + + + + + + template unsigned int float2half_impl(T value, ...) + { + unsigned int hbits = static_cast(builtin_signbit(value)) << 15; + if(value == T()) + return hbits; + if(builtin_isnan(value)) + return hbits | 0x7FFF; + if(builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if(exp > 16) + return overflow(hbits); + if(exp < -13) + value = std::ldexp(value, 25); + else + { + value = std::ldexp(value, 12-exp); + hbits |= ((exp+13)<<10); + } + T ival, frac = std::modf(value, &ival); + int m = std::abs(static_cast(ival)); + return rounded(hbits+(m>>1), m&1, frac!=T()); + } + + + + + + + + + + template unsigned int float2half(T value) + { + return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); + } + + + + + + + + + template unsigned int int2half(T value) + { + unsigned int bits = static_cast(value<0) << 15; + if(!value) + return bits; + if(bits) + value = -value; + if(value > 0xFFFF) + return overflow(bits); + unsigned int m = static_cast(value), exp = 24; + for(; m<0x400; m<<=1,--exp) ; + for(; m>0x7FF; m>>=1,++exp) ; + bits |= (exp<<10) + m; + return (exp>24) ? rounded(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits; + } + + + + + + inline float half2float_impl(unsigned int value, float, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); + #else + #if 0 + bits::type fbits = static_cast::type>(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + fbits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,fbits-=0x800000) ; + fbits += static_cast::type>(abs) << 13; + } + #else + static const bits::type mantissa_table[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, + 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, + 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, + 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, + 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, + 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, + 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, + 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, + 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, + 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, + 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, + 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, + 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, + 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, + 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, + 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, + 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, + 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, + 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, + 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, + 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, + 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, + 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, + 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, + 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, + 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, + 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, + 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, + 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, + 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, + 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, + 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, + 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, + 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, + 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, + 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, + 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, + 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, + 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, + 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, + 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, + 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, + 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, + 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, + 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, + 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, + 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, + 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, + 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, + 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, + 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, + 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, + 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, + 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, + 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, + 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, + 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, + 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, + 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, + 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, + 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, + 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, + 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, + 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, + 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, + 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, + 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, + 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, + 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, + 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, + 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, + 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, + 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, + 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, + 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, + 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, + 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, + 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, + 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, + 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, + 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, + 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, + 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, + 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, + 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, + 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, + 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, + 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; + static const bits::type exponent_table[64] = { + 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, + 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, + 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, + 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; + static const unsigned short offset_table[64] = { + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; + bits::type fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; + #endif + float out; + std::memcpy(&out, &fbits, sizeof(float)); + return out; + #endif + } + + + + + inline double half2float_impl(unsigned int value, double, true_type) + { + #if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value)))); + #else + uint32 hi = static_cast(value&0x8000) << 16; + unsigned int abs = value & 0x7FFF; + if(abs) + { + hi |= 0x3F000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,hi-=0x100000) ; + hi += static_cast(abs) << 10; + } + bits::type dbits = static_cast::type>(hi) << 32; + double out; + std::memcpy(&out, &dbits, sizeof(double)); + return out; + #endif + } + + + + + + template T half2float_impl(unsigned int value, T, ...) + { + T out; + unsigned int abs = value & 0x7FFF; + if(abs > 0x7C00) + out = (std::numeric_limits::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits::signaling_NaN() : + std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); + else if(abs == 0x7C00) + out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); + else if(abs > 0x3FF) + out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); + else + out = std::ldexp(static_cast(abs), -24); + return (value&0x8000) ? -out : out; + } + + + + + + template T half2float(unsigned int value) + { + return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); + } @@ -1314,34 +1314,34 @@ namespace half_float - template T half2int(unsigned int value) - { - unsigned int abs = value & 0x7FFF; - if(abs >= 0x7C00) - { - raise(FE_INVALID); - return (value&0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); - } - if(abs < 0x3800) - { - raise(FE_INEXACT, I); - return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) : - (R==std::round_toward_neg_infinity) ? -T(value>0x8000) : - T(); - } - int exp = 25 - (abs>>10); - unsigned int m = (value&0x3FF) | 0x400; - int32 i = static_cast((exp<=0) ? (m<<-exp) : ((m+( - (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) : - (R==std::round_toward_infinity) ? (((1<>15)-1)) : - (R==std::round_toward_neg_infinity) ? (((1<>15)) : 0))>>exp)); - if((!std::numeric_limits::is_signed && (value&0x8000)) || (std::numeric_limits::digits<16 && - ((value&0x8000) ? (-i::min()) : (i>std::numeric_limits::max())))) - raise(FE_INVALID); - else if(I && exp > 0 && (m&((1<((value&0x8000) ? -i : i); - } + template T half2int(unsigned int value) + { + unsigned int abs = value & 0x7FFF; + if(abs >= 0x7C00) + { + raise(FE_INVALID); + return (value&0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); + } + if(abs < 0x3800) + { + raise(FE_INEXACT, I); + return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) : + (R==std::round_toward_neg_infinity) ? -T(value>0x8000) : + T(); + } + int exp = 25 - (abs>>10); + unsigned int m = (value&0x3FF) | 0x400; + int32 i = static_cast((exp<=0) ? (m<<-exp) : ((m+( + (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) : + (R==std::round_toward_infinity) ? (((1<>15)-1)) : + (R==std::round_toward_neg_infinity) ? (((1<>15)) : 0))>>exp)); + if((!std::numeric_limits::is_signed && (value&0x8000)) || (std::numeric_limits::digits<16 && + ((value&0x8000) ? (-i::min()) : (i>std::numeric_limits::max())))) + raise(FE_INVALID); + else if(I && exp > 0 && (m&((1<((value&0x8000) ? -i : i); + } @@ -1352,52 +1352,52 @@ namespace half_float - template uint32 mulhi(uint32 x, uint32 y) - { - uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); - return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) + - ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); - } - - - - - - inline uint32 multiply64(uint32 x, uint32 y) - { - #if HALF_ENABLE_CPP11_LONG_LONG - return static_cast((static_cast(x)*static_cast(y)+0x80000000)>>32); - #else - return mulhi(x, y); - #endif - } + template uint32 mulhi(uint32 x, uint32 y) + { + uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16); + return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) + + ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0); + } + + + + + + inline uint32 multiply64(uint32 x, uint32 y) + { + #if HALF_ENABLE_CPP11_LONG_LONG + return static_cast((static_cast(x)*static_cast(y)+0x80000000)>>32); + #else + return mulhi(x, y); + #endif + } - inline uint32 divide64(uint32 x, uint32 y, int &s) - { - #if HALF_ENABLE_CPP11_LONG_LONG - unsigned long long xx = static_cast(x) << 32; - return s = (xx%y!=0), static_cast(xx/y); - #else - y >>= 1; - uint32 rem = x, div = 0; - for(unsigned int i=0; i<32; ++i) - { - div <<= 1; - if(rem >= y) - { - rem -= y; - div |= 1; - } - rem <<= 1; - } - return s = rem > 1, div; - #endif - } + inline uint32 divide64(uint32 x, uint32 y, int &s) + { + #if HALF_ENABLE_CPP11_LONG_LONG + unsigned long long xx = static_cast(x) << 32; + return s = (xx%y!=0), static_cast(xx/y); + #else + y >>= 1; + uint32 rem = x, div = 0; + for(unsigned int i=0; i<32; ++i) + { + div <<= 1; + if(rem >= y) + { + rem -= y; + div |= 1; + } + rem <<= 1; + } + return s = rem > 1, div; + #endif + } @@ -1406,273 +1406,273 @@ namespace half_float - template unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) - { - unsigned int q = 0; - if(x > y) - { - int absx = x, absy = y, expx = 0, expy = 0; - for(; absx<0x400; absx<<=1,--expx) ; - for(; absy<0x400; absy<<=1,--expy) ; - expx += absx >> 10; - expy += absy >> 10; - int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; - for(int d=expx-expy; d; --d) - { - if(!Q && mx == my) - return 0; - if(mx >= my) - { - mx -= my; - q += Q; - } - mx <<= 1; - q <<= static_cast(Q); - } - if(!Q && mx == my) - return 0; - if(mx >= my) - { - mx -= my; - ++q; - } - if(Q) - { - q &= (1<<(std::numeric_limits::digits-1)) - 1; - if(!mx) - return *quo = q, 0; - } - for(; mx<0x400; mx<<=1,--expy) ; - x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy)); - } - if(R) - { - unsigned int a, b; - if(y < 0x800) - { - a = (x<0x400) ? (x<<1) : (x+0x400); - b = y; - } - else - { - a = x; - b = y - 0x400; - } - if(a > b || (a == b && (q&1))) - { - int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF); - int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d)); - for(; m<0x800 && exp>1; m<<=1,--exp) ; - x = 0x8000 + ((exp-1)<<10) + (m>>1); - q += Q; - } - } - if(Q) - *quo = q; - return x; - } - - - - - - - template uint32 sqrt(uint32 &r, int &exp) - { - int i = exp & 1; - r <<= i; - exp = (exp-i) / 2; - uint32 m = 0; - for(uint32 bit=static_cast(1)<>=2) - { - if(r < m+bit) - m >>= 1; - else - { - r -= m + bit; - m = (m>>1) + bit; - } - } - return m; - } - - - - - - - inline uint32 exp2(uint32 m, unsigned int n = 32) - { - static const uint32 logs[] = { - 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, - 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, - 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, - 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; - if(!m) - return 0x80000000; - uint32 mx = 0x80000000, my = 0; - for(unsigned int i=1; i> i; - } - } - return mx; - } - - - - - - - inline uint32 log2(uint32 m, unsigned int n = 32) - { - static const uint32 logs[] = { - 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, - 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, - 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, - 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; - if(m == 0x40000000) - return 0; - uint32 mx = 0x40000000, my = 0; - for(unsigned int i=1; i>i); - if(mz <= m) - { - mx = mz; - my += logs[i]; - } - } - return my; - } - - - - - - - inline std::pair sincos(uint32 mz, unsigned int n = 31) - { - static const uint32 angles[] = { - 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55, - 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, - 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080, - 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 }; - uint32 mx = 0x26DD3B6A, my = 0; - for(unsigned int i=0; i unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) + { + unsigned int q = 0; + if(x > y) + { + int absx = x, absy = y, expx = 0, expy = 0; + for(; absx<0x400; absx<<=1,--expx) ; + for(; absy<0x400; absy<<=1,--expy) ; + expx += absx >> 10; + expy += absy >> 10; + int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; + for(int d=expx-expy; d; --d) + { + if(!Q && mx == my) + return 0; + if(mx >= my) + { + mx -= my; + q += Q; + } + mx <<= 1; + q <<= static_cast(Q); + } + if(!Q && mx == my) + return 0; + if(mx >= my) + { + mx -= my; + ++q; + } + if(Q) + { + q &= (1<<(std::numeric_limits::digits-1)) - 1; + if(!mx) + return *quo = q, 0; + } + for(; mx<0x400; mx<<=1,--expy) ; + x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy)); + } + if(R) + { + unsigned int a, b; + if(y < 0x800) + { + a = (x<0x400) ? (x<<1) : (x+0x400); + b = y; + } + else + { + a = x; + b = y - 0x400; + } + if(a > b || (a == b && (q&1))) + { + int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF); + int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d)); + for(; m<0x800 && exp>1; m<<=1,--exp) ; + x = 0x8000 + ((exp-1)<<10) + (m>>1); + q += Q; + } + } + if(Q) + *quo = q; + return x; + } + + + + + + + template uint32 sqrt(uint32 &r, int &exp) + { + int i = exp & 1; + r <<= i; + exp = (exp-i) / 2; + uint32 m = 0; + for(uint32 bit=static_cast(1)<>=2) + { + if(r < m+bit) + m >>= 1; + else + { + r -= m + bit; + m = (m>>1) + bit; + } + } + return m; + } + + + + + + + inline uint32 exp2(uint32 m, unsigned int n = 32) + { + static const uint32 logs[] = { + 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, + 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, + 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, + 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; + if(!m) + return 0x80000000; + uint32 mx = 0x80000000, my = 0; + for(unsigned int i=1; i> i; + } + } + return mx; + } + + + + + + + inline uint32 log2(uint32 m, unsigned int n = 32) + { + static const uint32 logs[] = { + 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B, + 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153, + 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171, + 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 }; + if(m == 0x40000000) + return 0; + uint32 mx = 0x40000000, my = 0; + for(unsigned int i=1; i>i); + if(mz <= m) + { + mx = mz; + my += logs[i]; + } + } + return my; + } + + + + + + + inline std::pair sincos(uint32 mz, unsigned int n = 31) + { + static const uint32 angles[] = { + 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55, + 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, + 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080, + 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 }; + uint32 mx = 0x26DD3B6A, my = 0; + for(unsigned int i=0; i0x3FF)<<10); - int exp = (abs>>10) + (abs<=0x3FF) - 15; - if(abs < 0x3A48) - return k = 0, m << (exp+20); - #if HALF_ENABLE_CPP11_LONG_LONG - unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi; - uint32 sign = -static_cast(f>>63); - k = static_cast(yi>>(62-exp)); - return (multiply64(static_cast((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign; - #else - uint32 yh = m*0xA2F98 + mulhi(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF; - uint32 mask = (static_cast(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast(yi>yh); - k = static_cast(yi>>(30-exp)); - uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign; - return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign; - #endif - } + + + + inline uint32 angle_arg(unsigned int abs, int &k) + { + uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10); + int exp = (abs>>10) + (abs<=0x3FF) - 15; + if(abs < 0x3A48) + return k = 0, m << (exp+20); + #if HALF_ENABLE_CPP11_LONG_LONG + unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi; + uint32 sign = -static_cast(f>>63); + k = static_cast(yi>>(62-exp)); + return (multiply64(static_cast((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign; + #else + uint32 yh = m*0xA2F98 + mulhi(m, 0x36E4E442), yl = (m*0x36E4E442) & 0xFFFFFFFF; + uint32 mask = (static_cast(1)<<(30-exp)) - 1, yi = (yh+(mask>>1)) & ~mask, sign = -static_cast(yi>yh); + k = static_cast(yi>>(30-exp)); + uint32 fh = (yh^sign) + (yi^~sign) - ~sign, fl = (yl^sign) - sign; + return (multiply64((exp>-1) ? (((fh<<(1+exp))&0xFFFFFFFF)|((fl&0xFFFFFFFF)>>(31-exp))) : fh, 0xC90FDAA2)^sign) - sign; + #endif + } - inline std::pair atan2_args(unsigned int abs) - { - int exp = -15; - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my; - int rexp = 2 * exp; - r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast(1)<<-rexp)-1))!=0)) : 1); - for(rexp=0; r<0x40000000; r<<=1,--rexp) ; - uint32 mx = sqrt<30>(r, rexp); - int d = exp - rexp; - if(d < 0) - return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx); - if(d > 0) - return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx))); - return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); - } + inline std::pair atan2_args(unsigned int abs) + { + int exp = -15; + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my; + int rexp = 2 * exp; + r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast(1)<<-rexp)-1))!=0)) : 1); + for(rexp=0; r<0x40000000; r<<=1,--rexp) ; + uint32 mx = sqrt<30>(r, rexp); + int d = exp - rexp; + if(d < 0) + return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx); + if(d > 0) + return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx))); + return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx); + } - inline std::pair hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) - { - uint32 mx = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; - int e = (abs>>10) + (abs<=0x3FF); - if(e < 14) - { - exp = 0; - mx >>= 14 - e; - } - else - { - exp = mx >> (45-e); - mx = (mx<<(e-14)) & 0x7FFFFFFF; - } - mx = exp2(mx, n); - int d = exp << 1, s; - if(mx > 0x80000000) - { - my = divide64(0x80000000, mx, s); - my |= s; - ++d; - } - else - my = mx; - return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast(1)< hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) + { + uint32 mx = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my; + int e = (abs>>10) + (abs<=0x3FF); + if(e < 14) + { + exp = 0; + mx >>= 14 - e; + } + else + { + exp = mx >> (45-e); + mx = (mx<<(e-14)) & 0x7FFFFFFF; + } + mx = exp2(mx, n); + int d = exp << 1, s; + if(mx > 0x80000000) + { + my = divide64(0x80000000, mx, s); + my |= s; + ++d; + } + else + my = mx; + return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast(1)< unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) - { - if(esign) - { - exp = -exp - (m!=0); - if(exp < -25) - return underflow(sign); - else if(exp == -25) - return rounded(sign, 1, m!=0); - } - else if(exp > 15) - return overflow(sign); - if(!m) - return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp)); - m = exp2(m, n); - int s = 0; - if(esign) - m = divide64(0x80000000, m, s); - return fixed2half(m, exp+14, sign, s); - } + template unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0, unsigned int n = 32) + { + if(esign) + { + exp = -exp - (m!=0); + if(exp < -25) + return underflow(sign); + else if(exp == -25) + return rounded(sign, 1, m!=0); + } + else if(exp > 15) + return overflow(sign); + if(!m) + return sign | (((exp+=15)>0) ? (exp<<10) : check_underflow(0x200>>-exp)); + m = exp2(m, n); + int s = 0; + if(esign) + m = divide64(0x80000000, m, s); + return fixed2half(m, exp+14, sign, s); + } @@ -1717,22 +1717,22 @@ namespace half_float - template unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) - { - uint32 msign = sign_mask(ilog); - m = (((static_cast(ilog)<<27)+(m>>4))^msign) - msign; - if(!m) - return 0; - for(; m<0x80000000; m<<=1,--exp) ; - int i = m >= L, s; - exp += i; - m >>= 1 + i; - sign ^= msign & 0x8000; - if(exp < -11) - return underflow(sign); - m = divide64(m, L, s); - return fixed2half(m, exp, sign, 1); - } + template unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) + { + uint32 msign = sign_mask(ilog); + m = (((static_cast(ilog)<<27)+(m>>4))^msign) - msign; + if(!m) + return 0; + for(; m<0x80000000; m<<=1,--exp) ; + int i = m >= L, s; + exp += i; + m >>= 1 + i; + sign ^= msign & 0x8000; + if(exp < -11) + return underflow(sign); + m = divide64(m, L, s); + return fixed2half(m, exp, sign, 1); + } @@ -1742,17 +1742,17 @@ namespace half_float - template unsigned int hypot_post(uint32 r, int exp) - { - int i = r >> 31; - if((exp+=i) > 46) - return overflow(); - if(exp < -34) - return underflow(); - r = (r>>i) | (r&i); - uint32 m = sqrt<30>(r, exp+=15); - return fixed2half(m, exp-1, 0, r!=0); - } + template unsigned int hypot_post(uint32 r, int exp) + { + int i = r >> 31; + if((exp+=i) > 46) + return overflow(); + if(exp < -34) + return underflow(); + r = (r>>i) | (r&i); + uint32 m = sqrt<30>(r, exp+=15); + return fixed2half(m, exp-1, 0, r!=0); + } @@ -1764,17 +1764,17 @@ namespace half_float - template unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) - { - int i = my >= mx, s; - exp += i; - if(exp > 29) - return overflow(sign); - if(exp < -11) - return underflow(sign); - uint32 m = divide64(my>>(i+1), mx, s); - return fixed2half(m, exp, sign, s); - } + template unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) + { + int i = my >= mx, s; + exp += i; + if(exp > 29) + return overflow(sign); + if(exp < -11) + return underflow(sign); + uint32 m = divide64(my>>(i+1), mx, s); + return fixed2half(m, exp, sign, s); + } @@ -1785,265 +1785,265 @@ namespace half_float - template unsigned int area(unsigned int arg) - { - int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; - uint32 mx = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r; - for(; abs<0x400; abs<<=1,--expy) ; - expy += abs >> 10; - r = ((abs&0x3FF)|0x400) << 5; - r *= r; - i = r >> 31; - expy = 2*expy + i; - r >>= i; - if(S) - { - if(expy < 0) - { - r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast(1)<<-expy)-1))!=0)) : 1); - expy = 0; - } - else - { - r += 0x40000000 >> expy; - i = r >> 31; - r = (r>>i) | (r&i); - expy += i; - } - } - else - { - r -= 0x40000000 >> expy; - for(; r<0x40000000; r<<=1,--expy) ; - } - my = sqrt<30>(r, expy); - my = (my<<15) + (r<<14)/my; - if(S) - { - mx >>= expy - expx; - ilog = expy; - } - else - { - my >>= expx - expy; - ilog = expx; - } - my += mx; - i = my >> 31; - static const int G = S && (R==std::round_to_nearest); - return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast(S)<<15)); - } + template unsigned int area(unsigned int arg) + { + int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i; + uint32 mx = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r; + for(; abs<0x400; abs<<=1,--expy) ; + expy += abs >> 10; + r = ((abs&0x3FF)|0x400) << 5; + r *= r; + i = r >> 31; + expy = 2*expy + i; + r >>= i; + if(S) + { + if(expy < 0) + { + r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast(1)<<-expy)-1))!=0)) : 1); + expy = 0; + } + else + { + r += 0x40000000 >> expy; + i = r >> 31; + r = (r>>i) | (r&i); + expy += i; + } + } + else + { + r -= 0x40000000 >> expy; + for(; r<0x40000000; r<<=1,--expy) ; + } + my = sqrt<30>(r, expy); + my = (my<<15) + (r<<14)/my; + if(S) + { + mx >>= expy - expx; + ilog = expy; + } + else + { + my >>= expx - expy; + ilog = expx; + } + my += mx; + i = my >> 31; + static const int G = S && (R==std::round_to_nearest); + return log2_post(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast(S)<<15)); + } - struct f31 - { + struct f31 + { - HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {} + HALF_CONSTEXPR f31(uint32 mant, int e) : m(mant), exp(e) {} - f31(unsigned int abs) : exp(-15) - { - for(; abs<0x400; abs<<=1,--exp) ; - m = static_cast((abs&0x3FF)|0x400) << 21; - exp += (abs>>10); - } + f31(unsigned int abs) : exp(-15) + { + for(; abs<0x400; abs<<=1,--exp) ; + m = static_cast((abs&0x3FF)|0x400) << 21; + exp += (abs>>10); + } - friend f31 operator+(f31 a, f31 b) - { - if(b.exp > a.exp) - std::swap(a, b); - int d = a.exp - b.exp; - uint32 m = a.m + ((d<32) ? (b.m>>d) : 0); - int i = (m&0xFFFFFFFF) < a.m; - return f31(((m+i)>>i)|0x80000000, a.exp+i); - } + friend f31 operator+(f31 a, f31 b) + { + if(b.exp > a.exp) + std::swap(a, b); + int d = a.exp - b.exp; + uint32 m = a.m + ((d<32) ? (b.m>>d) : 0); + int i = (m&0xFFFFFFFF) < a.m; + return f31(((m+i)>>i)|0x80000000, a.exp+i); + } - friend f31 operator-(f31 a, f31 b) - { - int d = a.exp - b.exp, exp = a.exp; - uint32 m = a.m - ((d<32) ? (b.m>>d) : 0); - if(!m) - return f31(0, -32); - for(; m<0x80000000; m<<=1,--exp) ; - return f31(m, exp); - } + friend f31 operator-(f31 a, f31 b) + { + int d = a.exp - b.exp, exp = a.exp; + uint32 m = a.m - ((d<32) ? (b.m>>d) : 0); + if(!m) + return f31(0, -32); + for(; m<0x80000000; m<<=1,--exp) ; + return f31(m, exp); + } - friend f31 operator*(f31 a, f31 b) - { - uint32 m = multiply64(a.m, b.m); - int i = m >> 31; - return f31(m<<(1-i), a.exp + b.exp + i); - } + friend f31 operator*(f31 a, f31 b) + { + uint32 m = multiply64(a.m, b.m); + int i = m >> 31; + return f31(m<<(1-i), a.exp + b.exp + i); + } - friend f31 operator/(f31 a, f31 b) - { - int i = a.m >= b.m, s; - uint32 m = divide64((a.m+i)>>i, b.m, s); - return f31(m, a.exp - b.exp + i - 1); - } + friend f31 operator/(f31 a, f31 b) + { + int i = a.m >= b.m, s; + uint32 m = divide64((a.m+i)>>i, b.m, s); + return f31(m, a.exp - b.exp + i - 1); + } uint32 m; int exp; - }; - - - - - - - - - - - - template unsigned int erf(unsigned int arg) - { - unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; - f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t; - f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t / - ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<>(31-x2.exp))); - return (!C || sign) ? fixed2half(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) : - (e.exp<-25) ? underflow() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1); - } - - - - - - - - - - - template unsigned int gamma(unsigned int arg) - { -/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 }; - double t = arg + 4.65, s = p[0]; - for(unsigned int i=0; i<5; ++i) - s += p[i+1] / (arg+i); - return std::log(s) + (arg-0.5)*std::log(t) - t; -*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0); - unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; - bool bsign = sign != 0; - f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s = - f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1)) - + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1)); - int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16); - s = f31((static_cast(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe; - if(x.exp != -1 || x.m != 0x80000000) - { - i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8); - f31 l = f31((static_cast(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe; - s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l); - } - s = x.exp ? (s-t) : (t-s); - if(bsign) - { - if(z.exp >= 0) - { - sign &= (L|((z.m>>(31-z.exp))&1)) - 1; - for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ; - } - if(z.exp == -1) - z = f31(0x80000000, 0) - z; - if(z.exp < -1) - { - z = z * pi; - z.m = sincos(z.m>>(1-z.exp), 30).first; - for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ; - } - else - z = f31(0x80000000, 0); - } - if(L) - { - if(bsign) - { - f31 l(0x92868247, 0); - if(z.exp < 0) - { - uint32 m = log2((z.m+1)>>1, 27); - z = f31(-((static_cast(z.exp)<<26)+(m>>5)), 5); - for(; z.m<0x80000000; z.m<<=1,--z.exp) ; - l = l + z / lbe; - } - sign = static_cast(x.exp&&(l.exp(x.exp==0) << 15; - if(s.exp < -24) - return underflow(sign); - if(s.exp > 15) - return overflow(sign); - } - } - else - { - s = s * lbe; - uint32 m; - if(s.exp < 0) - { - m = s.m >> -s.exp; - s.exp = 0; - } - else - { - m = (s.m<>(31-s.exp)); - } - s.m = exp2(m, 27); - if(!x.exp) - s = f31(0x80000000, 0) / s; - if(bsign) - { - if(z.exp < 0) - s = s * z; - s = pi / s; - if(s.exp < -24) - return underflow(sign); - } - else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1))) - return ((s.exp+14)<<10) + (s.m>>21); - if(s.exp > 15) - return overflow(sign); - } - return fixed2half(s.m, s.exp+14, sign); - } + }; + + + + + + + + + + + + template unsigned int erf(unsigned int arg) + { + unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; + f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t; + f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t / + ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<>(31-x2.exp))); + return (!C || sign) ? fixed2half(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) : + (e.exp<-25) ? underflow() : fixed2half(e.m>>1, e.exp+14, 0, e.m&1); + } + + + + + + + + + + + template unsigned int gamma(unsigned int arg) + { +/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 }; + double t = arg + 4.65, s = p[0]; + for(unsigned int i=0; i<5; ++i) + s += p[i+1] / (arg+i); + return std::log(s) + (arg-0.5)*std::log(t) - t; +*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0); + unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; + bool bsign = sign != 0; + f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s = + f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1)) + + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1)); + int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16); + s = f31((static_cast(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe; + if(x.exp != -1 || x.m != 0x80000000) + { + i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8); + f31 l = f31((static_cast(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe; + s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l); + } + s = x.exp ? (s-t) : (t-s); + if(bsign) + { + if(z.exp >= 0) + { + sign &= (L|((z.m>>(31-z.exp))&1)) - 1; + for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ; + } + if(z.exp == -1) + z = f31(0x80000000, 0) - z; + if(z.exp < -1) + { + z = z * pi; + z.m = sincos(z.m>>(1-z.exp), 30).first; + for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ; + } + else + z = f31(0x80000000, 0); + } + if(L) + { + if(bsign) + { + f31 l(0x92868247, 0); + if(z.exp < 0) + { + uint32 m = log2((z.m+1)>>1, 27); + z = f31(-((static_cast(z.exp)<<26)+(m>>5)), 5); + for(; z.m<0x80000000; z.m<<=1,--z.exp) ; + l = l + z / lbe; + } + sign = static_cast(x.exp&&(l.exp(x.exp==0) << 15; + if(s.exp < -24) + return underflow(sign); + if(s.exp > 15) + return overflow(sign); + } + } + else + { + s = s * lbe; + uint32 m; + if(s.exp < 0) + { + m = s.m >> -s.exp; + s.exp = 0; + } + else + { + m = (s.m<>(31-s.exp)); + } + s.m = exp2(m, 27); + if(!x.exp) + s = f31(0x80000000, 0) / s; + if(bsign) + { + if(z.exp < 0) + s = s * z; + s = pi / s; + if(s.exp < -24) + return underflow(sign); + } + else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1))) + return ((s.exp+14)<<10) + (s.m>>21); + if(s.exp > 15) + return overflow(sign); + } + return fixed2half(s.m, s.exp+14, sign); + } - - template struct half_caster; - } - - - - - - - - - - + + template struct half_caster; + } + + + + + + + + + + @@ -2052,31 +2052,31 @@ namespace half_float - class half - { - public: + class half + { + public: - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - explicit half(float rhs) : data_(static_cast(detail::float2half(rhs))) {} + explicit half(float rhs) : data_(static_cast(detail::float2half(rhs))) {} - operator float() const { return detail::half2float(data_); } + operator float() const { return detail::half2float(data_); } - half& operator=(float rhs) { data_ = static_cast(detail::float2half(rhs)); return *this; } + half& operator=(float rhs) { data_ = static_cast(detail::float2half(rhs)); return *this; } @@ -2087,52 +2087,52 @@ namespace half_float - half& operator+=(half rhs) { return *this = *this + rhs; } + half& operator+=(half rhs) { return *this = *this + rhs; } - half& operator-=(half rhs) { return *this = *this - rhs; } + half& operator-=(half rhs) { return *this = *this - rhs; } - half& operator*=(half rhs) { return *this = *this * rhs; } + half& operator*=(half rhs) { return *this = *this * rhs; } - half& operator/=(half rhs) { return *this = *this / rhs; } + half& operator/=(half rhs) { return *this = *this / rhs; } - half& operator+=(float rhs) { return *this = *this + rhs; } + half& operator+=(float rhs) { return *this = *this + rhs; } - half& operator-=(float rhs) { return *this = *this - rhs; } + half& operator-=(float rhs) { return *this = *this - rhs; } - half& operator*=(float rhs) { return *this = *this * rhs; } + half& operator*=(float rhs) { return *this = *this * rhs; } - half& operator/=(float rhs) { return *this = *this / rhs; } + half& operator/=(float rhs) { return *this = *this / rhs; } @@ -2141,318 +2141,318 @@ namespace half_float - half& operator++() { return *this = *this + half(detail::binary, 0x3C00); } + half& operator++() { return *this = *this + half(detail::binary, 0x3C00); } - half& operator--() { return *this = *this + half(detail::binary, 0xBC00); } + half& operator--() { return *this = *this + half(detail::binary, 0xBC00); } - half operator++(int) { half out(*this); ++*this; return out; } + half operator++(int) { half out(*this); ++*this; return out; } - half operator--(int) { half out(*this); --*this; return out; } + half operator--(int) { half out(*this); --*this; return out; } - private: - - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); - - - - HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast(bits)) {} - - - detail::uint16 data_; - - #ifndef HALF_DOXYGEN_ONLY - friend HALF_CONSTEXPR_NOERR bool operator==(half, half); - friend HALF_CONSTEXPR_NOERR bool operator!=(half, half); - friend HALF_CONSTEXPR_NOERR bool operator<(half, half); - friend HALF_CONSTEXPR_NOERR bool operator>(half, half); - friend HALF_CONSTEXPR_NOERR bool operator<=(half, half); - friend HALF_CONSTEXPR_NOERR bool operator>=(half, half); - friend HALF_CONSTEXPR half operator-(half); - friend half operator+(half, half); - friend half operator-(half, half); - friend half operator*(half, half); - friend half operator/(half, half); - template friend std::basic_ostream& operator<<(std::basic_ostream&, half); - template friend std::basic_istream& operator>>(std::basic_istream&, half&); - friend HALF_CONSTEXPR half fabs(half); - friend half fmod(half, half); - friend half remainder(half, half); - friend half remquo(half, half, int*); - friend half fma(half, half, half); - friend HALF_CONSTEXPR_NOERR half fmax(half, half); - friend HALF_CONSTEXPR_NOERR half fmin(half, half); - friend half fdim(half, half); - friend half nanh(const char*); - friend half exp(half); - friend half exp2(half); - friend half expm1(half); - friend half log(half); - friend half log10(half); - friend half log2(half); - friend half log1p(half); - friend half sqrt(half); - friend half rsqrt(half); - friend half cbrt(half); - friend half hypot(half, half); - friend half hypot(half, half, half); - friend half pow(half, half); - friend void sincos(half, half*, half*); - friend half sin(half); - friend half cos(half); - friend half tan(half); - friend half asin(half); - friend half acos(half); - friend half atan(half); - friend half atan2(half, half); - friend half sinh(half); - friend half cosh(half); - friend half tanh(half); - friend half asinh(half); - friend half acosh(half); - friend half atanh(half); - friend half erf(half); - friend half erfc(half); - friend half lgamma(half); - friend half tgamma(half); - friend half ceil(half); - friend half floor(half); - friend half trunc(half); - friend half round(half); - friend long lround(half); - friend half rint(half); - friend long lrint(half); - friend half nearbyint(half); - #ifdef HALF_ENABLE_CPP11_LONG_LONG - friend long long llround(half); - friend long long llrint(half); - #endif - friend half frexp(half, int*); - friend half scalbln(half, long); - friend half modf(half, half*); - friend int ilogb(half); - friend half logb(half); - friend half nextafter(half, half); - friend half nexttoward(half, long double); - friend HALF_CONSTEXPR half copysign(half, half); - friend HALF_CONSTEXPR int fpclassify(half); - friend HALF_CONSTEXPR bool isfinite(half); - friend HALF_CONSTEXPR bool isinf(half); - friend HALF_CONSTEXPR bool isnan(half); - friend HALF_CONSTEXPR bool isnormal(half); - friend HALF_CONSTEXPR bool signbit(half); - friend HALF_CONSTEXPR bool isgreater(half, half); - friend HALF_CONSTEXPR bool isgreaterequal(half, half); - friend HALF_CONSTEXPR bool isless(half, half); - friend HALF_CONSTEXPR bool islessequal(half, half); - friend HALF_CONSTEXPR bool islessgreater(half, half); - template friend struct detail::half_caster; - friend class std::numeric_limits; - #if HALF_ENABLE_CPP11_HASH - friend struct std::hash; - #endif - #if HALF_ENABLE_CPP11_USER_LITERALS - friend half literal::operator "" _h(long double); - #endif - #endif - }; - -#if HALF_ENABLE_CPP11_USER_LITERALS - namespace literal - { - - - - - - - - inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } - } -#endif - - namespace detail - { - - - - - - - template struct half_caster {}; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); - #endif - - static half cast(U arg) { return cast_impl(arg, is_float()); }; - - private: - static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } - static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } - }; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); - #endif - - static T cast(half arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(half arg, true_type) { return half2float(arg.data_); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } - }; - template struct half_caster - { - static half cast(half arg) { return arg; } - }; - } -} + private: + + static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); + + + + HALF_CONSTEXPR half(detail::binary_t, unsigned int bits) HALF_NOEXCEPT : data_(static_cast(bits)) {} + + + detail::uint16 data_; + + #ifndef HALF_DOXYGEN_ONLY + friend HALF_CONSTEXPR_NOERR bool operator==(half, half); + friend HALF_CONSTEXPR_NOERR bool operator!=(half, half); + friend HALF_CONSTEXPR_NOERR bool operator<(half, half); + friend HALF_CONSTEXPR_NOERR bool operator>(half, half); + friend HALF_CONSTEXPR_NOERR bool operator<=(half, half); + friend HALF_CONSTEXPR_NOERR bool operator>=(half, half); + friend HALF_CONSTEXPR half operator-(half); + friend half operator+(half, half); + friend half operator-(half, half); + friend half operator*(half, half); + friend half operator/(half, half); + template friend std::basic_ostream& operator<<(std::basic_ostream&, half); + template friend std::basic_istream& operator>>(std::basic_istream&, half&); + friend HALF_CONSTEXPR half fabs(half); + friend half fmod(half, half); + friend half remainder(half, half); + friend half remquo(half, half, int*); + friend half fma(half, half, half); + friend HALF_CONSTEXPR_NOERR half fmax(half, half); + friend HALF_CONSTEXPR_NOERR half fmin(half, half); + friend half fdim(half, half); + friend half nanh(const char*); + friend half exp(half); + friend half exp2(half); + friend half expm1(half); + friend half log(half); + friend half log10(half); + friend half log2(half); + friend half log1p(half); + friend half sqrt(half); + friend half rsqrt(half); + friend half cbrt(half); + friend half hypot(half, half); + friend half hypot(half, half, half); + friend half pow(half, half); + friend void sincos(half, half*, half*); + friend half sin(half); + friend half cos(half); + friend half tan(half); + friend half asin(half); + friend half acos(half); + friend half atan(half); + friend half atan2(half, half); + friend half sinh(half); + friend half cosh(half); + friend half tanh(half); + friend half asinh(half); + friend half acosh(half); + friend half atanh(half); + friend half erf(half); + friend half erfc(half); + friend half lgamma(half); + friend half tgamma(half); + friend half ceil(half); + friend half floor(half); + friend half trunc(half); + friend half round(half); + friend long lround(half); + friend half rint(half); + friend long lrint(half); + friend half nearbyint(half); + #ifdef HALF_ENABLE_CPP11_LONG_LONG + friend long long llround(half); + friend long long llrint(half); + #endif + friend half frexp(half, int*); + friend half scalbln(half, long); + friend half modf(half, half*); + friend int ilogb(half); + friend half logb(half); + friend half nextafter(half, half); + friend half nexttoward(half, long double); + friend HALF_CONSTEXPR half copysign(half, half); + friend HALF_CONSTEXPR int fpclassify(half); + friend HALF_CONSTEXPR bool isfinite(half); + friend HALF_CONSTEXPR bool isinf(half); + friend HALF_CONSTEXPR bool isnan(half); + friend HALF_CONSTEXPR bool isnormal(half); + friend HALF_CONSTEXPR bool signbit(half); + friend HALF_CONSTEXPR bool isgreater(half, half); + friend HALF_CONSTEXPR bool isgreaterequal(half, half); + friend HALF_CONSTEXPR bool isless(half, half); + friend HALF_CONSTEXPR bool islessequal(half, half); + friend HALF_CONSTEXPR bool islessgreater(half, half); + template friend struct detail::half_caster; + friend class std::numeric_limits; + #if HALF_ENABLE_CPP11_HASH + friend struct std::hash; + #endif + #if HALF_ENABLE_CPP11_USER_LITERALS + friend half literal::operator "" _h(long double); + #endif + #endif + }; + +#if HALF_ENABLE_CPP11_USER_LITERALS + namespace literal + { + + + + + + + + inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } + } +#endif + + namespace detail + { + + + + + + + template struct half_caster {}; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); + #endif + + static half cast(U arg) { return cast_impl(arg, is_float()); }; + + private: + static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } + static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } + }; + template struct half_caster + { + #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); + #endif + + static T cast(half arg) { return cast_impl(arg, is_float()); } + + private: + static T cast_impl(half arg, true_type) { return half2float(arg.data_); } + static T cast_impl(half arg, false_type) { return half2int(arg.data_); } + }; + template struct half_caster + { + static half cast(half arg) { return arg; } + }; + } +} -namespace std -{ +namespace std +{ - template<> class numeric_limits - { - public: + template<> class numeric_limits + { + public: - static HALF_CONSTEXPR_CONST bool is_specialized = true; + static HALF_CONSTEXPR_CONST bool is_specialized = true; - static HALF_CONSTEXPR_CONST bool is_signed = true; + static HALF_CONSTEXPR_CONST bool is_signed = true; - static HALF_CONSTEXPR_CONST bool is_integer = false; + static HALF_CONSTEXPR_CONST bool is_integer = false; - static HALF_CONSTEXPR_CONST bool is_exact = false; + static HALF_CONSTEXPR_CONST bool is_exact = false; - static HALF_CONSTEXPR_CONST bool is_modulo = false; + static HALF_CONSTEXPR_CONST bool is_modulo = false; - static HALF_CONSTEXPR_CONST bool is_bounded = true; + static HALF_CONSTEXPR_CONST bool is_bounded = true; - static HALF_CONSTEXPR_CONST bool is_iec559 = true; + static HALF_CONSTEXPR_CONST bool is_iec559 = true; - static HALF_CONSTEXPR_CONST bool has_infinity = true; + static HALF_CONSTEXPR_CONST bool has_infinity = true; - static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; + static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; - static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true; + static HALF_CONSTEXPR_CONST bool has_signaling_NaN = true; - static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; + static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; - static HALF_CONSTEXPR_CONST bool has_denorm_loss = false; + static HALF_CONSTEXPR_CONST bool has_denorm_loss = false; - #if HALF_ERRHANDLING_THROWS - static HALF_CONSTEXPR_CONST bool traps = true; - #else + #if HALF_ERRHANDLING_THROWS + static HALF_CONSTEXPR_CONST bool traps = true; + #else - static HALF_CONSTEXPR_CONST bool traps = false; - #endif + static HALF_CONSTEXPR_CONST bool traps = false; + #endif - static HALF_CONSTEXPR_CONST bool tinyness_before = false; + static HALF_CONSTEXPR_CONST bool tinyness_before = false; - static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style; + static HALF_CONSTEXPR_CONST float_round_style round_style = half_float::half::round_style; - static HALF_CONSTEXPR_CONST int digits = 11; + static HALF_CONSTEXPR_CONST int digits = 11; - static HALF_CONSTEXPR_CONST int digits10 = 3; + static HALF_CONSTEXPR_CONST int digits10 = 3; - static HALF_CONSTEXPR_CONST int max_digits10 = 5; + static HALF_CONSTEXPR_CONST int max_digits10 = 5; - static HALF_CONSTEXPR_CONST int radix = 2; + static HALF_CONSTEXPR_CONST int radix = 2; - static HALF_CONSTEXPR_CONST int min_exponent = -13; + static HALF_CONSTEXPR_CONST int min_exponent = -13; - static HALF_CONSTEXPR_CONST int min_exponent10 = -4; + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; - static HALF_CONSTEXPR_CONST int max_exponent = 16; + static HALF_CONSTEXPR_CONST int max_exponent = 16; - static HALF_CONSTEXPR_CONST int max_exponent10 = 4; + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; - static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } + static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } - static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } + static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } - static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } + static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } - static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } + static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } - static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW - { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } + static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW + { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } - static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } + static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } - static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } + static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } - static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } + static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } - static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } - }; + static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } + }; -#if HALF_ENABLE_CPP11_HASH +#if HALF_ENABLE_CPP11_HASH - template<> struct hash - { + template<> struct hash + { - typedef half_float::half argument_type; + typedef half_float::half argument_type; - typedef size_t result_type; + typedef size_t result_type; - result_type operator()(argument_type arg) const { return hash()(arg.data_&-static_cast(arg.data_!=0x8000)); } - }; -#endif -} + result_type operator()(argument_type arg) const { return hash()(arg.data_&-static_cast(arg.data_!=0x8000)); } + }; +#endif +} -namespace half_float -{ +namespace half_float +{ @@ -2463,10 +2463,10 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y) - { - return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)); - } + inline HALF_CONSTEXPR_NOERR bool operator==(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)); + } @@ -2474,10 +2474,10 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y) - { - return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)); - } + inline HALF_CONSTEXPR_NOERR bool operator!=(half x, half y) + { + return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)); + } @@ -2485,11 +2485,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y) - { - return !detail::compsignal(x.data_, y.data_) && - ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); - } + inline HALF_CONSTEXPR_NOERR bool operator<(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } @@ -2497,11 +2497,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y) - { - return !detail::compsignal(x.data_, y.data_) && - ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); - } + inline HALF_CONSTEXPR_NOERR bool operator>(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } @@ -2509,11 +2509,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y) - { - return !detail::compsignal(x.data_, y.data_) && - ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); - } + inline HALF_CONSTEXPR_NOERR bool operator<=(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } @@ -2521,11 +2521,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y) - { - return !detail::compsignal(x.data_, y.data_) && - ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); - } + inline HALF_CONSTEXPR_NOERR bool operator>=(half x, half y) + { + return !detail::compsignal(x.data_, y.data_) && + ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)); + } @@ -2535,12 +2535,12 @@ namespace half_float - inline HALF_CONSTEXPR half operator+(half arg) { return arg; } + inline HALF_CONSTEXPR half operator+(half arg) { return arg; } - inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); } + inline HALF_CONSTEXPR half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); } @@ -2549,48 +2549,48 @@ namespace half_float - inline half operator+(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(detail::half2float(x.data_)+detail::half2float(y.data_))); - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF; - bool sub = ((x.data_^y.data_)&0x8000) != 0; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ : - (sub && absx==0x7C00) ? detail::invalid() : y.data_); - if(!absx) - return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_)); - if(!absy) - return x; - unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000; - if(absy > absx) - std::swap(absx, absy); - int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my; - if(d < 13) - { - my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3; - my = (my>>d) | ((my&((1<(half::round_style==std::round_toward_neg_infinity)<<15); - for(; mx<0x2000 && exp>1; mx<<=1,--exp) ; - } - else - { - mx += my; - int i = mx >> 14; - if((exp+=i) > 30) - return half(detail::binary, detail::overflow(sign)); - mx = (mx>>i) | (mx&i); - } - return half(detail::binary, detail::rounded(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0)); - #endif - } + inline half operator+(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::half2float(x.data_)+detail::half2float(y.data_))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF; + bool sub = ((x.data_^y.data_)&0x8000) != 0; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ : + (sub && absx==0x7C00) ? detail::invalid() : y.data_); + if(!absx) + return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_)); + if(!absy) + return x; + unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000; + if(absy > absx) + std::swap(absx, absy); + int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my; + if(d < 13) + { + my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3; + my = (my>>d) | ((my&((1<(half::round_style==std::round_toward_neg_infinity)<<15); + for(; mx<0x2000 && exp>1; mx<<=1,--exp) ; + } + else + { + mx += my; + int i = mx >> 14; + if((exp+=i) > 30) + return half(detail::binary, detail::overflow(sign)); + mx = (mx>>i) | (mx&i); + } + return half(detail::binary, detail::rounded(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0)); + #endif + } @@ -2599,14 +2599,14 @@ namespace half_float - inline half operator-(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(detail::half2float(x.data_)-detail::half2float(y.data_))); - #else - return x + -y; - #endif - } + inline half operator-(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::half2float(x.data_)-detail::half2float(y.data_))); + #else + return x + -y; + #endif + } @@ -2615,30 +2615,30 @@ namespace half_float - inline half operator*(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(detail::half2float(x.data_)*detail::half2float(y.data_))); - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16; - unsigned int sign = (x.data_^y.data_) & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00)); - if(!absx || !absy) - return half(detail::binary, sign); - for(; absx<0x400; absx<<=1,--exp) ; - for(; absy<0x400; absy<<=1,--exp) ; - detail::uint32 m = static_cast((absx&0x3FF)|0x400) * static_cast((absy&0x3FF)|0x400); - int i = m >> 21, s = m & i; - exp += (absx>>10) + (absy>>10) + i; - if(exp > 29) - return half(detail::binary, detail::overflow(sign)); - else if(exp < -11) - return half(detail::binary, detail::underflow(sign)); - return half(detail::binary, detail::fixed2half(m>>i, exp, sign, s)); - #endif - } + inline half operator*(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::half2float(x.data_)*detail::half2float(y.data_))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16; + unsigned int sign = (x.data_^y.data_) & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00)); + if(!absx || !absy) + return half(detail::binary, sign); + for(; absx<0x400; absx<<=1,--exp) ; + for(; absy<0x400; absy<<=1,--exp) ; + detail::uint32 m = static_cast((absx&0x3FF)|0x400) * static_cast((absy&0x3FF)|0x400); + int i = m >> 21, s = m & i; + exp += (absx>>10) + (absy>>10) + i; + if(exp > 29) + return half(detail::binary, detail::overflow(sign)); + else if(exp < -11) + return half(detail::binary, detail::underflow(sign)); + return half(detail::binary, detail::fixed2half(m>>i, exp, sign, s)); + #endif + } @@ -2648,34 +2648,34 @@ namespace half_float - inline half operator/(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(detail::half2float(x.data_)/detail::half2float(y.data_))); - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14; - unsigned int sign = (x.data_^y.data_) & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0))); - if(!absx) - return half(detail::binary, absy ? sign : detail::invalid()); - if(!absy) - return half(detail::binary, detail::pole(sign)); - for(; absx<0x400; absx<<=1,--exp) ; - for(; absy<0x400; absy<<=1,++exp) ; - detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; - int i = mx < my; - exp += (absx>>10) - (absy>>10) - i; - if(exp > 29) - return half(detail::binary, detail::overflow(sign)); - else if(exp < -11) - return half(detail::binary, detail::underflow(sign)); - mx <<= 12 + i; - my <<= 1; - return half(detail::binary, detail::fixed2half(mx/my, exp, sign, mx%my!=0)); - #endif - } + inline half operator/(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::half2float(x.data_)/detail::half2float(y.data_))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14; + unsigned int sign = (x.data_^y.data_) & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0))); + if(!absx) + return half(detail::binary, absy ? sign : detail::invalid()); + if(!absy) + return half(detail::binary, detail::pole(sign)); + for(; absx<0x400; absx<<=1,--exp) ; + for(; absy<0x400; absy<<=1,++exp) ; + detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; + int i = mx < my; + exp += (absx>>10) - (absy>>10) - i; + if(exp > 29) + return half(detail::binary, detail::overflow(sign)); + else if(exp < -11) + return half(detail::binary, detail::underflow(sign)); + mx <<= 12 + i; + my <<= 1; + return half(detail::binary, detail::fixed2half(mx/my, exp, sign, mx%my!=0)); + #endif + } @@ -2687,14 +2687,14 @@ namespace half_float - template std::basic_ostream& operator<<(std::basic_ostream &out, half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return out << detail::half2float(arg.data_); - #else - return out << detail::half2float(arg.data_); - #endif - } + template std::basic_ostream& operator<<(std::basic_ostream &out, half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return out << detail::half2float(arg.data_); + #else + return out << detail::half2float(arg.data_); + #endif + } @@ -2705,17 +2705,17 @@ namespace half_float - template std::basic_istream& operator>>(std::basic_istream &in, half &arg) - { - #ifdef HALF_ARITHMETIC_TYPE - detail::internal_t f; - #else - double f; - #endif - if(in >> f) - arg.data_ = detail::float2half(f); - return in; - } + template std::basic_istream& operator>>(std::basic_istream &in, half &arg) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t f; + #else + double f; + #endif + if(in >> f) + arg.data_ = detail::float2half(f); + return in; + } @@ -2726,13 +2726,13 @@ namespace half_float - inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); } + inline HALF_CONSTEXPR half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); } - inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); } + inline HALF_CONSTEXPR half abs(half arg) { return fabs(arg); } @@ -2740,20 +2740,20 @@ namespace half_float - inline half fmod(half x, half y) - { - unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - (absx==0x7C00) ? detail::invalid() : x.data_); - if(!absy) - return half(detail::binary, detail::invalid()); - if(!absx) - return x; - if(absx == absy) - return half(detail::binary, sign); - return half(detail::binary, sign|detail::mod(absx, absy)); - } + inline half fmod(half x, half y) + { + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + (absx==0x7C00) ? detail::invalid() : x.data_); + if(!absy) + return half(detail::binary, detail::invalid()); + if(!absx) + return x; + if(absx == absy) + return half(detail::binary, sign); + return half(detail::binary, sign|detail::mod(absx, absy)); + } @@ -2761,18 +2761,18 @@ namespace half_float - inline half remainder(half x, half y) - { - unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - (absx==0x7C00) ? detail::invalid() : x.data_); - if(!absy) - return half(detail::binary, detail::invalid()); - if(absx == absy) - return half(detail::binary, sign); - return half(detail::binary, sign^detail::mod(absx, absy)); - } + inline half remainder(half x, half y) + { + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + (absx==0x7C00) ? detail::invalid() : x.data_); + if(!absy) + return half(detail::binary, detail::invalid()); + if(absx == absy) + return half(detail::binary, sign); + return half(detail::binary, sign^detail::mod(absx, absy)); + } @@ -2781,20 +2781,20 @@ namespace half_float - inline half remquo(half x, half y, int *quo) - { - unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_)); - if(!absy) - return half(detail::binary, detail::invalid()); - bool qsign = ((value^y.data_)&0x8000) != 0; - int q = 1; - if(absx != absy) - value ^= detail::mod(absx, absy, &q); - return *quo = qsign ? -q : q, half(detail::binary, value); - } + inline half remquo(half x, half y, int *quo) + { + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_)); + if(!absy) + return half(detail::binary, detail::invalid()); + bool qsign = ((value^y.data_)&0x8000) != 0; + int q = 1; + if(absx != absy) + value ^= detail::mod(absx, absy, &q); + return *quo = qsign ? -q : q, half(detail::binary, value); + } @@ -2806,68 +2806,68 @@ namespace half_float - inline half fma(half x, half y, half z) - { - #ifdef HALF_ARITHMETIC_TYPE - detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_); - #if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA - return half(detail::binary, detail::float2half(std::fma(fx, fy, fz))); - #else - return half(detail::binary, detail::float2half(fx*fy+fz)); - #endif - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15; - unsigned int sign = (x.data_^y.data_) & 0x8000; - bool sub = ((sign^z.data_)&0x8000) != 0; - if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00) - return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) : - (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : - (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z; - if(!absx || !absy) - return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign)); - for(; absx<0x400; absx<<=1,--exp) ; - for(; absy<0x400; absy<<=1,--exp) ; - detail::uint32 m = static_cast((absx&0x3FF)|0x400) * static_cast((absy&0x3FF)|0x400); - int i = m >> 21; - exp += (absx>>10) + (absy>>10) + i; - m <<= 3 - i; - if(absz) - { - int expz = 0; - for(; absz<0x400; absz<<=1,--expz) ; - expz += absz >> 10; - detail::uint32 mz = static_cast((absz&0x3FF)|0x400) << 13; - if(expz > exp || (expz == exp && mz > m)) - { - std::swap(m, mz); - std::swap(exp, expz); - if(sub) - sign = z.data_ & 0x8000; - } - int d = exp - expz; - mz = (d<23) ? ((mz>>d)|((mz&((static_cast(1)<(half::round_style==std::round_toward_neg_infinity)<<15); - for(; m<0x800000; m<<=1,--exp) ; - } - else - { - m += mz; - i = m >> 24; - m = (m>>i) | (m&i); - exp += i; - } - } - if(exp > 30) - return half(detail::binary, detail::overflow(sign)); - else if(exp < -10) - return half(detail::binary, detail::underflow(sign)); - return half(detail::binary, detail::fixed2half(m, exp-1, sign)); - #endif - } + inline half fma(half x, half y, half z) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_); + #if HALF_ENABLE_CPP11_CMATH && FP_FAST_FMA + return half(detail::binary, detail::float2half(std::fma(fx, fy, fz))); + #else + return half(detail::binary, detail::float2half(fx*fy+fz)); + #endif + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15; + unsigned int sign = (x.data_^y.data_) & 0x8000; + bool sub = ((sign^z.data_)&0x8000) != 0; + if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00) + return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) : + (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : + (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z; + if(!absx || !absy) + return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign)); + for(; absx<0x400; absx<<=1,--exp) ; + for(; absy<0x400; absy<<=1,--exp) ; + detail::uint32 m = static_cast((absx&0x3FF)|0x400) * static_cast((absy&0x3FF)|0x400); + int i = m >> 21; + exp += (absx>>10) + (absy>>10) + i; + m <<= 3 - i; + if(absz) + { + int expz = 0; + for(; absz<0x400; absz<<=1,--expz) ; + expz += absz >> 10; + detail::uint32 mz = static_cast((absz&0x3FF)|0x400) << 13; + if(expz > exp || (expz == exp && mz > m)) + { + std::swap(m, mz); + std::swap(exp, expz); + if(sub) + sign = z.data_ & 0x8000; + } + int d = exp - expz; + mz = (d<23) ? ((mz>>d)|((mz&((static_cast(1)<(half::round_style==std::round_toward_neg_infinity)<<15); + for(; m<0x800000; m<<=1,--exp) ; + } + else + { + m += mz; + i = m >> 24; + m = (m>>i) | (m&i); + exp += i; + } + } + if(exp > 30) + return half(detail::binary, detail::overflow(sign)); + else if(exp < -10) + return half(detail::binary, detail::underflow(sign)); + return half(detail::binary, detail::fixed2half(m, exp-1, sign)); + #endif + } @@ -2875,11 +2875,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR half fmax(half x, half y) - { - return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) < - (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); - } + inline HALF_CONSTEXPR_NOERR half fmax(half x, half y) + { + return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) < + (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); + } @@ -2887,11 +2887,11 @@ namespace half_float - inline HALF_CONSTEXPR_NOERR half fmin(half x, half y) - { - return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) > - (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); - } + inline HALF_CONSTEXPR_NOERR half fmin(half x, half y) + { + return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) > + (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_)); + } @@ -2901,24 +2901,24 @@ namespace half_float - inline half fdim(half x, half y) - { - if(isnan(x) || isnan(y)) - return half(detail::binary, detail::signal(x.data_, y.data_)); - return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y); - } + inline half fdim(half x, half y) + { + if(isnan(x) || isnan(y)) + return half(detail::binary, detail::signal(x.data_, y.data_)); + return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y); + } - inline half nanh(const char *arg) - { - unsigned int value = 0x7FFF; - while(*arg) - value ^= static_cast(*arg++) & 0xFF; - return half(detail::binary, value); - } + inline half nanh(const char *arg) + { + unsigned int value = 0x7FFF; + while(*arg) + value ^= static_cast(*arg++) & 0xFF; + return half(detail::binary, value); + } @@ -2933,32 +2933,32 @@ namespace half_float - inline half exp(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::exp(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_)); - if(abs >= 0x4C80) - return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow()); - detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); - if(e < 14) - { - exp = 0; - m >>= 14 - e; - } - else - { - exp = m >> (45-e); - m = (m<<(e-14)) & 0x7FFFFFFF; - } - return half(detail::binary, detail::exp2_post(m, exp, (arg.data_&0x8000)!=0, 0, 26)); - #endif - } + inline half exp(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::exp(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_)); + if(abs >= 0x4C80) + return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow()); + detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); + if(e < 14) + { + exp = 0; + m >>= 14 - e; + } + else + { + exp = m >> (45-e); + m = (m<<(e-14)) & 0x7FFFFFFF; + } + return half(detail::binary, detail::exp2_post(m, exp, (arg.data_&0x8000)!=0, 0, 26)); + #endif + } @@ -2968,22 +2968,22 @@ namespace half_float - inline half exp2(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::exp2(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10); - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_)); - if(abs >= 0x4E40) - return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow()); - return half(detail::binary, detail::exp2_post( - (static_cast(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28)); - #endif - } + inline half exp2(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::exp2(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10); + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_)); + if(abs >= 0x4E40) + return half(detail::binary, (arg.data_&0x8000) ? detail::underflow() : detail::overflow()); + return half(detail::binary, detail::exp2_post( + (static_cast(exp)<<(6+e))&0x7FFFFFFF, exp>>(25-e), (arg.data_&0x8000)!=0, 0, 28)); + #endif + } @@ -2994,77 +2994,77 @@ namespace half_float - inline half expm1(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::expm1(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_)); - if(abs >= 0x4A00) - return half(detail::binary, (arg.data_&0x8000) ? detail::rounded(0xBBFF, 1, 1) : detail::overflow()); - detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); - if(e < 14) - { - exp = 0; - m >>= 14 - e; - } - else - { - exp = m >> (45-e); - m = (m<<(e-14)) & 0x7FFFFFFF; - } - m = detail::exp2(m); - if(sign) - { - int s = 0; - if(m > 0x80000000) - { - ++exp; - m = detail::divide64(0x80000000, m, s); - } - m = 0x80000000 - ((m>>exp)|((m&((static_cast(1)<>exp) : 1; - for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ; - if(exp > 29) - return half(detail::binary, detail::overflow()); - return half(detail::binary, detail::rounded(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0)); - #endif - } - - - - - + inline half expm1(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::expm1(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000, e = (abs>>10) + (abs<=0x3FF), exp; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_)); + if(abs >= 0x4A00) + return half(detail::binary, (arg.data_&0x8000) ? detail::rounded(0xBBFF, 1, 1) : detail::overflow()); + detail::uint32 m = detail::multiply64(static_cast((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29); + if(e < 14) + { + exp = 0; + m >>= 14 - e; + } + else + { + exp = m >> (45-e); + m = (m<<(e-14)) & 0x7FFFFFFF; + } + m = detail::exp2(m); + if(sign) + { + int s = 0; + if(m > 0x80000000) + { + ++exp; + m = detail::divide64(0x80000000, m, s); + } + m = 0x80000000 - ((m>>exp)|((m&((static_cast(1)<>exp) : 1; + for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ; + if(exp > 29) + return half(detail::binary, detail::overflow()); + return half(detail::binary, detail::rounded(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0)); + #endif + } + + + + + - inline half log(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::log(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = -15; - if(!abs) - return half(detail::binary, detail::pole(0x8000)); - if(arg.data_ & 0x8000) - return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs >= 0x7C00) - return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - return half(detail::binary, detail::log2_post( - detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17)); - #endif - } + inline half log(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::log(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = -15; + if(!abs) + return half(detail::binary, detail::pole(0x8000)); + if(arg.data_ & 0x8000) + return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs >= 0x7C00) + return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + return half(detail::binary, detail::log2_post( + detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17)); + #endif + } @@ -3075,31 +3075,31 @@ namespace half_float - inline half log10(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::log10(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = -15; - if(!abs) - return half(detail::binary, detail::pole(0x8000)); - if(arg.data_ & 0x8000) - return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs >= 0x7C00) - return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); - switch(abs) - { - case 0x4900: return half(detail::binary, 0x3C00); - case 0x5640: return half(detail::binary, 0x4000); - case 0x63D0: return half(detail::binary, 0x4200); - case 0x70E2: return half(detail::binary, 0x4400); - } - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - return half(detail::binary, detail::log2_post( - detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16)); - #endif - } + inline half log10(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::log10(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = -15; + if(!abs) + return half(detail::binary, detail::pole(0x8000)); + if(arg.data_ & 0x8000) + return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs >= 0x7C00) + return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); + switch(abs) + { + case 0x4900: return half(detail::binary, 0x3C00); + case 0x5640: return half(detail::binary, 0x4000); + case 0x63D0: return half(detail::binary, 0x4200); + case 0x70E2: return half(detail::binary, 0x4400); + } + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + return half(detail::binary, detail::log2_post( + detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16)); + #endif + } @@ -3110,38 +3110,38 @@ namespace half_float - inline half log2(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::log2(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = -15, s = 0; - if(!abs) - return half(detail::binary, detail::pole(0x8000)); - if(arg.data_ & 0x8000) - return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs >= 0x7C00) - return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); - if(abs == 0x3C00) - return half(detail::binary, 0); - for(; abs<0x400; abs<<=1,--exp) ; - exp += (abs>>10); - if(!(abs&0x3FF)) - { - unsigned int value = static_cast(exp<0) << 15, m = std::abs(exp) << 6; - for(exp=18; m<0x400; m<<=1,--exp) ; - return half(detail::binary, value+(exp<<10)+m); - } - detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m = - (((ilog<<27)+(detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign; - if(!m) - return half(detail::binary, 0); - for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ; - for(; m>0xFFFFFFF; m>>=1,++exp) - s |= m & 1; - return half(detail::binary, detail::fixed2half(m, exp, sign&0x8000, s)); - #endif - } + inline half log2(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::log2(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = -15, s = 0; + if(!abs) + return half(detail::binary, detail::pole(0x8000)); + if(arg.data_ & 0x8000) + return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs >= 0x7C00) + return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); + if(abs == 0x3C00) + return half(detail::binary, 0); + for(; abs<0x400; abs<<=1,--exp) ; + exp += (abs>>10); + if(!(abs&0x3FF)) + { + unsigned int value = static_cast(exp<0) << 15, m = std::abs(exp) << 6; + for(exp=18; m<0x400; m<<=1,--exp) ; + return half(detail::binary, value+(exp<<10)+m); + } + detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m = + (((ilog<<27)+(detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign; + if(!m) + return half(detail::binary, 0); + for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ; + for(; m>0xFFFFFFF; m>>=1,++exp) + s |= m & 1; + return half(detail::binary, detail::fixed2half(m, exp, sign&0x8000, s)); + #endif + } @@ -3153,42 +3153,42 @@ namespace half_float - inline half log1p(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::log1p(detail::half2float(arg.data_)))); - #else - if(arg.data_ >= 0xBC00) - return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); - int abs = arg.data_ & 0x7FFF, exp = -15; - if(!abs || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - detail::uint32 m = static_cast((abs&0x3FF)|0x400) << 20; - if(arg.data_ & 0x8000) - { - m = 0x40000000 - (m>>-exp); - for(exp=0; m<0x40000000; m<<=1,--exp) ; - } - else - { - if(exp < 0) - { - m = 0x40000000 + (m>>-exp); - exp = 0; - } - else - { - m += 0x40000000 >> exp; - int i = m >> 31; - m >>= i; - exp += i; - } - } - return half(detail::binary, detail::log2_post(detail::log2(m), exp, 17)); - #endif - } + inline half log1p(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::log1p(detail::half2float(arg.data_)))); + #else + if(arg.data_ >= 0xBC00) + return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_)); + int abs = arg.data_ & 0x7FFF, exp = -15; + if(!abs || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + detail::uint32 m = static_cast((abs&0x3FF)|0x400) << 20; + if(arg.data_ & 0x8000) + { + m = 0x40000000 - (m>>-exp); + for(exp=0; m<0x40000000; m<<=1,--exp) ; + } + else + { + if(exp < 0) + { + m = 0x40000000 + (m>>-exp); + exp = 0; + } + else + { + m += 0x40000000 >> exp; + int i = m >> 31; + m >>= i; + exp += i; + } + } + return half(detail::binary, detail::log2_post(detail::log2(m), exp, 17)); + #endif + } @@ -3203,19 +3203,19 @@ namespace half_float - inline half sqrt(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::sqrt(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = 15; - if(!abs || arg.data_ >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_); - for(; abs<0x400; abs<<=1,--exp) ; - detail::uint32 r = static_cast((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10); - return half(detail::binary, detail::rounded((exp<<10)+(m&0x3FF), r>m, r!=0)); - #endif - } + inline half sqrt(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::sqrt(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = 15; + if(!abs || arg.data_ >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_); + for(; abs<0x400; abs<<=1,--exp) ; + detail::uint32 r = static_cast((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10); + return half(detail::binary, detail::rounded((exp<<10)+(m&0x3FF), r>m, r!=0)); + #endif + } @@ -3224,359 +3224,359 @@ namespace half_float - inline half rsqrt(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(detail::internal_t(1)/std::sqrt(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000; - if(!abs || arg.data_ >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? - detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0); - for(; abs<0x400; abs<<=1,bias-=0x400) ; - unsigned int frac = (abs+=bias) & 0x7FF; - if(frac == 0x400) - return half(detail::binary, 0x7A00-(abs>>1)); - if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) || - (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B))) - return pow(arg, half(detail::binary, 0xB800)); - detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my; - int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21; - for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; - i = (my*=mz>>10) >> 31; - expy += i; - my = (my>>(20+i)) + 1; - i = (mz=my*my) >> 21; - for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; - i = (my*=(mz>>10)+1) >> 31; - return half(detail::binary, detail::fixed2half(my>>i, expy+i+14)); - #endif - } - - - + inline half rsqrt(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(detail::internal_t(1)/std::sqrt(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, bias = 0x4000; + if(!abs || arg.data_ >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? + detail::invalid() : !abs ? detail::pole(arg.data_&0x8000) : 0); + for(; abs<0x400; abs<<=1,bias-=0x400) ; + unsigned int frac = (abs+=bias) & 0x7FF; + if(frac == 0x400) + return half(detail::binary, 0x7A00-(abs>>1)); + if((half::round_style == std::round_to_nearest && (frac == 0x3FE || frac == 0x76C)) || + (half::round_style != std::round_to_nearest && (frac == 0x15A || frac == 0x3FC || frac == 0x401 || frac == 0x402 || frac == 0x67B))) + return pow(arg, half(detail::binary, 0xB800)); + detail::uint32 f = 0x17376 - abs, mx = (abs&0x3FF) | 0x400, my = ((f>>1)&0x3FF) | 0x400, mz = my * my; + int expy = (f>>11) - 31, expx = 32 - (abs>>10), i = mz >> 21; + for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; + i = (my*=mz>>10) >> 31; + expy += i; + my = (my>>(20+i)) + 1; + i = (mz=my*my) >> 21; + for(mz=0x60000000-(((mz>>i)*mx)>>(expx-2*expy-i)); mz<0x40000000; mz<<=1,--expy) ; + i = (my*=(mz>>10)+1) >> 31; + return half(detail::binary, detail::fixed2half(my>>i, expy+i+14)); + #endif + } + + + - inline half cbrt(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::cbrt(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = -15; - if(!abs || abs == 0x3C00 || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1, --exp); - detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m = - (((ilog<<27)+(detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign; - for(exp=2; m<0x80000000; m<<=1,--exp) ; - m = detail::multiply64(m, 0xAAAAAAAB); - int i = m >> 31, s; - exp += i; - m <<= 1 - i; - if(exp < 0) - { - f = m >> -exp; - exp = 0; - } - else - { - f = (m<> (31-exp); - } - m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26); - if(sign) - { - if(m > 0x80000000) - { - m = detail::divide64(0x80000000, m, s); - ++exp; - } - exp = -exp; - } - return half(detail::binary, (half::round_style==std::round_to_nearest) ? - detail::fixed2half(m, exp+14, arg.data_&0x8000) : - detail::fixed2half((m+0x80)>>8, exp+14, arg.data_&0x8000)); - #endif - } - - - - - - - - - - - inline half hypot(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_); - #if HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::hypot(fx, fy))); - #else - return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy))); - #endif - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0; - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) : - (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_)); - if(!absx) - return half(detail::binary, absy ? detail::check_underflow(absy) : 0); - if(!absy) - return half(detail::binary, detail::check_underflow(absx)); - if(absy > absx) - std::swap(absx, absy); - for(; absx<0x400; absx<<=1,--expx) ; - for(; absy<0x400; absy<<=1,--expy) ; - detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; - mx *= mx; - my *= my; - int ix = mx >> 21, iy = my >> 21; - expx = 2*(expx+(absx>>10)) - 15 + ix; - expy = 2*(expy+(absy>>10)) - 15 + iy; - mx <<= 10 - ix; - my <<= 10 - iy; - int d = expx - expy; - my = (d<30) ? ((my>>d)|((my&((static_cast(1)<(mx+my, expx)); - #endif - } - - - - - - - - - - - - inline half hypot(half x, half y, half z) - { - #ifdef HALF_ARITHMETIC_TYPE - detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_); - return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy+fz*fz))); - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0; - if(!absx) - return hypot(y, z); - if(!absy) - return hypot(x, z); - if(!absz) - return hypot(x, y); - if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00) - return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) : - (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) : - (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) : - detail::signal(x.data_, y.data_, z.data_)); - if(absz > absy) - std::swap(absy, absz); - if(absy > absx) - std::swap(absx, absy); - if(absz > absy) - std::swap(absy, absz); - for(; absx<0x400; absx<<=1,--expx) ; - for(; absy<0x400; absy<<=1,--expy) ; - for(; absz<0x400; absz<<=1,--expz) ; - detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400; - mx *= mx; - my *= my; - mz *= mz; - int ix = mx >> 21, iy = my >> 21, iz = mz >> 21; - expx = 2*(expx+(absx>>10)) - 15 + ix; - expy = 2*(expy+(absy>>10)) - 15 + iy; - expz = 2*(expz+(absz>>10)) - 15 + iz; - mx <<= 10 - ix; - my <<= 10 - iy; - mz <<= 10 - iz; - int d = expy - expz; - mz = (d<30) ? ((mz>>d)|((mz&((static_cast(1)<>1) | (my&1); - if(++expy > expx) - { - std::swap(mx, my); - std::swap(expx, expy); - } - } - d = expx - expy; - my = (d<30) ? ((my>>d)|((my&((static_cast(1)<(mx+my, expx)); - #endif - } - - - - - - - - - - - - inline half pow(half x, half y) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::pow(detail::half2float(x.data_), detail::half2float(y.data_)))); - #else - int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15; - if(!absy || x.data_ == 0x3C00) - return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_)); - bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1))); - unsigned int sign = x.data_ & (static_cast((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15); - if(absx >= 0x7C00 || absy >= 0x7C00) - return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : - (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() : - (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U)))); - if(!absx) - return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign); - if((x.data_&0x8000) && !is_int) - return half(detail::binary, detail::invalid()); - if(x.data_ == 0xBC00) - return half(detail::binary, sign|0x3C00); - switch(y.data_) - { - case 0x3800: return sqrt(x); - case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_)); - case 0x4000: return x * x; - case 0xBC00: return half(detail::binary, 0x3C00) / x; - } - for(; absx<0x400; absx<<=1,--exp) ; - detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = - (((ilog<<27)+((detail::log2(static_cast((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign; - for(exp=-11; m<0x80000000; m<<=1,--exp) ; - for(; absy<0x400; absy<<=1,--exp) ; - m = detail::multiply64(m, static_cast((absy&0x3FF)|0x400)<<21); - int i = m >> 31; - exp += (absy>>10) + i; - m <<= 1 - i; - if(exp < 0) - { - f = m >> -exp; - exp = 0; - } - else - { - f = (m<> (31-exp); - } - return half(detail::binary, detail::exp2_post(f, exp, ((msign&1)^(y.data_>>15))!=0, sign)); - #endif - } - - - - - - - - - - - - - - - - inline void sincos(half arg, half *sin, half *cos) - { - #ifdef HALF_ARITHMETIC_TYPE - detail::internal_t f = detail::half2float(arg.data_); - *sin = half(detail::binary, detail::float2half(std::sin(f))); - *cos = half(detail::binary, detail::float2half(std::cos(f))); - #else - int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k; - if(abs >= 0x7C00) - *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - else if(!abs) - { - *sin = arg; - *cos = half(detail::binary, 0x3C00); - } - else if(abs < 0x2500) - { - *sin = half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); - } - else - { - if(half::round_style != std::round_to_nearest) - { - switch(abs) - { - case 0x48B7: - *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); - *cos = half(detail::binary, detail::rounded(0xBBFF, 1, 1)); - return; - case 0x598C: - *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); - *cos = half(detail::binary, detail::rounded(0x80FC, 1, 1)); - return; - case 0x6A64: - *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); - *cos = half(detail::binary, detail::rounded(0x27FF, 1, 1)); - return; - case 0x6D8C: - *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); - *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); - return; - } - } - std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); - switch(k & 3) - { - case 1: sc = std::make_pair(sc.second, -sc.first); break; - case 2: sc = std::make_pair(-sc.first, -sc.second); break; - case 3: sc = std::make_pair(-sc.second, sc.first); break; - } - *sin = half(detail::binary, detail::fixed2half((sc.first^-static_cast(sign))+sign)); - *cos = half(detail::binary, detail::fixed2half(sc.second)); - } - #endif - } - - - - - + inline half cbrt(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::cbrt(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = -15; + if(!abs || abs == 0x3C00 || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1, --exp); + detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m = + (((ilog<<27)+(detail::log2(static_cast((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign; + for(exp=2; m<0x80000000; m<<=1,--exp) ; + m = detail::multiply64(m, 0xAAAAAAAB); + int i = m >> 31, s; + exp += i; + m <<= 1 - i; + if(exp < 0) + { + f = m >> -exp; + exp = 0; + } + else + { + f = (m<> (31-exp); + } + m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26); + if(sign) + { + if(m > 0x80000000) + { + m = detail::divide64(0x80000000, m, s); + ++exp; + } + exp = -exp; + } + return half(detail::binary, (half::round_style==std::round_to_nearest) ? + detail::fixed2half(m, exp+14, arg.data_&0x8000) : + detail::fixed2half((m+0x80)>>8, exp+14, arg.data_&0x8000)); + #endif + } + + + + + + + + + + + inline half hypot(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_); + #if HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::hypot(fx, fy))); + #else + return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy))); + #endif + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0; + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) : + (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_)); + if(!absx) + return half(detail::binary, absy ? detail::check_underflow(absy) : 0); + if(!absy) + return half(detail::binary, detail::check_underflow(absx)); + if(absy > absx) + std::swap(absx, absy); + for(; absx<0x400; absx<<=1,--expx) ; + for(; absy<0x400; absy<<=1,--expy) ; + detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400; + mx *= mx; + my *= my; + int ix = mx >> 21, iy = my >> 21; + expx = 2*(expx+(absx>>10)) - 15 + ix; + expy = 2*(expy+(absy>>10)) - 15 + iy; + mx <<= 10 - ix; + my <<= 10 - iy; + int d = expx - expy; + my = (d<30) ? ((my>>d)|((my&((static_cast(1)<(mx+my, expx)); + #endif + } + + + + + + + + + + + + inline half hypot(half x, half y, half z) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t fx = detail::half2float(x.data_), fy = detail::half2float(y.data_), fz = detail::half2float(z.data_); + return half(detail::binary, detail::float2half(std::sqrt(fx*fx+fy*fy+fz*fz))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0; + if(!absx) + return hypot(y, z); + if(!absy) + return hypot(x, z); + if(!absz) + return hypot(x, y); + if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00) + return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) : + (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) : + (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) : + detail::signal(x.data_, y.data_, z.data_)); + if(absz > absy) + std::swap(absy, absz); + if(absy > absx) + std::swap(absx, absy); + if(absz > absy) + std::swap(absy, absz); + for(; absx<0x400; absx<<=1,--expx) ; + for(; absy<0x400; absy<<=1,--expy) ; + for(; absz<0x400; absz<<=1,--expz) ; + detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400; + mx *= mx; + my *= my; + mz *= mz; + int ix = mx >> 21, iy = my >> 21, iz = mz >> 21; + expx = 2*(expx+(absx>>10)) - 15 + ix; + expy = 2*(expy+(absy>>10)) - 15 + iy; + expz = 2*(expz+(absz>>10)) - 15 + iz; + mx <<= 10 - ix; + my <<= 10 - iy; + mz <<= 10 - iz; + int d = expy - expz; + mz = (d<30) ? ((mz>>d)|((mz&((static_cast(1)<>1) | (my&1); + if(++expy > expx) + { + std::swap(mx, my); + std::swap(expx, expy); + } + } + d = expx - expy; + my = (d<30) ? ((my>>d)|((my&((static_cast(1)<(mx+my, expx)); + #endif + } + + + + + + + + + + + + inline half pow(half x, half y) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::pow(detail::half2float(x.data_), detail::half2float(y.data_)))); + #else + int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15; + if(!absy || x.data_ == 0x3C00) + return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_)); + bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1))); + unsigned int sign = x.data_ & (static_cast((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15); + if(absx >= 0x7C00 || absy >= 0x7C00) + return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : + (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() : + (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U)))); + if(!absx) + return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign); + if((x.data_&0x8000) && !is_int) + return half(detail::binary, detail::invalid()); + if(x.data_ == 0xBC00) + return half(detail::binary, sign|0x3C00); + switch(y.data_) + { + case 0x3800: return sqrt(x); + case 0x3C00: return half(detail::binary, detail::check_underflow(x.data_)); + case 0x4000: return x * x; + case 0xBC00: return half(detail::binary, 0x3C00) / x; + } + for(; absx<0x400; absx<<=1,--exp) ; + detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = + (((ilog<<27)+((detail::log2(static_cast((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign; + for(exp=-11; m<0x80000000; m<<=1,--exp) ; + for(; absy<0x400; absy<<=1,--exp) ; + m = detail::multiply64(m, static_cast((absy&0x3FF)|0x400)<<21); + int i = m >> 31; + exp += (absy>>10) + i; + m <<= 1 - i; + if(exp < 0) + { + f = m >> -exp; + exp = 0; + } + else + { + f = (m<> (31-exp); + } + return half(detail::binary, detail::exp2_post(f, exp, ((msign&1)^(y.data_>>15))!=0, sign)); + #endif + } + + + + + + + + + + + + + + + + inline void sincos(half arg, half *sin, half *cos) + { + #ifdef HALF_ARITHMETIC_TYPE + detail::internal_t f = detail::half2float(arg.data_); + *sin = half(detail::binary, detail::float2half(std::sin(f))); + *cos = half(detail::binary, detail::float2half(std::cos(f))); + #else + int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k; + if(abs >= 0x7C00) + *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + else if(!abs) + { + *sin = arg; + *cos = half(detail::binary, 0x3C00); + } + else if(abs < 0x2500) + { + *sin = half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + } + else + { + if(half::round_style != std::round_to_nearest) + { + switch(abs) + { + case 0x48B7: + *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); + *cos = half(detail::binary, detail::rounded(0xBBFF, 1, 1)); + return; + case 0x598C: + *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x80FC, 1, 1)); + return; + case 0x6A64: + *sin = half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x27FF, 1, 1)); + return; + case 0x6D8C: + *sin = half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); + *cos = half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + return; + } + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + switch(k & 3) + { + case 1: sc = std::make_pair(sc.second, -sc.first); break; + case 2: sc = std::make_pair(-sc.first, -sc.second); break; + case 3: sc = std::make_pair(-sc.second, sc.first); break; + } + *sin = half(detail::binary, detail::fixed2half((sc.first^-static_cast(sign))+sign)); + *cos = half(detail::binary, detail::fixed2half(sc.second)); + } + #endif + } + + + + + - inline half sin(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::sin(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, k; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs < 0x2900) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - if(half::round_style != std::round_to_nearest) - switch(abs) - { - case 0x48B7: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); - case 0x6A64: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); - case 0x6D8C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); - } - std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); - detail::uint32 sign = -static_cast(((k>>1)&1)^(arg.data_>>15)); - return half(detail::binary, detail::fixed2half((((k&1) ? sc.second : sc.first)^sign) - sign)); - #endif - } + inline half sin(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::sin(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, k; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2900) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x48B7: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x1D07, 1, 1)); + case 0x6A64: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x3BFE, 1, 1)); + case 0x6D8C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x0FE6, 1, 1)); + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + detail::uint32 sign = -static_cast(((k>>1)&1)^(arg.data_>>15)); + return half(detail::binary, detail::fixed2half((((k&1) ? sc.second : sc.first)^sign) - sign)); + #endif + } @@ -3584,27 +3584,27 @@ namespace half_float - - - inline half cos(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::cos(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, k; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs < 0x2500) - return half(detail::binary, detail::rounded(0x3BFF, 1, 1)); - if(half::round_style != std::round_to_nearest && abs == 0x598C) - return half(detail::binary, detail::rounded(0x80FC, 1, 1)); - std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); - detail::uint32 sign = -static_cast(((k>>1)^k)&1); - return half(detail::binary, detail::fixed2half((((k&1) ? sc.first : sc.second)^sign) - sign)); - #endif - } + + + inline half cos(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::cos(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, k; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2500) + return half(detail::binary, detail::rounded(0x3BFF, 1, 1)); + if(half::round_style != std::round_to_nearest && abs == 0x598C) + return half(detail::binary, detail::rounded(0x80FC, 1, 1)); + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 28); + detail::uint32 sign = -static_cast(((k>>1)^k)&1); + return half(detail::binary, detail::fixed2half((((k&1) ? sc.first : sc.second)^sign) - sign)); + #endif + } @@ -3614,34 +3614,34 @@ namespace half_float - inline half tan(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::tan(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = 13, k; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs < 0x2700) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - if(half::round_style != std::round_to_nearest) - switch(abs) - { - case 0x658C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x07E6, 1, 1)); - case 0x7330: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x4B62, 1, 1)); - } - std::pair sc = detail::sincos(detail::angle_arg(abs, k), 30); - if(k & 1) - sc = std::make_pair(-sc.second, sc.first); - detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second); - detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx; - for(; my<0x80000000; my<<=1,--exp) ; - for(; mx<0x80000000; mx<<=1,++exp) ; - return half(detail::binary, detail::tangent_post(my, mx, exp, (signy^signx^arg.data_)&0x8000)); - #endif - } + inline half tan(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::tan(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = 13, k; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x658C: return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x07E6, 1, 1)); + case 0x7330: return half(detail::binary, detail::rounded((~arg.data_&0x8000)|0x4B62, 1, 1)); + } + std::pair sc = detail::sincos(detail::angle_arg(abs, k), 30); + if(k & 1) + sc = std::make_pair(-sc.second, sc.first); + detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second); + detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx; + for(; my<0x80000000; my<<=1,--exp) ; + for(; mx<0x80000000; mx<<=1,++exp) ; + return half(detail::binary, detail::tangent_post(my, mx, exp, (signy^signx^arg.data_)&0x8000)); + #endif + } @@ -3651,26 +3651,26 @@ namespace half_float - inline half asin(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::asin(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(!abs) - return arg; - if(abs >= 0x3C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : - detail::rounded(sign|0x3E48, 0, 1)); - if(abs < 0x2900) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3)) - return half(detail::binary, detail::rounded(arg.data_+1, 1, 1)); - std::pair sc = detail::atan2_args(abs); - detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26); - return half(detail::binary, detail::fixed2half(m, 14, sign)); - #endif - } + inline half asin(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::asin(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(!abs) + return arg; + if(abs >= 0x3C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : + detail::rounded(sign|0x3E48, 0, 1)); + if(abs < 0x2900) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3)) + return half(detail::binary, detail::rounded(arg.data_+1, 1, 1)); + std::pair sc = detail::atan2_args(abs); + detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26); + return half(detail::binary, detail::fixed2half(m, 14, sign)); + #endif + } @@ -3680,22 +3680,22 @@ namespace half_float - inline half acos(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::acos(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15; - if(!abs) - return half(detail::binary, detail::rounded(0x3E48, 0, 1)); - if(abs >= 0x3C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : - sign ? detail::rounded(0x4248, 0, 1) : 0); - std::pair cs = detail::atan2_args(abs); - detail::uint32 m = detail::atan2(cs.second, cs.first, 28); - return half(detail::binary, detail::fixed2half(sign ? (0xC90FDAA2-m) : m, 15, 0, sign)); - #endif - } + inline half acos(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::acos(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15; + if(!abs) + return half(detail::binary, detail::rounded(0x3E48, 0, 1)); + if(abs >= 0x3C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() : + sign ? detail::rounded(0x4248, 0, 1) : 0); + std::pair cs = detail::atan2_args(abs); + detail::uint32 m = detail::atan2(cs.second, cs.first, 28); + return half(detail::binary, detail::fixed2half(sign ? (0xC90FDAA2-m) : m, 15, 0, sign)); + #endif + } @@ -3705,25 +3705,25 @@ namespace half_float - inline half atan(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::atan(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? detail::rounded(sign|0x3E48, 0, 1) : detail::signal(arg.data_)); - if(abs <= 0x2700) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - int exp = (abs>>10) + (abs<=0x3FF); - detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10); - detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) : - detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28); - return half(detail::binary, detail::fixed2half(m, 14, sign)); - #endif - } + inline half atan(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? detail::rounded(sign|0x3E48, 0, 1) : detail::signal(arg.data_)); + if(abs <= 0x2700) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + int exp = (abs>>10) + (abs<=0x3FF); + detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10); + detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) : + detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28); + return half(detail::binary, detail::fixed2half(m, 14, sign)); + #endif + } @@ -3735,47 +3735,47 @@ namespace half_float - inline half atan2(half y, half x) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::atan2(detail::half2float(y.data_), detail::half2float(x.data_)))); - #else - unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000; - if(absx >= 0x7C00 || absy >= 0x7C00) - { - if(absx > 0x7C00 || absy > 0x7C00) - return half(detail::binary, detail::signal(x.data_, y.data_)); - if(absy == 0x7C00) - return half(detail::binary, (absx<0x7C00) ? detail::rounded(signy|0x3E48, 0, 1) : - signx ? detail::rounded(signy|0x40B6, 0, 1) : - detail::rounded(signy|0x3A48, 0, 1)); - return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); - } - if(!absy) - return signx ? half(detail::binary, detail::rounded(signy|0x4248, 0, 1)) : y; - if(!absx) - return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); - int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF); - if(d > (signx ? 18 : 12)) - return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); - if(signx && d < -11) - return half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); - if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) - { - for(; absy<0x400; absy<<=1,--d) ; - detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800; - int i = my < mx; - d -= i; - if(d < -25) - return half(detail::binary, detail::underflow(signy)); - my <<= 11 + i; - return half(detail::binary, detail::fixed2half(my/mx, d+14, signy, my%mx!=0)); - } - detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)), - ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1))); - return half(detail::binary, detail::fixed2half(signx ? (0xC90FDAA2-m) : m, 15, signy, signx)); - #endif - } + inline half atan2(half y, half x) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::atan2(detail::half2float(y.data_), detail::half2float(x.data_)))); + #else + unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000; + if(absx >= 0x7C00 || absy >= 0x7C00) + { + if(absx > 0x7C00 || absy > 0x7C00) + return half(detail::binary, detail::signal(x.data_, y.data_)); + if(absy == 0x7C00) + return half(detail::binary, (absx<0x7C00) ? detail::rounded(signy|0x3E48, 0, 1) : + signx ? detail::rounded(signy|0x40B6, 0, 1) : + detail::rounded(signy|0x3A48, 0, 1)); + return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + } + if(!absy) + return signx ? half(detail::binary, detail::rounded(signy|0x4248, 0, 1)) : y; + if(!absx) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF); + if(d > (signx ? 18 : 12)) + return half(detail::binary, detail::rounded(signy|0x3E48, 0, 1)); + if(signx && d < -11) + return half(detail::binary, detail::rounded(signy|0x4248, 0, 1)); + if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) + { + for(; absy<0x400; absy<<=1,--d) ; + detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800; + int i = my < mx; + d -= i; + if(d < -25) + return half(detail::binary, detail::underflow(signy)); + my <<= 11 + i; + return half(detail::binary, detail::fixed2half(my/mx, d+14, signy, my%mx!=0)); + } + detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)), + ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1))); + return half(detail::binary, detail::fixed2half(signx ? (0xC90FDAA2-m) : m, 15, signy, signx)); + #endif + } @@ -3790,25 +3790,25 @@ namespace half_float - inline half sinh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::sinh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - if(abs <= 0x2900) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27); - detail::uint32 m = mm.first - mm.second; - for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ; - unsigned int sign = arg.data_ & 0x8000; - if(exp > 29) - return half(detail::binary, detail::overflow(sign)); - return half(detail::binary, detail::fixed2half(m, exp, sign)); - #endif - } + inline half sinh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::sinh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + if(abs <= 0x2900) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27); + detail::uint32 m = mm.first - mm.second; + for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ; + unsigned int sign = arg.data_ & 0x8000; + if(exp > 29) + return half(detail::binary, detail::overflow(sign)); + return half(detail::binary, detail::fixed2half(m, exp, sign)); + #endif + } @@ -3818,24 +3818,24 @@ namespace half_float - inline half cosh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::cosh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00); - std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26); - detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31; - m = (m>>i) | (m&i) | 0x80000000; - if((exp+=13+i) > 29) - return half(detail::binary, detail::overflow()); - return half(detail::binary, detail::fixed2half(m, exp)); - #endif - } + inline half cosh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::cosh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00); + std::pair mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26); + detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31; + m = (m>>i) | (m&i) | 0x80000000; + if((exp+=13+i) > 29) + return half(detail::binary, detail::overflow()); + return half(detail::binary, detail::fixed2half(m, exp)); + #endif + } @@ -3845,29 +3845,29 @@ namespace half_float - inline half tanh(half arg) - { - #ifdef HALF_ARITHMETIC_TYPE - return half(detail::binary, detail::float2half(std::tanh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return arg; - if(abs >= 0x7C00) - return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000)); - if(abs >= 0x4500) - return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); - if(abs < 0x2700) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - if(half::round_style != std::round_to_nearest && abs == 0x2D3F) - return half(detail::binary, detail::rounded(arg.data_-3, 0, 1)); - std::pair mm = detail::hyperbolic_args(abs, exp, 27); - detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31; - for(exp=13; my<0x80000000; my<<=1,--exp) ; - mx = (mx>>i) | 0x80000000; - return half(detail::binary, detail::tangent_post(my, mx, exp-i, arg.data_&0x8000)); - #endif - } + inline half tanh(half arg) + { + #ifdef HALF_ARITHMETIC_TYPE + return half(detail::binary, detail::float2half(std::tanh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return arg; + if(abs >= 0x7C00) + return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000)); + if(abs >= 0x4500) + return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest && abs == 0x2D3F) + return half(detail::binary, detail::rounded(arg.data_-3, 0, 1)); + std::pair mm = detail::hyperbolic_args(abs, exp, 27); + detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31; + for(exp=13; my<0x80000000; my<<=1,--exp) ; + mx = (mx>>i) | 0x80000000; + return half(detail::binary, detail::tangent_post(my, mx, exp-i, arg.data_&0x8000)); + #endif + } @@ -3877,25 +3877,25 @@ namespace half_float - inline half asinh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::asinh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if(!abs || abs >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - if(abs <= 0x2900) - return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); - if(half::round_style != std::round_to_nearest) - switch(abs) - { - case 0x32D4: return half(detail::binary, detail::rounded(arg.data_-13, 1, 1)); - case 0x3B5B: return half(detail::binary, detail::rounded(arg.data_-197, 1, 1)); - } - return half(detail::binary, detail::area(arg.data_)); - #endif - } + inline half asinh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::asinh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if(!abs || abs >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + if(abs <= 0x2900) + return half(detail::binary, detail::rounded(arg.data_-1, 1, 1)); + if(half::round_style != std::round_to_nearest) + switch(abs) + { + case 0x32D4: return half(detail::binary, detail::rounded(arg.data_-13, 1, 1)); + case 0x3B5B: return half(detail::binary, detail::rounded(arg.data_-197, 1, 1)); + } + return half(detail::binary, detail::area(arg.data_)); + #endif + } @@ -3905,21 +3905,21 @@ namespace half_float - inline half acosh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::acosh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if((arg.data_&0x8000) || abs < 0x3C00) - return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs == 0x3C00) - return half(detail::binary, 0); - if(arg.data_ >= 0x7C00) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - return half(detail::binary, detail::area(arg.data_)); - #endif - } + inline half acosh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::acosh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if((arg.data_&0x8000) || abs < 0x3C00) + return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs == 0x3C00) + return half(detail::binary, 0); + if(arg.data_ >= 0x7C00) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + return half(detail::binary, detail::area(arg.data_)); + #endif + } @@ -3930,25 +3930,25 @@ namespace half_float - inline half atanh(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::atanh(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF, exp = 0; - if(!abs) - return arg; - if(abs >= 0x3C00) - return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); - if(abs < 0x2700) - return half(detail::binary, detail::rounded(arg.data_, 0, 1)); - detail::uint32 m = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m; - for(; mx<0x80000000; mx<<=1,++exp) ; - int i = my >= mx, s; - return half(detail::binary, detail::log2_post(detail::log2( - (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000)); - #endif - } + inline half atanh(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::atanh(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF, exp = 0; + if(!abs) + return arg; + if(abs >= 0x3C00) + return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_)); + if(abs < 0x2700) + return half(detail::binary, detail::rounded(arg.data_, 0, 1)); + detail::uint32 m = static_cast((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m; + for(; mx<0x80000000; mx<<=1,++exp) ; + int i = my >= mx, s; + return half(detail::binary, detail::log2_post(detail::log2( + (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000)); + #endif + } @@ -3963,19 +3963,19 @@ namespace half_float - inline half erf(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::erf(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF; - if(!abs || abs >= 0x7C00) - return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg; - if(abs >= 0x4200) - return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); - return half(detail::binary, detail::erf(arg.data_)); - #endif - } + inline half erf(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::erf(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF; + if(!abs || abs >= 0x7C00) + return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg; + if(abs >= 0x4200) + return half(detail::binary, detail::rounded((arg.data_&0x8000)|0x3BFF, 1, 1)); + return half(detail::binary, detail::erf(arg.data_)); + #endif + } @@ -3985,21 +3985,21 @@ namespace half_float - inline half erfc(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::erfc(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(abs >= 0x7C00) - return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg; - if(!abs) - return half(detail::binary, 0x3C00); - if(abs >= 0x4400) - return half(detail::binary, detail::rounded((sign>>1)-(sign>>15), sign>>15, 1)); - return half(detail::binary, detail::erf(arg.data_)); - #endif - } + inline half erfc(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::erfc(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(abs >= 0x7C00) + return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg; + if(!abs) + return half(detail::binary, 0x3C00); + if(abs >= 0x4400) + return half(detail::binary, detail::rounded((sign>>1)-(sign>>15), sign>>15, 1)); + return half(detail::binary, detail::erf(arg.data_)); + #endif + } @@ -4010,21 +4010,21 @@ namespace half_float - inline half lgamma(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::lgamma(detail::half2float(arg.data_)))); - #else - int abs = arg.data_ & 0x7FFF; - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); - if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) - return half(detail::binary, detail::pole()); - if(arg.data_ == 0x3C00 || arg.data_ == 0x4000) - return half(detail::binary, 0); - return half(detail::binary, detail::gamma(arg.data_)); - #endif - } + inline half lgamma(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::lgamma(detail::half2float(arg.data_)))); + #else + int abs = arg.data_ & 0x7FFF; + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); + if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) + return half(detail::binary, detail::pole()); + if(arg.data_ == 0x3C00 || arg.data_ == 0x4000) + return half(detail::binary, 0); + return half(detail::binary, detail::gamma(arg.data_)); + #endif + } @@ -4035,27 +4035,27 @@ namespace half_float - inline half tgamma(half arg) - { - #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH - return half(detail::binary, detail::float2half(std::tgamma(detail::half2float(arg.data_)))); - #else - unsigned int abs = arg.data_ & 0x7FFF; - if(!abs) - return half(detail::binary, detail::pole(arg.data_)); - if(abs >= 0x7C00) - return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); - if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) - return half(detail::binary, detail::invalid()); - if(arg.data_ >= 0xCA80) - return half(detail::binary, detail::underflow((1-((abs>>(25-(abs>>10)))&1))<<15)); - if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000)) - return half(detail::binary, detail::overflow()); - if(arg.data_ == 0x3C00) - return arg; - return half(detail::binary, detail::gamma(arg.data_)); - #endif - } + inline half tgamma(half arg) + { + #if defined(HALF_ARITHMETIC_TYPE) && HALF_ENABLE_CPP11_CMATH + return half(detail::binary, detail::float2half(std::tgamma(detail::half2float(arg.data_)))); + #else + unsigned int abs = arg.data_ & 0x7FFF; + if(!abs) + return half(detail::binary, detail::pole(arg.data_)); + if(abs >= 0x7C00) + return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_)); + if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1)))) + return half(detail::binary, detail::invalid()); + if(arg.data_ >= 0xCA80) + return half(detail::binary, detail::underflow((1-((abs>>(25-(abs>>10)))&1))<<15)); + if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000)) + return half(detail::binary, detail::overflow()); + if(arg.data_ == 0x3C00) + return arg; + return half(detail::binary, detail::gamma(arg.data_)); + #endif + } @@ -4068,7 +4068,7 @@ namespace half_float - inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + inline half ceil(half arg) { return half(detail::binary, detail::integral(arg.data_)); } @@ -4076,7 +4076,7 @@ namespace half_float - inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + inline half floor(half arg) { return half(detail::binary, detail::integral(arg.data_)); } @@ -4084,7 +4084,7 @@ namespace half_float - inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + inline half trunc(half arg) { return half(detail::binary, detail::integral(arg.data_)); } @@ -4092,14 +4092,14 @@ namespace half_float - inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + inline half round(half arg) { return half(detail::binary, detail::integral(arg.data_)); } - inline long lround(half arg) { return detail::half2int(arg.data_); } + inline long lround(half arg) { return detail::half2int(arg.data_); } @@ -4107,7 +4107,7 @@ namespace half_float - inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } + inline half rint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } @@ -4115,21 +4115,21 @@ namespace half_float - inline long lrint(half arg) { return detail::half2int(arg.data_); } + inline long lrint(half arg) { return detail::half2int(arg.data_); } - inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } -#if HALF_ENABLE_CPP11_LONG_LONG + inline half nearbyint(half arg) { return half(detail::binary, detail::integral(arg.data_)); } +#if HALF_ENABLE_CPP11_LONG_LONG - inline long long llround(half arg) { return detail::half2int(arg.data_); } + inline long long llround(half arg) { return detail::half2int(arg.data_); } @@ -4137,8 +4137,8 @@ namespace half_float - inline long long llrint(half arg) { return detail::half2int(arg.data_); } -#endif + inline long long llrint(half arg) { return detail::half2int(arg.data_); } +#endif @@ -4151,16 +4151,16 @@ namespace half_float - inline half frexp(half arg, int *exp) - { - *exp = 0; - unsigned int abs = arg.data_ & 0x7FFF; - if(abs >= 0x7C00 || !abs) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1,--*exp) ; - *exp += (abs>>10) - 14; - return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF)); - } + inline half frexp(half arg, int *exp) + { + *exp = 0; + unsigned int abs = arg.data_ & 0x7FFF; + if(abs >= 0x7C00 || !abs) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1,--*exp) ; + *exp += (abs>>10) - 14; + return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF)); + } @@ -4171,22 +4171,22 @@ namespace half_float - inline half scalbln(half arg, long exp) - { - unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; - if(abs >= 0x7C00 || !abs) - return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; - for(; abs<0x400; abs<<=1,--exp) ; - exp += abs >> 10; - if(exp > 30) - return half(detail::binary, detail::overflow(sign)); - else if(exp < -10) - return half(detail::binary, detail::underflow(sign)); - else if(exp > 0) - return half(detail::binary, sign|(exp<<10)|(abs&0x3FF)); - unsigned int m = (abs&0x3FF) | 0x400; - return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0)); - } + inline half scalbln(half arg, long exp) + { + unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000; + if(abs >= 0x7C00 || !abs) + return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; + for(; abs<0x400; abs<<=1,--exp) ; + exp += abs >> 10; + if(exp > 30) + return half(detail::binary, detail::overflow(sign)); + else if(exp < -10) + return half(detail::binary, detail::underflow(sign)); + else if(exp > 0) + return half(detail::binary, sign|(exp<<10)|(abs&0x3FF)); + unsigned int m = (abs&0x3FF) | 0x400; + return half(detail::binary, detail::rounded(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0)); + } @@ -4197,7 +4197,7 @@ namespace half_float - inline half scalbn(half arg, int exp) { return scalbln(arg, exp); } + inline half scalbn(half arg, int exp) { return scalbln(arg, exp); } @@ -4208,7 +4208,7 @@ namespace half_float - inline half ldexp(half arg, int exp) { return scalbln(arg, exp); } + inline half ldexp(half arg, int exp) { return scalbln(arg, exp); } @@ -4216,25 +4216,25 @@ namespace half_float - inline half modf(half arg, half *iptr) - { - unsigned int abs = arg.data_ & 0x7FFF; - if(abs > 0x7C00) - { - arg = half(detail::binary, detail::signal(arg.data_)); - return *iptr = arg, arg; - } - if(abs >= 0x6400) - return *iptr = arg, half(detail::binary, arg.data_&0x8000); - if(abs < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if(!m) - return half(detail::binary, arg.data_&0x8000); - for(; m<0x400; m<<=1,--exp) ; - return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF)); - } + inline half modf(half arg, half *iptr) + { + unsigned int abs = arg.data_ & 0x7FFF; + if(abs > 0x7C00) + { + arg = half(detail::binary, detail::signal(arg.data_)); + return *iptr = arg, arg; + } + if(abs >= 0x6400) + return *iptr = arg, half(detail::binary, arg.data_&0x8000); + if(abs < 0x3C00) + return iptr->data_ = arg.data_ & 0x8000, arg; + unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask; + iptr->data_ = arg.data_ & ~mask; + if(!m) + return half(detail::binary, arg.data_&0x8000); + for(; m<0x400; m<<=1,--exp) ; + return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF)); + } @@ -4244,17 +4244,17 @@ namespace half_float - inline int ilogb(half arg) - { - int abs = arg.data_ & 0x7FFF, exp; - if(!abs || abs >= 0x7C00) - { - detail::raise(FE_INVALID); - return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN; - } - for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; - return exp; - } + inline int ilogb(half arg) + { + int abs = arg.data_ & 0x7FFF, exp; + if(!abs || abs >= 0x7C00) + { + detail::raise(FE_INVALID); + return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN; + } + for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; + return exp; + } @@ -4262,23 +4262,23 @@ namespace half_float - inline half logb(half arg) - { - int abs = arg.data_ & 0x7FFF, exp; - if(!abs) - return half(detail::binary, detail::pole(0x8000)); - if(abs >= 0x7C00) - return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); - for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; - unsigned int value = static_cast(exp<0) << 15; - if(exp) - { - unsigned int m = std::abs(exp) << 6; - for(exp=18; m<0x400; m<<=1,--exp) ; - value |= (exp<<10) + m; - } - return half(detail::binary, value); - } + inline half logb(half arg) + { + int abs = arg.data_ & 0x7FFF, exp; + if(!abs) + return half(detail::binary, detail::pole(0x8000)); + if(abs >= 0x7C00) + return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_)); + for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ; + unsigned int value = static_cast(exp<0) << 15; + if(exp) + { + unsigned int m = std::abs(exp) << 6; + for(exp=18; m<0x400; m<<=1,--exp) ; + value |= (exp<<10) + m; + } + return half(detail::binary, value); + } @@ -4288,24 +4288,24 @@ namespace half_float - inline half nextafter(half from, half to) - { - int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if(fabs > 0x7C00 || tabs > 0x7C00) - return half(detail::binary, detail::signal(from.data_, to.data_)); - if(from.data_ == to.data_ || !(fabs|tabs)) - return to; - if(!fabs) - { - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); - return half(detail::binary, (to.data_&0x8000)+1); - } - unsigned int out = from.data_ + (((from.data_>>15)^static_cast( - (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1; - detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00); - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400); - return half(detail::binary, out); - } + inline half nextafter(half from, half to) + { + int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; + if(fabs > 0x7C00 || tabs > 0x7C00) + return half(detail::binary, detail::signal(from.data_, to.data_)); + if(from.data_ == to.data_ || !(fabs|tabs)) + return to; + if(!fabs) + { + detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); + return half(detail::binary, (to.data_&0x8000)+1); + } + unsigned int out = from.data_ + (((from.data_>>15)^static_cast( + (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1; + detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00); + detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400); + return half(detail::binary, out); + } @@ -4315,31 +4315,31 @@ namespace half_float - inline half nexttoward(half from, long double to) - { - int fabs = from.data_ & 0x7FFF; - if(fabs > 0x7C00) - return half(detail::binary, detail::signal(from.data_)); - long double lfrom = static_cast(from); - if(detail::builtin_isnan(to) || lfrom == to) - return half(static_cast(to)); - if(!fabs) - { - detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); - return half(detail::binary, (static_cast(detail::builtin_signbit(to))<<15)+1); - } - unsigned int out = from.data_ + (((from.data_>>15)^static_cast(lfrom 0x7C00) + return half(detail::binary, detail::signal(from.data_)); + long double lfrom = static_cast(from); + if(detail::builtin_isnan(to) || lfrom == to) + return half(static_cast(to)); + if(!fabs) + { + detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT); + return half(detail::binary, (static_cast(detail::builtin_signbit(to))<<15)+1); + } + unsigned int out = from.data_ + (((from.data_>>15)^static_cast(lfrom 0x7C00; } + inline HALF_CONSTEXPR bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } - inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } + inline HALF_CONSTEXPR bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } - inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; } + inline HALF_CONSTEXPR bool signbit(half arg) { return (arg.data_&0x8000) != 0; } @@ -4409,10 +4409,10 @@ namespace half_float - inline HALF_CONSTEXPR bool isgreater(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } + inline HALF_CONSTEXPR bool isgreater(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } @@ -4420,10 +4420,10 @@ namespace half_float - inline HALF_CONSTEXPR bool isgreaterequal(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } + inline HALF_CONSTEXPR bool isgreaterequal(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } @@ -4431,10 +4431,10 @@ namespace half_float - inline HALF_CONSTEXPR bool isless(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } + inline HALF_CONSTEXPR bool isless(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } @@ -4442,10 +4442,10 @@ namespace half_float - inline HALF_CONSTEXPR bool islessequal(half x, half y) - { - return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); - } + inline HALF_CONSTEXPR bool islessequal(half x, half y) + { + return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y); + } @@ -4453,10 +4453,10 @@ namespace half_float - inline HALF_CONSTEXPR bool islessgreater(half x, half y) - { - return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y); - } + inline HALF_CONSTEXPR bool islessgreater(half x, half y) + { + return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y); + } @@ -4464,7 +4464,7 @@ namespace half_float - inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); } + inline HALF_CONSTEXPR bool isunordered(half x, half y) { return isnan(x) || isnan(y); } @@ -4484,7 +4484,7 @@ namespace half_float - template T half_cast(U arg) { return detail::half_caster::cast(arg); } + template T half_cast(U arg) { return detail::half_caster::cast(arg); } @@ -4500,7 +4500,7 @@ namespace half_float - template T half_cast(U arg) { return detail::half_caster::cast(arg); } + template T half_cast(U arg) { return detail::half_caster::cast(arg); } @@ -4515,7 +4515,7 @@ namespace half_float - inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; } + inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; } @@ -4524,7 +4524,7 @@ namespace half_float - inline int fetestexcept(int excepts) { return detail::errflags() & excepts; } + inline int fetestexcept(int excepts) { return detail::errflags() & excepts; } @@ -4535,7 +4535,7 @@ namespace half_float - inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; } + inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; } @@ -4545,7 +4545,7 @@ namespace half_float - inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; } + inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; } @@ -4556,7 +4556,7 @@ namespace half_float - inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; } + inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; } @@ -4569,33 +4569,33 @@ namespace half_float - inline void fethrowexcept(int excepts, const char *msg = "") - { - excepts &= detail::errflags(); - if(excepts & (FE_INVALID|FE_DIVBYZERO)) - throw std::domain_error(msg); - if(excepts & FE_OVERFLOW) - throw std::overflow_error(msg); - if(excepts & FE_UNDERFLOW) - throw std::underflow_error(msg); - if(excepts & FE_INEXACT) - throw std::range_error(msg); - } + inline void fethrowexcept(int excepts, const char *msg = "") + { + excepts &= detail::errflags(); + if(excepts & (FE_INVALID|FE_DIVBYZERO)) + throw std::domain_error(msg); + if(excepts & FE_OVERFLOW) + throw std::overflow_error(msg); + if(excepts & FE_UNDERFLOW) + throw std::underflow_error(msg); + if(excepts & FE_INEXACT) + throw std::range_error(msg); + } -} +} -#undef HALF_UNUSED_NOERR -#undef HALF_CONSTEXPR -#undef HALF_CONSTEXPR_CONST -#undef HALF_CONSTEXPR_NOERR -#undef HALF_NOEXCEPT -#undef HALF_NOTHROW -#undef HALF_THREAD_LOCAL -#undef HALF_TWOS_COMPLEMENT_INT -#ifdef HALF_POP_WARNINGS - #pragma warning(pop) - #undef HALF_POP_WARNINGS -#endif +#undef HALF_UNUSED_NOERR +#undef HALF_CONSTEXPR +#undef HALF_CONSTEXPR_CONST +#undef HALF_CONSTEXPR_NOERR +#undef HALF_NOEXCEPT +#undef HALF_NOTHROW +#undef HALF_THREAD_LOCAL +#undef HALF_TWOS_COMPLEMENT_INT +#ifdef HALF_POP_WARNINGS + #pragma warning(pop) + #undef HALF_POP_WARNINGS +#endif -#endif +#endif diff --git a/python/src/ngtpy.cpp b/python/src/ngtpy.cpp index 23e9d04..023115c 100644 --- a/python/src/ngtpy.cpp +++ b/python/src/ngtpy.cpp @@ -112,7 +112,7 @@ class Index : public NGT::Index { } void batchInsert( - py::array_t objects, + py::array_t objects, size_t numThreads = 16, bool debug = false ) { @@ -135,7 +135,7 @@ class Index : public NGT::Index { } int insert( - py::array_t object, + py::array_t object, bool debug = false ) { py::buffer_info info = object.request(); @@ -197,7 +197,7 @@ class Index : public NGT::Index { NGT::ResultPriorityQueue &r = sc.getWorkingResult(); py::array_t ids(r.size()); py::buffer_info idsinfo = ids.request(); - int *endptr = reinterpret_cast(idsinfo.ptr); + int *endptr = reinterpret_cast(idsinfo.ptr); int *ptr = endptr + (r.size() - 1); if (zeroNumbering) { while (ptr >= endptr) { @@ -261,7 +261,7 @@ class Index : public NGT::Index { if (!withDistance) { py::array_t ids(rs.size()); py::buffer_info idsinfo = ids.request(); - int *ptr = reinterpret_cast(idsinfo.ptr); + int *ptr = reinterpret_cast(idsinfo.ptr); if (zeroNumbering) { for (auto ri = rs.begin(); ri != rs.end(); ++ri) { *ptr++ = (*ri).id - 1; @@ -359,7 +359,7 @@ class Index : public NGT::Index { class Optimizer : public NGT::GraphOptimizer { public: - using NGT::GraphOptimizer::GraphOptimizer; + using NGT::GraphOptimizer::GraphOptimizer; int optimizeNumberOfEdgesForANNG( const std::string path, // anng index path @@ -445,7 +445,7 @@ class QuantizedIndex : public NGTQG::Index { NGT::ResultPriorityQueue &r = sc.getWorkingResult(); py::array_t ids(r.size()); py::buffer_info idsinfo = ids.request(); - int *endptr = reinterpret_cast(idsinfo.ptr); + int *endptr = reinterpret_cast(idsinfo.ptr); int *ptr = endptr + (r.size() - 1); if (zeroNumbering) { while (ptr >= endptr) { @@ -646,7 +646,7 @@ class QuantizedBlobIndex : public QBG::Index { } void batchInsert( - py::array_t objects, + py::array_t objects, bool debug = false ) { py::buffer_info info = objects.request(); @@ -881,7 +881,7 @@ class QuantizedBlobIndex : public QBG::Index { NGT::ResultPriorityQueue &r = sc.getWorkingResult(); py::array_t ids(r.size()); py::buffer_info idsinfo = ids.request(); - int *endptr = reinterpret_cast(idsinfo.ptr); + int *endptr = reinterpret_cast(idsinfo.ptr); int *ptr = endptr + (r.size() - 1); if (zeroNumbering) { while (ptr >= endptr) { @@ -974,31 +974,31 @@ PYBIND11_MODULE(ngtpy, m) { m.attr("__version__") = NGT_VERSION; - m.def("create", &::Index::create, - py::arg("path"), - py::arg("dimension"), - py::arg("edge_size_for_creation") = 10, - py::arg("edge_size_for_search") = 40, - py::arg("distance_type") = "L2", + m.def("create", &::Index::create, + py::arg("path"), + py::arg("dimension"), + py::arg("edge_size_for_creation") = 10, + py::arg("edge_size_for_search") = 40, + py::arg("distance_type") = "L2", py::arg("object_type") = "Float"); py::class_(m, "Index") - .def(py::init(), + .def(py::init(), py::arg("path"), py::arg("read_only") = false, py::arg("zero_based_numbering") = true, py::arg("tree_disabled") = false, py::arg("log_disabled") = false) - .def("search", &::Index::search, - py::arg("query"), - py::arg("size") = 0, - py::arg("epsilon") = -FLT_MAX, + .def("search", &::Index::search, + py::arg("query"), + py::arg("size") = 0, + py::arg("epsilon") = -FLT_MAX, py::arg("edge_size") = INT_MIN, - py::arg("expected_accuracy") = -FLT_MAX, + py::arg("expected_accuracy") = -FLT_MAX, py::arg("with_distance") = true) - .def("linear_search", &::Index::linearSearch, - py::arg("query"), - py::arg("size") = 0, + .def("linear_search", &::Index::linearSearch, + py::arg("query"), + py::arg("size") = 0, py::arg("with_distance") = true) .def("get_num_of_distance_computations", &::Index::getNumOfDistanceComputations) .def("save", (void (NGT::Index::*)()) &NGT::Index::save) @@ -1032,9 +1032,9 @@ PYBIND11_MODULE(ngtpy, m) { py::arg("epsilon") = -FLT_MAX, py::arg("edge_size") = INT_MIN, py::arg("expected_accuracy") = -FLT_MAX) - .def("export_index", (void (NGT::Index::*)(const std::string&)) &NGT::Index::exportIndex, + .def("export_index", (void (NGT::Index::*)(const std::string&)) &NGT::Index::exportIndex, py::arg("path")) - .def("import_index", (void (NGT::Index::*)(const std::string&)) &NGT::Index::importIndex, + .def("import_index", (void (NGT::Index::*)(const std::string&)) &NGT::Index::importIndex, py::arg("path")); py::class_(m, "Optimizer") @@ -1050,10 +1050,10 @@ PYBIND11_MODULE(ngtpy, m) { py::arg("gt_epsilon") = -DBL_MAX, py::arg("margin") = -1.0, py::arg("log_disabled") = false) - .def("execute", &NGT::GraphOptimizer::execute, + .def("execute", &NGT::GraphOptimizer::execute, py::arg("in_path"), py::arg("out_path")) - .def("adjust_search_coefficients", &NGT::GraphOptimizer::adjustSearchCoefficients, + .def("adjust_search_coefficients", &NGT::GraphOptimizer::adjustSearchCoefficients, py::arg("path")) .def("set", (void (NGT::GraphOptimizer::*)(int, int, int, int, float, float, float, float, double, double)) &NGT::GraphOptimizer::set, @@ -1072,7 +1072,7 @@ PYBIND11_MODULE(ngtpy, m) { py::arg("search_parameter_optimization") = true, py::arg("prefetch_parameter_optimization") = true, py::arg("accuracy_table_generation") = true) - .def("optimize_search_parameters", &NGT::GraphOptimizer::optimizeSearchParameters, + .def("optimize_search_parameters", &NGT::GraphOptimizer::optimizeSearchParameters, py::arg("path")) .def("optimize_number_of_edges_for_anng", &::Optimizer::optimizeNumberOfEdgesForANNG, py::arg("path"), @@ -1085,7 +1085,7 @@ PYBIND11_MODULE(ngtpy, m) { py::arg("max_num_of_edges") = -1); py::class_(m, "QuantizedIndex") - .def(py::init(), + .def(py::init(), py::arg("path"), py::arg("max_no_of_edges") = 128, py::arg("zero_based_numbering") = true, @@ -1115,7 +1115,7 @@ PYBIND11_MODULE(ngtpy, m) { py::class_(m, "QuantizedBlobIndex") - .def(py::init(), + .def(py::init(), py::arg("path"), py::arg("max_no_of_edges") = 128, py::arg("zero_based_numbering") = true, diff --git a/samples/jaccard-sparse/jaccard-sparse.cpp b/samples/jaccard-sparse/jaccard-sparse.cpp index e1286dd..f39f714 100644 --- a/samples/jaccard-sparse/jaccard-sparse.cpp +++ b/samples/jaccard-sparse/jaccard-sparse.cpp @@ -14,7 +14,7 @@ void help() { cerr << " command : info create search append" << endl; } -void +void append(NGT::Args &args) { const string usage = "Usage: jaccard-sparse append [-p #-of-thread] [-n data-size] " @@ -102,8 +102,8 @@ search(NGT::Index &index, NGT::Command::SearchParameters &searchParameters, ostr return; } - if (searchParameters.outputMode[0] == 'e') { - stream << "# Beginning of Evaluation" << endl; + if (searchParameters.outputMode[0] == 'e') { + stream << "# Beginning of Evaluation" << endl; } string line; @@ -174,8 +174,8 @@ search(NGT::Index &index, NGT::Command::SearchParameters &searchParameters, ostr stream << "# Number of queries=" << queryCount << endl; stream << "# End of Evaluation" << endl; } else { - stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" + stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " + << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" << endl; } }