From eb613b68557d27c056bdc5e78318df6c7bd27f55 Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Sat, 16 Dec 2023 19:44:09 -0800 Subject: [PATCH 01/10] Minimal change to make this compile (added <cstdint>) --- libhdt/src/util/StopWatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libhdt/src/util/StopWatch.cpp b/libhdt/src/util/StopWatch.cpp index 2edc368a..a3e44ee3 100644 --- a/libhdt/src/util/StopWatch.cpp +++ b/libhdt/src/util/StopWatch.cpp @@ -31,6 +31,7 @@ #include #include // For memcpy() +#include <cstdint> // for uint64_t etc #include "StopWatch.hpp" From 529f7741cf177dda6d96ada23c879f9a8df54ff3 Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Wed, 20 Dec 2023 08:57:38 -0800 Subject: [PATCH 02/10] Fix some compiler warnings --- libcds/include/RMQ_succinct.h | 2 +- libcds/include/TextIndexCSA.h | 2 +- libcds/src/static/bitsequence/BitSequenceRRR.cpp | 2 +- libhdt/src/rdf/RDFParser.cpp | 1 + libhdt/tools/hdtSearch.cpp | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libcds/include/RMQ_succinct.h b/libcds/include/RMQ_succinct.h index 6e79d9cb..ea3dd92d 100644 --- a/libcds/include/RMQ_succinct.h +++ b/libcds/include/RMQ_succinct.h @@ -55,7 +55,7 @@ namespace cds_static void save(ostream & fp); RMQ_succinct * load(istream & fp); - ~RMQ_succinct(); + virtual ~RMQ_succinct(); protected: /* size of array a*/ diff --git a/libcds/include/TextIndexCSA.h b/libcds/include/TextIndexCSA.h index 64670875..560ed4f8 100644 --- a/libcds/include/TextIndexCSA.h +++ b/libcds/include/TextIndexCSA.h @@ -49,7 +49,7 @@ namespace cds_static **/ TextIndexCSA(uchar *text, ulong length, char *build_options); - ~TextIndexCSA(); + virtual ~TextIndexCSA(); /* Writes in numocc the number of occurrences of the substring * pattern[0..length-1] found in the text indexed by index. 
*/ diff --git a/libcds/src/static/bitsequence/BitSequenceRRR.cpp b/libcds/src/static/bitsequence/BitSequenceRRR.cpp index de16e66f..e4cd2cda 100644 --- a/libcds/src/static/bitsequence/BitSequenceRRR.cpp +++ b/libcds/src/static/bitsequence/BitSequenceRRR.cpp @@ -422,7 +422,7 @@ namespace cds_static ret->create_sampling(ret->sample_rate); return ret; } - catch(exception e) { + catch(const exception&) { delete ret; } return NULL; diff --git a/libhdt/src/rdf/RDFParser.cpp b/libhdt/src/rdf/RDFParser.cpp index fadd9004..245a2c8e 100644 --- a/libhdt/src/rdf/RDFParser.cpp +++ b/libhdt/src/rdf/RDFParser.cpp @@ -24,6 +24,7 @@ RDFParserCallback *RDFParserCallback::getParserCallback(RDFNotation notation) { #else throw ParseException("No Parser available for input RDF Format"); #endif + return nullptr; } } diff --git a/libhdt/tools/hdtSearch.cpp b/libhdt/tools/hdtSearch.cpp index 0c4a283e..6f6c838b 100644 --- a/libhdt/tools/hdtSearch.cpp +++ b/libhdt/tools/hdtSearch.cpp @@ -98,7 +98,7 @@ void iterate(HDT *hdt, char *query, ostream &out, bool measure, uint32_t offset) it->skip(offset); offset = 0; } - catch (const runtime_error error) { + catch (const runtime_error&) { /*invalid offset*/ interruptSignal = 1; } From b9935438de7389043cf9a62f8bdea5cebff7bffe Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Wed, 20 Dec 2023 11:26:47 -0800 Subject: [PATCH 03/10] Fix some compiler warnings (2) --- libcds/include/WaveletTreeNoptrsS.h | 2 +- libcds/src/static/sequence/WaveletMatrix.cpp | 6 +++--- libcds/src/static/sequence/WaveletTreeNoptrsS.cpp | 9 +++++---- libhdt/src/libdcs/CSD_FMIndex.cpp | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/libcds/include/WaveletTreeNoptrsS.h b/libcds/include/WaveletTreeNoptrsS.h index 816bd67f..1632730e 100644 --- a/libcds/include/WaveletTreeNoptrsS.h +++ b/libcds/include/WaveletTreeNoptrsS.h @@ -89,7 +89,7 @@ namespace cds_static uint set(uint val, uint ind) const; /** Recursive function for building the Wavelet Tree. 
*/ - void build_level(uint **bm, uint *symbols, uint length, uint *occs); + void build_level(uint **bm, uint *symbols, uint length); }; }; #endif diff --git a/libcds/src/static/sequence/WaveletMatrix.cpp b/libcds/src/static/sequence/WaveletMatrix.cpp index c92a156d..9e29eed5 100644 --- a/libcds/src/static/sequence/WaveletMatrix.cpp +++ b/libcds/src/static/sequence/WaveletMatrix.cpp @@ -24,9 +24,11 @@ namespace cds_static { - WaveletMatrix::WaveletMatrix(const Array &symbols2, BitSequenceBuilder * bmb, Mapper * am) : Sequence(n) { + WaveletMatrix::WaveletMatrix(const Array &symbols2, BitSequenceBuilder * bmb, Mapper * am) : Sequence(0) { bmb->use(); n = symbols2.getLength(); + this->length = n; // sets Sequence::length (Sequence(0) in initializer) + uint *symbols = new uint[n]; this->am = am; am->use(); @@ -81,8 +83,6 @@ namespace cds_static delete [] _bm; // delete [] oc; bmb->unuse(); - - this->length = n; } WaveletMatrix::WaveletMatrix(uint * symbols, size_t n, BitSequenceBuilder * bmb, Mapper * am, bool deleteSymbols) : Sequence(n) { diff --git a/libcds/src/static/sequence/WaveletTreeNoptrsS.cpp b/libcds/src/static/sequence/WaveletTreeNoptrsS.cpp index 8c41b0cc..186d8c46 100644 --- a/libcds/src/static/sequence/WaveletTreeNoptrsS.cpp +++ b/libcds/src/static/sequence/WaveletTreeNoptrsS.cpp @@ -24,9 +24,10 @@ namespace cds_static { - WaveletTreeNoptrsS::WaveletTreeNoptrsS(const Array &symb, BitSequenceBuilder * bmb, Mapper * am) : Sequence(n) { + WaveletTreeNoptrsS::WaveletTreeNoptrsS(const Array &symb, BitSequenceBuilder * bmb, Mapper * am) : Sequence(0) { bmb->use(); this->n=symb.getLength(); + this->length = this->n; // sets Sequence::length (Sequence(0) in initializer) this->am=am; bool deleteSymbols = true; am->use(); @@ -80,7 +81,7 @@ namespace cds_static _bm[i][j]=0; } - build_level(_bm,new_symb,new_n,occurrences); + build_level(_bm,new_symb,new_n); bitstring = new BitSequence*[height]; for(uint i=0;ibuild(_bm[i],new_n); @@ -148,7 +149,7 @@ namespace 
cds_static _bm[i][j]=0; } - build_level(_bm,new_symb,new_n,occurrences); + build_level(_bm,new_symb,new_n); bitstring = new BitSequence*[height]; for(uint i=0;ibuild(_bm[i],new_n); @@ -328,7 +329,7 @@ namespace cds_static return bytesBitstrings+occ->getSize()+ptrs; } - void WaveletTreeNoptrsS::build_level(uint **bm, uint *symbols, uint length, uint *occs) { + void WaveletTreeNoptrsS::build_level(uint **bm, uint *symbols, uint length) { // for (uint i = 0; i < length; i++) // cout << " " << symbols[i]; // cout << endl; diff --git a/libhdt/src/libdcs/CSD_FMIndex.cpp b/libhdt/src/libdcs/CSD_FMIndex.cpp index 0f2f0b56..f61ec93f 100644 --- a/libhdt/src/libdcs/CSD_FMIndex.cpp +++ b/libhdt/src/libdcs/CSD_FMIndex.cpp @@ -54,7 +54,7 @@ CSD_FMIndex::CSD_FMIndex(hdt::IteratorUCharString *it, bool sparse_bitsequence, this->type = FMINDEX; string element; - unsigned char *text; + unsigned char *text; // TODO: std::vector uint *bitmap = 0; // uint32_t *bitmap = 0; @@ -149,7 +149,7 @@ CSD_FMIndex::CSD_FMIndex(hdt::IteratorUCharString *it, bool sparse_bitsequence, separators = new BitSequenceRG(bitmap, len, 4); delete[] bitmap; } - delete[] text; + free(text); } void CSD_FMIndex::build_ssa(unsigned char *text, size_t len, From e77521a985cf21081e5142ee8af77f499a9158ff Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 21 Dec 2023 10:11:18 -0800 Subject: [PATCH 04/10] Fix memcpy to nullptr (commit 7e43168) --- libhdt/src/libdcs/CSD_HTFC.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/libhdt/src/libdcs/CSD_HTFC.cpp b/libhdt/src/libdcs/CSD_HTFC.cpp index 65085684..cb795aa1 100644 --- a/libhdt/src/libdcs/CSD_HTFC.cpp +++ b/libhdt/src/libdcs/CSD_HTFC.cpp @@ -25,6 +25,7 @@ * Miguel A. 
Martinez-Prieto: migumar2@infor.uva.es */ +#include #include "CSD_HTFC.h" #if HAVE_CDS @@ -57,8 +58,9 @@ CSD_HTFC::CSD_HTFC(hdt::IteratorUCharString *it, uint32_t blocksize, vector xblocks; // Temporal storage for start positions - unsigned char *previousStr = NULL, *currentStr = NULL; - uint previousLength = 0, currentLength = 0; + std::basic_string previousStr((const unsigned char*)""); + unsigned char *currentStr = NULL; + uint currentLength = 0; while (it->hasNext()) { currentStr = it->next(); @@ -99,8 +101,8 @@ CSD_HTFC::CSD_HTFC(hdt::IteratorUCharString *it, uint32_t blocksize, // Regular string // Calculating the length of the long common prefix - uint delta = longest_common_prefix(previousStr, currentStr, - previousLength, currentLength); + uint delta = longest_common_prefix(previousStr.data(), currentStr, + previousStr.length(), currentLength); // cout << "Block: " << nblocks << " Pos: "<< length << endl; // cout << previousStr << endl << currentStr << endl << " Delta: " << @@ -121,8 +123,7 @@ CSD_HTFC::CSD_HTFC(hdt::IteratorUCharString *it, uint32_t blocksize, // New string processed numstrings++; - memcpy(previousStr, currentStr, currentLength); - previousLength = currentLength; + previousStr.assign(currentStr, currentLength); it->freeStr(currentStr); // NOTIFYCOND(listener, "Converting dictionary to HTFC", length, From 16662eab6957c418160bc5729963faa538749b6f Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 21 Dec 2023 18:49:58 -0800 Subject: [PATCH 05/10] Don't use deprecated std::binary_function<...> --- libhdt/src/triples/BitmapTriples.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libhdt/src/triples/BitmapTriples.cpp b/libhdt/src/triples/BitmapTriples.cpp index 8f6fc77d..e37fc68e 100644 --- a/libhdt/src/triples/BitmapTriples.cpp +++ b/libhdt/src/triples/BitmapTriples.cpp @@ -204,7 +204,7 @@ struct sort_pred { }; // function object -class PredicateComparator : public std::binary_function +class PredicateComparator { 
private: IntSequence *array; From 91ea422dc483ef53c8e9fa762bf4422defec8fad Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 21 Dec 2023 18:50:05 -0800 Subject: [PATCH 06/10] Don't use deprecated serd_uri_to_path() --- libhdt/src/rdf/RDFParserSerd.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libhdt/src/rdf/RDFParserSerd.cpp b/libhdt/src/rdf/RDFParserSerd.cpp index a2463cc8..e06dbe90 100644 --- a/libhdt/src/rdf/RDFParserSerd.cpp +++ b/libhdt/src/rdf/RDFParserSerd.cpp @@ -185,7 +185,7 @@ void RDFParserSerd::doParse(const char *fileName, const char *baseUri, RDFNotati serd_reader_set_error_sink(reader, hdtserd_on_error, NULL); - const uint8_t* input=serd_uri_to_path((const uint8_t *)fileName); + uint8_t* input=serd_file_uri_parse((const uint8_t *)fileName, NULL); if(fileUtil::str_ends_with(fileName,".gz")){ @@ -216,6 +216,7 @@ void RDFParserSerd::doParse(const char *fileName, const char *baseUri, RDFNotati serd_env_free(env); serd_node_free(&base); + serd_free(input); } } From d4c876310eb5f30adea940c13b9af2c38a43041b Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 11 Jul 2024 15:41:32 -0700 Subject: [PATCH 07/10] Ensure local variables are initialized (compiler warnings) --- libcds/tests/testLCP.cpp | 4 ++-- libcds/tests/testNPR.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcds/tests/testLCP.cpp b/libcds/tests/testLCP.cpp index e6ca2311..18b0b87a 100644 --- a/libcds/tests/testLCP.cpp +++ b/libcds/tests/testLCP.cpp @@ -53,8 +53,8 @@ bool testLCP(LCP *s1, LCP *s2, TextIndex *csa){ int main(int argc, char *argv[]){ - char *text; - size_t length; + char *text = nullptr; + size_t length = 0; LCP *lcp_naive=NULL; LCP *lcp=NULL; diff --git a/libcds/tests/testNPR.cpp b/libcds/tests/testNPR.cpp index 8a4d9e01..ae03851c 100644 --- a/libcds/tests/testNPR.cpp +++ b/libcds/tests/testNPR.cpp @@ -71,8 +71,8 @@ bool testNPR(NPR *npr, LCP *lcp, TextIndex *csa, size_t *naive_nsv, size_t *nai int main(int argc, 
char *argv[]){ - char *text; - size_t length; + char *text = nullptr; + size_t length = 0; LCP *lcp = NULL; NPR *npr = NULL; From b9a409d5a34eca9cebfe11a3453c689c558088c8 Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 11 Jul 2024 15:42:17 -0700 Subject: [PATCH 08/10] Ignore hdt-generated index files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e5a731db..5089ab41 100644 --- a/.gitignore +++ b/.gitignore @@ -57,7 +57,7 @@ stamp-h1 .deps/ *.hdt -*.hdt.index +*.hdt.index* *.nq *.nt *.rdf From 7996ca6a0e610d1be68211a4e114f19c05234814 Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Thu, 11 Jul 2024 15:51:20 -0700 Subject: [PATCH 09/10] Fix "sz" parameter in calls to strncpy() - possible buffer overflow or string truncation --- libhdt/src/libdcs/CSD_FMIndex.cpp | 6 +++--- libhdt/src/libdcs/CSD_HTFC.cpp | 8 ++++---- libhdt/src/libdcs/CSD_PFC.cpp | 4 ++-- libhdt/src/triples/TripleListDisk.cpp | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libhdt/src/libdcs/CSD_FMIndex.cpp b/libhdt/src/libdcs/CSD_FMIndex.cpp index f61ec93f..00a3a6b2 100644 --- a/libhdt/src/libdcs/CSD_FMIndex.cpp +++ b/libhdt/src/libdcs/CSD_FMIndex.cpp @@ -90,7 +90,7 @@ CSD_FMIndex::CSD_FMIndex(hdt::IteratorUCharString *it, bool sparse_bitsequence, // Checking the current size of the encoded // sequence: realloc if necessary if ((total + currentLength + 1) > reservedSize) { - while (((size_t)total + currentLength + 1) > reservedSize) { + while ((total + currentLength + 1) > reservedSize) { reservedSize <<= 1; if (reservedSize == 0) { reservedSize = ((size_t)total + currentLength) * 2; @@ -99,7 +99,7 @@ CSD_FMIndex::CSD_FMIndex(hdt::IteratorUCharString *it, bool sparse_bitsequence, text = (unsigned char *)realloc(text, reservedSize * sizeof(unsigned char)); } - strncpy((char *)(text + total), (char *)currentStr, currentLength); + strncpy((char *)(text + total), (char *)currentStr, reservedSize - total); 
total += currentLength; @@ -118,7 +118,7 @@ CSD_FMIndex::CSD_FMIndex(hdt::IteratorUCharString *it, bool sparse_bitsequence, textFinal = new char[total + 1]; // cout<<"testing:total cpy:"< " << currentStr << endl; @@ -113,7 +113,7 @@ CSD_HTFC::CSD_HTFC(hdt::IteratorUCharString *it, uint32_t blocksize, // The suffix is copied to the sequence strncpy((char *)(textfc + bytesfc), (char *)currentStr + delta, - currentLength - delta); + reservedSize - bytesfc); bytesfc += currentLength - delta; // cout << nblocks-1 << "," << length << " => " << currentStr << endl; } @@ -333,7 +333,7 @@ void CSD_HTFC::dumpBlock(uint block) { uint idInBlock = 0; // Reading the first string - strncpy((char *)string, (char *)(text + pos), slen); + strncpy((char *)string, (char *)(text + pos), maxlength + 1); string[slen] = '\0'; pos += slen; @@ -352,7 +352,7 @@ void CSD_HTFC::dumpBlock(uint block) { // Copying the suffix slen = strlen((char *)text + pos) + 1; - strncpy((char *)(string + delta), (char *)(text + pos), slen); + strncpy((char *)(string + delta), (char *)(text + pos), maxlength - delta + 1); cout << block * blocksize + idInBlock << " (" << idInBlock << ") => " << string << " Delta=" << delta << " Len=" << slen << endl; diff --git a/libhdt/src/libdcs/CSD_PFC.cpp b/libhdt/src/libdcs/CSD_PFC.cpp index a0f876c0..f6dc3447 100644 --- a/libhdt/src/libdcs/CSD_PFC.cpp +++ b/libhdt/src/libdcs/CSD_PFC.cpp @@ -81,7 +81,7 @@ CSD_PFC::CSD_PFC(hdt::IteratorUCharString *it, uint32_t blocksize, nblocks++; // The string is explicitly copied to the encoded sequence. 
- strncpy((char *)(text + bytes), (char *)currentStr, currentLength); + strncpy((char *)(text + bytes), (char *)currentStr, reservedSize - bytes); bytes += currentLength; } else { // Regular string @@ -96,7 +96,7 @@ CSD_PFC::CSD_PFC(hdt::IteratorUCharString *it, uint32_t blocksize, // The suffix is copied to the sequence strncpy((char *)(text + bytes), (char *)currentStr + delta, - currentLength - delta); + reservedSize - bytes); bytes += currentLength - delta; } diff --git a/libhdt/src/triples/TripleListDisk.cpp b/libhdt/src/triples/TripleListDisk.cpp index 0f4ec37c..ae575150 100644 --- a/libhdt/src/triples/TripleListDisk.cpp +++ b/libhdt/src/triples/TripleListDisk.cpp @@ -303,7 +303,7 @@ void TripleListDisk::insert(TripleID &triple) //cout << "Insert: " <<&pointer[numTotalTriples] << "* "<< triple << " "< Date: Fri, 12 Jul 2024 08:58:31 -0700 Subject: [PATCH 10/10] Fix undefined behavior (new size_t[0]), typos in layout --- libhdt/src/util/Histogram.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libhdt/src/util/Histogram.h b/libhdt/src/util/Histogram.h index a70acfc6..54998bd7 100644 --- a/libhdt/src/util/Histogram.h +++ b/libhdt/src/util/Histogram.h @@ -28,7 +28,7 @@ class Histogram { Histogram() : Start(0), nBins_by_interval(0), nBins(0), - freq(new size_t[0]) { + freq(new size_t[1]) { // size_t[0] is undefined behavior reset(); } @@ -69,7 +69,7 @@ class Histogram { delete[] freq; } - /** operator= + /** operator= * Set this histogram equal to another. * @param other Description of the param. * @return The expected result. @@ -81,9 +81,9 @@ class Histogram { if (nBins != other.nBins) { nBins = other.nBins; delete[] freq; - freq = new size_t[nBins]; + freq = new size_t[nBins]; } - for (size_t i(0); i < nBins; ++i) + for (size_t i(0); i < nBins; ++i) freq[i] = other.freq[i]; } return *this;