From d9a78c6d9d60021aaf482971dcada72c8df97523 Mon Sep 17 00:00:00 2001 From: Jan Wijffels Date: Wed, 13 Sep 2023 09:03:21 +0200 Subject: [PATCH] - Add explicit initialization to silence false positive valgrind report in compressor_save.cpp - Drop C++11 specification in Makevars - Remove use of std::iterator by incorporating fixes of https://github.com/ufal/nametag/commit/2aa1d1de78d2f562c0770423f94cc7d7e1347ff7 in utf8.h and utf16.h --- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ src/Makevars | 1 - src/nametag/src/unilib/utf16.h | 18 ++++++++++++++---- src/nametag/src/unilib/utf8.h | 18 ++++++++++++++---- src/nametag/src/utils/compressor_save.cpp | 2 ++ 6 files changed, 37 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 530db29..163e203 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nametagger Type: Package Title: Named Entity Recognition in Texts using 'NameTag' -Version: 0.1.2 +Version: 0.1.3 Authors@R: c( person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be'), person('BNOSAC', role = 'cph'), diff --git a/NEWS.md b/NEWS.md index 906e7e4..fbf3e72 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +## CHANGES IN nametagger VERSION 0.1.3 + +- Add explicit initialization to silence false positive valgrind report in compressor_save.cpp +- Drop C++11 specification in Makevars +- Remove use of std::iterator by incorporating fixes of https://github.com/ufal/nametag/commit/2aa1d1de78d2f562c0770423f94cc7d7e1347ff7 in utf8.h and utf16.h + ## CHANGES IN nametagger VERSION 0.1.2 - use snprintf instead of sprintf to handle the R CMD check deprecating note on M1mac diff --git a/src/Makevars b/src/Makevars index 3a99f7f..0c97a1c 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,4 +1,3 @@ -CXX_STD = CXX11 PKG_CPPFLAGS = -DSTRICT_R_HEADERS -I./nametag/src SOURCES_NAMETAG = nametag/src/bilou/bilou_probabilities.cpp \ diff --git a/src/nametag/src/unilib/utf16.h b/src/nametag/src/unilib/utf16.h index 611312e..0108342 100644 --- a/src/nametag/src/unilib/utf16.h +++ b/src/nametag/src/unilib/utf16.h @@ -7,8 +7,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. // -// UniLib version: 3.1.1 -// Unicode version: 8.0.0 +// UniLib version: 3.3.1 +// Unicode version: 15.0.0 #pragma once @@ -113,8 +113,13 @@ void utf16::decode(const std::u16string& str, std::u32string& decoded) { decode(str.c_str(), decoded); } -class utf16::string_decoder::iterator : public std::iterator { +class utf16::string_decoder::iterator { public: + using iterator_category = std::input_iterator_tag; + using value_type = char32_t; + using difference_type = ptrdiff_t; + using pointer = char32_t*; + using reference = char32_t&; iterator(const char16_t* str) : codepoint(0), next(str) { operator++(); } iterator(const iterator& it) : codepoint(it.codepoint), next(it.next) {} iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; } @@ -145,8 +150,13 @@ utf16::string_decoder utf16::decoder(const std::u16string& str) { return string_decoder(str.c_str()); } -class utf16::buffer_decoder::iterator : public std::iterator { +class utf16::buffer_decoder::iterator { public: + using iterator_category = std::input_iterator_tag; + using value_type = char32_t; + using difference_type = ptrdiff_t; + using pointer = char32_t*; + using reference = char32_t&; iterator(const char16_t* str, size_t len) : codepoint(0), next(str), len(len) { operator++(); } iterator(const iterator& it) : codepoint(it.codepoint), next(it.next), len(it.len) {} iterator& operator++() { if (!len) next = nullptr; if (next) codepoint = decode(next, len); return *this; } diff --git a/src/nametag/src/unilib/utf8.h b/src/nametag/src/unilib/utf8.h index 81997ef..60707a9 100644 --- a/src/nametag/src/unilib/utf8.h +++ b/src/nametag/src/unilib/utf8.h @@ -7,8 +7,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. // -// UniLib version: 3.1.1 -// Unicode version: 8.0.0 +// UniLib version: 3.3.1 +// Unicode version: 15.0.0 #pragma once @@ -145,8 +145,13 @@ void utf8::decode(const std::string& str, std::u32string& decoded) { decode(str.c_str(), decoded); } -class utf8::string_decoder::iterator : public std::iterator { +class utf8::string_decoder::iterator { public: + using iterator_category = std::input_iterator_tag; + using value_type = char32_t; + using difference_type = ptrdiff_t; + using pointer = char32_t*; + using reference = char32_t&; iterator(const char* str) : codepoint(0), next(str) { operator++(); } iterator(const iterator& it) : codepoint(it.codepoint), next(it.next) {} iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; } @@ -177,8 +182,13 @@ utf8::string_decoder utf8::decoder(const std::string& str) { return string_decoder(str.c_str()); } -class utf8::buffer_decoder::iterator : public std::iterator { +class utf8::buffer_decoder::iterator { public: + using iterator_category = std::input_iterator_tag; + using value_type = char32_t; + using difference_type = ptrdiff_t; + using pointer = char32_t*; + using reference = char32_t&; iterator(const char* str, size_t len) : codepoint(0), next(str), len(len) { operator++(); } iterator(const iterator& it) : codepoint(it.codepoint), next(it.next), len(it.len) {} iterator& operator++() { if (!len) next = nullptr; if (next) codepoint = decode(next, len); return *this; } diff --git a/src/nametag/src/utils/compressor_save.cpp b/src/nametag/src/utils/compressor_save.cpp index 3a4447d..79dcfa1 100644 --- a/src/nametag/src/utils/compressor_save.cpp +++ b/src/nametag/src/utils/compressor_save.cpp @@ -445,6 +445,8 @@ void MatchFinder_Construct(CMatchFinder *p) p->bufferBase = 0; p->directInput = 0; p->hash = 0; + p->hashSizeSum = 0; + p->numSons = 0; MatchFinder_SetDefaultSettings(p); for (i = 0; i < 256; i++)