From 77038ab6b651fe16caa4908e0de63e0597686c43 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 6 Dec 2023 17:25:55 -0500 Subject: [PATCH] latest string changes from CODA-OSS --- .../modules/c++/logging/source/Setup.cpp | 19 +-- .../c++/plugin/include/plugin/ErrorHandler.h | 2 +- .../modules/c++/str/include/str/Manip.h | 68 ++++++++- .../modules/c++/str/source/Encoding.cpp | 89 ++++++----- .../coda-oss/modules/c++/str/source/Manip.cpp | 140 ++++++++++++++++-- .../c++/str/unittests/test_base_convert.cpp | 31 ++-- .../modules/c++/str/unittests/test_str.cpp | 70 ++++++++- .../modules/c++/sys/include/sys/Path.h | 14 +- .../modules/c++/sys/source/AbstractOS.cpp | 19 +-- .../modules/c++/sys/source/DateTime.cpp | 20 +-- .../modules/c++/sys/source/FileFinder.cpp | 15 +- .../coda-oss/modules/c++/sys/source/Path.cpp | 17 +++ .../c++/xml.lite/source/ValidatorXerces.cpp | 20 +-- 13 files changed, 374 insertions(+), 150 deletions(-) diff --git a/externals/coda-oss/modules/c++/logging/source/Setup.cpp b/externals/coda-oss/modules/c++/logging/source/Setup.cpp index 3c1c36363..b8ff117bd 100644 --- a/externals/coda-oss/modules/c++/logging/source/Setup.cpp +++ b/externals/coda-oss/modules/c++/logging/source/Setup.cpp @@ -35,7 +35,7 @@ std::unique_ptr logging::setupLogger(const path& program_, const std::string& logLevel, - const path& logFile_, + const path& logFile, const std::string& logFormat, size_t logCount, size_t logBytes) @@ -44,30 +44,25 @@ logging::setupLogger(const path& program_, std::unique_ptr log(new logging::Logger(program)); // setup logging level - std::string lev = logLevel; - str::upper(lev); + auto lev = str::upper(logLevel); str::trim(lev); - logging::LogLevel level = (lev.empty()) ? logging::LogLevel::LOG_WARNING : - logging::LogLevel(lev); + const auto level = lev.empty() ? 
logging::LogLevel::LOG_WARNING : logging::LogLevel(lev); // setup logging formatter std::unique_ptr formatter; - const auto logFile = logFile_.string(); - auto file = logFile; - str::lower(file); + const auto file = str::lower(logFile.string()); if (str::endsWith(file, ".xml")) { - formatter.reset( - new logging::XMLFormatter("", "")); + formatter = std::make_unique("", ""); } else { - formatter.reset(new logging::StandardFormatter(logFormat)); + formatter = std::make_unique(logFormat); } // setup logging handler std::unique_ptr logHandler; - if (file.empty() || file == "console") + if (file.empty() || (file == "console") || (file == "-")) logHandler.reset(new logging::StreamHandler()); else { diff --git a/externals/coda-oss/modules/c++/plugin/include/plugin/ErrorHandler.h b/externals/coda-oss/modules/c++/plugin/include/plugin/ErrorHandler.h index 234e6c5a1..248b61f46 100644 --- a/externals/coda-oss/modules/c++/plugin/include/plugin/ErrorHandler.h +++ b/externals/coda-oss/modules/c++/plugin/include/plugin/ErrorHandler.h @@ -49,7 +49,7 @@ class CODA_OSS_API ErrorHandler virtual void onPluginError(except::Context& c) = 0; }; -class CODA_OSS_API DefaultErrorHandler final : public ErrorHandler +class CODA_OSS_API DefaultErrorHandler : public ErrorHandler { public: DefaultErrorHandler(logging::LoggerPtr logger = logging::LoggerPtr()); diff --git a/externals/coda-oss/modules/c++/str/include/str/Manip.h b/externals/coda-oss/modules/c++/str/include/str/Manip.h index 0b48673a5..c9c96eb60 100644 --- a/externals/coda-oss/modules/c++/str/include/str/Manip.h +++ b/externals/coda-oss/modules/c++/str/include/str/Manip.h @@ -1,4 +1,4 @@ -/* ========================================================================= +/* ========================================================================= * This file is part of str-c++ * ========================================================================= * @@ -34,6 +34,7 @@ #include "coda_oss/CPlusPlus.h" #include "coda_oss/string.h" 
#include "str/Convert.h" +#include "str/W1252string.h" namespace str { @@ -177,10 +178,71 @@ CODA_OSS_API std::vector split(const std::string& s, const std::string& splitter = " ", size_t maxSplit = std::string::npos); +/***********************************************************************************/ //! Uses std::transform to convert all chars to lower case //! Uses std::transform to convert all chars to upper case -CODA_OSS_API void lower(std::string& s); -CODA_OSS_API void upper(std::string& s); +//CODA_OSS_API void lower(std::string& s); +//CODA_OSS_API void upper(std::string& s); +// +// Using std::transform() with ::toupper() is considerably slower than a lookup-table +CODA_OSS_API void ascii_lower(std::string& s); +inline void lower(std::string& s) +{ + ascii_lower(s); +} +inline std::string lower(const std::string& s) +{ + std::string retval = s; + lower(retval); + return retval; +} + +CODA_OSS_API void ascii_upper(std::string& s); +inline void upper(std::string& s) +{ + ascii_upper(s); +} +inline std::string upper(const std::string& s) +{ + std::string retval = s; + upper(retval); + return retval; +} + +// At this point, you might want to `lower()` and `upper()` for UTF-8 and/or +// Windows-1252. That can be done, but ... our needs are mostly English (99.9%) +// with a very occasional smattering of French (Canada). We've gotten by this +// long without being able to upper/lower 'ä' and 'Ä' and there's no current +// requirement to do so. +// +// Furthermore, while Windows-1252 is easy as it's a single-byte encoding and +// covers many European languages, the standard is UTF-8. +// Upper/lower-casing in Unicode is quite a bit more complicated as there can be +// numerous rules for various languages. For example, in German, the "old +// rules" were that 'ß' was uppercased to "SS"; however, there is now a 'ẞ'. +// And then there are semantics: in German, no word can begin with 'ß' (or 'ẞ') +// making "ßanything" rather non-sensical. 
+// +// So for now (until there is a real use case), just "define these problems +// away" by not implementing `w1252_lower()`, `utf8_upper()`, etc. +/* +CODA_OSS_API void w1252_lower(std::string& s); +CODA_OSS_API void w1252_upper(std::string& s); +CODA_OSS_API void lower(str::W1252string& s); +CODA_OSS_API void upper(str::W1252string& s); + +CODA_OSS_API void utf8_lower(std::string& s); +CODA_OSS_API void utf8_upper(std::string& s); +CODA_OSS_API void lower(coda_oss::u8string& s); +CODA_OSS_API void upper(coda_oss::u8string& s); +*/ + +// I've already got these hooked up, keep the code around ... long ugly +// names to discourage use. +CODA_OSS_API str::Windows1252_T to_w1252_upper(str::Windows1252_T); +CODA_OSS_API str::Windows1252_T to_w1252_lower(str::Windows1252_T); + +/***********************************************************************************/ /*! * Replaces any characters that are invalid in XML (&, <, >, ', ") with their diff --git a/externals/coda-oss/modules/c++/str/source/Encoding.cpp b/externals/coda-oss/modules/c++/str/source/Encoding.cpp index eacc016d8..80603dae9 100644 --- a/externals/coda-oss/modules/c++/str/source/Encoding.cpp +++ b/externals/coda-oss/modules/c++/str/source/Encoding.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include "gsl/gsl.h" #include "config/compiler_extensions.h" @@ -65,59 +66,58 @@ CODA_OSS_disable_warning_pop // Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS) // in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT -inline coda_oss::u8string utf8_(uint32_t i) +inline coda_oss::u8string utf8_(char32_t i) { const auto ch = gsl::narrow(i); return str::to_u8string(std::u32string{ch}); } -static const auto& Windows1252_x80_x9F_to_u8string() -{ - static const std::map retval { - {0x80, utf8_(0x20AC) } // EURO SIGN - // , {0x81, replacement_character } // UNDEFINED - , {0x82, utf8_(0x201A) } // SINGLE LOW-9 QUOTATION MARK - , 
{0x83, utf8_(0x0192) } // LATIN SMALL LETTER F WITH HOOK - , {0x84, utf8_(0x201E) } // DOUBLE LOW-9 QUOTATION MARK - , {0x85, utf8_(0x2026) } // HORIZONTAL ELLIPSIS - , {0x86, utf8_(0x2020) } // DAGGER - , {0x87, utf8_(0x2021) } // DOUBLE DAGGER - , {0x88, utf8_(0x02C6) } // MODIFIER LETTER CIRCUMFLEX ACCENT - , {0x89, utf8_(0x2030) } // PER MILLE SIGN - , {0x8A, utf8_(0x0160) } // LATIN CAPITAL LETTER S WITH CARON - , {0x8B, utf8_(0x2039) } // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - , {0x8C, utf8_(0x0152) } // LATIN CAPITAL LIGATURE OE - //, {0x8D, replacement_character } // UNDEFINED - , {0x8E, utf8_(0x017D) } // LATIN CAPITAL LETTER Z WITH CARON - //, {0x8F, replacement_character } // UNDEFINED - //, {0x90, replacement_character } // UNDEFINED - , {0x91, utf8_(0x2018) } // LEFT SINGLE QUOTATION MARK - , {0x92, utf8_(0x2019) } // RIGHT SINGLE QUOTATION MARK - , {0x93, utf8_(0x201C) } // LEFT DOUBLE QUOTATION MARK - , {0x94, utf8_(0x201D) } // RIGHT DOUBLE QUOTATION MARK - , {0x95, utf8_(0x2022) } // BULLET - , {0x96, utf8_(0x2013) } // EN DASH - , {0x97, utf8_(0x2014) } // EM DASH - , {0x98, utf8_(0x02DC) } // SMALL TILDE - , {0x99, utf8_(0x2122) } // TRADE MARK SIGN - , {0x9A, utf8_(0x0161) } // LATIN SMALL LETTER S WITH CARON - , {0x9B, utf8_(0x203A) } // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - , {0x9C, utf8_(0x0153) } // LATIN SMALL LIGATURE OE - //, {0x9D, replacement_character } // UNDEFINED - , {0x9E, utf8_(0x017E) } // LATIN SMALL LETTER Z WITH CARON - , {0x9F, utf8_(0x0178) } // LATIN CAPITAL LETTER Y WITH DIAERESIS +static const auto& Windows1252_x80_x9F_to_u8string_() +{ + static const std::map retval{ + {U'\x80', utf8_(U'\x20AC')} // EURO SIGN + // , {U'\x81, replacement_character } // UNDEFINED + , {U'\x82', utf8_(U'\x201A') } // SINGLE LOW-9 QUOTATION MARK + , {U'\x83', utf8_(U'\x0192') } // LATIN SMALL LETTER F WITH HOOK + , {U'\x84', utf8_(U'\x201E') } // DOUBLE LOW-9 QUOTATION MARK + , {U'\x85', utf8_(U'\x2026') } // HORIZONTAL ELLIPSIS + 
, {U'\x86', utf8_(U'\x2020') } // DAGGER + , {U'\x87', utf8_(U'\x2021') } // DOUBLE DAGGER + , {U'\x88', utf8_(U'\x02C6') } // MODIFIER LETTER CIRCUMFLEX ACCENT + , {U'\x89', utf8_(U'\x2030') } // PER MILLE SIGN + , {U'\x8A', utf8_(U'\x0160') } // LATIN CAPITAL LETTER S WITH CARON + , {U'\x8B', utf8_(U'\x2039') } // SINGLE LEFT-POINTING ANGLE QUOTATION MARK + , {U'\x8C', utf8_(U'\x0152') } // LATIN CAPITAL LIGATURE OE + //, {U'\x8D, replacement_character } // UNDEFINED + , {U'\x8E', utf8_(U'\x017D') } // LATIN CAPITAL LETTER Z WITH CARON + //, {U'\x8F, replacement_character } // UNDEFINED + //, {U'\x90, replacement_character } // UNDEFINED + , {U'\x91', utf8_(U'\x2018') } // LEFT SINGLE QUOTATION MARK + , {U'\x92', utf8_(U'\x2019') } // RIGHT SINGLE QUOTATION MARK + , {U'\x93', utf8_(U'\x201C') } // LEFT DOUBLE QUOTATION MARK + , {U'\x94', utf8_(U'\x201D') } // RIGHT DOUBLE QUOTATION MARK + , {U'\x95', utf8_(U'\x2022') } // BULLET + , {U'\x96', utf8_(U'\x2013') } // EN DASH + , {U'\x97', utf8_(U'\x2014') } // EM DASH + , {U'\x98', utf8_(U'\x02DC') } // SMALL TILDE + , {U'\x99', utf8_(U'\x2122') } // TRADE MARK SIGN + , {U'\x9A', utf8_(U'\x0161') } // LATIN SMALL LETTER S WITH CARON + , {U'\x9B', utf8_(U'\x203A') } // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + , {U'\x9C', utf8_(U'\x0153') } // LATIN SMALL LIGATURE OE + //, {U'\x9D, replacement_character } // UNDEFINED + , {U'\x9E', utf8_(U'\x017E') } // LATIN SMALL LETTER Z WITH CARON + , {U'\x9F', utf8_(U'\x0178') } // LATIN CAPITAL LETTER Y WITH DIAERESIS }; return retval; } - static auto Windows1252_to_u8string() { - auto retval = Windows1252_x80_x9F_to_u8string(); + auto retval = Windows1252_x80_x9F_to_u8string_(); // Add the ISO8859-1 values to the map too. 1) We're already looking // in the map anyway for Windows-1252 characters. 2) Need map // entires for conversion from UTF-8 to Windows-1252. 
- for (std::u32string::value_type ch = 0xA0; ch <= 0xff; ch++) + for (char32_t ch = U'\xA0'; ch <= U'\xff'; ch++) { // ISO8859-1 can be converted to UTF-8 with bit-twiddling @@ -186,7 +186,14 @@ static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string< // If the input text contains a character that isn't defined in Windows-1252; return a // "replacement character." Yes, this will **corrupt** the input data as information is lost: // https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character - static const coda_oss::u8string replacement_character = utf8_(0xfffd); + // + // Or ... https://en.wikipedia.org/wiki/Windows-1252 + // > According to the information on Microsoft's and the Unicode + // > Consortium's websites, positions 81, 8D, 8F, 90, and 9D are + // > unused; however, the Windows API `MultiByteToWideChar` maps these + // > to the corresponding C1 control codes. The "best fit" mapping + // > documents this behavior, too. + static const coda_oss::u8string replacement_character = utf8_(U'\xfffd'); append(result, replacement_character); } else @@ -229,7 +236,7 @@ inline void w1252to32(str::W1252string::const_pointer p, size_t sz, std::u32stri } template -std::map kv_to_vk(const std::map& kv) +auto kv_to_vk(const std::map& kv) { std::map retval; for (const auto& p : kv) diff --git a/externals/coda-oss/modules/c++/str/source/Manip.cpp b/externals/coda-oss/modules/c++/str/source/Manip.cpp index f8dbce7db..1d6bae1c2 100644 --- a/externals/coda-oss/modules/c++/str/source/Manip.cpp +++ b/externals/coda-oss/modules/c++/str/source/Manip.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include "gsl/gsl.h" @@ -40,7 +41,7 @@ namespace { -char transformCheck(int c, int (*transform)(int)) +inline char transformCheck(int c, int (*transform)(int)) { // Ensure the character can be represented // as an unsigned char or is 'EOF', as the @@ -56,12 +57,12 @@ char transformCheck(int c, int (*transform)(int)) } } -char 
tolowerCheck(char c) +inline char tolowerCheck(char c) { return transformCheck(c, tolower); } -char toupperCheck(char c) +inline char toupperCheck(char c) { return transformCheck(c, toupper); } @@ -239,9 +240,8 @@ bool isAlphanumeric(const std::string& s) bool isAsciiPrintable(const std::string& s) { - for (const auto& ch : s) + for (const auto& c : s) { - char c = ch; if (c < 32 || c > 126) return false; } @@ -290,24 +290,136 @@ std::vector split(const std::string& s, return vec; } -template -inline void transform(std::basic_string& s, Fn f) + +// Calling ::toupper() can be slow as the CRT might check for locales. +// Since we only have 256 values, a lookup table is very fast and doesn't +// use much memory. +static const auto& make_lookup(std::array& result, + char (*to)(char)) +{ + // For each of 256 values, record the corresponding tolower/toupper value; + // this makes converting very fast as no checking or arithmetic must be done. + for (size_t i = 0; i <= 0xff; i++) + { + const auto ch = to(static_cast(i)); + result[i] = static_cast(ch); + } + return result; +} + +template +static void do_lookup(std::basic_string& s, const std::array& lookup) { - (void) std::transform(s.begin(), s.end(), s.begin(), f); + for (auto& ch : s) + { + const auto i = static_cast(ch); + ch = static_cast(lookup[i]); + } } -void lower(std::string& s) + +void ascii_upper(std::string& s) { - transform(s, tolowerCheck); + static std::array lookup_; + static const auto& lookup = make_lookup(lookup_, toupperCheck); + do_lookup(s, lookup); +} + +void ascii_lower(std::string& s) +{ + static std::array lookup_; + static const auto& lookup = make_lookup(lookup_, tolowerCheck); + do_lookup(s, lookup); +} + +inline char to_w1252_upper_(char ch) +{ + if ((ch >= 'a') && (ch <= 'z')) + { + return ch ^ 0x20; // ('a' - 'A'); + } + + // See chart at: https://en.wikipedia.org/wiki/Windows-1252 + constexpr uint8_t s_with_caron = 0x9a /* š */; + constexpr uint8_t oe = 0x9c /* œ */; + constexpr uint8_t 
z_with_caron = 0x9e /* ž */; + constexpr uint8_t a_with_grave = 0xe0 /* à */; + constexpr uint8_t o_with_diaeresis = 0xf6 /* ö */; + constexpr uint8_t o_with_slash = 0xf8 /* ø */; + constexpr uint8_t small_thorn = 0xfe /* þ */; + constexpr uint8_t y_with_diaeresis = 0xff /* ÿ */; + + const auto u8 = static_cast(ch); + if ((u8 == s_with_caron) || (u8 == oe) || (u8 == z_with_caron)) + { + return ch ^ 0x10; + } + if ((u8 >= a_with_grave) && (u8 <= o_with_diaeresis)) + { + return ch ^ 0x20; + } + if ((u8 >= o_with_slash) && (u8 <= small_thorn)) + { + return ch ^ 0x20; + } + if (u8 == y_with_diaeresis) + { + constexpr uint8_t Y_with_diaeresis = 0x9f /* Ÿ */; + return Y_with_diaeresis; + } + + return ch; +} +str::Windows1252_T to_w1252_upper(str::Windows1252_T ch) +{ + const auto retval = to_w1252_upper_(static_cast(ch)); + return static_cast(retval); +} + +inline char to_w1252_lower_(char ch) +{ + if ((ch >= 'A') && (ch <= 'Z')) + { + return ch | 0x20; + } + + constexpr uint8_t S_with_caron = 0x8a /* Š */; + constexpr uint8_t OE = 0x8c /*Œ */; + constexpr uint8_t Z_with_caron = 0x8e /* Ž */; + constexpr uint8_t Y_with_diaeresis = 0x9f /* Ÿ */; + constexpr uint8_t A_with_grave = 0xc0 /* À */; + constexpr uint8_t O_with_diaeresis = 0xd6 /* Ö */; + constexpr uint8_t O_with_slash = 0xd8 /* Ø */; + constexpr uint8_t capital_thorn = 0xde /* Þ */; + + const auto u8 = static_cast(ch); + if ((u8 == S_with_caron) || (u8 == OE) || (u8 == Z_with_caron)) + { + return ch | 0x10; + } + if (u8 == Y_with_diaeresis) + { + constexpr uint8_t y_with_diaeresis = 0xff /* ÿ */; + return y_with_diaeresis; + } + if ((u8 >= A_with_grave) && (u8 <= O_with_diaeresis)) + { + return ch | 0x20; + } + if ((u8 >= O_with_slash) && (u8 <= capital_thorn)) + { + return ch | 0x20; + } + return ch; } -void upper(std::string& s) +str::Windows1252_T to_w1252_lower(str::Windows1252_T ch) { - transform(s, toupperCheck); + const auto retval = to_w1252_lower_(static_cast(ch)); + return static_cast(retval); } void 
escapeForXML(std::string& str) { - // & needs to be first or else it'll mess up the other characters that we - // replace + // & needs to be first or else it'll mess up the other characters that we replace replaceAll(str, "&", "&"); replaceAll(str, "<", "<"); replaceAll(str, ">", ">"); diff --git a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp index 3b951015a..c2ecf4f36 100644 --- a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp +++ b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp @@ -216,21 +216,18 @@ TEST_CASE(test_string_to_u8string_iso8859_1) } } -template -static void test_change_case_(const std::string& testName, const TString& lower, const TString& upper) +template +static void test_change_case_(const std::string& testName, + const std::basic_string& lower, const std::basic_string& upper) { - auto s = upper; - str::lower(s); + auto s = str::lower(upper); TEST_ASSERT(s == lower); - s = lower; - str::upper(s); + s = str::upper(lower); TEST_ASSERT(s == upper); - s = upper; - str::upper(s); + s = str::upper(upper); TEST_ASSERT(s == upper); - s = lower; - str::lower(s); + s = str::lower(lower); TEST_ASSERT(s == lower); } TEST_CASE(test_change_case) @@ -243,14 +240,16 @@ TEST_CASE(test_change_case) //const std::wstring abc_w = L"abc"; //test_change_case_(testName, abc_w, ABC_w); - //// Yes, this can really come up, "non classifié" is French (Canadian) for "unclassified". - //const std::string DEF_1252{'D', '\xc9', 'F'}; // "DÉF" Windows-1252 - //const auto DEF8 = from_windows1252(DEF_1252); + // Yes, this can really come up, "non classifié" is French (Canadian) for "unclassified". 
+ const std::string DEF_1252_{'D', '\xc9', 'F'}; // "DÉF" Windows-1252 + const auto DEF_1252 = str::str(DEF_1252_); + const auto DEF8 = str::to_u8string(DEF_1252); - //const std::string def_1252{'d', '\xe9', 'f'}; // "déf" Windows-1252 - //const auto def8 = from_windows1252(def_1252); + const std::string def_1252_{'d', '\xe9', 'f'}; // "déf" Windows-1252 + const auto def_1252 = str::str(def_1252_); + const auto def8 = str::to_u8string(def_1252); - ////test_change_case_(testName, def, DEF); + //test_change_case_(testName, def8, DEF8); //test_change_case_(testName, def_1252, DEF_1252); } diff --git a/externals/coda-oss/modules/c++/str/unittests/test_str.cpp b/externals/coda-oss/modules/c++/str/unittests/test_str.cpp index 2d00862d8..eb4821608 100644 --- a/externals/coda-oss/modules/c++/str/unittests/test_str.cpp +++ b/externals/coda-oss/modules/c++/str/unittests/test_str.cpp @@ -57,16 +57,78 @@ TEST_CASE(testData) TEST_CASE(testUpper) { - std::string s = "test-something1"; + const std::string s_ = "test-something1"; + std::string s = s_; + TEST_ASSERT(str::eq(s, "TEST-SOMETHING1")); str::upper( s); TEST_ASSERT_EQ(s, "TEST-SOMETHING1"); + + //#if _WIN32 + //s = "<×àa`öo\"øo/þb÷>"; + //str::w1252_upper(s); + //TEST_ASSERT_EQ(s, "<×ÀA`ÖO\"ØO/ÞB÷>"); + //#endif +} + +TEST_CASE(test_toupper) +{ + for (uint16_t i = 0x20; i <= 0xff; i++) // uint16_t to avoid wrap-around + { + const auto w1252 = static_cast(i); + const auto w1252_upper = str::to_w1252_upper(w1252); + + const auto w1252_lower = w1252 == w1252_upper ? w1252 : str::to_w1252_lower(w1252_upper); // round-trip + TEST_ASSERT_EQ(static_cast(w1252), static_cast(w1252_lower)); + + if (i <= 0x7f) // ASCII + { + const auto ch = static_cast(i); + const auto upper = toupper(ch); + TEST_ASSERT_EQ(static_cast(upper), static_cast(w1252_upper)); + + const auto lower = ch == upper ? 
ch : tolower(upper); // round-trip + TEST_ASSERT_EQ(ch, lower); + TEST_ASSERT_EQ(static_cast(lower), static_cast(w1252_lower)); + } + } } TEST_CASE(testLower) { - std::string s = "TEST1"; - str::lower( s); + const std::string s_ = "TEST1"; + std::string s = s_; + TEST_ASSERT(str::eq(s, "test1")); + str::lower(s); TEST_ASSERT_EQ(s, "test1"); + + //#if _WIN32 + //s = "[×ÀÖØÞ÷]"; + //str::w1252_lower(s); + //TEST_ASSERT_EQ(s, "[×àöøþ÷]"); + //#endif +} + +TEST_CASE(test_tolower) +{ + for (uint16_t i = 0x20; i <= 0xff; i++) // uint16_t to avoid wrap-around + { + const auto w1252 = static_cast(i); + const auto w1252_lower = str::to_w1252_lower(w1252); + + const auto w1252_upper = w1252 == w1252_lower ? w1252 : str::to_w1252_upper(w1252_lower); // round-trip + TEST_ASSERT_EQ(static_cast(w1252), static_cast(w1252_upper)); + + if (i <= 0x7f) // ASCII + { + const auto ch = static_cast(i); + const auto lower = tolower(ch); + TEST_ASSERT_EQ(static_cast(lower), static_cast(w1252_lower)); + + const auto upper = ch == lower ? ch : toupper(lower); // round-trip + TEST_ASSERT_EQ(ch, upper); + TEST_ASSERT_EQ(static_cast(upper), static_cast(w1252_upper)); + } + } } TEST_CASE(test_eq_ne) @@ -309,7 +371,9 @@ TEST_MAIN( TEST_CHECK(testTrim); TEST_CHECK(testData); TEST_CHECK(testUpper); + TEST_CHECK(test_toupper); TEST_CHECK(testLower); + TEST_CHECK(test_tolower); TEST_CHECK(test_eq_ne); TEST_CHECK(testReplace); TEST_CHECK(testReplaceAllInfinite); diff --git a/externals/coda-oss/modules/c++/sys/include/sys/Path.h b/externals/coda-oss/modules/c++/sys/include/sys/Path.h index d7d950202..74d78af5c 100644 --- a/externals/coda-oss/modules/c++/sys/include/sys/Path.h +++ b/externals/coda-oss/modules/c++/sys/include/sys/Path.h @@ -27,13 +27,15 @@ #include #include #include +#include #include "config/Exports.h" - #include +#include "coda_oss/span.h" #include "sys/OS.h" #include "sys/filesystem.h" +#include "sys/Span.h" /*! 
@@ -295,6 +297,16 @@ class CODA_OSS_API Path std::ostream& operator<<(std::ostream& os, const sys::Path& path); std::istream& operator>>(std::istream& os, sys::Path& path); + +// Convert between collections of paths as strings and sys::filesystem::path +CODA_OSS_API std::vector convertPaths(coda_oss::span); +CODA_OSS_API std::vector convertPaths(coda_oss::span); +template +inline auto convertPaths(const std::vector& paths) +{ + return convertPaths(make_span(paths)); +} + } #endif // CODA_OSS_sys_Path_h_INCLUDED_ diff --git a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp index dd22c3786..436e12bb6 100644 --- a/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp +++ b/externals/coda-oss/modules/c++/sys/source/AbstractOS.cpp @@ -86,29 +86,14 @@ AbstractOS::search(const std::vector& searchPaths, return elementsFound; } -inline auto convert(const std::vector& paths) -{ - std::vector retval; - std::transform(paths.begin(), paths.end(), std::back_inserter(retval), - [](const fs::path& p) { return p.string(); }); - return retval; -} -inline auto convert(const std::vector& paths) -{ - std::vector retval; - std::transform(paths.begin(), paths.end(), std::back_inserter(retval), - [](const auto& p) { return p; }); - return retval; -} - std::vector AbstractOS::search( const std::vector& searchPaths, const std::string& fragment, const std::string& extension, bool recursive) const { - const auto results = search(convert(searchPaths), fragment, extension, recursive); - return convert(results); + const auto results = search(convertPaths(searchPaths), fragment, extension, recursive); + return convertPaths(results); } void AbstractOS::remove(const std::string& path) const diff --git a/externals/coda-oss/modules/c++/sys/source/DateTime.cpp b/externals/coda-oss/modules/c++/sys/source/DateTime.cpp index b6669002d..0679aed62 100644 --- a/externals/coda-oss/modules/c++/sys/source/DateTime.cpp +++ 
b/externals/coda-oss/modules/c++/sys/source/DateTime.cpp @@ -169,15 +169,13 @@ char* strptime(const char *buf, const char *fmt, struct tm& tm, double& millis) // Full name. len = DAY[i].size(); std::string day(bp, len); - str::lower(day); - if (day == DAY[i]) + if (str::eq(day, DAY[i])) break; // Abbreviated name. len = AB_DAY[i].size(); day = std::string(bp, len); - str::lower(day); - if (day == AB_DAY[i]) + if (str::eq(day, AB_DAY[i])) break; } @@ -202,15 +200,13 @@ char* strptime(const char *buf, const char *fmt, struct tm& tm, double& millis) // Full name. len = MONTH[i].size(); std::string month(bp, len); - str::lower(month); - if (month == MONTH[i]) + if (str::eq(month, MONTH[i])) break; // Abbreviated name. len = AB_MONTH[i].size(); month = std::string(bp, len); - str::lower(month); - if (month == AB_MONTH[i]) + if (str::eq(month, AB_MONTH[i])) break; } @@ -467,9 +463,7 @@ std::string sys::DateTime::dayOfWeekToStringAbbr(int dayOfWeek) int sys::DateTime::monthToValue(const std::string& month) { - std::string m = month; - str::lower(m); - + const auto m = str::lower(month); if (str::startsWith(m, "jan")) return 1; else if (str::startsWith(m, "feb")) @@ -501,9 +495,7 @@ int sys::DateTime::monthToValue(const std::string& month) int sys::DateTime::dayOfWeekToValue(const std::string& dayOfWeek) { - std::string d = dayOfWeek; - str::lower(d); - + const auto d = str::lower(dayOfWeek); if (str::startsWith(d, "sun")) return 1; else if (str::startsWith(d, "mon")) diff --git a/externals/coda-oss/modules/c++/sys/source/FileFinder.cpp b/externals/coda-oss/modules/c++/sys/source/FileFinder.cpp index b33cc47c4..7edf1a512 100644 --- a/externals/coda-oss/modules/c++/sys/source/FileFinder.cpp +++ b/externals/coda-oss/modules/c++/sys/source/FileFinder.cpp @@ -56,12 +56,8 @@ bool sys::FragmentPredicate::operator()(const std::string& entry) const { if (mIgnoreCase) { - std::string base = entry; - str::lower(base); - - std::string match = mFragment; - str::lower(match); - + const 
auto base = str::lower(entry); + const auto match = str::lower(mFragment); return str::contains(base, match); } else @@ -80,13 +76,10 @@ bool sys::ExtensionPredicate::operator()(const std::string& filename) const if (!sys::FileOnlyPredicate::operator()(filename)) return false; - std::string ext = sys::Path::splitExt(filename).second; + const std::string ext = sys::Path::splitExt(filename).second; if (mIgnoreCase) { - std::string matchExt = mExt; - str::lower(matchExt); - str::lower(ext); - return ext == matchExt; + return str::eq(ext, mExt); } else return ext == mExt; diff --git a/externals/coda-oss/modules/c++/sys/source/Path.cpp b/externals/coda-oss/modules/c++/sys/source/Path.cpp index 4c91e6d18..99233b236 100644 --- a/externals/coda-oss/modules/c++/sys/source/Path.cpp +++ b/externals/coda-oss/modules/c++/sys/source/Path.cpp @@ -22,6 +22,7 @@ #include "sys/Path.h" #include +#include #include namespace fs = coda_oss::filesystem; @@ -849,4 +850,20 @@ std::string Path::expandEnvironmentVariables(const std::string& path, fs::file_t return expandEnvironmentVariables_(path, unused_checkIfExists, &type); } +template +inline auto convertPaths_(coda_oss::span paths, TFunc fun) +{ + std::vector retval; + std::transform(paths.begin(), paths.end(), std::back_inserter(retval), fun); + return retval; +} +std::vector convertPaths(coda_oss::span paths) +{ + return convertPaths_(paths, [](const auto& p) { return p.string(); }); } +std::vector convertPaths(coda_oss::span paths) +{ + return convertPaths_(paths, [](const auto& p) { return p; }); +} + +} \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp index 3950d1242..08d7a9c00 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp @@ -37,6 +37,7 @@ CODA_OSS_disable_warning(-Wshadow) 
CODA_OSS_disable_warning_pop #include +#include #include #include @@ -89,26 +90,11 @@ bool ValidationErrorHandler::handleError( return true; } -inline std::vector convert(const std::vector& schemaPaths) -{ - std::vector retval; - std::transform(schemaPaths.begin(), schemaPaths.end(), std::back_inserter(retval), - [](const fs::path& p) { return p.string(); }); - return retval; -} -inline auto convert(const std::vector& paths) -{ - std::vector retval; - std::transform(paths.begin(), paths.end(), std::back_inserter(retval), - [](const auto& p) { return p; }); - return retval; -} - ValidatorXerces::ValidatorXerces( const std::vector& schemaPaths, logging::Logger* log, bool recursive) : - ValidatorXerces(convert(schemaPaths), log, recursive) + ValidatorXerces(sys::convertPaths(schemaPaths), log, recursive) { } ValidatorXerces::ValidatorXerces( @@ -169,7 +155,7 @@ ValidatorXerces::ValidatorXerces( // load our schemas -- // search each directory for schemas - const auto schemas = loadSchemas(convert(schemaPaths), recursive); + const auto schemas = loadSchemas(sys::convertPaths(schemaPaths), recursive); // add the schema to the validator // add the schema to the validator