diff --git a/include/nlohmann/detail/string_escape.hpp b/include/nlohmann/detail/string_escape.hpp index 63715cde13..36e76bd0fd 100644 --- a/include/nlohmann/detail/string_escape.hpp +++ b/include/nlohmann/detail/string_escape.hpp @@ -15,58 +15,106 @@ namespace detail { /*! -@brief replace all occurrences of a substring by another string - -@param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t -@param[in] f the substring to replace with @a t -@param[in] t the string to replace @a f - -@pre The search string @a f must not be empty. **This precondition is -enforced with an assertion.** - -@since version 2.0.0 -*/ -template -inline void replace_substring(StringType& s, const StringType& f, - const StringType& t) -{ - JSON_ASSERT(!f.empty()); - for (auto pos = s.find(f); // find the first occurrence of f - pos != StringType::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t, and - pos = s.find(f, pos + t.size())) // find the next occurrence of f - {} -} - -/*! - * @brief string escaping as described in RFC 6901 (Sect. 4) + * @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4) * @param[in] s string to escape * @return escaped string * - * Note the order of escaping "~" to "~0" and "/" to "~1" is important. 
/*!
 * @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4)
 *
 * Every "~" is written as "~0" and every "/" as "~1". The input is scanned
 * once to compute the escaped length so that the result's storage can be
 * reserved up front; a string containing neither character is returned as a
 * plain copy.
 *
 * @tparam StringType string-like type; this function uses its value_type,
 *         size(), const iteration, reserve() and push_back()
 * @param[in] s string to escape
 * @return escaped string
 */
template<typename StringType> // [[nodiscard]]
inline StringType escape(StringType const& s)
{
    using CharT = typename StringType::value_type;
    const CharT tilde = CharT('~');
    const CharT slash = CharT('/');

    // each '~' or '/' grows the output by exactly one character
    auto escaped_size = s.size();
    for (auto const ch : s)
    {
        if (ch == tilde || ch == slash)
        {
            ++escaped_size;
        }
    }

    if (escaped_size == s.size())
    {
        // nothing to escape
        return s;
    }

    StringType res;
    res.reserve(escaped_size);
    for (auto const ch : s) // per-code-unit processing, as in the original
    {
        if (ch == tilde)
        {
            res.push_back(tilde);
            res.push_back(CharT('0'));
        }
        else if (ch == slash)
        {
            res.push_back(tilde);
            res.push_back(CharT('1'));
        }
        else
        {
            res.push_back(ch);
        }
    }
    return res;
}

/*!
 * @brief Unescapes a string as described in RFC 6901 (Sect. 4), in-place
 *
 * "~0" is decoded to "~" and "~1" to "/". Decoding is a single left-to-right
 * pass, so "~01" yields "~1" and never "/". A "~" that does not start one of
 * the two escape sequences (including a "~" at the very end) is kept
 * unchanged, matching what the former replace_substring-based implementation
 * did; it is not dropped.
 *
 * @tparam StringType string-like type; this function uses its value_type,
 *         begin(), end(), erase(first, last) and shrink_to_fit(), and does
 *         iterator arithmetic (it + 1)
 * @param[in,out] s string to unescape
 */
template<typename StringType>
inline void unescape(StringType& s)
{
    using CharT = typename StringType::value_type;
    const CharT tilde = CharT('~');

    // The prefix before the first '~' is already in its final place.
    auto out = s.begin();
    while (out != s.end() && *out != tilde)
    {
        ++out;
    }

    // `out` never overtakes `in`, so compacting within the same buffer is safe.
    for (auto in = out; in != s.end(); ++in)
    {
        CharT decoded = *in;
        if (*in == tilde && (in + 1) != s.end())
        {
            const CharT next = *(in + 1);
            if (next == CharT('0'))
            {
                ++in; // "~0" -> "~" (decoded already holds '~')
            }
            else if (next == CharT('1'))
            {
                decoded = CharT('/'); // "~1" -> "/"
                ++in;
            }
            // Any other follower: not a valid escape. The '~' is copied
            // through unchanged and the follower is handled by the next
            // iteration.
            // NOTE(review): the original author asked whether
            // parse_error.108 should be thrown here instead - still open.
        }
        *out = decoded;
        ++out;
    }

    s.erase(out, s.end());
    s.shrink_to_fit();
}

// Left out, so far we don't use it, so it just lowers test coverage
// /*!
//  * @brief Out Of Place string unescaping as described in RFC 6901 (Sect.
4) +// * @param[in] s string to unescape +// * +// */ +// template // [[nodiscard]] +// inline StringType unescape(StringType const& s) +// { +// StringType res = s; +// unescape(res); +// return res; +// } + } // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index aafeaf5ae5..aceb21bc4c 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3085,59 +3085,107 @@ namespace detail { /*! -@brief replace all occurrences of a substring by another string - -@param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t -@param[in] f the substring to replace with @a t -@param[in] t the string to replace @a f - -@pre The search string @a f must not be empty. **This precondition is -enforced with an assertion.** - -@since version 2.0.0 -*/ -template -inline void replace_substring(StringType& s, const StringType& f, - const StringType& t) -{ - JSON_ASSERT(!f.empty()); - for (auto pos = s.find(f); // find the first occurrence of f - pos != StringType::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t, and - pos = s.find(f, pos + t.size())) // find the next occurrence of f - {} -} - -/*! - * @brief string escaping as described in RFC 6901 (Sect. 4) + * @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4) * @param[in] s string to escape * @return escaped string * - * Note the order of escaping "~" to "~0" and "/" to "~1" is important. 
/*!
 * @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4)
 *
 * Every "~" is written as "~0" and every "/" as "~1". The input is scanned
 * once to compute the escaped length so that the result's storage can be
 * reserved up front; a string containing neither character is returned as a
 * plain copy.
 *
 * @tparam StringType string-like type; this function uses its value_type,
 *         size(), const iteration, reserve() and push_back()
 * @param[in] s string to escape
 * @return escaped string
 */
template<typename StringType> // [[nodiscard]]
inline StringType escape(StringType const& s)
{
    using CharT = typename StringType::value_type;
    const CharT tilde = CharT('~');
    const CharT slash = CharT('/');

    // each '~' or '/' grows the output by exactly one character
    auto escaped_size = s.size();
    for (auto const ch : s)
    {
        if (ch == tilde || ch == slash)
        {
            ++escaped_size;
        }
    }

    if (escaped_size == s.size())
    {
        // nothing to escape
        return s;
    }

    StringType res;
    res.reserve(escaped_size);
    for (auto const ch : s) // per-code-unit processing, as in the original
    {
        if (ch == tilde)
        {
            res.push_back(tilde);
            res.push_back(CharT('0'));
        }
        else if (ch == slash)
        {
            res.push_back(tilde);
            res.push_back(CharT('1'));
        }
        else
        {
            res.push_back(ch);
        }
    }
    return res;
}

/*!
 * @brief Unescapes a string as described in RFC 6901 (Sect. 4), in-place
 *
 * "~0" is decoded to "~" and "~1" to "/". Decoding is a single left-to-right
 * pass, so "~01" yields "~1" and never "/". A "~" that does not start one of
 * the two escape sequences (including a "~" at the very end) is kept
 * unchanged, matching what the former replace_substring-based implementation
 * did; it is not dropped.
 *
 * @tparam StringType string-like type; this function uses its value_type,
 *         begin(), end(), erase(first, last) and shrink_to_fit(), and does
 *         iterator arithmetic (it + 1)
 * @param[in,out] s string to unescape
 */
template<typename StringType>
inline void unescape(StringType& s)
{
    using CharT = typename StringType::value_type;
    const CharT tilde = CharT('~');

    // The prefix before the first '~' is already in its final place.
    auto out = s.begin();
    while (out != s.end() && *out != tilde)
    {
        ++out;
    }

    // `out` never overtakes `in`, so compacting within the same buffer is safe.
    for (auto in = out; in != s.end(); ++in)
    {
        CharT decoded = *in;
        if (*in == tilde && (in + 1) != s.end())
        {
            const CharT next = *(in + 1);
            if (next == CharT('0'))
            {
                ++in; // "~0" -> "~" (decoded already holds '~')
            }
            else if (next == CharT('1'))
            {
                decoded = CharT('/'); // "~1" -> "/"
                ++in;
            }
            // Any other follower: not a valid escape. The '~' is copied
            // through unchanged and the follower is handled by the next
            // iteration.
            // NOTE(review): the original author asked whether
            // parse_error.108 should be thrown here instead - still open.
        }
        *out = decoded;
        ++out;
    }

    s.erase(out, s.end());
    s.shrink_to_fit();
}

// Left out, so far we don't use it, so it just lowers test coverage
// /*!
//  * @brief Out Of Place string unescaping as described in RFC 6901 (Sect.
4) +// * @param[in] s string to unescape +// * +// */ +// template // [[nodiscard]] +// inline StringType unescape(StringType const& s) +// { +// StringType res = s; +// unescape(res); +// return res; +// } + } // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/tests/src/unit-alt-string.cpp b/tests/src/unit-alt-string.cpp index fd6047c374..61d74a8984 100644 --- a/tests/src/unit-alt-string.cpp +++ b/tests/src/unit-alt-string.cpp @@ -58,6 +58,36 @@ class alt_string str_impl.push_back(c); } + /* The members added below forward to str_impl. They supply the begin(), end(), erase(first, last) and shrink_to_fit() calls that the in-place detail::unescape() makes on its string argument. NOTE(review): both begin() overloads are declared without noexcept while both end() overloads have it - presumably unintentional; confirm. */ + void shrink_to_fit() + { + str_impl.shrink_to_fit(); + } + + std::string::iterator begin() + { + return str_impl.begin(); + } + + std::string::const_iterator begin() const + { + return str_impl.begin(); + } + + std::string::iterator end() noexcept + { + return str_impl.end(); + } + + std::string::const_iterator end() const noexcept + { + return str_impl.end(); + } + + std::string::iterator erase(std::string::const_iterator first, std::string::const_iterator last ) + { + return str_impl.erase(first, last); + } + template bool operator==(const op_type& op) const {