Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 85 additions & 37 deletions include/nlohmann/detail/string_escape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,58 +15,106 @@ namespace detail
{

/*!
@brief replace all occurrences of a substring by another string

@param[in,out] s the string to manipulate; changed so that all
occurrences of @a f are replaced with @a t
@param[in] f the substring to replace with @a t
@param[in] t the string to replace @a f

@pre The search string @a f must not be empty. **This precondition is
enforced with an assertion.**

@since version 2.0.0
*/
template<typename StringType>
inline void replace_substring(StringType& s, const StringType& f,
                              const StringType& t)
{
    // an empty search string would match everywhere and never terminate
    JSON_ASSERT(!f.empty());

    auto match = s.find(f);
    while (match != StringType::npos)
    {
        // swap this occurrence of f for t ...
        s.replace(match, f.size(), t);
        // ... and resume searching right behind the inserted text, so the
        // replacement itself is never scanned again
        match = s.find(f, match + t.size());
    }
}

/*!
* @brief string escaping as described in RFC 6901 (Sect. 4)
* @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4)
* @param[in] s string to escape
* @return escaped string
*
* Note the order of escaping "~" to "~0" and "/" to "~1" is important.
*/
template<typename StringType>
inline StringType escape(StringType s)
template<typename StringType> // [[nodiscard]]
inline StringType escape(StringType const& s)
{
replace_substring(s, StringType{"~"}, StringType{"~0"});
replace_substring(s, StringType{"/"}, StringType{"~1"});
return s;
using CharT = typename StringType::value_type;
StringType res;

auto esz = s.size();
for (auto const ch : s)
{
if (ch == CharT('~') || ch == CharT('/'))
{
++esz;
}
}
if (esz == s.size())
{
res = s;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you commented about this not being as clear, perhaps some use of algorithms would help

{
  using CharT = typename StringType::value_type;
  auto results = std::count_if(s.begin(), s.end(), [](CharT ch) { return (ch == CharT('~')) || (ch == CharT('/')); });

  if (results == 0) {
    return s;
  }

  StringType res;
  res.reserve(s.size() + results);

  // for loop here

  return res;
}

Copy link
Contributor Author

@puffetto puffetto Jul 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did read somewhere that one of the goals was to remove the dependency on <algorithm> in order to reduce compile times.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't speak for @nlohmann here, but one person posted a PR to remove one of the 8 uses of <algorithm>. That PR hasn't been accepted and is now marked as stale. So it isn't necessarily a goal of the project as a whole. My personal opinion is that precompiled headers and/or using the standard library through modules are the way to go for compiler performance going forward, rather than not using library algorithms.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with using <algorithm> to have readable code. If we can improve performance, we can do it later; I think removing <algorithm> was just a first shot at improving compilation speed, not necessarily the most important thing.

else
{
res.reserve(esz);
for (auto const ch : s) // Yes, this is UTF8-safe
{
if (ch == CharT('~'))
{
res.push_back(CharT('~'));
res.push_back(CharT{'0'});
}
else if (ch == CharT('/'))
{
res.push_back(CharT{'~'});
res.push_back(CharT{'1'});
}
else
{
res.push_back(ch);
}
}
}
return res;
}

/*!
* @brief string unescaping as described in RFC 6901 (Sect. 4)
* @brief Unescapes a string as described in RFC 6901 (Sect. 4), in-place
* @param[in] s string to unescape
* @return unescaped string
*
* Note the order of escaping "~1" to "/" and "~0" to "~" is important.
*/
template<typename StringType>
inline void unescape(StringType& s)
{
replace_substring(s, StringType{"~1"}, StringType{"/"});
replace_substring(s, StringType{"~0"}, StringType{"~"});
using CharT = typename StringType::value_type;
auto j = s.begin();
while (j != s.end() && *j != CharT('~'))
{
++j;
}
auto i = j;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you commented about this not being as clear, maybe something like this would help, with some explanatory comments:

  using CharT = typename StringType::value_type;
  auto start = s.find(CharT('~'));

  if (start == StringType::npos) {
    return;
  }

  for (auto read = s.begin() + start, write = read; read != s.end(); read++, write++) {
    auto ch = *read;

    if (ch == CharT('~')) {
      auto next = read + 1;

      if (next != s.end()) {
        switch (*next) {
          case CharT('0'):
            ch = CharT('~');
            read++;
            break;
          case CharT('1'):
            ch = CharT('/');
            read++;
            break;
        }
      }
    }

    *write = ch;
  } 

while (i != s.end())
{
if (*i == CharT('~') && (i + 1) != s.end())
{
if (*(i + 1) == CharT('0'))
{
*j++ = CharT('~');
++i;
}
else if (*(i + 1) == CharT('1'))
{
*j++ = CharT('/');
++i;
} // ... else shouldn't we throw parse_error.108 here?
}
else
{
*j++ = *i;
}
++i;
}
s.erase(j, s.end());
s.shrink_to_fit();
}

// Left out, so far we don't use it, so it just lowers test coverage
// /*!
// * @brief Out Of Place string unescaping as described in RFC 6901 (Sect. 4)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

returns a copy of a string unescaped as described...

// * @param[in] s string to unescape
// *
// */
// template<typename StringType> // [[nodiscard]]
// inline StringType unescape(StringType const& s)
// {
// StringType res = s;
// unescape(res);
// return res;
// }

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END
122 changes: 85 additions & 37 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3085,59 +3085,107 @@ namespace detail
{

/*!
@brief replace all occurrences of a substring by another string

@param[in,out] s the string to manipulate; changed so that all
occurrences of @a f are replaced with @a t
@param[in] f the substring to replace with @a t
@param[in] t the string to replace @a f

@pre The search string @a f must not be empty. **This precondition is
enforced with an assertion.**

@since version 2.0.0
*/
template<typename StringType>
inline void replace_substring(StringType& s, const StringType& f,
                              const StringType& t)
{
    // an empty search string would match everywhere and never terminate
    JSON_ASSERT(!f.empty());

    auto match = s.find(f);
    while (match != StringType::npos)
    {
        // swap this occurrence of f for t ...
        s.replace(match, f.size(), t);
        // ... and resume searching right behind the inserted text, so the
        // replacement itself is never scanned again
        match = s.find(f, match + t.size());
    }
}

/*!
 * @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4)
 * @param[in] s string to escape
 * @return escaped copy of @a s in which every "~" is replaced by "~0" and
 *         every "/" is replaced by "~1"
 *
 * The input is processed in a single pass, character by character, so the "~"
 * emitted while escaping a "/" is never escaped a second time.
 */
template<typename StringType>
inline StringType escape(StringType const& s)
{
    using CharT = typename StringType::value_type;

    // every '~' or '/' makes the result exactly one character longer
    auto escaped_size = s.size();
    for (auto const ch : s)
    {
        if (ch == CharT('~') || ch == CharT('/'))
        {
            ++escaped_size;
        }
    }

    // nothing to escape: hand back a plain copy
    if (escaped_size == s.size())
    {
        return s;
    }

    StringType res;
    res.reserve(escaped_size); // one allocation for the whole result
    for (auto const ch : s)
    {
        if (ch == CharT('~'))
        {
            res.push_back(CharT('~'));
            res.push_back(CharT('0'));
        }
        else if (ch == CharT('/'))
        {
            res.push_back(CharT('~'));
            res.push_back(CharT('1'));
        }
        else
        {
            res.push_back(ch);
        }
    }
    return res;
}

/*!
 * @brief Unescapes a string as described in RFC 6901 (Sect. 4), in-place
 * @param[in,out] s string to unescape; on return every "~1" has been replaced
 *                by "/" and every "~0" by "~"
 *
 * The string is rewritten in a single left-to-right pass, so the "~" produced
 * from "~0" is never combined with the character that follows it (i.e. "~01"
 * becomes "~1", not "/"). A "~" that is not followed by "0" or "1" is not an
 * escape sequence and is copied unchanged.
 */
template<typename StringType>
inline void unescape(StringType& s)
{
    using CharT = typename StringType::value_type;

    // the prefix in front of the first '~' is already in its final position
    auto write = s.begin();
    while (write != s.end() && *write != CharT('~'))
    {
        ++write;
    }

    // compact the remainder: `read` runs ahead, `write` marks the output end
    auto read = write;
    while (read != s.end())
    {
        CharT ch = *read;
        ++read;
        if (ch == CharT('~') && read != s.end())
        {
            if (*read == CharT('0'))
            {
                ++read; // "~0" -> "~" (ch already holds '~')
            }
            else if (*read == CharT('1'))
            {
                ch = CharT('/'); // "~1" -> "/"
                ++read;
            }
            // any other character: keep the '~' and let the next iteration
            // copy the following character as is
        }
        *write = ch;
        ++write;
    }

    // drop the now unused tail
    s.erase(write, s.end());
    s.shrink_to_fit();
}

// Left out, so far we don't use it, so it just lowers test coverage
// /*!
// * @brief Out Of Place string unescaping as described in RFC 6901 (Sect. 4)
// * @param[in] s string to unescape
// *
// */
// template<typename StringType> // [[nodiscard]]
// inline StringType unescape(StringType const& s)
// {
// StringType res = s;
// unescape(res);
// return res;
// }

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

Expand Down
30 changes: 30 additions & 0 deletions tests/src/unit-alt-string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ class alt_string
str_impl.push_back(c);
}

// Forwards the shrink_to_fit() request to the wrapped str_impl.
void shrink_to_fit()
{
str_impl.shrink_to_fit();
}

// Returns the mutable begin iterator of the wrapped str_impl.
// Declared noexcept to match the end() overloads of this class.
std::string::iterator begin() noexcept
{
    return str_impl.begin();
}

// Returns the read-only begin iterator of the wrapped str_impl.
// Declared noexcept to match the end() overloads of this class.
std::string::const_iterator begin() const noexcept
{
    return str_impl.begin();
}

// Returns the mutable end iterator of the wrapped str_impl.
std::string::iterator end() noexcept
{
return str_impl.end();
}

// Returns the read-only end iterator of the wrapped str_impl.
std::string::const_iterator end() const noexcept
{
return str_impl.end();
}

// Erases the range [first, last) by forwarding to str_impl.erase() and
// returns whatever iterator that call yields.
std::string::iterator erase(std::string::const_iterator first, std::string::const_iterator last )
{
return str_impl.erase(first, last);
}

template <typename op_type>
bool operator==(const op_type& op) const
{
Expand Down
Loading