-
-
Notifications
You must be signed in to change notification settings - Fork 7.2k
Make escape/unescape functions linear and not memory-storming. #4860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
e3a1a9a
1f55457
04c412b
66a71d2
ee410a6
7d07915
18ce6f7
dc0619e
f81406f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,58 +15,106 @@ namespace detail | |
{ | ||
|
||
/*! | ||
@brief replace all occurrences of a substring by another string | ||
|
||
@param[in,out] s the string to manipulate; changed so that all | ||
occurrences of @a f are replaced with @a t | ||
@param[in] f the substring to replace with @a t | ||
@param[in] t the string to replace @a f | ||
|
||
@pre The search string @a f must not be empty. **This precondition is | ||
enforced with an assertion.** | ||
|
||
@since version 2.0.0 | ||
*/ | ||
template<typename StringType> | ||
inline void replace_substring(StringType& s, const StringType& f, | ||
const StringType& t) | ||
{ | ||
JSON_ASSERT(!f.empty()); | ||
for (auto pos = s.find(f); // find the first occurrence of f | ||
pos != StringType::npos; // make sure f was found | ||
s.replace(pos, f.size(), t), // replace with t, and | ||
pos = s.find(f, pos + t.size())) // find the next occurrence of f | ||
{} | ||
} | ||
|
||
/*! | ||
* @brief string escaping as described in RFC 6901 (Sect. 4) | ||
* @brief Returns a copy of a string escaped as described in RFC 6901 (Sect. 4) | ||
* @param[in] s string to escape | ||
* @return escaped string | ||
* | ||
* Note the order of escaping "~" to "~0" and "/" to "~1" is important. | ||
*/ | ||
template<typename StringType> | ||
inline StringType escape(StringType s) | ||
template<typename StringType> // [[nodiscard]] | ||
inline StringType escape(StringType const& s) | ||
{ | ||
replace_substring(s, StringType{"~"}, StringType{"~0"}); | ||
replace_substring(s, StringType{"/"}, StringType{"~1"}); | ||
return s; | ||
using CharT = typename StringType::value_type; | ||
StringType res; | ||
|
||
auto esz = s.size(); | ||
for (auto const ch : s) | ||
{ | ||
if (ch == CharT('~') || ch == CharT('/')) | ||
{ | ||
++esz; | ||
} | ||
} | ||
if (esz == s.size()) | ||
{ | ||
res = s; | ||
} | ||
else | ||
{ | ||
res.reserve(esz); | ||
for (auto const ch : s) // Yes, this is UTF8-safe | ||
{ | ||
if (ch == CharT('~')) | ||
{ | ||
res.push_back(CharT('~')); | ||
res.push_back(CharT{'0'}); | ||
} | ||
else if (ch == CharT('/')) | ||
{ | ||
res.push_back(CharT{'~'}); | ||
res.push_back(CharT{'1'}); | ||
} | ||
else | ||
{ | ||
res.push_back(ch); | ||
} | ||
} | ||
} | ||
return res; | ||
} | ||
|
||
/*! | ||
* @brief string unescaping as described in RFC 6901 (Sect. 4) | ||
* @brief Unescapes a string as described in RFC 6901 (Sect. 4), in-place | ||
* @param[in] s string to unescape | ||
* @return unescaped string | ||
* | ||
* Note the order of escaping "~1" to "/" and "~0" to "~" is important. | ||
*/ | ||
template<typename StringType> | ||
inline void unescape(StringType& s) | ||
{ | ||
replace_substring(s, StringType{"~1"}, StringType{"/"}); | ||
replace_substring(s, StringType{"~0"}, StringType{"~"}); | ||
using CharT = typename StringType::value_type; | ||
auto j = s.begin(); | ||
while (j != s.end() && *j != CharT('~')) | ||
{ | ||
++j; | ||
} | ||
auto i = j; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you commented about this not being as clear, maybe something like this would help, with some explanatory comments:
|
||
while (i != s.end()) | ||
{ | ||
if (*i == CharT('~') && (i + 1) != s.end()) | ||
{ | ||
if (*(i + 1) == CharT('0')) | ||
{ | ||
*j++ = CharT('~'); | ||
++i; | ||
} | ||
else if (*(i + 1) == CharT('1')) | ||
{ | ||
*j++ = CharT('/'); | ||
++i; | ||
} // ... else shouldn't we throw parse_error.108 here? | ||
} | ||
else | ||
{ | ||
*j++ = *i; | ||
} | ||
++i; | ||
} | ||
s.erase(j, s.end()); | ||
s.shrink_to_fit(); | ||
} | ||
|
||
// Left out, so far we don't use it, so it just lowers test coverage | ||
// /*! | ||
// * @brief Out Of Place string unescaping as described in RFC 6901 (Sect. 4) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. returns a copy of a string unescaped as described... |
||
// * @param[in] s string to unescape | ||
// * | ||
// */ | ||
// template<typename StringType> // [[nodiscard]] | ||
// inline StringType unescape(StringType const& s) | ||
// { | ||
// StringType res = s; | ||
// unescape(res); | ||
// return res; | ||
// } | ||
|
||
} // namespace detail | ||
NLOHMANN_JSON_NAMESPACE_END |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you commented about this not being as clear, perhaps some use of algorithms would help
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did read somewhere that one of the goals was to remove the dependency on
<algorithm>
in order to reduce compile times.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't speak for @nlohmann here, but one person posted a PR to remove one of the 8 uses of
<algorithm>
. That PR hasn't been accepted and is now marked as stale. So it isn't necessarily a goal of the project as a whole. My personal opinion is that precompiled headers and/or using the standard library through modules are the way to go for compiler performance going forward, rather than not using library algorithms.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine with using
<algorithm>
to have readable code. If we can improve performance, we can do it later, but I think removing<algorithm>
was just a first shot at improving compilation speed, but not necessarily the most important thing.