Skip to content

Commit

Permalink
Ensure regex authority is matched to fix sanitizer errors (#7866)
Browse files Browse the repository at this point in the history
Summary:

It is possible to trigger address sanitizer when passed garbage strings in matchAuthorityAndPath function. We need to ensure that subgroups are actually matched, since accessing an unmatched string can lead to asan errors.

Differential Revision: D51822902
  • Loading branch information
kgpai authored and facebook-github-bot committed Dec 6, 2023
1 parent 075cc92 commit 38e099c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 25 deletions.
12 changes: 6 additions & 6 deletions velox/functions/prestosql/URLFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ namespace facebook::velox::functions {
bool matchAuthorityAndPath(
const boost::cmatch& urlMatch,
boost::cmatch& authAndPathMatch,
boost::cmatch& authorityMatch,
bool& hasAuthority) {
boost::cmatch& authorityMatch) {
static const boost::regex kAuthorityAndPathRegex("//([^/]*)(/.*)?");
auto authorityAndPath = submatch(urlMatch, 3);
if (!boost::regex_match(
Expand All @@ -32,8 +31,7 @@ bool matchAuthorityAndPath(
authAndPathMatch,
kAuthorityAndPathRegex)) {
// Does not start with //, doesn't have authority.
hasAuthority = false;
return true;
return false;
}

static const boost::regex kAuthorityRegex(
Expand All @@ -48,9 +46,11 @@ bool matchAuthorityAndPath(
return false; // Invalid URI Authority.
}

hasAuthority = true;
if (authorityMatch[3].matched) {
return true;
}

return true;
return false;
}

} // namespace facebook::velox::functions
44 changes: 25 additions & 19 deletions velox/functions/prestosql/URLFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ FOLLY_ALWAYS_INLINE StringView submatch(const boost::cmatch& match, int idx) {
return StringView(sub.first, sub.length());
}

template <typename TInString>
bool parse(const TInString& rawUrl, boost::cmatch& match) {
bool parse(const StringView& rawUrl, boost::cmatch& match) {
/// This regex is taken from RFC - 3986.
/// See: https://www.rfc-editor.org/rfc/rfc3986#appendix-B
/// The basic groups are:
Expand Down Expand Up @@ -62,6 +61,20 @@ bool parse(const TInString& rawUrl, boost::cmatch& match) {
rawUrl.data(), rawUrl.data() + rawUrl.size(), match, kUriRegex);
}

/// Parses the url and returns true if the particular sub group
/// is matched by the call to parse(url, match).
bool parse(const StringView& rawUrl, boost::cmatch& match, int subGroup) {
if (!parse(rawUrl, match)) {
return false;
}

if (subGroup >= match.size() || match[subGroup].matched == false) {
return false;
}

return true;
}

FOLLY_ALWAYS_INLINE unsigned char toHex(unsigned char c) {
return c < 10 ? (c + '0') : (c + 'A' - 10);
}
Expand Down Expand Up @@ -181,8 +194,7 @@ FOLLY_ALWAYS_INLINE void urlUnescape(
bool matchAuthorityAndPath(
const boost::cmatch& urlMatch,
boost::cmatch& authAndPathMatch,
boost::cmatch& authorityMatch,
bool& hasAuthority);
boost::cmatch& authorityMatch);

template <typename T>
struct UrlExtractProtocolFunction {
Expand Down Expand Up @@ -229,7 +241,7 @@ struct UrlExtractFragmentFunction {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 9)) {
result.setEmpty();
} else {
result.setNoCopy(submatch(match, 9));
Expand All @@ -256,17 +268,14 @@ struct UrlExtractHostFunction {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 3)) {
result.setEmpty();
return true;
}
boost::cmatch authAndPathMatch;
boost::cmatch authorityMatch;
bool hasAuthority;

if (matchAuthorityAndPath(
match, authAndPathMatch, authorityMatch, hasAuthority) &&
hasAuthority) {
if (matchAuthorityAndPath(match, authAndPathMatch, authorityMatch)) {
result.setNoCopy(submatch(authorityMatch, 3));
} else {
result.setEmpty();
Expand All @@ -284,16 +293,13 @@ struct UrlExtractPortFunction {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 4)) {
return false;
}

boost::cmatch authAndPathMatch;
boost::cmatch authorityMatch;
bool hasAuthority;
if (matchAuthorityAndPath(
match, authAndPathMatch, authorityMatch, hasAuthority) &&
hasAuthority) {
if (matchAuthorityAndPath(match, authAndPathMatch, authorityMatch)) {
auto port = submatch(authorityMatch, 4);
if (!port.empty()) {
try {
Expand Down Expand Up @@ -322,7 +328,7 @@ struct UrlExtractPathFunction {
}

boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 5)) {
result.setEmpty();
return false;
}
Expand Down Expand Up @@ -351,7 +357,7 @@ struct UrlExtractQueryFunction {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 7)) {
result.setEmpty();
return true;
}
Expand Down Expand Up @@ -381,7 +387,7 @@ struct UrlExtractParameterFunction {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
if (!parse(url, match, 7)) {
result.setEmpty();
return false;
}
Expand All @@ -402,7 +408,7 @@ struct UrlExtractParameterFunction {
boost::cregex_iterator end;

for (auto it = begin; it != end; ++it) {
if (it->length(2) != 0) { // key shouldnt be empty.
if (it->length(2) != 0 && (*it)[2].matched) { // key shouldnt be empty.
auto key = submatch((*it), 2);
if (param.compare(key) == 0) {
auto value = submatch((*it), 3);
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/prestosql/tests/URLFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ TEST_F(URLFunctionsTest, validateURL) {
std::nullopt,
std::nullopt,
std::nullopt);
validate(
"IC6S!8hGVRpo+!,yTaJEy/$RUZpqcr",
"",
"",
"IC6S!8hGVRpo !,yTaJEy/$RUZpqcr",
"",
"",
std::nullopt);
}

TEST_F(URLFunctionsTest, extractPath) {
Expand Down

0 comments on commit 38e099c

Please sign in to comment.