diff --git a/velox/functions/prestosql/URLFunctions.h b/velox/functions/prestosql/URLFunctions.h index be6edcd7bd4a8..d3d50fd894a15 100644 --- a/velox/functions/prestosql/URLFunctions.h +++ b/velox/functions/prestosql/URLFunctions.h @@ -242,19 +242,15 @@ template struct UrlExtractPathFunction { VELOX_DEFINE_FUNCTION_TYPES(T); - // Results refer to strings in the first argument. - static constexpr int32_t reuse_strings_from_arg = 0; - - // ASCII input always produces ASCII result. - static constexpr bool is_default_ascii_behavior = true; + // Input is always ASCII, but result may or may not be ASCII. - FOLLY_ALWAYS_INLINE bool call( + FOLLY_ALWAYS_INLINE void call( out_type& result, const arg_type& url) { boost::cmatch match; if (!parse(url, match)) { result.setEmpty(); - return true; + return; } boost::cmatch authAndPathMatch; @@ -263,14 +259,14 @@ struct UrlExtractPathFunction { if (matchAuthorityAndPath( match, authAndPathMatch, authorityMatch, hasAuthority)) { + StringView escapedPath; if (hasAuthority) { - result.setNoCopy(submatch(authAndPathMatch, 2)); + escapedPath = submatch(authAndPathMatch, 2); } else { - result.setNoCopy(submatch(match, 2)); + escapedPath = submatch(match, 2); } + urlUnescape(result, escapedPath); } - - return true; } }; diff --git a/velox/functions/prestosql/tests/URLFunctionsTest.cpp b/velox/functions/prestosql/tests/URLFunctionsTest.cpp index aebcc43b7ef8e..2269f02af4dfe 100644 --- a/velox/functions/prestosql/tests/URLFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/URLFunctionsTest.cpp @@ -16,27 +16,26 @@ #include #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" -using string_t = std::string; - namespace facebook::velox { namespace { class URLFunctionsTest : public functions::test::FunctionBaseTest { protected: void validate( - const string_t& url, - const string_t& expectedProtocol, - const string_t& expectedHost, - const string_t& expectedPath, - const string_t& expectedFragment, - const string_t& expectedQuery, + const std::string& url, + const std::string& expectedProtocol, + const std::string& expectedHost, + const std::string& expectedPath, + const std::string& expectedFragment, + const std::string& expectedQuery, const std::optional expectedPort) { - const auto extractFn = [&](const string_t& fn, - const std::optional& a) { - return evaluateOnce(fmt::format("url_extract_{}(c0)", fn), a); + const auto extractFn = [&](const std::string& fn, + const std::optional& a) { + return evaluateOnce( + fmt::format("url_extract_{}(c0)", fn), a); }; - const auto extractPort = [&](const std::optional& a) { + const auto extractPort = [&](const std::optional& a) { return evaluateOnce("url_extract_port(c0)", a); }; @@ -101,6 +100,17 @@ TEST_F(URLFunctionsTest, validateURL) { validate("foo", "", "", "", "", "", std::nullopt); } +TEST_F(URLFunctionsTest, extractPath) { + const auto extractPath = [&](const std::optional& url) { + return evaluateOnce("url_extract_path(c0)", url); + }; + + ASSERT_EQ( + "/media/set/Books and Magazines.php", + extractPath( + "https://www.cnn.com/media/set/Books%20and%20Magazines.php?foo=bar")); +} + TEST_F(URLFunctionsTest, extractParameter) { const auto extractParam = [&](const std::optional& a, const std::optional& b) {