Skip to content

Commit

Permalink
[cherry-pick](branch3.0) impl translate and url encode (apache#41657)
Browse files Browse the repository at this point in the history
## Proposed changes
pick from master:
apache#40567
  • Loading branch information
suxiaogang223 authored Oct 13, 2024
1 parent 1603b8b commit c3740ba
Show file tree
Hide file tree
Showing 17 changed files with 1,205 additions and 75 deletions.
42 changes: 17 additions & 25 deletions be/src/util/url_coding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,33 @@

#include "util/url_coding.h"

#include <curl/curl.h>
#include <libbase64.h>
#include <math.h>

#include <memory>
#include <sstream>

namespace doris {

static inline void url_encode(const char* in, int in_len, std::string* out) {
(*out).reserve(in_len);
std::stringstream ss;

for (int i = 0; i < in_len; ++i) {
const char ch = in[i];

// Escape the character iff a) we are in Hive-compat mode and the
// character is in the Hive whitelist or b) we are not in
// Hive-compat mode, and the character is not alphanumeric or one
// of the four commonly excluded characters.
ss << ch;
}

(*out) = ss.str();
// Returns the uppercase hexadecimal digit ('0'-'9', 'A'-'F') for the low four
// bits of `x`.
//
// Only the low nibble is consulted, so the result is always a valid hex digit.
// This matters for callers that derive the argument from a plain `char`: where
// `char` is signed, `c >> 4` and `c % 16` are negative for bytes >= 0x80 and
// arrive here as large unsigned values (e.g. 254). Their low nibble is still
// the intended digit, whereas plain arithmetic on the full value would wrap
// around and return an arbitrary character.
inline unsigned char to_hex(unsigned char x) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";
    return static_cast<unsigned char>(kHexDigits[x & 0x0F]);
}

void url_encode(const std::vector<uint8_t>& in, std::string* out) {
if (in.empty()) {
*out = "";
} else {
url_encode(reinterpret_cast<const char*>(&in[0]), in.size(), out);
// Adapted from http://dlib.net/dlib/server/server_http.cpp.html
//
// URL-encodes `in` and stores the result in `*out`, replacing any previous
// content. The rules follow java.net.URLEncoder
// (https://docs.oracle.com/javase/8/docs/api/java/net/URLEncoder.html):
//   - 'a'-'z', 'A'-'Z', '0'-'9', '.', '-', '*' and '_' are copied unchanged;
//   - a space becomes '+';
//   - every other byte becomes "%XX", XX being its value in uppercase hex.
// `out` must not be null.
void url_encode(const std::string_view& in, std::string* out) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";

    // Build into a local string so that `in` may safely view `*out`'s buffer.
    std::string encoded;
    // Lower bound only: every escaped byte expands to three characters.
    encoded.reserve(in.size());

    for (const char c : in) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ||
            c == '.' || c == '-' || c == '*' || c == '_') { // allowed
            encoded.push_back(c);
        } else if (c == ' ') {
            encoded.push_back('+');
        } else {
            // Split the *unsigned* byte value into nibbles. Where plain `char`
            // is signed, bytes >= 0x80 are negative, and `c >> 4` / `c % 16`
            // would not yield values in 0..15.
            const auto byte = static_cast<unsigned char>(c);
            encoded.push_back('%');
            encoded.push_back(kHexDigits[byte >> 4]);
            encoded.push_back(kHexDigits[byte & 0x0F]);
        }
    }

    // Hand the finished buffer to the caller without copying it.
    out->swap(encoded);
}

void url_encode(const std::string& in, std::string* out) {
url_encode(in.c_str(), in.size(), out);
*out = os.str();
}

// Adapted from
Expand Down
12 changes: 2 additions & 10 deletions be/src/util/url_coding.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,19 @@

#pragma once

#include <stddef.h>

#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <string>
#include <vector>

namespace doris {

// Utility method to URL-encode a string: the encoded form of `in` is written
// to `*out`, with special characters replaced by %<hex value in ascii>.
// NOTE(review): this comment used to describe an optional hive_compat
// parameter (mimicking Hive's behaviour of encoding only certain characters,
// excluding e.g. ' '). Neither declaration below takes such a parameter, so
// that description looks stale -- confirm before relying on it.
void url_encode(const std::string& in, std::string* out);
void url_encode(const std::string_view& in, std::string* out);

// Utility method to decode a string that was URL-encoded. Returns
// true unless the string could not be correctly decoded.
// NOTE(review): this comment used to describe an optional hive_compat
// parameter (treating the input as encoded by Hive, i.e. selectively ignoring
// certain characters like ' '). The declaration below takes no such
// parameter, so that description looks stale -- confirm before relying on it.
bool url_decode(const std::string& in, std::string* out);

// Presumably writes the Base64 encoding of `in` to `*out`; the implementation
// is not visible from this declaration -- TODO confirm in url_coding.cpp.
void base64_encode(const std::string& in, std::string* out);
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/functions/function_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionExtractURLParameter>();
factory.register_function<FunctionStringParseUrl>();
factory.register_function<FunctionUrlDecode>();
factory.register_function<FunctionUrlEncode>();
factory.register_function<FunctionRandomBytes>();
factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
Expand All @@ -1057,6 +1058,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionStringDigestSHA2>();
factory.register_function<FunctionReplace<ReplaceImpl, true>>();
factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
factory.register_function<FunctionTranslate>();
factory.register_function<FunctionMask>();
factory.register_function<FunctionMaskPartial<true>>();
factory.register_function<FunctionMaskPartial<false>>();
Expand Down
Loading

0 comments on commit c3740ba

Please sign in to comment.