diff --git a/velox/common/encode/CMakeLists.txt b/velox/common/encode/CMakeLists.txt
index d9918d53b59c5..612a5ae95cab8 100644
--- a/velox/common/encode/CMakeLists.txt
+++ b/velox/common/encode/CMakeLists.txt
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-add_library(velox_encode Base64.cpp)
+add_library(velox_encode Base64.cpp EncoderUtils.cpp)
 target_link_libraries(velox_encode PUBLIC Folly::folly)
diff --git a/velox/common/encode/EncoderUtils.cpp b/velox/common/encode/EncoderUtils.cpp
new file mode 100644
index 0000000000000..8606ef82532dd
--- /dev/null
+++ b/velox/common/encode/EncoderUtils.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/common/encode/EncoderUtils.h"
+
+namespace facebook::velox::encoding {
+
+bool isPadded(const char* src, size_t len) {
+  return len > 0 && src[len - 1] == kPadding;
+}
+
+size_t countPadding(const char* src, size_t len) {
+  size_t paddingCount = 0;
+  while (len > 0 && src[len - 1] == kPadding) {
+    ++paddingCount;
+    --len;
+  }
+
+  return paddingCount;
+}
+
+uint8_t baseReverseLookup(int base, char p, const ReverseIndex& table) {
+  // Index through an unsigned byte so that characters >= 0x80 cannot produce
+  // a negative subscript on platforms where char is signed.
+  const auto curr = table[static_cast<uint8_t>(p)];
+  // Value of encoded character shall be less than base.
+  if (curr >= base) {
+    throw EncoderException(
+        "decode() - invalid input string: invalid characters");
+  }
+
+  return curr;
+}
+
+} // namespace facebook::velox::encoding
diff --git a/velox/common/encode/EncoderUtils.h b/velox/common/encode/EncoderUtils.h
new file mode 100644
index 0000000000000..32440bf9b2f88
--- /dev/null
+++ b/velox/common/encode/EncoderUtils.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+
+namespace facebook::velox::encoding {
+
+// Exception type thrown by the encoding helpers (e.g. baseReverseLookup()).
+// Only the pointer is stored, so `msg` must outlive the exception object;
+// pass a string literal.
+class EncoderException : public std::exception {
+ public:
+  explicit EncoderException(const char* msg) : msg_(msg) {}
+  const char* what() const noexcept override {
+    return msg_;
+  }
+
+ protected:
+  const char* msg_;
+};
+
+// Forward table (digit value -> encoded character) and reverse table (byte ->
+// digit value, 255 for bytes outside the charset).
+// NOTE(review): the extents 64 and 256 are assumed; confirm against the
+// charset tables defined by the callers.
+using Charset = std::array<char, 64>;
+using ReverseIndex = std::array<uint8_t, 256>;
+
+// Returns true if the encoded data ends with a padding character.
+bool isPadded(const char* src, size_t len);
+
+// Counts the number of trailing padding characters in encoded data.
+size_t countPadding(const char* src, size_t len);
+
+// Gets the value corresponding to an encoded character. Throws
+// EncoderException if that value is not less than `base`.
+uint8_t baseReverseLookup(int base, char p, const ReverseIndex& table);
+
+// Padding character used in encoding.
+inline constexpr char kPadding = '=';
+
+// Validates the charset against the ReverseIndex table: every character
+// charset[i] for i in [0, idx] must map back to i.
+constexpr bool checkForwardIndex(
+    uint8_t idx,
+    const Charset& charset,
+    const ReverseIndex& table) {
+  for (int i = idx; i >= 0; --i) {
+    if (table[static_cast<uint8_t>(charset[i])] != i) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Returns true if the character c occurs in charset[idx, base).
+/// Similar to strchr(), but the search is bounded by "base" rather than by a
+/// terminating "\0".
+constexpr bool findCharacterInCharSet(
+    const Charset& charset,
+    int base,
+    uint8_t idx,
+    const char c) {
+  for (int i = idx; i < base; ++i) {
+    if (charset[i] == c) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Validates the ReverseIndex table against the charset for every byte i in
+// [0, idx]: an entry of 255 requires that i is not one of the first "base"
+// charset characters; any other entry must point at the charset slot that
+// holds i.
+constexpr bool checkReverseIndex(
+    uint8_t idx,
+    const Charset& charset,
+    int base,
+    const ReverseIndex& table) {
+  for (int i = idx; i >= 0; --i) {
+    const bool consistent = table[i] == 255
+        ? !findCharacterInCharSet(charset, base, 0, static_cast<char>(i))
+        : charset[table[i]] == i;
+    if (!consistent) {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace facebook::velox::encoding