-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1b6b7b2
commit 4c614f1
Showing
10 changed files
with
534 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include "velox/common/encode/Base32.h" | ||
|
||
#include <glog/logging.h> | ||
|
||
namespace facebook::velox::encoding { | ||
|
||
// Base32 block geometry: every group of 5 binary bytes (40 bits) is
// represented by 8 encoded characters that each carry 5 bits.

// Number of decoded (binary) bytes in one full block.
static constexpr int kBinaryBlockByteSize = 5;
// Number of encoded characters in one full block (8 characters x 5 bits = 40
// bits of payload).
static constexpr int kEncodedBlockByteSize = 8;
|
||
// Forward alphabet: the character at position i represents the 5-bit value i.
// Positions 0-25 are 'A'-'Z', positions 26-31 are '2'-'7'.
constexpr Base32::Charset kBase32Charset = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 - 7
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 - 15
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 - 23
    'Y', 'Z', '2', '3', '4', '5', '6', '7'}; // 24 - 31
|
||
// Reverse alphabet, indexed by the unsigned byte value of an encoded
// character. Entries for '2'-'7' hold 26-31 and entries for 'A'-'Z' hold 0-25;
// every other byte value holds 255.
constexpr Base32::ReverseIndex kBase32ReverseIndexTable = {
    255, 255, 255, 255, 255, 255, 255, 255, // 0x00 - 0x07
    255, 255, 255, 255, 255, 255, 255, 255, // 0x08 - 0x0F
    255, 255, 255, 255, 255, 255, 255, 255, // 0x10 - 0x17
    255, 255, 255, 255, 255, 255, 255, 255, // 0x18 - 0x1F
    255, 255, 255, 255, 255, 255, 255, 255, // 0x20 - 0x27
    255, 255, 255, 255, 255, 255, 255, 255, // 0x28 - 0x2F
    255, 255, 26,  27,  28,  29,  30,  31, // 0x30 - 0x37 ('2' - '7')
    255, 255, 255, 255, 255, 255, 255, 255, // 0x38 - 0x3F
    255, 0,   1,   2,   3,   4,   5,   6, // 0x40 - 0x47 ('A' - 'G')
    7,   8,   9,   10,  11,  12,  13,  14, // 0x48 - 0x4F ('H' - 'O')
    15,  16,  17,  18,  19,  20,  21,  22, // 0x50 - 0x57 ('P' - 'W')
    23,  24,  25,  255, 255, 255, 255, 255, // 0x58 - 0x5F ('X' - 'Z')
    255, 255, 255, 255, 255, 255, 255, 255, // 0x60 - 0x67
    255, 255, 255, 255, 255, 255, 255, 255, // 0x68 - 0x6F
    255, 255, 255, 255, 255, 255, 255, 255, // 0x70 - 0x77
    255, 255, 255, 255, 255, 255, 255, 255, // 0x78 - 0x7F
    255, 255, 255, 255, 255, 255, 255, 255, // 0x80 - 0x87
    255, 255, 255, 255, 255, 255, 255, 255, // 0x88 - 0x8F
    255, 255, 255, 255, 255, 255, 255, 255, // 0x90 - 0x97
    255, 255, 255, 255, 255, 255, 255, 255, // 0x98 - 0x9F
    255, 255, 255, 255, 255, 255, 255, 255, // 0xA0 - 0xA7
    255, 255, 255, 255, 255, 255, 255, 255, // 0xA8 - 0xAF
    255, 255, 255, 255, 255, 255, 255, 255, // 0xB0 - 0xB7
    255, 255, 255, 255, 255, 255, 255, 255, // 0xB8 - 0xBF
    255, 255, 255, 255, 255, 255, 255, 255, // 0xC0 - 0xC7
    255, 255, 255, 255, 255, 255, 255, 255, // 0xC8 - 0xCF
    255, 255, 255, 255, 255, 255, 255, 255, // 0xD0 - 0xD7
    255, 255, 255, 255, 255, 255, 255, 255, // 0xD8 - 0xDF
    255, 255, 255, 255, 255, 255, 255, 255, // 0xE0 - 0xE7
    255, 255, 255, 255, 255, 255, 255, 255, // 0xE8 - 0xEF
    255, 255, 255, 255, 255, 255, 255, 255, // 0xF0 - 0xF7
    255, 255, 255, 255, 255, 255, 255, 255}; // 0xF8 - 0xFF
|
||
// Verify that each of the 32 entries in kBase32Charset maps back to its own
// position through kBase32ReverseIndexTable. The index of the LAST charset
// entry is passed so the whole alphabet is covered; the previous argument
// (sizeof / 2 - 1 == 15) stopped halfway through it.
// NOTE(review): assumes checkForwardIndex validates positions [0, index] -
// confirm against EncoderUtils.h.
static_assert(
    checkForwardIndex(
        kBase32Charset.size() - 1,
        kBase32Charset,
        kBase32ReverseIndexTable),
    "kBase32Charset has incorrect entries");
|
||
// Compile-time consistency check in the opposite direction: starting from the
// last slot of kBase32ReverseIndexTable, confirm the table agrees with
// kBase32Charset.
static_assert(
    checkReverseIndex(
        kBase32ReverseIndexTable.size() - 1,
        kBase32Charset,
        kBase32ReverseIndexTable),
    "kBase32ReverseIndexTable has incorrect entries.");
|
||
// static
/// Computes how many bytes decoding 'input' will produce.
///
/// @param input Encoded characters; only the first 'inputSize' are examined.
/// @param inputSize In: number of encoded characters. Out: the same count
/// with trailing padding removed (unchanged when an error is returned).
/// @param decodedSize Out: number of decoded bytes.
/// @return Status::OK() on success, or a UserError when the length (or the
/// amount of padding) cannot correspond to any Base32 encoding.
Status Base32::calculateDecodedSize(
    std::string_view input,
    size_t& inputSize,
    size_t& decodedSize) {
  if (inputSize == 0) {
    decodedSize = 0;
    return Status::OK();
  }

  // Check if the input data is padded
  if (isPadded(input.data(), inputSize)) {
    // If padded, ensure that the string length is a multiple of the encoded
    // block size
    if (inputSize % kEncodedBlockByteSize != 0) {
      return Status::UserError(
          "Base32::decode() - invalid input string: string length is not a multiple of 8.");
    }

    const auto padding = numPadding(input.data(), inputSize);

    // A final group can only hold 2, 4, 5 or 7 significant characters. A
    // remainder of 1, 3 or 6 has no byte-aligned decoding, and letting it
    // through makes the decoder consume a padding character and write one
    // byte more than 'decodedSize'. Reject it before touching the outputs.
    const auto significant = (inputSize - padding) % kEncodedBlockByteSize;
    if ((significant == 6) || (significant == 3) || (significant == 1)) {
      return Status::UserError(
          "Base32::decode() - invalid input string: invalid number of padding characters.");
    }

    decodedSize = (inputSize * kBinaryBlockByteSize) / kEncodedBlockByteSize;
    inputSize -= padding;

    // Adjust the needed size by deducting the bytes corresponding to the
    // padding (rounded up, since a partially padded byte is not produced).
    decodedSize -=
        ((padding * kBinaryBlockByteSize) + (kEncodedBlockByteSize - 1)) /
        kEncodedBlockByteSize;
    return Status::OK();
  }

  // If not padded, calculate extra bytes, if any
  const auto extraBytes = inputSize % kEncodedBlockByteSize;
  decodedSize = (inputSize / kEncodedBlockByteSize) * kBinaryBlockByteSize;

  // Adjust the needed size for extra bytes, if present
  if (extraBytes) {
    if ((extraBytes == 6) || (extraBytes == 3) || (extraBytes == 1)) {
      return Status::UserError(
          "Base32::decode() - invalid input string: string length cannot be 6, 3, or 1 more than a multiple of 8.");
    }
    decodedSize += (extraBytes * kBinaryBlockByteSize) / kEncodedBlockByteSize;
  }

  return Status::OK();
}
|
||
// static | ||
uint8_t Base32::base32ReverseLookup( | ||
char p, | ||
const Base32::ReverseIndex& reverseIndex, | ||
Status& status) { | ||
return reverseLookup(p, reverseIndex, status, Base32::kCharsetSize); | ||
} | ||
|
||
// static
// Public entry point: decodes with the standard Base32 reverse index table by
// delegating to decodeImpl().
Status Base32::decode(
    std::string_view input,
    size_t inputSize,
    char* output,
    size_t outputSize) {
  const auto& standardTable = kBase32ReverseIndexTable;
  return decodeImpl(input, inputSize, output, outputSize, standardTable);
}
|
||
// static
/// Decodes 'inputSize' Base32 characters from 'input' into 'output'.
///
/// @param input Encoded characters.
/// @param inputSize Number of characters of 'input' to decode; trailing
/// padding is excluded automatically via calculateDecodedSize().
/// @param output Destination buffer for the decoded bytes.
/// @param outputSize Capacity of 'output' in bytes.
/// @param reverseIndex Table mapping an encoded character to its 5-bit value.
/// @return Status::OK() on success; a UserError when the length or padding is
/// invalid, when 'output' is too small, or when a character lookup reported a
/// failure through its status argument.
Status Base32::decodeImpl(
    std::string_view input,
    size_t inputSize,
    char* output,
    size_t outputSize,
    const Base32::ReverseIndex& reverseIndex) {
  // Check if input is empty
  if (input.empty()) {
    return Status::OK();
  }

  // Every index into 'input' below is derived from 'inputSize', so it must
  // not exceed the number of characters actually present in the view.
  if (inputSize > input.size()) {
    return Status::UserError(
        "Base32::decode() - invalid input string: input size exceeds the input length.");
  }

  size_t decodedSize;
  // Calculate decoded size and check for status
  auto status = calculateDecodedSize(input, inputSize, decodedSize);
  if (!status.ok()) {
    return status;
  }

  // A trailing group can only hold 2, 4, 5 or 7 characters. Remainders of 1,
  // 3 or 6 (reachable through an impossible amount of padding) have no
  // byte-aligned decoding; decoding them used to read a padding character and
  // write one byte beyond 'decodedSize'.
  const size_t trailing = inputSize % kEncodedBlockByteSize;
  if ((trailing == 6) || (trailing == 3) || (trailing == 1)) {
    return Status::UserError(
        "Base32::decode() - invalid input string: invalid number of padding characters.");
  }

  if (outputSize < decodedSize) {
    return Status::UserError("Base32::decode() - output buffer too small.");
  }

  Status lookupStatus;

  // Packs the first 'numChars' characters of 'input' (5 bits each) into the
  // most significant end of a 40-bit group. All arithmetic is done in 64 bits
  // so no shift is ever applied to an int-promoted value.
  const auto packGroup = [&](size_t numChars) -> uint64_t {
    uint64_t bits = 0;
    for (size_t i = 0; i < numChars; ++i) {
      bits = (bits << 5) |
          static_cast<uint64_t>(
                 base32ReverseLookup(input[i], reverseIndex, lookupStatus));
    }
    return bits << (5 * (kEncodedBlockByteSize - numChars));
  };

  // Writes the 'numBytes' most significant bytes of a 40-bit group to
  // 'output'.
  const auto unpackGroup = [&](uint64_t bits, size_t numBytes) {
    for (size_t i = 0; i < numBytes; ++i) {
      output[i] = static_cast<char>(
          (bits >> (8 * (kBinaryBlockByteSize - 1 - i))) & 0xff);
    }
  };

  // Handle full groups of 8 characters, each producing 5 bytes.
  while (inputSize >= kEncodedBlockByteSize) {
    unpackGroup(packGroup(kEncodedBlockByteSize), kBinaryBlockByteSize);

    // Move the input string_view forward
    input.remove_prefix(kEncodedBlockByteSize);
    output += kBinaryBlockByteSize;
    inputSize -= kEncodedBlockByteSize;
  }

  // Handle the last 2, 4, 5 or 7 characters. Only the bytes that are fully
  // covered by the available characters are emitted (1, 2, 3 or 4 bytes).
  if (inputSize > 0) {
    unpackGroup(
        packGroup(inputSize),
        (inputSize * kBinaryBlockByteSize) / kEncodedBlockByteSize);
  }

  // Lookup failures are accumulated in 'lookupStatus' and reported once all
  // groups have been processed.
  return lookupStatus.ok() ? Status::OK() : lookupStatus;
}
|
||
} // namespace facebook::velox::encoding |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <array>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <map>
#include <string>
#include <string_view>
|
||
#include "velox/common/base/GTestMacros.h" | ||
#include "velox/common/base/Status.h" | ||
#include "velox/common/encode/EncoderUtils.h" | ||
|
||
namespace facebook::velox::encoding { | ||
|
||
class Base32 { | ||
public: | ||
static const size_t kCharsetSize = 32; | ||
static const size_t kReverseIndexSize = 256; | ||
|
||
/// Character set used for encoding purposes. | ||
/// Contains specific characters that form the encoding scheme. | ||
using Charset = std::array<char, kCharsetSize>; | ||
|
||
/// Reverse lookup table for decoding purposes. | ||
/// Maps each possible encoded character to its corresponding numeric value | ||
/// within the encoding base. | ||
using ReverseIndex = std::array<uint8_t, kReverseIndexSize>; | ||
|
||
/// Returns the actual size of the decoded data. Will also remove the padding | ||
/// length from the 'inputSize'. | ||
static Status calculateDecodedSize( | ||
std::string_view input, | ||
size_t& inputSize, | ||
size_t& decodedSize); | ||
|
||
/// Decodes the specified number of characters from the 'input' and writes the | ||
/// result to the 'output'. | ||
static Status decode( | ||
std::string_view input, | ||
size_t inputSize, | ||
char* output, | ||
size_t outputSize); | ||
|
||
private: | ||
// Performs a reverse lookup in the reverse index to retrieve the original | ||
// index of a character in the base. | ||
static uint8_t base32ReverseLookup( | ||
char p, | ||
const Base32::ReverseIndex& reverseIndex, | ||
Status& status); | ||
|
||
// Decodes the specified input using the provided reverse lookup table. | ||
static Status decodeImpl( | ||
std::string_view input, | ||
size_t inputSize, | ||
char* output, | ||
size_t outputSize, | ||
const Base32::ReverseIndex& reverseIndex); | ||
|
||
VELOX_FRIEND_TEST(Base32Test, Base32ReverseLookupValidChar); | ||
VELOX_FRIEND_TEST(Base32Test, Base32ReverseLookupInvalidChar); | ||
VELOX_FRIEND_TEST(Base32Test, DecodeImplValidInput); | ||
VELOX_FRIEND_TEST(Base32Test, DecodeImplInvalidInputLength); | ||
VELOX_FRIEND_TEST(Base32Test, DecodeImplOutputBufferTooSmall); | ||
}; | ||
|
||
} // namespace facebook::velox::encoding |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.