Skip to content

Commit

Permalink
fix: Optimize json_parse (facebookincubator#11924)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#11924

bypass-github-export-checks

Reviewed By: kgpai

Differential Revision: D67538322

fbshipit-source-id: 7bbc0b052f57ead922862966cff0ce2b69eadcd3
  • Loading branch information
Yuhta authored and athmaja-n committed Jan 10, 2025
1 parent c857a6a commit cda8240
Show file tree
Hide file tree
Showing 12 changed files with 727 additions and 292 deletions.
145 changes: 145 additions & 0 deletions velox/common/base/SortingNetwork.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/common/base/Exceptions.h"

namespace facebook::velox {

/// Matches the highest `case` handled by the switch in sortingNetwork()
/// below (sizes 0 through 16 have a case; anything else hits the default).
constexpr int kSortingNetworkMaxSize = 16;

/// Sorts `data[0 .. size)` in place with a fixed, size-specific sequence of
/// compare-exchange steps.
///
/// @param data Pointer to the first element; elements are copied and
/// assigned while being reordered.
/// @param size Number of elements. The definition dispatches on sizes 0-16;
/// any other value reaches VELOX_UNREACHABLE().
/// @param lt Callable invoked as `lt(a, b)`; defaults to a
/// default-constructed `std::less<T>`.
template <typename T, typename LessThan = std::less<T>>
void sortingNetwork(T* data, int size, LessThan&& lt = {});

namespace detail {

// Bose-Nelson sorting network whose compare-exchange sequence is produced at
// compile time through recursive template instantiation.
//
// https://bertdobbelaere.github.io/sorting_networks.html
// https://github.com/Vectorized/Static-Sort/blob/master/include/static_sort.h
template <int kSize>
class SortingNetworkImpl {
 public:
  // Runs the network for `kSize` elements over data[0 .. kSize). All the work
  // happens in the constructor of the root SortRange object; positions used
  // by the helper templates are 1-based and start at 1 here.
  template <typename T, typename LessThan>
  static void apply(T* data, LessThan&& lt) {
    SortRange<T, LessThan, 1, kSize, (kSize <= 1)> root(data, lt);
  }

 private:
  // Leaves the element selected by `lt' as lesser in data[I] and the other
  // one in data[J]. When lt(data[I], data[J]) is false (ties included) the
  // two elements trade places.
  template <int I, int J, typename T, typename LessThan>
  static void compareExchange(T* data, LessThan lt) {
    // Both outputs are chosen with selects on the same comparison result, so
    // this is branchless if `lt' is branchless.
    auto inOrder = lt(data[I], data[J]);
    auto lower = inOrder ? data[I] : data[J];
    data[J] = inOrder ? data[J] : data[I];
    data[I] = lower;
  }

  // Combines the run of X elements starting at 1-based position I with the
  // run of Y elements starting at 1-based position J. The general case splits
  // both runs and recurses three times; the specializations below are the
  // base cases that emit the actual compare-exchange steps.
  template <typename T, typename LessThan, int I, int J, int X, int Y>
  struct MergeRuns {
    MergeRuns(T* data, LessThan lt) {
      constexpr int kSplitX = X >> 1;
      constexpr int kSplitY = (X & 1 ? Y : Y + 1) >> 1;
      constexpr int kUpperI = I + kSplitX;
      constexpr int kRestX = X - kSplitX;
      MergeRuns<T, LessThan, I, J, kSplitX, kSplitY> first(data, lt);
      MergeRuns<T, LessThan, kUpperI, J + kSplitY, kRestX, Y - kSplitY> second(
          data, lt);
      MergeRuns<T, LessThan, kUpperI, J, kRestX, kSplitY> third(data, lt);
    }
  };

  // One element against one element: a single compare-exchange (positions
  // are converted from 1-based to 0-based indices).
  template <typename T, typename LessThan, int I, int J>
  struct MergeRuns<T, LessThan, I, J, 1, 1> {
    MergeRuns(T* data, LessThan lt) {
      compareExchange<I - 1, J - 1>(data, lt);
    }
  };

  // One element against a run of two: compare with the second element of the
  // run first, then with the first.
  template <typename T, typename LessThan, int I, int J>
  struct MergeRuns<T, LessThan, I, J, 1, 2> {
    MergeRuns(T* data, LessThan lt) {
      compareExchange<I - 1, J>(data, lt);
      compareExchange<I - 1, J - 1>(data, lt);
    }
  };

  // A run of two against one element: compare the first element of the run,
  // then the second, with the single element.
  template <typename T, typename LessThan, int I, int J>
  struct MergeRuns<T, LessThan, I, J, 2, 1> {
    MergeRuns(T* data, LessThan lt) {
      compareExchange<I - 1, J - 1>(data, lt);
      compareExchange<I, J - 1>(data, lt);
    }
  };

  // Handles the M elements starting at 1-based position I: processes the
  // first M/2 elements, then the remaining ones, then combines both halves
  // with MergeRuns. `kStop' selects the empty specialization below once a
  // range has at most one element.
  template <typename T, typename LessThan, int I, int M, bool kStop>
  struct SortRange {
    SortRange(T* data, LessThan lt) {
      constexpr int kHalf = M >> 1;
      constexpr int kUpperI = I + kHalf;
      constexpr int kRest = M - kHalf;
      SortRange<T, LessThan, I, kHalf, (kHalf <= 1)> lowerHalf(data, lt);
      SortRange<T, LessThan, kUpperI, kRest, (kRest <= 1)> upperHalf(data, lt);
      MergeRuns<T, LessThan, I, kUpperI, kHalf, kRest> combined(data, lt);
    }
  };

  // Recursion terminator: nothing to do.
  template <typename T, typename LessThan, int I, int M>
  struct SortRange<T, LessThan, I, M, true> {
    SortRange(T* /*data*/, LessThan /*lt*/) {}
  };
};

} // namespace detail

/// Sorts `data[0 .. size)` in place by dispatching to the compile-time
/// network detail::SortingNetworkImpl<size>.
///
/// Sizes 0 and 1 return immediately, sizes 2 through 16 run the matching
/// network, and every other size reaches VELOX_UNREACHABLE().
template <typename T, typename LessThan>
void sortingNetwork(T* data, int size, LessThan&& lt) {
  // Zero or one element: nothing to reorder.
  if (size == 0 || size == 1) {
    return;
  }

#ifdef VELOX_SORTING_NETWORK_IMPL_APPLY_CASE
#error "Macro name clash: VELOX_SORTING_NETWORK_IMPL_APPLY_CASE"
#endif
// Expands to one `case` that runs the network of exactly `_n` elements. Only
// one case executes per call, so forwarding `lt` in each of them is safe.
#define VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(_n)                            \
  case _n:                                                                   \
    detail::SortingNetworkImpl<_n>::apply(data, std::forward<LessThan>(lt)); \
    return;

  switch (size) {
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(2)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(3)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(4)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(5)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(6)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(7)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(8)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(9)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(10)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(11)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(12)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(13)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(14)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(15)
    VELOX_SORTING_NETWORK_IMPL_APPLY_CASE(16)

    default:
      VELOX_UNREACHABLE();
  }

#undef VELOX_SORTING_NETWORK_IMPL_APPLY_CASE
}

} // namespace facebook::velox
8 changes: 8 additions & 0 deletions velox/common/base/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,11 @@ target_link_libraries(
velox_common_indexed_priority_queue_benchmark
PUBLIC Folly::follybenchmark
PRIVATE velox_common_base Folly::folly)

# Benchmark executable built from SortingNetworkBenchmark.cpp.
add_executable(velox_common_sorting_network_benchmark
SortingNetworkBenchmark.cpp)

# Same link setup as the indexed priority queue benchmark above: the folly
# benchmark library is linked PUBLIC, velox_common_base and folly PRIVATE.
target_link_libraries(
velox_common_sorting_network_benchmark
PUBLIC Folly::follybenchmark
PRIVATE velox_common_base Folly::folly)
134 changes: 134 additions & 0 deletions velox/common/base/benchmarks/SortingNetworkBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/SortingNetwork.h"

#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <folly/init/Init.h>

// Declares a variable named `_name` of type `_type`, constructed with the
// stringized `_name` as the first argument followed by the remaining macro
// arguments. [[maybe_unused]] is applied because the variable is only needed
// for the work done by its constructor and is never referenced afterwards.
#define VELOX_BENCHMARK(_type, _name, ...) \
[[maybe_unused]] _type _name(FOLLY_PP_STRINGIZE(_name), __VA_ARGS__)

namespace facebook::velox {
namespace {

// Registers two folly benchmarks that sort the same randomly generated data:
// one through std::sort and one through sortingNetwork(). Each benchmark
// performs kNumSorts independent sorts of index arrays whose comparator looks
// up values of type T.
//
// The registered callbacks capture `this`, so the object must stay alive
// until the benchmarks have run.
template <typename T>
class SortingNetworkBenchmark {
 public:
  // `name` is the prefix of both benchmark names; `minLen` and `maxLen` are
  // forwarded to folly::Random::rand32() to draw the length of every sort.
  SortingNetworkBenchmark(const char* name, int minLen, int maxLen) {
    int elementCount = 0;
    for (int n = 0; n < kNumSorts; ++n) {
      lengths_.push_back(folly::Random::rand32(minLen, maxLen));
      elementCount += lengths_.back();
    }
    // One contiguous pool holds the values of all sorts back to back.
    data_.resize(elementCount);
    generateData();

    folly::addBenchmark(__FILE__, fmt::format("{}_std", name), [this] {
      return run([](auto* indices, int len, auto lt) {
        std::sort(indices, indices + len, lt);
      });
    });

    // NOTE(review): the leading '%' presumably makes folly report this
    // benchmark relative to the preceding one - confirm against the folly
    // Benchmark documentation.
    folly::addBenchmark(
        __FILE__, fmt::format("%{}_sorting_network", name), [this] {
          return run([](auto* indices, int len, auto lt) {
            sortingNetwork(indices, len, lt);
          });
        });
  }

 private:
  static constexpr int kNumSorts = 10'000;

  // Fills data_ with random values; specialized per element type.
  void generateData();

  // Runs `sort` once per entry of lengths_, each time on a fresh 0..len-1
  // index array compared through the matching slice of data_. Returns the
  // number of sorts performed.
  template <typename Sort>
  unsigned run(Sort sort) const {
    std::vector<int32_t> indices;
    BENCHMARK_SUSPEND {
      // Grow the scratch buffer up front so that the timed section does not
      // allocate.
      for (const auto len : lengths_) {
        indices.reserve(len);
      }
    }
    auto* slice = data_.data();
    for (const auto len : lengths_) {
      indices.resize(len);
      std::iota(indices.begin(), indices.end(), 0);
      sort(indices.data(), len, [&](auto lhs, auto rhs) {
        return slice[lhs] < slice[rhs];
      });
      // Advance to the values belonging to the next sort.
      slice += len;
    }
    folly::doNotOptimizeAway(indices);
    return kNumSorts;
  }

  std::vector<int8_t> lengths_;
  std::vector<T> data_;
};

// Fills every element of data_ with a value drawn from
// folly::Random::rand32().
template <>
void SortingNetworkBenchmark<int32_t>::generateData() {
  // Range-for instead of an `int` index: avoids comparing a signed counter
  // with the unsigned data_.size() and matches the other specializations.
  for (auto& value : data_) {
    value = folly::Random::rand32();
  }
}

// Gives every string in data_ a length drawn from
// folly::Random::rand32(4, 32) and fills it with characters drawn from
// folly::Random::rand32(128).
template <>
void SortingNetworkBenchmark<std::string>::generateData() {
  for (auto& s : data_) {
    s.resize(folly::Random::rand32(4, 32));
    // Range-for instead of an `int` index: avoids comparing a signed counter
    // with the unsigned s.size().
    for (auto& ch : s) {
      ch = folly::Random::rand32(128);
    }
  }
}

// Benchmark element made of three 64-bit words, ordered lexicographically.
struct ThreeWords {
  std::array<uint64_t, 3> value;

  // Compares word by word from index 0; the first differing word decides.
  bool operator<(const ThreeWords& other) const {
    const auto& lhs = value;
    const auto& rhs = other.value;
    if (lhs[0] != rhs[0]) {
      return lhs[0] < rhs[0];
    }
    if (lhs[1] != rhs[1]) {
      return lhs[1] < rhs[1];
    }
    return lhs[2] < rhs[2];
  }
};

// Fills all three words of every element of data_ with values drawn from
// folly::Random::rand64().
template <>
void SortingNetworkBenchmark<ThreeWords>::generateData() {
  for (auto& element : data_) {
    auto& words = element.value;
    for (auto& word : words) {
      word = folly::Random::rand64();
    }
  }
}

} // namespace
} // namespace facebook::velox

// Entry point: initializes folly, registers one benchmark pair per
// element type and length range, then runs everything that was registered.
int main(int argc, char* argv[]) {
using namespace facebook::velox;
folly::Init follyInit(&argc, &argv);
// Each line constructs a SortingNetworkBenchmark whose constructor registers
// its benchmarks. The second macro argument becomes both the variable name
// and the benchmark name prefix; the trailing two numbers are the
// constructor's minLen and maxLen.
VELOX_BENCHMARK(SortingNetworkBenchmark<int32_t>, int32_2, 2, 4);
VELOX_BENCHMARK(SortingNetworkBenchmark<int32_t>, int32_4, 4, 8);
VELOX_BENCHMARK(SortingNetworkBenchmark<int32_t>, int32_8, 8, 16);
VELOX_BENCHMARK(SortingNetworkBenchmark<std::string>, string_2, 2, 4);
VELOX_BENCHMARK(SortingNetworkBenchmark<std::string>, string_4, 4, 8);
VELOX_BENCHMARK(SortingNetworkBenchmark<std::string>, string_8, 8, 16);
VELOX_BENCHMARK(SortingNetworkBenchmark<ThreeWords>, ThreeWords_2, 2, 4);
VELOX_BENCHMARK(SortingNetworkBenchmark<ThreeWords>, ThreeWords_4, 4, 8);
VELOX_BENCHMARK(SortingNetworkBenchmark<ThreeWords>, ThreeWords_8, 8, 16);
// The benchmark objects above are still in scope here, which matters because
// the registered callbacks capture them through `this`.
folly::runBenchmarks();
return 0;
}
27 changes: 2 additions & 25 deletions velox/functions/lib/Utf8Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,26 +173,6 @@ tryGetUtf8CharLength(const char* input, int64_t size, int32_t& codePoint) {
return -1;
}

/// Returns true if `input[0 .. len)` contains at least one non-ASCII sequence
/// for which tryGetUtf8CharLength() reports a negative length.
///
/// @param input Pointer to the first byte to scan.
/// @param len Number of bytes to scan; a value <= 0 scans nothing and yields
/// false.
bool hasInvalidUTF8(const char* input, int32_t len) {
  // The index has the same signed type as `len`. With an unsigned index a
  // negative `len` would be converted to a huge unsigned bound in the loop
  // condition and the scan would run past the buffer.
  for (int32_t inputIndex = 0; inputIndex < len;) {
    if (IS_ASCII(input[inputIndex])) {
      // Ascii: always a valid single-byte character.
      ++inputIndex;
      continue;
    }

    // Unicode: let the decoder validate the multi-byte sequence.
    int32_t codePoint;
    const auto charLength =
        tryGetUtf8CharLength(input + inputIndex, len - inputIndex, codePoint);
    if (charLength < 0) {
      return true;
    }
    inputIndex += charLength;
  }

  return false;
}

size_t replaceInvalidUTF8Characters(
char* outputBuffer,
const char* input,
Expand All @@ -213,12 +193,9 @@ size_t replaceInvalidUTF8Characters(
outputIndex += charLength;
inputIndex += charLength;
} else {
size_t replaceCharactersToWriteOut = inputIndex < len - 1 &&
isMultipleInvalidSequences(input, inputIndex)
? -charLength
: 1;
const auto& replacementCharacterString =
kReplacementCharacterStrings[replaceCharactersToWriteOut - 1];
getInvalidUTF8ReplacementString(
input + inputIndex, len - inputIndex, -charLength);
std::memcpy(
outputBuffer + outputIndex,
replacementCharacterString.data(),
Expand Down
8 changes: 6 additions & 2 deletions velox/functions/lib/Utf8Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,12 @@ FOLLY_ALWAYS_INLINE bool isMultipleInvalidSequences(
inputBuffer[inputIndex] == '\xc0' || inputBuffer[inputIndex] == '\xc1';
}

/// Returns true only if invalid UTF-8 is present in the input string.
///
/// @param input Pointer to the first byte to check.
/// @param len Number of bytes to check starting at `input`.
bool hasInvalidUTF8(const char* input, int32_t len);
/// Selects the entry of kReplacementCharacterStrings to emit for an invalid
/// sequence that starts at `input`.
///
/// @param input Pointer to the first byte of the invalid sequence.
/// @param len Number of bytes available starting at `input`.
/// @param codePointSize Size reported for the invalid sequence; used as
/// `codePointSize - 1` to index the table.
/// @return Entry `codePointSize - 1` when at least two bytes are available
/// and isMultipleInvalidSequences(input, 0) holds; entry 0 otherwise.
inline const std::string_view&
getInvalidUTF8ReplacementString(const char* input, int len, int codePointSize) {
  int index = 0;
  if (len >= 2 && isMultipleInvalidSequences(input, 0)) {
    index = codePointSize - 1;
  }
  return kReplacementCharacterStrings[index];
}

/// Replaces invalid UTF-8 characters with replacement characters similar to
/// that produced by Presto java. The function requires that output have
Expand Down
11 changes: 0 additions & 11 deletions velox/functions/lib/tests/Utf8Test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,6 @@ TEST(Utf8Test, tryCharLength) {
ASSERT_EQ(-1, tryCharLength({0xBF}));
}

// Exercises hasInvalidUTF8() on raw byte sequences.
TEST(UTF8Test, validUtf8) {
  using Bytes = std::vector<unsigned char>;
  auto containsInvalid = [](const Bytes& raw) {
    const auto* chars = reinterpret_cast<const char*>(raw.data());
    return hasInvalidUTF8(chars, raw.size());
  };

  // Bytes below 0x80 only.
  ASSERT_FALSE(containsInvalid({0x5c, 0x19, 0x7A}));
  // Same prefix followed by a lone 0xBF byte.
  ASSERT_TRUE(containsInvalid({0x5c, 0x19, 0x7A, 0xBF}));
  // Lead byte 0b11100000 followed by 0x81 0xBF is expected to be rejected.
  ASSERT_TRUE(containsInvalid({0x64, 0x65, 0x1A, 0b11100000, 0x81, 0xBF}));
}

TEST(UTF8Test, replaceInvalidUTF8Characters) {
auto testReplaceInvalidUTF8Chars = [](const std::string& input,
const std::string& expected) {
Expand Down
Loading

0 comments on commit cda8240

Please sign in to comment.