Skip to content

Commit

Permalink
fix: Remove same page hack for simdStrstr
Browse files Browse the repository at this point in the history
Summary:
The hack is breaking ASAN check, removing it does not give huge
performance loss though so we remove it.

Benchmark results before vs after change:
https://gist.github.com/Yuhta/b3f66e85d52b62240c09c15d6e7941bb

Differential Revision: D66012678
  • Loading branch information
Yuhta authored and facebook-github-bot committed Nov 15, 2024
1 parent 0357500 commit dfd6085
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 30 deletions.
41 changes: 12 additions & 29 deletions velox/common/base/SimdUtil-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1451,50 +1451,36 @@ using CharVector = xsimd::batch<uint8_t, xsimd::neon>;
#define VELOX_SIMD_STRSTR 0
#endif

extern const int kPageSize;

#if VELOX_SIMD_STRSTR

FOLLY_ALWAYS_INLINE bool pageSafe(const void* const ptr) {
return ((kPageSize - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <=
kPageSize - CharVector::size;
}

template <bool compiled, size_t kNeedleSize>
template <bool kCompiled, size_t kNeedleSize>
size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp(
const char* s,
size_t n,
int64_t n,
const char* needle,
size_t needleSize) {
static_assert(kNeedleSize >= 2);
VELOX_DCHECK_GT(needleSize, 1);
VELOX_DCHECK_GT(n, 0);
auto first = CharVector::broadcast(needle[0]);
auto last = CharVector::broadcast(needle[needleSize - 1]);
size_t i = 0;

for (; i <= n - needleSize; i += CharVector::size) {
// Assume that the input string is allocated on virtual pages : VP1, VP2,
// VP3 and VP4 has not been allocated yet, we need to check the end of input
// string is page-safe to over-read CharVector.
const auto lastPos = i + needleSize - 1;
int64_t i = 0;

if (lastPos + CharVector::size > n && !pageSafe(s + lastPos)) {
break;
}
for (int64_t end = n - needleSize - CharVector::size; i <= end;
i += CharVector::size) {
auto blockFirst = CharVector::load_unaligned(s + i);
const auto eqFirst = (first == blockFirst);
/// std:find handle the fast-path for first-char-unmatch, so we also need
/// to handle eqFirst.
if (eqFirst.mask() == 0) {
continue;
}
auto blockLast = CharVector::load_unaligned(s + lastPos);
auto blockLast = CharVector::load_unaligned(s + i + needleSize - 1);
const auto eqLast = (last == blockLast);
auto mask = (eqFirst && eqLast).mask();
while (mask != 0) {
const auto bitpos = __builtin_ctz(mask);
if constexpr (compiled) {
if constexpr (kCompiled) {
if constexpr (kNeedleSize == 2) {
return i + bitpos;
}
Expand All @@ -1510,15 +1496,12 @@ size_t FOLLY_ALWAYS_INLINE smidStrstrMemcmp(
}
}
// Fallback path for generic path.
for (; i <= n - needleSize; ++i) {
if constexpr (compiled) {
if (memcmp(s + i, needle, kNeedleSize) == 0) {
return i;
}
if (i + needleSize <= n) {
std::string_view sv(s, n);
if constexpr (kCompiled) {
return sv.find(needle, i, kNeedleSize);
} else {
if (memcmp(s + i, needle, needleSize) == 0) {
return i;
}
return sv.find(needle, i, needleSize);
}
}

Expand Down
1 change: 0 additions & 1 deletion velox/common/base/SimdUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ const LeadingMask<int64_t, xsimd::default_arch> leadingMask64;
const FromBitMask<int32_t, xsimd::default_arch> fromBitMask32;
const FromBitMask<int64_t, xsimd::default_arch> fromBitMask64;

const int kPageSize = sysconf(_SC_PAGESIZE);
} // namespace detail

namespace {
Expand Down

0 comments on commit dfd6085

Please sign in to comment.