Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to xxHash for Bloom filters; add convenience overload for trace_refs_recursive. #173

Merged
merged 5 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,18 @@ def configure(self):


def requirements(self):
self.requires("batteries/0.57.2", **VISIBLE)
self.requires("boost/1.85.0", **VISIBLE, **OVERRIDE)
self.requires("cli11/2.3.2", **VISIBLE)
self.requires("glog/0.7.0", **VISIBLE, **OVERRIDE)
self.requires("gtest/1.14.0", **VISIBLE)
self.requires("batteries/0.58.0", **VISIBLE)
self.requires("boost/1.86.0", **VISIBLE, **OVERRIDE)
self.requires("cli11/2.4.2", **VISIBLE)
self.requires("glog/0.7.1", **VISIBLE, **OVERRIDE)
self.requires("libbacktrace/cci.20210118", **VISIBLE)
self.requires("openssl/3.2.0", **VISIBLE)
self.requires("openssl/3.3.2", **VISIBLE, **OVERRIDE)
self.requires("xxhash/0.8.2", **VISIBLE)

self.requires("zlib/1.3", **OVERRIDE)

self.test_requires("gtest/1.15.0")

if platform.system() == "Linux":
self.requires("liburing/2.4", **VISIBLE)
self.requires("libfuse/3.16.2", **VISIBLE)
Expand Down
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ macro(LLFS_DefineLibrary TARGET_NAME TARGET_SRCDIR)
Boost::context
Boost::stacktrace_backtrace
libbacktrace::libbacktrace
xxHash::xxhash
dl
stdc++fs)

Expand Down Expand Up @@ -170,7 +171,8 @@ endmacro()
LLFS_DefineLibrary(llfs ./llfs
batteries::batteries
liburing::liburing
OpenSSL::Crypto
OpenSSL::Crypto
xxHash::xxhash
)

#=#=#==#==#===============+=+=+=+=++=++++++++++++++-++-+--+-+----+---------------
Expand Down
105 changes: 84 additions & 21 deletions src/llfs/bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,106 @@
#include <batteries/async/worker_pool.hpp>
#include <batteries/math.hpp>

#include <batteries/math.hpp>
#include <batteries/seq/loop_control.hpp>
#include <batteries/static_assert.hpp>
#include <batteries/suppress.hpp>

#include <boost/functional/hash.hpp>
#include <xxhash.h>

#include <cmath>
#include <type_traits>

namespace llfs {

/** \brief Parameters used to build a Bloom filter.
*
* If instead of bits-per-item, one wants to set a given false positive (error) rate, then the
* function optimal_bloom_filter_bit_rate may be used to calculate the required bit rate.
*/
struct BloomFilterParams {
// The size budget for the filter, expressed as a number of bits per item.
// NOTE(review): presumably "item" means each key inserted into the filter; confirm against the
// code that consumes these params.
//
usize bits_per_item;
};

template <typename T, typename Fn>
inline seq::LoopControl hash_for_bloom(const T& item, u64 count, Fn&& fn)
//=#=#==#==#===============+=+=+=+=++=++++++++++++++-++-+--+-+----+---------------
// Implementation details - NOT FOR EXTERNAL USE
//
namespace detail {

/** \brief A set of randomly chosen (by hardware entropy generator) seeds for up to 64 different
* hash functions to use for building and querying Bloom filters.
*
* The get_nth_hash_for_bloom overloads select a seed with `n & (size() - 1)`, so the number of
* entries must remain a power of 2 (this is checked by the static assertion that follows this
* table).  Because the index is masked, hash function indices that differ by a multiple of the
* table size map to the same seed.
*
* NOTE(review): changing any value here changes every hash derived from it; if filters built with
* these seeds are ever stored and re-read, the values must be treated as fixed -- confirm.
*/
inline constexpr std::array<u64, 64> kBloomFilterHashSeeds = {
0xce3a9eb8b885d5afull, 0x33d9975b8a739ac6ull, 0xe65d0fff49425f03ull, 0x10bb3a132ec4fabcull,
0x88d476f6e7f2c53cull, 0xcb4905c588217f44ull, 0x54eb7b8b55ac05d6ull, 0xac0de731d7f3f97cull,
0x998963e5d908c156ull, 0x0bdf939d3b7c1cd6ull, 0x2cf7007c36b2c966ull, 0xb53c35171f25ccceull,
0x7d6d2ad5e3ef7ae3ull, 0xe3aaa3bf1dbffd08ull, 0xa81f70b4f8dc0f80ull, 0x1f4887ce81cdf25aull,
0x6433a69ba9e9d9b1ull, 0xf859167265201651ull, 0xe48c6589be0ff660ull, 0xadd5250ba0e7ac09ull,
0x833f55b86dee015full, 0xae3b000feb85dceaull, 0x0110cfeb4fe23291ull, 0xf3a5d699ab2ce23cull,
0x7c3a2b8a1c43942cull, 0x8cb3fb6783724d25ull, 0xe3619c66bf3aa139ull, 0x3fdf358be099c7d9ull,
0x0c38ccabc94a487full, 0x43e19e80ee4fe6edull, 0x22699c9fc26f20eeull, 0xa559cbafff2cea37ull,
0xfbed4777b17fb16dull, 0x7197788291858011ull, 0xa9325a240f0d996eull, 0x6782b2e3766f2f76ull,
0xbc3aca45c9d9dc36ull, 0x7b687762afe92061ull, 0x7b2a7cb985790bcfull, 0xf244ed1bc2b06f7dull,
0x29acd54ff9cb3809ull, 0xe1926523e6f67949ull, 0x98f964fbc223bb91ull, 0xaab5ee47827c5506ull,
0x0dab726106a4c8ddull, 0xa88bb10b8e57cdd9ull, 0xbef7ede281a687afull, 0x0e2a6b9bc5b7d6e3ull,
0x5b6f250b605200c8ull, 0xafe46bbd0e81722full, 0xb5d978e72ac594daull, 0x8c4362498b85fff9ull,
0xce8cd0d29a933471ull, 0x9c2a28aabd1e71cbull, 0x572c8c1d4ea24d86ull, 0x8fc7dff3afb5fbf7ull,
0xf378bc6c41606bf9ull, 0xa4c36401cf7a557full, 0x0b0a5bdd27f682afull, 0x3fbe0f66ef4777c1ull,
0x0ed678ccbd246356ull, 0xc2d3489afc4edcd6ull, 0xc482a884240966c6ull, 0x19b952db37267518ull,
};

// Validate assumption that the number of seeds above is a power of 2.
//
BATT_STATIC_ASSERT_EQ(u64{1} << (batt::log2_ceil(kBloomFilterHashSeeds.size())),
kBloomFilterHashSeeds.size());

/** \brief Returns the n-th hash function for the given integer value.
 *
 * The in-memory bytes of `int_value` are hashed with XXH64, seeded by an entry of
 * kBloomFilterHashSeeds.  The seed index is `n` masked by (table size - 1), so values of `n` that
 * differ by a multiple of the table size select the same seed and therefore produce the same hash.
 *
 * \param int_value the integer to hash
 * \param n selects which hash function (i.e., which seed) to apply
 *
 * \return the 64-bit xxHash of `int_value` under the selected seed
 */
inline u64 get_nth_hash_for_bloom(usize int_value, usize n)
{
  // NOTE(review): the object representation of `int_value` is what gets hashed, so the result
  // depends on the width and byte order of `usize` -- confirm this is acceptable if hashes are
  // ever compared across platforms.
  //
  return XXH64(&int_value, sizeof(int_value),
               kBloomFilterHashSeeds[n & (kBloomFilterHashSeeds.size() - 1)]);
}

const u64 item_hash = std::hash<T>{}(item);
usize seed = item_hash;
/** \brief Computes the n-th Bloom filter hash of a string.
 *
 * Hashes the `str.size()` bytes starting at `str.data()` with XXH64, using the entry of
 * kBloomFilterHashSeeds selected by `n` (masked to the table size) as the seed.
 *
 * \param str the string data to hash
 * \param n selects which hash function (i.e., which seed) to apply
 *
 * \return the 64-bit xxHash of `str` under the selected seed
 */
inline u64 get_nth_hash_for_bloom(const std::string_view& str, usize n)
{
  const auto seed_index = n & (kBloomFilterHashSeeds.size() - 1);
  const u64 seed = kBloomFilterHashSeeds[seed_index];

  return XXH64(str.data(), str.size(), seed);
}

/** \brief Generic fallback: computes the n-th Bloom filter hash of an arbitrary value.
 *
 * Participates in overload resolution only when `const T&` converts to neither `usize` nor
 * `std::string_view`.  The item is first reduced to an integer via std::hash<T>; that integer is
 * then passed to get_nth_hash_for_bloom again to obtain the n-th hash function (for Bloom
 * filters).
 *
 * \param item the value to hash; std::hash<T> must be usable for T
 * \param n selects which hash function to apply
 *
 * \return the n-th hash of `std::hash<T>{}(item)`
 */
template <typename T,
          typename = std::enable_if_t<!std::is_convertible_v<const T&, usize> &&
                                      !std::is_convertible_v<const T&, std::string_view>>>
inline u64 get_nth_hash_for_bloom(const T& item, usize n)
{
  const auto std_hash_value = std::hash<T>{}(item);

  return get_nth_hash_for_bloom(std_hash_value, n);
}

} //namespace detail
//
//=#=#==#==#===============+=+=+=+=++=++++++++++++++-++-+--+-+----+---------------

/** \brief Invokes `fn` `count` times, each time with a unique hash function applied to `item`.
*
* `fn` may return either `void` or `seq::LoopControl`. If it returns `seq::LoopControl`, then the
* returned value is used to decide whether to continue calculating hash functions and calling `fn`,
* or break out of the loop and return early.
*
* \return seq::LoopControl::kBreak if `fn` requested that this function return early; otherwise
* seq::LoopControl::kContinue, indicating that `fn` was called `count` times.
*/
template <typename T, typename Fn = seq::LoopControl(u64)>
inline seq::LoopControl hash_for_bloom(const T& item, u64 count, Fn&& fn)
{
for (u64 i = 0; i < count; ++i) {
seed = mix_hash(seed, kSeeds[i % 32] + i / 32);
seed = mix_hash(seed, item_hash);
if (seq::run_loop_fn(fn, seed) == seq::LoopControl::kBreak) {
if (seq::run_loop_fn(fn, detail::get_nth_hash_for_bloom(item, i)) == seq::LoopControl::kBreak) {
return seq::LoopControl::kBreak;
}
}
Expand Down
19 changes: 16 additions & 3 deletions src/llfs/bloom_filter.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,23 @@ TEST(BloomFilterTest, RandomItems)
}

for (const auto& s : stats) {
EXPECT_LT(s.second.actual_rate() / s.second.expected_rate, 1.01)
<< BATT_INSPECT(s.second.actual_rate()) << BATT_INSPECT(s.second.expected_rate);
const u64 word_count = s.first.first;
const u16 hash_count = s.first.second;

LLFS_LOG_INFO() << BATT_INSPECT(s.second.actual_rate() / s.second.expected_rate);
// fpr == false positive rate
//
const double actual_fpr = s.second.actual_rate();
const double expected_fpr = s.second.expected_rate;

EXPECT_LT(actual_fpr / expected_fpr, 1.02)
<< BATT_INSPECT(actual_fpr) << BATT_INSPECT(expected_fpr);

EXPECT_GT(expected_fpr / actual_fpr, 0.98)
<< BATT_INSPECT(actual_fpr) << BATT_INSPECT(expected_fpr);

LLFS_LOG_INFO() << BATT_INSPECT(actual_fpr / expected_fpr) << BATT_INSPECT(word_count)
<< BATT_INSPECT(hash_count) << BATT_INSPECT(actual_fpr)
<< BATT_INSPECT(expected_fpr);
}

EXPECT_LT(false_positive_rate_total / false_positive_rate_count, 1.01);
Expand Down
5 changes: 2 additions & 3 deletions src/llfs/packed_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@
#include <llfs/crc.hpp>
#include <llfs/int_types.hpp>
#include <llfs/packed_array.hpp>
#include <llfs/packed_uuid.hpp>
#include <llfs/version.hpp>

#include <batteries/static_assert.hpp>
#include <batteries/suppress.hpp>

#include <boost/uuid/uuid.hpp>

#include <cstddef>

namespace llfs {
Expand Down Expand Up @@ -60,7 +59,7 @@ BATT_STATIC_ASSERT_EQ(sizeof(PackedConfigSlotBase), 4);
struct PackedConfigSlotHeader : PackedConfigSlotBase {
// The globally unique identifier of this object.
//
boost::uuids::uuid uuid;
PackedUUID uuid;
};

BATT_STATIC_ASSERT_EQ(sizeof(PackedConfigSlotHeader), 20);
Expand Down
7 changes: 4 additions & 3 deletions src/llfs/packed_page_user_slot.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
#ifndef LLFS_PACKED_PAGE_USER_SLOT_HPP
#define LLFS_PACKED_PAGE_USER_SLOT_HPP

#include <llfs/config.hpp>
//
#include <llfs/int_types.hpp>
#include <llfs/packed_slot_offset.hpp>
#include <llfs/packed_uuid.hpp>

#include <batteries/static_assert.hpp>

#include <boost/uuid/uuid.hpp>

#include <ostream>

namespace llfs {
Expand All @@ -26,7 +27,7 @@ namespace llfs {
// end-user or human, it could be another part of the system (e.g., a Tablet).
//
struct PackedPageUserSlot {
boost::uuids::uuid user_id;
PackedUUID user_id;
PackedSlotOffset slot_offset;
};

Expand Down
10 changes: 10 additions & 0 deletions src/llfs/packed_uuid.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//#=##=##=#==#=#==#===#+==#+==========+==+=+=+=+=+=++=+++=+++++=-++++=-+++++++++++
//
// Part of the LLFS Project, under Apache License v2.0.
// See https://www.apache.org/licenses/LICENSE-2.0 for license information.
// SPDX short identifier: Apache-2.0
//
//+++++++++++-+-+--+----- --- -- - - - -

#include <llfs/packed_uuid.hpp>
//
79 changes: 79 additions & 0 deletions src/llfs/packed_uuid.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//#=##=##=#==#=#==#===#+==#+==========+==+=+=+=+=+=++=+++=+++++=-++++=-+++++++++++
//
// Part of the LLFS Project, under Apache License v2.0.
// See https://www.apache.org/licenses/LICENSE-2.0 for license information.
// SPDX short identifier: Apache-2.0
//
//+++++++++++-+-+--+----- --- -- - - - -

#pragma once
#ifndef LLFS_PACKED_UUID_HPP
#define LLFS_PACKED_UUID_HPP

#include <llfs/config.hpp>
//

#include <llfs/int_types.hpp>
#include <llfs/uuid.hpp>

#include <batteries/static_assert.hpp>

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/version.hpp>

#include <cstring>

namespace llfs {

/** \brief A UUID stored as a plain array of bytes, convertible to and from boost::uuids::uuid.
 *
 * The array has exactly `sizeof(boost::uuids::uuid)` elements of `u8`; conversions in both
 * directions copy (or, for Boost versions before 1.86, alias) those raw bytes.
 */
struct PackedUUID {
  // The raw bytes of the UUID.  Zero-filled by default so that a default-constructed PackedUUID
  // never exposes indeterminate values when it is converted, compared, or printed.
  //
  std::array<u8, sizeof(boost::uuids::uuid)> bytes{};

  //----- --- -- - - - -

  /** \brief Constructs a PackedUUID whose bytes are all zero.
   */
  PackedUUID() noexcept
  {
  }

  /** \brief Constructs a PackedUUID holding a copy of the bytes of `that`.
   */
  /*implicit*/ PackedUUID(const boost::uuids::uuid& that) noexcept
  {
    *this = that;
  }

  /** \brief Overwrites the stored bytes with the object bytes of `that`.
   *
   * \return a reference to this object
   */
  PackedUUID& operator=(const boost::uuids::uuid& that) noexcept
  {
    std::memcpy(this->bytes.data(), &that, this->bytes.size());
    return *this;
  }

#if BOOST_VERSION < 108600

  /** \brief Views the stored bytes in place as a boost::uuids::uuid (no copy is made).
   *
   * Only compiled for Boost versions before 1.86; the static assertion checks that
   * boost::uuids::uuid has alignment 1, so the byte array is suitably aligned for it.
   */
  operator const boost::uuids::uuid&() const noexcept
  {
    BATT_STATIC_ASSERT_EQ(alignof(boost::uuids::uuid), 1);

    return *reinterpret_cast<const boost::uuids::uuid*>(this->bytes.data());
  }

#else

  /** \brief Returns a boost::uuids::uuid holding a copy of the stored bytes.
   *
   * Compiled for Boost 1.86 and later, where the in-place view above is not used.
   */
  operator boost::uuids::uuid() const noexcept
  {
    // Copy through the uuid's own byte iterator rather than reinterpreting `bytes` as a built-in
    // array; this avoids depending on the internal layout of std::array.
    //
    boost::uuids::uuid unpacked;
    std::memcpy(unpacked.begin(), this->bytes.data(), this->bytes.size());
    return unpacked;
  }

#endif
};

BATT_STATIC_ASSERT_EQ(sizeof(PackedUUID), 16);
BATT_STATIC_ASSERT_EQ(alignof(PackedUUID), 1);
BATT_STATIC_ASSERT_EQ(sizeof(boost::uuids::uuid), sizeof(PackedUUID));

inline std::ostream& operator<<(std::ostream& out, const PackedUUID& t)
{
return out << (const boost::uuids::uuid&)t;
}

} //namespace llfs

#endif // LLFS_PACKED_UUID_HPP
2 changes: 1 addition & 1 deletion src/llfs/page_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ inline StatusOr<slot_offset_type> PageAllocator::update_page_ref_counts(
sample_count.fetch_add(1);
prc_count.fetch_add(txn->ref_counts.size());

LLFS_LOG_INFO_EVERY_T(5.0 /*seconds*/)
LLFS_LOG_INFO_EVERY_T(100.0 /*seconds*/)
<< "Average pages per allocator update: "
<< ((double)prc_count.load() / (double)sample_count.load());

Expand Down
5 changes: 2 additions & 3 deletions src/llfs/page_allocator_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@
#include <llfs/optional.hpp>
#include <llfs/packed_config.hpp>
#include <llfs/packed_pointer.hpp>
#include <llfs/packed_uuid.hpp>
#include <llfs/page_device_config.hpp>
#include <llfs/page_size.hpp>

#include <batteries/static_assert.hpp>

#include <boost/uuid/uuid.hpp>

#include <iostream>
#include <variant>

Expand Down Expand Up @@ -156,7 +155,7 @@ struct PackedPageAllocatorConfig : PackedConfigSlotHeader {

// The PageAllocator log config.
//
boost::uuids::uuid log_device_uuid;
PackedUUID log_device_uuid;

// The page size (log2) of the page device managed by this allocator (for sanity checking).
//
Expand Down
Loading