Skip to content

Commit

Permalink
node: enforce RecSplit invariants (#1283)
Browse files Browse the repository at this point in the history
  • Loading branch information
canepat authored Jun 25, 2023
1 parent 78e065d commit 2ca63e8
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 7 deletions.
3 changes: 2 additions & 1 deletion silkworm/node/recsplit/encoding/elias_fano.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include <silkworm/core/common/assert.hpp>
#include <silkworm/core/common/base.hpp>
#include <silkworm/core/common/endian.hpp>
#include <silkworm/infra/common/ensure.hpp>
#include <silkworm/infra/common/log.hpp>
#include <silkworm/node/recsplit/encoding/sequence.hpp>
#include <silkworm/node/recsplit/support/common.hpp>
Expand Down Expand Up @@ -103,7 +104,7 @@ class EliasFanoList32 {
public:
//! Create an empty new 32-bit EF list prepared for specified sequence length and max offset
EliasFanoList32(uint64_t sequence_length, uint64_t max_offset) {
if (sequence_length == 0) throw std::logic_error{"sequence length is zero"};
ensure(sequence_length > 0, "sequence length is zero");
count_ = sequence_length - 1;
max_offset_ = max_offset;
u_ = max_offset + 1;
Expand Down
10 changes: 9 additions & 1 deletion silkworm/node/recsplit/rec_split.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@

#include <silkworm/core/common/assert.hpp>
#include <silkworm/core/common/endian.hpp>
#include <silkworm/infra/common/ensure.hpp>
#include <silkworm/infra/common/log.hpp>
#include <silkworm/infra/common/memory_mapped_file.hpp>
#include <silkworm/node/etl/collector.hpp>
Expand Down Expand Up @@ -543,7 +544,12 @@ class RecSplit {
* @return the associated value.
*/
std::size_t operator()(const hash128_t& hash) const {
if (!built_) throw std::logic_error{"perfect hash function not built yet"};
ensure(built_, "RecSplit: perfect hash function not built yet");
ensure(key_count_ > 0, "RecSplit: invalid lookup with zero keys, use empty() to guard");

if (key_count_ == 1) {
return 0;
}

const std::size_t bucket = hash128_to_bucket(hash);
uint64_t cum_keys, cum_keys_next, bit_pos;
Expand Down Expand Up @@ -610,6 +616,8 @@ class RecSplit {
const auto position = 1 + 8 + bytes_per_record_ * (record + 1);

const auto address = encoded_file_->address();
ensure(position + sizeof(uint64_t) < encoded_file_->length(),
"position: " + std::to_string(position) + " plus 8 exceeds file length");
return endian::load_big_u64(address + position) & record_mask_;
}

Expand Down
40 changes: 35 additions & 5 deletions silkworm/node/recsplit/rec_split_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,34 @@ namespace silkworm::succinct {
//! Make the MPHF predictable just for testing
constexpr int kTestSalt{1};

TEST_CASE("RecSplit8", "[silkworm][recsplit]") {
TEST_CASE("RecSplit8: key_count=0", "[silkworm][node][recsplit]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;
RecSplitSettings settings{
.keys_count = 0,
.bucket_size = 10,
.index_path = index_file.path(),
.base_data_id = 0};
RecSplit8 rs{settings, /*.salt=*/kTestSalt};
CHECK_THROWS_AS(rs.build(), std::logic_error);
CHECK_THROWS_AS(rs("first_key"), std::logic_error);
}

TEST_CASE("RecSplit8: key_count=1", "[silkworm][node][recsplit]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;
RecSplitSettings settings{
.keys_count = 1,
.bucket_size = 10,
.index_path = index_file.path(),
.base_data_id = 0};
RecSplit8 rs{settings, /*.salt=*/kTestSalt};
CHECK_NOTHROW(rs.add_key("first_key", 0));
CHECK_NOTHROW(rs.build());
CHECK_NOTHROW(rs("first_key"));
}

TEST_CASE("RecSplit8: key_count=2", "[silkworm][node][recsplit]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;
RecSplitSettings settings{
Expand All @@ -45,8 +72,11 @@ TEST_CASE("RecSplit8", "[silkworm][recsplit]") {
SECTION("keys") {
CHECK_NOTHROW(rs.add_key("first_key", 0));
CHECK_THROWS_AS(rs.build(), std::logic_error);
CHECK_THROWS_AS(rs("first_key"), std::logic_error);
CHECK_NOTHROW(rs.add_key("second_key", 0));
CHECK(rs.build() == false /*collision_detected*/);
CHECK_NOTHROW(rs("first_key"));
CHECK_NOTHROW(rs("second_key"));
}

SECTION("duplicated keys") {
Expand Down Expand Up @@ -88,7 +118,7 @@ const std::size_t RecSplit4::kUpperAggregationBound = RecSplit4::SplitStrategy::
template <>
const std::array<uint32_t, kMaxBucketSize> RecSplit4::memo = RecSplit4::fill_golomb_rice();

TEST_CASE("RecSplit4: keys=1000 buckets=128", "[silkworm][recsplit]") {
TEST_CASE("RecSplit4: keys=1000 buckets=128", "[silkworm][node][recsplit]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;

Expand Down Expand Up @@ -126,7 +156,7 @@ TEST_CASE("RecSplit4: keys=1000 buckets=128", "[silkworm][recsplit]") {
}
}

TEST_CASE("RecSplit4: multiple keys-buckets", "[silkworm][recsplit]") {
TEST_CASE("RecSplit4: multiple keys-buckets", "[silkworm][node][recsplit]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;

Expand Down Expand Up @@ -173,7 +203,7 @@ TEST_CASE("RecSplit4: multiple keys-buckets", "[silkworm][recsplit]") {
}
}

TEST_CASE("RecSplit8: index lookup", "[silkworm][recsplit][.]") {
TEST_CASE("RecSplit8: index lookup", "[silkworm][node][recsplit][.]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;
RecSplitSettings settings{
Expand All @@ -196,7 +226,7 @@ TEST_CASE("RecSplit8: index lookup", "[silkworm][recsplit][.]") {
}
}

TEST_CASE("RecSplit8: double index lookup", "[silkworm][recsplit][.]") {
TEST_CASE("RecSplit8: double index lookup", "[silkworm][node][recsplit][.]") {
test::SetLogVerbosityGuard guard{log::Level::kNone};
test::TemporaryFile index_file;
RecSplitSettings settings{
Expand Down

0 comments on commit 2ca63e8

Please sign in to comment.