From 20336c912b50e8b2f51a8116a8cdcc27dc818c32 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Mon, 29 Apr 2024 23:28:16 +0200 Subject: [PATCH] snapshots refactoring (#1970) --- cmd/capi/execute.cpp | 138 ++--- cmd/dev/snapshots.cpp | 154 ++--- silkworm/capi/silkworm.cpp | 44 +- silkworm/capi/silkworm_test.cpp | 63 ++- silkworm/db/access_layer.cpp | 38 +- silkworm/db/access_layer.hpp | 7 +- silkworm/db/etl/collector.cpp | 13 +- silkworm/db/snapshot_sync.cpp | 66 +-- silkworm/db/snapshot_sync.hpp | 2 - silkworm/db/snapshots/basic_queries.hpp | 92 +++ .../{txn_hash.hpp => body_queries.hpp} | 8 +- silkworm/db/snapshots/body_snapshot.cpp | 28 + silkworm/db/snapshots/body_snapshot.hpp | 44 ++ .../db/snapshots/body_txs_amount_query.cpp | 54 ++ .../db/snapshots/body_txs_amount_query.hpp | 40 ++ silkworm/db/snapshots/header_queries.hpp | 27 + silkworm/db/snapshots/header_snapshot.cpp | 42 ++ silkworm/db/snapshots/header_snapshot.hpp | 48 ++ silkworm/db/snapshots/index.cpp | 31 + silkworm/db/snapshots/index.hpp | 70 +++ silkworm/db/snapshots/index_builder.hpp | 4 +- silkworm/db/snapshots/index_builder_test.cpp | 4 +- silkworm/db/snapshots/path.cpp | 20 +- silkworm/db/snapshots/path.hpp | 20 +- silkworm/db/snapshots/rec_split/rec_split.hpp | 19 +- .../rec_split/rec_split_par_test.cpp | 2 +- .../rec_split/rec_split_seq_test.cpp | 2 +- silkworm/db/snapshots/repository.cpp | 428 +++++++------- silkworm/db/snapshots/repository.hpp | 187 +++--- silkworm/db/snapshots/repository_test.cpp | 146 +++-- silkworm/db/snapshots/seg/decompressor.cpp | 23 +- silkworm/db/snapshots/seg/decompressor.hpp | 16 +- silkworm/db/snapshots/snapshot.cpp | 535 ------------------ silkworm/db/snapshots/snapshot.hpp | 212 ------- silkworm/db/snapshots/snapshot_reader.cpp | 104 ++++ silkworm/db/snapshots/snapshot_reader.hpp | 207 +++++++ silkworm/db/snapshots/snapshot_test.cpp | 236 +++----- .../db/snapshots/snapshot_word_serializer.hpp | 34 ++ silkworm/db/snapshots/txn_index.cpp | 10 +- silkworm/db/snapshots/txn_queries.hpp | 51 ++ silkworm/db/snapshots/txn_snapshot.hpp | 29 + ...h.cpp => txn_snapshot_word_serializer.cpp} | 89 ++- .../txn_snapshot_word_serializer.hpp | 74 +++ .../db/snapshots/txs_and_bodies_query.hpp | 4 +- silkworm/sentry/common/random.hpp | 4 +- 45 files changed, 1879 insertions(+), 1590 deletions(-) create mode 100644 silkworm/db/snapshots/basic_queries.hpp rename silkworm/db/snapshots/{txn_hash.hpp => body_queries.hpp} (81%) create mode 100644 silkworm/db/snapshots/body_snapshot.cpp create mode 100644 silkworm/db/snapshots/body_snapshot.hpp create mode 100644 silkworm/db/snapshots/body_txs_amount_query.cpp create mode 100644 silkworm/db/snapshots/body_txs_amount_query.hpp create mode 100644 silkworm/db/snapshots/header_queries.hpp create mode 100644 silkworm/db/snapshots/header_snapshot.cpp create mode 100644 silkworm/db/snapshots/header_snapshot.hpp create mode 100644 silkworm/db/snapshots/index.cpp create mode 100644 silkworm/db/snapshots/index.hpp delete mode 100644 silkworm/db/snapshots/snapshot.cpp delete mode 100644 silkworm/db/snapshots/snapshot.hpp create mode 100644 silkworm/db/snapshots/snapshot_reader.cpp create mode 100644 silkworm/db/snapshots/snapshot_reader.hpp create mode 100644 silkworm/db/snapshots/snapshot_word_serializer.hpp create mode 100644 silkworm/db/snapshots/txn_queries.hpp create mode 100644 silkworm/db/snapshots/txn_snapshot.hpp rename silkworm/db/snapshots/{txn_hash.cpp => txn_snapshot_word_serializer.cpp} (50%) create mode 100644 silkworm/db/snapshots/txn_snapshot_word_serializer.hpp diff --git a/cmd/capi/execute.cpp b/cmd/capi/execute.cpp index 46640b29d4..db6ee2ce69 100644 --- a/cmd/capi/execute.cpp +++ b/cmd/capi/execute.cpp @@ -145,68 +145,69 @@ const char* make_path(const snapshots::SnapshotPath& p) { return path; } -std::vector collect_all_snapshots(const SnapshotRepository& snapshot_repository) { +std::vector collect_all_snapshots(SnapshotRepository& snapshot_repository) { std::vector headers_snapshot_sequence; std::vector bodies_snapshot_sequence; std::vector transactions_snapshot_sequence; - for (const auto& segment_file : snapshot_repository.get_segment_files()) { - switch (segment_file.type()) { - case SnapshotType::headers: { - const auto* header_snapshot{snapshot_repository.get_header_segment(segment_file)}; - const auto* idx_header_hash{header_snapshot->idx_header_hash()}; + snapshot_repository.view_bundles( + [&](const SnapshotBundle& bundle) { + { SilkwormHeadersSnapshot raw_headers_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = header_snapshot->memory_file_region().data(), - .memory_length = header_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.header_snapshot.path()), + .memory_address = bundle.header_snapshot.memory_file_region().data(), + .memory_length = bundle.header_snapshot.memory_file_region().size(), + }, .header_hash_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_header_hash->memory_file_region().data(), - .memory_length = idx_header_hash->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_header_hash.path()), + .memory_address = bundle.idx_header_hash.memory_file_region().data(), + .memory_length = bundle.idx_header_hash.memory_file_region().size(), + }, + }; headers_snapshot_sequence.push_back(raw_headers_snapshot); - } break; - case SnapshotType::bodies: { - const auto* body_snapshot{snapshot_repository.get_body_segment(segment_file)}; - const auto* idx_body_number{body_snapshot->idx_body_number()}; + } + { SilkwormBodiesSnapshot raw_bodies_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = body_snapshot->memory_file_region().data(), - .memory_length = body_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.body_snapshot.path()), + .memory_address = bundle.body_snapshot.memory_file_region().data(), + .memory_length = bundle.body_snapshot.memory_file_region().size(), + }, .block_num_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_body_number->memory_file_region().data(), - .memory_length = idx_body_number->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_body_number.path()), + .memory_address = bundle.idx_body_number.memory_file_region().data(), + .memory_length = bundle.idx_body_number.memory_file_region().size(), + }, + }; bodies_snapshot_sequence.push_back(raw_bodies_snapshot); - } break; - case SnapshotType::transactions: { - const auto* tx_snapshot{snapshot_repository.get_tx_segment(segment_file)}; - const auto* idx_txn_hash{tx_snapshot->idx_txn_hash()}; - const auto* idx_txn_hash_2_block{tx_snapshot->idx_txn_hash_2_block()}; + } + { SilkwormTransactionsSnapshot raw_transactions_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = tx_snapshot->memory_file_region().data(), - .memory_length = tx_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.txn_snapshot.path()), + .memory_address = bundle.txn_snapshot.memory_file_region().data(), + .memory_length = bundle.txn_snapshot.memory_file_region().size(), + }, .tx_hash_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_txn_hash->memory_file_region().data(), - .memory_length = idx_txn_hash->memory_file_region().size()}, + .file_path = make_path(bundle.idx_txn_hash.path()), + .memory_address = bundle.idx_txn_hash.memory_file_region().data(), + .memory_length = bundle.idx_txn_hash.memory_file_region().size(), + }, .tx_hash_2_block_index{ - .file_path = make_path(segment_file.index_file_for_type(SnapshotType::transactions_to_block)), - .memory_address = idx_txn_hash_2_block->memory_file_region().data(), - .memory_length = idx_txn_hash_2_block->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_txn_hash_2_block.path()), + .memory_address = bundle.idx_txn_hash_2_block.memory_file_region().data(), + .memory_length = bundle.idx_txn_hash_2_block.memory_file_region().size(), + }, + }; transactions_snapshot_sequence.push_back(raw_transactions_snapshot); - } break; - default: - ensure(false, [&]() { return "unexpected snapshot type: " + std::string{magic_enum::enum_name(segment_file.type())}; }); - } - } + } + return true; + }); - ensure(headers_snapshot_sequence.size() == snapshot_repository.header_snapshots_count(), "invalid header snapshot count"); - ensure(bodies_snapshot_sequence.size() == snapshot_repository.body_snapshots_count(), "invalid body snapshot count"); - ensure(transactions_snapshot_sequence.size() == snapshot_repository.tx_snapshots_count(), "invalid tx snapshot count"); + ensure(headers_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid header snapshot count"); + ensure(bodies_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid body snapshot count"); + ensure(transactions_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid tx snapshot count"); std::vector snapshot_sequence; snapshot_sequence.reserve(headers_snapshot_sequence.size()); @@ -281,7 +282,7 @@ int execute_with_external_txn(SilkwormHandle handle, ExecuteBlocksSettings setti return SILKWORM_OK; } -int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, const SnapshotRepository& repository, const DataDirectory& data_dir) { +int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, SnapshotRepository& repository, const DataDirectory& data_dir) { // Open chain database silkworm::db::EnvConfig config{ .path = data_dir.chaindata().path().string(), @@ -318,10 +319,11 @@ int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, const } } -int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, const SnapshotRepository& repository, const DataDirectory& data_dir) { +int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, const DataDirectory& data_dir) { SILK_INFO << "Building indexes for snapshots: " << settings.snapshot_names; - std::vector snapshots; + std::vector snapshots; + std::vector snapshot_files; // Parse snapshot paths and create memory mapped files for (auto& snapshot_name : settings.snapshot_names) { auto raw_snapshot_path = data_dir.snapshots().path() / snapshot_name; @@ -329,37 +331,21 @@ int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, c if (!snapshot_path.has_value()) throw std::runtime_error("Invalid snapshot path"); - const Snapshot* snapshot{nullptr}; - switch (snapshot_path->type()) { - case headers: - snapshot = repository.get_header_segment(*snapshot_path); - break; - case bodies: - snapshot = repository.get_body_segment(*snapshot_path); - break; - case transactions: - case transactions_to_block: - snapshot = repository.get_tx_segment(*snapshot_path); - break; - default: - throw std::runtime_error("Invalid snapshot type"); - } - if (!snapshot) { - throw std::runtime_error("Snapshot not found in the repository:" + snapshot_name); - } + Snapshot& snapshot = snapshots.emplace_back(*snapshot_path); + snapshot.reopen_segment(); auto mmf = new SilkwormMemoryMappedFile{ - .file_path = make_path(snapshot->path()), - .memory_address = snapshot->memory_file_region().data(), - .memory_length = snapshot->memory_file_region().size(), + .file_path = make_path(*snapshot_path), + .memory_address = snapshot.memory_file_region().data(), + .memory_length = snapshot.memory_file_region().size(), }; - snapshots.push_back(mmf); + snapshot_files.push_back(mmf); } // Call api to build indexes const auto start_time{std::chrono::high_resolution_clock::now()}; - const int status_code = silkworm_build_recsplit_indexes(handle, snapshots.data(), snapshots.size()); + const int status_code = silkworm_build_recsplit_indexes(handle, snapshot_files.data(), snapshot_files.size()); if (status_code != SILKWORM_OK) return status_code; auto elapsed = std::chrono::high_resolution_clock::now() - start_time; @@ -367,9 +353,9 @@ int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, c << std::chrono::duration_cast(elapsed).count() << "ms"; // Free memory mapped files - for (auto snapshot : snapshots) { - delete[] snapshot->file_path; - delete snapshot; + for (auto mmf : snapshot_files) { + delete[] mmf->file_path; + delete mmf; } return SILKWORM_OK; @@ -446,16 +432,16 @@ int main(int argc, char* argv[]) { // Add snapshots to Silkworm API library SnapshotSettings snapshot_settings{}; snapshot_settings.repository_dir = data_dir.snapshots().path(); - SnapshotRepository repository{snapshot_settings}; - repository.reopen_folder(); int status_code = -1; if (settings.execute_blocks_settings) { // Execute specified block range using Silkworm API library + SnapshotRepository repository{snapshot_settings}; + repository.reopen_folder(); status_code = execute_blocks(handle, *settings.execute_blocks_settings, repository, data_dir); } else if (settings.build_indexes_settings) { // Build index for a specific snapshot using Silkworm API library - status_code = build_indexes(handle, *settings.build_indexes_settings, repository, data_dir); + status_code = build_indexes(handle, *settings.build_indexes_settings, data_dir); } else if (settings.rpcdaemon_settings) { // Start RPC Daemon using Silkworm API library status_code = start_rpcdaemon(handle, *settings.rpcdaemon_settings, data_dir); diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index 5eab017cdb..e5419e3311 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -34,12 +34,15 @@ #include #include #include +#include #include +#include #include #include #include -#include +#include #include +#include #include #include #include @@ -211,25 +214,14 @@ auto duration_as(const std::chrono::duration& elapsed) { void decode_segment(const SnapSettings& settings, int repetitions) { ensure(settings.snapshot_file_name.has_value(), "decode_segment: --snapshot_file must be specified"); - SILK_INFO << "Decode snapshot: " << *settings.snapshot_file_name; - std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto snap_file{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; - if (snap_file) { - std::unique_ptr snapshot; - for (int i{0}; i < repetitions; ++i) { - switch (snap_file->type()) { - case SnapshotType::headers: { - snapshot = std::make_unique(*snap_file); - } break; - case SnapshotType::bodies: { - snapshot = std::make_unique(*snap_file); - } break; - default: { - snapshot = std::make_unique(*snap_file); - } - } - snapshot->reopen_segment(); - } + ensure(snap_file.has_value(), "decode_segment: invalid snapshot_file path format"); + + SILK_INFO << "Decode snapshot: " << snap_file->path(); + std::chrono::time_point start{std::chrono::steady_clock::now()}; + for (int i = 0; i < repetitions; i++) { + Snapshot snapshot{*snap_file}; + snapshot.reopen_segment(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Decode snapshot elapsed: " << duration_as(elapsed) << " msec"; @@ -242,12 +234,12 @@ void count_bodies(const SnapSettings& settings, int repetitions) { int num_bodies{0}; uint64_t num_txns{0}; for (int i{0}; i < repetitions; ++i) { - const bool success = snapshot_repo.for_each_body([&](BlockNum number, const BlockBodyForStorage* b) -> bool { + const bool success = snapshot_repo.for_each_body([&](BlockNum number, const BlockBodyForStorage& b) -> bool { // If *system transactions* should not be counted, skip first and last tx in block body - const auto base_txn_id{settings.skip_system_txs ? b->base_txn_id + 1 : b->base_txn_id}; - const auto txn_count{settings.skip_system_txs && b->txn_count >= 2 ? b->txn_count - 2 : b->txn_count}; + const auto base_txn_id{settings.skip_system_txs ? b.base_txn_id + 1 : b.base_txn_id}; + const auto txn_count{settings.skip_system_txs && b.txn_count >= 2 ? b.txn_count - 2 : b.txn_count}; SILK_DEBUG << "Body number: " << number << " base_txn_id: " << base_txn_id << " txn_count: " << txn_count - << " #ommers: " << b->ommers.size(); + << " #ommers: " << b.ommers.size(); num_bodies++; num_txns += txn_count; return true; @@ -265,10 +257,10 @@ void count_headers(const SnapSettings& settings, int repetitions) { std::chrono::time_point start{std::chrono::steady_clock::now()}; int count{0}; for (int i{0}; i < repetitions; ++i) { - const bool success = snapshot_repo.for_each_header([&count](const BlockHeader* h) -> bool { + const bool success = snapshot_repo.for_each_header([&count](const BlockHeader& h) -> bool { ++count; - if (h->number % 50'000 == 0) { - SILK_INFO << "Header number: " << h->number << " hash: " << to_hex(h->hash()); + if (h.number % 50'000 == 0) { + SILK_INFO << "Header number: " << h.number << " hash: " << to_hex(h.hash()); } return true; }); @@ -334,14 +326,14 @@ void open_index(const SnapSettings& settings) { const uint64_t data_id{*settings.lookup_number}; const uint64_t enumeration{data_id - idx.base_data_id()}; if (enumeration < idx.key_count()) { - SILK_INFO << "Offset by ordinal lookup for " << data_id << ": " << idx.ordinal_lookup(enumeration); + SILK_INFO << "Offset by ordinal lookup for " << data_id << ": " << idx.lookup_by_ordinal(enumeration); } else { SILK_WARN << "Invalid absolute data number " << data_id << " for ordinal lookup"; } } else { for (size_t i{0}; i < idx.key_count(); ++i) { if (i % (idx.key_count() / 10) == 0) { - SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.ordinal_lookup(i) + SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.lookup_by_ordinal(i) << " [existence filter: " << int(idx.existence_filter()[i]) << "]"; } } @@ -411,22 +403,22 @@ void lookup_header_by_hash(const SnapSettings& settings) { SILK_INFO << "Lookup header hash: " << hash->to_hex(); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const HeaderSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::optional matching_header; SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - snapshot_repository.view_header_segments([&](const HeaderSnapshot* snapshot) -> bool { - const auto header{snapshot->header_by_hash(*hash)}; + snapshot_repository.view_header_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto header = HeaderFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(*hash); if (header) { matching_header = header; - matching_snapshot = snapshot; + matching_snapshot = snapshot.snapshot.path(); } return header.has_value(); }); if (matching_snapshot) { - SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot->path().filename(); + SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot->filename(); if (matching_header && settings.print) { - print_header(*matching_header, matching_snapshot->path().filename()); + print_header(*matching_header, matching_snapshot->filename()); } } else { SILK_WARN << "Lookup header hash: " << hash->to_hex() << " NOT found"; @@ -445,12 +437,12 @@ void lookup_header_by_number(const SnapSettings& settings) { snapshot_repository.reopen_folder(); const auto header_snapshot{snapshot_repository.find_header_segment(block_number)}; if (header_snapshot) { - const auto header{header_snapshot->header_by_number(block_number)}; + const auto header = HeaderFindByBlockNumQuery{header_snapshot->snapshot, header_snapshot->index}.exec(block_number); ensure(header.has_value(), - [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + header_snapshot->path().filename(); }); - SILK_INFO << "Lookup header number: " << block_number << " found in: " << header_snapshot->path().filename(); + [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + header_snapshot->snapshot.path().filename(); }); + SILK_INFO << "Lookup header number: " << block_number << " found in: " << header_snapshot->snapshot.path().filename(); if (settings.print) { - print_header(*header, header_snapshot->path().filename()); + print_header(*header, header_snapshot->snapshot.path().filename()); } } else { SILK_WARN << "Lookup header number: " << block_number << " NOT found"; @@ -469,7 +461,7 @@ void lookup_header(const SnapSettings& settings) { } } -static void print_body(const StoredBlockBody& body, const std::string& snapshot_filename) { +static void print_body(const BlockBodyForStorage& body, const std::string& snapshot_filename) { std::cout << "Body found in: " << snapshot_filename << "\n" << "base_txn_id=" << body.base_txn_id << "\n" << "txn_count=" << body.txn_count << "\n" @@ -479,20 +471,22 @@ static void print_body(const StoredBlockBody& body, const std::string& snapshot_ void lookup_body_in_one(const SnapSettings& settings, BlockNum block_number, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_body: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto body_snapshot{snapshot_repository.get_body_segment(*snapshot_path)}; - ensure(body_snapshot, [&]() { return "lookup_body: body segment not found for snapshot file: " + snapshot_path->path().string(); }); - const auto body{body_snapshot->body_by_number(block_number)}; + Snapshot body_snapshot{*snapshot_path}; + body_snapshot.reopen_segment(); + + Index idx_body_number{snapshot_path->index_file()}; + idx_body_number.reopen_index(); + + const auto body = BodyFindByBlockNumQuery{body_snapshot, idx_body_number}.exec(block_number); if (body) { - SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot.path().filename(); if (settings.print) { - print_body(*body, body_snapshot->path().filename()); + print_body(*body, body_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_snapshot->path().filename(); + SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_snapshot.path().filename(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Lookup body elapsed: " << duration_as(elapsed) << " usec"; @@ -505,12 +499,12 @@ void lookup_body_in_all(const SnapSettings& settings, BlockNum block_number) { std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto body_snapshot{snapshot_repository.find_body_segment(block_number)}; if (body_snapshot) { - const auto body{body_snapshot->body_by_number(block_number)}; + const auto body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(block_number); ensure(body.has_value(), - [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + body_snapshot->path().filename(); }); - SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); + [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + body_snapshot->snapshot.path().filename(); }); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->snapshot.path().filename(); if (settings.print) { - print_body(*body, body_snapshot->path().filename()); + print_body(*body, body_snapshot->snapshot.path().filename()); } } else { SILK_WARN << "Lookup body number: " << block_number << " NOT found"; @@ -582,20 +576,23 @@ static void print_txn(const Transaction& txn, const std::string& snapshot_filena void lookup_txn_by_hash_in_one(const SnapSettings& settings, const Hash& hash, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_tx_by_hash_in_one: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; - if (tx_snapshot) { - const auto transaction{tx_snapshot->txn_by_hash(hash)}; + Snapshot tx_snapshot{*snapshot_path}; + tx_snapshot.reopen_segment(); + + { + Index idx_txn_hash{snapshot_path->index_file()}; + idx_txn_hash.reopen_index(); + + const auto transaction = TransactionFindByHashQuery{tx_snapshot, idx_txn_hash}.exec(hash); if (transaction) { - SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot->path().filename(); + SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot.path().filename(); if (settings.print) { - print_txn(*transaction, tx_snapshot->path().filename()); + print_txn(*transaction, tx_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << tx_snapshot->path().filename(); + SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << tx_snapshot.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -606,14 +603,14 @@ void lookup_txn_by_hash_in_all(const SnapSettings& settings, const Hash& hash) { SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - const TransactionSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot* snapshot) -> bool { - const auto transaction{snapshot->txn_by_hash(hash)}; + snapshot_repository.view_tx_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto transaction = TransactionFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(hash); if (transaction) { - matching_snapshot = snapshot; + matching_snapshot = snapshot.snapshot.path(); if (settings.print) { - print_txn(*transaction, snapshot->path().filename()); + print_txn(*transaction, matching_snapshot->path().filename()); } } return transaction.has_value(); @@ -642,20 +639,23 @@ void lookup_txn_by_hash(const SnapSettings& settings, const std::string& lookup_ void lookup_txn_by_id_in_one(const SnapSettings& settings, uint64_t txn_id, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_txn_by_id_in_one: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; - if (tx_snapshot) { - const auto transaction{tx_snapshot->txn_by_id(txn_id)}; + Snapshot tx_snapshot{*snapshot_path}; + tx_snapshot.reopen_segment(); + + { + Index idx_txn_hash{snapshot_path->index_file()}; + idx_txn_hash.reopen_index(); + + const auto transaction = TransactionFindByIdQuery{tx_snapshot, idx_txn_hash}.exec(txn_id); if (transaction) { - SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot->path().filename(); + SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot.path().filename(); if (settings.print) { - print_txn(*transaction, tx_snapshot->path().filename()); + print_txn(*transaction, tx_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << tx_snapshot->path().filename(); + SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << tx_snapshot.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -666,14 +666,14 @@ void lookup_txn_by_id_in_all(const SnapSettings& settings, uint64_t txn_id) { SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - const TransactionSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot* snapshot) -> bool { - const auto transaction{snapshot->txn_by_id(txn_id)}; + snapshot_repository.view_tx_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto transaction = TransactionFindByIdQuery{snapshot.snapshot, snapshot.index}.exec(txn_id); if (transaction) { - matching_snapshot = snapshot; + matching_snapshot = snapshot.snapshot.path(); if (settings.print) { - print_txn(*transaction, snapshot->path().filename()); + print_txn(*transaction, matching_snapshot->path().filename()); } } return transaction.has_value(); diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index 677cbd9ba2..170b28bbda 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -36,7 +36,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -329,12 +331,8 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!headers_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedHeadersSnapshot mapped_h_snapshot{ - .segment = make_region(hs.segment), - .header_hash_index = make_region(hs.header_hash_index)}; - auto headers_snapshot = std::make_unique(*headers_segment_path, mapped_h_snapshot); - headers_snapshot->reopen_segment(); - headers_snapshot->reopen_index(); + snapshots::Snapshot header_snapshot{*headers_segment_path, make_region(hs.segment)}; + snapshots::Index idx_header_hash{headers_segment_path->index_file(), make_region(hs.header_hash_index)}; const SilkwormBodiesSnapshot& bs = snapshot->bodies; if (!bs.segment.file_path || !bs.block_num_index.file_path) { @@ -344,12 +342,8 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!bodies_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedBodiesSnapshot mapped_b_snapshot{ - .segment = make_region(bs.segment), - .block_num_index = make_region(bs.block_num_index)}; - auto bodies_snapshot = std::make_unique(*bodies_segment_path, mapped_b_snapshot); - bodies_snapshot->reopen_segment(); - bodies_snapshot->reopen_index(); + snapshots::Snapshot body_snapshot{*bodies_segment_path, make_region(bs.segment)}; + snapshots::Index idx_body_number{bodies_segment_path->index_file(), make_region(bs.block_num_index)}; const SilkwormTransactionsSnapshot& ts = snapshot->transactions; if (!ts.segment.file_path || !ts.tx_hash_index.file_path || !ts.tx_hash_2_block_index.file_path) { @@ -359,21 +353,21 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!transactions_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedTransactionsSnapshot mapped_t_snapshot{ - .segment = make_region(ts.segment), - .tx_hash_index = make_region(ts.tx_hash_index), - .tx_hash_2_block_index = make_region(ts.tx_hash_2_block_index)}; - auto transactions_snapshot = std::make_unique(*transactions_segment_path, mapped_t_snapshot); - transactions_snapshot->reopen_segment(); - transactions_snapshot->reopen_index(); + snapshots::Snapshot txn_snapshot{*transactions_segment_path, make_region(ts.segment)}; + snapshots::Index idx_txn_hash{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions), make_region(ts.tx_hash_index)}; + snapshots::Index idx_txn_hash_2_block{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions_to_block), make_region(ts.tx_hash_2_block_index)}; snapshots::SnapshotBundle bundle{ - .headers_snapshot_path = *headers_segment_path, - .headers_snapshot = std::move(headers_snapshot), - .bodies_snapshot_path = *bodies_segment_path, - .bodies_snapshot = std::move(bodies_snapshot), - .tx_snapshot_path = *transactions_segment_path, - .tx_snapshot = std::move(transactions_snapshot)}; + .header_snapshot = std::move(header_snapshot), + .idx_header_hash = std::move(idx_header_hash), + + .body_snapshot = std::move(body_snapshot), + .idx_body_number = std::move(idx_body_number), + + .txn_snapshot = std::move(txn_snapshot), + .idx_txn_hash = std::move(idx_txn_hash), + .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), + }; handle->snapshot_repository->add_snapshot_bundle(std::move(bundle)); return SILKWORM_OK; } diff --git a/silkworm/capi/silkworm_test.cpp b/silkworm/capi/silkworm_test.cpp index 5567ddd29e..4a1b3a0c7e 100644 --- a/silkworm/capi/silkworm_test.cpp +++ b/silkworm/capi/silkworm_test.cpp @@ -25,8 +25,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -643,34 +644,38 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { snapshot_test::SampleTransactionSnapshotFile valid_tx_snapshot{tmp_dir.path()}; snapshot_test::SampleTransactionSnapshotPath tx_snapshot_path{valid_tx_snapshot.path()}; - auto header_index = snapshots::HeaderIndex::make(header_snapshot_path); - REQUIRE_NOTHROW(header_index.build()); - snapshots::HeaderSnapshot header_snapshot{header_snapshot_path}; + auto header_index_builder = snapshots::HeaderIndex::make(header_snapshot_path); + REQUIRE_NOTHROW(header_index_builder.build()); + snapshots::Snapshot header_snapshot{header_snapshot_path}; header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); + snapshots::Index idx_header_hash{header_snapshot_path.index_file()}; + idx_header_hash.reopen_index(); - auto body_index = snapshots::BodyIndex::make(body_snapshot_path); - REQUIRE_NOTHROW(body_index.build()); - snapshots::BodySnapshot body_snapshot{body_snapshot_path}; + auto body_index_builder = snapshots::BodyIndex::make(body_snapshot_path); + REQUIRE_NOTHROW(body_index_builder.build()); + snapshots::Snapshot body_snapshot{body_snapshot_path}; body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - - auto tx_index = snapshots::TransactionIndex::make(body_snapshot_path, tx_snapshot_path); - tx_index.build(); - auto tx_index_hash_to_block = snapshots::TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); - tx_index_hash_to_block.build(); - snapshots::TransactionSnapshot tx_snapshot{tx_snapshot_path}; + snapshots::Index idx_body_number{body_snapshot_path.index_file()}; + idx_body_number.reopen_index(); + + auto tx_index_builder = snapshots::TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + tx_index_builder.build(); + auto tx_index_hash_to_block_builder = snapshots::TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); + tx_index_hash_to_block_builder.build(); + snapshots::Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); + snapshots::Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + snapshots::Index idx_txn_hash_2_block{tx_index_hash_to_block_builder.path()}; + idx_txn_hash_2_block.reopen_index(); const auto header_snapshot_path_string{header_snapshot_path.path().string()}; - const auto header_index_path_string{header_index.path().path().string()}; + const auto header_index_path_string{idx_header_hash.path().path().string()}; const auto body_snapshot_path_string{body_snapshot_path.path().string()}; - const auto body_index_path_string{body_index.path().path().string()}; + const auto body_index_path_string{idx_body_number.path().path().string()}; const auto tx_snapshot_path_string{tx_snapshot_path.path().string()}; - const auto tx_hash_index_path_string{tx_snapshot_path.index_file().path().string()}; - const auto tx_hash2block_index_path_string{ - tx_snapshot_path.index_file_for_type(snapshots::SnapshotType::transactions_to_block).path().string()}; + const auto tx_hash_index_path_string{idx_txn_hash.path().path().string()}; + const auto tx_hash2block_index_path_string{idx_txn_hash_2_block.path().path().string()}; // Prepare templates for valid header/body/transaction C data structures SilkwormHeadersSnapshot valid_shs{ @@ -681,8 +686,8 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .header_hash_index = SilkwormMemoryMappedFile{ .file_path = header_index_path_string.c_str(), - .memory_address = header_snapshot.idx_header_hash()->memory_file_region().data(), - .memory_length = header_snapshot.idx_header_hash()->memory_file_region().size(), + .memory_address = idx_header_hash.memory_file_region().data(), + .memory_length = idx_header_hash.memory_file_region().size(), }, }; SilkwormBodiesSnapshot valid_sbs{ @@ -693,8 +698,8 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .block_num_index = SilkwormMemoryMappedFile{ .file_path = body_index_path_string.c_str(), - .memory_address = body_snapshot.idx_body_number()->memory_file_region().data(), - .memory_length = body_snapshot.idx_body_number()->memory_file_region().size(), + .memory_address = idx_body_number.memory_file_region().data(), + .memory_length = idx_body_number.memory_file_region().size(), }, }; SilkwormTransactionsSnapshot valid_sts{ @@ -705,13 +710,13 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .tx_hash_index = SilkwormMemoryMappedFile{ .file_path = tx_hash_index_path_string.c_str(), - .memory_address = tx_snapshot.idx_txn_hash()->memory_file_region().data(), - .memory_length = tx_snapshot.idx_txn_hash()->memory_file_region().size(), + .memory_address = idx_txn_hash.memory_file_region().data(), + .memory_length = idx_txn_hash.memory_file_region().size(), }, .tx_hash_2_block_index = SilkwormMemoryMappedFile{ .file_path = tx_hash2block_index_path_string.c_str(), - .memory_address = tx_snapshot.idx_txn_hash_2_block()->memory_file_region().data(), - .memory_length = tx_snapshot.idx_txn_hash_2_block()->memory_file_region().size(), + .memory_address = idx_txn_hash_2_block.memory_file_region().data(), + .memory_length = idx_txn_hash_2_block.memory_file_region().size(), }, }; diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 4918776ab0..0b36069c8b 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -26,13 +26,18 @@ #include #include #include +#include +#include #include +#include #include #include #include namespace silkworm::db { +using namespace snapshots; + std::optional read_schema_version(ROTxn& txn) { auto cursor = txn.ro_cursor(db::table::kDatabaseInfo); if (!cursor->seek(mdbx::slice{kDbSchemaVersionKey})) { @@ -1077,7 +1082,7 @@ bool DataModel::read_body(BlockNum height, HashAsArray hash, bool read_senders, const bool found = db::read_body(txn_, height, hash, read_senders, body); if (found) return found; - return read_body_from_snapshot(height, read_senders, body); + return read_body_from_snapshot(height, body); } bool DataModel::read_body(const Hash& hash, BlockNum height, BlockBody& body) const { @@ -1137,14 +1142,14 @@ bool DataModel::read_block(HashAsSpan hash, BlockNum number, bool read_senders, const bool found = db::read_block(txn_, hash, number, read_senders, block); if (found) return found; - return read_block_from_snapshot(number, read_senders, block); + return read_block_from_snapshot(number, block); } bool DataModel::read_block(const evmc::bytes32& hash, BlockNum number, Block& block) const { const bool found = db::read_block(txn_, hash, number, block); if (found) return found; - return read_block_from_snapshot(number, /*read_senders=*/true, block); + return read_block_from_snapshot(number, block); } void DataModel::for_last_n_headers(size_t n, absl::FunctionRef callback) const { @@ -1198,7 +1203,7 @@ bool DataModel::read_block(BlockNum number, bool read_senders, Block& block) con return read_block(hash->bytes, number, read_senders, block); } -bool DataModel::read_block_from_snapshot(BlockNum height, bool read_senders, Block& block) { +bool DataModel::read_block_from_snapshot(BlockNum height, Block& block) { if (!repository_) { return false; } @@ -1208,7 +1213,7 @@ bool DataModel::read_block_from_snapshot(BlockNum height, bool read_senders, Blo block.header = std::move(*block_header); - return read_body_from_snapshot(height, read_senders, block); + return read_body_from_snapshot(height, block); } std::optional DataModel::read_header_from_snapshot(BlockNum height) { @@ -1220,7 +1225,7 @@ std::optional DataModel::read_header_from_snapshot(BlockNum height) // We know the header snapshot in advance: find it based on target block number const auto header_snapshot = repository_->find_header_segment(height); if (header_snapshot) { - block_header = header_snapshot->header_by_number(height); + block_header = HeaderFindByBlockNumQuery{header_snapshot->snapshot, header_snapshot->index}.exec(height); } return block_header; } @@ -1232,14 +1237,14 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash std::optional block_header; // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order - repository_->view_header_segments([&](const snapshots::HeaderSnapshot* snapshot) -> bool { - block_header = snapshot->header_by_hash(hash); + repository_->view_header_segments([&](snapshots::SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + block_header = HeaderFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(hash); return block_header.has_value(); }); return block_header; } -bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, BlockBody& body) { +bool DataModel::read_body_from_snapshot(BlockNum height, BlockBody& body) { if (!repository_) { return false; } @@ -1248,7 +1253,7 @@ bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, Bloc const auto body_snapshot = repository_->find_body_segment(height); if (!body_snapshot) return false; - auto stored_body = body_snapshot->body_by_number(height); + auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1256,7 +1261,7 @@ bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, Bloc const auto txn_count{stored_body->txn_count >= 2 ? stored_body->txn_count - 2 : stored_body->txn_count}; std::vector transactions; - const auto read_ok{read_transactions_from_snapshot(height, base_txn_id, txn_count, read_senders, transactions)}; + const auto read_ok{read_transactions_from_snapshot(height, base_txn_id, txn_count, transactions)}; if (!read_ok) return false; body.transactions = std::move(transactions); @@ -1273,15 +1278,14 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { // We know the body snapshot in advance: find it based on target block number const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - const auto stored_body = body_snapshot->body_by_number(height); + const auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); return stored_body.has_value(); } return false; } -bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, - bool read_senders, std::vector& txs) { +bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, std::vector& txs) { txs.reserve(txn_count); if (txn_count == 0) { return true; @@ -1290,7 +1294,7 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - txs = tx_snapshot->txn_range(base_txn_id, txn_count, read_senders); + txs = TransactionRangeFromIdQuery{tx_snapshot->snapshot, tx_snapshot->index}.exec_into_vector(base_txn_id, txn_count); return true; } @@ -1298,7 +1302,7 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs) { const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - auto stored_body = body_snapshot->body_by_number(height); + auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1310,7 +1314,7 @@ bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - rlp_txs = tx_snapshot->txn_rlp_range(base_txn_id, txn_count); + rlp_txs = TransactionPayloadRlpRangeFromIdQuery{tx_snapshot->snapshot, tx_snapshot->index}.exec_into_vector(base_txn_id, txn_count); return true; } diff --git a/silkworm/db/access_layer.hpp b/silkworm/db/access_layer.hpp index ee2dacefd4..2d7f8f9c67 100644 --- a/silkworm/db/access_layer.hpp +++ b/silkworm/db/access_layer.hpp @@ -331,14 +331,13 @@ class DataModel { void for_last_n_headers(size_t n, absl::FunctionRef callback) const; private: - static bool read_block_from_snapshot(BlockNum height, bool read_senders, Block& block); + static bool read_block_from_snapshot(BlockNum height, Block& block); static std::optional read_header_from_snapshot(BlockNum height); static std::optional read_header_from_snapshot(const Hash& hash); - static bool read_body_from_snapshot(BlockNum height, bool read_senders, BlockBody& body); + static bool read_body_from_snapshot(BlockNum height, BlockBody& body); static bool is_body_in_snapshot(BlockNum height); static bool read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs); - static bool read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, - bool read_senders, std::vector& txs); + static bool read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, std::vector& txs); [[nodiscard]] std::optional read_tx_lookup_from_db(const evmc::bytes32& tx_hash) const; [[nodiscard]] static std::optional read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash); diff --git a/silkworm/db/etl/collector.cpp b/silkworm/db/etl/collector.cpp index e614d3758d..895d167432 100644 --- a/silkworm/db/etl/collector.cpp +++ b/silkworm/db/etl/collector.cpp @@ -50,9 +50,16 @@ void Collector::flush_buffer() { file_providers_.back()->flush(buffer_); buffer_.clear(); const auto [_, duration]{sw.stop()}; - log::Info("ETL collector flushed file", {"path", std::string(file_providers_.back()->get_file_name()), - "size", human_size(file_providers_.back()->get_file_size()), - "in", StopWatch::format(duration)}); + log::Debug( + "ETL collector flushed file", + { + "path", + std::string(file_providers_.back()->get_file_name()), + "size", + human_size(file_providers_.back()->get_file_size()), + "in", + StopWatch::format(duration), + }); } } diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index 9455a21e45..69502426c2 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -56,29 +56,24 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { SILK_INFO << "SnapshotSync: snapshot sync disabled, no snapshot must be downloaded"; return true; } - SILK_INFO << "SnapshotSync: snapshot repository: " << settings_.repository_dir.string(); - if (settings_.no_downloader) { - reopen(); - return true; - } - const auto snapshot_file_names = db::read_snapshots(txn); + if (!settings_.no_downloader) { + const bool download_completed = download_snapshots(snapshot_file_names); + if (!download_completed) return false; - const bool download_completed = download_snapshots(snapshot_file_names); - if (!download_completed) return false; - - db::write_snapshots(txn, snapshot_file_names); + db::write_snapshots(txn, snapshot_file_names); + SILK_INFO << "SnapshotSync: file names saved into db count=" << std::to_string(snapshot_file_names.size()); + } - SILK_INFO << "SnapshotSync: file names saved into db count=" << std::to_string(snapshot_file_names.size()); + repository_->remove_stale_indexes(); + build_missing_indexes(); - index_snapshots(); + repository_->reopen_folder(); const auto max_block_available = repository_->max_block_available(); - SILK_INFO << "SnapshotSync: max block available: " << max_block_available - << " (segment max block: " << repository_->segment_max_block() - << ", idx max block: " << repository_->idx_max_block() << ")"; + SILK_INFO << "SnapshotSync: max block available: " << max_block_available; const auto snapshot_config = Config::lookup_known_config(config_.chain_id, snapshot_file_names); const auto configured_max_block_number = snapshot_config.max_block_number(); @@ -90,12 +85,6 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { return true; } -void SnapshotSync::reopen() { - repository_->reopen_folder(); - SILK_INFO << "SnapshotSync: reopen completed segment_max_block=" << std::to_string(repository_->segment_max_block()) - << " idx_max_block=" << std::to_string(repository_->idx_max_block()); -} - bool SnapshotSync::download_snapshots(const std::vector& snapshot_file_names) { const auto missing_block_ranges = repository_->missing_block_ranges(); if (!missing_block_ranges.empty()) { @@ -183,24 +172,9 @@ bool SnapshotSync::download_snapshots(const std::vector& snapshot_f completed_connection.disconnect(); stats_connection.disconnect(); - reopen(); return true; } -void SnapshotSync::index_snapshots() { - if (!settings_.enabled) { - SILK_INFO << "SnapshotSync: snapshot sync disabled, no index must be created"; - return; - } - - // Build any missing snapshot index if needed, then reopen - if (repository_->idx_max_block() < repository_->segment_max_block()) { - SILK_INFO << "SnapshotSync: missing indexes detected, rebuild started"; - build_missing_indexes(); - reopen(); - } -} - bool SnapshotSync::stop() { const bool result = Stoppable::stop(); client_.stop(); @@ -215,6 +189,12 @@ void SnapshotSync::build_missing_indexes() { // Determine the missing indexes and build them in parallel const auto missing_indexes = repository_->missing_indexes(); + if (missing_indexes.empty()) { + return; + } + + SILK_INFO << "SnapshotSync: missing indexes detected, rebuild started"; + for (const auto& index : missing_indexes) { workers.push_task([=]() { try { @@ -257,15 +237,15 @@ void SnapshotSync::update_block_headers(db::RWTxn& txn, BlockNum max_block_avail db::etl_mdbx::Collector hash2bn_collector{}; intx::uint256 total_difficulty{0}; uint64_t block_count{0}; - repository_->for_each_header([&](const BlockHeader* header) -> bool { - SILK_TRACE << "SnapshotSync: header number=" << header->number << " hash=" << Hash{header->hash()}.to_hex(); - const auto block_number = header->number; + repository_->for_each_header([&](const BlockHeader& header) -> bool { + SILK_TRACE << "SnapshotSync: header number=" << header.number << " hash=" << Hash{header.hash()}.to_hex(); + const auto block_number = header.number; if (block_number > max_block_available) return true; - const auto block_hash = header->hash(); + const auto block_hash = header.hash(); // Write block header into kDifficulty table - total_difficulty += header->difficulty; + total_difficulty += header.difficulty; db::write_total_difficulty(txn, block_number, block_hash, total_difficulty); // Write block header into kCanonicalHashes table @@ -311,8 +291,8 @@ void SnapshotSync::update_block_bodies(db::RWTxn& txn, BlockNum max_block_availa // Reset sequence for kBlockTransactions table const auto tx_snapshot = repository_->find_tx_segment(max_block_available); - ensure(tx_snapshot, "SnapshotSync: snapshots max block not found in any snapshot"); - const auto last_tx_id = tx_snapshot->idx_txn_hash()->base_data_id() + tx_snapshot->item_count(); + ensure(tx_snapshot.has_value(), "SnapshotSync: snapshots max block not found in any snapshot"); + const auto last_tx_id = tx_snapshot->index.base_data_id() + tx_snapshot->snapshot.item_count(); db::reset_map_sequence(txn, db::table::kBlockTransactions.name, last_tx_id + 1); SILK_INFO << "SnapshotSync: database table BlockTransactions sequence reset"; diff --git a/silkworm/db/snapshot_sync.hpp b/silkworm/db/snapshot_sync.hpp index edc4324a6b..845cd12e8c 100644 --- a/silkworm/db/snapshot_sync.hpp +++ b/silkworm/db/snapshot_sync.hpp @@ -38,10 +38,8 @@ class SnapshotSync : public Stoppable { bool download_and_index_snapshots(db::RWTxn& txn); bool download_snapshots(const std::vector& snapshot_file_names); - void index_snapshots(); private: - void reopen(); void build_missing_indexes(); void update_database(db::RWTxn& txn, BlockNum max_block_available); void update_block_headers(db::RWTxn& txn, BlockNum max_block_available); diff --git a/silkworm/db/snapshots/basic_queries.hpp b/silkworm/db/snapshots/basic_queries.hpp new file mode 100644 index 0000000000..55be9f53e8 --- /dev/null +++ b/silkworm/db/snapshots/basic_queries.hpp @@ -0,0 +1,92 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +#include + +#include "index.hpp" +#include "snapshot_reader.hpp" + +namespace silkworm::snapshots { + +template +class BasicQuery { + public: + BasicQuery( + const Snapshot& snapshot, + const Index& index) + : reader_{snapshot}, + index_{index} {} + + protected: + TSnapshotReader reader_; + const Index& index_; +}; + +template +struct FindByIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::optional exec(uint64_t id) { + auto offset = this->index_.lookup_by_data_id(id); + if (!offset) { + return std::nullopt; + } + + return this->reader_.seek_one(*offset); + } +}; + +template +struct FindByHashQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::optional exec(const Hash& hash) { + auto offset = this->index_.lookup_by_hash(hash); + if (!offset) { + return std::nullopt; + } + + auto result = this->reader_.seek_one(*offset, hash); + + // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF + if (result && (result->hash() != hash)) { + return std::nullopt; + } + + return result; + } +}; + +template +struct RangeFromIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::vector exec_into_vector(uint64_t first_id, uint64_t count) { + auto offset = this->index_.lookup_by_data_id(first_id); + if (!offset) { + return {}; + } + + return this->reader_.read_into_vector(*offset, count); + } +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_hash.hpp b/silkworm/db/snapshots/body_queries.hpp similarity index 81% rename from silkworm/db/snapshots/txn_hash.hpp rename to silkworm/db/snapshots/body_queries.hpp index fe425c5b8d..732e260b8b 100644 --- a/silkworm/db/snapshots/txn_hash.hpp +++ b/silkworm/db/snapshots/body_queries.hpp @@ -16,13 +16,11 @@ #pragma once -#include - -#include -#include +#include "basic_queries.hpp" +#include "body_snapshot.hpp" namespace silkworm::snapshots { -Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); +using BodyFindByBlockNumQuery = FindByIdQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_snapshot.cpp b/silkworm/db/snapshots/body_snapshot.cpp new file mode 100644 index 0000000000..9054b814d1 --- /dev/null +++ b/silkworm/db/snapshots/body_snapshot.cpp @@ -0,0 +1,28 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "body_snapshot.hpp" + +#include + +namespace silkworm::snapshots { + +void decode_word_into_body(ByteView word, BlockBodyForStorage& body) { + const auto result = decode_stored_block_body(word, body); + success_or_throw(result, "decode_word_into_body: decode_stored_block_body error"); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp new file mode 100644 index 0000000000..8e6301b94b --- /dev/null +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -0,0 +1,44 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include + +#include "snapshot_reader.hpp" +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +void decode_word_into_body(ByteView word, BlockBodyForStorage& body); + +struct BodySnapshotWordDeserializer : public SnapshotWordDeserializer { + BlockBodyForStorage value; + + ~BodySnapshotWordDeserializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_body(word, value); + } +}; + +static_assert(SnapshotWordDeserializerConcept); + +using BodySnapshotReader = SnapshotReader; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_txs_amount_query.cpp b/silkworm/db/snapshots/body_txs_amount_query.cpp new file mode 100644 index 0000000000..ced200ec38 --- /dev/null +++ b/silkworm/db/snapshots/body_txs_amount_query.cpp @@ -0,0 +1,54 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "body_txs_amount_query.hpp" + +#include + +#include "body_snapshot.hpp" + +namespace silkworm::snapshots { + +BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() { + auto path = snapshot_.path(); + uint64_t first_tx_id{0}, last_tx_id{0}, last_txs_amount{0}; + BlockNum number = path.block_from(); + + BodySnapshotReader reader{snapshot_}; + for (auto& body : reader) { + if (number == path.block_from()) { + first_tx_id = body.base_txn_id; + } + if (number >= path.block_to() - 1) { + last_tx_id = body.base_txn_id; + last_txs_amount = body.txn_count; + } + number++; + } + + if ((first_tx_id == 0) && (last_tx_id == 0)) { + throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + path.path().string()); + } + + uint64_t count = last_tx_id + last_txs_amount - first_tx_id; + + return Result{ + first_tx_id, + count, + }; +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_txs_amount_query.hpp b/silkworm/db/snapshots/body_txs_amount_query.hpp new file mode 100644 index 0000000000..cde5b7b406 --- /dev/null +++ b/silkworm/db/snapshots/body_txs_amount_query.hpp @@ -0,0 +1,40 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include + +#include "snapshot_reader.hpp" + +namespace silkworm::snapshots { + +class BodyTxsAmountQuery { + public: + struct Result { + uint64_t first_tx_id{}; + uint64_t count{}; + }; + + explicit BodyTxsAmountQuery(const Snapshot& snapshot) : snapshot_(snapshot) {} + + Result exec(); + + private: + const Snapshot& snapshot_; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp new file mode 100644 index 0000000000..bb8235006b --- /dev/null +++ b/silkworm/db/snapshots/header_queries.hpp @@ -0,0 +1,27 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include "basic_queries.hpp" +#include "header_snapshot.hpp" + +namespace silkworm::snapshots { + +using HeaderFindByBlockNumQuery = FindByIdQuery; +using HeaderFindByHashQuery = FindByHashQuery; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.cpp b/silkworm/db/snapshots/header_snapshot.cpp new file mode 100644 index 0000000000..0afa3f17c0 --- /dev/null +++ b/silkworm/db/snapshots/header_snapshot.cpp @@ -0,0 +1,42 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "header_snapshot.hpp" + +#include +#include + +namespace silkworm::snapshots { + +void decode_word_into_header(ByteView word, BlockHeader& header) { + // First byte in data is first byte of header hash. + ensure(!word.empty(), [&]() { return "decode_word_into_header: first hash byte missing"; }); + + // Skip hash first byte to obtain encoded header RLP data + ByteView encoded_header{word.data() + 1, word.length() - 1}; + + const auto decode_result = rlp::decode(encoded_header, header); + success_or_throw(decode_result, "decode_word_into_header: rlp::decode error"); +} + +void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to) { + ensure((header.number >= block_from) && (header.number < block_to), [&]() { + return "check_sanity_of_header_with_metadata: header.number=" + std::to_string(header.number) + + " outside of range [" + std::to_string(block_from) + ", " + std::to_string(block_to) + ")"; + }); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp new file mode 100644 index 0000000000..c5b9f581b6 --- /dev/null +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -0,0 +1,48 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +#include "snapshot_reader.hpp" +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +void decode_word_into_header(ByteView word, BlockHeader& header); +void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to); + +struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { + BlockHeader value; + + ~HeaderSnapshotWordDeserializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_header(word, value); + } + + void check_sanity_with_metadata(BlockNum block_from, BlockNum block_to) override { + check_sanity_of_header_with_metadata(value, block_from, block_to); + } +}; + +static_assert(SnapshotWordDeserializerConcept); + +using HeaderSnapshotReader = SnapshotReader; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.cpp b/silkworm/db/snapshots/index.cpp new file mode 100644 index 0000000000..a7d4f6669d --- /dev/null +++ b/silkworm/db/snapshots/index.cpp @@ -0,0 +1,31 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "index.hpp" + +namespace silkworm::snapshots { + +void Index::reopen_index() { + close_index(); + + index_ = std::make_unique(path_.path(), region_); +} + +void Index::close_index() { + index_.reset(); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp new file mode 100644 index 0000000000..b54bd8f6d8 --- /dev/null +++ b/silkworm/db/snapshots/index.hpp @@ -0,0 +1,70 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include + +#include + +#include "path.hpp" +#include "rec_split/rec_split.hpp" + +namespace silkworm::snapshots { + +class Index { + public: + explicit Index( + SnapshotPath path, + std::optional region = std::nullopt) + : path_(std::move(path)), + region_(region) {} + + std::optional lookup_by_data_id(uint64_t id) const { return index_->lookup_by_data_id(id); }; + std::optional lookup_by_hash(const Hash& hash) const { return index_->lookup_by_key(hash); }; + + std::optional lookup_ordinal_by_hash(const Hash& hash) const { + auto [result, found] = index_->lookup(hash); + return found ? std::optional{result} : std::nullopt; + } + + void reopen_index(); + void close_index(); + + bool is_open() const { return index_.get(); } + const SnapshotPath& path() const { return path_; } + + MemoryMappedRegion memory_file_region() const { + return index_ ? index_->memory_file_region() : MemoryMappedRegion{}; + } + + uint64_t base_data_id() const { + assert(index_); + return index_->base_data_id(); + } + + private: + SnapshotPath path_; + //! External memory-mapped region of the index data + std::optional region_; + + std::unique_ptr index_; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index_builder.hpp b/silkworm/db/snapshots/index_builder.hpp index 482c3fd545..c8e7b2c18d 100644 --- a/silkworm/db/snapshots/index_builder.hpp +++ b/silkworm/db/snapshots/index_builder.hpp @@ -56,7 +56,7 @@ struct IndexInputDataQuery { : query_(query), impl_(std::move(impl)), entry_(entry) {} using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; @@ -75,6 +75,8 @@ struct IndexInputDataQuery { value_type entry_; }; + static_assert(std::input_or_output_iterator); + virtual ~IndexInputDataQuery() = default; virtual Iterator begin() = 0; diff --git a/silkworm/db/snapshots/index_builder_test.cpp b/silkworm/db/snapshots/index_builder_test.cpp index 33d74a4d58..98de5c4cbb 100644 --- a/silkworm/db/snapshots/index_builder_test.cpp +++ b/silkworm/db/snapshots/index_builder_test.cpp @@ -64,8 +64,8 @@ TEST_CASE("TransactionIndex::build KO: empty snapshot", "[silkworm][snapshot][in auto txs_snapshot_path = *SnapshotPath::parse(txs_snapshot_file.path()); auto bodies_snapshot_path = *SnapshotPath::parse(bodies_snapshot_file.path()); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("empty body snapshot")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("empty body snapshot")); + CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), Contains("empty body snapshot")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), Contains("empty body snapshot")); } } diff --git a/silkworm/db/snapshots/path.cpp b/silkworm/db/snapshots/path.cpp index d16e3961c6..268ae176fb 100644 --- a/silkworm/db/snapshots/path.cpp +++ b/silkworm/db/snapshots/path.cpp @@ -37,7 +37,7 @@ std::optional SnapshotPath::parse(fs::path path) { const std::string filename_no_ext = path.stem().string(); // Expected stem format: -<6_digit_block_from>-<6_digit_block_to>- - const std::vector tokens = absl::StrSplit(filename_no_ext, '-'); + const std::vector tokens = absl::StrSplit(filename_no_ext, absl::MaxSplits('-', 3)); if (tokens.size() != 4) { return std::nullopt; } @@ -79,8 +79,10 @@ std::optional SnapshotPath::parse(fs::path path) { return std::nullopt; } - // Expected tag format: headers|bodies|transactions (parsing relies on magic_enum, so SnapshotType items must match exactly) - std::string_view tag_str{tag.data(), tag.size()}; + // Expected tag format: headers|bodies|transactions|transactions-to-block + // parsing relies on magic_enum, so SnapshotType items must match exactly + std::string tag_str{tag.data(), tag.size()}; + std::replace(tag_str.begin(), tag_str.end(), '-', '_'); const auto type = magic_enum::enum_cast(tag_str); if (!type) { return std::nullopt; @@ -89,22 +91,26 @@ std::optional SnapshotPath::parse(fs::path path) { return SnapshotPath{std::move(path), version, block_from, block_to, *type}; } -SnapshotPath SnapshotPath::from(const fs::path& dir, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) { - const auto filename = SnapshotPath::build_filename(version, block_from, block_to, type); +SnapshotPath SnapshotPath::from(const fs::path& dir, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext) { + const auto filename = SnapshotPath::build_filename(version, block_from, block_to, type, ext); return SnapshotPath{dir / filename, version, block_from, block_to, type}; } -fs::path SnapshotPath::build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) { +fs::path SnapshotPath::build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext) { std::string snapshot_type_name{magic_enum::enum_name(type)}; std::string filename{absl::StrFormat("v%d-%06d-%06d-%s%s", version, block_from / kFileNameBlockScaleFactor, block_to / kFileNameBlockScaleFactor, absl::StrReplaceAll(snapshot_type_name, {{"_", "-"}}), - kSegmentExtension)}; + ext)}; return fs::path{filename}; } +SnapshotPath SnapshotPath::related_path(SnapshotType type, const char* ext) const { + return SnapshotPath::from(path_.parent_path(), version_, block_from_, block_to_, type, ext); +} + SnapshotPath::SnapshotPath(fs::path path, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) : path_(std::move(path)), version_(version), block_from_(block_from), block_to_(block_to), type_(type) { ensure(block_to >= block_from, "SnapshotPath: block_to less than block_from"); diff --git a/silkworm/db/snapshots/path.hpp b/silkworm/db/snapshots/path.hpp index d4630ae6c9..30a260ad73 100644 --- a/silkworm/db/snapshots/path.hpp +++ b/silkworm/db/snapshots/path.hpp @@ -65,7 +65,8 @@ class SnapshotPath { uint8_t version, BlockNum block_from, BlockNum block_to, - SnapshotType type); + SnapshotType type, + const char* ext = kSegmentExtension); [[nodiscard]] std::string filename() const { return path_.filename().string(); } @@ -100,20 +101,27 @@ class SnapshotPath { } [[nodiscard]] SnapshotPath index_file() const { - return SnapshotPath(std::filesystem::path{path_}.replace_extension(kIdxExtension), version_, block_from_, block_to_, type_); + return related_path(type_, kIdxExtension); } [[nodiscard]] SnapshotPath index_file_for_type(SnapshotType type) const { - std::filesystem::path index_path{path_}; - index_path.replace_filename(build_filename(version_, block_from_, block_to_, type)); - return SnapshotPath(index_path.replace_extension(kIdxExtension), version_, block_from_, block_to_, type); + return related_path(type, kIdxExtension); + } + + [[nodiscard]] SnapshotPath snapshot_path_for_type(SnapshotType type) const { + return related_path(type, kSegmentExtension); + } + + [[nodiscard]] std::filesystem::file_time_type last_write_time() const { + return std::filesystem::last_write_time(path_); } friend bool operator<(const SnapshotPath& lhs, const SnapshotPath& rhs); friend bool operator==(const SnapshotPath&, const SnapshotPath&) = default; protected: - static std::filesystem::path build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type); + static std::filesystem::path build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext); + SnapshotPath related_path(SnapshotType type, const char* ext) const; explicit SnapshotPath(std::filesystem::path path, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type); diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 1b7ef653f6..38764e5900 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -680,7 +681,23 @@ class RecSplit { //! Return the offset of the i-th element in the index. Perfect hash table lookup is not performed, //! only access to the Elias-Fano structure containing all offsets - [[nodiscard]] std::size_t ordinal_lookup(uint64_t i) const { return ef_offsets_->get(i); } + [[nodiscard]] std::size_t lookup_by_ordinal(uint64_t i) const { return ef_offsets_->get(i); } + + [[nodiscard]] std::optional lookup_by_data_id(uint64_t data_id) const { + // check if data_id is not out of range + uint64_t min = base_data_id(); + uint64_t max = min + key_count() - 1; + if ((data_id < min) || (data_id > max)) { + return std::nullopt; + } + + return lookup_by_ordinal(data_id - base_data_id()); + } + + [[nodiscard]] std::optional lookup_by_key(ByteView key) const { + auto [i, found] = lookup(key); + return found ? std::optional{lookup_by_ordinal(i)} : std::nullopt; + } //! Return the number of keys used to build the RecSplit instance [[nodiscard]] std::size_t key_count() const { return key_count_; } diff --git a/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp b/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp index 660d0a2265..5e6582b8af 100644 --- a/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp +++ b/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp @@ -262,7 +262,7 @@ TEST_CASE("RecSplit8-Par: double index lookup", "[silkworm][node][recsplit][igno const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); CHECK(enumeration_index == i); CHECK(found); - CHECK(rs2.ordinal_lookup(enumeration_index) == i * 17); + CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp b/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp index 9795865b91..e961aa57f2 100644 --- a/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp +++ b/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp @@ -257,7 +257,7 @@ TEST_CASE("RecSplit8: double index lookup", "[silkworm][snapshots][recsplit][ign const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); CHECK(enumeration_index == i); CHECK(found); - CHECK(rs2.ordinal_lookup(enumeration_index) == i * 17); + CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index a3d4d5d165..92556b3024 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -17,14 +17,18 @@ #include "repository.hpp" #include +#include +#include #include -#include #include #include +#include #include +#include #include #include +#include #include #include #include @@ -33,37 +37,15 @@ namespace silkworm::snapshots { namespace fs = std::filesystem; -template -const T* get_segment(const SnapshotsByPath& segments, const SnapshotPath& path) { - if (!segments.contains(path.path())) { - return nullptr; - } - return segments.find(path.path())->second.get(); -} - -template -SnapshotRepository::ViewResult view(const SnapshotsByPath& segments, BlockNum number, const SnapshotWalker& walker) { - // Search for target segment in reverse order (from the newest segment to the oldest one) - for (auto it = segments.rbegin(); it != segments.rend(); ++it) { - const auto& snapshot = it->second; - // We're looking for the segment containing the target block number in its block range - if (snapshot->block_from() <= number && number < snapshot->block_to()) { - const bool walk_done = walker(snapshot.get()); - return walk_done ? SnapshotRepository::kWalkSuccess : SnapshotRepository::kWalkFailed; - } - } - return SnapshotRepository::kSnapshotNotFound; -} - -template -std::size_t view(const SnapshotsByPath& segments, const SnapshotWalker& walker) { +std::size_t SnapshotRepository::view_bundles(const SnapshotBundleWalker& walker) { // Search for target segment in reverse order (from the newest segment to the oldest one) std::size_t visited_views{0}; bool walk_done{false}; - for (auto it = segments.rbegin(); it != segments.rend() && !walk_done; ++it) { - const auto& snapshot = it->second; - walk_done = walker(snapshot.get()); + for (auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; + walk_done = walker(bundle); ++visited_views; + if (walk_done) break; } return visited_views; } @@ -75,42 +57,56 @@ SnapshotRepository::~SnapshotRepository() { close(); } -void SnapshotRepository::add_snapshot_bundle(SnapshotBundle&& bundle) { - header_segments_[bundle.headers_snapshot_path.path()] = std::move(bundle.headers_snapshot); - body_segments_[bundle.bodies_snapshot_path.path()] = std::move(bundle.bodies_snapshot); - tx_segments_[bundle.tx_snapshot_path.path()] = std::move(bundle.tx_snapshot); - if (bundle.tx_snapshot_path.block_to() > segment_max_block_) { - segment_max_block_ = bundle.tx_snapshot_path.block_to() - 1; +void SnapshotRepository::add_snapshot_bundle(SnapshotBundle bundle) { + bundle.reopen(); + bundles_.emplace(bundle.block_from(), std::move(bundle)); +} + +void SnapshotBundle::reopen() { + for (auto& snapshot_ref : snapshots()) { + snapshot_ref.get().reopen_segment(); + ensure(!snapshot_ref.get().empty(), [&]() { + return "invalid empty snapshot " + snapshot_ref.get().fs_path().string(); + }); + } + for (auto& index_ref : indexes()) { + index_ref.get().reopen_index(); } - idx_max_block_ = max_idx_available(); } -void SnapshotRepository::reopen_folder() { - SILK_INFO << "Reopen snapshot repository folder: " << settings_.repository_dir.string(); - SnapshotPathList segment_files = get_segment_files(); - reopen_list(segment_files); - SILK_INFO << "Total reopened snapshots: " << total_snapshots_count(); +void SnapshotBundle::close() { + for (auto& index_ref : indexes()) { + index_ref.get().close_index(); + } + for (auto& snapshot_ref : snapshots()) { + snapshot_ref.get().close(); + } } void SnapshotRepository::close() { SILK_TRACE << "Close snapshot repository folder: " << settings_.repository_dir.string(); - for (const auto& [_, header_seg] : this->header_segments_) { - header_seg->close(); - } - for (const auto& [_, body_seg] : this->body_segments_) { - body_seg->close(); - } - for (const auto& [_, tx_seg] : this->tx_segments_) { - tx_seg->close(); + for (auto& entry : bundles_) { + auto& bundle = entry.second; + bundle.close(); } } +BlockNum SnapshotRepository::max_block_available() const { + if (bundles_.empty()) + return 0; + + // a bundle with the max block range is last in the sorted bundles map + auto& bundle = bundles_.rbegin()->second; + return (bundle.block_from() < bundle.block_to()) ? bundle.block_to() - 1 : bundle.block_from(); +} + std::vector SnapshotRepository::missing_block_ranges() const { const auto ordered_segments = get_segment_files(); std::vector missing_ranges; BlockNum previous_to{0}; for (const auto& segment : ordered_segments) { + // skips different types of snapshots having the same block range if (segment.block_to() <= previous_to) continue; if (segment.block_from() != previous_to) { missing_ranges.emplace_back(previous_to, segment.block_from()); @@ -120,80 +116,84 @@ std::vector SnapshotRepository::missing_block_ranges() const { return missing_ranges; } -bool SnapshotRepository::for_each_header(const HeaderSnapshot::Walker& fn) { - for (const auto& [_, header_snapshot] : header_segments_) { - SILK_TRACE << "for_each_header header_snapshot: " << header_snapshot->fs_path().string(); - const auto keep_going = header_snapshot->for_each_header([fn](const auto* header) { - return fn(header); - }); - if (!keep_going) return false; - } - return true; -} +bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { + for (auto& entry : bundles_) { + auto& bundle = entry.second; + const Snapshot& header_snapshot = bundle.header_snapshot; + SILK_TRACE << "for_each_header header_snapshot: " << header_snapshot.fs_path().string(); -bool SnapshotRepository::for_each_body(const BodySnapshot::Walker& fn) { - for (const auto& [_, body_snapshot] : body_segments_) { - SILK_TRACE << "for_each_body body_snapshot: " << body_snapshot->fs_path().string(); - const auto keep_going = body_snapshot->for_each_body([fn](BlockNum number, const auto* body) { - return fn(number, body); - }); - if (!keep_going) return false; + HeaderSnapshotReader reader{header_snapshot}; + for (auto& header : reader) { + const bool keep_going = fn(header); + if (!keep_going) return false; + } } return true; } -SnapshotRepository::ViewResult SnapshotRepository::view_header_segment(BlockNum number, const HeaderSnapshotWalker& walker) { - return view(header_segments_, number, walker); -} - -SnapshotRepository::ViewResult SnapshotRepository::view_body_segment(BlockNum number, const BodySnapshotWalker& walker) { - return view(body_segments_, number, walker); -} - -SnapshotRepository::ViewResult SnapshotRepository::view_tx_segment(BlockNum number, const TransactionSnapshotWalker& walker) { - return view(tx_segments_, number, walker); -} +bool SnapshotRepository::for_each_body(const BodyWalker& fn) { + for (auto& entry : bundles_) { + auto& bundle = entry.second; + const Snapshot& body_snapshot = bundle.body_snapshot; + SILK_TRACE << "for_each_body body_snapshot: " << body_snapshot.fs_path().string(); -std::size_t SnapshotRepository::view_header_segments(const HeaderSnapshotWalker& walker) { - return view(header_segments_, walker); + BlockNum number = body_snapshot.block_from(); + BodySnapshotReader reader{body_snapshot}; + for (auto& body : reader) { + const bool keep_going = fn(number, body); + if (!keep_going) return false; + number++; + } + } + return true; } -std::size_t SnapshotRepository::view_body_segments(const BodySnapshotWalker& walker) { - return view(body_segments_, walker); +std::size_t SnapshotRepository::view_segments(SnapshotType type, const SnapshotWalker& walker) { + return view_bundles([&](const SnapshotBundle& bundle) { + return walker({bundle.snapshot(type), bundle.index(type)}); + }); } -std::size_t SnapshotRepository::view_tx_segments(const TransactionSnapshotWalker& walker) { - return view(tx_segments_, walker); +std::size_t SnapshotRepository::view_header_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::headers, walker); } -const HeaderSnapshot* SnapshotRepository::get_header_segment(const SnapshotPath& path) const { - return get_segment(header_segments_, path); +std::size_t SnapshotRepository::view_body_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::bodies, walker); } -const BodySnapshot* SnapshotRepository::get_body_segment(const SnapshotPath& path) const { - return get_segment(body_segments_, path); +std::size_t SnapshotRepository::view_tx_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::transactions, walker); } -const TransactionSnapshot* SnapshotRepository::get_tx_segment(const SnapshotPath& path) const { - return get_segment(tx_segments_, path); +std::optional SnapshotRepository::find_segment(SnapshotType type, BlockNum number) const { + auto bundle = find_bundle(number); + if (bundle) { + return SnapshotAndIndex{bundle->snapshot(type), bundle->index(type)}; + } + return std::nullopt; } -const HeaderSnapshot* SnapshotRepository::find_header_segment(BlockNum number) const { - return find_segment(header_segments_, number); +std::optional SnapshotRepository::find_header_segment(BlockNum number) const { + return find_segment(SnapshotType::headers, number); } -const BodySnapshot* SnapshotRepository::find_body_segment(BlockNum number) const { - return find_segment(body_segments_, number); +std::optional SnapshotRepository::find_body_segment(BlockNum number) const { + return find_segment(SnapshotType::bodies, number); } -const TransactionSnapshot* SnapshotRepository::find_tx_segment(BlockNum number) const { - return find_segment(tx_segments_, number); +std::optional SnapshotRepository::find_tx_segment(BlockNum number) const { + return find_segment(SnapshotType::transactions, number); } std::optional SnapshotRepository::find_block_number(Hash txn_hash) const { - for (const auto& it : std::ranges::reverse_view(tx_segments_)) { - const auto& snapshot = it.second; - auto block = snapshot->block_num_by_txn_hash(txn_hash); + for (const auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; + const auto& snapshot = bundle.txn_snapshot; + + const Index& idx_txn_hash = bundle.idx_txn_hash; + const Index& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block; + auto block = TransactionBlockNumByTxnHashQuery{idx_txn_hash_2_block, TransactionFindByHashQuery{snapshot, idx_txn_hash}}.exec(txn_hash); if (block) { return block; } @@ -204,142 +204,126 @@ std::optional SnapshotRepository::find_block_number(Hash txn_hash) con std::vector> SnapshotRepository::missing_indexes() const { SnapshotPathList segment_files = get_segment_files(); std::vector> missing_index_list; - missing_index_list.reserve(segment_files.size()); + for (const auto& seg_file : segment_files) { - const auto index_file = seg_file.index_file(); - SILK_TRACE << "Segment file: " << seg_file.filename() << " has index: " << index_file.filename(); - if (!std::filesystem::exists(index_file.path())) { - std::shared_ptr index; - switch (seg_file.type()) { - case SnapshotType::headers: { - index = std::make_shared(HeaderIndex::make(seg_file)); + switch (seg_file.type()) { + case SnapshotType::headers: { + if (!fs::exists(seg_file.index_file().path())) { + auto index = std::make_shared(HeaderIndex::make(seg_file)); missing_index_list.push_back(index); - break; } - case SnapshotType::bodies: { - index = std::make_shared(BodyIndex::make(seg_file)); + break; + } + case SnapshotType::bodies: { + if (!fs::exists(seg_file.index_file().path())) { + auto index = std::make_shared(BodyIndex::make(seg_file)); missing_index_list.push_back(index); - break; } - case SnapshotType::transactions: { - auto bodies_segment_path = TransactionIndex::bodies_segment_path(seg_file); - if (std::find(segment_files.begin(), segment_files.end(), bodies_segment_path) != segment_files.end()) { - index = std::make_shared(TransactionIndex::make(bodies_segment_path, seg_file)); - missing_index_list.push_back(index); - - index = std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, seg_file)); - missing_index_list.push_back(index); - } - break; + break; + } + case SnapshotType::transactions: { + auto bodies_segment_path = TransactionIndex::bodies_segment_path(seg_file); + bool has_bodies_segment = (std::find(segment_files.begin(), segment_files.end(), bodies_segment_path) != segment_files.end()); + + if (!fs::exists(seg_file.index_file().path()) && has_bodies_segment) { + auto index = std::make_shared(TransactionIndex::make(bodies_segment_path, seg_file)); + missing_index_list.push_back(index); } - default: { - SILKWORM_ASSERT(false); + + if (!fs::exists(seg_file.index_file_for_type(SnapshotType::transactions_to_block).path()) && has_bodies_segment) { + auto index = std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, seg_file)); + missing_index_list.push_back(index); } + break; + } + default: { + SILKWORM_ASSERT(false); } } } + return missing_index_list; } -void SnapshotRepository::reopen_file(const SnapshotPath& segment_path, bool optimistic) { - reopen_list(SnapshotPathList{segment_path}, optimistic); -} +void SnapshotRepository::reopen_folder() { + SILK_INFO << "Reopen snapshot repository folder: " << settings_.repository_dir.string(); + SnapshotPathList all_snapshot_paths = get_segment_files(); + SnapshotPathList all_index_paths = get_idx_files(); -void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files, bool optimistic) { - BlockNum segment_max_block{0}; - for (const auto& seg_file : segment_files) { - try { - SILK_TRACE << "Reopen segment file: " << seg_file.path().filename().string(); - bool snapshot_valid{true}; - switch (seg_file.type()) { - case SnapshotType::headers: { - const auto header_it = header_segments_.find(seg_file.path()); - if (header_it != header_segments_.end()) { - header_it->second->reopen_index(); - } else { - snapshot_valid = reopen_header(seg_file); - } - break; - } - case SnapshotType::bodies: { - const auto body_it = body_segments_.find(seg_file.path()); - if (body_it != body_segments_.end()) { - body_it->second->reopen_index(); - } else { - snapshot_valid = reopen_body(seg_file); - } - break; - } - case SnapshotType::transactions: { - const auto tx_it = tx_segments_.find(seg_file.path()); - if (tx_it != tx_segments_.end()) { - tx_it->second->reopen_index(); - } else { - snapshot_valid = reopen_transaction(seg_file); - } - break; - } - default: { - SILKWORM_ASSERT(false); - } - } - ensure(snapshot_valid, [&]() { return "invalid empty snapshot " + seg_file.filename(); }); + std::map>> groups; - if (seg_file.block_to() > segment_max_block) { - segment_max_block = seg_file.block_to() - 1; - } - } catch (const std::exception& exc) { - SILK_WARN << "Reopen failed for: " << seg_file.path() << " [" << exc.what() << "]"; - if (!optimistic) throw; - } + for (size_t i = 0; i < all_snapshot_paths.size(); i++) { + auto& path = all_snapshot_paths[i]; + auto& group = groups[path.block_from()][false]; + group[path.type()] = i; } - segment_max_block_ = segment_max_block; - idx_max_block_ = max_idx_available(); -} -bool SnapshotRepository::reopen_header(const SnapshotPath& seg_file) { - return reopen(header_segments_, seg_file); -} + for (size_t i = 0; i < all_index_paths.size(); i++) { + auto& path = all_index_paths[i]; + auto& group = groups[path.block_from()][true]; + group[path.type()] = i; + } -bool SnapshotRepository::reopen_body(const SnapshotPath& seg_file) { - return reopen(body_segments_, seg_file); -} + BlockNum num = 0; + if (!groups.empty()) { + num = groups.begin()->first; + } -bool SnapshotRepository::reopen_transaction(const SnapshotPath& seg_file) { - return reopen(tx_segments_, seg_file); -} + while (groups.contains(num) && + (groups[num][false].size() == SnapshotBundle::kSnapshotsCount) && + (groups[num][true].size() == SnapshotBundle::kIndexesCount)) { + if (!bundles_.contains(num)) { + auto snapshot_path = [&](SnapshotType type) { + return all_snapshot_paths[groups[num][false][type]]; + }; + auto index_path = [&](SnapshotType type) { + return all_index_paths[groups[num][true][type]]; + }; + + SnapshotBundle bundle{ + .header_snapshot = Snapshot(snapshot_path(SnapshotType::headers)), + .idx_header_hash = Index(index_path(SnapshotType::headers)), -template -const T* SnapshotRepository::find_segment(const SnapshotsByPath& segments, BlockNum number) const { - if (number > max_block_available()) { - return nullptr; + .body_snapshot = Snapshot(snapshot_path(SnapshotType::bodies)), + .idx_body_number = Index(index_path(SnapshotType::bodies)), + + .txn_snapshot = Snapshot(snapshot_path(SnapshotType::transactions)), + .idx_txn_hash = Index(index_path(SnapshotType::transactions)), + .idx_txn_hash_2_block = Index(index_path(SnapshotType::transactions_to_block)), + }; + + bundle.reopen(); + + bundles_.emplace(num, std::move(bundle)); + } + + auto& bundle = bundles_.at(num); + + if (num < bundle.block_to()) { + num = bundle.block_to(); + } else { + break; + } } + SILK_INFO << "Total reopened bundles: " << bundles_count() + << " snapshots: " << total_snapshots_count() + << " indexes: " << total_indexes_count(); +} + +const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { // Search for target segment in reverse order (from the newest segment to the oldest one) - for (auto it = segments.rbegin(); it != segments.rend(); ++it) { - const auto& snapshot = it->second; + for (const auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; // We're looking for the segment containing the target block number in its block range - if (snapshot->block_from() <= number && number < snapshot->block_to()) { - return snapshot.get(); + if (((bundle.block_from() <= number) && (number < bundle.block_to())) || + ((bundle.block_from() == number) && (bundle.block_from() == bundle.block_to()))) { + return &bundle; } } return nullptr; } -template -bool SnapshotRepository::reopen(SnapshotsByPath& segments, const SnapshotPath& seg_file) { - if (segments.find(seg_file.path()) == segments.end()) { - auto segment = std::make_unique(seg_file); - segment->reopen_segment(); - if (segment->empty()) return false; - segments[seg_file.path()] = std::move(segment); - } - SILKWORM_ASSERT(segments.find(seg_file.path()) != segments.end()); - const auto& segment = segments[seg_file.path()]; - segment->reopen_index(); - return true; -} - SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { ensure(fs::exists(settings_.repository_dir), [&]() { return "SnapshotRepository: " + settings_.repository_dir.string() + " does not exist"; }); @@ -367,24 +351,26 @@ SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { return snapshot_files; } -BlockNum SnapshotRepository::max_idx_available() const { - BlockNum max_block_headers{0}; - for (auto& [_, header_seg] : header_segments_) { - if (!header_seg->idx_header_hash()) break; - max_block_headers = header_seg->block_to() - 1; - } - BlockNum max_block_bodies{0}; - for (auto& [_, body_seg] : body_segments_) { - if (!body_seg->idx_body_number()) break; - max_block_bodies = body_seg->block_to() - 1; - } - BlockNum max_block_txs{0}; - for (auto& [_, tx_seg] : tx_segments_) { - if (!tx_seg->idx_txn_hash() || !tx_seg->idx_txn_hash_2_block()) break; - max_block_txs = tx_seg->block_to() - 1; - } +bool is_stale_index_path(const SnapshotPath& index_path) { + SnapshotType snapshot_type = (index_path.type() == SnapshotType::transactions_to_block) + ? SnapshotType::transactions + : index_path.type(); + SnapshotPath snapshot_path = index_path.snapshot_path_for_type(snapshot_type); + return (index_path.last_write_time() < snapshot_path.last_write_time()); +} - return std::min(max_block_headers, std::min(max_block_bodies, max_block_txs)); +SnapshotPathList SnapshotRepository::stale_index_paths() const { + SnapshotPathList results; + auto all_files = this->get_idx_files(); + std::copy_if(all_files.begin(), all_files.end(), std::back_inserter(results), is_stale_index_path); + return results; +} + +void SnapshotRepository::remove_stale_indexes() const { + for (auto& path : stale_index_paths()) { + const bool removed = fs::remove(path.path()); + ensure(removed, [&]() { return "SnapshotRepository::remove_stale_indexes: cannot remove index file " + path.path().string(); }); + } } } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index af545988f5..6a025cd93e 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -16,42 +16,96 @@ #pragma once +#include +#include #include #include #include #include -#include #include #include #include +#include +#include #include #include -#include +#include namespace silkworm::snapshots { struct IndexBuilder; -template -concept ConcreteSnapshot = std::is_base_of::value; +struct SnapshotBundle { + Snapshot header_snapshot; + //! Index header_hash -> block_num -> headers_segment_offset + Index idx_header_hash; + + Snapshot body_snapshot; + //! Index block_num -> bodies_segment_offset + Index idx_body_number; + + Snapshot txn_snapshot; + //! Index transaction_hash -> txn_id -> transactions_segment_offset + Index idx_txn_hash; + //! Index transaction_hash -> block_num + Index idx_txn_hash_2_block; + + static constexpr size_t kSnapshotsCount = 3; + static constexpr size_t kIndexesCount = 4; + + std::array, kSnapshotsCount> snapshots() { + return { + header_snapshot, + body_snapshot, + txn_snapshot, + }; + } + + std::array, kIndexesCount> indexes() { + return { + idx_header_hash, + idx_body_number, + idx_txn_hash, + idx_txn_hash_2_block, + }; + } -template -using SnapshotsByPath = std::map>; + const Snapshot& snapshot(SnapshotType type) const { + switch (type) { + case headers: + return header_snapshot; + case bodies: + return body_snapshot; + case transactions: + case transactions_to_block: + return txn_snapshot; + } + assert(false); + return header_snapshot; + } + + const Index& index(SnapshotType type) const { + switch (type) { + case headers: + return idx_header_hash; + case bodies: + return idx_body_number; + case transactions: + return idx_txn_hash; + case transactions_to_block: + return idx_txn_hash_2_block; + } + assert(false); + return idx_header_hash; + } -template -using SnapshotWalker = std::function; -using HeaderSnapshotWalker = SnapshotWalker; -using BodySnapshotWalker = SnapshotWalker; -using TransactionSnapshotWalker = SnapshotWalker; + // assume that all snapshots have the same block range, and use one of them + BlockNum block_from() const { return header_snapshot.block_from(); } + BlockNum block_to() const { return header_snapshot.block_to(); } -struct SnapshotBundle { - SnapshotPath headers_snapshot_path; - std::unique_ptr headers_snapshot; - SnapshotPath bodies_snapshot_path; - std::unique_ptr bodies_snapshot; - SnapshotPath tx_snapshot_path; - std::unique_ptr tx_snapshot; + void reopen(); + void close(); }; //! Read-only repository for all snapshot files. @@ -68,68 +122,57 @@ class SnapshotRepository { [[nodiscard]] const SnapshotSettings& settings() const { return settings_; } [[nodiscard]] std::filesystem::path path() const { return settings_.repository_dir; } - [[nodiscard]] BlockNum max_block_available() const { return std::min(segment_max_block_, idx_max_block_); } - - [[nodiscard]] SnapshotPathList get_segment_files() const { - return get_files(kSegmentExtension); - } - - void add_snapshot_bundle(SnapshotBundle&& bundle); - - void reopen_list(const SnapshotPathList& segment_files, bool optimistic = false); - void reopen_file(const SnapshotPath& segment_path, bool optimistic = false); void reopen_folder(); void close(); - bool for_each_header(const HeaderSnapshot::Walker& fn); - bool for_each_body(const BodySnapshot::Walker& fn); + void add_snapshot_bundle(SnapshotBundle bundle); - [[nodiscard]] std::size_t header_snapshots_count() const { return header_segments_.size(); } - [[nodiscard]] std::size_t body_snapshots_count() const { return body_segments_.size(); } - [[nodiscard]] std::size_t tx_snapshots_count() const { return tx_segments_.size(); } - [[nodiscard]] std::size_t total_snapshots_count() const { - return header_snapshots_count() + body_snapshots_count() + tx_snapshots_count(); - } + [[nodiscard]] std::size_t bundles_count() const { return bundles_.size(); } + [[nodiscard]] std::size_t total_snapshots_count() const { return bundles_count() * SnapshotBundle::kSnapshotsCount; } + [[nodiscard]] std::size_t total_indexes_count() const { return bundles_count() * SnapshotBundle::kIndexesCount; } + + //! All types of .seg and .idx files are available up to this block number + [[nodiscard]] BlockNum max_block_available() const; [[nodiscard]] std::vector missing_block_ranges() const; - enum ViewResult { - kSnapshotNotFound, - kWalkFailed, - kWalkSuccess + + [[nodiscard]] std::vector> missing_indexes() const; + void remove_stale_indexes() const; + + struct SnapshotAndIndex { + const Snapshot& snapshot; + const Index& index; }; - ViewResult view_header_segment(BlockNum number, const HeaderSnapshotWalker& walker); - ViewResult view_body_segment(BlockNum number, const BodySnapshotWalker& walker); - ViewResult view_tx_segment(BlockNum number, const TransactionSnapshotWalker& walker); - std::size_t view_header_segments(const HeaderSnapshotWalker& walker); - std::size_t view_body_segments(const BodySnapshotWalker& walker); - std::size_t view_tx_segments(const TransactionSnapshotWalker& walker); + using SnapshotWalker = std::function; - [[nodiscard]] const HeaderSnapshot* get_header_segment(const SnapshotPath& path) const; - [[nodiscard]] const BodySnapshot* get_body_segment(const SnapshotPath& path) const; - [[nodiscard]] const TransactionSnapshot* get_tx_segment(const SnapshotPath& path) const; + using SnapshotBundleWalker = std::function; + std::size_t view_bundles(const SnapshotBundleWalker& walker); - [[nodiscard]] const HeaderSnapshot* find_header_segment(BlockNum number) const; - [[nodiscard]] const BodySnapshot* find_body_segment(BlockNum number) const; - [[nodiscard]] const TransactionSnapshot* find_tx_segment(BlockNum number) const; + std::size_t view_header_segments(const SnapshotWalker& walker); + std::size_t view_body_segments(const SnapshotWalker& walker); + std::size_t view_tx_segments(const SnapshotWalker& walker); - [[nodiscard]] std::vector> missing_indexes() const; + [[nodiscard]] std::optional find_header_segment(BlockNum number) const; + [[nodiscard]] std::optional find_body_segment(BlockNum number) const; + [[nodiscard]] std::optional find_tx_segment(BlockNum number) const; - [[nodiscard]] BlockNum segment_max_block() const { return segment_max_block_; } - [[nodiscard]] BlockNum idx_max_block() const { return idx_max_block_; } + using HeaderWalker = std::function; + bool for_each_header(const HeaderWalker& fn); + + using BodyWalker = std::function; + bool for_each_body(const BodyWalker& fn); [[nodiscard]] std::optional find_block_number(Hash txn_hash) const; private: - bool reopen_header(const SnapshotPath& seg_file); - bool reopen_body(const SnapshotPath& seg_file); - bool reopen_transaction(const SnapshotPath& seg_file); - - template - const T* find_segment(const SnapshotsByPath& segments, BlockNum number) const; + std::size_t view_segments(SnapshotType type, const SnapshotWalker& walker); + const SnapshotBundle* find_bundle(BlockNum number) const; + std::optional find_segment(SnapshotType type, BlockNum number) const; - template - static bool reopen(SnapshotsByPath& segments, const SnapshotPath& seg_file); + [[nodiscard]] SnapshotPathList get_segment_files() const { + return get_files(kSegmentExtension); + } [[nodiscard]] SnapshotPathList get_idx_files() const { return get_files(kIdxExtension); @@ -137,25 +180,13 @@ class SnapshotRepository { [[nodiscard]] SnapshotPathList get_files(const std::string& ext) const; - [[nodiscard]] BlockNum max_idx_available() const; + SnapshotPathList stale_index_paths() const; //! The configuration settings for snapshots SnapshotSettings settings_; - //! All types of .seg files are available - up to this block number - BlockNum segment_max_block_{0}; - - //! All types of .idx files are available - up to this block number - BlockNum idx_max_block_{0}; - - //! The snapshots containing the block Headers - SnapshotsByPath header_segments_; - - //! The snapshots containing the block Bodies - SnapshotsByPath body_segments_; - - //! The snapshots containing the Transactions - SnapshotsByPath tx_segments_; + //! Full snapshot bundles ordered by block_from + std::map bundles_; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 3f87ee7c7e..32a3f7db26 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -16,6 +16,9 @@ #include "repository.hpp" +#include +#include + #include #include @@ -38,19 +41,17 @@ TEST_CASE("SnapshotRepository::SnapshotRepository", "[silkworm][node][snapshot]" CHECK_NOTHROW(SnapshotRepository{SnapshotSettings{}}); } -TEST_CASE("SnapshotRepository::reopen_folder", "[silkworm][node][snapshot]") { +TEST_CASE("SnapshotRepository::reopen_folder.partial_bundle", "[silkworm][node][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test::TemporarySnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; test::TemporarySnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; test::TemporarySnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; - SnapshotSettings settings{tmp_snapshot_1.path().parent_path()}; + SnapshotSettings settings{tmp_dir.path()}; SnapshotRepository repository{settings}; - CHECK_THROWS_AS(repository.reopen_folder(), std::logic_error); - CHECK(repository.header_snapshots_count() == 0); - CHECK(repository.body_snapshots_count() == 0); - CHECK(repository.tx_snapshots_count() == 0); + repository.reopen_folder(); + CHECK(repository.bundles_count() == 0); CHECK(repository.max_block_available() == 0); } @@ -66,62 +67,60 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { SECTION("no snapshots") { repository.reopen_folder(); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kSnapshotNotFound); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); + CHECK(repository.view_header_segments(successful_walk) == 0); CHECK(repository.view_body_segments(successful_walk) == 0); CHECK(repository.view_tx_segments(successful_walk) == 0); - CHECK(repository.find_header_segment(14'500'000) == nullptr); - CHECK(repository.find_body_segment(11'500'000) == nullptr); - CHECK(repository.find_tx_segment(15'000'000) == nullptr); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); } - SECTION("empty snapshots") { + SECTION("partial bundle") { test::TemporarySnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; test::TemporarySnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; test::TemporarySnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; - CHECK_THROWS_AS(repository.reopen_folder(), std::logic_error); + repository.reopen_folder(); + + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kSnapshotNotFound); CHECK(repository.view_header_segments(successful_walk) == 0); // empty snapshots are ignored by repository CHECK(repository.view_body_segments(successful_walk) == 0); // empty snapshots are ignored by repository CHECK(repository.view_tx_segments(successful_walk) == 0); // empty snapshots are ignored by repository - CHECK(repository.find_header_segment(14'500'000) == nullptr); - CHECK(repository.find_body_segment(11'500'000) == nullptr); - CHECK(repository.find_tx_segment(15'000'000) == nullptr); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); } SECTION("non-empty snapshots") { - test::HelloWorldSnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; + test::SampleHeaderSnapshotFile tmp_snapshot_1{tmp_dir.path()}; + test::SampleBodySnapshotFile tmp_snapshot_2{tmp_dir.path()}; + test::SampleTransactionSnapshotFile tmp_snapshot_3{tmp_dir.path()}; + + for (auto& index_builder : repository.missing_indexes()) { + index_builder->build(); + } + repository.reopen_folder(); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_body_segment(11'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_tx_segment(15'000'000, failing_walk) == ViewResult::kWalkFailed); CHECK(repository.view_header_segments(failing_walk) == 1); CHECK(repository.view_body_segments(failing_walk) == 1); CHECK(repository.view_tx_segments(failing_walk) == 1); - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kWalkSuccess); + CHECK(repository.find_header_segment(1'500'000).has_value()); + CHECK(repository.find_body_segment(1'500'000).has_value()); + CHECK(repository.find_tx_segment(1'500'000).has_value()); + CHECK(repository.view_header_segments(successful_walk) == 1); CHECK(repository.view_body_segments(successful_walk) == 1); CHECK(repository.view_tx_segments(successful_walk) == 1); - - // CHECK(repository.find_header_segment(14'500'000) != nullptr); // needs index after check vs max_block_available - // CHECK(repository.find_body_segment(11'500'000) != nullptr); - // CHECK(repository.find_tx_segment(15'000'000) != nullptr); } } @@ -153,22 +152,22 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; SECTION("header w/o index") { - CHECK(repository.find_header_segment(1'500'011) == nullptr); - CHECK(repository.find_header_segment(1'500'012) == nullptr); - CHECK(repository.find_header_segment(1'500'013) == nullptr); - CHECK(repository.find_header_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'011)); + CHECK_FALSE(repository.find_header_segment(1'500'012)); + CHECK_FALSE(repository.find_header_segment(1'500'013)); + CHECK_FALSE(repository.find_header_segment(1'500'014)); } SECTION("body w/o index") { - CHECK(repository.find_body_segment(1'500'011) == nullptr); - CHECK(repository.find_body_segment(1'500'012) == nullptr); - CHECK(repository.find_body_segment(1'500'013) == nullptr); - CHECK(repository.find_body_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'011)); + CHECK_FALSE(repository.find_body_segment(1'500'012)); + CHECK_FALSE(repository.find_body_segment(1'500'013)); + CHECK_FALSE(repository.find_body_segment(1'500'014)); } SECTION("tx w/o index") { - CHECK(repository.find_tx_segment(1'500'011) == nullptr); - CHECK(repository.find_tx_segment(1'500'012) == nullptr); - CHECK(repository.find_tx_segment(1'500'013) == nullptr); - CHECK(repository.find_tx_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'011)); + CHECK_FALSE(repository.find_tx_segment(1'500'012)); + CHECK_FALSE(repository.find_tx_segment(1'500'013)); + CHECK_FALSE(repository.find_tx_segment(1'500'014)); } test::SampleHeaderSnapshotPath header_snapshot_path{header_snapshot.path()}; // necessary to tweak the block numbers @@ -184,25 +183,25 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { REQUIRE_NOTHROW(repository.reopen_folder()); SECTION("header w/ index") { - CHECK(repository.find_header_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'011)); // CHECK(repository.find_header_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_header_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_header_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'014)); } SECTION("body w/ index") { - CHECK(repository.find_body_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'011)); // CHECK(repository.find_body_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_body_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_body_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'014)); } SECTION("tx w/ index") { - CHECK(repository.find_tx_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'011)); // CHECK(repository.find_tx_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_tx_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_tx_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'014)); } SECTION("greater than max_block_available") { - CHECK(repository.find_body_segment(repository.max_block_available() + 1) == nullptr); + CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 1)); } } @@ -245,4 +244,41 @@ TEST_CASE("SnapshotRepository::find_block_number", "[silkworm][node][snapshot]") // CHECK_FALSE(block_number.has_value()); // needs correct key check in index } +template +static auto move_last_write_time(const std::filesystem::path& p, const std::chrono::duration& d) { + const auto ftime = std::filesystem::last_write_time(p); + std::filesystem::last_write_time(p, ftime + d); + return std::filesystem::last_write_time(p) - ftime; +} + +TEST_CASE("SnapshotRepository::remove_stale_indexes", "[silkworm][node][snapshot][index]") { + using namespace std::chrono_literals; + + SetLogVerbosityGuard guard{log::Level::kNone}; + TemporaryDirectory tmp_dir; + SnapshotSettings settings{tmp_dir.path()}; + SnapshotRepository repository{settings}; + + // create a snapshot file + test::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir.path()}; + test::SampleHeaderSnapshotPath header_snapshot_path{header_snapshot_file.path()}; + + // build an index + auto index_builder = HeaderIndex::make(header_snapshot_path); + REQUIRE_NOTHROW(index_builder.build()); + auto index_path = index_builder.path().path(); + + // the index is not stale + repository.remove_stale_indexes(); + CHECK(std::filesystem::exists(index_path)); + + // move the snapshot last write time 1 hour to the future to make its index "stale" + const auto last_write_time_diff = move_last_write_time(header_snapshot_path.path(), 1h); + CHECK(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); + + // the index is stale + repository.remove_stale_indexes(); + CHECK_FALSE(std::filesystem::exists(index_path)); +} + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/seg/decompressor.cpp b/silkworm/db/snapshots/seg/decompressor.cpp index d3a8ea6e22..155fc32d56 100644 --- a/silkworm/db/snapshots/seg/decompressor.cpp +++ b/silkworm/db/snapshots/seg/decompressor.cpp @@ -381,7 +381,7 @@ void Decompressor::open() { compressed_file_->advise_random(); } -Decompressor::Iterator Decompressor::begin() { +Decompressor::Iterator Decompressor::begin() const { ensure(bool(compressed_file_), "decompressor closed, call open first"); auto read_mode_guard = std::make_shared(*compressed_file_, ReadMode::kSequential, ReadMode::kRandom); Iterator it{this, std::move(read_mode_guard)}; @@ -392,6 +392,27 @@ Decompressor::Iterator Decompressor::begin() { return end(); } +Decompressor::Iterator Decompressor::seek(uint64_t offset, ByteView prefix) const { + SILK_TRACE << "Decompressor::seek offset: " << offset; + Iterator it = make_iterator(); + it.reset(offset); + if (!it.has_next()) { + return end(); + } + + if (!prefix.empty() && !it.has_prefix(prefix)) { + return end(); + } + + try { + ++it; + return it; + } catch (const std::runtime_error& re) { + SILK_WARN << "Decompressor::seek invalid offset: " << offset << " what: " << re.what(); + return end(); + } +} + void Decompressor::close() { compressed_file_.reset(); } diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index 2f8c8286de..fe5ed4c6c4 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -224,7 +224,7 @@ class Decompressor { //! input_iterator concept boilerplate using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using value_type = Bytes; using pointer = value_type*; using reference = value_type&; @@ -271,9 +271,14 @@ class Decompressor { std::shared_ptr read_mode_guard_; }; + static_assert(std::input_or_output_iterator); + explicit Decompressor(std::filesystem::path compressed_path, std::optional compressed_region = {}); ~Decompressor(); + Decompressor(Decompressor&&) = default; + Decompressor& operator=(Decompressor&&) = default; + [[nodiscard]] const std::filesystem::path& compressed_path() const { return compressed_path_; } [[nodiscard]] std::string compressed_filename() const { return compressed_path_.filename().string(); } @@ -296,9 +301,16 @@ class Decompressor { [[nodiscard]] Iterator make_iterator() const { return Iterator{this, {}}; } //! Begin reading the words, expected to read in sequential order - Iterator begin(); + Iterator begin() const; Iterator end() const { return Iterator::make_end(this); } + /** + * Returns an iterator at a given offset. + * If the offset is invalid it returns end(). + * Seek makes sure that the result starts with a given prefix, otherwise returns end(). + */ + Iterator seek(uint64_t offset, ByteView prefix = {}) const; + void close(); private: diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp deleted file mode 100644 index 47bc9dbb44..0000000000 --- a/silkworm/db/snapshots/snapshot.cpp +++ /dev/null @@ -1,535 +0,0 @@ -/* - Copyright 2022 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "snapshot.hpp" - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace silkworm::snapshots { - -//! Convert the specified decoding result into its string representation -inline std::string to_string(DecodingResult result) { - std::string s; - if (!result.has_value()) { - s.append(magic_enum::enum_name(result.error())); - } - return s; -} - -Snapshot::Snapshot(SnapshotPath path, std::optional segment_region) - : path_(std::move(path)), decoder_{path_.path(), segment_region} {} - -MemoryMappedRegion Snapshot::memory_file_region() const { - const auto memory_file{decoder_.memory_file()}; - if (!memory_file) return MemoryMappedRegion{}; - return memory_file->region(); -} - -void Snapshot::reopen_segment() { - close_segment(); - - // Open decompressor that opens the mapped file in turns - decoder_.open(); -} - -bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) { - WordItem item; - for (auto it = decoder_.begin(); it != decoder_.end(); ++it, ++item.position) { - item.value = std::move(*it); - item.offset = it.current_word_offset(); - SILK_TRACE << "Snapshot::for_each_item item: offset=" << item.offset - << " position=" << item.position - << " value=" << to_hex(item.value); - - const bool result = fn(item); - if (!result) return false; - } - return true; -} - -std::optional Snapshot::next_item(uint64_t offset, ByteView prefix) const { - SILK_TRACE << "Snapshot::next_item offset: " << offset; - auto data_iterator = decoder_.make_iterator(); - data_iterator.reset(offset); - - std::optional item; - if (!data_iterator.has_next()) { - return item; - } - if (!prefix.empty() && !data_iterator.has_prefix(prefix)) { - return item; - } - - item = WordItem{}; - try { - item->offset = data_iterator.next(item->value); - } catch (const std::runtime_error& re) { - SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what(); - return {}; - } - - return item; -} - -void Snapshot::close() { - close_segment(); - close_index(); -} - -void Snapshot::close_segment() { - // Close decompressor that closes the mapped file in turns - decoder_.close(); -} - -HeaderSnapshot::HeaderSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} - -HeaderSnapshot::HeaderSnapshot(SnapshotPath path, MappedHeadersSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), idx_header_hash_region_{mapped.header_hash_index} {} - -HeaderSnapshot::~HeaderSnapshot() { - close(); -} - -bool HeaderSnapshot::for_each_header(const Walker& walker) { - return for_each_item([this, walker](const WordItem& item) -> bool { - BlockHeader header; - const auto decode_ok = decode_header(item, header); - if (!decode_ok) { - return false; - } - return walker(&header); - }); -} - -std::optional HeaderSnapshot::next_header(uint64_t offset, std::optional hash) const { - // Get the next data item at specified offset, optionally checking if it starts with block hash first byte - const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - std::optional header; - if (!item) { - return header; - } - header = BlockHeader{}; - const auto decode_ok = decode_header(*item, *header); - if (!decode_ok) { - return {}; - } - return header; -} - -std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash) const { - if (!idx_header_hash_) { - return {}; - } - - // First, get the header ordinal position in snapshot by using block hash as MPHF index - const auto [block_header_position, found] = idx_header_hash_->lookup(block_hash); - SILK_TRACE << "HeaderSnapshot::header_by_hash block_hash: " << block_hash.to_hex() << " block_header_position: " - << block_header_position << " found: " << found; - if (!found) { - return {}; - } - // Then, get the header offset in snapshot by using ordinal lookup - const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); - SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset; - // Finally, read the next header at specified offset - auto header = next_header(block_header_offset, block_hash); - // We *must* ensure that the retrieved header hash matches because there is no way to know if key exists in MPHF - if (header && header->hash() != block_hash) { - header.reset(); - } - return header; -} - -std::optional HeaderSnapshot::header_by_number(BlockNum block_height) const { - if (!idx_header_hash_ || block_height < path_.block_from() || block_height >= path_.block_to()) { - return {}; - } - - // First, calculate the header ordinal position relative to the first block height within snapshot - const auto block_header_position = block_height - idx_header_hash_->base_data_id(); - // Then, get the header offset in snapshot by using ordinal lookup - const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); - // Finally, read the next header at specified offset - return next_header(block_header_offset); -} - -bool HeaderSnapshot::decode_header(const Snapshot::WordItem& item, BlockHeader& header) const { - // First byte in data is first byte of header hash. - ensure(!item.value.empty(), [&]() { return "HeaderSnapshot: hash first byte missing at offset=" + std::to_string(item.offset); }); - - // Skip hash first byte to obtain encoded header RLP data - ByteView encoded_header{item.value.data() + 1, item.value.length() - 1}; - const auto decode_result = rlp::decode(encoded_header, header); - if (!decode_result) { - SILK_TRACE << "decode_header offset: " << item.offset << " error: " << magic_enum::enum_name(decode_result.error()); - return false; - } - - ensure(header.number >= path_.block_from(), - [&]() { return "HeaderSnapshot: number=" + std::to_string(header.number) + " < block_from=" + std::to_string(path_.block_from()); }); - return true; -} - -void HeaderSnapshot::reopen_index() { - ensure(decoder_.is_open(), "HeaderSnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto header_index_path = path().index_file(); - if (header_index_path.exists()) { - idx_header_hash_ = std::make_unique(header_index_path.path(), idx_header_hash_region_); - if (idx_header_hash_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(header_index_path.path()); - ensure(removed, "HeaderSnapshot: cannot remove index file"); - close_index(); - } - } -} - -void HeaderSnapshot::close_index() { - idx_header_hash_.reset(); -} - -BodySnapshot::BodySnapshot(SnapshotPath path, std::optional segment_region) - : Snapshot(std::move(path), segment_region) {} - -BodySnapshot::BodySnapshot(SnapshotPath path, MappedBodiesSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), idx_body_number_region_{mapped.block_num_index} {} - -BodySnapshot::~BodySnapshot() { - close(); -} - -bool BodySnapshot::for_each_body(const Walker& walker) { - return for_each_item([&](const WordItem& item) -> bool { - BlockBodyForStorage body; - success_or_throw(decode_body(item, body)); - const BlockNum number = path_.block_from() + item.position; - return walker(number, &body); - }); -} - -std::pair BodySnapshot::compute_txs_amount() { - uint64_t first_tx_id{0}, last_tx_id{0}, last_txs_amount{0}; - - const bool read_ok = for_each_body([&](BlockNum number, const StoredBlockBody* body) { - if (number == path_.block_from()) { - first_tx_id = body->base_txn_id; - } - if (number == path_.block_to() - 1) { - last_tx_id = body->base_txn_id; - last_txs_amount = body->txn_count; - } - return true; - }); - if (!read_ok) throw std::runtime_error{"error computing txs amount in: " + path_.path().string()}; - if (first_tx_id == 0 && last_tx_id == 0) throw std::runtime_error{"empty body snapshot: " + path_.path().string()}; - - SILK_TRACE << "first_tx_id: " << first_tx_id << " last_tx_id: " << last_tx_id << " last_txs_amount: " << last_txs_amount; - - return {first_tx_id, last_tx_id + last_txs_amount - first_tx_id}; -} - -std::optional BodySnapshot::next_body(uint64_t offset) const { - const auto item = next_item(offset); - std::optional stored_body; - if (!item) { - return stored_body; - } - stored_body = StoredBlockBody{}; - const auto decode_ok = decode_body(*item, *stored_body); - if (!decode_ok) { - return {}; - } - ensure(stored_body->base_txn_id >= idx_body_number_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(stored_body->base_txn_id); }); - return stored_body; -} - -std::optional BodySnapshot::body_by_number(BlockNum block_height) const { - if (!idx_body_number_ || block_height < idx_body_number_->base_data_id()) { - return {}; - } - - // First, calculate the body ordinal position relative to the first block height within snapshot - const auto block_body_position = block_height - idx_body_number_->base_data_id(); - // Then, get the body offset in snapshot by using ordinal lookup - const auto block_body_offset = idx_body_number_->ordinal_lookup(block_body_position); - // Finally, read the next body at specified offset - return next_body(block_body_offset); -} - -DecodingResult BodySnapshot::decode_body(const Snapshot::WordItem& item, StoredBlockBody& body) { - ByteView body_rlp{item.value.data(), item.value.length()}; - SILK_TRACE << "decode_body offset: " << item.offset << " body_rlp: " << to_hex(body_rlp); - const auto result = decode_stored_block_body(body_rlp, body); - SILK_TRACE << "decode_body offset: " << item.offset << " txn_count: " << body.txn_count << " base_txn_id:" << body.base_txn_id; - return result; -} - -void BodySnapshot::reopen_index() { - ensure(decoder_.is_open(), "BodySnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto body_index_path = path().index_file(); - if (body_index_path.exists()) { - idx_body_number_ = std::make_unique(body_index_path.path(), idx_body_number_region_); - if (idx_body_number_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(body_index_path.path()); - ensure(removed, "BodySnapshot: cannot remove index file"); - close_index(); - } - } -} - -void BodySnapshot::close_index() { - idx_body_number_.reset(); -} - -TransactionSnapshot::TransactionSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} - -TransactionSnapshot::TransactionSnapshot(SnapshotPath path, MappedTransactionsSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), - idx_txn_hash_region_{mapped.tx_hash_index}, - idx_txn_hash_2_block_region_{mapped.tx_hash_2_block_index} {} - -TransactionSnapshot::~TransactionSnapshot() { - close(); -} - -// Skip first byte of tx hash plus sender address length for transaction decoding -constexpr int kTxRlpDataOffset{1 + kAddressLength}; - -[[nodiscard]] std::optional TransactionSnapshot::next_txn(uint64_t offset, std::optional hash) const { - // Get the next data item at specified offset, optionally checking if it starts with txn hash first byte - const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - std::optional transaction; - if (!item) { - return transaction; - } - // Decode transaction from the extracted data item - transaction = Transaction{}; - const auto decode_ok = decode_txn(*item, *transaction); - if (!decode_ok) { - return {}; - } - return transaction; -} - -std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash) const { - if (!idx_txn_hash_) { - return {}; - } - - // First, get the transaction ordinal position in snapshot by using block hash as MPHF index - const auto [txn_position, found] = idx_txn_hash_->lookup(txn_hash); - if (!found) { - return {}; - } - // Then, get the transaction offset in snapshot by using ordinal lookup - const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); - // Finally, read the next transaction at specified offset - auto txn = next_txn(txn_offset, txn_hash); - // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF - if (txn && txn->hash() != txn_hash) { - return {}; - } - return txn; -} - -std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const { - if (!idx_txn_hash_) { - return {}; - } - - // First, calculate the transaction ordinal position relative to the first transaction ID within snapshot - const auto txn_position = txn_id - idx_txn_hash_->base_data_id(); - // Then, get the transaction offset in snapshot by using ordinal lookup - const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); - // Finally, read the next transaction at specified offset - return next_txn(txn_offset); -} - -std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& txn_hash) const { - if (!idx_txn_hash_2_block_) { - return {}; - } - - // Lookup the block number using dedicated MPHF index - const auto [block_number, found] = idx_txn_hash_2_block_->lookup(txn_hash); - if (!found) { - return {}; - } - - // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) - const auto transaction{txn_by_hash(txn_hash)}; - if (!transaction) { - return {}; - } - - return block_number; -} - -std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, uint64_t txn_count, bool read_senders) const { - std::vector transactions; - transactions.reserve(txn_count); - - for_each_txn(base_txn_id, txn_count, [&transactions, read_senders](uint64_t i, ByteView senders_data, ByteView tx_rlp) -> bool { - ByteView tx_payload = slice_tx_payload(tx_rlp); - - Transaction transaction; - const auto payload_result = rlp::decode_transaction(tx_payload, transaction, rlp::Eip2718Wrapping::kBoth); - ensure(payload_result.has_value(), - [&]() { return "TransactionSnapshot: cannot decode tx payload: " + to_hex(tx_payload) + " i: " + std::to_string(i) + - " error: " + to_string(payload_result); }); - - if (read_senders) { - transaction.set_sender(bytes_to_address(senders_data)); - } - - transactions.push_back(std::move(transaction)); - return true; - }); - - return transactions; -} - -std::vector TransactionSnapshot::txn_rlp_range(uint64_t base_txn_id, uint64_t txn_count) const { - std::vector rlp_txs; - rlp_txs.reserve(txn_count); - - for_each_txn(base_txn_id, txn_count, [&rlp_txs](uint64_t /*i*/, ByteView /*senders_data*/, ByteView tx_rlp) -> bool { - ByteView tx_payload = slice_tx_payload(tx_rlp); - rlp_txs.emplace_back(tx_payload); - return true; - }); - - return rlp_txs; -} - -std::pair TransactionSnapshot::slice_tx_data(const WordItem& item) { - const auto& buffer{item.value}; - const auto buffer_size{buffer.size()}; - SILK_TRACE << "slice_tx_data offset: " << item.offset << " buffer: " << to_hex(buffer); - - ensure(buffer_size >= kTxRlpDataOffset, [&]() { return "TransactionSnapshot: too short record: " + std::to_string(buffer_size); }); - - // Skip first byte in data as it is first byte of transaction hash - ByteView senders_data{buffer.data() + 1, kAddressLength}; - ByteView tx_rlp{buffer.data() + kTxRlpDataOffset, buffer_size - kTxRlpDataOffset}; - - SILK_TRACE << "slice_tx_data offset: " << item.offset << " tx_hash_first_byte: " << to_hex(buffer[0]) - << " senders_data: " << to_hex(senders_data) << " tx_rlp: " << to_hex(tx_rlp); - - return {senders_data, tx_rlp}; -} - -ByteView TransactionSnapshot::slice_tx_payload(ByteView tx_rlp) { - ByteView tx_envelope{tx_rlp}; - - rlp::Header tx_header; - TransactionType tx_type{}; - const auto envelope_result = rlp::decode_transaction_header_and_type(tx_envelope, tx_header, tx_type); - ensure(envelope_result.has_value(), - [&]() { return "TransactionSnapshot: cannot decode tx envelope: " + to_hex(tx_envelope) + " error: " + to_string(envelope_result); }); - - const std::size_t tx_payload_offset = tx_type == TransactionType::kLegacy ? 0 : (tx_rlp.length() - tx_header.payload_length); - ByteView tx_payload{tx_rlp.substr(tx_payload_offset)}; - - return tx_payload; -} - -//! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes -DecodingResult TransactionSnapshot::decode_txn(const WordItem& item, Transaction& tx) { - auto [senders_data, tx_rlp] = slice_tx_data(item); - const auto result = rlp::decode(tx_rlp, tx); - tx.set_sender(bytes_to_address(senders_data)); // Must happen after rlp::decode because it resets sender - return result; -} - -void TransactionSnapshot::for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const { - if (!idx_txn_hash_ || txn_count == 0) { - return; - } - - ensure(base_txn_id >= idx_txn_hash_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(base_txn_id); }); - - // First, calculate the first transaction ordinal position relative to the base transaction within snapshot - const auto first_txn_position = base_txn_id - idx_txn_hash_->base_data_id(); - - // Then, get the first transaction offset in snapshot by using ordinal lookup - const auto first_txn_offset = idx_txn_hash_->ordinal_lookup(first_txn_position); - - // Finally, iterate over each encoded transaction item - for (uint64_t i{0}, offset{first_txn_offset}; i < txn_count; ++i) { - const auto item = next_item(offset); - ensure(item.has_value(), [&]() { return "TransactionSnapshot: record not found at offset=" + std::to_string(offset); }); - - auto [senders_data, tx_rlp] = slice_tx_data(*item); - - const bool go_on{walker(i, senders_data, tx_rlp)}; - if (!go_on) return; - - offset = item->offset; - } -} - -void TransactionSnapshot::reopen_index() { - ensure(decoder_.is_open(), "TransactionSnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto tx_hash_index_path = path().index_file_for_type(SnapshotType::transactions); - if (tx_hash_index_path.exists()) { - idx_txn_hash_ = std::make_unique(tx_hash_index_path.path(), idx_txn_hash_region_); - if (idx_txn_hash_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(tx_hash_index_path.path()); - ensure(removed, "TransactionSnapshot: cannot remove tx_hash index file"); - close_index(); - } - } - - const auto tx_hash_2_block_index_path = path().index_file_for_type(SnapshotType::transactions_to_block); - if (tx_hash_2_block_index_path.exists()) { - idx_txn_hash_2_block_ = std::make_unique(tx_hash_2_block_index_path.path(), idx_txn_hash_2_block_region_); - if (idx_txn_hash_2_block_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(tx_hash_2_block_index_path.path()); - ensure(removed, "TransactionSnapshot: cannot remove tx_hash_2_block index file"); - close_index(); - } - } -} - -void TransactionSnapshot::close_index() { - idx_txn_hash_.reset(); - idx_txn_hash_2_block_.reset(); -} - -} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp deleted file mode 100644 index 1e45ea49c6..0000000000 --- a/silkworm/db/snapshots/snapshot.hpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - Copyright 2022 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace silkworm::snapshots { - -struct MappedHeadersSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion header_hash_index; -}; - -struct MappedBodiesSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion block_num_index; -}; - -struct MappedTransactionsSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion tx_hash_index; - MemoryMappedRegion tx_hash_2_block_index; -}; - -//! \brief Generic snapshot containing data points for a specific block interval [block_from, block_to). -//! \warning The snapshot segment can also be externally managed. This means that the memory-mapping can happen -//! outside of this class and a \code Snapshot instance can be created by specifying the \code MemoryMappedRegion -//! segment containing the information about the memory region already mapped. This must be taken into account -//! because we must avoid to memory-map it again. -class Snapshot { - public: - static inline const auto kPageSize{os::page_size()}; - - explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); - virtual ~Snapshot() = default; - - [[nodiscard]] SnapshotPath path() const { return path_; } - [[nodiscard]] std::filesystem::path fs_path() const { return path_.path(); } - - [[nodiscard]] BlockNum block_from() const { return path_.block_from(); } - [[nodiscard]] BlockNum block_to() const { return path_.block_to(); } - - [[nodiscard]] bool empty() const { return item_count() == 0; } - [[nodiscard]] std::size_t item_count() const { return decoder_.words_count(); } - - [[nodiscard]] MemoryMappedRegion memory_file_region() const; - - void reopen_segment(); - virtual void reopen_index() = 0; - - struct WordItem { - uint64_t position{0}; - uint64_t offset{0}; - Bytes value; - - WordItem() { - value.reserve(kPageSize); - } - }; - using WordItemFunc = std::function; - bool for_each_item(const WordItemFunc& fn); - [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; - - void close(); - - protected: - void close_segment(); - virtual void close_index() = 0; - - //! The path of the segment file for this snapshot - SnapshotPath path_; - - seg::Decompressor decoder_; -}; - -class HeaderSnapshot : public Snapshot { - public: - explicit HeaderSnapshot(SnapshotPath path); - HeaderSnapshot(SnapshotPath path, MappedHeadersSnapshot mapped); - ~HeaderSnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_header_hash() const { return idx_header_hash_.get(); } - - using Walker = std::function; - bool for_each_header(const Walker& walker); - [[nodiscard]] std::optional next_header(uint64_t offset, std::optional hash = {}) const; - - [[nodiscard]] std::optional header_by_hash(const Hash& block_hash) const; - [[nodiscard]] std::optional header_by_number(BlockNum block_height) const; - - void reopen_index() override; - - protected: - bool decode_header(const Snapshot::WordItem& item, BlockHeader& header) const; - - void close_index() override; - - private: - //! Index header_hash -> headers_segment_offset - std::unique_ptr idx_header_hash_; - - //! The external memory-mapped region for Headers snapshot index - std::optional idx_header_hash_region_; -}; - -using StoredBlockBody = BlockBodyForStorage; - -class BodySnapshot : public Snapshot { - public: - explicit BodySnapshot(SnapshotPath path, std::optional segment_region = std::nullopt); - BodySnapshot(SnapshotPath path, MappedBodiesSnapshot mapped); - ~BodySnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - - using Walker = std::function; - bool for_each_body(const Walker& walker); - [[nodiscard]] std::optional next_body(uint64_t offset) const; - - std::pair compute_txs_amount(); - - [[nodiscard]] std::optional body_by_number(BlockNum block_height) const; - - void reopen_index() override; - - protected: - static DecodingResult decode_body(const Snapshot::WordItem& item, StoredBlockBody& body); - - void close_index() override; - - private: - //! Index block_num_u64 -> bodies_segment_offset - std::unique_ptr idx_body_number_; - - //! The external memory-mapped region for Bodies snapshot index - std::optional idx_body_number_region_; -}; - -class TransactionSnapshot : public Snapshot { - public: - explicit TransactionSnapshot(SnapshotPath path); - TransactionSnapshot(SnapshotPath path, MappedTransactionsSnapshot mapped); - ~TransactionSnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash() const { return idx_txn_hash_.get(); } - [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash_2_block() const { return idx_txn_hash_2_block_.get(); } - - [[nodiscard]] std::optional next_txn(uint64_t offset, std::optional hash = {}) const; - - [[nodiscard]] std::optional txn_by_hash(const Hash& txn_hash) const; - [[nodiscard]] std::optional txn_by_id(uint64_t txn_id) const; - [[nodiscard]] std::vector txn_range(uint64_t base_txn_id, uint64_t txn_count, bool read_senders) const; - [[nodiscard]] std::vector txn_rlp_range(uint64_t base_txn_id, uint64_t txn_count) const; - - [[nodiscard]] std::optional block_num_by_txn_hash(const Hash& txn_hash) const; - - void reopen_index() override; - - protected: - static std::pair slice_tx_data(const WordItem& item); - static ByteView slice_tx_payload(ByteView tx_rlp); - static DecodingResult decode_txn(const WordItem& item, Transaction& tx); - - using Walker = std::function; - void for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const; - - void close_index() override; - - private: - //! Index transaction_hash -> transactions_segment_offset - std::unique_ptr idx_txn_hash_; - - //! Index transaction_hash -> block_number - std::unique_ptr idx_txn_hash_2_block_; - - //! The external memory-mapped region for Transactions hash->offset index - std::optional idx_txn_hash_region_; - - //! The external memory-mapped region for Transactions hash->block_number index - std::optional idx_txn_hash_2_block_region_; -}; - -} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp new file mode 100644 index 0000000000..e3f7af1d40 --- /dev/null +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -0,0 +1,104 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "snapshot_reader.hpp" + +#include + +#include +#include + +namespace silkworm::snapshots { + +Snapshot::Snapshot( + SnapshotPath path, + std::optional segment_region) + : path_(std::move(path)), + decoder_{path_.path(), segment_region} {} + +Snapshot::~Snapshot() { + close(); +} + +MemoryMappedRegion Snapshot::memory_file_region() const { + const auto memory_file{decoder_.memory_file()}; + if (!memory_file) return MemoryMappedRegion{}; + return memory_file->region(); +} + +void Snapshot::reopen_segment() { + close(); + + // Open decompressor that opens the mapped file in turns + decoder_.open(); +} + +Snapshot::Iterator& Snapshot::Iterator::operator++() { + bool has_next = it_.has_next(); + ++it_; + + if (has_next) { + deserializer_->decode_word(*it_); + deserializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + } else { + deserializer_.reset(); + } + return *this; +} + +bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { + return (lhs.deserializer_ == rhs.deserializer_) && + (!lhs.deserializer_ || (lhs.it_ == rhs.it_)); +} + +Snapshot::Iterator Snapshot::begin(std::shared_ptr deserializer) const { + auto it = decoder_.begin(); + if (it == decoder_.end()) { + return end(); + } + deserializer->decode_word(*it); + deserializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; +} + +Snapshot::Iterator Snapshot::end() const { + return Snapshot::Iterator{decoder_.end(), {}, path()}; +} + +seg::Decompressor::Iterator Snapshot::seek_decoder(uint64_t offset, std::optional hash_prefix) const { + return decoder_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); +} + +Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const { + auto it = seek_decoder(offset, hash_prefix); + if (it == decoder_.end()) { + return end(); + } + try { + deserializer->decode_word(*it); + } catch (...) { + return end(); + } + deserializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; +} + +void Snapshot::close() { + // Close decompressor that closes the mapped file in turns + decoder_.close(); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp new file mode 100644 index 0000000000..e7c5bd7ec7 --- /dev/null +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -0,0 +1,207 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +//! \brief Generic snapshot containing data points for a specific block interval [block_from, block_to). +//! \warning The snapshot segment can also be externally managed. This means that the memory-mapping can happen +//! outside of this class and a \code Snapshot instance can be created by specifying the \code MemoryMappedRegion +//! segment containing the information about the memory region already mapped. This must be taken into account +//! because we must avoid to memory-map it again. +class Snapshot { + public: + class Iterator { + public: + using value_type = std::shared_ptr; + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + Iterator( + seg::Decompressor::Iterator it, + std::shared_ptr deserializer, + SnapshotPath path) + : it_(std::move(it)), deserializer_(std::move(deserializer)), path_(std::move(path)) {} + + value_type operator*() const { return deserializer_; } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++(); + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + + private: + seg::Decompressor::Iterator it_; + std::shared_ptr deserializer_; + SnapshotPath path_; + }; + + static_assert(std::input_iterator); + + static inline const auto kPageSize{os::page_size()}; + + explicit Snapshot( + SnapshotPath path, + std::optional segment_region = std::nullopt); + ~Snapshot(); + + Snapshot(Snapshot&&) = default; + Snapshot& operator=(Snapshot&&) = default; + + [[nodiscard]] SnapshotPath path() const { return path_; } + [[nodiscard]] std::filesystem::path fs_path() const { return path_.path(); } + + [[nodiscard]] BlockNum block_from() const { return path_.block_from(); } + [[nodiscard]] BlockNum block_to() const { return path_.block_to(); } + + [[nodiscard]] bool empty() const { return item_count() == 0; } + [[nodiscard]] std::size_t item_count() const { return decoder_.words_count(); } + + [[nodiscard]] MemoryMappedRegion memory_file_region() const; + + void reopen_segment(); + void close(); + + Iterator begin(std::shared_ptr deserializer) const; + Iterator end() const; + + Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const; + + private: + seg::Decompressor::Iterator seek_decoder(uint64_t offset, std::optional hash_prefix) const; + + //! The path of the segment file for this snapshot + SnapshotPath path_; + + seg::Decompressor decoder_; +}; + +template +class SnapshotReader { + public: + class Iterator { + public: + using value_type = decltype(TWordDeserializer::value); + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit Iterator(Snapshot::Iterator it) + : it_(std::move(it)) {} + + reference operator*() const { return value(); } + pointer operator->() const { return &value(); } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++() { + ++it_; + return *this; + } + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; + + private: + value_type& value() const { + SnapshotWordDeserializer& base_deserializer = **it_; + // dynamic_cast is safe because TWordDeserializer was used when creating the Iterator + auto& s = dynamic_cast(base_deserializer); + return s.value; + } + + Snapshot::Iterator it_; + }; + + static_assert(std::input_iterator); + + using WordDeserializer = TWordDeserializer; + + SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} + + Iterator begin() const { + return Iterator{snapshot_.begin(std::make_shared())}; + } + + Iterator end() const { + return Iterator{snapshot_.end()}; + } + + Iterator seek(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + return Iterator{snapshot_.seek(offset, hash_prefix, std::make_shared())}; + } + + std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + auto it = seek(offset, hash_prefix); + return (it != end()) ? std::optional{std::move(*it)} : std::nullopt; + } + + std::vector read_into_vector(uint64_t offset, size_t count) const { + auto it = seek(offset); + if (it == end()) { + throw std::runtime_error("SnapshotReader::read_into_vector: bad offset " + std::to_string(offset)); + } + return iterator_read_into_vector(std::move(it), count); + } + + [[nodiscard]] BlockNum block_from() const { return snapshot_.block_from(); } + [[nodiscard]] BlockNum block_to() const { return snapshot_.block_to(); } + + private: + const Snapshot& snapshot_; +}; + +template +concept SnapshotReaderConcept = std::same_as> || + std::derived_from>; + +template +void iterator_read_into(It it, size_t count, std::vector& out) { + std::copy_n(std::make_move_iterator(std::move(it)), count, std::back_inserter(out)); +} + +template +std::vector iterator_read_into_vector(It it, size_t count) { + std::vector out; + out.reserve(count); + iterator_read_into(std::move(it), count, out); + return out; +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index f5079f3ea1..ffdf23a60f 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -14,26 +14,29 @@ limitations under the License. */ -#include "snapshot.hpp" - #include #include #include #include +#include #include +#include #include #include #include +#include #include #include #include #include +#include "snapshot_reader.hpp" +#include "txn_snapshot_word_serializer.hpp" + namespace silkworm::snapshots { -using namespace std::chrono_literals; namespace test = test_util; using silkworm::test_util::SetLogVerbosityGuard; @@ -55,26 +58,8 @@ class Snapshot_ForTest : public Snapshot { explicit Snapshot_ForTest(std::filesystem::path path) : Snapshot(*SnapshotPath::parse(std::move(path))) {} Snapshot_ForTest(const std::filesystem::path& tmp_dir, BlockNum block_from, BlockNum block_to) : Snapshot(SnapshotPath_ForTest{tmp_dir, block_from, block_to}) {} - ~Snapshot_ForTest() override { close(); } - - void reopen_index() override {} - void close_index() override {} }; -class TransactionSnapshot_ForTest : public TransactionSnapshot { - public: - using TransactionSnapshot::decode_txn; - using TransactionSnapshot::slice_tx_data; - using TransactionSnapshot::slice_tx_payload; -}; - -template -static auto move_last_write_time(const std::filesystem::path& p, const std::chrono::duration& d) { - const auto ftime = std::filesystem::last_write_time(p); - std::filesystem::last_write_time(p, ftime + d); - return std::filesystem::last_write_time(p) - ftime; -} - TEST_CASE("Snapshot::Snapshot", "[silkworm][node][snapshot][snapshot]") { TemporaryDirectory tmp_dir; SECTION("valid") { @@ -112,17 +97,18 @@ TEST_CASE("Snapshot::for_each_item", "[silkworm][node][snapshot][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test::HelloWorldSnapshotFile hello_world_snapshot_file{tmp_dir.path(), kValidHeadersSegmentPath.filename()}; - seg::Decompressor decoder{hello_world_snapshot_file.path()}; Snapshot_ForTest tmp_snapshot{hello_world_snapshot_file.path()}; tmp_snapshot.reopen_segment(); CHECK(!tmp_snapshot.empty()); CHECK(tmp_snapshot.item_count() == 1); - tmp_snapshot.for_each_item([&](const auto& word_item) { - CHECK(std::string{word_item.value.cbegin(), word_item.value.cend()} == "hello, world"); - CHECK(word_item.position == 0); - CHECK(word_item.offset == 0); - return true; - }); + + seg::Decompressor decoder{hello_world_snapshot_file.path()}; + decoder.open(); + auto it = decoder.begin(); + auto& word = *it; + CHECK(std::string{word.cbegin(), word.cend()} == "hello, world"); + CHECK(it.current_word_offset() == 0); + CHECK(++it == decoder.end()); } TEST_CASE("Snapshot::close", "[silkworm][node][snapshot][snapshot]") { @@ -144,13 +130,16 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind auto header_index = HeaderIndex::make(header_snapshot_path); REQUIRE_NOTHROW(header_index.build()); - HeaderSnapshot header_snapshot{header_snapshot_path}; + Snapshot header_snapshot{header_snapshot_path}; header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); - CHECK(!header_snapshot.header_by_number(1'500'011)); - CHECK(header_snapshot.header_by_number(1'500'012)); - const auto header = header_snapshot.header_by_number(1'500'013); + Index idx_header_hash{header_snapshot_path.index_file()}; + idx_header_hash.reopen_index(); + HeaderFindByBlockNumQuery header_by_number{header_snapshot, idx_header_hash}; + + CHECK(!header_by_number.exec(1'500'011)); + CHECK(header_by_number.exec(1'500'012)); + const auto header = header_by_number.exec(1'500'013); CHECK(header.has_value()); if (header) { CHECK(header->hash() == 0xbef48d7de01f2d7ea1a7e4d1ed401f73d6d0257a364f6770b25ba51a123ac35f_bytes32); @@ -171,7 +160,7 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind CHECK(header->prev_randao == 0x799895e28a837bbdf28b8ecf5fc0e6251398ecb0ffc7ff5bbb457c21b14ce982_bytes32); CHECK(header->nonce == std::array{0x86, 0x98, 0x76, 0x20, 0x12, 0xb4, 0x6f, 0xef}); } - CHECK(!header_snapshot.header_by_number(1'500'014)); + CHECK(!header_by_number.exec(1'500'014)); } // https://etherscan.io/block/1500013 @@ -183,13 +172,16 @@ TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]" auto body_index = BodyIndex::make(body_snapshot_path); REQUIRE_NOTHROW(body_index.build()); - BodySnapshot body_snapshot{body_snapshot_path}; + Snapshot body_snapshot{body_snapshot_path}; body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - CHECK(!body_snapshot.body_by_number(1'500'011)); - CHECK(body_snapshot.body_by_number(1'500'012)); - const auto body_for_storage = body_snapshot.body_by_number(1'500'013); + Index idx_body_number{body_snapshot_path.index_file()}; + idx_body_number.reopen_index(); + BodyFindByBlockNumQuery body_by_number{body_snapshot, idx_body_number}; + + CHECK(!body_by_number.exec(1'500'011)); + CHECK(body_by_number.exec(1'500'012)); + const auto body_for_storage = body_by_number.exec(1'500'013); CHECK(body_for_storage.has_value()); if (body_for_storage) { CHECK(body_for_storage->base_txn_id == 7'341'271); @@ -209,10 +201,14 @@ TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); CHECK_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - const auto transaction = tx_snapshot.txn_by_id(7'341'272); + + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; + + const auto transaction = txn_by_id.exec(7'341'272); CHECK(transaction.has_value()); if (transaction) { CHECK(transaction->type == TransactionType::kLegacy); @@ -234,27 +230,34 @@ TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][sna auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index_hash_to_block.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); + + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; + + Index idx_txn_hash_2_block{tx_snapshot_path.index_file_for_type(SnapshotType::transactions_to_block)}; + idx_txn_hash_2_block.reopen_index(); + TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{tx_snapshot, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 - auto transaction = tx_snapshot.txn_by_id(7'341'269); // known txn id in block 1'500'012 + auto transaction = txn_by_id.exec(7'341'269); // known txn id in block 1'500'012 CHECK(transaction.has_value()); - auto block_number = tx_snapshot.block_num_by_txn_hash(transaction->hash()); + auto block_number = block_num_by_txn_hash.exec(transaction->hash()); CHECK(block_number.has_value()); CHECK(block_number.value() == 1'500'012); // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 - transaction = tx_snapshot.txn_by_id(7'341'272); // known txn id in block 1'500'013 + transaction = txn_by_id.exec(7'341'272); // known txn id in block 1'500'013 CHECK(transaction.has_value()); - block_number = tx_snapshot.block_num_by_txn_hash(transaction->hash()); + block_number = block_num_by_txn_hash.exec(transaction->hash()); CHECK(block_number.has_value()); CHECK(block_number.value() == 1'500'013); // transaction hash not present in snapshot (first txn hash in block 1'500'014) - block_number = tx_snapshot.block_num_by_txn_hash(0xfa496b4cd9748754a28c66690c283ec9429440eb8609998901216908ad1b48eb_bytes32); + block_number = block_num_by_txn_hash.exec(0xfa496b4cd9748754a28c66690c283ec9429440eb8609998901216908ad1b48eb_bytes32); CHECK_FALSE(block_number.has_value()); } @@ -269,30 +272,33 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); + + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + TransactionRangeFromIdQuery txn_range{tx_snapshot, idx_txn_hash}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 SECTION("1'500'012 OK") { - CHECK(tx_snapshot.txn_range(7'341'263, 0, /*read_senders=*/true).empty()); - CHECK(tx_snapshot.txn_range(7'341'263, 7, /*read_senders=*/true).size() == 7); + CHECK(txn_range.exec_into_vector(7'341'263, 0).empty()); + CHECK(txn_range.exec_into_vector(7'341'263, 7).size() == 7); } SECTION("1'500'012 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'262, 7, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'264, 7, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'263, 8, /*read_senders=*/true)); // invalid txn_count + CHECK_THROWS(txn_range.exec_into_vector(7'341'262, 7)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'264, 7)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'263, 8)); // invalid txn_count } // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 SECTION("1'500'013 OK") { - CHECK(tx_snapshot.txn_range(7'341'272, 0, /*read_senders=*/true).empty()); - CHECK(tx_snapshot.txn_range(7'341'272, 1, /*read_senders=*/true).size() == 1); + CHECK(txn_range.exec_into_vector(7'341'272, 0).empty()); + CHECK(txn_range.exec_into_vector(7'341'272, 1).size() == 1); } SECTION("1'500'013 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'271, 1, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'273, 1, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'272, 2, /*read_senders=*/true)); // invalid txn_count + CHECK_THROWS(txn_range.exec_into_vector(7'341'271, 1)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'273, 1)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'272, 2)); // invalid txn_count } } @@ -306,34 +312,37 @@ TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][i auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); + + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + TransactionPayloadRlpRangeFromIdQuery txn_rlp_range{tx_snapshot, idx_txn_hash}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 SECTION("1'500'012 OK") { - CHECK(tx_snapshot.txn_rlp_range(7'341'263, 0).empty()); - CHECK(tx_snapshot.txn_rlp_range(7'341'263, 7).size() == 7); + CHECK(txn_rlp_range.exec_into_vector(7'341'263, 0).empty()); + CHECK(txn_rlp_range.exec_into_vector(7'341'263, 7).size() == 7); } SECTION("1'500'012 KO") { - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'262, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'264, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'263, 8)); // invalid txn_count + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'262, 7)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'264, 7)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'263, 8)); // invalid txn_count } // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 SECTION("1'500'013 OK") { - CHECK(tx_snapshot.txn_rlp_range(7'341'272, 0).empty()); - CHECK(tx_snapshot.txn_rlp_range(7'341'272, 1).size() == 1); + CHECK(txn_rlp_range.exec_into_vector(7'341'272, 0).empty()); + CHECK(txn_rlp_range.exec_into_vector(7'341'272, 1).size() == 1); } SECTION("1'500'013 KO") { - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'271, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'273, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'272, 2)); // invalid txn_count + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'271, 1)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'273, 1)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'272, 2)); // invalid txn_count } } -TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") { +TEST_CASE("slice_tx_payload", "[silkworm][node][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; const std::vector access_list{ {0xde0b295669a9fd93d5f28d9ec85e40f4cb697bae_address, @@ -363,7 +372,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded); // no envelope for legacy tx } SECTION("TransactionType: kAccessList") { @@ -384,7 +393,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(2)); // 2-byte envelope for this access-list tx } SECTION("TransactionType: kDynamicFee") { @@ -405,7 +414,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(2)); // 2-byte envelope for this dynamic-fee tx } SECTION("TransactionType: kBlob") { @@ -430,80 +439,9 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(3)); // 3-byte envelope for this blob tx } } -TEST_CASE("HeaderSnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleHeaderSnapshotFile sample_header_snapshot{tmp_dir.path()}; - test::SampleHeaderSnapshotPath header_snapshot_path{sample_header_snapshot.path()}; - auto header_index = HeaderIndex::make(header_snapshot_path); - REQUIRE_NOTHROW(header_index.build()); - - HeaderSnapshot header_snapshot{header_snapshot_path}; - header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); - REQUIRE(std::filesystem::exists(header_snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample header snapshot - const auto last_write_time_diff = move_last_write_time(sample_header_snapshot.path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file because it was created in the past - CHECK(std::filesystem::exists(header_snapshot.path().index_file().path())); - header_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(header_snapshot.path().index_file().path())); -} - -TEST_CASE("BodySnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile sample_body_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotPath body_snapshot_path{sample_body_snapshot.path()}; - auto body_index = BodyIndex::make(body_snapshot_path); - REQUIRE_NOTHROW(body_index.build()); - - BodySnapshot body_snapshot{body_snapshot_path}; - body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - CHECK(std::filesystem::exists(body_snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample body snapshot - const auto last_write_time_diff = move_last_write_time(sample_body_snapshot.path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file if created in the past - CHECK(std::filesystem::exists(body_snapshot.path().index_file().path())); - body_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(body_snapshot.path().index_file().path())); -} - -TEST_CASE("TransactionSnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotPath body_snapshot_path{body_snapshot.path()}; - test::SampleTransactionSnapshotFile sample_tx_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotPath tx_snapshot_path{sample_tx_snapshot.path()}; - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); - REQUIRE_NOTHROW(tx_index.build()); - - TransactionSnapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - CHECK(std::filesystem::exists(tx_snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample tx snapshot - const auto last_write_time_diff = move_last_write_time(sample_tx_snapshot.path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file if created in the past - CHECK(std::filesystem::exists(tx_snapshot.path().index_file().path())); - tx_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(tx_snapshot.path().index_file().path())); -} - } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_word_serializer.hpp b/silkworm/db/snapshots/snapshot_word_serializer.hpp new file mode 100644 index 0000000000..9936155da2 --- /dev/null +++ b/silkworm/db/snapshots/snapshot_word_serializer.hpp @@ -0,0 +1,34 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +namespace silkworm::snapshots { + +struct SnapshotWordDeserializer { + virtual ~SnapshotWordDeserializer() = default; + virtual void decode_word(ByteView word) = 0; + virtual void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) {} +}; + +template +concept SnapshotWordDeserializerConcept = std::derived_from && + requires(TWordDeserializer deserializer) { deserializer.value; }; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_index.cpp b/silkworm/db/snapshots/txn_index.cpp index 187af04dd7..0314d38b80 100644 --- a/silkworm/db/snapshots/txn_index.cpp +++ b/silkworm/db/snapshots/txn_index.cpp @@ -16,8 +16,9 @@ #include "txn_index.hpp" -#include "snapshot.hpp" -#include "txn_hash.hpp" +#include "body_txs_amount_query.hpp" +#include "snapshot_reader.hpp" +#include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -37,9 +38,10 @@ SnapshotPath TransactionIndex::bodies_segment_path(const SnapshotPath& segment_p std::pair TransactionIndex::compute_txs_amount( SnapshotPath bodies_segment_path, std::optional bodies_segment_region) { - BodySnapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; + Snapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; bodies_snapshot.reopen_segment(); - return bodies_snapshot.compute_txs_amount(); + auto result = BodyTxsAmountQuery{bodies_snapshot}.exec(); + return {result.first_tx_id, result.count}; } } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_queries.hpp b/silkworm/db/snapshots/txn_queries.hpp new file mode 100644 index 0000000000..eea51cb3eb --- /dev/null +++ b/silkworm/db/snapshots/txn_queries.hpp @@ -0,0 +1,51 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include + +#include "basic_queries.hpp" +#include "txn_snapshot.hpp" + +namespace silkworm::snapshots { + +using TransactionFindByIdQuery = FindByIdQuery; +using TransactionFindByHashQuery = FindByHashQuery; +using TransactionRangeFromIdQuery = RangeFromIdQuery; +using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery>; + +class TransactionBlockNumByTxnHashQuery { + public: + TransactionBlockNumByTxnHashQuery( + const Index& index, + TransactionFindByHashQuery cross_check_query) + : index_(index), + cross_check_query_(cross_check_query) {} + + std::optional exec(const Hash& hash) { + // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) + const auto transaction = cross_check_query_.exec(hash); + auto result = transaction ? index_.lookup_ordinal_by_hash(hash) : std::nullopt; + return result; + } + + private: + const Index& index_; + TransactionFindByHashQuery cross_check_query_; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp new file mode 100644 index 0000000000..833e603dbb --- /dev/null +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -0,0 +1,29 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include "snapshot_reader.hpp" +#include "txn_snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +using TransactionSnapshotReader = SnapshotReader; + +template +using TransactionSnapshotPayloadRlpReader = SnapshotReader>; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_hash.cpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp similarity index 50% rename from silkworm/db/snapshots/txn_hash.cpp rename to silkworm/db/snapshots/txn_snapshot_word_serializer.cpp index 18ce7fcffe..1bd1b4daee 100644 --- a/silkworm/db/snapshots/txn_hash.cpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "txn_hash.hpp" +#include "txn_snapshot_word_serializer.hpp" #include #include @@ -22,66 +22,93 @@ #include +#include #include #include -#include +#include +#include #include namespace silkworm::snapshots { -Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { - Hash tx_hash; +TransactionSnapshotWord slice_tx_data(ByteView buffer) { + // Skip first byte of tx hash plus sender address length for transaction decoding + constexpr int kTxRlpDataOffset{1 + kAddressLength}; - const bool is_system_tx{tx_buffer.empty()}; - if (is_system_tx) { - // system-txs: hash:pad32(txnID) - endian::store_big_u64(tx_hash.bytes, tx_id); - return tx_hash; - } - - // Skip tx hash first byte plus address length for transaction decoding - constexpr int kTxFirstByteAndAddressLength{1 + kAddressLength}; - if (tx_buffer.size() <= kTxFirstByteAndAddressLength) { + if (buffer.size() < kTxRlpDataOffset) { std::stringstream error; - error << " tx_buffer_hash cannot decode tx envelope: record " << to_hex(tx_buffer) - << " too short: " << tx_buffer.size() - << " tx_id: " << tx_id; + error << "slice_tx_data too short record: " << std::to_string(buffer.size()); throw std::runtime_error{error.str()}; } - const ByteView tx_envelope{tx_buffer.substr(kTxFirstByteAndAddressLength)}; - ByteView tx_envelope_view{tx_envelope}; + uint8_t first_hash_byte = buffer[0]; + ByteView senders_data = buffer.substr(1, kAddressLength); + ByteView tx_rlp = buffer.substr(kTxRlpDataOffset); + + return TransactionSnapshotWord{ + first_hash_byte, + senders_data, + tx_rlp, + }; +} + +ByteView slice_tx_payload(ByteView tx_rlp) { + ByteView tx_envelope = tx_rlp; rlp::Header tx_header; TransactionType tx_type{}; - auto decode_result = rlp::decode_transaction_header_and_type(tx_envelope_view, tx_header, tx_type); + const auto decode_result = rlp::decode_transaction_header_and_type(tx_envelope, tx_header, tx_type); if (!decode_result) { std::stringstream error; - error << " tx_buffer_hash cannot decode tx envelope: " << to_hex(tx_envelope) - << " tx_id: " << tx_id + error << "slice_tx_payload cannot decode tx envelope: " << to_hex(tx_rlp) << " error: " << magic_enum::enum_name(decode_result.error()); throw std::runtime_error{error.str()}; } - const std::size_t tx_payload_offset = tx_type == TransactionType::kLegacy ? 0 : (tx_envelope.length() - tx_header.payload_length); - if (tx_buffer.size() <= kTxFirstByteAndAddressLength + tx_payload_offset) { + if (tx_type == TransactionType::kLegacy) + return tx_rlp; + + if (tx_rlp.size() < tx_header.payload_length) { std::stringstream error; - error << " tx_buffer_hash cannot decode tx payload: record " << to_hex(tx_buffer) - << " too short: " << tx_buffer.size() - << " tx_id: " << tx_id; + error << " slice_tx_payload cannot decode tx payload: " << to_hex(tx_rlp) + << " too short: " << tx_rlp.size() + << " payload_length: " << tx_header.payload_length; throw std::runtime_error{error.str()}; } - const ByteView tx_payload{tx_buffer.substr(kTxFirstByteAndAddressLength + tx_payload_offset)}; + + const std::size_t tx_payload_offset = tx_rlp.size() - tx_header.payload_length; + return tx_rlp.substr(tx_payload_offset); +} + +void decode_word_into_tx(ByteView word, Transaction& tx) { + auto [_, senders_data, tx_rlp] = slice_tx_data(word); + const auto result = rlp::decode(tx_rlp, tx); + success_or_throw(result, "decode_word_into_tx: rlp::decode error"); + // Must happen after rlp::decode because it resets sender + tx.set_sender(bytes_to_address(senders_data)); +} + +Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { + Hash tx_hash; + + const bool is_system_tx{tx_buffer.empty()}; + if (is_system_tx) { + // system-txs: hash:pad32(txnID) + endian::store_big_u64(tx_hash.bytes, tx_id); + return tx_hash; + } + + auto [_1, _2, tx_envelope] = slice_tx_data(tx_buffer); + const ByteView tx_payload = slice_tx_payload(tx_envelope); + const auto h256{keccak256(tx_payload)}; std::copy(std::begin(h256.bytes), std::begin(h256.bytes) + kHashLength, std::begin(tx_hash.bytes)); if (tx_id % 100'000 == 0) { SILK_DEBUG << "tx_buffer_hash:" - << " header.list: " << tx_header.list - << " header.payload_length: " << tx_header.payload_length + << " header.payload_length: " << tx_payload.size() << " tx_id: " << tx_id; } SILK_TRACE << "tx_buffer_hash:" - << " type: " << int(tx_type) << " tx_id: " << tx_id << " payload: " << to_hex(tx_payload) << " h256: " << to_hex(h256.bytes, kHashLength); diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp new file mode 100644 index 0000000000..ee3efdb8e9 --- /dev/null +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -0,0 +1,74 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +#include +#include +#include + +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +struct TransactionSnapshotWord { + uint8_t first_hash_byte{}; + ByteView senders_data; + ByteView tx_rlp; +}; + +TransactionSnapshotWord slice_tx_data(ByteView buffer); + +ByteView slice_tx_payload(ByteView tx_rlp); + +Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); + +//! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes +void decode_word_into_tx(ByteView word, Transaction& tx); + +struct TransactionSnapshotWordDeserializer : public SnapshotWordDeserializer { + Transaction value; + + ~TransactionSnapshotWordDeserializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_tx(word, value); + } +}; + +static_assert(SnapshotWordDeserializerConcept); + +template +concept BytesOrByteView = std::same_as || std::same_as; + +template +struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeserializer { + TBytes value; + + ~TransactionSnapshotWordPayloadRlpDeserializer() override = default; + + void decode_word(ByteView word) override { + auto data = slice_tx_data(word); + value = slice_tx_payload(data.tx_rlp); + } +}; + +static_assert(SnapshotWordDeserializerConcept>); + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txs_and_bodies_query.hpp b/silkworm/db/snapshots/txs_and_bodies_query.hpp index 615a118116..9c1d8596da 100644 --- a/silkworm/db/snapshots/txs_and_bodies_query.hpp +++ b/silkworm/db/snapshots/txs_and_bodies_query.hpp @@ -55,7 +55,7 @@ class TxsAndBodiesQuery { }; using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; @@ -83,6 +83,8 @@ class TxsAndBodiesQuery { std::string log_title_; }; + static_assert(std::input_or_output_iterator); + TxsAndBodiesQuery( SnapshotPath txs_segment_path, std::optional txs_segment_region, diff --git a/silkworm/sentry/common/random.hpp b/silkworm/sentry/common/random.hpp index 9c99cf9bb1..8f471af749 100644 --- a/silkworm/sentry/common/random.hpp +++ b/silkworm/sentry/common/random.hpp @@ -38,7 +38,7 @@ std::list random_list_items(std::list& l, size_t max_count) { public: [[maybe_unused]] typedef std::output_iterator_tag iterator_category; [[maybe_unused]] typedef void value_type; - [[maybe_unused]] typedef void difference_type; + [[maybe_unused]] typedef std::ptrdiff_t difference_type; [[maybe_unused]] typedef void pointer; [[maybe_unused]] typedef void reference; @@ -61,6 +61,8 @@ std::list random_list_items(std::list& l, size_t max_count) { std::list* container_; }; + static_assert(std::output_iterator); + std::list out; std::default_random_engine random_engine{std::random_device{}()}; std::sample(l.begin(), l.end(), BackInsertPtrIterator(out), max_count, random_engine);