From 51b9e411304af13428e5bac8a049114e86c9788a Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 11:03:07 +0200 Subject: [PATCH 01/37] extract header serialization --- silkworm/db/snapshots/header_snapshot.cpp | 42 ++++++++++++++++ silkworm/db/snapshots/header_snapshot.hpp | 43 ++++++++++++++++ silkworm/db/snapshots/snapshot.cpp | 49 +++++++------------ silkworm/db/snapshots/snapshot.hpp | 2 - .../db/snapshots/snapshot_word_serializer.hpp | 30 ++++++++++++ 5 files changed, 133 insertions(+), 33 deletions(-) create mode 100644 silkworm/db/snapshots/header_snapshot.cpp create mode 100644 silkworm/db/snapshots/header_snapshot.hpp create mode 100644 silkworm/db/snapshots/snapshot_word_serializer.hpp diff --git a/silkworm/db/snapshots/header_snapshot.cpp b/silkworm/db/snapshots/header_snapshot.cpp new file mode 100644 index 0000000000..0afa3f17c0 --- /dev/null +++ b/silkworm/db/snapshots/header_snapshot.cpp @@ -0,0 +1,42 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "header_snapshot.hpp" + +#include +#include + +namespace silkworm::snapshots { + +void decode_word_into_header(ByteView word, BlockHeader& header) { + // First byte in data is first byte of header hash. + ensure(!word.empty(), [&]() { return "decode_word_into_header: first hash byte missing"; }); + + // Skip hash first byte to obtain encoded header RLP data + ByteView encoded_header{word.data() + 1, word.length() - 1}; + + const auto decode_result = rlp::decode(encoded_header, header); + success_or_throw(decode_result, "decode_word_into_header: rlp::decode error"); +} + +void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to) { + ensure((header.number >= block_from) && (header.number < block_to), [&]() { + return "check_sanity_of_header_with_metadata: header.number=" + std::to_string(header.number) + + " outside of range [" + std::to_string(block_from) + ", " + std::to_string(block_to) + ")"; + }); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp new file mode 100644 index 0000000000..b322ce0315 --- /dev/null +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -0,0 +1,43 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +void decode_word_into_header(ByteView word, BlockHeader& header); +void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to); + +struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { + BlockHeader header; + + ~HeaderSnapshotWordSerializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_header(word, header); + } + + void check_sanity_with_metadata(BlockNum block_from, BlockNum block_to) override { + check_sanity_of_header_with_metadata(header, block_from, block_to); + } +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 47bc9dbb44..0e2971468e 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -26,6 +26,8 @@ #include #include +#include "header_snapshot.hpp" + namespace silkworm::snapshots { //! Convert the specified decoding result into its string representation @@ -112,29 +114,31 @@ HeaderSnapshot::~HeaderSnapshot() { } bool HeaderSnapshot::for_each_header(const Walker& walker) { - return for_each_item([this, walker](const WordItem& item) -> bool { - BlockHeader header; - const auto decode_ok = decode_header(item, header); - if (!decode_ok) { - return false; - } - return walker(&header); + HeaderSnapshotWordSerializer serializer; + + return for_each_item([this, walker, &serializer](const WordItem& item) -> bool { + serializer.decode_word(item.value); + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return walker(&serializer.header); }); } std::optional HeaderSnapshot::next_header(uint64_t offset, std::optional hash) const { + HeaderSnapshotWordSerializer serializer; + // Get the next data item at specified offset, optionally checking if it starts with block hash first byte const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - std::optional header; if (!item) { - return header; + return std::nullopt; } - header = BlockHeader{}; - const auto decode_ok = decode_header(*item, *header); - if (!decode_ok) { - return {}; + + try { + serializer.decode_word(item->value); + } catch (...) { + return std::nullopt; } - return header; + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return serializer.header; } std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash) const { @@ -174,23 +178,6 @@ std::optional HeaderSnapshot::header_by_number(BlockNum block_heigh return next_header(block_header_offset); } -bool HeaderSnapshot::decode_header(const Snapshot::WordItem& item, BlockHeader& header) const { - // First byte in data is first byte of header hash. - ensure(!item.value.empty(), [&]() { return "HeaderSnapshot: hash first byte missing at offset=" + std::to_string(item.offset); }); - - // Skip hash first byte to obtain encoded header RLP data - ByteView encoded_header{item.value.data() + 1, item.value.length() - 1}; - const auto decode_result = rlp::decode(encoded_header, header); - if (!decode_result) { - SILK_TRACE << "decode_header offset: " << item.offset << " error: " << magic_enum::enum_name(decode_result.error()); - return false; - } - - ensure(header.number >= path_.block_from(), - [&]() { return "HeaderSnapshot: number=" + std::to_string(header.number) + " < block_from=" + std::to_string(path_.block_from()); }); - return true; -} - void HeaderSnapshot::reopen_index() { ensure(decoder_.is_open(), "HeaderSnapshot: segment not open, call reopen_segment"); diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index 1e45ea49c6..bda90c5bc5 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -120,8 +120,6 @@ class HeaderSnapshot : public Snapshot { void reopen_index() override; protected: - bool decode_header(const Snapshot::WordItem& item, BlockHeader& header) const; - void close_index() override; private: diff --git a/silkworm/db/snapshots/snapshot_word_serializer.hpp b/silkworm/db/snapshots/snapshot_word_serializer.hpp new file mode 100644 index 0000000000..ac09d7095e --- /dev/null +++ b/silkworm/db/snapshots/snapshot_word_serializer.hpp @@ -0,0 +1,30 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +namespace silkworm::snapshots { + +struct SnapshotWordSerializer { + virtual ~SnapshotWordSerializer() = default; + virtual void decode_word(ByteView word) = 0; + virtual void check_sanity_with_metadata(BlockNum block_from, BlockNum block_to) = 0; +}; + +} // namespace silkworm::snapshots From 0972bb75123305a9dae6ebd54e9ff2b35ce7ed21 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 11:35:52 +0200 Subject: [PATCH 02/37] extract body serialization --- silkworm/db/snapshots/body_snapshot.cpp | 28 +++++++++++++++++ silkworm/db/snapshots/body_snapshot.hpp | 42 +++++++++++++++++++++++++ silkworm/db/snapshots/snapshot.cpp | 39 +++++++++++------------ silkworm/db/snapshots/snapshot.hpp | 2 -- 4 files changed, 89 insertions(+), 22 deletions(-) create mode 100644 silkworm/db/snapshots/body_snapshot.cpp create mode 100644 silkworm/db/snapshots/body_snapshot.hpp diff --git a/silkworm/db/snapshots/body_snapshot.cpp b/silkworm/db/snapshots/body_snapshot.cpp new file mode 100644 index 0000000000..9054b814d1 --- /dev/null +++ b/silkworm/db/snapshots/body_snapshot.cpp @@ -0,0 +1,28 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "body_snapshot.hpp" + +#include + +namespace silkworm::snapshots { + +void decode_word_into_body(ByteView word, BlockBodyForStorage& body) { + const auto result = decode_stored_block_body(word, body); + success_or_throw(result, "decode_word_into_body: decode_stored_block_body error"); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp new file mode 100644 index 0000000000..2f78b6b8ee --- /dev/null +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -0,0 +1,42 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include + +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +void decode_word_into_body(ByteView word, BlockBodyForStorage& body); + +struct BodySnapshotWordSerializer : public SnapshotWordSerializer { + BlockBodyForStorage body; + + ~BodySnapshotWordSerializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_body(word, body); + } + + void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + } +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 0e2971468e..5af279e6d8 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -26,6 +26,7 @@ #include #include +#include "body_snapshot.hpp" #include "header_snapshot.hpp" namespace silkworm::snapshots { @@ -210,11 +211,13 @@ BodySnapshot::~BodySnapshot() { } bool BodySnapshot::for_each_body(const Walker& walker) { + BodySnapshotWordSerializer serializer; + return for_each_item([&](const WordItem& item) -> bool { - BlockBodyForStorage body; - success_or_throw(decode_body(item, body)); + serializer.decode_word(item.value); + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); const BlockNum number = path_.block_from() + item.position; - return walker(number, &body); + return walker(number, &serializer.body); }); } @@ -240,19 +243,23 @@ std::pair BodySnapshot::compute_txs_amount() { } std::optional BodySnapshot::next_body(uint64_t offset) const { + BodySnapshotWordSerializer serializer; + const auto item = next_item(offset); - std::optional stored_body; if (!item) { - return stored_body; + return std::nullopt; } - stored_body = StoredBlockBody{}; - const auto decode_ok = decode_body(*item, *stored_body); - if (!decode_ok) { - return {}; + + try { + serializer.decode_word(item->value); + } catch (...) { + return std::nullopt; } - ensure(stored_body->base_txn_id >= idx_body_number_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(stored_body->base_txn_id); }); - return stored_body; + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + + ensure(serializer.body.base_txn_id >= idx_body_number_->base_data_id(), + [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(serializer.body.base_txn_id); }); + return serializer.body; } std::optional BodySnapshot::body_by_number(BlockNum block_height) const { @@ -268,14 +275,6 @@ std::optional BodySnapshot::body_by_number(BlockNum block_heigh return next_body(block_body_offset); } -DecodingResult BodySnapshot::decode_body(const Snapshot::WordItem& item, StoredBlockBody& body) { - ByteView body_rlp{item.value.data(), item.value.length()}; - SILK_TRACE << "decode_body offset: " << item.offset << " body_rlp: " << to_hex(body_rlp); - const auto result = decode_stored_block_body(body_rlp, body); - SILK_TRACE << "decode_body offset: " << item.offset << " txn_count: " << body.txn_count << " base_txn_id:" << body.base_txn_id; - return result; -} - void BodySnapshot::reopen_index() { ensure(decoder_.is_open(), "BodySnapshot: segment not open, call reopen_segment"); diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index bda90c5bc5..6ba6af6f06 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -151,8 +151,6 @@ class BodySnapshot : public Snapshot { void reopen_index() override; protected: - static DecodingResult decode_body(const Snapshot::WordItem& item, StoredBlockBody& body); - void close_index() override; private: From 5ac4cb0c9460455a5ddde6604d409a6cd5911815 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 11:58:21 +0200 Subject: [PATCH 03/37] refactor slice_tx_payload --- silkworm/db/snapshots/snapshot.cpp | 16 +------- silkworm/db/snapshots/snapshot.hpp | 1 - silkworm/db/snapshots/snapshot_test.cpp | 13 ++++--- silkworm/db/snapshots/txn_hash.cpp | 52 ++++++++++++++----------- silkworm/db/snapshots/txn_hash.hpp | 2 + 5 files changed, 39 insertions(+), 45 deletions(-) diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 5af279e6d8..e90a9645b7 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -28,6 +28,7 @@ #include "body_snapshot.hpp" #include "header_snapshot.hpp" +#include "txn_hash.hpp" namespace silkworm::snapshots { @@ -434,21 +435,6 @@ std::pair TransactionSnapshot::slice_tx_data(const WordItem& return {senders_data, tx_rlp}; } -ByteView TransactionSnapshot::slice_tx_payload(ByteView tx_rlp) { - ByteView tx_envelope{tx_rlp}; - - rlp::Header tx_header; - TransactionType tx_type{}; - const auto envelope_result = rlp::decode_transaction_header_and_type(tx_envelope, tx_header, tx_type); - ensure(envelope_result.has_value(), - [&]() { return "TransactionSnapshot: cannot decode tx envelope: " + to_hex(tx_envelope) + " error: " + to_string(envelope_result); }); - - const std::size_t tx_payload_offset = tx_type == TransactionType::kLegacy ? 0 : (tx_rlp.length() - tx_header.payload_length); - ByteView tx_payload{tx_rlp.substr(tx_payload_offset)}; - - return tx_payload; -} - //! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes DecodingResult TransactionSnapshot::decode_txn(const WordItem& item, Transaction& tx) { auto [senders_data, tx_rlp] = slice_tx_data(item); diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index 6ba6af6f06..f8e37dd75b 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -183,7 +183,6 @@ class TransactionSnapshot : public Snapshot { protected: static std::pair slice_tx_data(const WordItem& item); - static ByteView slice_tx_payload(ByteView tx_rlp); static DecodingResult decode_txn(const WordItem& item, Transaction& tx); using Walker = std::function; diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index f5079f3ea1..810d9bd6ca 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -31,6 +31,8 @@ #include #include +#include "txn_hash.hpp" + namespace silkworm::snapshots { using namespace std::chrono_literals; @@ -65,7 +67,6 @@ class TransactionSnapshot_ForTest : public TransactionSnapshot { public: using TransactionSnapshot::decode_txn; using TransactionSnapshot::slice_tx_data; - using TransactionSnapshot::slice_tx_payload; }; template @@ -333,7 +334,7 @@ TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][i } } -TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") { +TEST_CASE("slice_tx_payload", "[silkworm][node][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; const std::vector access_list{ {0xde0b295669a9fd93d5f28d9ec85e40f4cb697bae_address, @@ -363,7 +364,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded); // no envelope for legacy tx } SECTION("TransactionType: kAccessList") { @@ -384,7 +385,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(2)); // 2-byte envelope for this access-list tx } SECTION("TransactionType: kDynamicFee") { @@ -405,7 +406,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(2)); // 2-byte envelope for this dynamic-fee tx } SECTION("TransactionType: kBlob") { @@ -430,7 +431,7 @@ TEST_CASE("TransactionSnapshot::slice_tx_payload", "[silkworm][node][snapshot]") Bytes encoded{}; rlp::encode(encoded, txn); Bytes decoded{}; - CHECK_NOTHROW(decoded = TransactionSnapshot_ForTest::slice_tx_payload(encoded)); + CHECK_NOTHROW(decoded = slice_tx_payload(encoded)); CHECK(decoded == encoded.substr(3)); // 3-byte envelope for this blob tx } } diff --git a/silkworm/db/snapshots/txn_hash.cpp b/silkworm/db/snapshots/txn_hash.cpp index 18ce7fcffe..69aca21009 100644 --- a/silkworm/db/snapshots/txn_hash.cpp +++ b/silkworm/db/snapshots/txn_hash.cpp @@ -29,6 +29,33 @@ namespace silkworm::snapshots { +ByteView slice_tx_payload(ByteView tx_rlp) { + ByteView tx_envelope = tx_rlp; + rlp::Header tx_header; + TransactionType tx_type{}; + const auto decode_result = rlp::decode_transaction_header_and_type(tx_envelope, tx_header, tx_type); + if (!decode_result) { + std::stringstream error; + error << "slice_tx_payload cannot decode tx envelope: " << to_hex(tx_rlp) + << " error: " << magic_enum::enum_name(decode_result.error()); + throw std::runtime_error{error.str()}; + } + + if (tx_type == TransactionType::kLegacy) + return tx_rlp; + + if (tx_rlp.size() < tx_header.payload_length) { + std::stringstream error; + error << " slice_tx_payload cannot decode tx payload: " << to_hex(tx_rlp) + << " too short: " << tx_rlp.size() + << " payload_length: " << tx_header.payload_length; + throw std::runtime_error{error.str()}; + } + + const std::size_t tx_payload_offset = tx_rlp.size() - tx_header.payload_length; + return tx_rlp.substr(tx_payload_offset); +} + Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { Hash tx_hash; @@ -49,39 +76,18 @@ Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { throw std::runtime_error{error.str()}; } const ByteView tx_envelope{tx_buffer.substr(kTxFirstByteAndAddressLength)}; - ByteView tx_envelope_view{tx_envelope}; - rlp::Header tx_header; - TransactionType tx_type{}; - auto decode_result = rlp::decode_transaction_header_and_type(tx_envelope_view, tx_header, tx_type); - if (!decode_result) { - std::stringstream error; - error << " tx_buffer_hash cannot decode tx envelope: " << to_hex(tx_envelope) - << " tx_id: " << tx_id - << " error: " << magic_enum::enum_name(decode_result.error()); - throw std::runtime_error{error.str()}; - } + const ByteView tx_payload = slice_tx_payload(tx_envelope); - const std::size_t tx_payload_offset = tx_type == TransactionType::kLegacy ? 0 : (tx_envelope.length() - tx_header.payload_length); - if (tx_buffer.size() <= kTxFirstByteAndAddressLength + tx_payload_offset) { - std::stringstream error; - error << " tx_buffer_hash cannot decode tx payload: record " << to_hex(tx_buffer) - << " too short: " << tx_buffer.size() - << " tx_id: " << tx_id; - throw std::runtime_error{error.str()}; - } - const ByteView tx_payload{tx_buffer.substr(kTxFirstByteAndAddressLength + tx_payload_offset)}; const auto h256{keccak256(tx_payload)}; std::copy(std::begin(h256.bytes), std::begin(h256.bytes) + kHashLength, std::begin(tx_hash.bytes)); if (tx_id % 100'000 == 0) { SILK_DEBUG << "tx_buffer_hash:" - << " header.list: " << tx_header.list - << " header.payload_length: " << tx_header.payload_length + << " header.payload_length: " << tx_payload.size() << " tx_id: " << tx_id; } SILK_TRACE << "tx_buffer_hash:" - << " type: " << int(tx_type) << " tx_id: " << tx_id << " payload: " << to_hex(tx_payload) << " h256: " << to_hex(h256.bytes, kHashLength); diff --git a/silkworm/db/snapshots/txn_hash.hpp b/silkworm/db/snapshots/txn_hash.hpp index fe425c5b8d..4290512b0a 100644 --- a/silkworm/db/snapshots/txn_hash.hpp +++ b/silkworm/db/snapshots/txn_hash.hpp @@ -23,6 +23,8 @@ namespace silkworm::snapshots { +ByteView slice_tx_payload(ByteView tx_rlp); + Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); } // namespace silkworm::snapshots From 361e887a0e3185d44d07849f4ceee6aaf970796e Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 12:54:53 +0200 Subject: [PATCH 04/37] rename txn_hash -> txn_snapshot_word_serializer --- silkworm/db/snapshots/snapshot.cpp | 2 +- silkworm/db/snapshots/snapshot_test.cpp | 2 +- silkworm/db/snapshots/txn_index.cpp | 2 +- .../{txn_hash.cpp => txn_snapshot_word_serializer.cpp} | 2 +- .../{txn_hash.hpp => txn_snapshot_word_serializer.hpp} | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename silkworm/db/snapshots/{txn_hash.cpp => txn_snapshot_word_serializer.cpp} (98%) rename silkworm/db/snapshots/{txn_hash.hpp => txn_snapshot_word_serializer.hpp} (100%) diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index e90a9645b7..71195c3291 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -28,7 +28,7 @@ #include "body_snapshot.hpp" #include "header_snapshot.hpp" -#include "txn_hash.hpp" +#include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 810d9bd6ca..739adbcb9e 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -31,7 +31,7 @@ #include #include -#include "txn_hash.hpp" +#include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/snapshots/txn_index.cpp b/silkworm/db/snapshots/txn_index.cpp index 187af04dd7..8d8ea1cc0f 100644 --- a/silkworm/db/snapshots/txn_index.cpp +++ b/silkworm/db/snapshots/txn_index.cpp @@ -17,7 +17,7 @@ #include "txn_index.hpp" #include "snapshot.hpp" -#include "txn_hash.hpp" +#include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/snapshots/txn_hash.cpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp similarity index 98% rename from silkworm/db/snapshots/txn_hash.cpp rename to silkworm/db/snapshots/txn_snapshot_word_serializer.cpp index 69aca21009..5fd2c54570 100644 --- a/silkworm/db/snapshots/txn_hash.cpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "txn_hash.hpp" +#include "txn_snapshot_word_serializer.hpp" #include #include diff --git a/silkworm/db/snapshots/txn_hash.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp similarity index 100% rename from silkworm/db/snapshots/txn_hash.hpp rename to silkworm/db/snapshots/txn_snapshot_word_serializer.hpp From fe4644b731b6922aa582f9b8215803730f3d3331 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 13:19:15 +0200 Subject: [PATCH 05/37] extract tx serialization --- silkworm/db/snapshots/snapshot.cpp | 92 +++++-------------- silkworm/db/snapshots/snapshot.hpp | 5 +- silkworm/db/snapshots/snapshot_test.cpp | 6 -- .../txn_snapshot_word_serializer.cpp | 45 ++++++--- .../txn_snapshot_word_serializer.hpp | 41 +++++++++ 5 files changed, 99 insertions(+), 90 deletions(-) diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 71195c3291..55b78b381c 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -16,8 +16,6 @@ #include "snapshot.hpp" -#include - #include #include #include @@ -32,15 +30,6 @@ namespace silkworm::snapshots { -//! Convert the specified decoding result into its string representation -inline std::string to_string(DecodingResult result) { - std::string s; - if (!result.has_value()) { - s.append(magic_enum::enum_name(result.error())); - } - return s; -} - Snapshot::Snapshot(SnapshotPath path, std::optional segment_region) : path_(std::move(path)), decoder_{path_.path(), segment_region} {} @@ -308,23 +297,22 @@ TransactionSnapshot::~TransactionSnapshot() { close(); } -// Skip first byte of tx hash plus sender address length for transaction decoding -constexpr int kTxRlpDataOffset{1 + kAddressLength}; - [[nodiscard]] std::optional TransactionSnapshot::next_txn(uint64_t offset, std::optional hash) const { + TransactionSnapshotWordSerializer serializer; + // Get the next data item at specified offset, optionally checking if it starts with txn hash first byte const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - std::optional transaction; if (!item) { - return transaction; + return std::nullopt; } - // Decode transaction from the extracted data item - transaction = Transaction{}; - const auto decode_ok = decode_txn(*item, *transaction); - if (!decode_ok) { - return {}; + + try { + serializer.decode_word(item->value); + } catch (...) { + return std::nullopt; } - return transaction; + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return serializer.transaction; } std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash) const { @@ -381,24 +369,16 @@ std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& t return block_number; } -std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, uint64_t txn_count, bool read_senders) const { +std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, uint64_t txn_count, bool /*read_senders*/) const { + TransactionSnapshotWordSerializer serializer; + std::vector transactions; transactions.reserve(txn_count); - for_each_txn(base_txn_id, txn_count, [&transactions, read_senders](uint64_t i, ByteView senders_data, ByteView tx_rlp) -> bool { - ByteView tx_payload = slice_tx_payload(tx_rlp); - - Transaction transaction; - const auto payload_result = rlp::decode_transaction(tx_payload, transaction, rlp::Eip2718Wrapping::kBoth); - ensure(payload_result.has_value(), - [&]() { return "TransactionSnapshot: cannot decode tx payload: " + to_hex(tx_payload) + " i: " + std::to_string(i) + - " error: " + to_string(payload_result); }); - - if (read_senders) { - transaction.set_sender(bytes_to_address(senders_data)); - } - - transactions.push_back(std::move(transaction)); + for_each_txn(base_txn_id, txn_count, [&transactions, &serializer, this](ByteView word) -> bool { + serializer.decode_word(word); + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + transactions.push_back(std::move(serializer.transaction)); return true; }); @@ -406,43 +386,21 @@ std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, ui } std::vector TransactionSnapshot::txn_rlp_range(uint64_t base_txn_id, uint64_t txn_count) const { + TransactionSnapshotWordPayloadRlpSerializer serializer; + std::vector rlp_txs; rlp_txs.reserve(txn_count); - for_each_txn(base_txn_id, txn_count, [&rlp_txs](uint64_t /*i*/, ByteView /*senders_data*/, ByteView tx_rlp) -> bool { - ByteView tx_payload = slice_tx_payload(tx_rlp); - rlp_txs.emplace_back(tx_payload); + for_each_txn(base_txn_id, txn_count, [&rlp_txs, &serializer, this](ByteView word) -> bool { + serializer.decode_word(word); + serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); + rlp_txs.emplace_back(serializer.tx_payload); return true; }); return rlp_txs; } -std::pair TransactionSnapshot::slice_tx_data(const WordItem& item) { - const auto& buffer{item.value}; - const auto buffer_size{buffer.size()}; - SILK_TRACE << "slice_tx_data offset: " << item.offset << " buffer: " << to_hex(buffer); - - ensure(buffer_size >= kTxRlpDataOffset, [&]() { return "TransactionSnapshot: too short record: " + std::to_string(buffer_size); }); - - // Skip first byte in data as it is first byte of transaction hash - ByteView senders_data{buffer.data() + 1, kAddressLength}; - ByteView tx_rlp{buffer.data() + kTxRlpDataOffset, buffer_size - kTxRlpDataOffset}; - - SILK_TRACE << "slice_tx_data offset: " << item.offset << " tx_hash_first_byte: " << to_hex(buffer[0]) - << " senders_data: " << to_hex(senders_data) << " tx_rlp: " << to_hex(tx_rlp); - - return {senders_data, tx_rlp}; -} - -//! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes -DecodingResult TransactionSnapshot::decode_txn(const WordItem& item, Transaction& tx) { - auto [senders_data, tx_rlp] = slice_tx_data(item); - const auto result = rlp::decode(tx_rlp, tx); - tx.set_sender(bytes_to_address(senders_data)); // Must happen after rlp::decode because it resets sender - return result; -} - void TransactionSnapshot::for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const { if (!idx_txn_hash_ || txn_count == 0) { return; @@ -462,9 +420,7 @@ void TransactionSnapshot::for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const auto item = next_item(offset); ensure(item.has_value(), [&]() { return "TransactionSnapshot: record not found at offset=" + std::to_string(offset); }); - auto [senders_data, tx_rlp] = slice_tx_data(*item); - - const bool go_on{walker(i, senders_data, tx_rlp)}; + const bool go_on = walker(item->value); if (!go_on) return; offset = item->offset; diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index f8e37dd75b..cdc92e120a 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -182,10 +182,7 @@ class TransactionSnapshot : public Snapshot { void reopen_index() override; protected: - static std::pair slice_tx_data(const WordItem& item); - static DecodingResult decode_txn(const WordItem& item, Transaction& tx); - - using Walker = std::function; + using Walker = std::function; void for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const; void close_index() override; diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 739adbcb9e..19906dbd5d 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -63,12 +63,6 @@ class Snapshot_ForTest : public Snapshot { void close_index() override {} }; -class TransactionSnapshot_ForTest : public TransactionSnapshot { - public: - using TransactionSnapshot::decode_txn; - using TransactionSnapshot::slice_tx_data; -}; - template static auto move_last_write_time(const std::filesystem::path& p, const std::chrono::duration& d) { const auto ftime = std::filesystem::last_write_time(p); diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp index 5fd2c54570..1bd1b4daee 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.cpp @@ -22,13 +22,36 @@ #include +#include #include #include -#include +#include +#include #include namespace silkworm::snapshots { +TransactionSnapshotWord slice_tx_data(ByteView buffer) { + // Skip first byte of tx hash plus sender address length for transaction decoding + constexpr int kTxRlpDataOffset{1 + kAddressLength}; + + if (buffer.size() < kTxRlpDataOffset) { + std::stringstream error; + error << "slice_tx_data too short record: " << std::to_string(buffer.size()); + throw std::runtime_error{error.str()}; + } + + uint8_t first_hash_byte = buffer[0]; + ByteView senders_data = buffer.substr(1, kAddressLength); + ByteView tx_rlp = buffer.substr(kTxRlpDataOffset); + + return TransactionSnapshotWord{ + first_hash_byte, + senders_data, + tx_rlp, + }; +} + ByteView slice_tx_payload(ByteView tx_rlp) { ByteView tx_envelope = tx_rlp; rlp::Header tx_header; @@ -56,6 +79,14 @@ ByteView slice_tx_payload(ByteView tx_rlp) { return tx_rlp.substr(tx_payload_offset); } +void decode_word_into_tx(ByteView word, Transaction& tx) { + auto [_, senders_data, tx_rlp] = slice_tx_data(word); + const auto result = rlp::decode(tx_rlp, tx); + success_or_throw(result, "decode_word_into_tx: rlp::decode error"); + // Must happen after rlp::decode because it resets sender + tx.set_sender(bytes_to_address(senders_data)); +} + Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { Hash tx_hash; @@ -66,17 +97,7 @@ Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id) { return tx_hash; } - // Skip tx hash first byte plus address length for transaction decoding - constexpr int kTxFirstByteAndAddressLength{1 + kAddressLength}; - if (tx_buffer.size() <= kTxFirstByteAndAddressLength) { - std::stringstream error; - error << " tx_buffer_hash cannot decode tx envelope: record " << to_hex(tx_buffer) - << " too short: " << tx_buffer.size() - << " tx_id: " << tx_id; - throw std::runtime_error{error.str()}; - } - const ByteView tx_envelope{tx_buffer.substr(kTxFirstByteAndAddressLength)}; - + auto [_1, _2, tx_envelope] = slice_tx_data(tx_buffer); const ByteView tx_payload = slice_tx_payload(tx_envelope); const auto h256{keccak256(tx_payload)}; diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp index 4290512b0a..57d2df4789 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -20,11 +20,52 @@ #include #include +#include + +#include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { +struct TransactionSnapshotWord { + uint8_t first_hash_byte{}; + ByteView senders_data; + ByteView tx_rlp; +}; + +TransactionSnapshotWord slice_tx_data(ByteView buffer); + ByteView slice_tx_payload(ByteView tx_rlp); Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); +//! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes +void decode_word_into_tx(ByteView word, Transaction& tx); + +struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { + Transaction transaction; + + ~TransactionSnapshotWordSerializer() override = default; + + void decode_word(ByteView word) override { + decode_word_into_tx(word, transaction); + } + + void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + } +}; + +struct TransactionSnapshotWordPayloadRlpSerializer : public SnapshotWordSerializer { + ByteView tx_payload; + + ~TransactionSnapshotWordPayloadRlpSerializer() override = default; + + void decode_word(ByteView word) override { + auto data = slice_tx_data(word); + tx_payload = slice_tx_payload(data.tx_rlp); + } + + void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + } +}; + } // namespace silkworm::snapshots From 55094c3effe4d6e6901a1502ca5202eeb28a1055 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 16:05:22 +0200 Subject: [PATCH 06/37] snapshot iterator using SnapshotWordSerializer --- silkworm/db/snapshots/seg/decompressor.cpp | 2 +- silkworm/db/snapshots/seg/decompressor.hpp | 2 +- silkworm/db/snapshots/snapshot.cpp | 45 ++++++++++++++++++---- silkworm/db/snapshots/snapshot.hpp | 35 +++++++++++++++++ 4 files changed, 75 insertions(+), 9 deletions(-) diff --git a/silkworm/db/snapshots/seg/decompressor.cpp b/silkworm/db/snapshots/seg/decompressor.cpp index d3a8ea6e22..35f91493b0 100644 --- a/silkworm/db/snapshots/seg/decompressor.cpp +++ b/silkworm/db/snapshots/seg/decompressor.cpp @@ -381,7 +381,7 @@ void Decompressor::open() { compressed_file_->advise_random(); } -Decompressor::Iterator Decompressor::begin() { +Decompressor::Iterator Decompressor::begin() const { ensure(bool(compressed_file_), "decompressor closed, call open first"); auto read_mode_guard = std::make_shared(*compressed_file_, ReadMode::kSequential, ReadMode::kRandom); Iterator it{this, std::move(read_mode_guard)}; diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index 2f8c8286de..c2b2cc05ba 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -296,7 +296,7 @@ class Decompressor { [[nodiscard]] Iterator make_iterator() const { return Iterator{this, {}}; } //! Begin reading the words, expected to read in sequential order - Iterator begin(); + Iterator begin() const; Iterator end() const { return Iterator::make_end(this); } void close(); diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 55b78b381c..6cf344a0b3 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -46,6 +46,38 @@ void Snapshot::reopen_segment() { decoder_.open(); } +Snapshot::Iterator& Snapshot::Iterator::operator++() { + bool has_next = it_.has_next(); + ++it_; + + if (has_next) { + serializer_->decode_word(*it_); + serializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + } else { + serializer_.reset(); + } + return *this; +} + +bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { + return (lhs.serializer_ == rhs.serializer_) && + (!lhs.serializer_ || (lhs.it_ == rhs.it_)); +} + +Snapshot::Iterator Snapshot::begin(std::shared_ptr serializer) const { + auto it = decoder_.begin(); + if (it == decoder_.end()) { + return end(); + } + serializer->decode_word(*it); + serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; +} + +Snapshot::Iterator Snapshot::end() const { + return Snapshot::Iterator{decoder_.end(), {}, path()}; +} + bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) { WordItem item; for (auto it = decoder_.begin(); it != decoder_.end(); ++it, ++item.position) { @@ -105,13 +137,12 @@ HeaderSnapshot::~HeaderSnapshot() { } bool HeaderSnapshot::for_each_header(const Walker& walker) { - HeaderSnapshotWordSerializer serializer; - - return for_each_item([this, walker, &serializer](const WordItem& item) -> bool { - serializer.decode_word(item.value); - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return walker(&serializer.header); - }); + for (auto it = begin(std::make_shared()); it != end(); ++it) { + auto s = dynamic_cast(**it); + const bool go_on = walker(&s.header); + if (!go_on) return false; + } + return true; } std::optional HeaderSnapshot::next_header(uint64_t offset, std::optional hash) const { diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index cdc92e120a..31c402ceaf 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -33,6 +33,8 @@ #include #include +#include "snapshot_word_serializer.hpp" + namespace silkworm::snapshots { struct MappedHeadersSnapshot { @@ -58,6 +60,35 @@ struct MappedTransactionsSnapshot { //! because we must avoid to memory-map it again. class Snapshot { public: + class Iterator { + public: + using value_type = std::shared_ptr; + using iterator_category = std::input_iterator_tag; + using difference_type = void; + using pointer = value_type*; + using reference = value_type&; + + Iterator( + seg::Decompressor::Iterator it, + std::shared_ptr serializer, + SnapshotPath path) + : it_(std::move(it)), serializer_(std::move(serializer)), path_(std::move(path)) {} + + reference operator*() { return serializer_; } + pointer operator->() { return &serializer_; } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++(); + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + + private: + seg::Decompressor::Iterator it_; + std::shared_ptr serializer_; + SnapshotPath path_; + }; + static inline const auto kPageSize{os::page_size()}; explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); @@ -77,6 +108,9 @@ class Snapshot { void reopen_segment(); virtual void reopen_index() = 0; + Iterator begin(std::shared_ptr serializer) const; + Iterator end() const; + struct WordItem { uint64_t position{0}; uint64_t offset{0}; @@ -88,6 +122,7 @@ class Snapshot { }; using WordItemFunc = std::function; bool for_each_item(const WordItemFunc& fn); + [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; void close(); From dacc3d8bd71aa03f6d124966346618aa17c56036 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 16:14:10 +0200 Subject: [PATCH 07/37] move Snapshot class to snapshot_base.hpp --- silkworm/db/snapshots/snapshot.cpp | 97 ------------------- silkworm/db/snapshots/snapshot.hpp | 90 +---------------- silkworm/db/snapshots/snapshot_base.cpp | 123 ++++++++++++++++++++++++ silkworm/db/snapshots/snapshot_base.hpp | 119 +++++++++++++++++++++++ 4 files changed, 243 insertions(+), 186 deletions(-) create mode 100644 silkworm/db/snapshots/snapshot_base.cpp create mode 100644 silkworm/db/snapshots/snapshot_base.hpp diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 6cf344a0b3..40108d0f56 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -30,103 +30,6 @@ namespace silkworm::snapshots { -Snapshot::Snapshot(SnapshotPath path, std::optional segment_region) - : path_(std::move(path)), decoder_{path_.path(), segment_region} {} - -MemoryMappedRegion Snapshot::memory_file_region() const { - const auto memory_file{decoder_.memory_file()}; - if (!memory_file) return MemoryMappedRegion{}; - return memory_file->region(); -} - -void Snapshot::reopen_segment() { - close_segment(); - - // Open decompressor that opens the mapped file in turns - decoder_.open(); -} - -Snapshot::Iterator& Snapshot::Iterator::operator++() { - bool has_next = it_.has_next(); - ++it_; - - if (has_next) { - serializer_->decode_word(*it_); - serializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); - } else { - serializer_.reset(); - } - return *this; -} - -bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { - return (lhs.serializer_ == rhs.serializer_) && - (!lhs.serializer_ || (lhs.it_ == rhs.it_)); -} - -Snapshot::Iterator Snapshot::begin(std::shared_ptr serializer) const { - auto it = decoder_.begin(); - if (it == decoder_.end()) { - return end(); - } - serializer->decode_word(*it); - serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; -} - -Snapshot::Iterator Snapshot::end() const { - return Snapshot::Iterator{decoder_.end(), {}, path()}; -} - -bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) { - WordItem item; - for (auto it = decoder_.begin(); it != decoder_.end(); ++it, ++item.position) { - item.value = std::move(*it); - item.offset = it.current_word_offset(); - SILK_TRACE << "Snapshot::for_each_item item: offset=" << item.offset - << " position=" << item.position - << " value=" << to_hex(item.value); - - const bool result = fn(item); - if (!result) return false; - } - return true; -} - -std::optional Snapshot::next_item(uint64_t offset, ByteView prefix) const { - SILK_TRACE << "Snapshot::next_item offset: " << offset; - auto data_iterator = decoder_.make_iterator(); - data_iterator.reset(offset); - - std::optional item; - if (!data_iterator.has_next()) { - return item; - } - if (!prefix.empty() && !data_iterator.has_prefix(prefix)) { - return item; - } - - item = WordItem{}; - try { - item->offset = data_iterator.next(item->value); - } catch (const std::runtime_error& re) { - SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what(); - return {}; - } - - return item; -} - -void Snapshot::close() { - close_segment(); - close_index(); -} - -void Snapshot::close_segment() { - // Close decompressor that closes the mapped file in turns - decoder_.close(); -} - HeaderSnapshot::HeaderSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} HeaderSnapshot::HeaderSnapshot(SnapshotPath path, MappedHeadersSnapshot mapped) diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index 31c402ceaf..dc1eccc4ee 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -16,12 +16,9 @@ #pragma once -#include #include -#include #include #include -#include #include #include @@ -30,9 +27,8 @@ #include #include #include -#include -#include +#include "snapshot_base.hpp" #include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -53,90 +49,6 @@ struct MappedTransactionsSnapshot { MemoryMappedRegion tx_hash_2_block_index; }; -//! \brief Generic snapshot containing data points for a specific block interval [block_from, block_to). -//! \warning The snapshot segment can also be externally managed. This means that the memory-mapping can happen -//! outside of this class and a \code Snapshot instance can be created by specifying the \code MemoryMappedRegion -//! segment containing the information about the memory region already mapped. This must be taken into account -//! because we must avoid to memory-map it again. -class Snapshot { - public: - class Iterator { - public: - using value_type = std::shared_ptr; - using iterator_category = std::input_iterator_tag; - using difference_type = void; - using pointer = value_type*; - using reference = value_type&; - - Iterator( - seg::Decompressor::Iterator it, - std::shared_ptr serializer, - SnapshotPath path) - : it_(std::move(it)), serializer_(std::move(serializer)), path_(std::move(path)) {} - - reference operator*() { return serializer_; } - pointer operator->() { return &serializer_; } - - Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } - Iterator& operator++(); - - friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; - friend bool operator==(const Iterator& lhs, const Iterator& rhs); - - private: - seg::Decompressor::Iterator it_; - std::shared_ptr serializer_; - SnapshotPath path_; - }; - - static inline const auto kPageSize{os::page_size()}; - - explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); - virtual ~Snapshot() = default; - - [[nodiscard]] SnapshotPath path() const { return path_; } - [[nodiscard]] std::filesystem::path fs_path() const { return path_.path(); } - - [[nodiscard]] BlockNum block_from() const { return path_.block_from(); } - [[nodiscard]] BlockNum block_to() const { return path_.block_to(); } - - [[nodiscard]] bool empty() const { return item_count() == 0; } - [[nodiscard]] std::size_t item_count() const { return decoder_.words_count(); } - - [[nodiscard]] MemoryMappedRegion memory_file_region() const; - - void reopen_segment(); - virtual void reopen_index() = 0; - - Iterator begin(std::shared_ptr serializer) const; - Iterator end() const; - - struct WordItem { - uint64_t position{0}; - uint64_t offset{0}; - Bytes value; - - WordItem() { - value.reserve(kPageSize); - } - }; - using WordItemFunc = std::function; - bool for_each_item(const WordItemFunc& fn); - - [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; - - void close(); - - protected: - void close_segment(); - virtual void close_index() = 0; - - //! The path of the segment file for this snapshot - SnapshotPath path_; - - seg::Decompressor decoder_; -}; - class HeaderSnapshot : public Snapshot { public: explicit HeaderSnapshot(SnapshotPath path); diff --git a/silkworm/db/snapshots/snapshot_base.cpp b/silkworm/db/snapshots/snapshot_base.cpp new file mode 100644 index 0000000000..4b96a8bf1d --- /dev/null +++ b/silkworm/db/snapshots/snapshot_base.cpp @@ -0,0 +1,123 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "snapshot_base.hpp" + +#include + +#include +#include + +namespace silkworm::snapshots { + +Snapshot::Snapshot(SnapshotPath path, std::optional segment_region) + : path_(std::move(path)), decoder_{path_.path(), segment_region} {} + +MemoryMappedRegion Snapshot::memory_file_region() const { + const auto memory_file{decoder_.memory_file()}; + if (!memory_file) return MemoryMappedRegion{}; + return memory_file->region(); +} + +void Snapshot::reopen_segment() { + close_segment(); + + // Open decompressor that opens the mapped file in turns + decoder_.open(); +} + +Snapshot::Iterator& Snapshot::Iterator::operator++() { + bool has_next = it_.has_next(); + ++it_; + + if (has_next) { + serializer_->decode_word(*it_); + serializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + } else { + serializer_.reset(); + } + return *this; +} + +bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { + return (lhs.serializer_ == rhs.serializer_) && + (!lhs.serializer_ || (lhs.it_ == rhs.it_)); +} + +Snapshot::Iterator Snapshot::begin(std::shared_ptr serializer) const { + auto it = decoder_.begin(); + if (it == decoder_.end()) { + return end(); + } + serializer->decode_word(*it); + serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; +} + +Snapshot::Iterator Snapshot::end() const { + return Snapshot::Iterator{decoder_.end(), {}, path()}; +} + +bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) { + WordItem item; + for (auto it = decoder_.begin(); it != decoder_.end(); ++it, ++item.position) { + item.value = std::move(*it); + item.offset = it.current_word_offset(); + SILK_TRACE << "Snapshot::for_each_item item: offset=" << item.offset + << " position=" << item.position + << " value=" << to_hex(item.value); + + const bool result = fn(item); + if (!result) return false; + } + return true; +} + +std::optional Snapshot::next_item(uint64_t offset, ByteView prefix) const { + SILK_TRACE << "Snapshot::next_item offset: " << offset; + auto data_iterator = decoder_.make_iterator(); + data_iterator.reset(offset); + + std::optional item; + if (!data_iterator.has_next()) { + return item; + } + if (!prefix.empty() && !data_iterator.has_prefix(prefix)) { + return item; + } + + item = WordItem{}; + try { + item->offset = data_iterator.next(item->value); + } catch (const std::runtime_error& re) { + SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what(); + return {}; + } + + return item; +} + +void Snapshot::close() { + close_segment(); + close_index(); +} + +void Snapshot::close_segment() { + // Close decompressor that closes the mapped file in turns + decoder_.close(); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_base.hpp b/silkworm/db/snapshots/snapshot_base.hpp new file mode 100644 index 0000000000..1010457e07 --- /dev/null +++ b/silkworm/db/snapshots/snapshot_base.hpp @@ -0,0 +1,119 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +//! \brief Generic snapshot containing data points for a specific block interval [block_from, block_to). +//! \warning The snapshot segment can also be externally managed. This means that the memory-mapping can happen +//! outside of this class and a \code Snapshot instance can be created by specifying the \code MemoryMappedRegion +//! segment containing the information about the memory region already mapped. This must be taken into account +//! because we must avoid to memory-map it again. +class Snapshot { + public: + class Iterator { + public: + using value_type = std::shared_ptr; + using iterator_category = std::input_iterator_tag; + using difference_type = void; + using pointer = value_type*; + using reference = value_type&; + + Iterator( + seg::Decompressor::Iterator it, + std::shared_ptr serializer, + SnapshotPath path) + : it_(std::move(it)), serializer_(std::move(serializer)), path_(std::move(path)) {} + + reference operator*() { return serializer_; } + pointer operator->() { return &serializer_; } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++(); + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + + private: + seg::Decompressor::Iterator it_; + std::shared_ptr serializer_; + SnapshotPath path_; + }; + + static inline const auto kPageSize{os::page_size()}; + + explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); + virtual ~Snapshot() = default; + + [[nodiscard]] SnapshotPath path() const { return path_; } + [[nodiscard]] std::filesystem::path fs_path() const { return path_.path(); } + + [[nodiscard]] BlockNum block_from() const { return path_.block_from(); } + [[nodiscard]] BlockNum block_to() const { return path_.block_to(); } + + [[nodiscard]] bool empty() const { return item_count() == 0; } + [[nodiscard]] std::size_t item_count() const { return decoder_.words_count(); } + + [[nodiscard]] MemoryMappedRegion memory_file_region() const; + + void reopen_segment(); + virtual void reopen_index() = 0; + + Iterator begin(std::shared_ptr serializer) const; + Iterator end() const; + + struct WordItem { + uint64_t position{0}; + uint64_t offset{0}; + Bytes value; + + WordItem() { + value.reserve(kPageSize); + } + }; + using WordItemFunc = std::function; + bool for_each_item(const WordItemFunc& fn); + + [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; + + void close(); + + protected: + void close_segment(); + virtual void close_index() = 0; + + //! The path of the segment file for this snapshot + SnapshotPath path_; + + seg::Decompressor decoder_; +}; + +} // namespace silkworm::snapshots From 1b44c1910235e27c6b124f0c598a31a134a76807 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 16:46:53 +0200 Subject: [PATCH 08/37] refactor SnapshotWordSerializer-s: rename value, make check_sanity_with_metadata optional --- silkworm/db/snapshots/body_snapshot.hpp | 7 ++----- silkworm/db/snapshots/header_snapshot.hpp | 6 +++--- silkworm/db/snapshots/snapshot.cpp | 18 +++++++++--------- .../db/snapshots/snapshot_word_serializer.hpp | 2 +- .../snapshots/txn_snapshot_word_serializer.hpp | 14 ++++---------- 5 files changed, 19 insertions(+), 28 deletions(-) diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp index 2f78b6b8ee..280905c3eb 100644 --- a/silkworm/db/snapshots/body_snapshot.hpp +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -27,15 +27,12 @@ namespace silkworm::snapshots { void decode_word_into_body(ByteView word, BlockBodyForStorage& body); struct BodySnapshotWordSerializer : public SnapshotWordSerializer { - BlockBodyForStorage body; + BlockBodyForStorage value; ~BodySnapshotWordSerializer() override = default; void decode_word(ByteView word) override { - decode_word_into_body(word, body); - } - - void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + decode_word_into_body(word, value); } }; diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index b322ce0315..efb6726253 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -27,16 +27,16 @@ void decode_word_into_header(ByteView word, BlockHeader& header); void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to); struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { - BlockHeader header; + BlockHeader value; ~HeaderSnapshotWordSerializer() override = default; void decode_word(ByteView word) override { - decode_word_into_header(word, header); + decode_word_into_header(word, value); } void check_sanity_with_metadata(BlockNum block_from, BlockNum block_to) override { - check_sanity_of_header_with_metadata(header, block_from, block_to); + check_sanity_of_header_with_metadata(value, block_from, block_to); } }; diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 40108d0f56..739d50d224 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -42,7 +42,7 @@ HeaderSnapshot::~HeaderSnapshot() { bool HeaderSnapshot::for_each_header(const Walker& walker) { for (auto it = begin(std::make_shared()); it != end(); ++it) { auto s = dynamic_cast(**it); - const bool go_on = walker(&s.header); + const bool go_on = walker(&s.value); if (!go_on) return false; } return true; @@ -63,7 +63,7 @@ std::optional HeaderSnapshot::next_header(uint64_t offset, std::opt return std::nullopt; } serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return serializer.header; + return serializer.value; } std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash) const { @@ -141,7 +141,7 @@ bool BodySnapshot::for_each_body(const Walker& walker) { serializer.decode_word(item.value); serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); const BlockNum number = path_.block_from() + item.position; - return walker(number, &serializer.body); + return walker(number, &serializer.value); }); } @@ -181,9 +181,9 @@ std::optional BodySnapshot::next_body(uint64_t offset) const { } serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - ensure(serializer.body.base_txn_id >= idx_body_number_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(serializer.body.base_txn_id); }); - return serializer.body; + ensure(serializer.value.base_txn_id >= idx_body_number_->base_data_id(), + [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(serializer.value.base_txn_id); }); + return serializer.value; } std::optional BodySnapshot::body_by_number(BlockNum block_height) const { @@ -246,7 +246,7 @@ TransactionSnapshot::~TransactionSnapshot() { return std::nullopt; } serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return serializer.transaction; + return serializer.value; } std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash) const { @@ -312,7 +312,7 @@ std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, ui for_each_txn(base_txn_id, txn_count, [&transactions, &serializer, this](ByteView word) -> bool { serializer.decode_word(word); serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - transactions.push_back(std::move(serializer.transaction)); + transactions.push_back(std::move(serializer.value)); return true; }); @@ -328,7 +328,7 @@ std::vector TransactionSnapshot::txn_rlp_range(uint64_t base_txn_id, uint for_each_txn(base_txn_id, txn_count, [&rlp_txs, &serializer, this](ByteView word) -> bool { serializer.decode_word(word); serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - rlp_txs.emplace_back(serializer.tx_payload); + rlp_txs.emplace_back(serializer.value); return true; }); diff --git a/silkworm/db/snapshots/snapshot_word_serializer.hpp b/silkworm/db/snapshots/snapshot_word_serializer.hpp index ac09d7095e..e06f77df18 100644 --- a/silkworm/db/snapshots/snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/snapshot_word_serializer.hpp @@ -24,7 +24,7 @@ namespace silkworm::snapshots { struct SnapshotWordSerializer { virtual ~SnapshotWordSerializer() = default; virtual void decode_word(ByteView word) = 0; - virtual void check_sanity_with_metadata(BlockNum block_from, BlockNum block_to) = 0; + virtual void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) {} }; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp index 57d2df4789..8b37253931 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -42,29 +42,23 @@ Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); void decode_word_into_tx(ByteView word, Transaction& tx); struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { - Transaction transaction; + Transaction value; ~TransactionSnapshotWordSerializer() override = default; void decode_word(ByteView word) override { - decode_word_into_tx(word, transaction); - } - - void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + decode_word_into_tx(word, value); } }; struct TransactionSnapshotWordPayloadRlpSerializer : public SnapshotWordSerializer { - ByteView tx_payload; + ByteView value; ~TransactionSnapshotWordPayloadRlpSerializer() override = default; void decode_word(ByteView word) override { auto data = slice_tx_data(word); - tx_payload = slice_tx_payload(data.tx_rlp); - } - - void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) override { + value = slice_tx_payload(data.tx_rlp); } }; From c465358736d3e9c73e98a045b776586755bea159 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 16:36:28 +0200 Subject: [PATCH 09/37] HeaderSnapshotReader --- silkworm/db/snapshots/header_snapshot.hpp | 3 ++ silkworm/db/snapshots/repository.cpp | 13 +++--- silkworm/db/snapshots/repository.hpp | 3 +- silkworm/db/snapshots/snapshot.cpp | 9 ---- silkworm/db/snapshots/snapshot.hpp | 2 - silkworm/db/snapshots/snapshot_base.hpp | 51 +++++++++++++++++++++++ 6 files changed, 64 insertions(+), 17 deletions(-) diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index efb6726253..fcfc284105 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -19,6 +19,7 @@ #include #include +#include "snapshot_base.hpp" #include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -40,4 +41,6 @@ struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { } }; +struct HeaderSnapshotReader : public SnapshotReader {}; + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index a3d4d5d165..ba56fb18c9 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -120,13 +121,15 @@ std::vector SnapshotRepository::missing_block_ranges() const { return missing_ranges; } -bool SnapshotRepository::for_each_header(const HeaderSnapshot::Walker& fn) { +bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { for (const auto& [_, header_snapshot] : header_segments_) { SILK_TRACE << "for_each_header header_snapshot: " << header_snapshot->fs_path().string(); - const auto keep_going = header_snapshot->for_each_header([fn](const auto* header) { - return fn(header); - }); - if (!keep_going) return false; + + HeaderSnapshotReader reader{*header_snapshot}; + for (auto& header : reader) { + const bool keep_going = fn(&header); + if (!keep_going) return false; + } } return true; } diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index af545988f5..ce1ca3386d 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -81,7 +81,8 @@ class SnapshotRepository { void reopen_folder(); void close(); - bool for_each_header(const HeaderSnapshot::Walker& fn); + using HeaderWalker = std::function; + bool for_each_header(const HeaderWalker& fn); bool for_each_body(const BodySnapshot::Walker& fn); [[nodiscard]] std::size_t header_snapshots_count() const { return header_segments_.size(); } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 739d50d224..ad9d085fd9 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -39,15 +39,6 @@ HeaderSnapshot::~HeaderSnapshot() { close(); } -bool HeaderSnapshot::for_each_header(const Walker& walker) { - for (auto it = begin(std::make_shared()); it != end(); ++it) { - auto s = dynamic_cast(**it); - const bool go_on = walker(&s.value); - if (!go_on) return false; - } - return true; -} - std::optional HeaderSnapshot::next_header(uint64_t offset, std::optional hash) const { HeaderSnapshotWordSerializer serializer; diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index dc1eccc4ee..edf841ef67 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -57,8 +57,6 @@ class HeaderSnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_header_hash() const { return idx_header_hash_.get(); } - using Walker = std::function; - bool for_each_header(const Walker& walker); [[nodiscard]] std::optional next_header(uint64_t offset, std::optional hash = {}) const; [[nodiscard]] std::optional header_by_hash(const Hash& block_hash) const; diff --git a/silkworm/db/snapshots/snapshot_base.hpp b/silkworm/db/snapshots/snapshot_base.hpp index 1010457e07..81b5eceaee 100644 --- a/silkworm/db/snapshots/snapshot_base.hpp +++ b/silkworm/db/snapshots/snapshot_base.hpp @@ -116,4 +116,55 @@ class Snapshot { seg::Decompressor decoder_; }; +template +class SnapshotReader { + public: + class Iterator { + public: + using value_type = decltype(TWordSerializer::value); + using iterator_category = std::input_iterator_tag; + using difference_type = void; + using pointer = value_type*; + using reference = value_type&; + + explicit Iterator(Snapshot::Iterator it) + : it_(std::move(it)) {} + + reference operator*() { return value(); } + pointer operator->() { return &value(); } + + Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } + Iterator& operator++() { + ++it_; + return *this; + } + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) = default; + friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; + + private: + value_type& value() { + SnapshotWordSerializer& base_serializer = **it_; + // dynamic_cast is safe because TWordSerializer was used when creating the Iterator + auto& s = dynamic_cast(base_serializer); + return s.value; + } + + Snapshot::Iterator it_; + }; + + SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} + + Iterator begin() const { + return Iterator{snapshot_.begin(std::make_shared())}; + } + + Iterator end() const { + return Iterator{snapshot_.end()}; + } + + private: + const Snapshot& snapshot_; +}; + } // namespace silkworm::snapshots From a6bb467eb139725e045a8835e3ffeda5ff3c6909 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 10:42:43 +0200 Subject: [PATCH 10/37] rename snapshot_base -> snapshot_reader --- silkworm/db/snapshots/header_snapshot.hpp | 2 +- silkworm/db/snapshots/snapshot.hpp | 2 +- .../db/snapshots/{snapshot_base.cpp => snapshot_reader.cpp} | 2 +- .../db/snapshots/{snapshot_base.hpp => snapshot_reader.hpp} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename silkworm/db/snapshots/{snapshot_base.cpp => snapshot_reader.cpp} (99%) rename silkworm/db/snapshots/{snapshot_base.hpp => snapshot_reader.hpp} (100%) diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index fcfc284105..b3295d8d60 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -19,7 +19,7 @@ #include #include -#include "snapshot_base.hpp" +#include "snapshot_reader.hpp" #include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index edf841ef67..ebf7b74c7c 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -28,7 +28,7 @@ #include #include -#include "snapshot_base.hpp" +#include "snapshot_reader.hpp" #include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/snapshots/snapshot_base.cpp b/silkworm/db/snapshots/snapshot_reader.cpp similarity index 99% rename from silkworm/db/snapshots/snapshot_base.cpp rename to silkworm/db/snapshots/snapshot_reader.cpp index 4b96a8bf1d..269f3054c9 100644 --- a/silkworm/db/snapshots/snapshot_base.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "snapshot_base.hpp" +#include "snapshot_reader.hpp" #include diff --git a/silkworm/db/snapshots/snapshot_base.hpp b/silkworm/db/snapshots/snapshot_reader.hpp similarity index 100% rename from silkworm/db/snapshots/snapshot_base.hpp rename to silkworm/db/snapshots/snapshot_reader.hpp From 41ff53f2e3869243bf8d881e59e9365aebf718c3 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 16 Apr 2024 17:41:34 +0200 Subject: [PATCH 11/37] BodySnapshotReader --- silkworm/db/snapshots/body_snapshot.hpp | 3 +++ silkworm/db/snapshots/repository.cpp | 15 ++++++++----- silkworm/db/snapshots/repository.hpp | 4 +++- silkworm/db/snapshots/snapshot.cpp | 26 +++++++---------------- silkworm/db/snapshots/snapshot.hpp | 2 -- silkworm/db/snapshots/snapshot_reader.cpp | 15 ------------- silkworm/db/snapshots/snapshot_reader.hpp | 2 -- silkworm/db/snapshots/snapshot_test.cpp | 15 +++++++------ 8 files changed, 32 insertions(+), 50 deletions(-) diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp index 280905c3eb..bec0a0a591 100644 --- a/silkworm/db/snapshots/body_snapshot.hpp +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -20,6 +20,7 @@ #include #include +#include "snapshot_reader.hpp" #include "snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -36,4 +37,6 @@ struct BodySnapshotWordSerializer : public SnapshotWordSerializer { } }; +struct BodySnapshotReader : public SnapshotReader {}; + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index ba56fb18c9..502a08973e 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -134,13 +135,17 @@ bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { return true; } -bool SnapshotRepository::for_each_body(const BodySnapshot::Walker& fn) { +bool SnapshotRepository::for_each_body(const BodyWalker& fn) { for (const auto& [_, body_snapshot] : body_segments_) { SILK_TRACE << "for_each_body body_snapshot: " << body_snapshot->fs_path().string(); - const auto keep_going = body_snapshot->for_each_body([fn](BlockNum number, const auto* body) { - return fn(number, body); - }); - if (!keep_going) return false; + + BlockNum number = body_snapshot->path().block_from(); + BodySnapshotReader reader{*body_snapshot}; + for (auto& body : reader) { + const bool keep_going = fn(number, &body); + if (!keep_going) return false; + number++; + } } return true; } diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index ce1ca3386d..de307907fa 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -83,7 +83,9 @@ class SnapshotRepository { using HeaderWalker = std::function; bool for_each_header(const HeaderWalker& fn); - bool for_each_body(const BodySnapshot::Walker& fn); + + using BodyWalker = std::function; + bool for_each_body(const BodyWalker& fn); [[nodiscard]] std::size_t header_snapshots_count() const { return header_segments_.size(); } [[nodiscard]] std::size_t body_snapshots_count() const { return body_segments_.size(); } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index ad9d085fd9..174bdba16e 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -125,31 +125,21 @@ BodySnapshot::~BodySnapshot() { close(); } -bool BodySnapshot::for_each_body(const Walker& walker) { - BodySnapshotWordSerializer serializer; - - return for_each_item([&](const WordItem& item) -> bool { - serializer.decode_word(item.value); - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - const BlockNum number = path_.block_from() + item.position; - return walker(number, &serializer.value); - }); -} - std::pair BodySnapshot::compute_txs_amount() { uint64_t first_tx_id{0}, last_tx_id{0}, last_txs_amount{0}; + BlockNum number = path_.block_from(); - const bool read_ok = for_each_body([&](BlockNum number, const StoredBlockBody* body) { + BodySnapshotReader reader{*this}; + for (auto& body : reader) { if (number == path_.block_from()) { - first_tx_id = body->base_txn_id; + first_tx_id = body.base_txn_id; } if (number == path_.block_to() - 1) { - last_tx_id = body->base_txn_id; - last_txs_amount = body->txn_count; + last_tx_id = body.base_txn_id; + last_txs_amount = body.txn_count; } - return true; - }); - if (!read_ok) throw std::runtime_error{"error computing txs amount in: " + path_.path().string()}; + number++; + } if (first_tx_id == 0 && last_tx_id == 0) throw std::runtime_error{"empty body snapshot: " + path_.path().string()}; SILK_TRACE << "first_tx_id: " << first_tx_id << " last_tx_id: " << last_tx_id << " last_txs_amount: " << last_txs_amount; diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index ebf7b74c7c..9ea13eacb5 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -85,8 +85,6 @@ class BodySnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - using Walker = std::function; - bool for_each_body(const Walker& walker); [[nodiscard]] std::optional next_body(uint64_t offset) const; std::pair compute_txs_amount(); diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp index 269f3054c9..a5b20e0188 100644 --- a/silkworm/db/snapshots/snapshot_reader.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -71,21 +71,6 @@ Snapshot::Iterator Snapshot::end() const { return Snapshot::Iterator{decoder_.end(), {}, path()}; } -bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) { - WordItem item; - for (auto it = decoder_.begin(); it != decoder_.end(); ++it, ++item.position) { - item.value = std::move(*it); - item.offset = it.current_word_offset(); - SILK_TRACE << "Snapshot::for_each_item item: offset=" << item.offset - << " position=" << item.position - << " value=" << to_hex(item.value); - - const bool result = fn(item); - if (!result) return false; - } - return true; -} - std::optional Snapshot::next_item(uint64_t offset, ByteView prefix) const { SILK_TRACE << "Snapshot::next_item offset: " << offset; auto data_iterator = decoder_.make_iterator(); diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 81b5eceaee..0555597440 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -99,8 +99,6 @@ class Snapshot { value.reserve(kPageSize); } }; - using WordItemFunc = std::function; - bool for_each_item(const WordItemFunc& fn); [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 19906dbd5d..11ed78290c 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -107,17 +107,18 @@ TEST_CASE("Snapshot::for_each_item", "[silkworm][node][snapshot][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test::HelloWorldSnapshotFile hello_world_snapshot_file{tmp_dir.path(), kValidHeadersSegmentPath.filename()}; - seg::Decompressor decoder{hello_world_snapshot_file.path()}; Snapshot_ForTest tmp_snapshot{hello_world_snapshot_file.path()}; tmp_snapshot.reopen_segment(); CHECK(!tmp_snapshot.empty()); CHECK(tmp_snapshot.item_count() == 1); - tmp_snapshot.for_each_item([&](const auto& word_item) { - CHECK(std::string{word_item.value.cbegin(), word_item.value.cend()} == "hello, world"); - CHECK(word_item.position == 0); - CHECK(word_item.offset == 0); - return true; - }); + + seg::Decompressor decoder{hello_world_snapshot_file.path()}; + decoder.open(); + auto it = decoder.begin(); + auto& word = *it; + CHECK(std::string{word.cbegin(), word.cend()} == "hello, world"); + CHECK(it.current_word_offset() == 0); + CHECK(++it == decoder.end()); } TEST_CASE("Snapshot::close", "[silkworm][node][snapshot][snapshot]") { From 73f7760545bf284b3e88a44874dd5eaddb3b3506 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 17 Apr 2024 09:28:01 +0200 Subject: [PATCH 12/37] refactor next_item/seek, TransactionSnapshotReader --- silkworm/db/snapshots/seg/decompressor.cpp | 21 +++++++ silkworm/db/snapshots/seg/decompressor.hpp | 7 +++ silkworm/db/snapshots/snapshot.cpp | 67 ++-------------------- silkworm/db/snapshots/snapshot.hpp | 6 -- silkworm/db/snapshots/snapshot_reader.cpp | 18 ++++++ silkworm/db/snapshots/snapshot_reader.hpp | 13 +++++ silkworm/db/snapshots/txn_snapshot.hpp | 26 +++++++++ 7 files changed, 91 insertions(+), 67 deletions(-) create mode 100644 silkworm/db/snapshots/txn_snapshot.hpp diff --git a/silkworm/db/snapshots/seg/decompressor.cpp b/silkworm/db/snapshots/seg/decompressor.cpp index 35f91493b0..155fc32d56 100644 --- a/silkworm/db/snapshots/seg/decompressor.cpp +++ b/silkworm/db/snapshots/seg/decompressor.cpp @@ -392,6 +392,27 @@ Decompressor::Iterator Decompressor::begin() const { return end(); } +Decompressor::Iterator Decompressor::seek(uint64_t offset, ByteView prefix) const { + SILK_TRACE << "Decompressor::seek offset: " << offset; + Iterator it = make_iterator(); + it.reset(offset); + if (!it.has_next()) { + return end(); + } + + if (!prefix.empty() && !it.has_prefix(prefix)) { + return end(); + } + + try { + ++it; + return it; + } catch (const std::runtime_error& re) { + SILK_WARN << "Decompressor::seek invalid offset: " << offset << " what: " << re.what(); + return end(); + } +} + void Decompressor::close() { compressed_file_.reset(); } diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index c2b2cc05ba..527c9d41d8 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -299,6 +299,13 @@ class Decompressor { Iterator begin() const; Iterator end() const { return Iterator::make_end(this); } + /** + * Returns an iterator at a given offset. + * If the offset is invalid it returns end(). + * Seek makes sure that the result starts with a given prefix, otherwise returns end(). + */ + Iterator seek(uint64_t offset, ByteView prefix = {}) const; + void close(); private: diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 174bdba16e..4c0f7767d5 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -26,6 +26,7 @@ #include "body_snapshot.hpp" #include "header_snapshot.hpp" +#include "txn_snapshot.hpp" #include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -39,24 +40,6 @@ HeaderSnapshot::~HeaderSnapshot() { close(); } -std::optional HeaderSnapshot::next_header(uint64_t offset, std::optional hash) const { - HeaderSnapshotWordSerializer serializer; - - // Get the next data item at specified offset, optionally checking if it starts with block hash first byte - const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - if (!item) { - return std::nullopt; - } - - try { - serializer.decode_word(item->value); - } catch (...) { - return std::nullopt; - } - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return serializer.value; -} - std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash) const { if (!idx_header_hash_) { return {}; @@ -73,7 +56,7 @@ std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset; // Finally, read the next header at specified offset - auto header = next_header(block_header_offset, block_hash); + auto header = HeaderSnapshotReader{*this}.seek_one(block_header_offset, block_hash); // We *must* ensure that the retrieved header hash matches because there is no way to know if key exists in MPHF if (header && header->hash() != block_hash) { header.reset(); @@ -91,7 +74,7 @@ std::optional HeaderSnapshot::header_by_number(BlockNum block_heigh // Then, get the header offset in snapshot by using ordinal lookup const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); // Finally, read the next header at specified offset - return next_header(block_header_offset); + return HeaderSnapshotReader{*this}.seek_one(block_header_offset); } void HeaderSnapshot::reopen_index() { @@ -147,26 +130,6 @@ std::pair BodySnapshot::compute_txs_amount() { return {first_tx_id, last_tx_id + last_txs_amount - first_tx_id}; } -std::optional BodySnapshot::next_body(uint64_t offset) const { - BodySnapshotWordSerializer serializer; - - const auto item = next_item(offset); - if (!item) { - return std::nullopt; - } - - try { - serializer.decode_word(item->value); - } catch (...) { - return std::nullopt; - } - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - - ensure(serializer.value.base_txn_id >= idx_body_number_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(serializer.value.base_txn_id); }); - return serializer.value; -} - std::optional BodySnapshot::body_by_number(BlockNum block_height) const { if (!idx_body_number_ || block_height < idx_body_number_->base_data_id()) { return {}; @@ -177,7 +140,7 @@ std::optional BodySnapshot::body_by_number(BlockNum block_heigh // Then, get the body offset in snapshot by using ordinal lookup const auto block_body_offset = idx_body_number_->ordinal_lookup(block_body_position); // Finally, read the next body at specified offset - return next_body(block_body_offset); + return BodySnapshotReader{*this}.seek_one(block_body_offset); } void BodySnapshot::reopen_index() { @@ -212,24 +175,6 @@ TransactionSnapshot::~TransactionSnapshot() { close(); } -[[nodiscard]] std::optional TransactionSnapshot::next_txn(uint64_t offset, std::optional hash) const { - TransactionSnapshotWordSerializer serializer; - - // Get the next data item at specified offset, optionally checking if it starts with txn hash first byte - const auto item = hash ? next_item(offset, {hash->bytes, 1}) : next_item(offset); - if (!item) { - return std::nullopt; - } - - try { - serializer.decode_word(item->value); - } catch (...) { - return std::nullopt; - } - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return serializer.value; -} - std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash) const { if (!idx_txn_hash_) { return {}; @@ -243,7 +188,7 @@ std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash // Then, get the transaction offset in snapshot by using ordinal lookup const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); // Finally, read the next transaction at specified offset - auto txn = next_txn(txn_offset, txn_hash); + auto txn = TransactionSnapshotReader{*this}.seek_one(txn_offset, txn_hash); // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF if (txn && txn->hash() != txn_hash) { return {}; @@ -261,7 +206,7 @@ std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const // Then, get the transaction offset in snapshot by using ordinal lookup const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); // Finally, read the next transaction at specified offset - return next_txn(txn_offset); + return TransactionSnapshotReader{*this}.seek_one(txn_offset); } std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& txn_hash) const { diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index 9ea13eacb5..d0196d9425 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -57,8 +57,6 @@ class HeaderSnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_header_hash() const { return idx_header_hash_.get(); } - [[nodiscard]] std::optional next_header(uint64_t offset, std::optional hash = {}) const; - [[nodiscard]] std::optional header_by_hash(const Hash& block_hash) const; [[nodiscard]] std::optional header_by_number(BlockNum block_height) const; @@ -85,8 +83,6 @@ class BodySnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - [[nodiscard]] std::optional next_body(uint64_t offset) const; - std::pair compute_txs_amount(); [[nodiscard]] std::optional body_by_number(BlockNum block_height) const; @@ -113,8 +109,6 @@ class TransactionSnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash() const { return idx_txn_hash_.get(); } [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash_2_block() const { return idx_txn_hash_2_block_.get(); } - [[nodiscard]] std::optional next_txn(uint64_t offset, std::optional hash = {}) const; - [[nodiscard]] std::optional txn_by_hash(const Hash& txn_hash) const; [[nodiscard]] std::optional txn_by_id(uint64_t txn_id) const; [[nodiscard]] std::vector txn_range(uint64_t base_txn_id, uint64_t txn_count, bool read_senders) const; diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp index a5b20e0188..a86f9932f0 100644 --- a/silkworm/db/snapshots/snapshot_reader.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -95,6 +95,24 @@ std::optional Snapshot::next_item(uint64_t offset, ByteView return item; } +seg::Decompressor::Iterator Snapshot::seek_decoder(uint64_t offset, std::optional hash_prefix) const { + return decoder_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); +} + +Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const { + auto it = seek_decoder(offset, hash_prefix); + if (it == decoder_.end()) { + return end(); + } + try { + serializer->decode_word(*it); + } catch (...) { + return end(); + } + serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; +} + void Snapshot::close() { close_segment(); close_index(); diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 0555597440..9ddfa761e2 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -102,11 +103,14 @@ class Snapshot { [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; + Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const; + void close(); protected: void close_segment(); virtual void close_index() = 0; + seg::Decompressor::Iterator seek_decoder(uint64_t offset, std::optional hash_prefix) const; //! The path of the segment file for this snapshot SnapshotPath path_; @@ -161,6 +165,15 @@ class SnapshotReader { return Iterator{snapshot_.end()}; } + Iterator seek(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + return Iterator{snapshot_.seek(offset, hash_prefix, std::make_shared())}; + } + + std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { + auto it = seek(offset, hash_prefix); + return (it != end()) ? std::optional{std::move(*it)} : std::nullopt; + } + private: const Snapshot& snapshot_; }; diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp new file mode 100644 index 0000000000..bfeccefce7 --- /dev/null +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -0,0 +1,26 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include "snapshot_reader.hpp" +#include "txn_snapshot_word_serializer.hpp" + +namespace silkworm::snapshots { + +struct TransactionSnapshotReader : public SnapshotReader {}; + +} // namespace silkworm::snapshots From 8caa0d58d90edb1d08268d76f71caf3c2d5e80ff Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 17 Apr 2024 11:24:46 +0200 Subject: [PATCH 13/37] walkers using references --- cmd/dev/snapshots.cpp | 36 ++++++++++++++-------------- silkworm/db/access_layer.cpp | 4 ++-- silkworm/db/snapshot_sync.cpp | 10 ++++---- silkworm/db/snapshots/repository.cpp | 8 +++---- silkworm/db/snapshots/repository.hpp | 6 ++--- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index 5eab017cdb..c65f4807bb 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -242,12 +242,12 @@ void count_bodies(const SnapSettings& settings, int repetitions) { int num_bodies{0}; uint64_t num_txns{0}; for (int i{0}; i < repetitions; ++i) { - const bool success = snapshot_repo.for_each_body([&](BlockNum number, const BlockBodyForStorage* b) -> bool { + const bool success = snapshot_repo.for_each_body([&](BlockNum number, const BlockBodyForStorage& b) -> bool { // If *system transactions* should not be counted, skip first and last tx in block body - const auto base_txn_id{settings.skip_system_txs ? b->base_txn_id + 1 : b->base_txn_id}; - const auto txn_count{settings.skip_system_txs && b->txn_count >= 2 ? b->txn_count - 2 : b->txn_count}; + const auto base_txn_id{settings.skip_system_txs ? b.base_txn_id + 1 : b.base_txn_id}; + const auto txn_count{settings.skip_system_txs && b.txn_count >= 2 ? b.txn_count - 2 : b.txn_count}; SILK_DEBUG << "Body number: " << number << " base_txn_id: " << base_txn_id << " txn_count: " << txn_count - << " #ommers: " << b->ommers.size(); + << " #ommers: " << b.ommers.size(); num_bodies++; num_txns += txn_count; return true; @@ -265,10 +265,10 @@ void count_headers(const SnapSettings& settings, int repetitions) { std::chrono::time_point start{std::chrono::steady_clock::now()}; int count{0}; for (int i{0}; i < repetitions; ++i) { - const bool success = snapshot_repo.for_each_header([&count](const BlockHeader* h) -> bool { + const bool success = snapshot_repo.for_each_header([&count](const BlockHeader& h) -> bool { ++count; - if (h->number % 50'000 == 0) { - SILK_INFO << "Header number: " << h->number << " hash: " << to_hex(h->hash()); + if (h.number % 50'000 == 0) { + SILK_INFO << "Header number: " << h.number << " hash: " << to_hex(h.hash()); } return true; }); @@ -415,11 +415,11 @@ void lookup_header_by_hash(const SnapSettings& settings) { std::optional matching_header; SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - snapshot_repository.view_header_segments([&](const HeaderSnapshot* snapshot) -> bool { - const auto header{snapshot->header_by_hash(*hash)}; + snapshot_repository.view_header_segments([&](const HeaderSnapshot& snapshot) -> bool { + const auto header{snapshot.header_by_hash(*hash)}; if (header) { matching_header = header; - matching_snapshot = snapshot; + matching_snapshot = &snapshot; } return header.has_value(); }); @@ -608,12 +608,12 @@ void lookup_txn_by_hash_in_all(const SnapSettings& settings, const Hash& hash) { const TransactionSnapshot* matching_snapshot{nullptr}; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot* snapshot) -> bool { - const auto transaction{snapshot->txn_by_hash(hash)}; + snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { + const auto transaction{snapshot.txn_by_hash(hash)}; if (transaction) { - matching_snapshot = snapshot; + matching_snapshot = &snapshot; if (settings.print) { - print_txn(*transaction, snapshot->path().filename()); + print_txn(*transaction, snapshot.path().filename()); } } return transaction.has_value(); @@ -668,12 +668,12 @@ void lookup_txn_by_id_in_all(const SnapSettings& settings, uint64_t txn_id) { const TransactionSnapshot* matching_snapshot{nullptr}; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot* snapshot) -> bool { - const auto transaction{snapshot->txn_by_id(txn_id)}; + snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { + const auto transaction{snapshot.txn_by_id(txn_id)}; if (transaction) { - matching_snapshot = snapshot; + matching_snapshot = &snapshot; if (settings.print) { - print_txn(*transaction, snapshot->path().filename()); + print_txn(*transaction, snapshot.path().filename()); } } return transaction.has_value(); diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 4918776ab0..696a2adc4e 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1232,8 +1232,8 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash std::optional block_header; // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order - repository_->view_header_segments([&](const snapshots::HeaderSnapshot* snapshot) -> bool { - block_header = snapshot->header_by_hash(hash); + repository_->view_header_segments([&](const snapshots::HeaderSnapshot& snapshot) -> bool { + block_header = snapshot.header_by_hash(hash); return block_header.has_value(); }); return block_header; diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index 9455a21e45..9e43b749be 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -257,15 +257,15 @@ void SnapshotSync::update_block_headers(db::RWTxn& txn, BlockNum max_block_avail db::etl_mdbx::Collector hash2bn_collector{}; intx::uint256 total_difficulty{0}; uint64_t block_count{0}; - repository_->for_each_header([&](const BlockHeader* header) -> bool { - SILK_TRACE << "SnapshotSync: header number=" << header->number << " hash=" << Hash{header->hash()}.to_hex(); - const auto block_number = header->number; + repository_->for_each_header([&](const BlockHeader& header) -> bool { + SILK_TRACE << "SnapshotSync: header number=" << header.number << " hash=" << Hash{header.hash()}.to_hex(); + const auto block_number = header.number; if (block_number > max_block_available) return true; - const auto block_hash = header->hash(); + const auto block_hash = header.hash(); // Write block header into kDifficulty table - total_difficulty += header->difficulty; + total_difficulty += header.difficulty; db::write_total_difficulty(txn, block_number, block_hash, total_difficulty); // Write block header into kCanonicalHashes table diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 502a08973e..247b581665 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -50,7 +50,7 @@ SnapshotRepository::ViewResult view(const SnapshotsByPath& segments, BlockNum const auto& snapshot = it->second; // We're looking for the segment containing the target block number in its block range if (snapshot->block_from() <= number && number < snapshot->block_to()) { - const bool walk_done = walker(snapshot.get()); + const bool walk_done = walker(*snapshot); return walk_done ? SnapshotRepository::kWalkSuccess : SnapshotRepository::kWalkFailed; } } @@ -64,7 +64,7 @@ std::size_t view(const SnapshotsByPath& segments, const SnapshotWalker& wa bool walk_done{false}; for (auto it = segments.rbegin(); it != segments.rend() && !walk_done; ++it) { const auto& snapshot = it->second; - walk_done = walker(snapshot.get()); + walk_done = walker(*snapshot); ++visited_views; } return visited_views; @@ -128,7 +128,7 @@ bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { HeaderSnapshotReader reader{*header_snapshot}; for (auto& header : reader) { - const bool keep_going = fn(&header); + const bool keep_going = fn(header); if (!keep_going) return false; } } @@ -142,7 +142,7 @@ bool SnapshotRepository::for_each_body(const BodyWalker& fn) { BlockNum number = body_snapshot->path().block_from(); BodySnapshotReader reader{*body_snapshot}; for (auto& body : reader) { - const bool keep_going = fn(number, &body); + const bool keep_going = fn(number, body); if (!keep_going) return false; number++; } diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index de307907fa..063a96b560 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -40,7 +40,7 @@ template using SnapshotsByPath = std::map>; template -using SnapshotWalker = std::function; +using SnapshotWalker = std::function; using HeaderSnapshotWalker = SnapshotWalker; using BodySnapshotWalker = SnapshotWalker; using TransactionSnapshotWalker = SnapshotWalker; @@ -81,10 +81,10 @@ class SnapshotRepository { void reopen_folder(); void close(); - using HeaderWalker = std::function; + using HeaderWalker = std::function; bool for_each_header(const HeaderWalker& fn); - using BodyWalker = std::function; + using BodyWalker = std::function; bool for_each_body(const BodyWalker& fn); [[nodiscard]] std::size_t header_snapshots_count() const { return header_segments_.size(); } From 75cd594d84aff22fdef4aeeb507e4dbd34c86158 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 17 Apr 2024 13:07:54 +0200 Subject: [PATCH 14/37] ordinal_lookup refactoring --- silkworm/db/snapshots/rec_split/rec_split.hpp | 15 ++++++ silkworm/db/snapshots/snapshot.cpp | 48 +++++-------------- 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 1b7ef653f6..60ab4e5494 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -682,6 +683,20 @@ class RecSplit { //! only access to the Elias-Fano structure containing all offsets [[nodiscard]] std::size_t ordinal_lookup(uint64_t i) const { return ef_offsets_->get(i); } + [[nodiscard]] std::size_t ordinal_lookup_by_data_id(uint64_t data_id) const { + ensure(data_id >= base_data_id(), [&]() { + return std::string("ordinal_lookup_by_data_id: data_id is out of range") + + " data_id = " + std::to_string(data_id) + ";" + + " base_data_id = " + std::to_string(base_data_id()) + ";"; + }); + return ordinal_lookup(data_id - base_data_id()); + } + + [[nodiscard]] std::optional ordinal_lookup_by_key(ByteView key) const { + auto [i, found] = lookup(key); + return found ? std::optional{ordinal_lookup(i)} : std::nullopt; + } + //! Return the number of keys used to build the RecSplit instance [[nodiscard]] std::size_t key_count() const { return key_count_; } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 4c0f7767d5..71a8bfaa62 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -45,15 +45,11 @@ std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash return {}; } - // First, get the header ordinal position in snapshot by using block hash as MPHF index - const auto [block_header_position, found] = idx_header_hash_->lookup(block_hash); - SILK_TRACE << "HeaderSnapshot::header_by_hash block_hash: " << block_hash.to_hex() << " block_header_position: " - << block_header_position << " found: " << found; - if (!found) { - return {}; + auto block_header_offset_opt = idx_header_hash_->ordinal_lookup_by_key(block_hash); + if (!block_header_offset_opt) { + return std::nullopt; } - // Then, get the header offset in snapshot by using ordinal lookup - const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); + size_t block_header_offset = *block_header_offset_opt; SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset; // Finally, read the next header at specified offset auto header = HeaderSnapshotReader{*this}.seek_one(block_header_offset, block_hash); @@ -69,11 +65,7 @@ std::optional HeaderSnapshot::header_by_number(BlockNum block_heigh return {}; } - // First, calculate the header ordinal position relative to the first block height within snapshot - const auto block_header_position = block_height - idx_header_hash_->base_data_id(); - // Then, get the header offset in snapshot by using ordinal lookup - const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position); - // Finally, read the next header at specified offset + size_t block_header_offset = idx_header_hash_->ordinal_lookup_by_data_id(block_height); return HeaderSnapshotReader{*this}.seek_one(block_header_offset); } @@ -135,11 +127,7 @@ std::optional BodySnapshot::body_by_number(BlockNum block_heigh return {}; } - // First, calculate the body ordinal position relative to the first block height within snapshot - const auto block_body_position = block_height - idx_body_number_->base_data_id(); - // Then, get the body offset in snapshot by using ordinal lookup - const auto block_body_offset = idx_body_number_->ordinal_lookup(block_body_position); - // Finally, read the next body at specified offset + size_t block_body_offset = idx_body_number_->ordinal_lookup_by_data_id(block_height); return BodySnapshotReader{*this}.seek_one(block_body_offset); } @@ -180,13 +168,11 @@ std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash return {}; } - // First, get the transaction ordinal position in snapshot by using block hash as MPHF index - const auto [txn_position, found] = idx_txn_hash_->lookup(txn_hash); - if (!found) { - return {}; + auto txn_offset_opt = idx_txn_hash_->ordinal_lookup_by_key(txn_hash); + if (!txn_offset_opt) { + return std::nullopt; } - // Then, get the transaction offset in snapshot by using ordinal lookup - const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); + size_t txn_offset = *txn_offset_opt; // Finally, read the next transaction at specified offset auto txn = TransactionSnapshotReader{*this}.seek_one(txn_offset, txn_hash); // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF @@ -201,11 +187,7 @@ std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const return {}; } - // First, calculate the transaction ordinal position relative to the first transaction ID within snapshot - const auto txn_position = txn_id - idx_txn_hash_->base_data_id(); - // Then, get the transaction offset in snapshot by using ordinal lookup - const auto txn_offset = idx_txn_hash_->ordinal_lookup(txn_position); - // Finally, read the next transaction at specified offset + size_t txn_offset = idx_txn_hash_->ordinal_lookup_by_data_id(txn_id); return TransactionSnapshotReader{*this}.seek_one(txn_offset); } @@ -266,14 +248,8 @@ void TransactionSnapshot::for_each_txn(uint64_t base_txn_id, uint64_t txn_count, return; } - ensure(base_txn_id >= idx_txn_hash_->base_data_id(), - [&]() { return path().index_file().filename() + " has wrong base data ID for base txn ID: " + std::to_string(base_txn_id); }); - - // First, calculate the first transaction ordinal position relative to the base transaction within snapshot - const auto first_txn_position = base_txn_id - idx_txn_hash_->base_data_id(); - // Then, get the first transaction offset in snapshot by using ordinal lookup - const auto first_txn_offset = idx_txn_hash_->ordinal_lookup(first_txn_position); + size_t first_txn_offset = idx_txn_hash_->ordinal_lookup_by_data_id(base_txn_id); // Finally, iterate over each encoded transaction item for (uint64_t i{0}, offset{first_txn_offset}; i < txn_count; ++i) { From d9bb1929175bc95fd2f5353bcffb0467f970227c Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 17 Apr 2024 14:05:36 +0200 Subject: [PATCH 15/37] txn_range refactoring --- silkworm/db/access_layer.cpp | 4 +- silkworm/db/snapshots/snapshot.cpp | 72 +++++++------------ silkworm/db/snapshots/snapshot.hpp | 7 +- silkworm/db/snapshots/snapshot_reader.cpp | 24 ------- silkworm/db/snapshots/snapshot_reader.hpp | 64 ++++++++++------- silkworm/db/snapshots/snapshot_test.cpp | 20 +++--- silkworm/db/snapshots/txn_snapshot.hpp | 3 + .../txn_snapshot_word_serializer.hpp | 3 +- 8 files changed, 86 insertions(+), 111 deletions(-) diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 696a2adc4e..a215678532 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1281,7 +1281,7 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { } bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, - bool read_senders, std::vector& txs) { + bool /*read_senders*/, std::vector& txs) { txs.reserve(txn_count); if (txn_count == 0) { return true; @@ -1290,7 +1290,7 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - txs = tx_snapshot->txn_range(base_txn_id, txn_count, read_senders); + txs = tx_snapshot->txn_range(base_txn_id, txn_count); return true; } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 71a8bfaa62..2fafcca8ab 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -211,56 +211,38 @@ std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& t return block_number; } -std::vector TransactionSnapshot::txn_range(uint64_t base_txn_id, uint64_t txn_count, bool /*read_senders*/) const { - TransactionSnapshotWordSerializer serializer; - - std::vector transactions; - transactions.reserve(txn_count); - - for_each_txn(base_txn_id, txn_count, [&transactions, &serializer, this](ByteView word) -> bool { - serializer.decode_word(word); - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - transactions.push_back(std::move(serializer.value)); - return true; - }); - - return transactions; -} - -std::vector TransactionSnapshot::txn_rlp_range(uint64_t base_txn_id, uint64_t txn_count) const { - TransactionSnapshotWordPayloadRlpSerializer serializer; - - std::vector rlp_txs; - rlp_txs.reserve(txn_count); - - for_each_txn(base_txn_id, txn_count, [&rlp_txs, &serializer, this](ByteView word) -> bool { - serializer.decode_word(word); - serializer.check_sanity_with_metadata(path_.block_from(), path_.block_to()); - rlp_txs.emplace_back(serializer.value); - return true; - }); - - return rlp_txs; -} - -void TransactionSnapshot::for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const { - if (!idx_txn_hash_ || txn_count == 0) { - return; +template +struct RangeFromDataIdQuery { + RangeFromDataIdQuery( + const Snapshot& snapshot, + const rec_split::RecSplitIndex& index) + : snapshot_(snapshot), + index_(index) {} + + std::vector exec_into_vector(uint64_t first_data_id, uint64_t count) { + size_t offset = index_.ordinal_lookup_by_data_id(first_data_id); + return TSnapshotReader{snapshot_}.read_into_vector(offset, count); } - // Then, get the first transaction offset in snapshot by using ordinal lookup - size_t first_txn_offset = idx_txn_hash_->ordinal_lookup_by_data_id(base_txn_id); - - // Finally, iterate over each encoded transaction item - for (uint64_t i{0}, offset{first_txn_offset}; i < txn_count; ++i) { - const auto item = next_item(offset); - ensure(item.has_value(), [&]() { return "TransactionSnapshot: record not found at offset=" + std::to_string(offset); }); + private: + const Snapshot& snapshot_; + const rec_split::RecSplitIndex& index_; +}; - const bool go_on = walker(item->value); - if (!go_on) return; +std::vector TransactionSnapshot::txn_range(uint64_t first_txn_id, uint64_t count) const { + if (!idx_txn_hash_) { + return {}; + } + RangeFromDataIdQuery query{*this, *idx_txn_hash_}; + return query.exec_into_vector(first_txn_id, count); +} - offset = item->offset; +std::vector TransactionSnapshot::txn_rlp_range(uint64_t first_txn_id, uint64_t count) const { + if (!idx_txn_hash_) { + return {}; } + RangeFromDataIdQuery> query{*this, *idx_txn_hash_}; + return query.exec_into_vector(first_txn_id, count); } void TransactionSnapshot::reopen_index() { diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index d0196d9425..deb11fd31c 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -111,17 +111,14 @@ class TransactionSnapshot : public Snapshot { [[nodiscard]] std::optional txn_by_hash(const Hash& txn_hash) const; [[nodiscard]] std::optional txn_by_id(uint64_t txn_id) const; - [[nodiscard]] std::vector txn_range(uint64_t base_txn_id, uint64_t txn_count, bool read_senders) const; - [[nodiscard]] std::vector txn_rlp_range(uint64_t base_txn_id, uint64_t txn_count) const; + [[nodiscard]] std::vector txn_range(uint64_t first_txn_id, uint64_t count) const; + [[nodiscard]] std::vector txn_rlp_range(uint64_t first_txn_id, uint64_t count) const; [[nodiscard]] std::optional block_num_by_txn_hash(const Hash& txn_hash) const; void reopen_index() override; protected: - using Walker = std::function; - void for_each_txn(uint64_t base_txn_id, uint64_t txn_count, const Walker& walker) const; - void close_index() override; private: diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp index a86f9932f0..edb2bc776a 100644 --- a/silkworm/db/snapshots/snapshot_reader.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -71,30 +71,6 @@ Snapshot::Iterator Snapshot::end() const { return Snapshot::Iterator{decoder_.end(), {}, path()}; } -std::optional Snapshot::next_item(uint64_t offset, ByteView prefix) const { - SILK_TRACE << "Snapshot::next_item offset: " << offset; - auto data_iterator = decoder_.make_iterator(); - data_iterator.reset(offset); - - std::optional item; - if (!data_iterator.has_next()) { - return item; - } - if (!prefix.empty() && !data_iterator.has_prefix(prefix)) { - return item; - } - - item = WordItem{}; - try { - item->offset = data_iterator.next(item->value); - } catch (const std::runtime_error& re) { - SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what(); - return {}; - } - - return item; -} - seg::Decompressor::Iterator Snapshot::seek_decoder(uint64_t offset, std::optional hash_prefix) const { return decoder_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); } diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 9ddfa761e2..b97b1ea219 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -16,11 +16,14 @@ #pragma once +#include #include #include -#include +#include #include #include +#include +#include #include #include @@ -44,9 +47,9 @@ class Snapshot { public: using value_type = std::shared_ptr; using iterator_category = std::input_iterator_tag; - using difference_type = void; - using pointer = value_type*; - using reference = value_type&; + using difference_type = std::ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; Iterator( seg::Decompressor::Iterator it, @@ -54,8 +57,8 @@ class Snapshot { SnapshotPath path) : it_(std::move(it)), serializer_(std::move(serializer)), path_(std::move(path)) {} - reference operator*() { return serializer_; } - pointer operator->() { return &serializer_; } + reference operator*() const { return serializer_; } + pointer operator->() const { return &serializer_; } Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } Iterator& operator++(); @@ -69,6 +72,8 @@ class Snapshot { SnapshotPath path_; }; + static_assert(std::input_iterator); + static inline const auto kPageSize{os::page_size()}; explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); @@ -91,18 +96,6 @@ class Snapshot { Iterator begin(std::shared_ptr serializer) const; Iterator end() const; - struct WordItem { - uint64_t position{0}; - uint64_t offset{0}; - Bytes value; - - WordItem() { - value.reserve(kPageSize); - } - }; - - [[nodiscard]] std::optional next_item(uint64_t offset, ByteView prefix = {}) const; - Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const; void close(); @@ -125,15 +118,15 @@ class SnapshotReader { public: using value_type = decltype(TWordSerializer::value); using iterator_category = std::input_iterator_tag; - using difference_type = void; - using pointer = value_type*; - using reference = value_type&; + using difference_type = std::ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; explicit Iterator(Snapshot::Iterator it) : it_(std::move(it)) {} - reference operator*() { return value(); } - pointer operator->() { return &value(); } + reference operator*() const { return value(); } + pointer operator->() const { return &value(); } Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } Iterator& operator++() { @@ -145,7 +138,7 @@ class SnapshotReader { friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; private: - value_type& value() { + value_type& value() const { SnapshotWordSerializer& base_serializer = **it_; // dynamic_cast is safe because TWordSerializer was used when creating the Iterator auto& s = dynamic_cast(base_serializer); @@ -155,6 +148,8 @@ class SnapshotReader { Snapshot::Iterator it_; }; + static_assert(std::input_iterator); + SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} Iterator begin() const { @@ -174,8 +169,29 @@ class SnapshotReader { return (it != end()) ? std::optional{std::move(*it)} : std::nullopt; } + std::vector read_into_vector(uint64_t offset, size_t count) const { + auto it = seek(offset); + if (it == end()) { + throw std::runtime_error("SnapshotReader::read_into_vector: bad offset " + std::to_string(offset)); + } + return iterator_read_into_vector(std::move(it), count); + } + private: const Snapshot& snapshot_; }; +template +void iterator_read_into(It it, size_t count, std::vector& out) { + std::copy_n(std::make_move_iterator(std::move(it)), count, std::back_inserter(out)); +} + +template +std::vector iterator_read_into_vector(It it, size_t count) { + std::vector out; + out.reserve(count); + iterator_read_into(std::move(it), count, out); + return out; +} + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 11ed78290c..7d202ecd39 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -271,24 +271,24 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 SECTION("1'500'012 OK") { - CHECK(tx_snapshot.txn_range(7'341'263, 0, /*read_senders=*/true).empty()); - CHECK(tx_snapshot.txn_range(7'341'263, 7, /*read_senders=*/true).size() == 7); + CHECK(tx_snapshot.txn_range(7'341'263, 0).empty()); + CHECK(tx_snapshot.txn_range(7'341'263, 7).size() == 7); } SECTION("1'500'012 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'262, 7, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'264, 7, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'263, 8, /*read_senders=*/true)); // invalid txn_count + CHECK_THROWS(tx_snapshot.txn_range(7'341'262, 7)); // invalid base_txn_id + CHECK_THROWS(tx_snapshot.txn_range(7'341'264, 7)); // invalid base_txn_id + CHECK_THROWS(tx_snapshot.txn_range(7'341'263, 8)); // invalid txn_count } // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 SECTION("1'500'013 OK") { - CHECK(tx_snapshot.txn_range(7'341'272, 0, /*read_senders=*/true).empty()); - CHECK(tx_snapshot.txn_range(7'341'272, 1, /*read_senders=*/true).size() == 1); + CHECK(tx_snapshot.txn_range(7'341'272, 0).empty()); + CHECK(tx_snapshot.txn_range(7'341'272, 1).size() == 1); } SECTION("1'500'013 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'271, 1, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'273, 1, /*read_senders=*/true)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'272, 2, /*read_senders=*/true)); // invalid txn_count + CHECK_THROWS(tx_snapshot.txn_range(7'341'271, 1)); // invalid base_txn_id + CHECK_THROWS(tx_snapshot.txn_range(7'341'273, 1)); // invalid base_txn_id + CHECK_THROWS(tx_snapshot.txn_range(7'341'272, 2)); // invalid txn_count } } diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp index bfeccefce7..0594eee6fe 100644 --- a/silkworm/db/snapshots/txn_snapshot.hpp +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -23,4 +23,7 @@ namespace silkworm::snapshots { struct TransactionSnapshotReader : public SnapshotReader {}; +template +struct TransactionSnapshotPayloadRlpReader : public SnapshotReader> {}; + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp index 8b37253931..e57675fcfd 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -51,8 +51,9 @@ struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { } }; +template struct TransactionSnapshotWordPayloadRlpSerializer : public SnapshotWordSerializer { - ByteView value; + TBytes value; ~TransactionSnapshotWordPayloadRlpSerializer() override = default; From bb05598758321650830b349f47a94dcc2dc5e598 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 09:14:23 +0200 Subject: [PATCH 16/37] basic queries --- silkworm/db/snapshots/basic_queries.hpp | 84 ++++++++++++++++++++++++ silkworm/db/snapshots/body_queries.hpp | 28 ++++++++ silkworm/db/snapshots/header_queries.hpp | 32 +++++++++ silkworm/db/snapshots/index.cpp | 21 ++++++ silkworm/db/snapshots/index.hpp | 40 +++++++++++ silkworm/db/snapshots/snapshot.cpp | 65 ++++-------------- silkworm/db/snapshots/txn_queries.hpp | 42 ++++++++++++ 7 files changed, 259 insertions(+), 53 deletions(-) create mode 100644 silkworm/db/snapshots/basic_queries.hpp create mode 100644 silkworm/db/snapshots/body_queries.hpp create mode 100644 silkworm/db/snapshots/header_queries.hpp create mode 100644 silkworm/db/snapshots/index.cpp create mode 100644 silkworm/db/snapshots/index.hpp create mode 100644 silkworm/db/snapshots/txn_queries.hpp diff --git a/silkworm/db/snapshots/basic_queries.hpp b/silkworm/db/snapshots/basic_queries.hpp new file mode 100644 index 0000000000..2bf6c23c7c --- /dev/null +++ b/silkworm/db/snapshots/basic_queries.hpp @@ -0,0 +1,84 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include + +#include + +#include "index.hpp" +#include "snapshot_reader.hpp" + +namespace silkworm::snapshots { + +template +class BasicQuery { + public: + BasicQuery( + const Snapshot& snapshot, + const Index& index) + : reader_{snapshot}, + index_{index} {} + + protected: + TSnapshotReader reader_; + const Index& index_; +}; + +template +struct FindByIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::optional exec(uint64_t id) { + size_t offset = this->index_.ordinal_lookup_by_data_id(id); + return this->reader_.seek_one(offset); + } +}; + +template +struct FindByHashQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::optional exec(const Hash& hash) { + auto offset = this->index_.ordinal_lookup_by_hash(hash); + if (!offset) { + return std::nullopt; + } + + auto result = this->reader_.seek_one(*offset, hash); + + // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF + if (result && (result->hash() != hash)) { + return std::nullopt; + } + + return result; + } +}; + +template +struct RangeFromIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; + + std::vector exec_into_vector(uint64_t first_id, uint64_t count) { + size_t offset = this->index_.ordinal_lookup_by_data_id(first_id); + return this->reader_.read_into_vector(offset, count); + } +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_queries.hpp b/silkworm/db/snapshots/body_queries.hpp new file mode 100644 index 0000000000..95cc41cacc --- /dev/null +++ b/silkworm/db/snapshots/body_queries.hpp @@ -0,0 +1,28 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include "basic_queries.hpp" +#include "body_snapshot.hpp" + +namespace silkworm::snapshots { + +struct BodyFindByBlockNumQuery : public FindByIdQuery { + using FindByIdQuery::FindByIdQuery; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp new file mode 100644 index 0000000000..6bf311139a --- /dev/null +++ b/silkworm/db/snapshots/header_queries.hpp @@ -0,0 +1,32 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include "basic_queries.hpp" +#include "header_snapshot.hpp" + +namespace silkworm::snapshots { + +struct HeaderFindByBlockNumQuery : public FindByIdQuery { + using FindByIdQuery::FindByIdQuery; +}; + +struct HeaderFindByHashQuery : public FindByHashQuery { + using FindByHashQuery::FindByHashQuery; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.cpp b/silkworm/db/snapshots/index.cpp new file mode 100644 index 0000000000..3ecd1da320 --- /dev/null +++ b/silkworm/db/snapshots/index.cpp @@ -0,0 +1,21 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "index.hpp" + +namespace silkworm::snapshots { + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp new file mode 100644 index 0000000000..f23b8bb3f4 --- /dev/null +++ b/silkworm/db/snapshots/index.hpp @@ -0,0 +1,40 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include + +#include + +#include "rec_split/rec_split.hpp" + +namespace silkworm::snapshots { + +class Index { + public: + explicit Index(const rec_split::RecSplitIndex& index) : index_(index) {} + + std::size_t ordinal_lookup_by_data_id(uint64_t id) const { return index_.ordinal_lookup_by_data_id(id); }; + std::optional ordinal_lookup_by_hash(const Hash& hash) const { return index_.ordinal_lookup_by_key(hash); }; + + private: + const rec_split::RecSplitIndex& index_; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 2fafcca8ab..87a2279d7b 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -24,8 +24,11 @@ #include #include +#include "body_queries.hpp" #include "body_snapshot.hpp" +#include "header_queries.hpp" #include "header_snapshot.hpp" +#include "txn_queries.hpp" #include "txn_snapshot.hpp" #include "txn_snapshot_word_serializer.hpp" @@ -45,28 +48,16 @@ std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash return {}; } - auto block_header_offset_opt = idx_header_hash_->ordinal_lookup_by_key(block_hash); - if (!block_header_offset_opt) { - return std::nullopt; - } - size_t block_header_offset = *block_header_offset_opt; - SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset; - // Finally, read the next header at specified offset - auto header = HeaderSnapshotReader{*this}.seek_one(block_header_offset, block_hash); - // We *must* ensure that the retrieved header hash matches because there is no way to know if key exists in MPHF - if (header && header->hash() != block_hash) { - header.reset(); - } - return header; + return HeaderFindByHashQuery{*this, Index{*idx_header_hash_}}.exec(block_hash); } std::optional HeaderSnapshot::header_by_number(BlockNum block_height) const { + // TODO: move block_height checks inside ordinal_lookup_by_data_id or FindByIdQuery if (!idx_header_hash_ || block_height < path_.block_from() || block_height >= path_.block_to()) { return {}; } - size_t block_header_offset = idx_header_hash_->ordinal_lookup_by_data_id(block_height); - return HeaderSnapshotReader{*this}.seek_one(block_header_offset); + return HeaderFindByBlockNumQuery{*this, Index{*idx_header_hash_}}.exec(block_height); } void HeaderSnapshot::reopen_index() { @@ -123,12 +114,12 @@ std::pair BodySnapshot::compute_txs_amount() { } std::optional BodySnapshot::body_by_number(BlockNum block_height) const { + // TODO: move block_height check inside ordinal_lookup_by_data_id if (!idx_body_number_ || block_height < idx_body_number_->base_data_id()) { return {}; } - size_t block_body_offset = idx_body_number_->ordinal_lookup_by_data_id(block_height); - return BodySnapshotReader{*this}.seek_one(block_body_offset); + return BodyFindByBlockNumQuery{*this, Index{*idx_body_number_}}.exec(block_height); } void BodySnapshot::reopen_index() { @@ -168,18 +159,7 @@ std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash return {}; } - auto txn_offset_opt = idx_txn_hash_->ordinal_lookup_by_key(txn_hash); - if (!txn_offset_opt) { - return std::nullopt; - } - size_t txn_offset = *txn_offset_opt; - // Finally, read the next transaction at specified offset - auto txn = TransactionSnapshotReader{*this}.seek_one(txn_offset, txn_hash); - // We *must* ensure that the retrieved txn hash matches because there is no way to know if key exists in MPHF - if (txn && txn->hash() != txn_hash) { - return {}; - } - return txn; + return TransactionFindByHashQuery{*this, Index{*idx_txn_hash_}}.exec(txn_hash); } std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const { @@ -187,8 +167,7 @@ std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const return {}; } - size_t txn_offset = idx_txn_hash_->ordinal_lookup_by_data_id(txn_id); - return TransactionSnapshotReader{*this}.seek_one(txn_offset); + return TransactionFindByIdQuery{*this, Index{*idx_txn_hash_}}.exec(txn_id); } std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& txn_hash) const { @@ -211,38 +190,18 @@ std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& t return block_number; } -template -struct RangeFromDataIdQuery { - RangeFromDataIdQuery( - const Snapshot& snapshot, - const rec_split::RecSplitIndex& index) - : snapshot_(snapshot), - index_(index) {} - - std::vector exec_into_vector(uint64_t first_data_id, uint64_t count) { - size_t offset = index_.ordinal_lookup_by_data_id(first_data_id); - return TSnapshotReader{snapshot_}.read_into_vector(offset, count); - } - - private: - const Snapshot& snapshot_; - const rec_split::RecSplitIndex& index_; -}; - std::vector TransactionSnapshot::txn_range(uint64_t first_txn_id, uint64_t count) const { if (!idx_txn_hash_) { return {}; } - RangeFromDataIdQuery query{*this, *idx_txn_hash_}; - return query.exec_into_vector(first_txn_id, count); + return TransactionRangeFromIdQuery{*this, Index{*idx_txn_hash_}}.exec_into_vector(first_txn_id, count); } std::vector TransactionSnapshot::txn_rlp_range(uint64_t first_txn_id, uint64_t count) const { if (!idx_txn_hash_) { return {}; } - RangeFromDataIdQuery> query{*this, *idx_txn_hash_}; - return query.exec_into_vector(first_txn_id, count); + return TransactionPayloadRlpRangeFromIdQuery{*this, Index{*idx_txn_hash_}}.exec_into_vector(first_txn_id, count); } void TransactionSnapshot::reopen_index() { diff --git a/silkworm/db/snapshots/txn_queries.hpp b/silkworm/db/snapshots/txn_queries.hpp new file mode 100644 index 0000000000..a2a9e7900f --- /dev/null +++ b/silkworm/db/snapshots/txn_queries.hpp @@ -0,0 +1,42 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include + +#include "basic_queries.hpp" +#include "txn_snapshot.hpp" + +namespace silkworm::snapshots { + +struct TransactionFindByIdQuery : public FindByIdQuery { + using FindByIdQuery::FindByIdQuery; +}; + +struct TransactionFindByHashQuery : public FindByHashQuery { + using FindByHashQuery::FindByHashQuery; +}; + +struct TransactionRangeFromIdQuery : public RangeFromIdQuery { + using RangeFromIdQuery::RangeFromIdQuery; +}; + +struct TransactionPayloadRlpRangeFromIdQuery : public RangeFromIdQuery> { + using RangeFromIdQuery>::RangeFromIdQuery; +}; + +} // namespace silkworm::snapshots From 5d70d51be6ce360273610a78fa53f123136ebfdf Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 12:28:44 +0200 Subject: [PATCH 17/37] BodyTxsAmountQuery --- .../db/snapshots/body_txs_amount_query.cpp | 54 +++++++++++++++++++ .../db/snapshots/body_txs_amount_query.hpp | 40 ++++++++++++++ silkworm/db/snapshots/index_builder_test.cpp | 4 +- silkworm/db/snapshots/snapshot.cpp | 22 -------- silkworm/db/snapshots/snapshot.hpp | 2 - silkworm/db/snapshots/txn_index.cpp | 4 +- 6 files changed, 99 insertions(+), 27 deletions(-) create mode 100644 silkworm/db/snapshots/body_txs_amount_query.cpp create mode 100644 silkworm/db/snapshots/body_txs_amount_query.hpp diff --git a/silkworm/db/snapshots/body_txs_amount_query.cpp b/silkworm/db/snapshots/body_txs_amount_query.cpp new file mode 100644 index 0000000000..52d471530f --- /dev/null +++ b/silkworm/db/snapshots/body_txs_amount_query.cpp @@ -0,0 +1,54 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "body_txs_amount_query.hpp" + +#include + +#include "body_snapshot.hpp" + +namespace silkworm::snapshots { + +BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() { + auto path = snapshot_.path(); + uint64_t first_tx_id{0}, last_tx_id{0}, last_txs_amount{0}; + BlockNum number = path.block_from(); + + BodySnapshotReader reader{snapshot_}; + for (auto& body : reader) { + if (number == path.block_from()) { + first_tx_id = body.base_txn_id; + } + if (number == path.block_to() - 1) { + last_tx_id = body.base_txn_id; + last_txs_amount = body.txn_count; + } + number++; + } + + if ((first_tx_id == 0) && (last_tx_id == 0)) { + throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + path.path().string()); + } + + uint64_t count = last_tx_id + last_txs_amount - first_tx_id; + + return Result{ + first_tx_id, + count, + }; +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/body_txs_amount_query.hpp b/silkworm/db/snapshots/body_txs_amount_query.hpp new file mode 100644 index 0000000000..cde5b7b406 --- /dev/null +++ b/silkworm/db/snapshots/body_txs_amount_query.hpp @@ -0,0 +1,40 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include + +#include "snapshot_reader.hpp" + +namespace silkworm::snapshots { + +class BodyTxsAmountQuery { + public: + struct Result { + uint64_t first_tx_id{}; + uint64_t count{}; + }; + + explicit BodyTxsAmountQuery(const Snapshot& snapshot) : snapshot_(snapshot) {} + + Result exec(); + + private: + const Snapshot& snapshot_; +}; + +} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index_builder_test.cpp b/silkworm/db/snapshots/index_builder_test.cpp index 33d74a4d58..98de5c4cbb 100644 --- a/silkworm/db/snapshots/index_builder_test.cpp +++ b/silkworm/db/snapshots/index_builder_test.cpp @@ -64,8 +64,8 @@ TEST_CASE("TransactionIndex::build KO: empty snapshot", "[silkworm][snapshot][in auto txs_snapshot_path = *SnapshotPath::parse(txs_snapshot_file.path()); auto bodies_snapshot_path = *SnapshotPath::parse(bodies_snapshot_file.path()); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("empty body snapshot")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("empty body snapshot")); + CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), Contains("empty body snapshot")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), Contains("empty body snapshot")); } } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 87a2279d7b..4487904fec 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -91,28 +91,6 @@ BodySnapshot::~BodySnapshot() { close(); } -std::pair BodySnapshot::compute_txs_amount() { - uint64_t first_tx_id{0}, last_tx_id{0}, last_txs_amount{0}; - BlockNum number = path_.block_from(); - - BodySnapshotReader reader{*this}; - for (auto& body : reader) { - if (number == path_.block_from()) { - first_tx_id = body.base_txn_id; - } - if (number == path_.block_to() - 1) { - last_tx_id = body.base_txn_id; - last_txs_amount = body.txn_count; - } - number++; - } - if (first_tx_id == 0 && last_tx_id == 0) throw std::runtime_error{"empty body snapshot: " + path_.path().string()}; - - SILK_TRACE << "first_tx_id: " << first_tx_id << " last_tx_id: " << last_tx_id << " last_txs_amount: " << last_txs_amount; - - return {first_tx_id, last_tx_id + last_txs_amount - first_tx_id}; -} - std::optional BodySnapshot::body_by_number(BlockNum block_height) const { // TODO: move block_height check inside ordinal_lookup_by_data_id if (!idx_body_number_ || block_height < idx_body_number_->base_data_id()) { diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index deb11fd31c..18f1411dfd 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -83,8 +83,6 @@ class BodySnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - std::pair compute_txs_amount(); - [[nodiscard]] std::optional body_by_number(BlockNum block_height) const; void reopen_index() override; diff --git a/silkworm/db/snapshots/txn_index.cpp b/silkworm/db/snapshots/txn_index.cpp index 8d8ea1cc0f..688bbe189a 100644 --- a/silkworm/db/snapshots/txn_index.cpp +++ b/silkworm/db/snapshots/txn_index.cpp @@ -16,6 +16,7 @@ #include "txn_index.hpp" +#include "body_txs_amount_query.hpp" #include "snapshot.hpp" #include "txn_snapshot_word_serializer.hpp" @@ -39,7 +40,8 @@ std::pair TransactionIndex::compute_txs_amount( std::optional bodies_segment_region) { BodySnapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; bodies_snapshot.reopen_segment(); - return bodies_snapshot.compute_txs_amount(); + auto result = BodyTxsAmountQuery{bodies_snapshot}.exec(); + return {result.first_tx_id, result.count}; } } // namespace silkworm::snapshots From a596a2c74d8a14d89f0a57121e98d69e626c7334 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 12:53:19 +0200 Subject: [PATCH 18/37] TransactionBlockNumByTxnHashQuery --- silkworm/db/snapshots/index.hpp | 5 +++++ silkworm/db/snapshots/snapshot.cpp | 16 +++------------- silkworm/db/snapshots/txn_queries.hpp | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp index f23b8bb3f4..d74fc61aea 100644 --- a/silkworm/db/snapshots/index.hpp +++ b/silkworm/db/snapshots/index.hpp @@ -33,6 +33,11 @@ class Index { std::size_t ordinal_lookup_by_data_id(uint64_t id) const { return index_.ordinal_lookup_by_data_id(id); }; std::optional ordinal_lookup_by_hash(const Hash& hash) const { return index_.ordinal_lookup_by_key(hash); }; + std::optional lookup_by_hash(const Hash& hash) const { + auto [result, found] = index_.lookup(hash); + return found ? std::optional{result} : std::nullopt; + } + private: const rec_split::RecSplitIndex& index_; }; diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 4487904fec..57d38ff5c5 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -153,19 +153,9 @@ std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& t return {}; } - // Lookup the block number using dedicated MPHF index - const auto [block_number, found] = idx_txn_hash_2_block_->lookup(txn_hash); - if (!found) { - return {}; - } - - // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) - const auto transaction{txn_by_hash(txn_hash)}; - if (!transaction) { - return {}; - } - - return block_number; + Index idx_txn_hash{*idx_txn_hash_}; + TransactionFindByHashQuery txn_by_hash_query{*this, idx_txn_hash}; + return TransactionBlockNumByTxnHashQuery{Index{*idx_txn_hash_2_block_}, txn_by_hash_query}.exec(txn_hash); } std::vector TransactionSnapshot::txn_range(uint64_t first_txn_id, uint64_t count) const { diff --git a/silkworm/db/snapshots/txn_queries.hpp b/silkworm/db/snapshots/txn_queries.hpp index a2a9e7900f..de570d5195 100644 --- a/silkworm/db/snapshots/txn_queries.hpp +++ b/silkworm/db/snapshots/txn_queries.hpp @@ -39,4 +39,24 @@ struct TransactionPayloadRlpRangeFromIdQuery : public RangeFromIdQuery>::RangeFromIdQuery; }; +class TransactionBlockNumByTxnHashQuery { + public: + TransactionBlockNumByTxnHashQuery( + const Index& index, + TransactionFindByHashQuery cross_check_query) + : index_(index), + cross_check_query_(cross_check_query) {} + + std::optional exec(const Hash& hash) { + // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) + const auto transaction = cross_check_query_.exec(hash); + auto result = transaction ? index_.lookup_by_hash(hash) : std::nullopt; + return result; + } + + private: + const Index& index_; + TransactionFindByHashQuery cross_check_query_; +}; + } // namespace silkworm::snapshots From acc047a1376d3439145507616641f1f01d28bfae Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 16:55:51 +0200 Subject: [PATCH 19/37] refactor query call sites to use query objects --- cmd/dev/snapshots.cpp | 28 +++++-- silkworm/db/access_layer.cpp | 26 +++++-- silkworm/db/snapshots/body_queries.hpp | 6 ++ silkworm/db/snapshots/header_queries.hpp | 6 ++ silkworm/db/snapshots/repository.cpp | 6 +- silkworm/db/snapshots/snapshot.cpp | 74 ------------------ silkworm/db/snapshots/snapshot.hpp | 12 --- silkworm/db/snapshots/snapshot_reader.hpp | 3 + silkworm/db/snapshots/snapshot_test.cpp | 91 +++++++++++++++-------- 9 files changed, 117 insertions(+), 135 deletions(-) diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index c65f4807bb..9b05d1b80e 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -34,12 +34,15 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -416,7 +419,8 @@ void lookup_header_by_hash(const SnapSettings& settings) { SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); snapshot_repository.view_header_segments([&](const HeaderSnapshot& snapshot) -> bool { - const auto header{snapshot.header_by_hash(*hash)}; + Index idx_header_hash{*snapshot.idx_header_hash()}; + const auto header = HeaderFindByHashQuery{snapshot, idx_header_hash}.exec(*hash); if (header) { matching_header = header; matching_snapshot = &snapshot; @@ -445,7 +449,8 @@ void lookup_header_by_number(const SnapSettings& settings) { snapshot_repository.reopen_folder(); const auto header_snapshot{snapshot_repository.find_header_segment(block_number)}; if (header_snapshot) { - const auto header{header_snapshot->header_by_number(block_number)}; + Index idx_header_hash{*header_snapshot->idx_header_hash()}; + const auto header = HeaderFindByBlockNumQuery{*header_snapshot, idx_header_hash}.exec(block_number); ensure(header.has_value(), [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + header_snapshot->path().filename(); }); SILK_INFO << "Lookup header number: " << block_number << " found in: " << header_snapshot->path().filename(); @@ -485,7 +490,9 @@ void lookup_body_in_one(const SnapSettings& settings, BlockNum block_number, con std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto body_snapshot{snapshot_repository.get_body_segment(*snapshot_path)}; ensure(body_snapshot, [&]() { return "lookup_body: body segment not found for snapshot file: " + snapshot_path->path().string(); }); - const auto body{body_snapshot->body_by_number(block_number)}; + + Index idx_body_number{*body_snapshot->idx_body_number()}; + const auto body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(block_number); if (body) { SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); if (settings.print) { @@ -505,7 +512,8 @@ void lookup_body_in_all(const SnapSettings& settings, BlockNum block_number) { std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto body_snapshot{snapshot_repository.find_body_segment(block_number)}; if (body_snapshot) { - const auto body{body_snapshot->body_by_number(block_number)}; + Index idx_body_number{*body_snapshot->idx_body_number()}; + const auto body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(block_number); ensure(body.has_value(), [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + body_snapshot->path().filename(); }); SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); @@ -588,7 +596,8 @@ void lookup_txn_by_hash_in_one(const SnapSettings& settings, const Hash& hash, c std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; if (tx_snapshot) { - const auto transaction{tx_snapshot->txn_by_hash(hash)}; + Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; + const auto transaction = TransactionFindByHashQuery{*tx_snapshot, idx_txn_hash}.exec(hash); if (transaction) { SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot->path().filename(); if (settings.print) { @@ -609,7 +618,8 @@ void lookup_txn_by_hash_in_all(const SnapSettings& settings, const Hash& hash) { const TransactionSnapshot* matching_snapshot{nullptr}; std::chrono::time_point start{std::chrono::steady_clock::now()}; snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { - const auto transaction{snapshot.txn_by_hash(hash)}; + Index idx_txn_hash{*snapshot.idx_txn_hash()}; + const auto transaction = TransactionFindByHashQuery{snapshot, idx_txn_hash}.exec(hash); if (transaction) { matching_snapshot = &snapshot; if (settings.print) { @@ -648,7 +658,8 @@ void lookup_txn_by_id_in_one(const SnapSettings& settings, uint64_t txn_id, cons std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; if (tx_snapshot) { - const auto transaction{tx_snapshot->txn_by_id(txn_id)}; + Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; + const auto transaction = TransactionFindByIdQuery{*tx_snapshot, idx_txn_hash}.exec(txn_id); if (transaction) { SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot->path().filename(); if (settings.print) { @@ -669,7 +680,8 @@ void lookup_txn_by_id_in_all(const SnapSettings& settings, uint64_t txn_id) { const TransactionSnapshot* matching_snapshot{nullptr}; std::chrono::time_point start{std::chrono::steady_clock::now()}; snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { - const auto transaction{snapshot.txn_by_id(txn_id)}; + Index idx_txn_hash{*snapshot.idx_txn_hash()}; + const auto transaction = TransactionFindByIdQuery{snapshot, idx_txn_hash}.exec(txn_id); if (transaction) { matching_snapshot = &snapshot; if (settings.print) { diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index a215678532..37c51b003b 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -26,13 +26,18 @@ #include #include #include +#include +#include #include +#include #include #include #include namespace silkworm::db { +using namespace snapshots; + std::optional read_schema_version(ROTxn& txn) { auto cursor = txn.ro_cursor(db::table::kDatabaseInfo); if (!cursor->seek(mdbx::slice{kDbSchemaVersionKey})) { @@ -1220,7 +1225,8 @@ std::optional DataModel::read_header_from_snapshot(BlockNum height) // We know the header snapshot in advance: find it based on target block number const auto header_snapshot = repository_->find_header_segment(height); if (header_snapshot) { - block_header = header_snapshot->header_by_number(height); + Index index{*header_snapshot->idx_header_hash()}; + block_header = HeaderFindByBlockNumQuery{*header_snapshot, index}.exec(height); } return block_header; } @@ -1233,7 +1239,8 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash std::optional block_header; // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order repository_->view_header_segments([&](const snapshots::HeaderSnapshot& snapshot) -> bool { - block_header = snapshot.header_by_hash(hash); + Index index{*snapshot.idx_header_hash()}; + block_header = HeaderFindByHashQuery{snapshot, index}.exec(hash); return block_header.has_value(); }); return block_header; @@ -1248,7 +1255,8 @@ bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, Bloc const auto body_snapshot = repository_->find_body_segment(height); if (!body_snapshot) return false; - auto stored_body = body_snapshot->body_by_number(height); + Index idx_body_number{*body_snapshot->idx_body_number()}; + auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1273,7 +1281,8 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { // We know the body snapshot in advance: find it based on target block number const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - const auto stored_body = body_snapshot->body_by_number(height); + Index idx_body_number{*body_snapshot->idx_body_number()}; + const auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); return stored_body.has_value(); } @@ -1290,7 +1299,8 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - txs = tx_snapshot->txn_range(base_txn_id, txn_count); + Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; + txs = TransactionRangeFromIdQuery{*tx_snapshot, idx_txn_hash}.exec_into_vector(base_txn_id, txn_count); return true; } @@ -1298,7 +1308,8 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs) { const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - auto stored_body = body_snapshot->body_by_number(height); + Index idx_body_number{*body_snapshot->idx_body_number()}; + auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1310,7 +1321,8 @@ bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - rlp_txs = tx_snapshot->txn_rlp_range(base_txn_id, txn_count); + Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; + rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_snapshot, idx_txn_hash}.exec_into_vector(base_txn_id, txn_count); return true; } diff --git a/silkworm/db/snapshots/body_queries.hpp b/silkworm/db/snapshots/body_queries.hpp index 95cc41cacc..23af935a8e 100644 --- a/silkworm/db/snapshots/body_queries.hpp +++ b/silkworm/db/snapshots/body_queries.hpp @@ -23,6 +23,12 @@ namespace silkworm::snapshots { struct BodyFindByBlockNumQuery : public FindByIdQuery { using FindByIdQuery::FindByIdQuery; + + std::optional exec(BlockNum id) { + // TODO: move this check inside ordinal_lookup_by_data_id if possible and remove this method + if (id < reader_.block_from()) return std::nullopt; + return FindByIdQuery::exec(id); + } }; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp index 6bf311139a..b4a38b35fb 100644 --- a/silkworm/db/snapshots/header_queries.hpp +++ b/silkworm/db/snapshots/header_queries.hpp @@ -23,6 +23,12 @@ namespace silkworm::snapshots { struct HeaderFindByBlockNumQuery : public FindByIdQuery { using FindByIdQuery::FindByIdQuery; + + std::optional exec(BlockNum id) { + // TODO: move this check inside ordinal_lookup_by_data_id if possible and remove this method + if ((id < reader_.block_from()) || (id >= reader_.block_to())) return std::nullopt; + return FindByIdQuery::exec(id); + } }; struct HeaderFindByHashQuery : public FindByHashQuery { diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 247b581665..57950477c7 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -201,7 +202,10 @@ const TransactionSnapshot* SnapshotRepository::find_tx_segment(BlockNum number) std::optional SnapshotRepository::find_block_number(Hash txn_hash) const { for (const auto& it : std::ranges::reverse_view(tx_segments_)) { const auto& snapshot = it.second; - auto block = snapshot->block_num_by_txn_hash(txn_hash); + + Index idx_txn_hash{*snapshot->idx_txn_hash()}; + Index idx_txn_hash_2_block{*snapshot->idx_txn_hash_2_block()}; + auto block = TransactionBlockNumByTxnHashQuery{idx_txn_hash_2_block, TransactionFindByHashQuery{*snapshot, idx_txn_hash}}.exec(txn_hash); if (block) { return block; } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp index 57d38ff5c5..6c45ce5cc7 100644 --- a/silkworm/db/snapshots/snapshot.cpp +++ b/silkworm/db/snapshots/snapshot.cpp @@ -24,14 +24,6 @@ #include #include -#include "body_queries.hpp" -#include "body_snapshot.hpp" -#include "header_queries.hpp" -#include "header_snapshot.hpp" -#include "txn_queries.hpp" -#include "txn_snapshot.hpp" -#include "txn_snapshot_word_serializer.hpp" - namespace silkworm::snapshots { HeaderSnapshot::HeaderSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} @@ -43,23 +35,6 @@ HeaderSnapshot::~HeaderSnapshot() { close(); } -std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash) const { - if (!idx_header_hash_) { - return {}; - } - - return HeaderFindByHashQuery{*this, Index{*idx_header_hash_}}.exec(block_hash); -} - -std::optional HeaderSnapshot::header_by_number(BlockNum block_height) const { - // TODO: move block_height checks inside ordinal_lookup_by_data_id or FindByIdQuery - if (!idx_header_hash_ || block_height < path_.block_from() || block_height >= path_.block_to()) { - return {}; - } - - return HeaderFindByBlockNumQuery{*this, Index{*idx_header_hash_}}.exec(block_height); -} - void HeaderSnapshot::reopen_index() { ensure(decoder_.is_open(), "HeaderSnapshot: segment not open, call reopen_segment"); @@ -91,15 +66,6 @@ BodySnapshot::~BodySnapshot() { close(); } -std::optional BodySnapshot::body_by_number(BlockNum block_height) const { - // TODO: move block_height check inside ordinal_lookup_by_data_id - if (!idx_body_number_ || block_height < idx_body_number_->base_data_id()) { - return {}; - } - - return BodyFindByBlockNumQuery{*this, Index{*idx_body_number_}}.exec(block_height); -} - void BodySnapshot::reopen_index() { ensure(decoder_.is_open(), "BodySnapshot: segment not open, call reopen_segment"); @@ -132,46 +98,6 @@ TransactionSnapshot::~TransactionSnapshot() { close(); } -std::optional TransactionSnapshot::txn_by_hash(const Hash& txn_hash) const { - if (!idx_txn_hash_) { - return {}; - } - - return TransactionFindByHashQuery{*this, Index{*idx_txn_hash_}}.exec(txn_hash); -} - -std::optional TransactionSnapshot::txn_by_id(uint64_t txn_id) const { - if (!idx_txn_hash_) { - return {}; - } - - return TransactionFindByIdQuery{*this, Index{*idx_txn_hash_}}.exec(txn_id); -} - -std::optional TransactionSnapshot::block_num_by_txn_hash(const Hash& txn_hash) const { - if (!idx_txn_hash_2_block_) { - return {}; - } - - Index idx_txn_hash{*idx_txn_hash_}; - TransactionFindByHashQuery txn_by_hash_query{*this, idx_txn_hash}; - return TransactionBlockNumByTxnHashQuery{Index{*idx_txn_hash_2_block_}, txn_by_hash_query}.exec(txn_hash); -} - -std::vector TransactionSnapshot::txn_range(uint64_t first_txn_id, uint64_t count) const { - if (!idx_txn_hash_) { - return {}; - } - return TransactionRangeFromIdQuery{*this, Index{*idx_txn_hash_}}.exec_into_vector(first_txn_id, count); -} - -std::vector TransactionSnapshot::txn_rlp_range(uint64_t first_txn_id, uint64_t count) const { - if (!idx_txn_hash_) { - return {}; - } - return TransactionPayloadRlpRangeFromIdQuery{*this, Index{*idx_txn_hash_}}.exec_into_vector(first_txn_id, count); -} - void TransactionSnapshot::reopen_index() { ensure(decoder_.is_open(), "TransactionSnapshot: segment not open, call reopen_segment"); diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp index 18f1411dfd..59c2717f99 100644 --- a/silkworm/db/snapshots/snapshot.hpp +++ b/silkworm/db/snapshots/snapshot.hpp @@ -57,9 +57,6 @@ class HeaderSnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_header_hash() const { return idx_header_hash_.get(); } - [[nodiscard]] std::optional header_by_hash(const Hash& block_hash) const; - [[nodiscard]] std::optional header_by_number(BlockNum block_height) const; - void reopen_index() override; protected: @@ -83,8 +80,6 @@ class BodySnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - [[nodiscard]] std::optional body_by_number(BlockNum block_height) const; - void reopen_index() override; protected: @@ -107,13 +102,6 @@ class TransactionSnapshot : public Snapshot { [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash() const { return idx_txn_hash_.get(); } [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash_2_block() const { return idx_txn_hash_2_block_.get(); } - [[nodiscard]] std::optional txn_by_hash(const Hash& txn_hash) const; - [[nodiscard]] std::optional txn_by_id(uint64_t txn_id) const; - [[nodiscard]] std::vector txn_range(uint64_t first_txn_id, uint64_t count) const; - [[nodiscard]] std::vector txn_rlp_range(uint64_t first_txn_id, uint64_t count) const; - - [[nodiscard]] std::optional block_num_by_txn_hash(const Hash& txn_hash) const; - void reopen_index() override; protected: diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index b97b1ea219..60354aff26 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -177,6 +177,9 @@ class SnapshotReader { return iterator_read_into_vector(std::move(it), count); } + [[nodiscard]] BlockNum block_from() const { return snapshot_.block_from(); } + [[nodiscard]] BlockNum block_to() const { return snapshot_.block_to(); } + private: const Snapshot& snapshot_; }; diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 7d202ecd39..79bfcc79db 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -22,10 +22,13 @@ #include #include +#include #include +#include #include #include #include +#include #include #include #include @@ -144,9 +147,12 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind header_snapshot.reopen_segment(); header_snapshot.reopen_index(); - CHECK(!header_snapshot.header_by_number(1'500'011)); - CHECK(header_snapshot.header_by_number(1'500'012)); - const auto header = header_snapshot.header_by_number(1'500'013); + Index idx_header_hash{*header_snapshot.idx_header_hash()}; + HeaderFindByBlockNumQuery header_by_number{header_snapshot, idx_header_hash}; + + CHECK(!header_by_number.exec(1'500'011)); + CHECK(header_by_number.exec(1'500'012)); + const auto header = header_by_number.exec(1'500'013); CHECK(header.has_value()); if (header) { CHECK(header->hash() == 0xbef48d7de01f2d7ea1a7e4d1ed401f73d6d0257a364f6770b25ba51a123ac35f_bytes32); @@ -167,7 +173,7 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind CHECK(header->prev_randao == 0x799895e28a837bbdf28b8ecf5fc0e6251398ecb0ffc7ff5bbb457c21b14ce982_bytes32); CHECK(header->nonce == std::array{0x86, 0x98, 0x76, 0x20, 0x12, 0xb4, 0x6f, 0xef}); } - CHECK(!header_snapshot.header_by_number(1'500'014)); + CHECK(!header_by_number.exec(1'500'014)); } // https://etherscan.io/block/1500013 @@ -183,9 +189,12 @@ TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]" body_snapshot.reopen_segment(); body_snapshot.reopen_index(); - CHECK(!body_snapshot.body_by_number(1'500'011)); - CHECK(body_snapshot.body_by_number(1'500'012)); - const auto body_for_storage = body_snapshot.body_by_number(1'500'013); + Index idx_body_number{*body_snapshot.idx_body_number()}; + BodyFindByBlockNumQuery body_by_number{body_snapshot, idx_body_number}; + + CHECK(!body_by_number.exec(1'500'011)); + CHECK(body_by_number.exec(1'500'012)); + const auto body_for_storage = body_by_number.exec(1'500'013); CHECK(body_for_storage.has_value()); if (body_for_storage) { CHECK(body_for_storage->base_txn_id == 7'341'271); @@ -208,7 +217,11 @@ TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index TransactionSnapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); tx_snapshot.reopen_index(); - const auto transaction = tx_snapshot.txn_by_id(7'341'272); + + Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; + + const auto transaction = txn_by_id.exec(7'341'272); CHECK(transaction.has_value()); if (transaction) { CHECK(transaction->type == TransactionType::kLegacy); @@ -234,23 +247,29 @@ TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][sna tx_snapshot.reopen_segment(); tx_snapshot.reopen_index(); + Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; + + Index idx_txn_hash_2_block{*tx_snapshot.idx_txn_hash_2_block()}; + TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{tx_snapshot, idx_txn_hash}}; + // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 - auto transaction = tx_snapshot.txn_by_id(7'341'269); // known txn id in block 1'500'012 + auto transaction = txn_by_id.exec(7'341'269); // known txn id in block 1'500'012 CHECK(transaction.has_value()); - auto block_number = tx_snapshot.block_num_by_txn_hash(transaction->hash()); + auto block_number = block_num_by_txn_hash.exec(transaction->hash()); CHECK(block_number.has_value()); CHECK(block_number.value() == 1'500'012); // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 - transaction = tx_snapshot.txn_by_id(7'341'272); // known txn id in block 1'500'013 + transaction = txn_by_id.exec(7'341'272); // known txn id in block 1'500'013 CHECK(transaction.has_value()); - block_number = tx_snapshot.block_num_by_txn_hash(transaction->hash()); + block_number = block_num_by_txn_hash.exec(transaction->hash()); CHECK(block_number.has_value()); CHECK(block_number.value() == 1'500'013); // transaction hash not present in snapshot (first txn hash in block 1'500'014) - block_number = tx_snapshot.block_num_by_txn_hash(0xfa496b4cd9748754a28c66690c283ec9429440eb8609998901216908ad1b48eb_bytes32); + block_number = block_num_by_txn_hash.exec(0xfa496b4cd9748754a28c66690c283ec9429440eb8609998901216908ad1b48eb_bytes32); CHECK_FALSE(block_number.has_value()); } @@ -269,26 +288,29 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index tx_snapshot.reopen_segment(); tx_snapshot.reopen_index(); + Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + TransactionRangeFromIdQuery txn_range{tx_snapshot, idx_txn_hash}; + // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 SECTION("1'500'012 OK") { - CHECK(tx_snapshot.txn_range(7'341'263, 0).empty()); - CHECK(tx_snapshot.txn_range(7'341'263, 7).size() == 7); + CHECK(txn_range.exec_into_vector(7'341'263, 0).empty()); + CHECK(txn_range.exec_into_vector(7'341'263, 7).size() == 7); } SECTION("1'500'012 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'262, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'264, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'263, 8)); // invalid txn_count + CHECK_THROWS(txn_range.exec_into_vector(7'341'262, 7)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'264, 7)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'263, 8)); // invalid txn_count } // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 SECTION("1'500'013 OK") { - CHECK(tx_snapshot.txn_range(7'341'272, 0).empty()); - CHECK(tx_snapshot.txn_range(7'341'272, 1).size() == 1); + CHECK(txn_range.exec_into_vector(7'341'272, 0).empty()); + CHECK(txn_range.exec_into_vector(7'341'272, 1).size() == 1); } SECTION("1'500'013 KO") { - CHECK_THROWS(tx_snapshot.txn_range(7'341'271, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'273, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_range(7'341'272, 2)); // invalid txn_count + CHECK_THROWS(txn_range.exec_into_vector(7'341'271, 1)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'273, 1)); // invalid base_txn_id + CHECK_THROWS(txn_range.exec_into_vector(7'341'272, 2)); // invalid txn_count } } @@ -306,26 +328,29 @@ TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][i tx_snapshot.reopen_segment(); tx_snapshot.reopen_index(); + Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + TransactionPayloadRlpRangeFromIdQuery txn_rlp_range{tx_snapshot, idx_txn_hash}; + // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 SECTION("1'500'012 OK") { - CHECK(tx_snapshot.txn_rlp_range(7'341'263, 0).empty()); - CHECK(tx_snapshot.txn_rlp_range(7'341'263, 7).size() == 7); + CHECK(txn_rlp_range.exec_into_vector(7'341'263, 0).empty()); + CHECK(txn_rlp_range.exec_into_vector(7'341'263, 7).size() == 7); } SECTION("1'500'012 KO") { - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'262, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'264, 7)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'263, 8)); // invalid txn_count + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'262, 7)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'264, 7)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'263, 8)); // invalid txn_count } // block 1'500'013: base_txn_id is 7'341'272, txn_count is 1 SECTION("1'500'013 OK") { - CHECK(tx_snapshot.txn_rlp_range(7'341'272, 0).empty()); - CHECK(tx_snapshot.txn_rlp_range(7'341'272, 1).size() == 1); + CHECK(txn_rlp_range.exec_into_vector(7'341'272, 0).empty()); + CHECK(txn_rlp_range.exec_into_vector(7'341'272, 1).size() == 1); } SECTION("1'500'013 KO") { - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'271, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'273, 1)); // invalid base_txn_id - CHECK_THROWS(tx_snapshot.txn_rlp_range(7'341'272, 2)); // invalid txn_count + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'271, 1)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'273, 1)); // invalid base_txn_id + CHECK_THROWS(txn_rlp_range.exec_into_vector(7'341'272, 2)); // invalid txn_count } } From 52f68588c6553ee282dc95bd321a4d765715832b Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 18:36:51 +0200 Subject: [PATCH 20/37] remove path copies from SnapshotBundle --- silkworm/capi/silkworm.cpp | 6 ++---- silkworm/db/snapshots/repository.cpp | 27 ++++++++++++++++++--------- silkworm/db/snapshots/repository.hpp | 5 +---- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index 677cbd9ba2..0afc6e87f6 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -368,12 +368,10 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn transactions_snapshot->reopen_index(); snapshots::SnapshotBundle bundle{ - .headers_snapshot_path = *headers_segment_path, .headers_snapshot = std::move(headers_snapshot), - .bodies_snapshot_path = *bodies_segment_path, .bodies_snapshot = std::move(bodies_snapshot), - .tx_snapshot_path = *transactions_segment_path, - .tx_snapshot = std::move(transactions_snapshot)}; + .tx_snapshot = std::move(transactions_snapshot), + }; handle->snapshot_repository->add_snapshot_bundle(std::move(bundle)); return SILKWORM_OK; } diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 57950477c7..80cba6a0e5 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -78,13 +78,23 @@ SnapshotRepository::~SnapshotRepository() { close(); } -void SnapshotRepository::add_snapshot_bundle(SnapshotBundle&& bundle) { - header_segments_[bundle.headers_snapshot_path.path()] = std::move(bundle.headers_snapshot); - body_segments_[bundle.bodies_snapshot_path.path()] = std::move(bundle.bodies_snapshot); - tx_segments_[bundle.tx_snapshot_path.path()] = std::move(bundle.tx_snapshot); - if (bundle.tx_snapshot_path.block_to() > segment_max_block_) { - segment_max_block_ = bundle.tx_snapshot_path.block_to() - 1; +void SnapshotRepository::add_snapshot_bundle(SnapshotBundle bundle) { + if (bundle.headers_snapshot && bundle.bodies_snapshot && bundle.tx_snapshot) { + // assume that all snapshot types end with the same block, and use one of them + BlockNum last_block = bundle.tx_snapshot->block_to() - 1; + segment_max_block_ = std::max(segment_max_block_, last_block); } + + if (bundle.headers_snapshot) { + header_segments_[bundle.headers_snapshot->fs_path()] = std::move(bundle.headers_snapshot); + } + if (bundle.bodies_snapshot) { + body_segments_[bundle.bodies_snapshot->fs_path()] = std::move(bundle.bodies_snapshot); + } + if (bundle.tx_snapshot) { + tx_segments_[bundle.tx_snapshot->fs_path()] = std::move(bundle.tx_snapshot); + } + idx_max_block_ = max_idx_available(); } @@ -297,9 +307,8 @@ void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files, bool } ensure(snapshot_valid, [&]() { return "invalid empty snapshot " + seg_file.filename(); }); - if (seg_file.block_to() > segment_max_block) { - segment_max_block = seg_file.block_to() - 1; - } + BlockNum last_block = seg_file.block_to() - 1; + segment_max_block = std::max(segment_max_block, last_block); } catch (const std::exception& exc) { SILK_WARN << "Reopen failed for: " << seg_file.path() << " [" << exc.what() << "]"; if (!optimistic) throw; diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 063a96b560..041af33c74 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -46,11 +46,8 @@ using BodySnapshotWalker = SnapshotWalker; using TransactionSnapshotWalker = SnapshotWalker; struct SnapshotBundle { - SnapshotPath headers_snapshot_path; std::unique_ptr headers_snapshot; - SnapshotPath bodies_snapshot_path; std::unique_ptr bodies_snapshot; - SnapshotPath tx_snapshot_path; std::unique_ptr tx_snapshot; }; @@ -74,7 +71,7 @@ class SnapshotRepository { return get_files(kSegmentExtension); } - void add_snapshot_bundle(SnapshotBundle&& bundle); + void add_snapshot_bundle(SnapshotBundle bundle); void reopen_list(const SnapshotPathList& segment_files, bool optimistic = false); void reopen_file(const SnapshotPath& segment_path, bool optimistic = false); From 3733503a92f647eae0d9fa8174f1e04359816475 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 18 Apr 2024 19:36:52 +0200 Subject: [PATCH 21/37] refactor to separate Index-es from snapshots --- cmd/capi/execute.cpp | 132 ++++----- cmd/dev/snapshots.cpp | 140 ++++----- silkworm/capi/silkworm.cpp | 47 +-- silkworm/capi/silkworm_test.cpp | 63 ++-- silkworm/db/access_layer.cpp | 23 +- silkworm/db/snapshot_sync.cpp | 4 +- silkworm/db/snapshots/index.cpp | 21 ++ silkworm/db/snapshots/index.hpp | 35 ++- silkworm/db/snapshots/repository.cpp | 322 ++++++++++----------- silkworm/db/snapshots/repository.hpp | 177 ++++++----- silkworm/db/snapshots/repository_test.cpp | 76 +++-- silkworm/db/snapshots/seg/decompressor.hpp | 3 + silkworm/db/snapshots/snapshot.cpp | 134 --------- silkworm/db/snapshots/snapshot.hpp | 124 -------- silkworm/db/snapshots/snapshot_reader.cpp | 18 +- silkworm/db/snapshots/snapshot_reader.hpp | 17 +- silkworm/db/snapshots/snapshot_test.cpp | 120 +++----- silkworm/db/snapshots/txn_index.cpp | 4 +- 18 files changed, 598 insertions(+), 862 deletions(-) delete mode 100644 silkworm/db/snapshots/snapshot.cpp delete mode 100644 silkworm/db/snapshots/snapshot.hpp diff --git a/cmd/capi/execute.cpp b/cmd/capi/execute.cpp index 46640b29d4..1728408a1f 100644 --- a/cmd/capi/execute.cpp +++ b/cmd/capi/execute.cpp @@ -145,64 +145,65 @@ const char* make_path(const snapshots::SnapshotPath& p) { return path; } -std::vector collect_all_snapshots(const SnapshotRepository& snapshot_repository) { +std::vector collect_all_snapshots(SnapshotRepository& snapshot_repository) { std::vector headers_snapshot_sequence; std::vector bodies_snapshot_sequence; std::vector transactions_snapshot_sequence; - for (const auto& segment_file : snapshot_repository.get_segment_files()) { - switch (segment_file.type()) { - case SnapshotType::headers: { - const auto* header_snapshot{snapshot_repository.get_header_segment(segment_file)}; - const auto* idx_header_hash{header_snapshot->idx_header_hash()}; + snapshot_repository.view_bundles( + [&](const SnapshotBundle& bundle) { + { SilkwormHeadersSnapshot raw_headers_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = header_snapshot->memory_file_region().data(), - .memory_length = header_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.header_snapshot.path()), + .memory_address = bundle.header_snapshot.memory_file_region().data(), + .memory_length = bundle.header_snapshot.memory_file_region().size(), + }, .header_hash_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_header_hash->memory_file_region().data(), - .memory_length = idx_header_hash->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_header_hash.path()), + .memory_address = bundle.idx_header_hash.memory_file_region().data(), + .memory_length = bundle.idx_header_hash.memory_file_region().size(), + }, + }; headers_snapshot_sequence.push_back(raw_headers_snapshot); - } break; - case SnapshotType::bodies: { - const auto* body_snapshot{snapshot_repository.get_body_segment(segment_file)}; - const auto* idx_body_number{body_snapshot->idx_body_number()}; + } + { SilkwormBodiesSnapshot raw_bodies_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = body_snapshot->memory_file_region().data(), - .memory_length = body_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.body_snapshot.path()), + .memory_address = bundle.body_snapshot.memory_file_region().data(), + .memory_length = bundle.body_snapshot.memory_file_region().size(), + }, .block_num_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_body_number->memory_file_region().data(), - .memory_length = idx_body_number->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_body_number.path()), + .memory_address = bundle.idx_body_number.memory_file_region().data(), + .memory_length = bundle.idx_body_number.memory_file_region().size(), + }, + }; bodies_snapshot_sequence.push_back(raw_bodies_snapshot); - } break; - case SnapshotType::transactions: { - const auto* tx_snapshot{snapshot_repository.get_tx_segment(segment_file)}; - const auto* idx_txn_hash{tx_snapshot->idx_txn_hash()}; - const auto* idx_txn_hash_2_block{tx_snapshot->idx_txn_hash_2_block()}; + } + { SilkwormTransactionsSnapshot raw_transactions_snapshot{ .segment{ - .file_path = make_path(segment_file), - .memory_address = tx_snapshot->memory_file_region().data(), - .memory_length = tx_snapshot->memory_file_region().size()}, + .file_path = make_path(bundle.txn_snapshot.path()), + .memory_address = bundle.txn_snapshot.memory_file_region().data(), + .memory_length = bundle.txn_snapshot.memory_file_region().size(), + }, .tx_hash_index{ - .file_path = make_path(segment_file.index_file()), - .memory_address = idx_txn_hash->memory_file_region().data(), - .memory_length = idx_txn_hash->memory_file_region().size()}, + .file_path = make_path(bundle.idx_txn_hash.path()), + .memory_address = bundle.idx_txn_hash.memory_file_region().data(), + .memory_length = bundle.idx_txn_hash.memory_file_region().size(), + }, .tx_hash_2_block_index{ - .file_path = make_path(segment_file.index_file_for_type(SnapshotType::transactions_to_block)), - .memory_address = idx_txn_hash_2_block->memory_file_region().data(), - .memory_length = idx_txn_hash_2_block->memory_file_region().size()}}; + .file_path = make_path(bundle.idx_txn_hash_2_block.path()), + .memory_address = bundle.idx_txn_hash_2_block.memory_file_region().data(), + .memory_length = bundle.idx_txn_hash_2_block.memory_file_region().size(), + }, + }; transactions_snapshot_sequence.push_back(raw_transactions_snapshot); - } break; - default: - ensure(false, [&]() { return "unexpected snapshot type: " + std::string{magic_enum::enum_name(segment_file.type())}; }); - } - } + } + return true; + }); ensure(headers_snapshot_sequence.size() == snapshot_repository.header_snapshots_count(), "invalid header snapshot count"); ensure(bodies_snapshot_sequence.size() == snapshot_repository.body_snapshots_count(), "invalid body snapshot count"); @@ -281,7 +282,7 @@ int execute_with_external_txn(SilkwormHandle handle, ExecuteBlocksSettings setti return SILKWORM_OK; } -int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, const SnapshotRepository& repository, const DataDirectory& data_dir) { +int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, SnapshotRepository& repository, const DataDirectory& data_dir) { // Open chain database silkworm::db::EnvConfig config{ .path = data_dir.chaindata().path().string(), @@ -318,10 +319,11 @@ int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, const } } -int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, const SnapshotRepository& repository, const DataDirectory& data_dir) { +int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, const DataDirectory& data_dir) { SILK_INFO << "Building indexes for snapshots: " << settings.snapshot_names; - std::vector snapshots; + std::vector snapshots; + std::vector snapshot_files; // Parse snapshot paths and create memory mapped files for (auto& snapshot_name : settings.snapshot_names) { auto raw_snapshot_path = data_dir.snapshots().path() / snapshot_name; @@ -329,37 +331,21 @@ int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, c if (!snapshot_path.has_value()) throw std::runtime_error("Invalid snapshot path"); - const Snapshot* snapshot{nullptr}; - switch (snapshot_path->type()) { - case headers: - snapshot = repository.get_header_segment(*snapshot_path); - break; - case bodies: - snapshot = repository.get_body_segment(*snapshot_path); - break; - case transactions: - case transactions_to_block: - snapshot = repository.get_tx_segment(*snapshot_path); - break; - default: - throw std::runtime_error("Invalid snapshot type"); - } - if (!snapshot) { - throw std::runtime_error("Snapshot not found in the repository:" + snapshot_name); - } + Snapshot& snapshot = snapshots.emplace_back(*snapshot_path); + snapshot.reopen_segment(); auto mmf = new SilkwormMemoryMappedFile{ - .file_path = make_path(snapshot->path()), - .memory_address = snapshot->memory_file_region().data(), - .memory_length = snapshot->memory_file_region().size(), + .file_path = make_path(*snapshot_path), + .memory_address = snapshot.memory_file_region().data(), + .memory_length = snapshot.memory_file_region().size(), }; - snapshots.push_back(mmf); + snapshot_files.push_back(mmf); } // Call api to build indexes const auto start_time{std::chrono::high_resolution_clock::now()}; - const int status_code = silkworm_build_recsplit_indexes(handle, snapshots.data(), snapshots.size()); + const int status_code = silkworm_build_recsplit_indexes(handle, snapshot_files.data(), snapshot_files.size()); if (status_code != SILKWORM_OK) return status_code; auto elapsed = std::chrono::high_resolution_clock::now() - start_time; @@ -367,9 +353,9 @@ int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, c << std::chrono::duration_cast(elapsed).count() << "ms"; // Free memory mapped files - for (auto snapshot : snapshots) { - delete[] snapshot->file_path; - delete snapshot; + for (auto mmf : snapshot_files) { + delete[] mmf->file_path; + delete mmf; } return SILKWORM_OK; @@ -446,16 +432,16 @@ int main(int argc, char* argv[]) { // Add snapshots to Silkworm API library SnapshotSettings snapshot_settings{}; snapshot_settings.repository_dir = data_dir.snapshots().path(); - SnapshotRepository repository{snapshot_settings}; - repository.reopen_folder(); int status_code = -1; if (settings.execute_blocks_settings) { // Execute specified block range using Silkworm API library + SnapshotRepository repository{snapshot_settings}; + repository.reopen_folder(); status_code = execute_blocks(handle, *settings.execute_blocks_settings, repository, data_dir); } else if (settings.build_indexes_settings) { // Build index for a specific snapshot using Silkworm API library - status_code = build_indexes(handle, *settings.build_indexes_settings, repository, data_dir); + status_code = build_indexes(handle, *settings.build_indexes_settings, data_dir); } else if (settings.rpcdaemon_settings) { // Start RPC Daemon using Silkworm API library status_code = start_rpcdaemon(handle, *settings.rpcdaemon_settings, data_dir); diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index 9b05d1b80e..b05cf2b1bb 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include @@ -214,25 +214,14 @@ auto duration_as(const std::chrono::duration& elapsed) { void decode_segment(const SnapSettings& settings, int repetitions) { ensure(settings.snapshot_file_name.has_value(), "decode_segment: --snapshot_file must be specified"); - SILK_INFO << "Decode snapshot: " << *settings.snapshot_file_name; - std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto snap_file{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; - if (snap_file) { - std::unique_ptr snapshot; - for (int i{0}; i < repetitions; ++i) { - switch (snap_file->type()) { - case SnapshotType::headers: { - snapshot = std::make_unique(*snap_file); - } break; - case SnapshotType::bodies: { - snapshot = std::make_unique(*snap_file); - } break; - default: { - snapshot = std::make_unique(*snap_file); - } - } - snapshot->reopen_segment(); - } + ensure(snap_file.has_value(), "decode_segment: invalid snapshot_file path format"); + + SILK_INFO << "Decode snapshot: " << snap_file->path(); + std::chrono::time_point start{std::chrono::steady_clock::now()}; + for (int i = 0; i < repetitions; i++) { + Snapshot snapshot{*snap_file}; + snapshot.reopen_segment(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Decode snapshot elapsed: " << duration_as(elapsed) << " msec"; @@ -414,23 +403,22 @@ void lookup_header_by_hash(const SnapSettings& settings) { SILK_INFO << "Lookup header hash: " << hash->to_hex(); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const HeaderSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::optional matching_header; SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - snapshot_repository.view_header_segments([&](const HeaderSnapshot& snapshot) -> bool { - Index idx_header_hash{*snapshot.idx_header_hash()}; - const auto header = HeaderFindByHashQuery{snapshot, idx_header_hash}.exec(*hash); + snapshot_repository.view_header_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto header = HeaderFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(*hash); if (header) { matching_header = header; - matching_snapshot = &snapshot; + matching_snapshot = snapshot.snapshot.path(); } return header.has_value(); }); if (matching_snapshot) { - SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot->path().filename(); + SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot->filename(); if (matching_header && settings.print) { - print_header(*matching_header, matching_snapshot->path().filename()); + print_header(*matching_header, matching_snapshot->filename()); } } else { SILK_WARN << "Lookup header hash: " << hash->to_hex() << " NOT found"; @@ -449,13 +437,12 @@ void lookup_header_by_number(const SnapSettings& settings) { snapshot_repository.reopen_folder(); const auto header_snapshot{snapshot_repository.find_header_segment(block_number)}; if (header_snapshot) { - Index idx_header_hash{*header_snapshot->idx_header_hash()}; - const auto header = HeaderFindByBlockNumQuery{*header_snapshot, idx_header_hash}.exec(block_number); + const auto header = HeaderFindByBlockNumQuery{header_snapshot->snapshot, header_snapshot->index}.exec(block_number); ensure(header.has_value(), - [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + header_snapshot->path().filename(); }); - SILK_INFO << "Lookup header number: " << block_number << " found in: " << header_snapshot->path().filename(); + [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + header_snapshot->snapshot.path().filename(); }); + SILK_INFO << "Lookup header number: " << block_number << " found in: " << header_snapshot->snapshot.path().filename(); if (settings.print) { - print_header(*header, header_snapshot->path().filename()); + print_header(*header, header_snapshot->snapshot.path().filename()); } } else { SILK_WARN << "Lookup header number: " << block_number << " NOT found"; @@ -474,7 +461,7 @@ void lookup_header(const SnapSettings& settings) { } } -static void print_body(const StoredBlockBody& body, const std::string& snapshot_filename) { +static void print_body(const BlockBodyForStorage& body, const std::string& snapshot_filename) { std::cout << "Body found in: " << snapshot_filename << "\n" << "base_txn_id=" << body.base_txn_id << "\n" << "txn_count=" << body.txn_count << "\n" @@ -484,22 +471,22 @@ static void print_body(const StoredBlockBody& body, const std::string& snapshot_ void lookup_body_in_one(const SnapSettings& settings, BlockNum block_number, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_body: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto body_snapshot{snapshot_repository.get_body_segment(*snapshot_path)}; - ensure(body_snapshot, [&]() { return "lookup_body: body segment not found for snapshot file: " + snapshot_path->path().string(); }); + Snapshot body_snapshot{*snapshot_path}; + body_snapshot.reopen_segment(); - Index idx_body_number{*body_snapshot->idx_body_number()}; - const auto body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(block_number); + Index idx_body_number{snapshot_path->index_file()}; + idx_body_number.reopen_index(); + + const auto body = BodyFindByBlockNumQuery{body_snapshot, idx_body_number}.exec(block_number); if (body) { - SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot.path().filename(); if (settings.print) { - print_body(*body, body_snapshot->path().filename()); + print_body(*body, body_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_snapshot->path().filename(); + SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_snapshot.path().filename(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Lookup body elapsed: " << duration_as(elapsed) << " usec"; @@ -512,13 +499,12 @@ void lookup_body_in_all(const SnapSettings& settings, BlockNum block_number) { std::chrono::time_point start{std::chrono::steady_clock::now()}; const auto body_snapshot{snapshot_repository.find_body_segment(block_number)}; if (body_snapshot) { - Index idx_body_number{*body_snapshot->idx_body_number()}; - const auto body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(block_number); + const auto body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(block_number); ensure(body.has_value(), - [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + body_snapshot->path().filename(); }); - SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->path().filename(); + [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + body_snapshot->snapshot.path().filename(); }); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot->snapshot.path().filename(); if (settings.print) { - print_body(*body, body_snapshot->path().filename()); + print_body(*body, body_snapshot->snapshot.path().filename()); } } else { SILK_WARN << "Lookup body number: " << block_number << " NOT found"; @@ -590,21 +576,23 @@ static void print_txn(const Transaction& txn, const std::string& snapshot_filena void lookup_txn_by_hash_in_one(const SnapSettings& settings, const Hash& hash, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_tx_by_hash_in_one: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; - if (tx_snapshot) { - Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; - const auto transaction = TransactionFindByHashQuery{*tx_snapshot, idx_txn_hash}.exec(hash); + Snapshot tx_snapshot{*snapshot_path}; + tx_snapshot.reopen_segment(); + + { + Index idx_txn_hash{snapshot_path->index_file()}; + idx_txn_hash.reopen_index(); + + const auto transaction = TransactionFindByHashQuery{tx_snapshot, idx_txn_hash}.exec(hash); if (transaction) { - SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot->path().filename(); + SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot.path().filename(); if (settings.print) { - print_txn(*transaction, tx_snapshot->path().filename()); + print_txn(*transaction, tx_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << tx_snapshot->path().filename(); + SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << tx_snapshot.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -615,15 +603,14 @@ void lookup_txn_by_hash_in_all(const SnapSettings& settings, const Hash& hash) { SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - const TransactionSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { - Index idx_txn_hash{*snapshot.idx_txn_hash()}; - const auto transaction = TransactionFindByHashQuery{snapshot, idx_txn_hash}.exec(hash); + snapshot_repository.view_tx_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto transaction = TransactionFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(hash); if (transaction) { - matching_snapshot = &snapshot; + matching_snapshot = snapshot.snapshot.path(); if (settings.print) { - print_txn(*transaction, snapshot.path().filename()); + print_txn(*transaction, matching_snapshot->path().filename()); } } return transaction.has_value(); @@ -652,21 +639,23 @@ void lookup_txn_by_hash(const SnapSettings& settings, const std::string& lookup_ void lookup_txn_by_id_in_one(const SnapSettings& settings, uint64_t txn_id, const std::string& file_name) { const auto snapshot_path = SnapshotPath::parse(settings.repository_dir / file_name); ensure(snapshot_path.has_value(), "lookup_txn_by_id_in_one: --snapshot_file is invalid snapshot file"); - SnapshotRepository snapshot_repository{settings}; - snapshot_repository.reopen_file(*snapshot_path); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto tx_snapshot{snapshot_repository.get_tx_segment(*snapshot_path)}; - if (tx_snapshot) { - Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; - const auto transaction = TransactionFindByIdQuery{*tx_snapshot, idx_txn_hash}.exec(txn_id); + Snapshot tx_snapshot{*snapshot_path}; + tx_snapshot.reopen_segment(); + + { + Index idx_txn_hash{snapshot_path->index_file()}; + idx_txn_hash.reopen_index(); + + const auto transaction = TransactionFindByIdQuery{tx_snapshot, idx_txn_hash}.exec(txn_id); if (transaction) { - SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot->path().filename(); + SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot.path().filename(); if (settings.print) { - print_txn(*transaction, tx_snapshot->path().filename()); + print_txn(*transaction, tx_snapshot.path().filename()); } } else { - SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << tx_snapshot->path().filename(); + SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << tx_snapshot.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -677,15 +666,14 @@ void lookup_txn_by_id_in_all(const SnapSettings& settings, uint64_t txn_id) { SnapshotRepository snapshot_repository{settings}; snapshot_repository.reopen_folder(); - const TransactionSnapshot* matching_snapshot{nullptr}; + std::optional matching_snapshot; std::chrono::time_point start{std::chrono::steady_clock::now()}; - snapshot_repository.view_tx_segments([&](const TransactionSnapshot& snapshot) -> bool { - Index idx_txn_hash{*snapshot.idx_txn_hash()}; - const auto transaction = TransactionFindByIdQuery{snapshot, idx_txn_hash}.exec(txn_id); + snapshot_repository.view_tx_segments([&](SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + const auto transaction = TransactionFindByIdQuery{snapshot.snapshot, snapshot.index}.exec(txn_id); if (transaction) { - matching_snapshot = &snapshot; + matching_snapshot = snapshot.snapshot.path(); if (settings.print) { - print_txn(*transaction, snapshot.path().filename()); + print_txn(*transaction, matching_snapshot->path().filename()); } } return transaction.has_value(); diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index 0afc6e87f6..cbfc681453 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -36,7 +36,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -329,12 +331,10 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!headers_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedHeadersSnapshot mapped_h_snapshot{ - .segment = make_region(hs.segment), - .header_hash_index = make_region(hs.header_hash_index)}; - auto headers_snapshot = std::make_unique(*headers_segment_path, mapped_h_snapshot); - headers_snapshot->reopen_segment(); - headers_snapshot->reopen_index(); + snapshots::Snapshot header_snapshot{*headers_segment_path, make_region(hs.segment)}; + header_snapshot.reopen_segment(); + snapshots::Index idx_header_hash{headers_segment_path->index_file(), make_region(hs.header_hash_index)}; + idx_header_hash.reopen_index(); const SilkwormBodiesSnapshot& bs = snapshot->bodies; if (!bs.segment.file_path || !bs.block_num_index.file_path) { @@ -344,12 +344,10 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!bodies_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedBodiesSnapshot mapped_b_snapshot{ - .segment = make_region(bs.segment), - .block_num_index = make_region(bs.block_num_index)}; - auto bodies_snapshot = std::make_unique(*bodies_segment_path, mapped_b_snapshot); - bodies_snapshot->reopen_segment(); - bodies_snapshot->reopen_index(); + snapshots::Snapshot body_snapshot{*bodies_segment_path, make_region(bs.segment)}; + body_snapshot.reopen_segment(); + snapshots::Index idx_body_number{bodies_segment_path->index_file(), make_region(bs.block_num_index)}; + idx_body_number.reopen_index(); const SilkwormTransactionsSnapshot& ts = snapshot->transactions; if (!ts.segment.file_path || !ts.tx_hash_index.file_path || !ts.tx_hash_2_block_index.file_path) { @@ -359,18 +357,23 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!transactions_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::MappedTransactionsSnapshot mapped_t_snapshot{ - .segment = make_region(ts.segment), - .tx_hash_index = make_region(ts.tx_hash_index), - .tx_hash_2_block_index = make_region(ts.tx_hash_2_block_index)}; - auto transactions_snapshot = std::make_unique(*transactions_segment_path, mapped_t_snapshot); - transactions_snapshot->reopen_segment(); - transactions_snapshot->reopen_index(); + snapshots::Snapshot txn_snapshot{*transactions_segment_path, make_region(ts.segment)}; + txn_snapshot.reopen_segment(); + snapshots::Index idx_txn_hash{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions), make_region(ts.tx_hash_index)}; + idx_txn_hash.reopen_index(); + snapshots::Index idx_txn_hash_2_block{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions_to_block), make_region(ts.tx_hash_2_block_index)}; + idx_txn_hash_2_block.reopen_index(); snapshots::SnapshotBundle bundle{ - .headers_snapshot = std::move(headers_snapshot), - .bodies_snapshot = std::move(bodies_snapshot), - .tx_snapshot = std::move(transactions_snapshot), + .header_snapshot = std::move(header_snapshot), + .idx_header_hash = std::move(idx_header_hash), + + .body_snapshot = std::move(body_snapshot), + .idx_body_number = std::move(idx_body_number), + + .txn_snapshot = std::move(txn_snapshot), + .idx_txn_hash = std::move(idx_txn_hash), + .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), }; handle->snapshot_repository->add_snapshot_bundle(std::move(bundle)); return SILKWORM_OK; diff --git a/silkworm/capi/silkworm_test.cpp b/silkworm/capi/silkworm_test.cpp index 864ee2fe14..a349d7a743 100644 --- a/silkworm/capi/silkworm_test.cpp +++ b/silkworm/capi/silkworm_test.cpp @@ -25,8 +25,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -643,34 +644,38 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { snapshot_test::SampleTransactionSnapshotFile valid_tx_snapshot{tmp_dir.path()}; snapshot_test::SampleTransactionSnapshotPath tx_snapshot_path{valid_tx_snapshot.path()}; - auto header_index = snapshots::HeaderIndex::make(header_snapshot_path); - REQUIRE_NOTHROW(header_index.build()); - snapshots::HeaderSnapshot header_snapshot{header_snapshot_path}; + auto header_index_builder = snapshots::HeaderIndex::make(header_snapshot_path); + REQUIRE_NOTHROW(header_index_builder.build()); + snapshots::Snapshot header_snapshot{header_snapshot_path}; header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); + snapshots::Index idx_header_hash{header_snapshot_path.index_file()}; + idx_header_hash.reopen_index(); - auto body_index = snapshots::BodyIndex::make(body_snapshot_path); - REQUIRE_NOTHROW(body_index.build()); - snapshots::BodySnapshot body_snapshot{body_snapshot_path}; + auto body_index_builder = snapshots::BodyIndex::make(body_snapshot_path); + REQUIRE_NOTHROW(body_index_builder.build()); + snapshots::Snapshot body_snapshot{body_snapshot_path}; body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - - auto tx_index = snapshots::TransactionIndex::make(body_snapshot_path, tx_snapshot_path); - tx_index.build(); - auto tx_index_hash_to_block = snapshots::TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); - tx_index_hash_to_block.build(); - snapshots::TransactionSnapshot tx_snapshot{tx_snapshot_path}; + snapshots::Index idx_body_number{body_snapshot_path.index_file()}; + idx_body_number.reopen_index(); + + auto tx_index_builder = snapshots::TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + tx_index_builder.build(); + auto tx_index_hash_to_block_builder = snapshots::TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); + tx_index_hash_to_block_builder.build(); + snapshots::Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); + snapshots::Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); + snapshots::Index idx_txn_hash_2_block{tx_index_hash_to_block_builder.path()}; + idx_txn_hash_2_block.reopen_index(); const auto header_snapshot_path_string{header_snapshot_path.path().string()}; - const auto header_index_path_string{header_index.path().path().string()}; + const auto header_index_path_string{idx_header_hash.path().path().string()}; const auto body_snapshot_path_string{body_snapshot_path.path().string()}; - const auto body_index_path_string{body_index.path().path().string()}; + const auto body_index_path_string{idx_body_number.path().path().string()}; const auto tx_snapshot_path_string{tx_snapshot_path.path().string()}; - const auto tx_hash_index_path_string{tx_snapshot_path.index_file().path().string()}; - const auto tx_hash2block_index_path_string{ - tx_snapshot_path.index_file_for_type(snapshots::SnapshotType::transactions_to_block).path().string()}; + const auto tx_hash_index_path_string{idx_txn_hash.path().path().string()}; + const auto tx_hash2block_index_path_string{idx_txn_hash_2_block.path().path().string()}; // Prepare templates for valid header/body/transaction C data structures SilkwormHeadersSnapshot valid_shs{ @@ -681,8 +686,8 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .header_hash_index = SilkwormMemoryMappedFile{ .file_path = header_index_path_string.c_str(), - .memory_address = header_snapshot.idx_header_hash()->memory_file_region().data(), - .memory_length = header_snapshot.idx_header_hash()->memory_file_region().size(), + .memory_address = idx_header_hash.memory_file_region().data(), + .memory_length = idx_header_hash.memory_file_region().size(), }, }; SilkwormBodiesSnapshot valid_sbs{ @@ -693,8 +698,8 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .block_num_index = SilkwormMemoryMappedFile{ .file_path = body_index_path_string.c_str(), - .memory_address = body_snapshot.idx_body_number()->memory_file_region().data(), - .memory_length = body_snapshot.idx_body_number()->memory_file_region().size(), + .memory_address = idx_body_number.memory_file_region().data(), + .memory_length = idx_body_number.memory_file_region().size(), }, }; SilkwormTransactionsSnapshot valid_sts{ @@ -705,13 +710,13 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }, .tx_hash_index = SilkwormMemoryMappedFile{ .file_path = tx_hash_index_path_string.c_str(), - .memory_address = tx_snapshot.idx_txn_hash()->memory_file_region().data(), - .memory_length = tx_snapshot.idx_txn_hash()->memory_file_region().size(), + .memory_address = idx_txn_hash.memory_file_region().data(), + .memory_length = idx_txn_hash.memory_file_region().size(), }, .tx_hash_2_block_index = SilkwormMemoryMappedFile{ .file_path = tx_hash2block_index_path_string.c_str(), - .memory_address = tx_snapshot.idx_txn_hash_2_block()->memory_file_region().data(), - .memory_length = tx_snapshot.idx_txn_hash_2_block()->memory_file_region().size(), + .memory_address = idx_txn_hash_2_block.memory_file_region().data(), + .memory_length = idx_txn_hash_2_block.memory_file_region().size(), }, }; diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 37c51b003b..6cd4f5d690 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1225,8 +1225,7 @@ std::optional DataModel::read_header_from_snapshot(BlockNum height) // We know the header snapshot in advance: find it based on target block number const auto header_snapshot = repository_->find_header_segment(height); if (header_snapshot) { - Index index{*header_snapshot->idx_header_hash()}; - block_header = HeaderFindByBlockNumQuery{*header_snapshot, index}.exec(height); + block_header = HeaderFindByBlockNumQuery{header_snapshot->snapshot, header_snapshot->index}.exec(height); } return block_header; } @@ -1238,9 +1237,8 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash std::optional block_header; // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order - repository_->view_header_segments([&](const snapshots::HeaderSnapshot& snapshot) -> bool { - Index index{*snapshot.idx_header_hash()}; - block_header = HeaderFindByHashQuery{snapshot, index}.exec(hash); + repository_->view_header_segments([&](snapshots::SnapshotRepository::SnapshotAndIndex snapshot) -> bool { + block_header = HeaderFindByHashQuery{snapshot.snapshot, snapshot.index}.exec(hash); return block_header.has_value(); }); return block_header; @@ -1255,8 +1253,7 @@ bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, Bloc const auto body_snapshot = repository_->find_body_segment(height); if (!body_snapshot) return false; - Index idx_body_number{*body_snapshot->idx_body_number()}; - auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); + auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1281,8 +1278,7 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { // We know the body snapshot in advance: find it based on target block number const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - Index idx_body_number{*body_snapshot->idx_body_number()}; - const auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); + const auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); return stored_body.has_value(); } @@ -1299,8 +1295,7 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; - txs = TransactionRangeFromIdQuery{*tx_snapshot, idx_txn_hash}.exec_into_vector(base_txn_id, txn_count); + txs = TransactionRangeFromIdQuery{tx_snapshot->snapshot, tx_snapshot->index}.exec_into_vector(base_txn_id, txn_count); return true; } @@ -1308,8 +1303,7 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs) { const auto body_snapshot = repository_->find_body_segment(height); if (body_snapshot) { - Index idx_body_number{*body_snapshot->idx_body_number()}; - auto stored_body = BodyFindByBlockNumQuery{*body_snapshot, idx_body_number}.exec(height); + auto stored_body = BodyFindByBlockNumQuery{body_snapshot->snapshot, body_snapshot->index}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1321,8 +1315,7 @@ bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector const auto tx_snapshot = repository_->find_tx_segment(height); if (!tx_snapshot) return false; - Index idx_txn_hash{*tx_snapshot->idx_txn_hash()}; - rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_snapshot, idx_txn_hash}.exec_into_vector(base_txn_id, txn_count); + rlp_txs = TransactionPayloadRlpRangeFromIdQuery{tx_snapshot->snapshot, tx_snapshot->index}.exec_into_vector(base_txn_id, txn_count); return true; } diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index 9e43b749be..ebb6f2179c 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -311,8 +311,8 @@ void SnapshotSync::update_block_bodies(db::RWTxn& txn, BlockNum max_block_availa // Reset sequence for kBlockTransactions table const auto tx_snapshot = repository_->find_tx_segment(max_block_available); - ensure(tx_snapshot, "SnapshotSync: snapshots max block not found in any snapshot"); - const auto last_tx_id = tx_snapshot->idx_txn_hash()->base_data_id() + tx_snapshot->item_count(); + ensure(tx_snapshot.has_value(), "SnapshotSync: snapshots max block not found in any snapshot"); + const auto last_tx_id = tx_snapshot->index.base_data_id() + tx_snapshot->snapshot.item_count(); db::reset_map_sequence(txn, db::table::kBlockTransactions.name, last_tx_id + 1); SILK_INFO << "SnapshotSync: database table BlockTransactions sequence reset"; diff --git a/silkworm/db/snapshots/index.cpp b/silkworm/db/snapshots/index.cpp index 3ecd1da320..d1cd3a69fd 100644 --- a/silkworm/db/snapshots/index.cpp +++ b/silkworm/db/snapshots/index.cpp @@ -18,4 +18,25 @@ namespace silkworm::snapshots { +void Index::reopen_index() { + close_index(); + + if (path_.exists()) { + index_ = std::make_unique(path_.path(), region_); + + // TODO: move this code or pass in snapshot_last_write_time as an argument + // snapshot_last_write_time: ensure(decoder_.is_open(), "segment not open, call reopen_segment"); + // if (index_->last_write_time() < snapshot_last_write_time) { + // // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent + // const bool removed = std::filesystem::remove(path_.path()); + // ensure(removed, "Index: cannot remove index file"); + // close_index(); + // } + } +} + +void Index::close_index() { + index_.reset(); +} + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp index d74fc61aea..b77b4b8165 100644 --- a/silkworm/db/snapshots/index.hpp +++ b/silkworm/db/snapshots/index.hpp @@ -16,30 +16,55 @@ #pragma once +#include #include #include #include #include +#include "path.hpp" #include "rec_split/rec_split.hpp" namespace silkworm::snapshots { class Index { public: - explicit Index(const rec_split::RecSplitIndex& index) : index_(index) {} + explicit Index( + SnapshotPath path, + std::optional region = std::nullopt) + : path_(std::move(path)), + region_(region) {} - std::size_t ordinal_lookup_by_data_id(uint64_t id) const { return index_.ordinal_lookup_by_data_id(id); }; - std::optional ordinal_lookup_by_hash(const Hash& hash) const { return index_.ordinal_lookup_by_key(hash); }; + std::size_t ordinal_lookup_by_data_id(uint64_t id) const { return index_->ordinal_lookup_by_data_id(id); }; + std::optional ordinal_lookup_by_hash(const Hash& hash) const { return index_->ordinal_lookup_by_key(hash); }; std::optional lookup_by_hash(const Hash& hash) const { - auto [result, found] = index_.lookup(hash); + auto [result, found] = index_->lookup(hash); return found ? std::optional{result} : std::nullopt; } + void reopen_index(); + void close_index(); + + bool is_open() const { return index_.get(); } + const SnapshotPath& path() const { return path_; } + + MemoryMappedRegion memory_file_region() const { + return index_ ? index_->memory_file_region() : MemoryMappedRegion{}; + } + + uint64_t base_data_id() const { + assert(index_); + return index_->base_data_id(); + } + private: - const rec_split::RecSplitIndex& index_; + SnapshotPath path_; + //! External memory-mapped region of the index data + std::optional region_; + + std::unique_ptr index_; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 80cba6a0e5..5652f306d8 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -17,8 +17,8 @@ #include "repository.hpp" #include +#include #include -#include #include #include @@ -36,37 +36,28 @@ namespace silkworm::snapshots { namespace fs = std::filesystem; -template -const T* get_segment(const SnapshotsByPath& segments, const SnapshotPath& path) { - if (!segments.contains(path.path())) { - return nullptr; - } - return segments.find(path.path())->second.get(); -} - -template -SnapshotRepository::ViewResult view(const SnapshotsByPath& segments, BlockNum number, const SnapshotWalker& walker) { +SnapshotRepository::ViewResult SnapshotRepository::view_segment(SnapshotType type, BlockNum number, const SnapshotWalker& walker) { // Search for target segment in reverse order (from the newest segment to the oldest one) - for (auto it = segments.rbegin(); it != segments.rend(); ++it) { - const auto& snapshot = it->second; + for (auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; // We're looking for the segment containing the target block number in its block range - if (snapshot->block_from() <= number && number < snapshot->block_to()) { - const bool walk_done = walker(*snapshot); + if ((bundle.block_from() <= number) && (number < bundle.block_to())) { + const bool walk_done = walker({bundle.snapshot(type), bundle.index(type)}); return walk_done ? SnapshotRepository::kWalkSuccess : SnapshotRepository::kWalkFailed; } } return SnapshotRepository::kSnapshotNotFound; } -template -std::size_t view(const SnapshotsByPath& segments, const SnapshotWalker& walker) { +std::size_t SnapshotRepository::view_bundles(const SnapshotBundleWalker& walker) { // Search for target segment in reverse order (from the newest segment to the oldest one) std::size_t visited_views{0}; bool walk_done{false}; - for (auto it = segments.rbegin(); it != segments.rend() && !walk_done; ++it) { - const auto& snapshot = it->second; - walk_done = walker(*snapshot); + for (auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; + walk_done = walker(bundle); ++visited_views; + if (walk_done) break; } return visited_views; } @@ -79,22 +70,12 @@ SnapshotRepository::~SnapshotRepository() { } void SnapshotRepository::add_snapshot_bundle(SnapshotBundle bundle) { - if (bundle.headers_snapshot && bundle.bodies_snapshot && bundle.tx_snapshot) { - // assume that all snapshot types end with the same block, and use one of them - BlockNum last_block = bundle.tx_snapshot->block_to() - 1; - segment_max_block_ = std::max(segment_max_block_, last_block); - } + BlockNum block_from = bundle.block_from(); + BlockNum block_to = bundle.block_to(); - if (bundle.headers_snapshot) { - header_segments_[bundle.headers_snapshot->fs_path()] = std::move(bundle.headers_snapshot); - } - if (bundle.bodies_snapshot) { - body_segments_[bundle.bodies_snapshot->fs_path()] = std::move(bundle.bodies_snapshot); - } - if (bundle.tx_snapshot) { - tx_segments_[bundle.tx_snapshot->fs_path()] = std::move(bundle.tx_snapshot); - } + bundles_.emplace(block_from, std::move(bundle)); + segment_max_block_ = std::max(segment_max_block_, block_to - 1); idx_max_block_ = max_idx_available(); } @@ -107,14 +88,14 @@ void SnapshotRepository::reopen_folder() { void SnapshotRepository::close() { SILK_TRACE << "Close snapshot repository folder: " << settings_.repository_dir.string(); - for (const auto& [_, header_seg] : this->header_segments_) { - header_seg->close(); - } - for (const auto& [_, body_seg] : this->body_segments_) { - body_seg->close(); - } - for (const auto& [_, tx_seg] : this->tx_segments_) { - tx_seg->close(); + for (auto& entry : bundles_) { + auto& bundle = entry.second; + for (auto& index_ref : bundle.indexes()) { + index_ref.get().close_index(); + } + for (auto& snapshot_ref : bundle.snapshots()) { + snapshot_ref.get().close(); + } } } @@ -124,6 +105,7 @@ std::vector SnapshotRepository::missing_block_ranges() const { std::vector missing_ranges; BlockNum previous_to{0}; for (const auto& segment : ordered_segments) { + // skips different types of snapshots having the same block range if (segment.block_to() <= previous_to) continue; if (segment.block_from() != previous_to) { missing_ranges.emplace_back(previous_to, segment.block_from()); @@ -134,10 +116,12 @@ std::vector SnapshotRepository::missing_block_ranges() const { } bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { - for (const auto& [_, header_snapshot] : header_segments_) { - SILK_TRACE << "for_each_header header_snapshot: " << header_snapshot->fs_path().string(); + for (auto& entry : bundles_) { + auto& bundle = entry.second; + const Snapshot& header_snapshot = bundle.header_snapshot; + SILK_TRACE << "for_each_header header_snapshot: " << header_snapshot.fs_path().string(); - HeaderSnapshotReader reader{*header_snapshot}; + HeaderSnapshotReader reader{header_snapshot}; for (auto& header : reader) { const bool keep_going = fn(header); if (!keep_going) return false; @@ -147,11 +131,13 @@ bool SnapshotRepository::for_each_header(const HeaderWalker& fn) { } bool SnapshotRepository::for_each_body(const BodyWalker& fn) { - for (const auto& [_, body_snapshot] : body_segments_) { - SILK_TRACE << "for_each_body body_snapshot: " << body_snapshot->fs_path().string(); + for (auto& entry : bundles_) { + auto& bundle = entry.second; + const Snapshot& body_snapshot = bundle.body_snapshot; + SILK_TRACE << "for_each_body body_snapshot: " << body_snapshot.fs_path().string(); - BlockNum number = body_snapshot->path().block_from(); - BodySnapshotReader reader{*body_snapshot}; + BlockNum number = body_snapshot.block_from(); + BodySnapshotReader reader{body_snapshot}; for (auto& body : reader) { const bool keep_going = fn(number, body); if (!keep_going) return false; @@ -161,61 +147,64 @@ bool SnapshotRepository::for_each_body(const BodyWalker& fn) { return true; } -SnapshotRepository::ViewResult SnapshotRepository::view_header_segment(BlockNum number, const HeaderSnapshotWalker& walker) { - return view(header_segments_, number, walker); +SnapshotRepository::ViewResult SnapshotRepository::view_header_segment(BlockNum number, const SnapshotWalker& walker) { + return view_segment(SnapshotType::headers, number, walker); } -SnapshotRepository::ViewResult SnapshotRepository::view_body_segment(BlockNum number, const BodySnapshotWalker& walker) { - return view(body_segments_, number, walker); +SnapshotRepository::ViewResult SnapshotRepository::view_body_segment(BlockNum number, const SnapshotWalker& walker) { + return view_segment(SnapshotType::bodies, number, walker); } -SnapshotRepository::ViewResult SnapshotRepository::view_tx_segment(BlockNum number, const TransactionSnapshotWalker& walker) { - return view(tx_segments_, number, walker); +SnapshotRepository::ViewResult SnapshotRepository::view_tx_segment(BlockNum number, const SnapshotWalker& walker) { + return view_segment(SnapshotType::transactions, number, walker); } -std::size_t SnapshotRepository::view_header_segments(const HeaderSnapshotWalker& walker) { - return view(header_segments_, walker); +std::size_t SnapshotRepository::view_segments(SnapshotType type, const SnapshotWalker& walker) { + return view_bundles([&](const SnapshotBundle& bundle) { + return walker({bundle.snapshot(type), bundle.index(type)}); + }); } -std::size_t SnapshotRepository::view_body_segments(const BodySnapshotWalker& walker) { - return view(body_segments_, walker); +std::size_t SnapshotRepository::view_header_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::headers, walker); } -std::size_t SnapshotRepository::view_tx_segments(const TransactionSnapshotWalker& walker) { - return view(tx_segments_, walker); +std::size_t SnapshotRepository::view_body_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::bodies, walker); } -const HeaderSnapshot* SnapshotRepository::get_header_segment(const SnapshotPath& path) const { - return get_segment(header_segments_, path); +std::size_t SnapshotRepository::view_tx_segments(const SnapshotWalker& walker) { + return view_segments(SnapshotType::transactions, walker); } -const BodySnapshot* SnapshotRepository::get_body_segment(const SnapshotPath& path) const { - return get_segment(body_segments_, path); -} - -const TransactionSnapshot* SnapshotRepository::get_tx_segment(const SnapshotPath& path) const { - return get_segment(tx_segments_, path); +std::optional SnapshotRepository::find_segment(SnapshotType type, BlockNum number) const { + auto bundle = find_bundle(number); + if (bundle) { + return SnapshotAndIndex{bundle->snapshot(type), bundle->index(type)}; + } + return std::nullopt; } -const HeaderSnapshot* SnapshotRepository::find_header_segment(BlockNum number) const { - return find_segment(header_segments_, number); +std::optional SnapshotRepository::find_header_segment(BlockNum number) const { + return find_segment(SnapshotType::headers, number); } -const BodySnapshot* SnapshotRepository::find_body_segment(BlockNum number) const { - return find_segment(body_segments_, number); +std::optional SnapshotRepository::find_body_segment(BlockNum number) const { + return find_segment(SnapshotType::bodies, number); } -const TransactionSnapshot* SnapshotRepository::find_tx_segment(BlockNum number) const { - return find_segment(tx_segments_, number); +std::optional SnapshotRepository::find_tx_segment(BlockNum number) const { + return find_segment(SnapshotType::transactions, number); } std::optional SnapshotRepository::find_block_number(Hash txn_hash) const { - for (const auto& it : std::ranges::reverse_view(tx_segments_)) { - const auto& snapshot = it.second; + for (const auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; + const auto& snapshot = bundle.txn_snapshot; - Index idx_txn_hash{*snapshot->idx_txn_hash()}; - Index idx_txn_hash_2_block{*snapshot->idx_txn_hash_2_block()}; - auto block = TransactionBlockNumByTxnHashQuery{idx_txn_hash_2_block, TransactionFindByHashQuery{*snapshot, idx_txn_hash}}.exec(txn_hash); + const Index& idx_txn_hash = bundle.idx_txn_hash; + const Index& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block; + auto block = TransactionBlockNumByTxnHashQuery{idx_txn_hash_2_block, TransactionFindByHashQuery{snapshot, idx_txn_hash}}.exec(txn_hash); if (block) { return block; } @@ -263,104 +252,93 @@ std::vector> SnapshotRepository::missing_indexes() return missing_index_list; } -void SnapshotRepository::reopen_file(const SnapshotPath& segment_path, bool optimistic) { - reopen_list(SnapshotPathList{segment_path}, optimistic); -} +void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files) { + std::map header_snapshot_paths; + std::map body_snapshot_paths; + std::map txn_snapshot_paths; + + for (const SnapshotPath& path : segment_files) { + switch (path.type()) { + case SnapshotType::headers: + header_snapshot_paths.emplace(path.block_from(), path); + break; + case SnapshotType::bodies: + body_snapshot_paths.emplace(path.block_from(), path); + break; + case SnapshotType::transactions: + txn_snapshot_paths.emplace(path.block_from(), path); + break; + case SnapshotType::transactions_to_block: + assert(false); + break; + } + } -void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files, bool optimistic) { - BlockNum segment_max_block{0}; - for (const auto& seg_file : segment_files) { - try { - SILK_TRACE << "Reopen segment file: " << seg_file.path().filename().string(); - bool snapshot_valid{true}; - switch (seg_file.type()) { - case SnapshotType::headers: { - const auto header_it = header_segments_.find(seg_file.path()); - if (header_it != header_segments_.end()) { - header_it->second->reopen_index(); - } else { - snapshot_valid = reopen_header(seg_file); - } - break; - } - case SnapshotType::bodies: { - const auto body_it = body_segments_.find(seg_file.path()); - if (body_it != body_segments_.end()) { - body_it->second->reopen_index(); - } else { - snapshot_valid = reopen_body(seg_file); - } - break; - } - case SnapshotType::transactions: { - const auto tx_it = tx_segments_.find(seg_file.path()); - if (tx_it != tx_segments_.end()) { - tx_it->second->reopen_index(); - } else { - snapshot_valid = reopen_transaction(seg_file); - } - break; - } - default: { - SILKWORM_ASSERT(false); - } + BlockNum num = 0; + if (!header_snapshot_paths.empty()) { + num = header_snapshot_paths.begin()->first; + } + + while ( + header_snapshot_paths.contains(num) && + body_snapshot_paths.contains(num) && + txn_snapshot_paths.contains(num)) { + if (!bundles_.contains(num)) { + SnapshotBundle bundle{ + .header_snapshot = Snapshot(header_snapshot_paths.at(num)), + .idx_header_hash = Index(header_snapshot_paths.at(num).index_file()), + + .body_snapshot = Snapshot(body_snapshot_paths.at(num)), + .idx_body_number = Index(body_snapshot_paths.at(num).index_file()), + + .txn_snapshot = Snapshot(txn_snapshot_paths.at(num)), + .idx_txn_hash = Index(txn_snapshot_paths.at(num).index_file_for_type(SnapshotType::transactions)), + .idx_txn_hash_2_block = Index(txn_snapshot_paths.at(num).index_file_for_type(SnapshotType::transactions_to_block)), + }; + + for (auto& snapshot_ref : bundle.snapshots()) { + snapshot_ref.get().reopen_segment(); + ensure(!snapshot_ref.get().empty(), [&]() { + return "invalid empty snapshot " + snapshot_ref.get().fs_path().string(); + }); } - ensure(snapshot_valid, [&]() { return "invalid empty snapshot " + seg_file.filename(); }); - BlockNum last_block = seg_file.block_to() - 1; - segment_max_block = std::max(segment_max_block, last_block); - } catch (const std::exception& exc) { - SILK_WARN << "Reopen failed for: " << seg_file.path() << " [" << exc.what() << "]"; - if (!optimistic) throw; + bundles_.emplace(num, std::move(bundle)); } - } - segment_max_block_ = segment_max_block; - idx_max_block_ = max_idx_available(); -} -bool SnapshotRepository::reopen_header(const SnapshotPath& seg_file) { - return reopen(header_segments_, seg_file); -} + auto& bundle = bundles_.at(num); + for (auto& index_ref : bundle.indexes()) { + index_ref.get().reopen_index(); + } -bool SnapshotRepository::reopen_body(const SnapshotPath& seg_file) { - return reopen(body_segments_, seg_file); -} + segment_max_block_ = std::max(segment_max_block_, bundle.block_to() - 1); -bool SnapshotRepository::reopen_transaction(const SnapshotPath& seg_file) { - return reopen(tx_segments_, seg_file); + if (num < bundle.block_to()) { + num = bundle.block_to(); + } else { + break; + } + } + + idx_max_block_ = max_idx_available(); } -template -const T* SnapshotRepository::find_segment(const SnapshotsByPath& segments, BlockNum number) const { +const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { if (number > max_block_available()) { return nullptr; } // Search for target segment in reverse order (from the newest segment to the oldest one) - for (auto it = segments.rbegin(); it != segments.rend(); ++it) { - const auto& snapshot = it->second; + for (const auto& entry : std::ranges::reverse_view(bundles_)) { + const auto& bundle = entry.second; // We're looking for the segment containing the target block number in its block range - if (snapshot->block_from() <= number && number < snapshot->block_to()) { - return snapshot.get(); + if ((bundle.block_from() <= number) && (number < bundle.block_to())) { + return &bundle; } } return nullptr; } -template -bool SnapshotRepository::reopen(SnapshotsByPath& segments, const SnapshotPath& seg_file) { - if (segments.find(seg_file.path()) == segments.end()) { - auto segment = std::make_unique(seg_file); - segment->reopen_segment(); - if (segment->empty()) return false; - segments[seg_file.path()] = std::move(segment); - } - SILKWORM_ASSERT(segments.find(seg_file.path()) != segments.end()); - const auto& segment = segments[seg_file.path()]; - segment->reopen_index(); - return true; -} - SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { ensure(fs::exists(settings_.repository_dir), [&]() { return "SnapshotRepository: " + settings_.repository_dir.string() + " does not exist"; }); @@ -388,24 +366,18 @@ SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { return snapshot_files; } -BlockNum SnapshotRepository::max_idx_available() const { - BlockNum max_block_headers{0}; - for (auto& [_, header_seg] : header_segments_) { - if (!header_seg->idx_header_hash()) break; - max_block_headers = header_seg->block_to() - 1; - } - BlockNum max_block_bodies{0}; - for (auto& [_, body_seg] : body_segments_) { - if (!body_seg->idx_body_number()) break; - max_block_bodies = body_seg->block_to() - 1; - } - BlockNum max_block_txs{0}; - for (auto& [_, tx_seg] : tx_segments_) { - if (!tx_seg->idx_txn_hash() || !tx_seg->idx_txn_hash_2_block()) break; - max_block_txs = tx_seg->block_to() - 1; +BlockNum SnapshotRepository::max_idx_available() { + BlockNum result = 0; + for (auto& entry : bundles_) { + auto& bundle = entry.second; + for (auto& index_ref : bundle.indexes()) { + if (!index_ref.get().is_open()) { + return result; + } + } + result = bundle.block_to() - 1; } - - return std::min(max_block_headers, std::min(max_block_bodies, max_block_txs)); + return result; } } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 041af33c74..7f21281844 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -16,39 +16,90 @@ #pragma once +#include +#include #include #include #include #include -#include #include #include #include +#include +#include #include #include -#include +#include namespace silkworm::snapshots { struct IndexBuilder; -template -concept ConcreteSnapshot = std::is_base_of::value; +struct SnapshotBundle { + Snapshot header_snapshot; + //! Index header_hash -> block_num -> headers_segment_offset + Index idx_header_hash; + + Snapshot body_snapshot; + //! Index block_num -> bodies_segment_offset + Index idx_body_number; + + Snapshot txn_snapshot; + //! Index transaction_hash -> txn_id -> transactions_segment_offset + Index idx_txn_hash; + //! Index transaction_hash -> block_num + Index idx_txn_hash_2_block; + + std::array, 3> snapshots() { + return { + header_snapshot, + body_snapshot, + txn_snapshot, + }; + } -template -using SnapshotsByPath = std::map>; + std::array, 4> indexes() { + return { + idx_header_hash, + idx_body_number, + idx_txn_hash, + idx_txn_hash_2_block, + }; + } -template -using SnapshotWalker = std::function; -using HeaderSnapshotWalker = SnapshotWalker; -using BodySnapshotWalker = SnapshotWalker; -using TransactionSnapshotWalker = SnapshotWalker; + const Snapshot& snapshot(SnapshotType type) const { + switch (type) { + case headers: + return header_snapshot; + case bodies: + return body_snapshot; + case transactions: + case transactions_to_block: + return txn_snapshot; + } + assert(false); + return header_snapshot; + } -struct SnapshotBundle { - std::unique_ptr headers_snapshot; - std::unique_ptr bodies_snapshot; - std::unique_ptr tx_snapshot; + const Index& index(SnapshotType type) const { + switch (type) { + case headers: + return idx_header_hash; + case bodies: + return idx_body_number; + case transactions: + return idx_txn_hash; + case transactions_to_block: + return idx_txn_hash_2_block; + } + assert(false); + return idx_header_hash; + } + + // assume that all snapshots have the same block range, and use one of them + BlockNum block_from() const { return header_snapshot.block_from(); } + BlockNum block_to() const { return header_snapshot.block_to(); } }; //! Read-only repository for all snapshot files. @@ -65,71 +116,71 @@ class SnapshotRepository { [[nodiscard]] const SnapshotSettings& settings() const { return settings_; } [[nodiscard]] std::filesystem::path path() const { return settings_.repository_dir; } - [[nodiscard]] BlockNum max_block_available() const { return std::min(segment_max_block_, idx_max_block_); } - - [[nodiscard]] SnapshotPathList get_segment_files() const { - return get_files(kSegmentExtension); - } - - void add_snapshot_bundle(SnapshotBundle bundle); - - void reopen_list(const SnapshotPathList& segment_files, bool optimistic = false); - void reopen_file(const SnapshotPath& segment_path, bool optimistic = false); void reopen_folder(); void close(); - using HeaderWalker = std::function; - bool for_each_header(const HeaderWalker& fn); - - using BodyWalker = std::function; - bool for_each_body(const BodyWalker& fn); + void add_snapshot_bundle(SnapshotBundle bundle); - [[nodiscard]] std::size_t header_snapshots_count() const { return header_segments_.size(); } - [[nodiscard]] std::size_t body_snapshots_count() const { return body_segments_.size(); } - [[nodiscard]] std::size_t tx_snapshots_count() const { return tx_segments_.size(); } + [[nodiscard]] std::size_t header_snapshots_count() const { return bundles_.size(); } + [[nodiscard]] std::size_t body_snapshots_count() const { return bundles_.size(); } + [[nodiscard]] std::size_t tx_snapshots_count() const { return bundles_.size(); } [[nodiscard]] std::size_t total_snapshots_count() const { return header_snapshots_count() + body_snapshots_count() + tx_snapshots_count(); } + [[nodiscard]] BlockNum segment_max_block() const { return segment_max_block_; } + [[nodiscard]] BlockNum idx_max_block() const { return idx_max_block_; } + [[nodiscard]] BlockNum max_block_available() const { return std::min(segment_max_block_, idx_max_block_); } + [[nodiscard]] std::vector missing_block_ranges() const; - enum ViewResult { + [[nodiscard]] std::vector> missing_indexes() const; + + struct SnapshotAndIndex { + const Snapshot& snapshot; + const Index& index; + }; + + enum ViewResult : uint8_t { kSnapshotNotFound, kWalkFailed, kWalkSuccess }; - ViewResult view_header_segment(BlockNum number, const HeaderSnapshotWalker& walker); - ViewResult view_body_segment(BlockNum number, const BodySnapshotWalker& walker); - ViewResult view_tx_segment(BlockNum number, const TransactionSnapshotWalker& walker); - std::size_t view_header_segments(const HeaderSnapshotWalker& walker); - std::size_t view_body_segments(const BodySnapshotWalker& walker); - std::size_t view_tx_segments(const TransactionSnapshotWalker& walker); + using SnapshotWalker = std::function; - [[nodiscard]] const HeaderSnapshot* get_header_segment(const SnapshotPath& path) const; - [[nodiscard]] const BodySnapshot* get_body_segment(const SnapshotPath& path) const; - [[nodiscard]] const TransactionSnapshot* get_tx_segment(const SnapshotPath& path) const; + ViewResult view_header_segment(BlockNum number, const SnapshotWalker& walker); + ViewResult view_body_segment(BlockNum number, const SnapshotWalker& walker); + ViewResult view_tx_segment(BlockNum number, const SnapshotWalker& walker); - [[nodiscard]] const HeaderSnapshot* find_header_segment(BlockNum number) const; - [[nodiscard]] const BodySnapshot* find_body_segment(BlockNum number) const; - [[nodiscard]] const TransactionSnapshot* find_tx_segment(BlockNum number) const; + using SnapshotBundleWalker = std::function; + std::size_t view_bundles(const SnapshotBundleWalker& walker); - [[nodiscard]] std::vector> missing_indexes() const; + std::size_t view_header_segments(const SnapshotWalker& walker); + std::size_t view_body_segments(const SnapshotWalker& walker); + std::size_t view_tx_segments(const SnapshotWalker& walker); - [[nodiscard]] BlockNum segment_max_block() const { return segment_max_block_; } - [[nodiscard]] BlockNum idx_max_block() const { return idx_max_block_; } + [[nodiscard]] std::optional find_header_segment(BlockNum number) const; + [[nodiscard]] std::optional find_body_segment(BlockNum number) const; + [[nodiscard]] std::optional find_tx_segment(BlockNum number) const; + + using HeaderWalker = std::function; + bool for_each_header(const HeaderWalker& fn); + + using BodyWalker = std::function; + bool for_each_body(const BodyWalker& fn); [[nodiscard]] std::optional find_block_number(Hash txn_hash) const; private: - bool reopen_header(const SnapshotPath& seg_file); - bool reopen_body(const SnapshotPath& seg_file); - bool reopen_transaction(const SnapshotPath& seg_file); - - template - const T* find_segment(const SnapshotsByPath& segments, BlockNum number) const; + void reopen_list(const SnapshotPathList& segment_files); + ViewResult view_segment(SnapshotType type, BlockNum number, const SnapshotWalker& walker); + std::size_t view_segments(SnapshotType type, const SnapshotWalker& walker); + const SnapshotBundle* find_bundle(BlockNum number) const; + std::optional find_segment(SnapshotType type, BlockNum number) const; - template - static bool reopen(SnapshotsByPath& segments, const SnapshotPath& seg_file); + [[nodiscard]] SnapshotPathList get_segment_files() const { + return get_files(kSegmentExtension); + } [[nodiscard]] SnapshotPathList get_idx_files() const { return get_files(kIdxExtension); @@ -137,7 +188,7 @@ class SnapshotRepository { [[nodiscard]] SnapshotPathList get_files(const std::string& ext) const; - [[nodiscard]] BlockNum max_idx_available() const; + [[nodiscard]] BlockNum max_idx_available(); //! The configuration settings for snapshots SnapshotSettings settings_; @@ -148,14 +199,8 @@ class SnapshotRepository { //! All types of .idx files are available - up to this block number BlockNum idx_max_block_{0}; - //! The snapshots containing the block Headers - SnapshotsByPath header_segments_; - - //! The snapshots containing the block Bodies - SnapshotsByPath body_segments_; - - //! The snapshots containing the Transactions - SnapshotsByPath tx_segments_; + //! Full snapshot bundles ordered by block_from + std::map bundles_; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 3f87ee7c7e..0364bfd947 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -38,16 +38,16 @@ TEST_CASE("SnapshotRepository::SnapshotRepository", "[silkworm][node][snapshot]" CHECK_NOTHROW(SnapshotRepository{SnapshotSettings{}}); } -TEST_CASE("SnapshotRepository::reopen_folder", "[silkworm][node][snapshot]") { +TEST_CASE("SnapshotRepository::reopen_folder.partial_bundle", "[silkworm][node][snapshot]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test::TemporarySnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; test::TemporarySnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; test::TemporarySnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; - SnapshotSettings settings{tmp_snapshot_1.path().parent_path()}; + SnapshotSettings settings{tmp_dir.path()}; SnapshotRepository repository{settings}; - CHECK_THROWS_AS(repository.reopen_folder(), std::logic_error); + repository.reopen_folder(); CHECK(repository.header_snapshots_count() == 0); CHECK(repository.body_snapshots_count() == 0); CHECK(repository.tx_snapshots_count() == 0); @@ -74,16 +74,16 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { CHECK(repository.view_body_segments(successful_walk) == 0); CHECK(repository.view_tx_segments(successful_walk) == 0); - CHECK(repository.find_header_segment(14'500'000) == nullptr); - CHECK(repository.find_body_segment(11'500'000) == nullptr); - CHECK(repository.find_tx_segment(15'000'000) == nullptr); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); } - SECTION("empty snapshots") { + SECTION("partial bundle") { test::TemporarySnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; test::TemporarySnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; test::TemporarySnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; - CHECK_THROWS_AS(repository.reopen_folder(), std::logic_error); + repository.reopen_folder(); using ViewResult = SnapshotRepository::ViewResult; CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kSnapshotNotFound); @@ -93,35 +93,31 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { CHECK(repository.view_body_segments(successful_walk) == 0); // empty snapshots are ignored by repository CHECK(repository.view_tx_segments(successful_walk) == 0); // empty snapshots are ignored by repository - CHECK(repository.find_header_segment(14'500'000) == nullptr); - CHECK(repository.find_body_segment(11'500'000) == nullptr); - CHECK(repository.find_tx_segment(15'000'000) == nullptr); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); } SECTION("non-empty snapshots") { test::HelloWorldSnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-011500-012000-bodies.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; + test::HelloWorldSnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-014500-015000-bodies.seg"}; + test::HelloWorldSnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-014500-015000-transactions.seg"}; repository.reopen_folder(); using ViewResult = SnapshotRepository::ViewResult; CHECK(repository.view_header_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_body_segment(11'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_tx_segment(15'000'000, failing_walk) == ViewResult::kWalkFailed); + CHECK(repository.view_body_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); + CHECK(repository.view_tx_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); CHECK(repository.view_header_segments(failing_walk) == 1); CHECK(repository.view_body_segments(failing_walk) == 1); CHECK(repository.view_tx_segments(failing_walk) == 1); CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kWalkSuccess); + CHECK(repository.view_body_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); + CHECK(repository.view_tx_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); CHECK(repository.view_header_segments(successful_walk) == 1); CHECK(repository.view_body_segments(successful_walk) == 1); CHECK(repository.view_tx_segments(successful_walk) == 1); - - // CHECK(repository.find_header_segment(14'500'000) != nullptr); // needs index after check vs max_block_available - // CHECK(repository.find_body_segment(11'500'000) != nullptr); - // CHECK(repository.find_tx_segment(15'000'000) != nullptr); } } @@ -153,22 +149,22 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; SECTION("header w/o index") { - CHECK(repository.find_header_segment(1'500'011) == nullptr); - CHECK(repository.find_header_segment(1'500'012) == nullptr); - CHECK(repository.find_header_segment(1'500'013) == nullptr); - CHECK(repository.find_header_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'011)); + CHECK_FALSE(repository.find_header_segment(1'500'012)); + CHECK_FALSE(repository.find_header_segment(1'500'013)); + CHECK_FALSE(repository.find_header_segment(1'500'014)); } SECTION("body w/o index") { - CHECK(repository.find_body_segment(1'500'011) == nullptr); - CHECK(repository.find_body_segment(1'500'012) == nullptr); - CHECK(repository.find_body_segment(1'500'013) == nullptr); - CHECK(repository.find_body_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'011)); + CHECK_FALSE(repository.find_body_segment(1'500'012)); + CHECK_FALSE(repository.find_body_segment(1'500'013)); + CHECK_FALSE(repository.find_body_segment(1'500'014)); } SECTION("tx w/o index") { - CHECK(repository.find_tx_segment(1'500'011) == nullptr); - CHECK(repository.find_tx_segment(1'500'012) == nullptr); - CHECK(repository.find_tx_segment(1'500'013) == nullptr); - CHECK(repository.find_tx_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'011)); + CHECK_FALSE(repository.find_tx_segment(1'500'012)); + CHECK_FALSE(repository.find_tx_segment(1'500'013)); + CHECK_FALSE(repository.find_tx_segment(1'500'014)); } test::SampleHeaderSnapshotPath header_snapshot_path{header_snapshot.path()}; // necessary to tweak the block numbers @@ -184,25 +180,25 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { REQUIRE_NOTHROW(repository.reopen_folder()); SECTION("header w/ index") { - CHECK(repository.find_header_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'011)); // CHECK(repository.find_header_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_header_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_header_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_header_segment(1'500'014)); } SECTION("body w/ index") { - CHECK(repository.find_body_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'011)); // CHECK(repository.find_body_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_body_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_body_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_body_segment(1'500'014)); } SECTION("tx w/ index") { - CHECK(repository.find_tx_segment(1'500'011) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'011)); // CHECK(repository.find_tx_segment(1'500'012) != nullptr); // needs full block number in snapshot file names // CHECK(repository.find_tx_segment(1'500'013) != nullptr); // needs full block number in snapshot file names - CHECK(repository.find_tx_segment(1'500'014) == nullptr); + CHECK_FALSE(repository.find_tx_segment(1'500'014)); } SECTION("greater than max_block_available") { - CHECK(repository.find_body_segment(repository.max_block_available() + 1) == nullptr); + CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 1)); } } diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index 527c9d41d8..a38b3b603d 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -274,6 +274,9 @@ class Decompressor { explicit Decompressor(std::filesystem::path compressed_path, std::optional compressed_region = {}); ~Decompressor(); + Decompressor(Decompressor&&) = default; + Decompressor& operator=(Decompressor&&) = default; + [[nodiscard]] const std::filesystem::path& compressed_path() const { return compressed_path_; } [[nodiscard]] std::string compressed_filename() const { return compressed_path_.filename().string(); } diff --git a/silkworm/db/snapshots/snapshot.cpp b/silkworm/db/snapshots/snapshot.cpp deleted file mode 100644 index 6c45ce5cc7..0000000000 --- a/silkworm/db/snapshots/snapshot.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - Copyright 2022 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "snapshot.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace silkworm::snapshots { - -HeaderSnapshot::HeaderSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} - -HeaderSnapshot::HeaderSnapshot(SnapshotPath path, MappedHeadersSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), idx_header_hash_region_{mapped.header_hash_index} {} - -HeaderSnapshot::~HeaderSnapshot() { - close(); -} - -void HeaderSnapshot::reopen_index() { - ensure(decoder_.is_open(), "HeaderSnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto header_index_path = path().index_file(); - if (header_index_path.exists()) { - idx_header_hash_ = std::make_unique(header_index_path.path(), idx_header_hash_region_); - if (idx_header_hash_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(header_index_path.path()); - ensure(removed, "HeaderSnapshot: cannot remove index file"); - close_index(); - } - } -} - -void HeaderSnapshot::close_index() { - idx_header_hash_.reset(); -} - -BodySnapshot::BodySnapshot(SnapshotPath path, std::optional segment_region) - : Snapshot(std::move(path), segment_region) {} - -BodySnapshot::BodySnapshot(SnapshotPath path, MappedBodiesSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), idx_body_number_region_{mapped.block_num_index} {} - -BodySnapshot::~BodySnapshot() { - close(); -} - -void BodySnapshot::reopen_index() { - ensure(decoder_.is_open(), "BodySnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto body_index_path = path().index_file(); - if (body_index_path.exists()) { - idx_body_number_ = std::make_unique(body_index_path.path(), idx_body_number_region_); - if (idx_body_number_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(body_index_path.path()); - ensure(removed, "BodySnapshot: cannot remove index file"); - close_index(); - } - } -} - -void BodySnapshot::close_index() { - idx_body_number_.reset(); -} - -TransactionSnapshot::TransactionSnapshot(SnapshotPath path) : Snapshot(std::move(path)) {} - -TransactionSnapshot::TransactionSnapshot(SnapshotPath path, MappedTransactionsSnapshot mapped) - : Snapshot(std::move(path), mapped.segment), - idx_txn_hash_region_{mapped.tx_hash_index}, - idx_txn_hash_2_block_region_{mapped.tx_hash_2_block_index} {} - -TransactionSnapshot::~TransactionSnapshot() { - close(); -} - -void TransactionSnapshot::reopen_index() { - ensure(decoder_.is_open(), "TransactionSnapshot: segment not open, call reopen_segment"); - - close_index(); - - const auto tx_hash_index_path = path().index_file_for_type(SnapshotType::transactions); - if (tx_hash_index_path.exists()) { - idx_txn_hash_ = std::make_unique(tx_hash_index_path.path(), idx_txn_hash_region_); - if (idx_txn_hash_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(tx_hash_index_path.path()); - ensure(removed, "TransactionSnapshot: cannot remove tx_hash index file"); - close_index(); - } - } - - const auto tx_hash_2_block_index_path = path().index_file_for_type(SnapshotType::transactions_to_block); - if (tx_hash_2_block_index_path.exists()) { - idx_txn_hash_2_block_ = std::make_unique(tx_hash_2_block_index_path.path(), idx_txn_hash_2_block_region_); - if (idx_txn_hash_2_block_->last_write_time() < decoder_.last_write_time()) { - // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - const bool removed = std::filesystem::remove(tx_hash_2_block_index_path.path()); - ensure(removed, "TransactionSnapshot: cannot remove tx_hash_2_block index file"); - close_index(); - } - } -} - -void TransactionSnapshot::close_index() { - idx_txn_hash_.reset(); - idx_txn_hash_2_block_.reset(); -} - -} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot.hpp b/silkworm/db/snapshots/snapshot.hpp deleted file mode 100644 index 59c2717f99..0000000000 --- a/silkworm/db/snapshots/snapshot.hpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - Copyright 2022 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "snapshot_reader.hpp" -#include "snapshot_word_serializer.hpp" - -namespace silkworm::snapshots { - -struct MappedHeadersSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion header_hash_index; -}; - -struct MappedBodiesSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion block_num_index; -}; - -struct MappedTransactionsSnapshot { - MemoryMappedRegion segment; - MemoryMappedRegion tx_hash_index; - MemoryMappedRegion tx_hash_2_block_index; -}; - -class HeaderSnapshot : public Snapshot { - public: - explicit HeaderSnapshot(SnapshotPath path); - HeaderSnapshot(SnapshotPath path, MappedHeadersSnapshot mapped); - ~HeaderSnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_header_hash() const { return idx_header_hash_.get(); } - - void reopen_index() override; - - protected: - void close_index() override; - - private: - //! Index header_hash -> headers_segment_offset - std::unique_ptr idx_header_hash_; - - //! The external memory-mapped region for Headers snapshot index - std::optional idx_header_hash_region_; -}; - -using StoredBlockBody = BlockBodyForStorage; - -class BodySnapshot : public Snapshot { - public: - explicit BodySnapshot(SnapshotPath path, std::optional segment_region = std::nullopt); - BodySnapshot(SnapshotPath path, MappedBodiesSnapshot mapped); - ~BodySnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_body_number() const { return idx_body_number_.get(); } - - void reopen_index() override; - - protected: - void close_index() override; - - private: - //! Index block_num_u64 -> bodies_segment_offset - std::unique_ptr idx_body_number_; - - //! The external memory-mapped region for Bodies snapshot index - std::optional idx_body_number_region_; -}; - -class TransactionSnapshot : public Snapshot { - public: - explicit TransactionSnapshot(SnapshotPath path); - TransactionSnapshot(SnapshotPath path, MappedTransactionsSnapshot mapped); - ~TransactionSnapshot() override; - - [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash() const { return idx_txn_hash_.get(); } - [[nodiscard]] const rec_split::RecSplitIndex* idx_txn_hash_2_block() const { return idx_txn_hash_2_block_.get(); } - - void reopen_index() override; - - protected: - void close_index() override; - - private: - //! Index transaction_hash -> transactions_segment_offset - std::unique_ptr idx_txn_hash_; - - //! Index transaction_hash -> block_number - std::unique_ptr idx_txn_hash_2_block_; - - //! The external memory-mapped region for Transactions hash->offset index - std::optional idx_txn_hash_region_; - - //! The external memory-mapped region for Transactions hash->block_number index - std::optional idx_txn_hash_2_block_region_; -}; - -} // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp index edb2bc776a..ab541da51f 100644 --- a/silkworm/db/snapshots/snapshot_reader.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -23,8 +23,15 @@ namespace silkworm::snapshots { -Snapshot::Snapshot(SnapshotPath path, std::optional segment_region) - : path_(std::move(path)), decoder_{path_.path(), segment_region} {} +Snapshot::Snapshot( + SnapshotPath path, + std::optional segment_region) + : path_(std::move(path)), + decoder_{path_.path(), segment_region} {} + +Snapshot::~Snapshot() { + close(); +} MemoryMappedRegion Snapshot::memory_file_region() const { const auto memory_file{decoder_.memory_file()}; @@ -33,7 +40,7 @@ MemoryMappedRegion Snapshot::memory_file_region() const { } void Snapshot::reopen_segment() { - close_segment(); + close(); // Open decompressor that opens the mapped file in turns decoder_.open(); @@ -90,11 +97,6 @@ Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_pref } void Snapshot::close() { - close_segment(); - close_index(); -} - -void Snapshot::close_segment() { // Close decompressor that closes the mapped file in turns decoder_.close(); } diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 60354aff26..25b2afe572 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -76,8 +76,13 @@ class Snapshot { static inline const auto kPageSize{os::page_size()}; - explicit Snapshot(SnapshotPath path, std::optional segment_region = std::nullopt); - virtual ~Snapshot() = default; + explicit Snapshot( + SnapshotPath path, + std::optional segment_region = std::nullopt); + ~Snapshot(); + + Snapshot(Snapshot&&) = default; + Snapshot& operator=(Snapshot&&) = default; [[nodiscard]] SnapshotPath path() const { return path_; } [[nodiscard]] std::filesystem::path fs_path() const { return path_.path(); } @@ -91,18 +96,14 @@ class Snapshot { [[nodiscard]] MemoryMappedRegion memory_file_region() const; void reopen_segment(); - virtual void reopen_index() = 0; + void close(); Iterator begin(std::shared_ptr serializer) const; Iterator end() const; Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const; - void close(); - - protected: - void close_segment(); - virtual void close_index() = 0; + private: seg::Decompressor::Iterator seek_decoder(uint64_t offset, std::optional hash_prefix) const; //! The path of the segment file for this snapshot diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index 79bfcc79db..ae578e5439 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -14,8 +14,6 @@ limitations under the License. */ -#include "snapshot.hpp" - #include #include @@ -34,6 +32,7 @@ #include #include +#include "snapshot_reader.hpp" #include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -60,10 +59,6 @@ class Snapshot_ForTest : public Snapshot { explicit Snapshot_ForTest(std::filesystem::path path) : Snapshot(*SnapshotPath::parse(std::move(path))) {} Snapshot_ForTest(const std::filesystem::path& tmp_dir, BlockNum block_from, BlockNum block_to) : Snapshot(SnapshotPath_ForTest{tmp_dir, block_from, block_to}) {} - ~Snapshot_ForTest() override { close(); } - - void reopen_index() override {} - void close_index() override {} }; template @@ -143,11 +138,11 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind auto header_index = HeaderIndex::make(header_snapshot_path); REQUIRE_NOTHROW(header_index.build()); - HeaderSnapshot header_snapshot{header_snapshot_path}; + Snapshot header_snapshot{header_snapshot_path}; header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); - Index idx_header_hash{*header_snapshot.idx_header_hash()}; + Index idx_header_hash{header_snapshot_path.index_file()}; + idx_header_hash.reopen_index(); HeaderFindByBlockNumQuery header_by_number{header_snapshot, idx_header_hash}; CHECK(!header_by_number.exec(1'500'011)); @@ -185,11 +180,11 @@ TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]" auto body_index = BodyIndex::make(body_snapshot_path); REQUIRE_NOTHROW(body_index.build()); - BodySnapshot body_snapshot{body_snapshot_path}; + Snapshot body_snapshot{body_snapshot_path}; body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - Index idx_body_number{*body_snapshot.idx_body_number()}; + Index idx_body_number{body_snapshot_path.index_file()}; + idx_body_number.reopen_index(); BodyFindByBlockNumQuery body_by_number{body_snapshot, idx_body_number}; CHECK(!body_by_number.exec(1'500'011)); @@ -214,11 +209,11 @@ TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); CHECK_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; const auto transaction = txn_by_id.exec(7'341'272); @@ -243,14 +238,15 @@ TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][sna auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index_hash_to_block.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); TransactionFindByIdQuery txn_by_id{tx_snapshot, idx_txn_hash}; - Index idx_txn_hash_2_block{*tx_snapshot.idx_txn_hash_2_block()}; + Index idx_txn_hash_2_block{tx_snapshot_path.index_file_for_type(SnapshotType::transactions_to_block)}; + idx_txn_hash_2_block.reopen_index(); TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{tx_snapshot, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 @@ -284,11 +280,11 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); TransactionRangeFromIdQuery txn_range{tx_snapshot, idx_txn_hash}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 @@ -324,11 +320,11 @@ TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][i auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); REQUIRE_NOTHROW(tx_index.build()); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; + Snapshot tx_snapshot{tx_snapshot_path}; tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - Index idx_txn_hash{*tx_snapshot.idx_txn_hash()}; + Index idx_txn_hash{tx_snapshot_path.index_file()}; + idx_txn_hash.reopen_index(); TransactionPayloadRlpRangeFromIdQuery txn_rlp_range{tx_snapshot, idx_txn_hash}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 @@ -457,74 +453,32 @@ TEST_CASE("slice_tx_payload", "[silkworm][node][snapshot]") { } TEST_CASE("HeaderSnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { + // SKIP("TODO: see Index::reopen_index"); + return; + SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test::SampleHeaderSnapshotFile sample_header_snapshot{tmp_dir.path()}; test::SampleHeaderSnapshotPath header_snapshot_path{sample_header_snapshot.path()}; - auto header_index = HeaderIndex::make(header_snapshot_path); - REQUIRE_NOTHROW(header_index.build()); - HeaderSnapshot header_snapshot{header_snapshot_path}; - header_snapshot.reopen_segment(); - header_snapshot.reopen_index(); - REQUIRE(std::filesystem::exists(header_snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample header snapshot - const auto last_write_time_diff = move_last_write_time(sample_header_snapshot.path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file because it was created in the past - CHECK(std::filesystem::exists(header_snapshot.path().index_file().path())); - header_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(header_snapshot.path().index_file().path())); -} - -TEST_CASE("BodySnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile sample_body_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotPath body_snapshot_path{sample_body_snapshot.path()}; - auto body_index = BodyIndex::make(body_snapshot_path); - REQUIRE_NOTHROW(body_index.build()); + auto index_builder = HeaderIndex::make(header_snapshot_path); + REQUIRE_NOTHROW(index_builder.build()); - BodySnapshot body_snapshot{body_snapshot_path}; - body_snapshot.reopen_segment(); - body_snapshot.reopen_index(); - CHECK(std::filesystem::exists(body_snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample body snapshot - const auto last_write_time_diff = move_last_write_time(sample_body_snapshot.path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file if created in the past - CHECK(std::filesystem::exists(body_snapshot.path().index_file().path())); - body_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(body_snapshot.path().index_file().path())); -} - -TEST_CASE("TransactionSnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotPath body_snapshot_path{body_snapshot.path()}; - test::SampleTransactionSnapshotFile sample_tx_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotPath tx_snapshot_path{sample_tx_snapshot.path()}; - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); - REQUIRE_NOTHROW(tx_index.build()); + Snapshot snapshot{header_snapshot_path}; + snapshot.reopen_segment(); - TransactionSnapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); - tx_snapshot.reopen_index(); - CHECK(std::filesystem::exists(tx_snapshot.path().index_file().path())); + Index index{snapshot.path().index_file()}; + index.reopen_index(); + REQUIRE(std::filesystem::exists(snapshot.path().index_file().path())); - // Move 1 hour to the future the last write time for sample tx snapshot - const auto last_write_time_diff = move_last_write_time(sample_tx_snapshot.path(), 1h); + // Move 1 hour to the future the last write time for sample header snapshot + const auto last_write_time_diff = move_last_write_time(snapshot.path().path(), 1h); REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - // Verify that reopening the index removes the index file if created in the past - CHECK(std::filesystem::exists(tx_snapshot.path().index_file().path())); - tx_snapshot.reopen_index(); - CHECK_FALSE(std::filesystem::exists(tx_snapshot.path().index_file().path())); + // Verify that reopening the index removes the index file because it was created in the past + CHECK(std::filesystem::exists(snapshot.path().index_file().path())); + index.reopen_index(); + CHECK_FALSE(std::filesystem::exists(snapshot.path().index_file().path())); } } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_index.cpp b/silkworm/db/snapshots/txn_index.cpp index 688bbe189a..0314d38b80 100644 --- a/silkworm/db/snapshots/txn_index.cpp +++ b/silkworm/db/snapshots/txn_index.cpp @@ -17,7 +17,7 @@ #include "txn_index.hpp" #include "body_txs_amount_query.hpp" -#include "snapshot.hpp" +#include "snapshot_reader.hpp" #include "txn_snapshot_word_serializer.hpp" namespace silkworm::snapshots { @@ -38,7 +38,7 @@ SnapshotPath TransactionIndex::bodies_segment_path(const SnapshotPath& segment_p std::pair TransactionIndex::compute_txs_amount( SnapshotPath bodies_segment_path, std::optional bodies_segment_region) { - BodySnapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; + Snapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; bodies_snapshot.reopen_segment(); auto result = BodyTxsAmountQuery{bodies_snapshot}.exec(); return {result.first_tx_id, result.count}; From aa3b101b7191e4dc4d0ee7b6a0d38ac93c24f338 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Fri, 19 Apr 2024 15:42:50 +0200 Subject: [PATCH 22/37] rename serializer -> deserializer --- silkworm/db/snapshots/body_snapshot.hpp | 6 ++-- silkworm/db/snapshots/header_snapshot.hpp | 6 ++-- silkworm/db/snapshots/snapshot_reader.cpp | 26 ++++++++-------- silkworm/db/snapshots/snapshot_reader.hpp | 30 +++++++++---------- .../db/snapshots/snapshot_word_serializer.hpp | 4 +-- silkworm/db/snapshots/txn_snapshot.hpp | 4 +-- .../txn_snapshot_word_serializer.hpp | 8 ++--- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp index bec0a0a591..f2a1e43cec 100644 --- a/silkworm/db/snapshots/body_snapshot.hpp +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -27,16 +27,16 @@ namespace silkworm::snapshots { void decode_word_into_body(ByteView word, BlockBodyForStorage& body); -struct BodySnapshotWordSerializer : public SnapshotWordSerializer { +struct BodySnapshotWordDeserializer : public SnapshotWordDeserializer { BlockBodyForStorage value; - ~BodySnapshotWordSerializer() override = default; + ~BodySnapshotWordDeserializer() override = default; void decode_word(ByteView word) override { decode_word_into_body(word, value); } }; -struct BodySnapshotReader : public SnapshotReader {}; +struct BodySnapshotReader : public SnapshotReader {}; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index b3295d8d60..6412f7cbce 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -27,10 +27,10 @@ namespace silkworm::snapshots { void decode_word_into_header(ByteView word, BlockHeader& header); void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNum block_from, BlockNum block_to); -struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { +struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { BlockHeader value; - ~HeaderSnapshotWordSerializer() override = default; + ~HeaderSnapshotWordDeserializer() override = default; void decode_word(ByteView word) override { decode_word_into_header(word, value); @@ -41,6 +41,6 @@ struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { } }; -struct HeaderSnapshotReader : public SnapshotReader {}; +struct HeaderSnapshotReader : public SnapshotReader {}; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_reader.cpp b/silkworm/db/snapshots/snapshot_reader.cpp index ab541da51f..e3f7af1d40 100644 --- a/silkworm/db/snapshots/snapshot_reader.cpp +++ b/silkworm/db/snapshots/snapshot_reader.cpp @@ -51,27 +51,27 @@ Snapshot::Iterator& Snapshot::Iterator::operator++() { ++it_; if (has_next) { - serializer_->decode_word(*it_); - serializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + deserializer_->decode_word(*it_); + deserializer_->check_sanity_with_metadata(path_.block_from(), path_.block_to()); } else { - serializer_.reset(); + deserializer_.reset(); } return *this; } bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { - return (lhs.serializer_ == rhs.serializer_) && - (!lhs.serializer_ || (lhs.it_ == rhs.it_)); + return (lhs.deserializer_ == rhs.deserializer_) && + (!lhs.deserializer_ || (lhs.it_ == rhs.it_)); } -Snapshot::Iterator Snapshot::begin(std::shared_ptr serializer) const { +Snapshot::Iterator Snapshot::begin(std::shared_ptr deserializer) const { auto it = decoder_.begin(); if (it == decoder_.end()) { return end(); } - serializer->decode_word(*it); - serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; + deserializer->decode_word(*it); + deserializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; } Snapshot::Iterator Snapshot::end() const { @@ -82,18 +82,18 @@ seg::Decompressor::Iterator Snapshot::seek_decoder(uint64_t offset, std::optiona return decoder_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); } -Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const { +Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const { auto it = seek_decoder(offset, hash_prefix); if (it == decoder_.end()) { return end(); } try { - serializer->decode_word(*it); + deserializer->decode_word(*it); } catch (...) { return end(); } - serializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); - return Snapshot::Iterator{std::move(it), std::move(serializer), path()}; + deserializer->check_sanity_with_metadata(path_.block_from(), path_.block_to()); + return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; } void Snapshot::close() { diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 25b2afe572..0aa2058fa4 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -45,7 +45,7 @@ class Snapshot { public: class Iterator { public: - using value_type = std::shared_ptr; + using value_type = std::shared_ptr; using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = const value_type*; @@ -53,12 +53,12 @@ class Snapshot { Iterator( seg::Decompressor::Iterator it, - std::shared_ptr serializer, + std::shared_ptr deserializer, SnapshotPath path) - : it_(std::move(it)), serializer_(std::move(serializer)), path_(std::move(path)) {} + : it_(std::move(it)), deserializer_(std::move(deserializer)), path_(std::move(path)) {} - reference operator*() const { return serializer_; } - pointer operator->() const { return &serializer_; } + reference operator*() const { return deserializer_; } + pointer operator->() const { return &deserializer_; } Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } Iterator& operator++(); @@ -68,7 +68,7 @@ class Snapshot { private: seg::Decompressor::Iterator it_; - std::shared_ptr serializer_; + std::shared_ptr deserializer_; SnapshotPath path_; }; @@ -98,10 +98,10 @@ class Snapshot { void reopen_segment(); void close(); - Iterator begin(std::shared_ptr serializer) const; + Iterator begin(std::shared_ptr deserializer) const; Iterator end() const; - Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr serializer) const; + Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const; private: seg::Decompressor::Iterator seek_decoder(uint64_t offset, std::optional hash_prefix) const; @@ -112,12 +112,12 @@ class Snapshot { seg::Decompressor decoder_; }; -template +template class SnapshotReader { public: class Iterator { public: - using value_type = decltype(TWordSerializer::value); + using value_type = decltype(TWordDeserializer::value); using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = const value_type*; @@ -140,9 +140,9 @@ class SnapshotReader { private: value_type& value() const { - SnapshotWordSerializer& base_serializer = **it_; - // dynamic_cast is safe because TWordSerializer was used when creating the Iterator - auto& s = dynamic_cast(base_serializer); + SnapshotWordDeserializer& base_deserializer = **it_; + // dynamic_cast is safe because TWordDeserializer was used when creating the Iterator + auto& s = dynamic_cast(base_deserializer); return s.value; } @@ -154,7 +154,7 @@ class SnapshotReader { SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} Iterator begin() const { - return Iterator{snapshot_.begin(std::make_shared())}; + return Iterator{snapshot_.begin(std::make_shared())}; } Iterator end() const { @@ -162,7 +162,7 @@ class SnapshotReader { } Iterator seek(uint64_t offset, std::optional hash_prefix = std::nullopt) const { - return Iterator{snapshot_.seek(offset, hash_prefix, std::make_shared())}; + return Iterator{snapshot_.seek(offset, hash_prefix, std::make_shared())}; } std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { diff --git a/silkworm/db/snapshots/snapshot_word_serializer.hpp b/silkworm/db/snapshots/snapshot_word_serializer.hpp index e06f77df18..5cecd26295 100644 --- a/silkworm/db/snapshots/snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/snapshot_word_serializer.hpp @@ -21,8 +21,8 @@ namespace silkworm::snapshots { -struct SnapshotWordSerializer { - virtual ~SnapshotWordSerializer() = default; +struct SnapshotWordDeserializer { + virtual ~SnapshotWordDeserializer() = default; virtual void decode_word(ByteView word) = 0; virtual void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) {} }; diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp index 0594eee6fe..3599cd5fe2 100644 --- a/silkworm/db/snapshots/txn_snapshot.hpp +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -21,9 +21,9 @@ namespace silkworm::snapshots { -struct TransactionSnapshotReader : public SnapshotReader {}; +struct TransactionSnapshotReader : public SnapshotReader {}; template -struct TransactionSnapshotPayloadRlpReader : public SnapshotReader> {}; +struct TransactionSnapshotPayloadRlpReader : public SnapshotReader> {}; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp index e57675fcfd..69ab160dba 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -41,10 +41,10 @@ Hash tx_buffer_hash(ByteView tx_buffer, uint64_t tx_id); //! Decode transaction from snapshot word. Format is: tx_hash_1byte + sender_address_20byte + tx_rlp_bytes void decode_word_into_tx(ByteView word, Transaction& tx); -struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { +struct TransactionSnapshotWordDeserializer : public SnapshotWordDeserializer { Transaction value; - ~TransactionSnapshotWordSerializer() override = default; + ~TransactionSnapshotWordDeserializer() override = default; void decode_word(ByteView word) override { decode_word_into_tx(word, value); @@ -52,10 +52,10 @@ struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { }; template -struct TransactionSnapshotWordPayloadRlpSerializer : public SnapshotWordSerializer { +struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeserializer { TBytes value; - ~TransactionSnapshotWordPayloadRlpSerializer() override = default; + ~TransactionSnapshotWordPayloadRlpDeserializer() override = default; void decode_word(ByteView word) override { auto data = slice_tx_data(word); From f61a88f011bb734f10bf10a63a1d459b2569500f Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Mon, 22 Apr 2024 17:51:26 +0200 Subject: [PATCH 23/37] SnapshotReader: mutable iterators and fix move support --- silkworm/db/snapshots/snapshot_reader.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 0aa2058fa4..28f6831ff2 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -48,8 +48,8 @@ class Snapshot { using value_type = std::shared_ptr; using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; - using pointer = const value_type*; - using reference = const value_type&; + using pointer = value_type*; + using reference = value_type&; Iterator( seg::Decompressor::Iterator it, @@ -57,8 +57,7 @@ class Snapshot { SnapshotPath path) : it_(std::move(it)), deserializer_(std::move(deserializer)), path_(std::move(path)) {} - reference operator*() const { return deserializer_; } - pointer operator->() const { return &deserializer_; } + value_type operator*() const { return deserializer_; } Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } Iterator& operator++(); @@ -120,8 +119,8 @@ class SnapshotReader { using value_type = decltype(TWordDeserializer::value); using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; - using pointer = const value_type*; - using reference = const value_type&; + using pointer = value_type*; + using reference = value_type&; explicit Iterator(Snapshot::Iterator it) : it_(std::move(it)) {} From f9aa2ec1136933492842c7971be0dc05c29bbb9b Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Mon, 22 Apr 2024 17:31:13 +0200 Subject: [PATCH 24/37] static_assert iterators use ptrdiff_t difference_type in iterators --- silkworm/db/snapshots/index_builder.hpp | 4 +++- silkworm/db/snapshots/seg/decompressor.hpp | 4 +++- silkworm/db/snapshots/txs_and_bodies_query.hpp | 4 +++- silkworm/sentry/common/random.hpp | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/silkworm/db/snapshots/index_builder.hpp b/silkworm/db/snapshots/index_builder.hpp index 482c3fd545..c8e7b2c18d 100644 --- a/silkworm/db/snapshots/index_builder.hpp +++ b/silkworm/db/snapshots/index_builder.hpp @@ -56,7 +56,7 @@ struct IndexInputDataQuery { : query_(query), impl_(std::move(impl)), entry_(entry) {} using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; @@ -75,6 +75,8 @@ struct IndexInputDataQuery { value_type entry_; }; + static_assert(std::input_or_output_iterator); + virtual ~IndexInputDataQuery() = default; virtual Iterator begin() = 0; diff --git a/silkworm/db/snapshots/seg/decompressor.hpp b/silkworm/db/snapshots/seg/decompressor.hpp index a38b3b603d..fe5ed4c6c4 100644 --- a/silkworm/db/snapshots/seg/decompressor.hpp +++ b/silkworm/db/snapshots/seg/decompressor.hpp @@ -224,7 +224,7 @@ class Decompressor { //! input_iterator concept boilerplate using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using value_type = Bytes; using pointer = value_type*; using reference = value_type&; @@ -271,6 +271,8 @@ class Decompressor { std::shared_ptr read_mode_guard_; }; + static_assert(std::input_or_output_iterator); + explicit Decompressor(std::filesystem::path compressed_path, std::optional compressed_region = {}); ~Decompressor(); diff --git a/silkworm/db/snapshots/txs_and_bodies_query.hpp b/silkworm/db/snapshots/txs_and_bodies_query.hpp index 615a118116..9c1d8596da 100644 --- a/silkworm/db/snapshots/txs_and_bodies_query.hpp +++ b/silkworm/db/snapshots/txs_and_bodies_query.hpp @@ -55,7 +55,7 @@ class TxsAndBodiesQuery { }; using iterator_category = std::input_iterator_tag; - using difference_type = void; + using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; @@ -83,6 +83,8 @@ class TxsAndBodiesQuery { std::string log_title_; }; + static_assert(std::input_or_output_iterator); + TxsAndBodiesQuery( SnapshotPath txs_segment_path, std::optional txs_segment_region, diff --git a/silkworm/sentry/common/random.hpp b/silkworm/sentry/common/random.hpp index 9c99cf9bb1..8f471af749 100644 --- a/silkworm/sentry/common/random.hpp +++ b/silkworm/sentry/common/random.hpp @@ -38,7 +38,7 @@ std::list random_list_items(std::list& l, size_t max_count) { public: [[maybe_unused]] typedef std::output_iterator_tag iterator_category; [[maybe_unused]] typedef void value_type; - [[maybe_unused]] typedef void difference_type; + [[maybe_unused]] typedef std::ptrdiff_t difference_type; [[maybe_unused]] typedef void pointer; [[maybe_unused]] typedef void reference; @@ -61,6 +61,8 @@ std::list random_list_items(std::list& l, size_t max_count) { std::list* container_; }; + static_assert(std::output_iterator); + std::list out; std::default_random_engine random_engine{std::random_device{}()}; std::sample(l.begin(), l.end(), BackInsertPtrIterator(out), max_count, random_engine); From 22e8ae5ef3dbf57213840603ab4cc858f9136af8 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Mon, 22 Apr 2024 18:06:14 +0200 Subject: [PATCH 25/37] simplify queries and readers with typedefs --- silkworm/db/snapshots/body_snapshot.hpp | 2 +- silkworm/db/snapshots/header_queries.hpp | 4 +--- silkworm/db/snapshots/header_snapshot.hpp | 2 +- silkworm/db/snapshots/txn_queries.hpp | 19 ++++--------------- silkworm/db/snapshots/txn_snapshot.hpp | 4 ++-- 5 files changed, 9 insertions(+), 22 deletions(-) diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp index f2a1e43cec..dc93b5b33a 100644 --- a/silkworm/db/snapshots/body_snapshot.hpp +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -37,6 +37,6 @@ struct BodySnapshotWordDeserializer : public SnapshotWordDeserializer { } }; -struct BodySnapshotReader : public SnapshotReader {}; +using BodySnapshotReader = SnapshotReader; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp index b4a38b35fb..074635d6c2 100644 --- a/silkworm/db/snapshots/header_queries.hpp +++ b/silkworm/db/snapshots/header_queries.hpp @@ -31,8 +31,6 @@ struct HeaderFindByBlockNumQuery : public FindByIdQuery { } }; -struct HeaderFindByHashQuery : public FindByHashQuery { - using FindByHashQuery::FindByHashQuery; -}; +using HeaderFindByHashQuery = FindByHashQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index 6412f7cbce..93706f3d45 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -41,6 +41,6 @@ struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { } }; -struct HeaderSnapshotReader : public SnapshotReader {}; +using HeaderSnapshotReader = SnapshotReader; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_queries.hpp b/silkworm/db/snapshots/txn_queries.hpp index de570d5195..f5cadc0610 100644 --- a/silkworm/db/snapshots/txn_queries.hpp +++ b/silkworm/db/snapshots/txn_queries.hpp @@ -23,21 +23,10 @@ namespace silkworm::snapshots { -struct TransactionFindByIdQuery : public FindByIdQuery { - using FindByIdQuery::FindByIdQuery; -}; - -struct TransactionFindByHashQuery : public FindByHashQuery { - using FindByHashQuery::FindByHashQuery; -}; - -struct TransactionRangeFromIdQuery : public RangeFromIdQuery { - using RangeFromIdQuery::RangeFromIdQuery; -}; - -struct TransactionPayloadRlpRangeFromIdQuery : public RangeFromIdQuery> { - using RangeFromIdQuery>::RangeFromIdQuery; -}; +using TransactionFindByIdQuery = FindByIdQuery; +using TransactionFindByHashQuery = FindByHashQuery; +using TransactionRangeFromIdQuery = RangeFromIdQuery; +using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery>; class TransactionBlockNumByTxnHashQuery { public: diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp index 3599cd5fe2..3030d1ffe8 100644 --- a/silkworm/db/snapshots/txn_snapshot.hpp +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -21,9 +21,9 @@ namespace silkworm::snapshots { -struct TransactionSnapshotReader : public SnapshotReader {}; +using TransactionSnapshotReader = SnapshotReader; template -struct TransactionSnapshotPayloadRlpReader : public SnapshotReader> {}; +using TransactionSnapshotPayloadRlpReader = SnapshotReader>; } // namespace silkworm::snapshots From d54cf6eedae6d67e66dea16270f9797089449a09 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 10:40:05 +0200 Subject: [PATCH 26/37] concepts for deserializers and readers --- silkworm/db/snapshots/basic_queries.hpp | 8 ++++---- silkworm/db/snapshots/body_snapshot.hpp | 2 ++ silkworm/db/snapshots/header_snapshot.hpp | 2 ++ silkworm/db/snapshots/snapshot_reader.hpp | 9 ++++++++- silkworm/db/snapshots/snapshot_word_serializer.hpp | 4 ++++ silkworm/db/snapshots/txn_snapshot.hpp | 2 +- silkworm/db/snapshots/txn_snapshot_word_serializer.hpp | 10 +++++++++- 7 files changed, 30 insertions(+), 7 deletions(-) diff --git a/silkworm/db/snapshots/basic_queries.hpp b/silkworm/db/snapshots/basic_queries.hpp index 2bf6c23c7c..e2079dbb20 100644 --- a/silkworm/db/snapshots/basic_queries.hpp +++ b/silkworm/db/snapshots/basic_queries.hpp @@ -26,7 +26,7 @@ namespace silkworm::snapshots { -template +template class BasicQuery { public: BasicQuery( @@ -40,7 +40,7 @@ class BasicQuery { const Index& index_; }; -template +template struct FindByIdQuery : public BasicQuery { using BasicQuery::BasicQuery; @@ -50,7 +50,7 @@ struct FindByIdQuery : public BasicQuery { } }; -template +template struct FindByHashQuery : public BasicQuery { using BasicQuery::BasicQuery; @@ -71,7 +71,7 @@ struct FindByHashQuery : public BasicQuery { } }; -template +template struct RangeFromIdQuery : public BasicQuery { using BasicQuery::BasicQuery; diff --git a/silkworm/db/snapshots/body_snapshot.hpp b/silkworm/db/snapshots/body_snapshot.hpp index dc93b5b33a..8e6301b94b 100644 --- a/silkworm/db/snapshots/body_snapshot.hpp +++ b/silkworm/db/snapshots/body_snapshot.hpp @@ -37,6 +37,8 @@ struct BodySnapshotWordDeserializer : public SnapshotWordDeserializer { } }; +static_assert(SnapshotWordDeserializerConcept); + using BodySnapshotReader = SnapshotReader; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_snapshot.hpp b/silkworm/db/snapshots/header_snapshot.hpp index 93706f3d45..c5b9f581b6 100644 --- a/silkworm/db/snapshots/header_snapshot.hpp +++ b/silkworm/db/snapshots/header_snapshot.hpp @@ -41,6 +41,8 @@ struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { } }; +static_assert(SnapshotWordDeserializerConcept); + using HeaderSnapshotReader = SnapshotReader; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_reader.hpp b/silkworm/db/snapshots/snapshot_reader.hpp index 28f6831ff2..e7c5bd7ec7 100644 --- a/silkworm/db/snapshots/snapshot_reader.hpp +++ b/silkworm/db/snapshots/snapshot_reader.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -111,7 +112,7 @@ class Snapshot { seg::Decompressor decoder_; }; -template +template class SnapshotReader { public: class Iterator { @@ -150,6 +151,8 @@ class SnapshotReader { static_assert(std::input_iterator); + using WordDeserializer = TWordDeserializer; + SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} Iterator begin() const { @@ -184,6 +187,10 @@ class SnapshotReader { const Snapshot& snapshot_; }; +template +concept SnapshotReaderConcept = std::same_as> || + std::derived_from>; + template void iterator_read_into(It it, size_t count, std::vector& out) { std::copy_n(std::make_move_iterator(std::move(it)), count, std::back_inserter(out)); diff --git a/silkworm/db/snapshots/snapshot_word_serializer.hpp b/silkworm/db/snapshots/snapshot_word_serializer.hpp index 5cecd26295..9936155da2 100644 --- a/silkworm/db/snapshots/snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/snapshot_word_serializer.hpp @@ -27,4 +27,8 @@ struct SnapshotWordDeserializer { virtual void check_sanity_with_metadata(BlockNum /*block_from*/, BlockNum /*block_to*/) {} }; +template +concept SnapshotWordDeserializerConcept = std::derived_from && + requires(TWordDeserializer deserializer) { deserializer.value; }; + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot.hpp b/silkworm/db/snapshots/txn_snapshot.hpp index 3030d1ffe8..833e603dbb 100644 --- a/silkworm/db/snapshots/txn_snapshot.hpp +++ b/silkworm/db/snapshots/txn_snapshot.hpp @@ -23,7 +23,7 @@ namespace silkworm::snapshots { using TransactionSnapshotReader = SnapshotReader; -template +template using TransactionSnapshotPayloadRlpReader = SnapshotReader>; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp index 69ab160dba..ee3efdb8e9 100644 --- a/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/snapshots/txn_snapshot_word_serializer.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -51,7 +52,12 @@ struct TransactionSnapshotWordDeserializer : public SnapshotWordDeserializer { } }; -template +static_assert(SnapshotWordDeserializerConcept); + +template +concept BytesOrByteView = std::same_as || std::same_as; + +template struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeserializer { TBytes value; @@ -63,4 +69,6 @@ struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeseri } }; +static_assert(SnapshotWordDeserializerConcept>); + } // namespace silkworm::snapshots From 401cef857d7115f9f64db61a90ec339e70f8c33a Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 16:27:56 +0200 Subject: [PATCH 27/37] remove read_senders from snapshots access_layer --- silkworm/db/access_layer.cpp | 17 ++++++++--------- silkworm/db/access_layer.hpp | 7 +++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 6cd4f5d690..0b36069c8b 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1082,7 +1082,7 @@ bool DataModel::read_body(BlockNum height, HashAsArray hash, bool read_senders, const bool found = db::read_body(txn_, height, hash, read_senders, body); if (found) return found; - return read_body_from_snapshot(height, read_senders, body); + return read_body_from_snapshot(height, body); } bool DataModel::read_body(const Hash& hash, BlockNum height, BlockBody& body) const { @@ -1142,14 +1142,14 @@ bool DataModel::read_block(HashAsSpan hash, BlockNum number, bool read_senders, const bool found = db::read_block(txn_, hash, number, read_senders, block); if (found) return found; - return read_block_from_snapshot(number, read_senders, block); + return read_block_from_snapshot(number, block); } bool DataModel::read_block(const evmc::bytes32& hash, BlockNum number, Block& block) const { const bool found = db::read_block(txn_, hash, number, block); if (found) return found; - return read_block_from_snapshot(number, /*read_senders=*/true, block); + return read_block_from_snapshot(number, block); } void DataModel::for_last_n_headers(size_t n, absl::FunctionRef callback) const { @@ -1203,7 +1203,7 @@ bool DataModel::read_block(BlockNum number, bool read_senders, Block& block) con return read_block(hash->bytes, number, read_senders, block); } -bool DataModel::read_block_from_snapshot(BlockNum height, bool read_senders, Block& block) { +bool DataModel::read_block_from_snapshot(BlockNum height, Block& block) { if (!repository_) { return false; } @@ -1213,7 +1213,7 @@ bool DataModel::read_block_from_snapshot(BlockNum height, bool read_senders, Blo block.header = std::move(*block_header); - return read_body_from_snapshot(height, read_senders, block); + return read_body_from_snapshot(height, block); } std::optional DataModel::read_header_from_snapshot(BlockNum height) { @@ -1244,7 +1244,7 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash return block_header; } -bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, BlockBody& body) { +bool DataModel::read_body_from_snapshot(BlockNum height, BlockBody& body) { if (!repository_) { return false; } @@ -1261,7 +1261,7 @@ bool DataModel::read_body_from_snapshot(BlockNum height, bool read_senders, Bloc const auto txn_count{stored_body->txn_count >= 2 ? stored_body->txn_count - 2 : stored_body->txn_count}; std::vector transactions; - const auto read_ok{read_transactions_from_snapshot(height, base_txn_id, txn_count, read_senders, transactions)}; + const auto read_ok{read_transactions_from_snapshot(height, base_txn_id, txn_count, transactions)}; if (!read_ok) return false; body.transactions = std::move(transactions); @@ -1285,8 +1285,7 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { return false; } -bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, - bool /*read_senders*/, std::vector& txs) { +bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, std::vector& txs) { txs.reserve(txn_count); if (txn_count == 0) { return true; diff --git a/silkworm/db/access_layer.hpp b/silkworm/db/access_layer.hpp index ee2dacefd4..2d7f8f9c67 100644 --- a/silkworm/db/access_layer.hpp +++ b/silkworm/db/access_layer.hpp @@ -331,14 +331,13 @@ class DataModel { void for_last_n_headers(size_t n, absl::FunctionRef callback) const; private: - static bool read_block_from_snapshot(BlockNum height, bool read_senders, Block& block); + static bool read_block_from_snapshot(BlockNum height, Block& block); static std::optional read_header_from_snapshot(BlockNum height); static std::optional read_header_from_snapshot(const Hash& hash); - static bool read_body_from_snapshot(BlockNum height, bool read_senders, BlockBody& body); + static bool read_body_from_snapshot(BlockNum height, BlockBody& body); static bool is_body_in_snapshot(BlockNum height); static bool read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs); - static bool read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, - bool read_senders, std::vector& txs); + static bool read_transactions_from_snapshot(BlockNum height, uint64_t base_txn_id, uint64_t txn_count, std::vector& txs); [[nodiscard]] std::optional read_tx_lookup_from_db(const evmc::bytes32& tx_hash) const; [[nodiscard]] static std::optional read_tx_lookup_from_snapshot(const evmc::bytes32& tx_hash); From 95ff21d6b107e5c40e963d44e5af9809cd3e4a71 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 16:49:34 +0200 Subject: [PATCH 28/37] rename ordinal_lookup -> lookup_by_ordinal --- cmd/dev/snapshots.cpp | 4 ++-- silkworm/db/snapshots/basic_queries.hpp | 6 +++--- silkworm/db/snapshots/body_queries.hpp | 2 +- silkworm/db/snapshots/header_queries.hpp | 2 +- silkworm/db/snapshots/index.hpp | 6 +++--- silkworm/db/snapshots/rec_split/rec_split.hpp | 12 ++++++------ .../db/snapshots/rec_split/rec_split_par_test.cpp | 2 +- .../db/snapshots/rec_split/rec_split_seq_test.cpp | 2 +- silkworm/db/snapshots/txn_queries.hpp | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index b05cf2b1bb..e5419e3311 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -326,14 +326,14 @@ void open_index(const SnapSettings& settings) { const uint64_t data_id{*settings.lookup_number}; const uint64_t enumeration{data_id - idx.base_data_id()}; if (enumeration < idx.key_count()) { - SILK_INFO << "Offset by ordinal lookup for " << data_id << ": " << idx.ordinal_lookup(enumeration); + SILK_INFO << "Offset by ordinal lookup for " << data_id << ": " << idx.lookup_by_ordinal(enumeration); } else { SILK_WARN << "Invalid absolute data number " << data_id << " for ordinal lookup"; } } else { for (size_t i{0}; i < idx.key_count(); ++i) { if (i % (idx.key_count() / 10) == 0) { - SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.ordinal_lookup(i) + SILK_INFO << "Offset by ordinal lookup for " << i << ": " << idx.lookup_by_ordinal(i) << " [existence filter: " << int(idx.existence_filter()[i]) << "]"; } } diff --git a/silkworm/db/snapshots/basic_queries.hpp b/silkworm/db/snapshots/basic_queries.hpp index e2079dbb20..a23d185a43 100644 --- a/silkworm/db/snapshots/basic_queries.hpp +++ b/silkworm/db/snapshots/basic_queries.hpp @@ -45,7 +45,7 @@ struct FindByIdQuery : public BasicQuery { using BasicQuery::BasicQuery; std::optional exec(uint64_t id) { - size_t offset = this->index_.ordinal_lookup_by_data_id(id); + size_t offset = this->index_.lookup_by_data_id(id); return this->reader_.seek_one(offset); } }; @@ -55,7 +55,7 @@ struct FindByHashQuery : public BasicQuery { using BasicQuery::BasicQuery; std::optional exec(const Hash& hash) { - auto offset = this->index_.ordinal_lookup_by_hash(hash); + auto offset = this->index_.lookup_by_hash(hash); if (!offset) { return std::nullopt; } @@ -76,7 +76,7 @@ struct RangeFromIdQuery : public BasicQuery { using BasicQuery::BasicQuery; std::vector exec_into_vector(uint64_t first_id, uint64_t count) { - size_t offset = this->index_.ordinal_lookup_by_data_id(first_id); + size_t offset = this->index_.lookup_by_data_id(first_id); return this->reader_.read_into_vector(offset, count); } }; diff --git a/silkworm/db/snapshots/body_queries.hpp b/silkworm/db/snapshots/body_queries.hpp index 23af935a8e..accc78febc 100644 --- a/silkworm/db/snapshots/body_queries.hpp +++ b/silkworm/db/snapshots/body_queries.hpp @@ -25,7 +25,7 @@ struct BodyFindByBlockNumQuery : public FindByIdQuery { using FindByIdQuery::FindByIdQuery; std::optional exec(BlockNum id) { - // TODO: move this check inside ordinal_lookup_by_data_id if possible and remove this method + // TODO: move this check inside lookup_by_data_id if possible and remove this method if (id < reader_.block_from()) return std::nullopt; return FindByIdQuery::exec(id); } diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp index 074635d6c2..5466a203fd 100644 --- a/silkworm/db/snapshots/header_queries.hpp +++ b/silkworm/db/snapshots/header_queries.hpp @@ -25,7 +25,7 @@ struct HeaderFindByBlockNumQuery : public FindByIdQuery { using FindByIdQuery::FindByIdQuery; std::optional exec(BlockNum id) { - // TODO: move this check inside ordinal_lookup_by_data_id if possible and remove this method + // TODO: move this check inside lookup_by_data_id if possible and remove this method if ((id < reader_.block_from()) || (id >= reader_.block_to())) return std::nullopt; return FindByIdQuery::exec(id); } diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp index b77b4b8165..498b081b2e 100644 --- a/silkworm/db/snapshots/index.hpp +++ b/silkworm/db/snapshots/index.hpp @@ -36,10 +36,10 @@ class Index { : path_(std::move(path)), region_(region) {} - std::size_t ordinal_lookup_by_data_id(uint64_t id) const { return index_->ordinal_lookup_by_data_id(id); }; - std::optional ordinal_lookup_by_hash(const Hash& hash) const { return index_->ordinal_lookup_by_key(hash); }; + std::size_t lookup_by_data_id(uint64_t id) const { return index_->lookup_by_data_id(id); }; + std::optional lookup_by_hash(const Hash& hash) const { return index_->lookup_by_key(hash); }; - std::optional lookup_by_hash(const Hash& hash) const { + std::optional lookup_ordinal_by_hash(const Hash& hash) const { auto [result, found] = index_->lookup(hash); return found ? std::optional{result} : std::nullopt; } diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 60ab4e5494..92b491f40e 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -681,20 +681,20 @@ class RecSplit { //! Return the offset of the i-th element in the index. Perfect hash table lookup is not performed, //! only access to the Elias-Fano structure containing all offsets - [[nodiscard]] std::size_t ordinal_lookup(uint64_t i) const { return ef_offsets_->get(i); } + [[nodiscard]] std::size_t lookup_by_ordinal(uint64_t i) const { return ef_offsets_->get(i); } - [[nodiscard]] std::size_t ordinal_lookup_by_data_id(uint64_t data_id) const { + [[nodiscard]] std::size_t lookup_by_data_id(uint64_t data_id) const { ensure(data_id >= base_data_id(), [&]() { - return std::string("ordinal_lookup_by_data_id: data_id is out of range") + + return std::string("lookup_by_data_id: data_id is out of range") + " data_id = " + std::to_string(data_id) + ";" + " base_data_id = " + std::to_string(base_data_id()) + ";"; }); - return ordinal_lookup(data_id - base_data_id()); + return lookup_by_ordinal(data_id - base_data_id()); } - [[nodiscard]] std::optional ordinal_lookup_by_key(ByteView key) const { + [[nodiscard]] std::optional lookup_by_key(ByteView key) const { auto [i, found] = lookup(key); - return found ? std::optional{ordinal_lookup(i)} : std::nullopt; + return found ? std::optional{lookup_by_ordinal(i)} : std::nullopt; } //! Return the number of keys used to build the RecSplit instance diff --git a/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp b/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp index 660d0a2265..5e6582b8af 100644 --- a/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp +++ b/silkworm/db/snapshots/rec_split/rec_split_par_test.cpp @@ -262,7 +262,7 @@ TEST_CASE("RecSplit8-Par: double index lookup", "[silkworm][node][recsplit][igno const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); CHECK(enumeration_index == i); CHECK(found); - CHECK(rs2.ordinal_lookup(enumeration_index) == i * 17); + CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp b/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp index 9795865b91..e961aa57f2 100644 --- a/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp +++ b/silkworm/db/snapshots/rec_split/rec_split_seq_test.cpp @@ -257,7 +257,7 @@ TEST_CASE("RecSplit8: double index lookup", "[silkworm][snapshots][recsplit][ign const auto [enumeration_index, found] = rs2.lookup("key " + std::to_string(i)); CHECK(enumeration_index == i); CHECK(found); - CHECK(rs2.ordinal_lookup(enumeration_index) == i * 17); + CHECK(rs2.lookup_by_ordinal(enumeration_index) == i * 17); } } diff --git a/silkworm/db/snapshots/txn_queries.hpp b/silkworm/db/snapshots/txn_queries.hpp index f5cadc0610..eea51cb3eb 100644 --- a/silkworm/db/snapshots/txn_queries.hpp +++ b/silkworm/db/snapshots/txn_queries.hpp @@ -39,7 +39,7 @@ class TransactionBlockNumByTxnHashQuery { std::optional exec(const Hash& hash) { // Lookup the entire txn to check that the retrieved txn hash matches (no way to know if key exists in MPHF) const auto transaction = cross_check_query_.exec(hash); - auto result = transaction ? index_.lookup_by_hash(hash) : std::nullopt; + auto result = transaction ? index_.lookup_ordinal_by_hash(hash) : std::nullopt; return result; } From 1226e02f8f1c5b58362199fd23848adfe11d6215 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 16:59:41 +0200 Subject: [PATCH 29/37] lookup_by_data_id: return optional, check upper bound --- silkworm/db/snapshots/basic_queries.hpp | 16 ++++++++++++---- silkworm/db/snapshots/body_queries.hpp | 10 +--------- silkworm/db/snapshots/header_queries.hpp | 11 +---------- silkworm/db/snapshots/index.hpp | 2 +- silkworm/db/snapshots/rec_split/rec_split.hpp | 14 ++++++++------ 5 files changed, 23 insertions(+), 30 deletions(-) diff --git a/silkworm/db/snapshots/basic_queries.hpp b/silkworm/db/snapshots/basic_queries.hpp index a23d185a43..55be9f53e8 100644 --- a/silkworm/db/snapshots/basic_queries.hpp +++ b/silkworm/db/snapshots/basic_queries.hpp @@ -45,8 +45,12 @@ struct FindByIdQuery : public BasicQuery { using BasicQuery::BasicQuery; std::optional exec(uint64_t id) { - size_t offset = this->index_.lookup_by_data_id(id); - return this->reader_.seek_one(offset); + auto offset = this->index_.lookup_by_data_id(id); + if (!offset) { + return std::nullopt; + } + + return this->reader_.seek_one(*offset); } }; @@ -76,8 +80,12 @@ struct RangeFromIdQuery : public BasicQuery { using BasicQuery::BasicQuery; std::vector exec_into_vector(uint64_t first_id, uint64_t count) { - size_t offset = this->index_.lookup_by_data_id(first_id); - return this->reader_.read_into_vector(offset, count); + auto offset = this->index_.lookup_by_data_id(first_id); + if (!offset) { + return {}; + } + + return this->reader_.read_into_vector(*offset, count); } }; diff --git a/silkworm/db/snapshots/body_queries.hpp b/silkworm/db/snapshots/body_queries.hpp index accc78febc..732e260b8b 100644 --- a/silkworm/db/snapshots/body_queries.hpp +++ b/silkworm/db/snapshots/body_queries.hpp @@ -21,14 +21,6 @@ namespace silkworm::snapshots { -struct BodyFindByBlockNumQuery : public FindByIdQuery { - using FindByIdQuery::FindByIdQuery; - - std::optional exec(BlockNum id) { - // TODO: move this check inside lookup_by_data_id if possible and remove this method - if (id < reader_.block_from()) return std::nullopt; - return FindByIdQuery::exec(id); - } -}; +using BodyFindByBlockNumQuery = FindByIdQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/header_queries.hpp b/silkworm/db/snapshots/header_queries.hpp index 5466a203fd..bb8235006b 100644 --- a/silkworm/db/snapshots/header_queries.hpp +++ b/silkworm/db/snapshots/header_queries.hpp @@ -21,16 +21,7 @@ namespace silkworm::snapshots { -struct HeaderFindByBlockNumQuery : public FindByIdQuery { - using FindByIdQuery::FindByIdQuery; - - std::optional exec(BlockNum id) { - // TODO: move this check inside lookup_by_data_id if possible and remove this method - if ((id < reader_.block_from()) || (id >= reader_.block_to())) return std::nullopt; - return FindByIdQuery::exec(id); - } -}; - +using HeaderFindByBlockNumQuery = FindByIdQuery; using HeaderFindByHashQuery = FindByHashQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/index.hpp b/silkworm/db/snapshots/index.hpp index 498b081b2e..b54bd8f6d8 100644 --- a/silkworm/db/snapshots/index.hpp +++ b/silkworm/db/snapshots/index.hpp @@ -36,7 +36,7 @@ class Index { : path_(std::move(path)), region_(region) {} - std::size_t lookup_by_data_id(uint64_t id) const { return index_->lookup_by_data_id(id); }; + std::optional lookup_by_data_id(uint64_t id) const { return index_->lookup_by_data_id(id); }; std::optional lookup_by_hash(const Hash& hash) const { return index_->lookup_by_key(hash); }; std::optional lookup_ordinal_by_hash(const Hash& hash) const { diff --git a/silkworm/db/snapshots/rec_split/rec_split.hpp b/silkworm/db/snapshots/rec_split/rec_split.hpp index 92b491f40e..38764e5900 100644 --- a/silkworm/db/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/snapshots/rec_split/rec_split.hpp @@ -683,12 +683,14 @@ class RecSplit { //! only access to the Elias-Fano structure containing all offsets [[nodiscard]] std::size_t lookup_by_ordinal(uint64_t i) const { return ef_offsets_->get(i); } - [[nodiscard]] std::size_t lookup_by_data_id(uint64_t data_id) const { - ensure(data_id >= base_data_id(), [&]() { - return std::string("lookup_by_data_id: data_id is out of range") + - " data_id = " + std::to_string(data_id) + ";" + - " base_data_id = " + std::to_string(base_data_id()) + ";"; - }); + [[nodiscard]] std::optional lookup_by_data_id(uint64_t data_id) const { + // check if data_id is not out of range + uint64_t min = base_data_id(); + uint64_t max = min + key_count() - 1; + if ((data_id < min) || (data_id > max)) { + return std::nullopt; + } + return lookup_by_ordinal(data_id - base_data_id()); } From e7a9c7948f73077a32907914b72a780a259a64f3 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 17:23:30 +0200 Subject: [PATCH 30/37] simplify SnapshotSync::download_and_index_snapshots --- silkworm/db/snapshot_sync.cpp | 47 +++++++++++------------------------ silkworm/db/snapshot_sync.hpp | 2 -- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index ebb6f2179c..b76bd125a8 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -56,24 +56,20 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { SILK_INFO << "SnapshotSync: snapshot sync disabled, no snapshot must be downloaded"; return true; } - SILK_INFO << "SnapshotSync: snapshot repository: " << settings_.repository_dir.string(); - if (settings_.no_downloader) { - reopen(); - return true; - } - const auto snapshot_file_names = db::read_snapshots(txn); + if (!settings_.no_downloader) { + const bool download_completed = download_snapshots(snapshot_file_names); + if (!download_completed) return false; - const bool download_completed = download_snapshots(snapshot_file_names); - if (!download_completed) return false; - - db::write_snapshots(txn, snapshot_file_names); + db::write_snapshots(txn, snapshot_file_names); + SILK_INFO << "SnapshotSync: file names saved into db count=" << std::to_string(snapshot_file_names.size()); + } - SILK_INFO << "SnapshotSync: file names saved into db count=" << std::to_string(snapshot_file_names.size()); + build_missing_indexes(); - index_snapshots(); + repository_->reopen_folder(); const auto max_block_available = repository_->max_block_available(); SILK_INFO << "SnapshotSync: max block available: " << max_block_available @@ -90,12 +86,6 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { return true; } -void SnapshotSync::reopen() { - repository_->reopen_folder(); - SILK_INFO << "SnapshotSync: reopen completed segment_max_block=" << std::to_string(repository_->segment_max_block()) - << " idx_max_block=" << std::to_string(repository_->idx_max_block()); -} - bool SnapshotSync::download_snapshots(const std::vector& snapshot_file_names) { const auto missing_block_ranges = repository_->missing_block_ranges(); if (!missing_block_ranges.empty()) { @@ -183,24 +173,9 @@ bool SnapshotSync::download_snapshots(const std::vector& snapshot_f completed_connection.disconnect(); stats_connection.disconnect(); - reopen(); return true; } -void SnapshotSync::index_snapshots() { - if (!settings_.enabled) { - SILK_INFO << "SnapshotSync: snapshot sync disabled, no index must be created"; - return; - } - - // Build any missing snapshot index if needed, then reopen - if (repository_->idx_max_block() < repository_->segment_max_block()) { - SILK_INFO << "SnapshotSync: missing indexes detected, rebuild started"; - build_missing_indexes(); - reopen(); - } -} - bool SnapshotSync::stop() { const bool result = Stoppable::stop(); client_.stop(); @@ -215,6 +190,12 @@ void SnapshotSync::build_missing_indexes() { // Determine the missing indexes and build them in parallel const auto missing_indexes = repository_->missing_indexes(); + if (missing_indexes.empty()) { + return; + } + + SILK_INFO << "SnapshotSync: missing indexes detected, rebuild started"; + for (const auto& index : missing_indexes) { workers.push_task([=]() { try { diff --git a/silkworm/db/snapshot_sync.hpp b/silkworm/db/snapshot_sync.hpp index edc4324a6b..845cd12e8c 100644 --- a/silkworm/db/snapshot_sync.hpp +++ b/silkworm/db/snapshot_sync.hpp @@ -38,10 +38,8 @@ class SnapshotSync : public Stoppable { bool download_and_index_snapshots(db::RWTxn& txn); bool download_snapshots(const std::vector& snapshot_file_names); - void index_snapshots(); private: - void reopen(); void build_missing_indexes(); void update_database(db::RWTxn& txn, BlockNum max_block_available); void update_block_headers(db::RWTxn& txn, BlockNum max_block_available); From 668937e359a638a14b3250cde30235ab2a82b340 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Tue, 23 Apr 2024 17:26:18 +0200 Subject: [PATCH 31/37] remove SnapshotRepository::view_segment --- silkworm/db/snapshots/repository.cpp | 29 ----------------------- silkworm/db/snapshots/repository.hpp | 11 --------- silkworm/db/snapshots/repository_test.cpp | 27 ++++++++++----------- 3 files changed, 12 insertions(+), 55 deletions(-) diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 5652f306d8..ed98b41d08 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -36,19 +36,6 @@ namespace silkworm::snapshots { namespace fs = std::filesystem; -SnapshotRepository::ViewResult SnapshotRepository::view_segment(SnapshotType type, BlockNum number, const SnapshotWalker& walker) { - // Search for target segment in reverse order (from the newest segment to the oldest one) - for (auto& entry : std::ranges::reverse_view(bundles_)) { - const auto& bundle = entry.second; - // We're looking for the segment containing the target block number in its block range - if ((bundle.block_from() <= number) && (number < bundle.block_to())) { - const bool walk_done = walker({bundle.snapshot(type), bundle.index(type)}); - return walk_done ? SnapshotRepository::kWalkSuccess : SnapshotRepository::kWalkFailed; - } - } - return SnapshotRepository::kSnapshotNotFound; -} - std::size_t SnapshotRepository::view_bundles(const SnapshotBundleWalker& walker) { // Search for target segment in reverse order (from the newest segment to the oldest one) std::size_t visited_views{0}; @@ -147,18 +134,6 @@ bool SnapshotRepository::for_each_body(const BodyWalker& fn) { return true; } -SnapshotRepository::ViewResult SnapshotRepository::view_header_segment(BlockNum number, const SnapshotWalker& walker) { - return view_segment(SnapshotType::headers, number, walker); -} - -SnapshotRepository::ViewResult SnapshotRepository::view_body_segment(BlockNum number, const SnapshotWalker& walker) { - return view_segment(SnapshotType::bodies, number, walker); -} - -SnapshotRepository::ViewResult SnapshotRepository::view_tx_segment(BlockNum number, const SnapshotWalker& walker) { - return view_segment(SnapshotType::transactions, number, walker); -} - std::size_t SnapshotRepository::view_segments(SnapshotType type, const SnapshotWalker& walker) { return view_bundles([&](const SnapshotBundle& bundle) { return walker({bundle.snapshot(type), bundle.index(type)}); @@ -324,10 +299,6 @@ void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files) { } const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { - if (number > max_block_available()) { - return nullptr; - } - // Search for target segment in reverse order (from the newest segment to the oldest one) for (const auto& entry : std::ranges::reverse_view(bundles_)) { const auto& bundle = entry.second; diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 7f21281844..2d1a48b912 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -140,18 +140,8 @@ class SnapshotRepository { const Index& index; }; - enum ViewResult : uint8_t { - kSnapshotNotFound, - kWalkFailed, - kWalkSuccess - }; - using SnapshotWalker = std::function; - ViewResult view_header_segment(BlockNum number, const SnapshotWalker& walker); - ViewResult view_body_segment(BlockNum number, const SnapshotWalker& walker); - ViewResult view_tx_segment(BlockNum number, const SnapshotWalker& walker); - using SnapshotBundleWalker = std::function; std::size_t view_bundles(const SnapshotBundleWalker& walker); @@ -173,7 +163,6 @@ class SnapshotRepository { private: void reopen_list(const SnapshotPathList& segment_files); - ViewResult view_segment(SnapshotType type, BlockNum number, const SnapshotWalker& walker); std::size_t view_segments(SnapshotType type, const SnapshotWalker& walker); const SnapshotBundle* find_bundle(BlockNum number) const; std::optional find_segment(SnapshotType type, BlockNum number) const; diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 0364bfd947..5d77bfb6ff 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -66,10 +66,10 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { SECTION("no snapshots") { repository.reopen_folder(); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kSnapshotNotFound); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); + CHECK(repository.view_header_segments(successful_walk) == 0); CHECK(repository.view_body_segments(successful_walk) == 0); CHECK(repository.view_tx_segments(successful_walk) == 0); @@ -85,10 +85,10 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { test::TemporarySnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-015000-015500-transactions.seg"}; repository.reopen_folder(); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_body_segment(11'500'000, successful_walk) == ViewResult::kSnapshotNotFound); - CHECK(repository.view_tx_segment(15'000'000, successful_walk) == ViewResult::kSnapshotNotFound); + CHECK_FALSE(repository.find_header_segment(14'500'000)); + CHECK_FALSE(repository.find_body_segment(11'500'000)); + CHECK_FALSE(repository.find_tx_segment(15'000'000)); + CHECK(repository.view_header_segments(successful_walk) == 0); // empty snapshots are ignored by repository CHECK(repository.view_body_segments(successful_walk) == 0); // empty snapshots are ignored by repository CHECK(repository.view_tx_segments(successful_walk) == 0); // empty snapshots are ignored by repository @@ -104,17 +104,14 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { test::HelloWorldSnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-014500-015000-transactions.seg"}; repository.reopen_folder(); - using ViewResult = SnapshotRepository::ViewResult; - CHECK(repository.view_header_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_body_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); - CHECK(repository.view_tx_segment(14'500'000, failing_walk) == ViewResult::kWalkFailed); CHECK(repository.view_header_segments(failing_walk) == 1); CHECK(repository.view_body_segments(failing_walk) == 1); CHECK(repository.view_tx_segments(failing_walk) == 1); - CHECK(repository.view_header_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_body_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); - CHECK(repository.view_tx_segment(14'500'000, successful_walk) == ViewResult::kWalkSuccess); + CHECK(repository.find_header_segment(14'500'000).has_value()); + CHECK(repository.find_body_segment(14'500'000).has_value()); + CHECK(repository.find_tx_segment(14'500'000).has_value()); + CHECK(repository.view_header_segments(successful_walk) == 1); CHECK(repository.view_body_segments(successful_walk) == 1); CHECK(repository.view_tx_segments(successful_walk) == 1); From f8bb9ef50bdd30199f3d90f9cd4e75c112186df8 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 24 Apr 2024 12:24:21 +0200 Subject: [PATCH 32/37] move stale index removal logic to repository and use it from SnapshotSync --- silkworm/db/snapshot_sync.cpp | 1 + silkworm/db/snapshots/index.cpp | 9 ----- silkworm/db/snapshots/path.cpp | 12 ++++--- silkworm/db/snapshots/path.hpp | 20 ++++++++---- silkworm/db/snapshots/repository.cpp | 23 +++++++++++++ silkworm/db/snapshots/repository.hpp | 2 ++ silkworm/db/snapshots/repository_test.cpp | 40 +++++++++++++++++++++++ silkworm/db/snapshots/snapshot_test.cpp | 37 --------------------- 8 files changed, 88 insertions(+), 56 deletions(-) diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index b76bd125a8..5f82fa2cd4 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -67,6 +67,7 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { SILK_INFO << "SnapshotSync: file names saved into db count=" << std::to_string(snapshot_file_names.size()); } + repository_->remove_stale_indexes(); build_missing_indexes(); repository_->reopen_folder(); diff --git a/silkworm/db/snapshots/index.cpp b/silkworm/db/snapshots/index.cpp index d1cd3a69fd..6fd62c83ae 100644 --- a/silkworm/db/snapshots/index.cpp +++ b/silkworm/db/snapshots/index.cpp @@ -23,15 +23,6 @@ void Index::reopen_index() { if (path_.exists()) { index_ = std::make_unique(path_.path(), region_); - - // TODO: move this code or pass in snapshot_last_write_time as an argument - // snapshot_last_write_time: ensure(decoder_.is_open(), "segment not open, call reopen_segment"); - // if (index_->last_write_time() < snapshot_last_write_time) { - // // Index has been created before the segment file, needs to be ignored (and rebuilt) as inconsistent - // const bool removed = std::filesystem::remove(path_.path()); - // ensure(removed, "Index: cannot remove index file"); - // close_index(); - // } } } diff --git a/silkworm/db/snapshots/path.cpp b/silkworm/db/snapshots/path.cpp index d16e3961c6..4f4ce916b2 100644 --- a/silkworm/db/snapshots/path.cpp +++ b/silkworm/db/snapshots/path.cpp @@ -89,22 +89,26 @@ std::optional SnapshotPath::parse(fs::path path) { return SnapshotPath{std::move(path), version, block_from, block_to, *type}; } -SnapshotPath SnapshotPath::from(const fs::path& dir, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) { - const auto filename = SnapshotPath::build_filename(version, block_from, block_to, type); +SnapshotPath SnapshotPath::from(const fs::path& dir, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext) { + const auto filename = SnapshotPath::build_filename(version, block_from, block_to, type, ext); return SnapshotPath{dir / filename, version, block_from, block_to, type}; } -fs::path SnapshotPath::build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) { +fs::path SnapshotPath::build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext) { std::string snapshot_type_name{magic_enum::enum_name(type)}; std::string filename{absl::StrFormat("v%d-%06d-%06d-%s%s", version, block_from / kFileNameBlockScaleFactor, block_to / kFileNameBlockScaleFactor, absl::StrReplaceAll(snapshot_type_name, {{"_", "-"}}), - kSegmentExtension)}; + ext)}; return fs::path{filename}; } +SnapshotPath SnapshotPath::related_path(SnapshotType type, const char* ext) const { + return SnapshotPath::from(path_.parent_path(), version_, block_from_, block_to_, type, ext); +} + SnapshotPath::SnapshotPath(fs::path path, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type) : path_(std::move(path)), version_(version), block_from_(block_from), block_to_(block_to), type_(type) { ensure(block_to >= block_from, "SnapshotPath: block_to less than block_from"); diff --git a/silkworm/db/snapshots/path.hpp b/silkworm/db/snapshots/path.hpp index d4630ae6c9..30a260ad73 100644 --- a/silkworm/db/snapshots/path.hpp +++ b/silkworm/db/snapshots/path.hpp @@ -65,7 +65,8 @@ class SnapshotPath { uint8_t version, BlockNum block_from, BlockNum block_to, - SnapshotType type); + SnapshotType type, + const char* ext = kSegmentExtension); [[nodiscard]] std::string filename() const { return path_.filename().string(); } @@ -100,20 +101,27 @@ class SnapshotPath { } [[nodiscard]] SnapshotPath index_file() const { - return SnapshotPath(std::filesystem::path{path_}.replace_extension(kIdxExtension), version_, block_from_, block_to_, type_); + return related_path(type_, kIdxExtension); } [[nodiscard]] SnapshotPath index_file_for_type(SnapshotType type) const { - std::filesystem::path index_path{path_}; - index_path.replace_filename(build_filename(version_, block_from_, block_to_, type)); - return SnapshotPath(index_path.replace_extension(kIdxExtension), version_, block_from_, block_to_, type); + return related_path(type, kIdxExtension); + } + + [[nodiscard]] SnapshotPath snapshot_path_for_type(SnapshotType type) const { + return related_path(type, kSegmentExtension); + } + + [[nodiscard]] std::filesystem::file_time_type last_write_time() const { + return std::filesystem::last_write_time(path_); } friend bool operator<(const SnapshotPath& lhs, const SnapshotPath& rhs); friend bool operator==(const SnapshotPath&, const SnapshotPath&) = default; protected: - static std::filesystem::path build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type); + static std::filesystem::path build_filename(uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type, const char* ext); + SnapshotPath related_path(SnapshotType type, const char* ext) const; explicit SnapshotPath(std::filesystem::path path, uint8_t version, BlockNum block_from, BlockNum block_to, SnapshotType type); diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index ed98b41d08..a6abb30d25 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -351,4 +352,26 @@ BlockNum SnapshotRepository::max_idx_available() { return result; } +bool is_stale_index_path(const SnapshotPath& index_path) { + SnapshotType snapshot_type = (index_path.type() == SnapshotType::transactions_to_block) + ? SnapshotType::transactions + : index_path.type(); + SnapshotPath snapshot_path = index_path.snapshot_path_for_type(snapshot_type); + return (index_path.last_write_time() < snapshot_path.last_write_time()); +} + +SnapshotPathList SnapshotRepository::stale_index_paths() const { + SnapshotPathList results; + auto all_files = this->get_idx_files(); + std::copy_if(all_files.begin(), all_files.end(), std::back_inserter(results), is_stale_index_path); + return results; +} + +void SnapshotRepository::remove_stale_indexes() const { + for (auto& path : stale_index_paths()) { + const bool removed = fs::remove(path.path()); + ensure(removed, [&]() { return "SnapshotRepository::remove_stale_indexes: cannot remove index file " + path.path().string(); }); + } +} + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 2d1a48b912..22ffb653ef 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -134,6 +134,7 @@ class SnapshotRepository { [[nodiscard]] std::vector missing_block_ranges() const; [[nodiscard]] std::vector> missing_indexes() const; + void remove_stale_indexes() const; struct SnapshotAndIndex { const Snapshot& snapshot; @@ -178,6 +179,7 @@ class SnapshotRepository { [[nodiscard]] SnapshotPathList get_files(const std::string& ext) const; [[nodiscard]] BlockNum max_idx_available(); + SnapshotPathList stale_index_paths() const; //! The configuration settings for snapshots SnapshotSettings settings_; diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 5d77bfb6ff..9244da2ef4 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -16,6 +16,9 @@ #include "repository.hpp" +#include +#include + #include #include @@ -238,4 +241,41 @@ TEST_CASE("SnapshotRepository::find_block_number", "[silkworm][node][snapshot]") // CHECK_FALSE(block_number.has_value()); // needs correct key check in index } +template +static auto move_last_write_time(const std::filesystem::path& p, const std::chrono::duration& d) { + const auto ftime = std::filesystem::last_write_time(p); + std::filesystem::last_write_time(p, ftime + d); + return std::filesystem::last_write_time(p) - ftime; +} + +TEST_CASE("SnapshotRepository::remove_stale_indexes", "[silkworm][node][snapshot][index]") { + using namespace std::chrono_literals; + + SetLogVerbosityGuard guard{log::Level::kNone}; + TemporaryDirectory tmp_dir; + SnapshotSettings settings{tmp_dir.path()}; + SnapshotRepository repository{settings}; + + // create a snapshot file + test::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir.path()}; + test::SampleHeaderSnapshotPath header_snapshot_path{header_snapshot_file.path()}; + + // build an index + auto index_builder = HeaderIndex::make(header_snapshot_path); + REQUIRE_NOTHROW(index_builder.build()); + auto index_path = index_builder.path().path(); + + // the index is not stale + repository.remove_stale_indexes(); + CHECK(std::filesystem::exists(index_path)); + + // move the snapshot last write time 1 hour to the future to make its index "stale" + const auto last_write_time_diff = move_last_write_time(header_snapshot_path.path(), 1h); + CHECK(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); + + // the index is stale + repository.remove_stale_indexes(); + CHECK_FALSE(std::filesystem::exists(index_path)); +} + } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshots/snapshot_test.cpp b/silkworm/db/snapshots/snapshot_test.cpp index ae578e5439..ffdf23a60f 100644 --- a/silkworm/db/snapshots/snapshot_test.cpp +++ b/silkworm/db/snapshots/snapshot_test.cpp @@ -37,7 +37,6 @@ namespace silkworm::snapshots { -using namespace std::chrono_literals; namespace test = test_util; using silkworm::test_util::SetLogVerbosityGuard; @@ -61,13 +60,6 @@ class Snapshot_ForTest : public Snapshot { : Snapshot(SnapshotPath_ForTest{tmp_dir, block_from, block_to}) {} }; -template -static auto move_last_write_time(const std::filesystem::path& p, const std::chrono::duration& d) { - const auto ftime = std::filesystem::last_write_time(p); - std::filesystem::last_write_time(p, ftime + d); - return std::filesystem::last_write_time(p) - ftime; -} - TEST_CASE("Snapshot::Snapshot", "[silkworm][node][snapshot][snapshot]") { TemporaryDirectory tmp_dir; SECTION("valid") { @@ -452,33 +444,4 @@ TEST_CASE("slice_tx_payload", "[silkworm][node][snapshot]") { } } -TEST_CASE("HeaderSnapshot::reopen_index regeneration", "[silkworm][node][snapshot][index]") { - // SKIP("TODO: see Index::reopen_index"); - return; - - SetLogVerbosityGuard guard{log::Level::kNone}; - TemporaryDirectory tmp_dir; - test::SampleHeaderSnapshotFile sample_header_snapshot{tmp_dir.path()}; - test::SampleHeaderSnapshotPath header_snapshot_path{sample_header_snapshot.path()}; - - auto index_builder = HeaderIndex::make(header_snapshot_path); - REQUIRE_NOTHROW(index_builder.build()); - - Snapshot snapshot{header_snapshot_path}; - snapshot.reopen_segment(); - - Index index{snapshot.path().index_file()}; - index.reopen_index(); - REQUIRE(std::filesystem::exists(snapshot.path().index_file().path())); - - // Move 1 hour to the future the last write time for sample header snapshot - const auto last_write_time_diff = move_last_write_time(snapshot.path().path(), 1h); - REQUIRE(last_write_time_diff > std::filesystem::file_time_type::duration::zero()); - - // Verify that reopening the index removes the index file because it was created in the past - CHECK(std::filesystem::exists(snapshot.path().index_file().path())); - index.reopen_index(); - CHECK_FALSE(std::filesystem::exists(snapshot.path().index_file().path())); -} - } // namespace silkworm::snapshots From 0eaf8cd09f981c30067fd70c1e241bb68bc51fe9 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 24 Apr 2024 12:58:32 +0200 Subject: [PATCH 33/37] reopen_index: throw if path is not found --- .../db/snapshots/body_txs_amount_query.cpp | 2 +- silkworm/db/snapshots/index.cpp | 4 +--- silkworm/db/snapshots/repository.cpp | 3 ++- silkworm/db/snapshots/repository_test.cpp | 19 ++++++++++++------- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/silkworm/db/snapshots/body_txs_amount_query.cpp b/silkworm/db/snapshots/body_txs_amount_query.cpp index 52d471530f..ced200ec38 100644 --- a/silkworm/db/snapshots/body_txs_amount_query.cpp +++ b/silkworm/db/snapshots/body_txs_amount_query.cpp @@ -32,7 +32,7 @@ BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() { if (number == path.block_from()) { first_tx_id = body.base_txn_id; } - if (number == path.block_to() - 1) { + if (number >= path.block_to() - 1) { last_tx_id = body.base_txn_id; last_txs_amount = body.txn_count; } diff --git a/silkworm/db/snapshots/index.cpp b/silkworm/db/snapshots/index.cpp index 6fd62c83ae..a7d4f6669d 100644 --- a/silkworm/db/snapshots/index.cpp +++ b/silkworm/db/snapshots/index.cpp @@ -21,9 +21,7 @@ namespace silkworm::snapshots { void Index::reopen_index() { close_index(); - if (path_.exists()) { - index_ = std::make_unique(path_.path(), region_); - } + index_ = std::make_unique(path_.path(), region_); } void Index::close_index() { diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index a6abb30d25..7954719f63 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -304,7 +304,8 @@ const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { for (const auto& entry : std::ranges::reverse_view(bundles_)) { const auto& bundle = entry.second; // We're looking for the segment containing the target block number in its block range - if ((bundle.block_from() <= number) && (number < bundle.block_to())) { + if (((bundle.block_from() <= number) && (number < bundle.block_to())) || + ((bundle.block_from() == number) && (bundle.block_from() == bundle.block_to()))) { return &bundle; } } diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 9244da2ef4..03c281e704 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -102,18 +102,23 @@ TEST_CASE("SnapshotRepository::view", "[silkworm][node][snapshot]") { } SECTION("non-empty snapshots") { - test::HelloWorldSnapshotFile tmp_snapshot_1{tmp_dir.path(), "v1-014500-015000-headers.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_2{tmp_dir.path(), "v1-014500-015000-bodies.seg"}; - test::HelloWorldSnapshotFile tmp_snapshot_3{tmp_dir.path(), "v1-014500-015000-transactions.seg"}; + test::SampleHeaderSnapshotFile tmp_snapshot_1{tmp_dir.path()}; + test::SampleBodySnapshotFile tmp_snapshot_2{tmp_dir.path()}; + test::SampleTransactionSnapshotFile tmp_snapshot_3{tmp_dir.path()}; + + for (auto& index_builder : repository.missing_indexes()) { + index_builder->build(); + } + repository.reopen_folder(); CHECK(repository.view_header_segments(failing_walk) == 1); CHECK(repository.view_body_segments(failing_walk) == 1); CHECK(repository.view_tx_segments(failing_walk) == 1); - CHECK(repository.find_header_segment(14'500'000).has_value()); - CHECK(repository.find_body_segment(14'500'000).has_value()); - CHECK(repository.find_tx_segment(14'500'000).has_value()); + CHECK(repository.find_header_segment(1'500'000).has_value()); + CHECK(repository.find_body_segment(1'500'000).has_value()); + CHECK(repository.find_tx_segment(1'500'000).has_value()); CHECK(repository.view_header_segments(successful_walk) == 1); CHECK(repository.view_body_segments(successful_walk) == 1); @@ -198,7 +203,7 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { CHECK_FALSE(repository.find_tx_segment(1'500'014)); } SECTION("greater than max_block_available") { - CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 1)); + CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 10)); } } From c5536931da2306284cbdb9ff803f36fa20f603fa Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 24 Apr 2024 14:33:50 +0200 Subject: [PATCH 34/37] make indexes mandatory in reopen --- silkworm/capi/silkworm.cpp | 7 -- silkworm/db/snapshots/path.cpp | 8 +- silkworm/db/snapshots/repository.cpp | 114 ++++++++++++++------------- silkworm/db/snapshots/repository.hpp | 11 ++- 4 files changed, 73 insertions(+), 67 deletions(-) diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index cbfc681453..170b28bbda 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -332,9 +332,7 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn return SILKWORM_INVALID_PATH; } snapshots::Snapshot header_snapshot{*headers_segment_path, make_region(hs.segment)}; - header_snapshot.reopen_segment(); snapshots::Index idx_header_hash{headers_segment_path->index_file(), make_region(hs.header_hash_index)}; - idx_header_hash.reopen_index(); const SilkwormBodiesSnapshot& bs = snapshot->bodies; if (!bs.segment.file_path || !bs.block_num_index.file_path) { @@ -345,9 +343,7 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn return SILKWORM_INVALID_PATH; } snapshots::Snapshot body_snapshot{*bodies_segment_path, make_region(bs.segment)}; - body_snapshot.reopen_segment(); snapshots::Index idx_body_number{bodies_segment_path->index_file(), make_region(bs.block_num_index)}; - idx_body_number.reopen_index(); const SilkwormTransactionsSnapshot& ts = snapshot->transactions; if (!ts.segment.file_path || !ts.tx_hash_index.file_path || !ts.tx_hash_2_block_index.file_path) { @@ -358,11 +354,8 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn return SILKWORM_INVALID_PATH; } snapshots::Snapshot txn_snapshot{*transactions_segment_path, make_region(ts.segment)}; - txn_snapshot.reopen_segment(); snapshots::Index idx_txn_hash{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions), make_region(ts.tx_hash_index)}; - idx_txn_hash.reopen_index(); snapshots::Index idx_txn_hash_2_block{transactions_segment_path->index_file_for_type(snapshots::SnapshotType::transactions_to_block), make_region(ts.tx_hash_2_block_index)}; - idx_txn_hash_2_block.reopen_index(); snapshots::SnapshotBundle bundle{ .header_snapshot = std::move(header_snapshot), diff --git a/silkworm/db/snapshots/path.cpp b/silkworm/db/snapshots/path.cpp index 4f4ce916b2..268ae176fb 100644 --- a/silkworm/db/snapshots/path.cpp +++ b/silkworm/db/snapshots/path.cpp @@ -37,7 +37,7 @@ std::optional SnapshotPath::parse(fs::path path) { const std::string filename_no_ext = path.stem().string(); // Expected stem format: -<6_digit_block_from>-<6_digit_block_to>- - const std::vector tokens = absl::StrSplit(filename_no_ext, '-'); + const std::vector tokens = absl::StrSplit(filename_no_ext, absl::MaxSplits('-', 3)); if (tokens.size() != 4) { return std::nullopt; } @@ -79,8 +79,10 @@ std::optional SnapshotPath::parse(fs::path path) { return std::nullopt; } - // Expected tag format: headers|bodies|transactions (parsing relies on magic_enum, so SnapshotType items must match exactly) - std::string_view tag_str{tag.data(), tag.size()}; + // Expected tag format: headers|bodies|transactions|transactions-to-block + // parsing relies on magic_enum, so SnapshotType items must match exactly + std::string tag_str{tag.data(), tag.size()}; + std::replace(tag_str.begin(), tag_str.end(), '-', '_'); const auto type = magic_enum::enum_cast(tag_str); if (!type) { return std::nullopt; diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 7954719f63..52c63786bc 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -61,29 +61,40 @@ void SnapshotRepository::add_snapshot_bundle(SnapshotBundle bundle) { BlockNum block_from = bundle.block_from(); BlockNum block_to = bundle.block_to(); + bundle.reopen(); + bundles_.emplace(block_from, std::move(bundle)); segment_max_block_ = std::max(segment_max_block_, block_to - 1); idx_max_block_ = max_idx_available(); } -void SnapshotRepository::reopen_folder() { - SILK_INFO << "Reopen snapshot repository folder: " << settings_.repository_dir.string(); - SnapshotPathList segment_files = get_segment_files(); - reopen_list(segment_files); - SILK_INFO << "Total reopened snapshots: " << total_snapshots_count(); +void SnapshotBundle::reopen() { + for (auto& snapshot_ref : snapshots()) { + snapshot_ref.get().reopen_segment(); + ensure(!snapshot_ref.get().empty(), [&]() { + return "invalid empty snapshot " + snapshot_ref.get().fs_path().string(); + }); + } + for (auto& index_ref : indexes()) { + index_ref.get().reopen_index(); + } +} + +void SnapshotBundle::close() { + for (auto& index_ref : indexes()) { + index_ref.get().close_index(); + } + for (auto& snapshot_ref : snapshots()) { + snapshot_ref.get().close(); + } } void SnapshotRepository::close() { SILK_TRACE << "Close snapshot repository folder: " << settings_.repository_dir.string(); for (auto& entry : bundles_) { auto& bundle = entry.second; - for (auto& index_ref : bundle.indexes()) { - index_ref.get().close_index(); - } - for (auto& snapshot_ref : bundle.snapshots()) { - snapshot_ref.get().close(); - } + bundle.close(); } } @@ -228,65 +239,59 @@ std::vector> SnapshotRepository::missing_indexes() return missing_index_list; } -void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files) { - std::map header_snapshot_paths; - std::map body_snapshot_paths; - std::map txn_snapshot_paths; - - for (const SnapshotPath& path : segment_files) { - switch (path.type()) { - case SnapshotType::headers: - header_snapshot_paths.emplace(path.block_from(), path); - break; - case SnapshotType::bodies: - body_snapshot_paths.emplace(path.block_from(), path); - break; - case SnapshotType::transactions: - txn_snapshot_paths.emplace(path.block_from(), path); - break; - case SnapshotType::transactions_to_block: - assert(false); - break; - } +void SnapshotRepository::reopen_folder() { + SILK_INFO << "Reopen snapshot repository folder: " << settings_.repository_dir.string(); + SnapshotPathList all_snapshot_paths = get_segment_files(); + SnapshotPathList all_index_paths = get_idx_files(); + + std::map>> groups; + + for (size_t i = 0; i < all_snapshot_paths.size(); i++) { + auto& path = all_snapshot_paths[i]; + auto& group = groups[path.block_from()][false]; + group[path.type()] = i; + } + + for (size_t i = 0; i < all_index_paths.size(); i++) { + auto& path = all_index_paths[i]; + auto& group = groups[path.block_from()][true]; + group[path.type()] = i; } BlockNum num = 0; - if (!header_snapshot_paths.empty()) { - num = header_snapshot_paths.begin()->first; + if (!groups.empty()) { + num = groups.begin()->first; } - while ( - header_snapshot_paths.contains(num) && - body_snapshot_paths.contains(num) && - txn_snapshot_paths.contains(num)) { + while (groups.contains(num) && + (groups[num][false].size() == SnapshotBundle::kSnapshotsCount) && + (groups[num][true].size() == SnapshotBundle::kIndexesCount)) { if (!bundles_.contains(num)) { + auto snapshot_path = [&](SnapshotType type) { + return all_snapshot_paths[groups[num][false][type]]; + }; + auto index_path = [&](SnapshotType type) { + return all_index_paths[groups[num][true][type]]; + }; + SnapshotBundle bundle{ - .header_snapshot = Snapshot(header_snapshot_paths.at(num)), - .idx_header_hash = Index(header_snapshot_paths.at(num).index_file()), + .header_snapshot = Snapshot(snapshot_path(SnapshotType::headers)), + .idx_header_hash = Index(index_path(SnapshotType::headers)), - .body_snapshot = Snapshot(body_snapshot_paths.at(num)), - .idx_body_number = Index(body_snapshot_paths.at(num).index_file()), + .body_snapshot = Snapshot(snapshot_path(SnapshotType::bodies)), + .idx_body_number = Index(index_path(SnapshotType::bodies)), - .txn_snapshot = Snapshot(txn_snapshot_paths.at(num)), - .idx_txn_hash = Index(txn_snapshot_paths.at(num).index_file_for_type(SnapshotType::transactions)), - .idx_txn_hash_2_block = Index(txn_snapshot_paths.at(num).index_file_for_type(SnapshotType::transactions_to_block)), + .txn_snapshot = Snapshot(snapshot_path(SnapshotType::transactions)), + .idx_txn_hash = Index(index_path(SnapshotType::transactions)), + .idx_txn_hash_2_block = Index(index_path(SnapshotType::transactions_to_block)), }; - for (auto& snapshot_ref : bundle.snapshots()) { - snapshot_ref.get().reopen_segment(); - ensure(!snapshot_ref.get().empty(), [&]() { - return "invalid empty snapshot " + snapshot_ref.get().fs_path().string(); - }); - } + bundle.reopen(); bundles_.emplace(num, std::move(bundle)); } auto& bundle = bundles_.at(num); - for (auto& index_ref : bundle.indexes()) { - index_ref.get().reopen_index(); - } - segment_max_block_ = std::max(segment_max_block_, bundle.block_to() - 1); if (num < bundle.block_to()) { @@ -297,6 +302,7 @@ void SnapshotRepository::reopen_list(const SnapshotPathList& segment_files) { } idx_max_block_ = max_idx_available(); + SILK_INFO << "Total reopened snapshots: " << total_snapshots_count(); } const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 22ffb653ef..2161dd1459 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -51,7 +51,10 @@ struct SnapshotBundle { //! Index transaction_hash -> block_num Index idx_txn_hash_2_block; - std::array, 3> snapshots() { + static constexpr size_t kSnapshotsCount = 3; + static constexpr size_t kIndexesCount = 4; + + std::array, kSnapshotsCount> snapshots() { return { header_snapshot, body_snapshot, @@ -59,7 +62,7 @@ struct SnapshotBundle { }; } - std::array, 4> indexes() { + std::array, kIndexesCount> indexes() { return { idx_header_hash, idx_body_number, @@ -100,6 +103,9 @@ struct SnapshotBundle { // assume that all snapshots have the same block range, and use one of them BlockNum block_from() const { return header_snapshot.block_from(); } BlockNum block_to() const { return header_snapshot.block_to(); } + + void reopen(); + void close(); }; //! Read-only repository for all snapshot files. @@ -163,7 +169,6 @@ class SnapshotRepository { [[nodiscard]] std::optional find_block_number(Hash txn_hash) const; private: - void reopen_list(const SnapshotPathList& segment_files); std::size_t view_segments(SnapshotType type, const SnapshotWalker& walker); const SnapshotBundle* find_bundle(BlockNum number) const; std::optional find_segment(SnapshotType type, BlockNum number) const; From d09764a7b145df850fe5d918c782ebe3f138497a Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Wed, 24 Apr 2024 15:07:15 +0200 Subject: [PATCH 35/37] simplify count/max computations --- cmd/capi/execute.cpp | 6 ++-- silkworm/db/snapshot_sync.cpp | 4 +-- silkworm/db/snapshots/repository.cpp | 38 ++++++++--------------- silkworm/db/snapshots/repository.hpp | 22 ++++--------- silkworm/db/snapshots/repository_test.cpp | 6 ++-- 5 files changed, 25 insertions(+), 51 deletions(-) diff --git a/cmd/capi/execute.cpp b/cmd/capi/execute.cpp index 1728408a1f..db6ee2ce69 100644 --- a/cmd/capi/execute.cpp +++ b/cmd/capi/execute.cpp @@ -205,9 +205,9 @@ std::vector collect_all_snapshots(SnapshotRepository& sna return true; }); - ensure(headers_snapshot_sequence.size() == snapshot_repository.header_snapshots_count(), "invalid header snapshot count"); - ensure(bodies_snapshot_sequence.size() == snapshot_repository.body_snapshots_count(), "invalid body snapshot count"); - ensure(transactions_snapshot_sequence.size() == snapshot_repository.tx_snapshots_count(), "invalid tx snapshot count"); + ensure(headers_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid header snapshot count"); + ensure(bodies_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid body snapshot count"); + ensure(transactions_snapshot_sequence.size() == snapshot_repository.bundles_count(), "invalid tx snapshot count"); std::vector snapshot_sequence; snapshot_sequence.reserve(headers_snapshot_sequence.size()); diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index 5f82fa2cd4..69502426c2 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -73,9 +73,7 @@ bool SnapshotSync::download_and_index_snapshots(db::RWTxn& txn) { repository_->reopen_folder(); const auto max_block_available = repository_->max_block_available(); - SILK_INFO << "SnapshotSync: max block available: " << max_block_available - << " (segment max block: " << repository_->segment_max_block() - << ", idx max block: " << repository_->idx_max_block() << ")"; + SILK_INFO << "SnapshotSync: max block available: " << max_block_available; const auto snapshot_config = Config::lookup_known_config(config_.chain_id, snapshot_file_names); const auto configured_max_block_number = snapshot_config.max_block_number(); diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index 52c63786bc..f174c063d5 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -58,15 +58,8 @@ SnapshotRepository::~SnapshotRepository() { } void SnapshotRepository::add_snapshot_bundle(SnapshotBundle bundle) { - BlockNum block_from = bundle.block_from(); - BlockNum block_to = bundle.block_to(); - bundle.reopen(); - - bundles_.emplace(block_from, std::move(bundle)); - - segment_max_block_ = std::max(segment_max_block_, block_to - 1); - idx_max_block_ = max_idx_available(); + bundles_.emplace(bundle.block_from(), std::move(bundle)); } void SnapshotBundle::reopen() { @@ -98,6 +91,15 @@ void SnapshotRepository::close() { } } +BlockNum SnapshotRepository::max_block_available() const { + if (bundles_.empty()) + return 0; + + // a bundle with the max block range is last in the sorted bundles map + auto& bundle = bundles_.rbegin()->second; + return (bundle.block_from() < bundle.block_to()) ? bundle.block_to() - 1 : bundle.block_from(); +} + std::vector SnapshotRepository::missing_block_ranges() const { const auto ordered_segments = get_segment_files(); @@ -292,7 +294,6 @@ void SnapshotRepository::reopen_folder() { } auto& bundle = bundles_.at(num); - segment_max_block_ = std::max(segment_max_block_, bundle.block_to() - 1); if (num < bundle.block_to()) { num = bundle.block_to(); @@ -301,8 +302,9 @@ void SnapshotRepository::reopen_folder() { } } - idx_max_block_ = max_idx_available(); - SILK_INFO << "Total reopened snapshots: " << total_snapshots_count(); + SILK_INFO << "Total reopened bundles: " << bundles_count() + << " snapshots: " << total_snapshots_count() + << " indexes: " << total_indexes_count(); } const SnapshotBundle* SnapshotRepository::find_bundle(BlockNum number) const { @@ -345,20 +347,6 @@ SnapshotPathList SnapshotRepository::get_files(const std::string& ext) const { return snapshot_files; } -BlockNum SnapshotRepository::max_idx_available() { - BlockNum result = 0; - for (auto& entry : bundles_) { - auto& bundle = entry.second; - for (auto& index_ref : bundle.indexes()) { - if (!index_ref.get().is_open()) { - return result; - } - } - result = bundle.block_to() - 1; - } - return result; -} - bool is_stale_index_path(const SnapshotPath& index_path) { SnapshotType snapshot_type = (index_path.type() == SnapshotType::transactions_to_block) ? SnapshotType::transactions diff --git a/silkworm/db/snapshots/repository.hpp b/silkworm/db/snapshots/repository.hpp index 2161dd1459..6a025cd93e 100644 --- a/silkworm/db/snapshots/repository.hpp +++ b/silkworm/db/snapshots/repository.hpp @@ -127,18 +127,15 @@ class SnapshotRepository { void add_snapshot_bundle(SnapshotBundle bundle); - [[nodiscard]] std::size_t header_snapshots_count() const { return bundles_.size(); } - [[nodiscard]] std::size_t body_snapshots_count() const { return bundles_.size(); } - [[nodiscard]] std::size_t tx_snapshots_count() const { return bundles_.size(); } - [[nodiscard]] std::size_t total_snapshots_count() const { - return header_snapshots_count() + body_snapshots_count() + tx_snapshots_count(); - } + [[nodiscard]] std::size_t bundles_count() const { return bundles_.size(); } + [[nodiscard]] std::size_t total_snapshots_count() const { return bundles_count() * SnapshotBundle::kSnapshotsCount; } + [[nodiscard]] std::size_t total_indexes_count() const { return bundles_count() * SnapshotBundle::kIndexesCount; } - [[nodiscard]] BlockNum segment_max_block() const { return segment_max_block_; } - [[nodiscard]] BlockNum idx_max_block() const { return idx_max_block_; } - [[nodiscard]] BlockNum max_block_available() const { return std::min(segment_max_block_, idx_max_block_); } + //! All types of .seg and .idx files are available up to this block number + [[nodiscard]] BlockNum max_block_available() const; [[nodiscard]] std::vector missing_block_ranges() const; + [[nodiscard]] std::vector> missing_indexes() const; void remove_stale_indexes() const; @@ -183,18 +180,11 @@ class SnapshotRepository { [[nodiscard]] SnapshotPathList get_files(const std::string& ext) const; - [[nodiscard]] BlockNum max_idx_available(); SnapshotPathList stale_index_paths() const; //! The configuration settings for snapshots SnapshotSettings settings_; - //! All types of .seg files are available - up to this block number - BlockNum segment_max_block_{0}; - - //! All types of .idx files are available - up to this block number - BlockNum idx_max_block_{0}; - //! Full snapshot bundles ordered by block_from std::map bundles_; }; diff --git a/silkworm/db/snapshots/repository_test.cpp b/silkworm/db/snapshots/repository_test.cpp index 03c281e704..32a3f7db26 100644 --- a/silkworm/db/snapshots/repository_test.cpp +++ b/silkworm/db/snapshots/repository_test.cpp @@ -51,9 +51,7 @@ TEST_CASE("SnapshotRepository::reopen_folder.partial_bundle", "[silkworm][node][ SnapshotSettings settings{tmp_dir.path()}; SnapshotRepository repository{settings}; repository.reopen_folder(); - CHECK(repository.header_snapshots_count() == 0); - CHECK(repository.body_snapshots_count() == 0); - CHECK(repository.tx_snapshots_count() == 0); + CHECK(repository.bundles_count() == 0); CHECK(repository.max_block_available() == 0); } @@ -203,7 +201,7 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { CHECK_FALSE(repository.find_tx_segment(1'500'014)); } SECTION("greater than max_block_available") { - CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 10)); + CHECK_FALSE(repository.find_body_segment(repository.max_block_available() + 1)); } } From 7d1b167c6b794f1f66abb1bc7467fb4f7d05a4b7 Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 25 Apr 2024 10:53:53 +0200 Subject: [PATCH 36/37] demote log::Info to Debug: ETL collector flushed file --- silkworm/db/etl/collector.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/silkworm/db/etl/collector.cpp b/silkworm/db/etl/collector.cpp index e614d3758d..895d167432 100644 --- a/silkworm/db/etl/collector.cpp +++ b/silkworm/db/etl/collector.cpp @@ -50,9 +50,16 @@ void Collector::flush_buffer() { file_providers_.back()->flush(buffer_); buffer_.clear(); const auto [_, duration]{sw.stop()}; - log::Info("ETL collector flushed file", {"path", std::string(file_providers_.back()->get_file_name()), - "size", human_size(file_providers_.back()->get_file_size()), - "in", StopWatch::format(duration)}); + log::Debug( + "ETL collector flushed file", + { + "path", + std::string(file_providers_.back()->get_file_name()), + "size", + human_size(file_providers_.back()->get_file_size()), + "in", + StopWatch::format(duration), + }); } } From 14c4ef9e463a9cb9562a499dcd42c89b37f8b6ae Mon Sep 17 00:00:00 2001 From: battlmonstr Date: Thu, 25 Apr 2024 11:19:35 +0200 Subject: [PATCH 37/37] rebuild a missing transactions_to_block index if transactions index exists --- silkworm/db/snapshots/repository.cpp | 52 +++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/silkworm/db/snapshots/repository.cpp b/silkworm/db/snapshots/repository.cpp index f174c063d5..92556b3024 100644 --- a/silkworm/db/snapshots/repository.cpp +++ b/silkworm/db/snapshots/repository.cpp @@ -204,40 +204,44 @@ std::optional SnapshotRepository::find_block_number(Hash txn_hash) con std::vector> SnapshotRepository::missing_indexes() const { SnapshotPathList segment_files = get_segment_files(); std::vector> missing_index_list; - missing_index_list.reserve(segment_files.size()); + for (const auto& seg_file : segment_files) { - const auto index_file = seg_file.index_file(); - SILK_TRACE << "Segment file: " << seg_file.filename() << " has index: " << index_file.filename(); - if (!std::filesystem::exists(index_file.path())) { - std::shared_ptr index; - switch (seg_file.type()) { - case SnapshotType::headers: { - index = std::make_shared(HeaderIndex::make(seg_file)); + switch (seg_file.type()) { + case SnapshotType::headers: { + if (!fs::exists(seg_file.index_file().path())) { + auto index = std::make_shared(HeaderIndex::make(seg_file)); missing_index_list.push_back(index); - break; } - case SnapshotType::bodies: { - index = std::make_shared(BodyIndex::make(seg_file)); + break; + } + case SnapshotType::bodies: { + if (!fs::exists(seg_file.index_file().path())) { + auto index = std::make_shared(BodyIndex::make(seg_file)); missing_index_list.push_back(index); - break; } - case SnapshotType::transactions: { - auto bodies_segment_path = TransactionIndex::bodies_segment_path(seg_file); - if (std::find(segment_files.begin(), segment_files.end(), bodies_segment_path) != segment_files.end()) { - index = std::make_shared(TransactionIndex::make(bodies_segment_path, seg_file)); - missing_index_list.push_back(index); - - index = std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, seg_file)); - missing_index_list.push_back(index); - } - break; + break; + } + case SnapshotType::transactions: { + auto bodies_segment_path = TransactionIndex::bodies_segment_path(seg_file); + bool has_bodies_segment = (std::find(segment_files.begin(), segment_files.end(), bodies_segment_path) != segment_files.end()); + + if (!fs::exists(seg_file.index_file().path()) && has_bodies_segment) { + auto index = std::make_shared(TransactionIndex::make(bodies_segment_path, seg_file)); + missing_index_list.push_back(index); } - default: { - SILKWORM_ASSERT(false); + + if (!fs::exists(seg_file.index_file_for_type(SnapshotType::transactions_to_block).path()) && has_bodies_segment) { + auto index = std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, seg_file)); + missing_index_list.push_back(index); } + break; + } + default: { + SILKWORM_ASSERT(false); } } } + return missing_index_list; }