diff --git a/silkworm/node/huffman/decompressor.cpp b/silkworm/node/huffman/decompressor.cpp
index 15a9641d4f..dc74a794a0 100644
--- a/silkworm/node/huffman/decompressor.cpp
+++ b/silkworm/node/huffman/decompressor.cpp
@@ -480,7 +480,7 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
     uint64_t word_length = next_position(true);
     if (word_length == 0) {
-        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
+        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
     }
     --word_length;  // because when we create HT we do ++ (0 is terminator)
     SILK_TRACE << "Iterator::next start_offset=" << start_offset << " word_length=" << word_length;
@@ -495,19 +495,19 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
     // Track position into buffer where to insert part of the word
     std::size_t buffer_position = buffer.size();
     std::size_t last_uncovered = buffer.size();
-    if (buffer.length() + word_length > buffer.capacity()) {
-        buffer.reserve(buffer.length() + word_length);
-    }
     buffer.resize(buffer.length() + word_length);
     SILK_TRACE << "Iterator::next buffer resized to: " << buffer.length();
     // Fill in the patterns
     for (auto pos{next_position(false)}; pos != 0; pos = next_position(false)) {
-        // Positions where to insert are encoded relative to one another
+        // Positions where to insert patterns are encoded relative to one another
        buffer_position += pos - 1;
        const ByteView pattern = next_pattern();
        SILK_TRACE << "Iterator::next data-from-patterns pos=" << pos << " pattern=" << to_hex(pattern);
-        pattern.copy(buffer.data() + buffer_position, pattern.size(), 0);
+        if (buffer_position > buffer.size()) {
+            return word_offset_;
+        }
+        pattern.copy(buffer.data() + buffer_position, std::min(pattern.size(), buffer.size() - buffer_position));
     }
     if (bit_position_ > 0) {
         ++word_offset_;
@@ -524,24 +524,24 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
     // Fill in data which is not the patterns
     for (auto pos{next_position(false)}; pos != 0; pos = next_position(false)) {
-        // Positions where to insert are encoded relative to one another
+        // Positions where to insert patterns are encoded relative to one another
        buffer_position += pos - 1;
        if (buffer_position > last_uncovered) {
-            uint64_t position_diff = buffer_position - last_uncovered;
+            std::size_t position_diff = buffer_position - last_uncovered;
            SILK_TRACE << "Iterator::next other-data pos=" << pos << " last_uncovered=" << last_uncovered << " buffer_position=" << buffer_position << " position_diff=" << position_diff << " data=" << to_hex(ByteView{data().data() + post_loop_offset, position_diff});
-            data().copy(buffer.data() + last_uncovered, position_diff, post_loop_offset);
+            data().copy(buffer.data() + last_uncovered, std::min(position_diff, buffer.size() - last_uncovered), post_loop_offset);
            post_loop_offset += position_diff;
        }
        last_uncovered = buffer_position + next_pattern().size();
     }
     if (word_length > last_uncovered) {
-        uint64_t position_diff = word_length - last_uncovered;
+        std::size_t position_diff = word_length - last_uncovered;
        SILK_TRACE << "Iterator::next other-data last_uncovered=" << last_uncovered << " buffer_position=" << buffer_position << " position_diff=" << position_diff << " data=" << to_hex(ByteView{data().data() + post_loop_offset, position_diff});
-        data().copy(buffer.data() + last_uncovered, position_diff, post_loop_offset);
+        data().copy(buffer.data() + last_uncovered, std::min(position_diff, buffer.size() - last_uncovered), post_loop_offset);
        post_loop_offset += position_diff;
     }
     word_offset_ = post_loop_offset;
@@ -553,7 +553,7 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
 uint64_t Decompressor::Iterator::next_uncompressed(Bytes& buffer) {
     uint64_t word_length = next_position(true);
     if (word_length == 0) {
-        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
+        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
     }
     --word_length;  // because when we create HT we do ++ (0 is terminator)
     if (word_length == 0) {
@@ -579,7 +579,7 @@ uint64_t Decompressor::Iterator::next_uncompressed(Bytes& buffer) {
 uint64_t Decompressor::Iterator::skip() {
     uint64_t word_length = next_position(true);
     if (word_length == 0) {
-        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
+        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
     }
     --word_length;  // because when we create HT we do ++ (0 is terminator)
     if (word_length == 0) {
@@ -597,7 +597,7 @@ uint64_t Decompressor::Iterator::skip() {
         // Positions where to insert are encoded relative to one another
         buffer_position += pos - 1;
         if (word_length < buffer_position) {
-            throw std::logic_error{"likely index file is invalid: " + decoder_->compressed_path().string()};
+            throw std::logic_error{"likely index file is invalid: " + decoder_->compressed_filename()};
         }
         if (buffer_position > last_uncovered) {
             uncovered_count += buffer_position - last_uncovered;
@@ -620,7 +620,7 @@ uint64_t Decompressor::Iterator::skip() {
 uint64_t Decompressor::Iterator::skip_uncompressed() {
     uint64_t word_length = next_position(true);
     if (word_length == 0) {
-        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
+        throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
     }
     --word_length;  // because when we create HT we do ++ (0 is terminator)
     if (word_length == 0) {
@@ -682,8 +682,10 @@ uint64_t Decompressor::Iterator::next_position(bool clean) {
         word_offset_++;
         bit_position_ = 0;
     }
+    SILK_TRACE << "Iterator::next_position word_offset_=" << word_offset_ << " bit_position_=" << int(bit_position_);
     const PositionTable* table = decoder_->position_dict_.get();
     if (table->bit_length() == 0) {
+        SILK_TRACE << "Iterator::next_position table->position(0)=" << table->position(0);
         return table->position(0);
     }
     uint8_t length{0};
@@ -696,6 +698,7 @@ uint64_t Decompressor::Iterator::next_position(bool clean) {
         bit_position_ += 9;  // CHAR_BIT + 1
     } else {
         bit_position_ += length;
+        SILK_TRACE << "Iterator::next_position table->position(code)=" << table->position(code);
         position = table->position(code);
     }
     word_offset_ += bit_position_ / CHAR_BIT;
diff --git a/silkworm/node/huffman/decompressor.hpp b/silkworm/node/huffman/decompressor.hpp
index 4e0f1f88ad..1be9eeee70 100644
--- a/silkworm/node/huffman/decompressor.hpp
+++ b/silkworm/node/huffman/decompressor.hpp
@@ -28,6 +28,7 @@
 #include
 #include
+#include <string>
 #include

 namespace silkworm::huffman {
@@ -235,6 +236,8 @@ class Decompressor {
     [[nodiscard]] const std::filesystem::path& compressed_path() const { return compressed_path_; }

+    [[nodiscard]] const std::string compressed_filename() const { return compressed_path_.filename().string(); }
+
     [[nodiscard]] uint64_t words_count() const { return words_count_; }

     [[nodiscard]] uint64_t empty_words_count() const { return empty_words_count_; }
diff --git a/silkworm/node/snapshot/snapshot.cpp b/silkworm/node/snapshot/snapshot.cpp
index 4f37ce71fa..c54b85ad6a 100644
--- a/silkworm/node/snapshot/snapshot.cpp
+++ b/silkworm/node/snapshot/snapshot.cpp
@@ -63,6 +63,7 @@ bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) {
 }

 std::optional Snapshot::next_item(uint64_t offset) const {
+    SILK_TRACE << "Snapshot::next_item offset: " << offset;
     auto data_iterator = decoder_.make_iterator();
     data_iterator.reset(offset);

@@ -71,7 +72,13 @@ std::optional Snapshot::next_item(uint64_t offset) const {
         return item;
     }
     item = WordItem{};
-    item->offset = data_iterator.next(item->value);
+    try {
+        item->offset = data_iterator.next(item->value);
+    } catch (const std::runtime_error& re) {
+        SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what();
+        return {};
+    }
+
     return item;
 }

@@ -121,13 +128,15 @@ std::optional HeaderSnapshot::header_by_hash(const Hash& block_hash
     // First, get the header ordinal position in snapshot by using block hash as MPHF index
     const auto block_header_position = idx_header_hash_->lookup(block_hash);
+    SILK_TRACE << "HeaderSnapshot::header_by_hash block_hash: " << block_hash.to_hex() << " block_header_position: " << block_header_position;
     // Then, get the header offset in snapshot by using ordinal lookup
     const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position);
+    SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset;
     // Finally, read the next header at specified offset
     auto header = next_header(block_header_offset);
     // We *must* ensure that the retrieved header hash matches because there is no way to know if key exists in MPHF
     if (header and header->hash() != block_hash) {
-        return {};
+        header.reset();
     }
     return header;
 }