Skip to content

Commit

Permalink
node: fix overflow in snapshot decompressor (#1281)
Browse files Browse the repository at this point in the history
  • Loading branch information
canepat authored Jun 24, 2023
1 parent c572425 commit dcc39cb
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 17 deletions.
33 changes: 18 additions & 15 deletions silkworm/node/huffman/decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {

uint64_t word_length = next_position(true);
if (word_length == 0) {
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
}
--word_length; // because when we create HT we do ++ (0 is terminator)
SILK_TRACE << "Iterator::next start_offset=" << start_offset << " word_length=" << word_length;
Expand All @@ -495,19 +495,19 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
// Track position into buffer where to insert part of the word
std::size_t buffer_position = buffer.size();
std::size_t last_uncovered = buffer.size();
if (buffer.length() + word_length > buffer.capacity()) {
buffer.reserve(buffer.length() + word_length);
}
buffer.resize(buffer.length() + word_length);
SILK_TRACE << "Iterator::next buffer resized to: " << buffer.length();

// Fill in the patterns
for (auto pos{next_position(false)}; pos != 0; pos = next_position(false)) {
// Positions where to insert are encoded relative to one another
// Positions where to insert patterns are encoded relative to one another
buffer_position += pos - 1;
const ByteView pattern = next_pattern();
SILK_TRACE << "Iterator::next data-from-patterns pos=" << pos << " pattern=" << to_hex(pattern);
pattern.copy(buffer.data() + buffer_position, pattern.size(), 0);
if (buffer_position > buffer.size()) {
return word_offset_;
}
pattern.copy(buffer.data() + buffer_position, std::min(pattern.size(), buffer.size() - buffer_position));
}
if (bit_position_ > 0) {
++word_offset_;
Expand All @@ -524,24 +524,24 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {

// Fill in data which is not the patterns
for (auto pos{next_position(false)}; pos != 0; pos = next_position(false)) {
// Positions where to insert are encoded relative to one another
// Positions where to insert patterns are encoded relative to one another
buffer_position += pos - 1;
if (buffer_position > last_uncovered) {
uint64_t position_diff = buffer_position - last_uncovered;
std::size_t position_diff = buffer_position - last_uncovered;
SILK_TRACE << "Iterator::next other-data pos=" << pos << " last_uncovered=" << last_uncovered
<< " buffer_position=" << buffer_position << " position_diff=" << position_diff
<< " data=" << to_hex(ByteView{data().data() + post_loop_offset, position_diff});
data().copy(buffer.data() + last_uncovered, position_diff, post_loop_offset);
data().copy(buffer.data() + last_uncovered, std::min(position_diff, buffer.size() - last_uncovered), post_loop_offset);
post_loop_offset += position_diff;
}
last_uncovered = buffer_position + next_pattern().size();
}
if (word_length > last_uncovered) {
uint64_t position_diff = word_length - last_uncovered;
std::size_t position_diff = word_length - last_uncovered;
SILK_TRACE << "Iterator::next other-data last_uncovered=" << last_uncovered
<< " buffer_position=" << buffer_position << " position_diff=" << position_diff
<< " data=" << to_hex(ByteView{data().data() + post_loop_offset, position_diff});
data().copy(buffer.data() + last_uncovered, position_diff, post_loop_offset);
data().copy(buffer.data() + last_uncovered, std::min(position_diff, buffer.size() - last_uncovered), post_loop_offset);
post_loop_offset += position_diff;
}
word_offset_ = post_loop_offset;
Expand All @@ -553,7 +553,7 @@ uint64_t Decompressor::Iterator::next(Bytes& buffer) {
uint64_t Decompressor::Iterator::next_uncompressed(Bytes& buffer) {
uint64_t word_length = next_position(true);
if (word_length == 0) {
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
}
--word_length; // because when we create HT we do ++ (0 is terminator)
if (word_length == 0) {
Expand All @@ -579,7 +579,7 @@ uint64_t Decompressor::Iterator::next_uncompressed(Bytes& buffer) {
uint64_t Decompressor::Iterator::skip() {
uint64_t word_length = next_position(true);
if (word_length == 0) {
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
}
--word_length; // because when we create HT we do ++ (0 is terminator)
if (word_length == 0) {
Expand All @@ -597,7 +597,7 @@ uint64_t Decompressor::Iterator::skip() {
// Positions where to insert are encoded relative to one another
buffer_position += pos - 1;
if (word_length < buffer_position) {
throw std::logic_error{"likely index file is invalid: " + decoder_->compressed_path().string()};
throw std::logic_error{"likely index file is invalid: " + decoder_->compressed_filename()};
}
if (buffer_position > last_uncovered) {
uncovered_count += buffer_position - last_uncovered;
Expand All @@ -620,7 +620,7 @@ uint64_t Decompressor::Iterator::skip() {
uint64_t Decompressor::Iterator::skip_uncompressed() {
uint64_t word_length = next_position(true);
if (word_length == 0) {
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_path().string()};
throw std::runtime_error{"invalid zero word length in: " + decoder_->compressed_filename()};
}
--word_length; // because when we create HT we do ++ (0 is terminator)
if (word_length == 0) {
Expand Down Expand Up @@ -682,8 +682,10 @@ uint64_t Decompressor::Iterator::next_position(bool clean) {
word_offset_++;
bit_position_ = 0;
}
SILK_TRACE << "Iterator::next_position word_offset_=" << word_offset_ << " bit_position_=" << int(bit_position_);
const PositionTable* table = decoder_->position_dict_.get();
if (table->bit_length() == 0) {
SILK_TRACE << "Iterator::next_position table->position(0)=" << table->position(0);
return table->position(0);
}
uint8_t length{0};
Expand All @@ -696,6 +698,7 @@ uint64_t Decompressor::Iterator::next_position(bool clean) {
bit_position_ += 9; // CHAR_BIT + 1
} else {
bit_position_ += length;
SILK_TRACE << "Iterator::next_position table->position(code)=" << table->position(code);
position = table->position(code);
}
word_offset_ += bit_position_ / CHAR_BIT;
Expand Down
3 changes: 3 additions & 0 deletions silkworm/node/huffman/decompressor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <absl/functional/function_ref.h>

#include <silkworm/core/common/base.hpp>
#include <silkworm/infra/common/log.hpp>
#include <silkworm/infra/common/memory_mapped_file.hpp>

namespace silkworm::huffman {
Expand Down Expand Up @@ -235,6 +236,8 @@ class Decompressor {

[[nodiscard]] const std::filesystem::path& compressed_path() const { return compressed_path_; }

[[nodiscard]] const std::string compressed_filename() const { return compressed_path_.filename().string(); }

[[nodiscard]] uint64_t words_count() const { return words_count_; }

[[nodiscard]] uint64_t empty_words_count() const { return empty_words_count_; }
Expand Down
13 changes: 11 additions & 2 deletions silkworm/node/snapshot/snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ bool Snapshot::for_each_item(const Snapshot::WordItemFunc& fn) {
}

std::optional<Snapshot::WordItem> Snapshot::next_item(uint64_t offset) const {
SILK_TRACE << "Snapshot::next_item offset: " << offset;
auto data_iterator = decoder_.make_iterator();
data_iterator.reset(offset);

Expand All @@ -71,7 +72,13 @@ std::optional<Snapshot::WordItem> Snapshot::next_item(uint64_t offset) const {
return item;
}
item = WordItem{};
item->offset = data_iterator.next(item->value);
try {
item->offset = data_iterator.next(item->value);
} catch (const std::runtime_error& re) {
SILK_WARN << "Snapshot::next_item invalid offset: " << offset << " what: " << re.what();
return {};
}

return item;
}

Expand Down Expand Up @@ -121,13 +128,15 @@ std::optional<BlockHeader> HeaderSnapshot::header_by_hash(const Hash& block_hash

// First, get the header ordinal position in snapshot by using block hash as MPHF index
const auto block_header_position = idx_header_hash_->lookup(block_hash);
SILK_TRACE << "HeaderSnapshot::header_by_hash block_hash: " << block_hash.to_hex() << " block_header_position: " << block_header_position;
// Then, get the header offset in snapshot by using ordinal lookup
const auto block_header_offset = idx_header_hash_->ordinal_lookup(block_header_position);
SILK_TRACE << "HeaderSnapshot::header_by_hash block_header_offset: " << block_header_offset;
// Finally, read the next header at specified offset
auto header = next_header(block_header_offset);
// We *must* ensure that the retrieved header hash matches because there is no way to know if key exists in MPHF
if (header and header->hash() != block_hash) {
return {};
header.reset();
}
return header;
}
Expand Down

0 comments on commit dcc39cb

Please sign in to comment.