Skip to content

Commit

Permalink
mpt: Remove explicit extended node kind
Browse files Browse the repository at this point in the history
Notice that the formally specified extended node in
the Merkle Patricia Trie always leads to a branch node.
Therefore, we can treat branch nodes as having a potentially non-empty
"extended path" and remove the explicit extended node kind.
This significantly simplifies the implementation.
  • Loading branch information
chfast committed Aug 15, 2024
1 parent e4bcf7c commit e7601b2
Showing 1 changed file with 66 additions and 126 deletions.
192 changes: 66 additions & 126 deletions test/state/mpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ namespace evmone::state
namespace
{
/// The MPT node kind.
enum class Kind : uint8_t
enum class Kind : bool
{
leaf,
ext,
branch
};

Expand Down Expand Up @@ -59,80 +58,44 @@ class Path

[[nodiscard]] bytes encode(Kind kind) const
{
assert(kind == Kind::leaf || kind == Kind::ext);
if (kind == Kind::branch && m_size == 0)
return {};

const auto kind_prefix = kind == Kind::leaf ? 0x20 : 0x00;
const auto has_odd_size = m_size % 2 != 0;
const auto nibble_prefix = has_odd_size ? (0x10 | m_nibbles[0]) : 0x00;

bytes encoded{static_cast<uint8_t>(kind_prefix | nibble_prefix)};
for (auto i = size_t{has_odd_size}; i < m_size; i += 2)
encoded.push_back(static_cast<uint8_t>((m_nibbles[i] << 4) | m_nibbles[i + 1]));
return encoded;
return rlp::encode(encoded);
}
};
} // namespace

/// The MPT Node.
///
/// The implementation is based on StackTrie from go-ethereum.
// TODO(clang-tidy-17): bug https://github.com/llvm/llvm-project/issues/50006
// NOLINTNEXTLINE(bugprone-reserved-identifier)
class MPTNode
{
static constexpr size_t num_children = 16;

Kind m_kind = Kind::leaf;
Path m_path;
bytes m_value;
std::unique_ptr<MPTNode> m_children[num_children];
std::unique_ptr<MPTNode> m_children[16];

explicit MPTNode(Kind kind, const Path& path = {}, bytes&& value = {}) noexcept
: m_kind{kind}, m_path{path}, m_value{std::move(value)}
{}

/// Creates an extended node.
static MPTNode ext(const Path& path, std::unique_ptr<MPTNode> child) noexcept
{
assert(child->m_kind == Kind::branch);
MPTNode node{Kind::ext, path};
node.m_children[0] = std::move(child);
return node;
}

/// Optionally wraps the child node with newly created extended node in case
/// the provided path is not empty.
static std::unique_ptr<MPTNode> optional_ext(
const Path& path, std::unique_ptr<MPTNode> child) noexcept
{
return (!path.empty()) ? std::make_unique<MPTNode>(ext(path, std::move(child))) :
std::move(child);
}

/// Creates a branch node out of two children and optionally extends it with an extended
/// node in case the path is not empty.
static MPTNode ext_branch(const Path& path, size_t idx1, std::unique_ptr<MPTNode> child1,
size_t idx2, std::unique_ptr<MPTNode> child2) noexcept
/// Creates a branch node out of two children and an optional extended path.
MPTNode(const Path& path, size_t idx1, MPTNode&& child1, size_t idx2, MPTNode&& child2) noexcept
: m_kind{Kind::branch}, m_path{path}
{
assert(idx1 != idx2);
assert(idx1 < num_children);
assert(idx2 < num_children);

MPTNode br{Kind::branch};
br.m_children[idx1] = std::move(child1);
br.m_children[idx2] = std::move(child2);
assert(idx1 < std::size(m_children));
assert(idx2 < std::size(m_children));

return (!path.empty()) ? ext(path, std::make_unique<MPTNode>(std::move(br))) :
std::move(br);
m_children[idx1] = std::make_unique<MPTNode>(std::move(child1));
m_children[idx2] = std::make_unique<MPTNode>(std::move(child2));
}

public:
MPTNode() = default;

/// Creates new leaf node.
static std::unique_ptr<MPTNode> leaf(const Path& path, bytes&& value) noexcept
{
return std::make_unique<MPTNode>(MPTNode{Kind::leaf, path, std::move(value)});
}
MPTNode(const Path& path, bytes&& value) noexcept : m_path{path}, m_value{std::move(value)} {}

void insert(const Path& path, bytes&& value);

Expand All @@ -143,108 +106,85 @@ void MPTNode::insert(const Path& path, bytes&& value) // NOLINT(misc-no-recursi
{
// The insertion is all about branch nodes. In the happy case we will find an empty slot
// in an existing branch node. Otherwise, we need to create a new branch node
// (possibly with an adjusted extended node) and transform existing nodes around it.

const auto [this_idx, insert_idx] = std::ranges::mismatch(m_path, path);

// insert_idx is always valid if requirements are fulfilled:
// (possibly with an extended path) and transform existing nodes around it.

// Let's consider the following branch node with extended path "ab".
//
//     |
//     |a ↙③
//     |b
//     |
// [a|b|c|d]
//  |     ②
//  ①
//
// If the insert path prefix matches the "ab" we insert to one of the children:
// - e.g. for "aba" insert into existing child ①,
// - e.g. for "abd" create new leaf node ②.
// If the insert path prefix doesn't match "ab" we split the extended path by inserting
// a new branch node whose two children are the "this" branch node and a new leaf node.
// E.g. for "acd" insert new branch node "a" at ③ with:
// - at "b" : the "this" branch node with empty extended path "",
// - at "c" : the new leaf node with path "d".

const auto [this_idx_it, insert_idx_it] = std::ranges::mismatch(m_path, path);

// insert_idx_it is always valid if requirements are fulfilled:
// - if m_path is not shorter than path they must have mismatched nibbles,
// given the requirement of key uniqueness and not being a prefix of an existing key,
// - if m_path is shorter and matches the path prefix
// then insert_idx points at path[m_path.size()].
assert(insert_idx != path.end() && "a key must not be a prefix of another key");
// then insert_idx_it points at path[m_path.size()].
assert(insert_idx_it != path.end() && "a key must not be a prefix of another key");
const Path insert_tail{insert_idx_it + 1, path.end()};

const Path common{m_path.begin(), this_idx};
const Path insert_tail{insert_idx + 1, path.end()};

switch (m_kind)
if (m_kind == Kind::branch && this_idx_it == m_path.end()) // Paths match: go into the child.
{
case Kind::branch:
{
assert(m_path.empty()); // Branch has no path.
if (auto& child = m_children[*insert_idx]; child)
child->insert(insert_tail, std::move(value));
if (auto& child = m_children[*insert_idx_it]; child)
child->insert(insert_tail, std::move(value)); //
else
child = leaf(insert_tail, std::move(value));
break;
child = std::make_unique<MPTNode>(insert_tail, std::move(value)); //
}

case Kind::ext:
{
assert(!m_path.empty()); // Ext must have non-empty path.
if (this_idx == m_path.end()) // Paths match: go into the child.
return m_children[0]->insert({insert_idx, path.end()}, std::move(value));

// The original branch node must be pushed down, possibly extended with
// the adjusted extended node if the path split point is not directly at the branch node.
// Clang Analyzer bug: https://github.com/llvm/llvm-project/issues/47814
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
auto this_branch = optional_ext({this_idx + 1, m_path.end()}, std::move(m_children[0]));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_branch), *insert_idx, std::move(new_leaf));
break;
}

case Kind::leaf:
else // ③: Shorten path of this node and insert it to the new branch node.
{
assert(!m_path.empty()); // Leaf must have non-empty path.
assert(this_idx != m_path.end() && "a key must be unique");
auto this_leaf = leaf({this_idx + 1, m_path.end()}, std::move(m_value));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_leaf), *insert_idx, std::move(new_leaf));
break;
}

default:
assert(false);
const auto this_idx = *this_idx_it;
const Path extended_path{m_path.begin(), this_idx_it};
const Path this_node_tail{this_idx_it + 1, m_path.end()};
auto this_node = std::move(*this); // invalidates this_idx_it
this_node.m_path = this_node_tail;
*this = MPTNode(extended_path, this_idx, std::move(this_node), *insert_idx_it,
MPTNode{insert_tail, std::move(value)});
}
}

/// Encodes a node and optionally hashes the encoded bytes
/// if their length exceeds the specified threshold.
static bytes encode_child(const MPTNode& child) noexcept // NOLINT(misc-no-recursion)
{
if (auto e = child.encode(); e.size() < 32)
return e; // "short" node
else
return rlp::encode(keccak256(e));
}

bytes MPTNode::encode() const // NOLINT(misc-no-recursion)
{
bytes encoded;
static constexpr auto shorten = [](bytes&& b) {
return (b.size() < 32) ? std::move(b) : rlp::encode(keccak256(b));
};

bytes encoded; // the encoded content of the node without its path
switch (m_kind)
{
case Kind::leaf:
{
encoded = rlp::encode(m_path.encode(m_kind)) + rlp::encode(m_value);
encoded = rlp::encode(m_value);
break;
}
case Kind::branch:
{
assert(m_path.empty());
static constexpr uint8_t empty = 0x80; // encoded empty child

for (const auto& child : m_children)
{
if (child)
encoded += encode_child(*child);
else
encoded += empty;
}
encoded += child ? shorten(child->encode()) : bytes{empty};
encoded += empty; // end indicator
break;
}
case Kind::ext:
{
encoded = rlp::encode(m_path.encode(m_kind)) + encode_child(*m_children[0]);

if (!m_path.empty()) // extended node
encoded = shorten(rlp::internal::wrap_list(encoded));
break;
}
}

return rlp::internal::wrap_list(encoded);
return rlp::internal::wrap_list(m_path.encode(m_kind) + encoded);
}


Expand All @@ -253,11 +193,11 @@ MPT::~MPT() noexcept = default;

void MPT::insert(bytes_view key, bytes&& value)
{
assert(key.size() <= Path::capacity() / 2); // must fit the path impl. length limit
assert(key.size() <= Path::capacity() / 2); // must fit the path implementation length limit
const Path path{key};

if (m_root == nullptr)
m_root = MPTNode::leaf(path, std::move(value));
m_root = std::make_unique<MPTNode>(path, std::move(value));
else
m_root->insert(path, std::move(value));
}
Expand Down

0 comments on commit e7601b2

Please sign in to comment.