Skip to content

Commit

Permalink
mpt: Refactor path mismatch handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chfast committed Jan 2, 2024
1 parent 808d79c commit ce0bca3
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 53 deletions.
84 changes: 32 additions & 52 deletions test/state/mpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,10 @@ class Path
}

[[nodiscard]] static constexpr size_t capacity() noexcept { return max_size; }
[[nodiscard]] size_t size() const noexcept { return m_size; }
[[nodiscard]] bool empty() const noexcept { return m_size == 0; }
[[nodiscard]] uint8_t operator[](size_t index) const noexcept { return m_nibbles[index]; }
[[nodiscard]] const uint8_t* begin() const noexcept { return m_nibbles; }
[[nodiscard]] const uint8_t* end() const noexcept { return m_nibbles + m_size; }

[[nodiscard]] Path tail(size_t pos) const noexcept
{
assert(pos > 0 && pos <= m_size); // MPT never requests whole path copy (pos == 0).
return {begin() + pos, end()};
}

[[nodiscard]] Path head(size_t size) const noexcept
{
assert(size < m_size); // MPT never requests whole path copy (size == length).
return {begin(), begin() + size};
}

[[nodiscard]] bytes encode(Kind kind) const
{
const auto is_even = m_size % 2 == 0;
Expand Down Expand Up @@ -135,13 +121,6 @@ class MPTNode
std::move(br);
}

/// Finds the position at witch two paths differ.
static size_t mismatch(const Path& p1, const Path& p2) noexcept
{
assert(p1.size() <= p2.size());
return static_cast<size_t>(std::ranges::mismatch(p1, p2).in1 - p1.begin());
}

public:
MPTNode() = default;

Expand All @@ -162,56 +141,54 @@ void MPTNode::insert(const Path& path, bytes&& value) // NOLINT(misc-no-recursi
// in an existing branch node. Otherwise, we need to create new branch node
// (possibly with an adjusted extended node) and transform existing nodes around it.

const auto [this_idx, insert_idx] = std::ranges::mismatch(m_path, path);

// insert_idx is always valid if requirements are fulfilled:
// - if m_path is not shorter than path they must have mismatched nibbles,
// - if m_path is shorter and matches the path prefix
// then insert_idx points at path[m_path.size()].
assert(insert_idx != path.end() && "a key must not be a prefix of another key");

const Path common{m_path.begin(), this_idx};
const Path insert_tail{insert_idx + 1, path.end()};

switch (m_kind)
{
case Kind::branch:
{
assert(m_path.empty()); // Branch has no path.

auto& child = m_children[path[0]];
if (!child)
child = leaf(path.tail(1), std::move(value));
if (auto& child = m_children[*insert_idx]; child)
child->insert(insert_tail, std::move(value));
else
child->insert(path.tail(1), std::move(value));
child = leaf(insert_tail, std::move(value));
break;
}

case Kind::ext:
{
assert(!m_path.empty()); // Ext must have non-empty path.

const auto mismatch_pos = mismatch(m_path, path);

if (mismatch_pos == m_path.size()) // Paths match: go into the child.
return m_children[0]->insert(path.tail(mismatch_pos), std::move(value));

const auto orig_idx = m_path[mismatch_pos];
const auto new_idx = path[mismatch_pos];
assert(!m_path.empty()); // Ext must have non-empty path.
if (this_idx == m_path.end()) // Paths match: go into the child.
return m_children[0]->insert({insert_idx, path.end()}, std::move(value));

// The original branch node must be pushed down, possible extended with
// the adjusted extended node if the path split point is not directly at the branch node.
// Clang Analyzer bug: https://github.com/llvm/llvm-project/issues/47814
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
auto orig_branch = optional_ext(m_path.tail(mismatch_pos + 1), std::move(m_children[0]));
auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value));
*this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_branch), new_idx,
std::move(new_leaf));
auto this_branch = optional_ext({this_idx + 1, m_path.end()}, std::move(m_children[0]));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_branch), *insert_idx, std::move(new_leaf));
break;
}

case Kind::leaf:
{
assert(!m_path.empty()); // Leaf must have non-empty path.

const auto mismatch_pos = mismatch(m_path, path);
assert(mismatch_pos != m_path.size()); // Paths must be different.

const auto orig_idx = m_path[mismatch_pos];
const auto new_idx = path[mismatch_pos];
auto orig_leaf = leaf(m_path.tail(mismatch_pos + 1), std::move(m_value));
auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value));
*this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_leaf), new_idx,
std::move(new_leaf));
assert(!m_path.empty()); // Leaf must have non-empty path.
assert(this_idx != m_path.end()); // Paths must be different.
auto this_leaf = leaf({this_idx + 1, m_path.end()}, std::move(m_value));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_leaf), *insert_idx, std::move(new_leaf));
break;
}

Expand Down Expand Up @@ -271,10 +248,13 @@ MPT::~MPT() noexcept = default;

void MPT::insert(bytes_view key, bytes&& value)
{
assert(key.size() <= Path::capacity() / 2); // must fit the path impl. length limit
const Path path{key};

if (m_root == nullptr)
m_root = MPTNode::leaf(Path{key}, std::move(value));
m_root = MPTNode::leaf(path, std::move(value));
else
m_root->insert(Path{key}, std::move(value));
m_root->insert(path, std::move(value));
}

[[nodiscard]] hash256 MPT::hash() const
Expand Down
7 changes: 7 additions & 0 deletions test/state/mpt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ constexpr auto emptyMPTHash =

/// Insert-only Merkle Patricia Trie implementation for getting the root hash
/// out of (key, value) pairs.
///
/// Limitations:
/// 1. The keys must not be longer than 32 bytes.
/// 2. A key must not be a prefix of another key.
/// This comes from the spec (Yellow Paper Appendix D) - a branch node cannot store a value.
/// 3. Inserted values cannot be updated (by inserting the same key again).
/// 4. Inserted values cannot be erased.
class MPT
{
std::unique_ptr<class MPTNode> m_root;
Expand Down
90 changes: 89 additions & 1 deletion test/unittests/state_mpt_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <test/state/rlp.hpp>
#include <test/utils/utils.hpp>
#include <numeric>
#include <random>
#include <ranges>

using namespace evmone;
using namespace evmone::state;
Expand Down Expand Up @@ -85,6 +87,29 @@ TEST(state_mpt, branch_node_example1)
EXPECT_EQ(hex(trie.hash()), "1aaa6f712413b9a115730852323deb5f5d796c29151a60a1f55f41a25354cd26");
}

TEST(state_mpt, branch_node_of_3)
{
// A trie of single branch node and three leaf nodes with paths of length 2.
// The branch node has leaf nodes at positions [0], [1] and [2]. All leafs have path 0.
// {0:0 1:0 2:0}

MPT trie;
trie.insert("00"_hex, "X"_b);
trie.insert("10"_hex, "Y"_b);
trie.insert("20"_hex, "Z"_b);
EXPECT_EQ(hex(trie.hash()), "5c5154e8d108dcf8b9946c8d33730ec8178345ce9d36e6feed44f0134515482d");
}

TEST(state_mpt, leaf_node_with_empty_path)
{
// Both inserted leafs have empty path in the end.
// 0:{0:"X", 1:"Y"}
MPT trie;
trie.insert("00"_hex, "X"_b);
trie.insert("01"_hex, "Y"_b);
EXPECT_EQ(hex(trie.hash()), "0a923005d10fbd4e571655cec425db7c5091db03c33891224073a55d3abc2415");
}

TEST(state_mpt, extension_node_example1)
{
// A trie of an extension node followed by a branch node with
Expand Down Expand Up @@ -155,6 +180,69 @@ TEST(state_mpt, extension_node_example2)
EXPECT_EQ(hex(trie.hash()), "ac28c08fa3ff1d0d2cc9a6423abb7af3f4dcc37aa2210727e7d3009a9b4a34e8");
}

TEST(state_mpt, keys_length_desc)
{
const auto k127 = rlp::encode(127);
const auto k128 = rlp::encode(128);
EXPECT_EQ(k127, "7f"_hex);
EXPECT_EQ(k128, "8180"_hex);

MPT asc;
asc.insert(k127, {});
asc.insert(k128, {});
EXPECT_EQ(hex(asc.hash()), "2fb7f2dee94138d79248ea2545a3ba1ceecb39e2037ed4e1d571c4d8bfbfa535");

MPT desc;
desc.insert(k128, {});
desc.insert(k127, {});
EXPECT_EQ(hex(desc.hash()), "2fb7f2dee94138d79248ea2545a3ba1ceecb39e2037ed4e1d571c4d8bfbfa535");
}

// In Ethereum lists are merkalized by creating a trie with keys of RLP-encoded enumeration.
// Therefore, the keys are of different length but none of them is a prefix of another.
// These tests create a list of N elements with empty values.
// TODO: Check with go-ethereum implementation.
static constexpr uint64_t LONG_LIST_SIZE = 100'000;
static constexpr auto LONG_LIST_HASH =
0x70760bc8a0ebcc93601519d778576ae67a81731112df8d8c1518437a52f13520_bytes32;

TEST(state_mpt, long_list_asc)
{
uint64_t keys[LONG_LIST_SIZE];
std::iota(std::begin(keys), std::end(keys), uint64_t{0});

MPT trie;
for (const auto key : keys)
trie.insert(rlp::encode(key), {});
EXPECT_EQ(trie.hash(), LONG_LIST_HASH);
}

TEST(state_mpt, long_list_desc)
{
uint64_t keys[LONG_LIST_SIZE];
std::iota(std::begin(keys), std::end(keys), uint64_t{0});
std::ranges::reverse(keys);

MPT trie;
for (const auto key : keys)
trie.insert(rlp::encode(key), {});
EXPECT_EQ(trie.hash(), LONG_LIST_HASH);
}

TEST(state_mpt, long_list_random)
{
std::random_device rd;
std::mt19937_64 gen{rd()};
uint64_t keys[LONG_LIST_SIZE];
std::iota(std::begin(keys), std::end(keys), uint64_t{0});
std::ranges::shuffle(keys, gen);

MPT trie;
for (const auto key : keys)
trie.insert(rlp::encode(key), {});
EXPECT_EQ(trie.hash(), LONG_LIST_HASH);
}

TEST(state_mpt, trie_topologies)
{
struct KVH
Expand Down Expand Up @@ -356,7 +444,7 @@ TEST(state_mpt, trie_topologies)
// Check if all insert order permutations give the same final hash.
std::vector<size_t> order(test.size());
std::iota(order.begin(), order.end(), size_t{0});
while (std::next_permutation(order.begin(), order.end()))
while (std::ranges::next_permutation(order).found)
{
MPT trie;
for (size_t i = 0; i < test.size(); ++i)
Expand Down

0 comments on commit ce0bca3

Please sign in to comment.