From ce0bca36ce363514ecc77f4f63caa009c7bc6411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 19 Dec 2023 00:47:51 +0100 Subject: [PATCH] mpt: Refactor path mismatch handling --- test/state/mpt.cpp | 84 +++++++++++------------------ test/state/mpt.hpp | 7 +++ test/unittests/state_mpt_test.cpp | 90 ++++++++++++++++++++++++++++++- 3 files changed, 128 insertions(+), 53 deletions(-) diff --git a/test/state/mpt.cpp b/test/state/mpt.cpp index 2bc74625e3..e3381acf81 100644 --- a/test/state/mpt.cpp +++ b/test/state/mpt.cpp @@ -52,24 +52,10 @@ class Path } [[nodiscard]] static constexpr size_t capacity() noexcept { return max_size; } - [[nodiscard]] size_t size() const noexcept { return m_size; } [[nodiscard]] bool empty() const noexcept { return m_size == 0; } - [[nodiscard]] uint8_t operator[](size_t index) const noexcept { return m_nibbles[index]; } [[nodiscard]] const uint8_t* begin() const noexcept { return m_nibbles; } [[nodiscard]] const uint8_t* end() const noexcept { return m_nibbles + m_size; } - [[nodiscard]] Path tail(size_t pos) const noexcept - { - assert(pos > 0 && pos <= m_size); // MPT never requests whole path copy (pos == 0). - return {begin() + pos, end()}; - } - - [[nodiscard]] Path head(size_t size) const noexcept - { - assert(size < m_size); // MPT never requests whole path copy (size == length). - return {begin(), begin() + size}; - } - [[nodiscard]] bytes encode(Kind kind) const { const auto is_even = m_size % 2 == 0; @@ -135,13 +121,6 @@ class MPTNode std::move(br); } - /// Finds the position at witch two paths differ. - static size_t mismatch(const Path& p1, const Path& p2) noexcept - { - assert(p1.size() <= p2.size()); - return static_cast(std::ranges::mismatch(p1, p2).in1 - p1.begin()); - } - public: MPTNode() = default; @@ -162,56 +141,54 @@ void MPTNode::insert(const Path& path, bytes&& value) // NOLINT(misc-no-recursi // in an existing branch node. Otherwise, we need to create new branch node // (possibly with an adjusted extended node) and transform existing nodes around it. + const auto [this_idx, insert_idx] = std::ranges::mismatch(m_path, path); + + // insert_idx is always valid if requirements are fulfilled: + // - if m_path is not shorter than path they must have mismatched nibbles, + // - if m_path is shorter and matches the path prefix + // then insert_idx points at path[m_path.size()]. + assert(insert_idx != path.end() && "a key must not be a prefix of another key"); + + const Path common{m_path.begin(), this_idx}; + const Path insert_tail{insert_idx + 1, path.end()}; + switch (m_kind) { case Kind::branch: { assert(m_path.empty()); // Branch has no path. - - auto& child = m_children[path[0]]; - if (!child) - child = leaf(path.tail(1), std::move(value)); + if (auto& child = m_children[*insert_idx]; child) + child->insert(insert_tail, std::move(value)); else - child->insert(path.tail(1), std::move(value)); + child = leaf(insert_tail, std::move(value)); break; } case Kind::ext: { - assert(!m_path.empty()); // Ext must have non-empty path. - - const auto mismatch_pos = mismatch(m_path, path); - - if (mismatch_pos == m_path.size()) // Paths match: go into the child. - return m_children[0]->insert(path.tail(mismatch_pos), std::move(value)); - - const auto orig_idx = m_path[mismatch_pos]; - const auto new_idx = path[mismatch_pos]; + assert(!m_path.empty()); // Ext must have non-empty path. + if (this_idx == m_path.end()) // Paths match: go into the child. + return m_children[0]->insert({insert_idx, path.end()}, std::move(value)); // The original branch node must be pushed down, possible extended with // the adjusted extended node if the path split point is not directly at the branch node. // Clang Analyzer bug: https://github.com/llvm/llvm-project/issues/47814 // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - auto orig_branch = optional_ext(m_path.tail(mismatch_pos + 1), std::move(m_children[0])); - auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value)); - *this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_branch), new_idx, - std::move(new_leaf)); + auto this_branch = optional_ext({this_idx + 1, m_path.end()}, std::move(m_children[0])); + auto new_leaf = leaf(insert_tail, std::move(value)); + *this = + ext_branch(common, *this_idx, std::move(this_branch), *insert_idx, std::move(new_leaf)); break; } case Kind::leaf: { - assert(!m_path.empty()); // Leaf must have non-empty path. - - const auto mismatch_pos = mismatch(m_path, path); - assert(mismatch_pos != m_path.size()); // Paths must be different. - - const auto orig_idx = m_path[mismatch_pos]; - const auto new_idx = path[mismatch_pos]; - auto orig_leaf = leaf(m_path.tail(mismatch_pos + 1), std::move(m_value)); - auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value)); - *this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_leaf), new_idx, - std::move(new_leaf)); + assert(!m_path.empty()); // Leaf must have non-empty path. + assert(this_idx != m_path.end()); // Paths must be different. + auto this_leaf = leaf({this_idx + 1, m_path.end()}, std::move(m_value)); + auto new_leaf = leaf(insert_tail, std::move(value)); + *this = + ext_branch(common, *this_idx, std::move(this_leaf), *insert_idx, std::move(new_leaf)); break; } @@ -271,10 +248,13 @@ MPT::~MPT() noexcept = default; void MPT::insert(bytes_view key, bytes&& value) { + assert(key.size() <= Path::capacity() / 2); // must fit the path impl. length limit + const Path path{key}; + if (m_root == nullptr) - m_root = MPTNode::leaf(Path{key}, std::move(value)); + m_root = MPTNode::leaf(path, std::move(value)); else - m_root->insert(Path{key}, std::move(value)); + m_root->insert(path, std::move(value)); } [[nodiscard]] hash256 MPT::hash() const diff --git a/test/state/mpt.hpp b/test/state/mpt.hpp index 240c31d78a..c718c7c485 100644 --- a/test/state/mpt.hpp +++ b/test/state/mpt.hpp @@ -13,6 +13,13 @@ constexpr auto emptyMPTHash = /// Insert-only Merkle Patricia Trie implementation for getting the root hash /// out of (key, value) pairs. +/// +/// Limitations: +/// 1. The keys must not be longer than 32 bytes. +/// 2. A key must not be a prefix of another key. +/// This comes from the spec (Yellow Paper Appendix D) - a branch node cannot store a value. +/// 3. Inserted values cannot be updated (by inserting the same key again). +/// 4. Inserted values cannot be erased. class MPT { std::unique_ptr m_root; diff --git a/test/unittests/state_mpt_test.cpp b/test/unittests/state_mpt_test.cpp index 3ac824c939..d11dfe17fb 100644 --- a/test/unittests/state_mpt_test.cpp +++ b/test/unittests/state_mpt_test.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include using namespace evmone; using namespace evmone::state; @@ -85,6 +87,29 @@ TEST(state_mpt, branch_node_example1) EXPECT_EQ(hex(trie.hash()), "1aaa6f712413b9a115730852323deb5f5d796c29151a60a1f55f41a25354cd26"); } +TEST(state_mpt, branch_node_of_3) +{ + // A trie of single branch node and three leaf nodes with paths of length 2. + // The branch node has leaf nodes at positions [0], [1] and [2]. All leafs have path 0. + // {0:0 1:0 2:0} + + MPT trie; + trie.insert("00"_hex, "X"_b); + trie.insert("10"_hex, "Y"_b); + trie.insert("20"_hex, "Z"_b); + EXPECT_EQ(hex(trie.hash()), "5c5154e8d108dcf8b9946c8d33730ec8178345ce9d36e6feed44f0134515482d"); +} + +TEST(state_mpt, leaf_node_with_empty_path) +{ + // Both inserted leafs have empty path in the end. + // 0:{0:"X", 1:"Y"} + MPT trie; + trie.insert("00"_hex, "X"_b); + trie.insert("01"_hex, "Y"_b); + EXPECT_EQ(hex(trie.hash()), "0a923005d10fbd4e571655cec425db7c5091db03c33891224073a55d3abc2415"); +} + TEST(state_mpt, extension_node_example1) { // A trie of an extension node followed by a branch node with @@ -155,6 +180,69 @@ TEST(state_mpt, extension_node_example2) EXPECT_EQ(hex(trie.hash()), "ac28c08fa3ff1d0d2cc9a6423abb7af3f4dcc37aa2210727e7d3009a9b4a34e8"); } +TEST(state_mpt, keys_length_desc) +{ + const auto k127 = rlp::encode(127); + const auto k128 = rlp::encode(128); + EXPECT_EQ(k127, "7f"_hex); + EXPECT_EQ(k128, "8180"_hex); + + MPT asc; + asc.insert(k127, {}); + asc.insert(k128, {}); + EXPECT_EQ(hex(asc.hash()), "2fb7f2dee94138d79248ea2545a3ba1ceecb39e2037ed4e1d571c4d8bfbfa535"); + + MPT desc; + desc.insert(k128, {}); + desc.insert(k127, {}); + EXPECT_EQ(hex(desc.hash()), "2fb7f2dee94138d79248ea2545a3ba1ceecb39e2037ed4e1d571c4d8bfbfa535"); +} + +// In Ethereum lists are merkalized by creating a trie with keys of RLP-encoded enumeration. +// Therefore, the keys are of different length but none of them is a prefix of another. +// These tests create a list of N elements with empty values. +// TODO: Check with go-ethereum implementation. +static constexpr uint64_t LONG_LIST_SIZE = 100'000; +static constexpr auto LONG_LIST_HASH = + 0x70760bc8a0ebcc93601519d778576ae67a81731112df8d8c1518437a52f13520_bytes32; + +TEST(state_mpt, long_list_asc) +{ + uint64_t keys[LONG_LIST_SIZE]; + std::iota(std::begin(keys), std::end(keys), uint64_t{0}); + + MPT trie; + for (const auto key : keys) + trie.insert(rlp::encode(key), {}); + EXPECT_EQ(trie.hash(), LONG_LIST_HASH); +} + +TEST(state_mpt, long_list_desc) +{ + uint64_t keys[LONG_LIST_SIZE]; + std::iota(std::begin(keys), std::end(keys), uint64_t{0}); + std::ranges::reverse(keys); + + MPT trie; + for (const auto key : keys) + trie.insert(rlp::encode(key), {}); + EXPECT_EQ(trie.hash(), LONG_LIST_HASH); +} + +TEST(state_mpt, long_list_random) +{ + std::random_device rd; + std::mt19937_64 gen{rd()}; + uint64_t keys[LONG_LIST_SIZE]; + std::iota(std::begin(keys), std::end(keys), uint64_t{0}); + std::ranges::shuffle(keys, gen); + + MPT trie; + for (const auto key : keys) + trie.insert(rlp::encode(key), {}); + EXPECT_EQ(trie.hash(), LONG_LIST_HASH); +} + TEST(state_mpt, trie_topologies) { struct KVH @@ -356,7 +444,7 @@ TEST(state_mpt, trie_topologies) // Check if all insert order permutations give the same final hash. std::vector order(test.size()); std::iota(order.begin(), order.end(), size_t{0}); - while (std::next_permutation(order.begin(), order.end())) + while (std::ranges::next_permutation(order).found) { MPT trie; for (size_t i = 0; i < test.size(); ++i)