Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: SMT support in trace_decoder ignores storage #693

Merged
merged 14 commits into from
Oct 16, 2024
Prev Previous commit
Next Next commit
rewrite type2
0xaatif committed Oct 6, 2024
commit f27bbdf2fac784439a2db0801e62128fecb23ba9
7 changes: 2 additions & 5 deletions trace_decoder/src/core.rs
Original file line number Diff line number Diff line change
@@ -241,11 +241,8 @@ fn start(
)
}
WireDisposition::Type2 => {
let crate::type2::Frontend {
trie,
code,
collation,
} = crate::type2::frontend(instructions)?;
let crate::type2::Frontend { trie, code } =
crate::type2::frontend(instructions)?;

todo!()
}
4 changes: 3 additions & 1 deletion trace_decoder/src/type1.rs
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ use mpt_trie::partial_trie::OnOrphanedHashNode;
use nunny::NonEmpty;
use u4::U4;

use crate::typed_mpt::{StateMpt, StateTrie as _, StorageTrie, MptKey};
use crate::typed_mpt::{MptKey, StateMpt, StorageTrie};
use crate::wire::{Instruction, SmtLeaf};

#[derive(Debug, Clone)]
@@ -380,6 +380,8 @@ fn finish_stack(v: &mut Vec<Node>) -> anyhow::Result<Execution> {

#[test]
fn test_tries() {
use crate::typed_mpt::StateTrie as _;

for (ix, case) in
serde_json::from_str::<Vec<super::Case>>(include_str!("cases/zero_jerigon.json"))
.unwrap()
232 changes: 112 additions & 120 deletions trace_decoder/src/type2.rs
Original file line number Diff line number Diff line change
@@ -1,51 +1,45 @@
//! Frontend for the witness format emitted by e.g [`0xPolygonHermez/cdk-erigon`](https://github.com/0xPolygonHermez/cdk-erigon/)
//! Ethereum node.

use std::{
collections::{HashMap, HashSet},
iter,
};
use std::collections::{BTreeMap, HashSet};

use anyhow::{bail, ensure, Context as _};
use bitvec::vec::BitVec;
use either::Either;
use ethereum_types::BigEndianHash as _;
use itertools::{EitherOrBoth, Itertools as _};
use ethereum_types::{Address, BigEndianHash as _, U256};
use itertools::EitherOrBoth;
use keccak_hash::H256;
use nunny::NonEmpty;
use plonky2::field::types::Field;
use plonky2::field::types::{Field, Field64 as _};
use smt_trie::keys::{key_balance, key_code, key_code_length, key_nonce, key_storage};
use stackstack::Stack;

use crate::{
typed_mpt::StateSmt,
typed_mpt::SmtKey,
wire::{Instruction, SmtLeaf, SmtLeafType},
};
type SmtTrie = smt_trie::smt::Smt<smt_trie::db::MemoryDb>;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
/// Combination of all the [`SmtLeaf::node_type`]s
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct CollatedLeaf {
pub balance: Option<ethereum_types::U256>,
pub nonce: Option<ethereum_types::U256>,
pub code_hash: Option<ethereum_types::H256>,
pub storage_root: Option<ethereum_types::H256>,
pub code: Option<ethereum_types::U256>,
pub code_length: Option<ethereum_types::U256>,
pub storage: BTreeMap<U256, U256>,
}

pub struct Frontend {
pub trie: SmtTrie,
pub code: HashSet<NonEmpty<Vec<u8>>>,
pub collation: HashMap<ethereum_types::Address, CollatedLeaf>,
}

/// # Panics
/// - Liberally, both in this module and the [`smt_trie`] library. Therefore, do
/// NOT call this function on untrusted inputs.
pub fn frontend(instructions: impl IntoIterator<Item = Instruction>) -> anyhow::Result<Frontend> {
let (node, code) = fold(instructions).context("couldn't fold smt from instructions")?;
let (trie, collation) =
node2trie(node).context("couldn't construct trie and collation from folded node")?;
Ok(Frontend {
trie,
code,
collation,
})
let trie = node2trie(node).context("couldn't construct trie and collation from folded node")?;
Ok(Frontend { trie, code })
}

/// Node in a binary (SMT) tree.
@@ -107,9 +101,9 @@ fn fold1(instructions: impl IntoIterator<Item = Instruction>) -> anyhow::Result<

Ok(Some(match mask {
// note that the single-child bits are reversed...
0b0001 => Node::Branch(EitherOrBoth::Left(get_child()?)),
0b0010 => Node::Branch(EitherOrBoth::Right(get_child()?)),
0b0011 => Node::Branch(EitherOrBoth::Both(get_child()?, get_child()?)),
0b_01 => Node::Branch(EitherOrBoth::Left(get_child()?)),
0b_10 => Node::Branch(EitherOrBoth::Right(get_child()?)),
0b_11 => Node::Branch(EitherOrBoth::Both(get_child()?, get_child()?)),
other => bail!("unexpected bit pattern in Branch mask: {:#b}", other),
}))
}
@@ -121,113 +115,111 @@ fn fold1(instructions: impl IntoIterator<Item = Instruction>) -> anyhow::Result<
}
}

/// Pack a [`Node`] tree into an [`SmtTrie`].
/// Also summarizes the [`Node::Leaf`]s out-of-band.
///
/// # Panics
/// - if the tree is too deep.
/// - if [`SmtLeaf::address`] or [`SmtLeaf::value`] are the wrong length.
/// - if [`SmtLeafType::Storage`] is the wrong length.
/// - [`SmtTrie`] panics internally.
fn node2trie(
node: Node,
) -> anyhow::Result<(SmtTrie, HashMap<ethereum_types::Address, CollatedLeaf>)> {
fn node2trie(node: Node) -> anyhow::Result<SmtTrie> {
let mut trie = SmtTrie::default();

let (hashes, leaves) =
iter_leaves(node).partition_map::<Vec<_>, Vec<_>, _, _, _>(|(path, leaf)| match leaf {
Either::Left(it) => Either::Left((path, it)),
Either::Right(it) => Either::Right(it),
});

let mut lens = std::collections::BTreeMap::<_, usize>::new();

for (path, hash) in hashes {
*lens.entry(path.len()).or_default() += 1;
// needs to be called before `set`, below, "to avoid any issues" according
// to the smt docs.
let mut hashes = BTreeMap::new();
let mut leaves = BTreeMap::new();
visit(&mut hashes, &mut leaves, Stack::new(), node)?;
for (key, hash) in hashes {
trie.set_hash(
bits2bits(path),
key.into_smt_bits(),
smt_trie::smt::HashOut {
elements: {
let ethereum_types::U256(arr) = ethereum_types::H256(hash).into_uint();
let ethereum_types::U256(arr) = hash.into_uint();
for u in arr {
ensure!(u < smt_trie::smt::F::ORDER);
}
arr.map(smt_trie::smt::F::from_canonical_u64)
},
},
)
);
}
dbg!(lens);

let mut collated = HashMap::<ethereum_types::Address, CollatedLeaf>::new();
for SmtLeaf {
node_type,
address,
value,
} in leaves
for (
addr,
CollatedLeaf {
balance,
nonce,
code,
code_length,
storage,
},
) in leaves
{
let address = ethereum_types::Address::from_slice(&address);
let collated = collated.entry(address).or_default();
let value = ethereum_types::U256::from_big_endian(&value);
let key = match node_type {
SmtLeafType::Balance => {
ensure!(collated.balance.is_none(), "double write of field");
collated.balance = Some(value);
smt_trie::keys::key_balance(address)
}
SmtLeafType::Nonce => {
ensure!(collated.nonce.is_none(), "double write of field");
collated.nonce = Some(value);
smt_trie::keys::key_nonce(address)
}
SmtLeafType::Code => {
ensure!(collated.code_hash.is_none(), "double write of field");
collated.code_hash = Some({
let mut it = ethereum_types::H256::zero();
value.to_big_endian(it.as_bytes_mut());
it
});
smt_trie::keys::key_code(address)
}
SmtLeafType::Storage(it) => {
ensure!(collated.storage_root.is_none(), "double write of field");
// TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/issues/275
// do we not do anything with the storage here?
smt_trie::keys::key_storage(address, ethereum_types::U256::from_big_endian(&it))
for (value, key_fn) in [
(balance, key_balance as fn(_) -> _),
(nonce, key_nonce),
(code, key_code),
(code_length, key_code_length),
] {
if let Some(value) = value {
trie.set(key_fn(addr), value);
}
SmtLeafType::CodeLength => smt_trie::keys::key_code_length(address),
};
trie.set(key, value)
}
Ok((trie, collated))
}

/// # Panics
/// - on overcapacity
fn bits2bits(ours: BitVec) -> smt_trie::bits::Bits {
let mut theirs = smt_trie::bits::Bits::empty();
for it in ours {
theirs.push_bit(it)
}
for (slot, value) in storage {
trie.set(key_storage(addr, slot), value);
}
}
theirs
Ok(trie)
}

/// Simple, inefficient visitor of all leaves of the [`Node`] tree.
#[allow(clippy::type_complexity)]
fn iter_leaves(node: Node) -> Box<dyn Iterator<Item = (BitVec, Either<[u8; 32], SmtLeaf>)>> {
fn visit(
hashes: &mut BTreeMap<SmtKey, H256>,
leaves: &mut BTreeMap<Address, CollatedLeaf>,
path: Stack<bool>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a compelling reason why a plain `Vec` could not be used as the stack here, rather than adding the external `stackstack` dependency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It removes a footgun, and limits misuse:

fn visit(path: &mut Vec<bool>) {
    path.push(true);
    visit(path);
    path.pop(); // often forgotten
}

It also happens to use no heap allocation in this case.

node: Node,
) -> anyhow::Result<()> {
match node {
Node::Hash(it) => Box::new(iter::once((BitVec::new(), Either::Left(it)))),
Node::Branch(it) => {
let (left, right) = it.left_and_right();
let left = left
.into_iter()
.flat_map(|it| iter_leaves(*it).update(|(path, _)| path.insert(0, false)));
let right = right
.into_iter()
.flat_map(|it| iter_leaves(*it).update(|(path, _)| path.insert(0, true)));
Box::new(left.chain(right))
Node::Branch(children) => {
let (left, right) = children.left_and_right();
if let Some(left) = left {
visit(hashes, leaves, path.pushed(false), *left)?;
}
if let Some(right) = right {
visit(hashes, leaves, path.pushed(true), *right)?;
}
}
Node::Hash(hash) => {
hashes.insert(SmtKey::new(path.iter().copied())?, H256(hash));
}
Node::Leaf(SmtLeaf {
node_type,
address, // TODO(0xaatif): field should be fixed length
value, // TODO(0xaatif): field should be fixed length
}) => {
let address = Address::from_slice(&address);
let collated = leaves.entry(address).or_default();
let value = U256::from_big_endian(&value);
macro_rules! ensure {
($expr:expr) => {
::anyhow::ensure!($expr, "double write of field for address {}", address)
};
}
match node_type {
SmtLeafType::Balance => {
ensure!(collated.balance.is_none());
collated.balance = Some(value)
}
SmtLeafType::Nonce => {
ensure!(collated.nonce.is_none());
collated.nonce = Some(value)
}
SmtLeafType::Code => {
ensure!(collated.code.is_none());
collated.code = Some(value)
}
SmtLeafType::Storage(slot) => {
// TODO(0xaatif): ^ field should be fixed length
let clobbered = collated.storage.insert(U256::from_big_endian(&slot), value);
ensure!(clobbered.is_none())
}
SmtLeafType::CodeLength => {
ensure!(collated.code_length.is_none());
collated.code_length = Some(value)
}
};
}
Node::Leaf(it) => Box::new(iter::once((BitVec::new(), Either::Right(it)))),
}
Ok(())
}

#[test]
@@ -241,10 +233,10 @@ fn test_tries() {
println!("case {}", ix);
let instructions = crate::wire::parse(&case.bytes).unwrap();
let frontend = frontend(instructions).unwrap();
// assert_eq!(case.expected_state_root, {
// let mut it = [0; 32];
// smt_trie::utils::hashout2u(frontend.trie.root).to_big_endian(&mut
// it); ethereum_types::H256(it)
// });
assert_eq!(case.expected_state_root, {
let mut it = [0; 32];
smt_trie::utils::hashout2u(frontend.trie.root).to_big_endian(&mut it);
ethereum_types::H256(it)
});
}
}
4 changes: 2 additions & 2 deletions trace_decoder/src/typed_mpt.rs
Original file line number Diff line number Diff line change
@@ -244,7 +244,7 @@ impl SmtKey {
Ok(Self { bits, len })
}

fn into_bits(self) -> smt_trie::bits::Bits {
pub fn into_smt_bits(self) -> smt_trie::bits::Bits {
let mut bits = smt_trie::bits::Bits::default();
for bit in self.as_bitslice() {
bits.push_bit(*bit)
@@ -522,7 +522,7 @@ impl StateSmt {
} = self;
let mut smt = smt_trie::smt::Smt::<smt_trie::db::MemoryDb>::default();
for (k, v) in hashed_out {
smt.set_hash(k.into_bits(), conv_hash::eth2smt(*v));
smt.set_hash(k.into_smt_bits(), conv_hash::eth2smt(*v));
}
for (
addr,
6 changes: 3 additions & 3 deletions trace_decoder/src/wire.rs
Original file line number Diff line number Diff line change
@@ -82,16 +82,16 @@ pub enum Instruction {
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SmtLeaf {
pub node_type: SmtLeafType,
pub address: NonEmpty<Vec<u8>>,
pub value: NonEmpty<Vec<u8>>,
pub address: NonEmpty<Vec<u8>>, // TODO(0xaatif): this should be a fixed length
pub value: NonEmpty<Vec<u8>>, // TODO(0xaatif): this should be a fixed length
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SmtLeafType {
Balance,
Nonce,
Code,
Storage(NonEmpty<Vec<u8>>),
Storage(NonEmpty<Vec<u8>>), // TODO(0xaatif): this should be a fixed length
CodeLength,
}