diff --git a/core/rawdb/accessors_indexes_test.go b/core/rawdb/accessors_indexes_test.go index 1bb640c7e7..2faa3a1c51 100644 --- a/core/rawdb/accessors_indexes_test.go +++ b/core/rawdb/accessors_indexes_test.go @@ -44,9 +44,10 @@ func (h *testHasher) Reset() { h.hasher.Reset() } -func (h *testHasher) Update(key, val []byte) { +func (h *testHasher) Update(key, val []byte) error { h.hasher.Write(key) h.hasher.Write(val) + return nil } func (h *testHasher) Hash() common.Hash { diff --git a/core/types/block_test.go b/core/types/block_test.go index 362e534866..0a61a9663f 100644 --- a/core/types/block_test.go +++ b/core/types/block_test.go @@ -159,9 +159,10 @@ func (h *testHasher) Reset() { h.hasher.Reset() } -func (h *testHasher) Update(key, val []byte) { +func (h *testHasher) Update(key, val []byte) error { h.hasher.Write(key) h.hasher.Write(val) + return nil } func (h *testHasher) Hash() common.Hash { diff --git a/core/types/derive_sha.go b/core/types/derive_sha.go index 5cdd952d12..721989f28e 100644 --- a/core/types/derive_sha.go +++ b/core/types/derive_sha.go @@ -30,7 +30,7 @@ type DerivableList interface { // Hasher is the tool used to calculate the hash of derivable list. type Hasher interface { Reset() - Update([]byte, []byte) + Update([]byte, []byte) error Hash() common.Hash } diff --git a/core/types/hashing.go b/core/types/hashing.go new file mode 100644 index 0000000000..4d9b5d3173 --- /dev/null +++ b/core/types/hashing.go @@ -0,0 +1,69 @@ +// Copyright 2021 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package types + +import ( + "bytes" + "fmt" + "math" + "sync" + + "github.com/CortexFoundation/CortexTheseus/common" + "github.com/CortexFoundation/CortexTheseus/crypto" + "github.com/CortexFoundation/CortexTheseus/rlp" +) + +// encodeBufferPool holds temporary encoder buffers for DeriveSha and TX encoding. +var encodeBufferPool = sync.Pool{ + New: func() interface{} { return new(bytes.Buffer) }, +} + +// getPooledBuffer retrieves a buffer from the pool and creates a byte slice of the +// requested size from it. +// +// The caller should return the *bytes.Buffer object back into encodeBufferPool after use! +// The returned byte slice must not be used after returning the buffer. +func getPooledBuffer(size uint64) ([]byte, *bytes.Buffer, error) { + if size > math.MaxInt { + return nil, nil, fmt.Errorf("can't get buffer of size %d", size) + } + buf := encodeBufferPool.Get().(*bytes.Buffer) + buf.Reset() + buf.Grow(int(size)) + b := buf.Bytes()[:int(size)] + return b, buf, nil +} + +// prefixedRlpHash writes the prefix into the hasher before rlp-encoding x. +// It's used for typed transactions. +func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) { + sha := hasherPool.Get().(crypto.KeccakState) + defer hasherPool.Put(sha) + sha.Reset() + sha.Write([]byte{prefix}) + rlp.Encode(sha, x) + sha.Read(h[:]) + return h +} + +// TrieHasher is the tool used to calculate the hash of derivable list. +// This is internal, do not use. +type TrieHasher interface { + Reset() + Update([]byte, []byte) error + Hash() common.Hash +} diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go deleted file mode 100644 index ba538c08bd..0000000000 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2020 The go-ethereum Authors -// This file is part of The go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with The go-ethereum library. If not, see . - -package stacktrie - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "hash" - "io" - - "github.com/CortexFoundation/CortexTheseus/common" - "github.com/CortexFoundation/CortexTheseus/ctxcdb" - "github.com/CortexFoundation/CortexTheseus/trie" - "golang.org/x/crypto/sha3" - "slices" -) - -type fuzzer struct { - input io.Reader - exhausted bool - debugging bool -} - -func (f *fuzzer) read(size int) []byte { - out := make([]byte, size) - if _, err := f.input.Read(out); err != nil { - f.exhausted = true - } - return out -} - -func (f *fuzzer) readSlice(min, max int) []byte { - var a uint16 - binary.Read(f.input, binary.LittleEndian, &a) - size := min + int(a)%(max-min) - out := make([]byte, size) - if _, err := f.input.Read(out); err != nil { - f.exhausted = true - } - return out -} - -// spongeDb is a dummy db backend which accumulates writes in a sponge -type spongeDb struct { - sponge hash.Hash - debug bool -} - -func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") } -func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } -func (s *spongeDb) Delete(key []byte) error { panic("implement me") } -func (s *spongeDb) DeleteRange(start, end []byte) error { panic("implement me") } -func (s *spongeDb) NewBatch() ctxcdb.Batch { return &spongeBatch{s} } -func (s *spongeDb) NewBatchWithSize(size int) ctxcdb.Batch { return &spongeBatch{s} } -func (s *spongeDb) NewSnapshot() (ctxcdb.Snapshot, error) { panic("implement me") } -func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } -func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } -func (s *spongeDb) Close() error { return nil } - -func (s *spongeDb) Put(key []byte, value []byte) error { - if s.debug { - fmt.Printf("db.Put %x : %x\n", key, value) - } - s.sponge.Write(key) - s.sponge.Write(value) - return nil -} -func (s *spongeDb) NewIterator(prefix []byte, start []byte) ctxcdb.Iterator { panic("implement me") } - -// spongeBatch is a dummy batch which immediately writes to the underlying spongedb -type spongeBatch struct { - db *spongeDb -} - -func (b *spongeBatch) Put(key, value []byte) error { - b.db.Put(key, value) - return nil -} -func (b *spongeBatch) Delete(key []byte) error { panic("implement me") } -func (b *spongeBatch) ValueSize() int { return 100 } -func (b *spongeBatch) Write() error { return nil } -func (b *spongeBatch) Reset() {} -func (b *spongeBatch) Replay(w ctxcdb.KeyValueWriter) error { return nil } - -type kv struct { - k, v []byte -} -type kvs []kv - -func (k kvs) Len() int { - return len(k) -} - -func (k kvs) Less(i, j int) bool { - return bytes.Compare(k[i].k, k[j].k) < 0 -} - -func (k kvs) Swap(i, j int) { - k[j], k[i] = k[i], k[j] -} - -// The function must return -// 1 if the fuzzer should increase priority of the -// -// given input during subsequent fuzzing (for example, the input is lexically -// correct and was parsed successfully); -// -// -1 if the input must not be added to corpus even if gives new coverage; and -// 0 otherwise -// other values are reserved for future use. -func Fuzz(data []byte) int { - f := fuzzer{ - input: bytes.NewReader(data), - exhausted: false, - } - return f.fuzz() -} - -func Debug(data []byte) int { - f := fuzzer{ - input: bytes.NewReader(data), - exhausted: false, - debugging: true, - } - return f.fuzz() -} - -func (f *fuzzer) fuzz() int { - - // This spongeDb is used to check the sequence of disk-db-writes - var ( - spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()} - dbA = trie.NewDatabase(spongeA) - trieA, _ = trie.New(trie.TrieID(common.Hash{}), dbA) - spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} - trieB = trie.NewStackTrie(spongeB) - vals kvs - useful bool - maxElements = 10000 - // operate on unique keys only - keys = make(map[string]struct{}) - ) - // Fill the trie with elements - for i := 0; !f.exhausted && i < maxElements; i++ { - k := f.read(32) - v := f.readSlice(1, 500) - if f.exhausted { - // If it was exhausted while reading, the value may be all zeroes, - // thus 'deletion' which is not supported on stacktrie - break - } - if _, present := keys[string(k)]; present { - // This key is a duplicate, ignore it - continue - } - keys[string(k)] = struct{}{} - vals = append(vals, kv{k: k, v: v}) - trieA.Update(k, v) - useful = true - } - if !useful { - return 0 - } - // Flush trie -> database - rootA, err := trieA.Commit(nil) - if err != nil { - panic(err) - } - // Flush memdb -> disk (sponge) - dbA.Commit(rootA, false) - - // Stacktrie requires sorted insertion - slices.SortFunc(vals, func(a, b kv) int { - return bytes.Compare(a.k, b.k) - }) - for _, kv := range vals { - if f.debugging { - fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v) - } - trieB.Update(kv.k, kv.v) - } - rootB := trieB.Hash() - if _, err := trieB.Commit(); err != nil { - panic(err) - } - if rootA != rootB { - panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) - } - sumA := spongeA.sponge.Sum(nil) - sumB := spongeB.sponge.Sum(nil) - if !bytes.Equal(sumA, sumB) { - panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB)) - } - return 1 -} diff --git a/trie/bytepool.go b/trie/bytepool.go new file mode 100644 index 0000000000..4f9c5672fd --- /dev/null +++ b/trie/bytepool.go @@ -0,0 +1,64 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +// bytesPool is a pool for byte slices. It is safe for concurrent use. +type bytesPool struct { + c chan []byte + w int +} + +// newBytesPool creates a new bytesPool. The sliceCap sets the capacity of +// newly allocated slices, and the nitems determines how many items the pool +// will hold, at maximum. +func newBytesPool(sliceCap, nitems int) *bytesPool { + return &bytesPool{ + c: make(chan []byte, nitems), + w: sliceCap, + } +} + +// Get returns a slice. Safe for concurrent use. +func (bp *bytesPool) Get() []byte { + select { + case b := <-bp.c: + return b + default: + return make([]byte, 0, bp.w) + } +} + +// GetWithSize returns a slice with specified byte slice size. +func (bp *bytesPool) GetWithSize(s int) []byte { + b := bp.Get() + if cap(b) < s { + return make([]byte, s) + } + return b[:s] +} + +// Put returns a slice to the pool. Safe for concurrent use. This method +// will ignore slices that are too small or too large (>3x the cap) +func (bp *bytesPool) Put(b []byte) { + if c := cap(b); c < bp.w || c > 3*bp.w { + return + } + select { + case bp.c <- b: + default: + } +} diff --git a/trie/encoding.go b/trie/encoding.go index ce5494a19b..5e8dcafee3 100644 --- a/trie/encoding.go +++ b/trie/encoding.go @@ -104,6 +104,18 @@ func keybytesToHex(str []byte) []byte { return nibbles } +// writeHexKey writes the hexkey into the given slice. +// OBS! This method omits the termination flag. +// OBS! The dst slice must be at least 2x as large as the key +func writeHexKey(dst []byte, key []byte) []byte { + _ = dst[2*len(key)-1] + for i, b := range key { + dst[i*2] = b / 16 + dst[i*2+1] = b % 16 + } + return dst[:2*len(key)] +} + // hexToKeybytes turns hex nibbles into key bytes. // This can only be used for keys of even length. func hexToKeybytes(hex []byte) []byte { diff --git a/trie/errors.go b/trie/errors.go index 2ac7fa3888..19eec38bde 100644 --- a/trie/errors.go +++ b/trie/errors.go @@ -22,7 +22,7 @@ import ( "github.com/CortexFoundation/CortexTheseus/common" ) -// MissingNodeError is returned by the trie functions (TryGet, TryUpdate, TryDelete) +// MissingNodeError is returned by the trie functions (TryGet, Update, TryDelete) // in the case where a trie node is not present in the local database. It contains // information necessary for retrieving the missing node. type MissingNodeError struct { diff --git a/trie/hasher.go b/trie/hasher.go index d057108b44..8676c05d03 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode { return n } +// hashDataTo hashes the provided data to the given destination buffer. The caller +// must ensure that the dst buffer is of appropriate size. +func (h *hasher) hashDataTo(dst, data []byte) { + h.sha.Reset() + h.sha.Write(data) + h.sha.Read(dst) +} + // proofHash is used to construct trie proofs, and returns the 'collapsed' // node (for later RLP encoding) aswell as the hashed node -- unless the // node is smaller than 32 bytes, in which case it will be returned as is. diff --git a/trie/node.go b/trie/node.go index 0f05300044..4d34c93d53 100644 --- a/trie/node.go +++ b/trie/node.go @@ -45,6 +45,27 @@ type ( } hashNode []byte valueNode []byte + + // fullnodeEncoder is a type used exclusively for encoding fullNode. + // Briefly instantiating a fullnodeEncoder and initializing with + // existing slices is less memory intense than using the fullNode type. + fullnodeEncoder struct { + Children [17][]byte + } + + // extNodeEncoder is a type used exclusively for encoding extension node. + // Briefly instantiating a extNodeEncoder and initializing with existing + // slices is less memory intense than using the shortNode type. + extNodeEncoder struct { + Key []byte + Val []byte + } + + // leafNodeEncoder is a type used exclusively for encoding leaf node. + leafNodeEncoder struct { + Key []byte + Val []byte + } ) // nilValueNode is used when collapsing internal trie nodes for hashing, since @@ -89,6 +110,7 @@ func (n *fullNode) fstring(ind string) string { } return resp + fmt.Sprintf("\n%s] ", ind) } + func (n *shortNode) fstring(ind string) string { return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" ")) } @@ -99,6 +121,7 @@ func (n valueNode) fstring(ind string) string { return fmt.Sprintf("%x ", []byte(n)) } +// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered. func mustDecodeNode(hash, buf []byte) node { n, err := decodeNode(hash, buf) if err != nil { diff --git a/trie/node_enc.go b/trie/node_enc.go index e3a02a9d2e..642d388f72 100644 --- a/trie/node_enc.go +++ b/trie/node_enc.go @@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) { w.ListEnd(offset) } +func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) { + offset := w.List() + for _, c := range n.Children { + if c == nil { + w.Write(rlp.EmptyString) + } else if len(c) < 32 { + w.Write(c) // rawNode + } else { + w.WriteBytes(c) // hashNode + } + } + w.ListEnd(offset) +} + func (n *shortNode) encode(w rlp.EncoderBuffer) { offset := w.List() w.WriteBytes(n.Key) @@ -51,6 +65,27 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) { w.ListEnd(offset) } +func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) { + offset := w.List() + w.WriteBytes(n.Key) + + if n.Val == nil { + w.Write(rlp.EmptyString) + } else if len(n.Val) < 32 { + w.Write(n.Val) // rawNode + } else { + w.WriteBytes(n.Val) // hashNode + } + w.ListEnd(offset) +} + +func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) { + offset := w.List() + w.WriteBytes(n.Key) // Compact format key + w.WriteBytes(n.Val) // Value node, must be non-nil + w.ListEnd(offset) +} + func (n hashNode) encode(w rlp.EncoderBuffer) { w.WriteBytes(n) } diff --git a/trie/proof.go b/trie/proof.go index 98d0af63e9..f8484fba03 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -491,7 +491,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key return nil, nil, nil, false, err } for index, key := range keys { - tr.TryUpdate(key, values[index]) + tr.Update(key, values[index]) } if tr.Hash() != rootHash { return nil, nil, nil, false, fmt.Errorf("invalid proof, want hash %x, got %x", rootHash, tr.Hash()) @@ -586,7 +586,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key tr.root = nil } for index, key := range keys { - tr.TryUpdate(key, values[index]) + tr.Update(key, values[index]) } if tr.Hash() != rootHash { return nil, nil, nil, false, fmt.Errorf("invalid proof, want hash %x, got %x", rootHash, tr.Hash()) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index a7cf67e2c7..fc4d597c26 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -1,5 +1,5 @@ // Copyright 2020 The go-ethereum Authors -// This file is part of The go-ethereum library. +// This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by @@ -12,171 +12,129 @@ // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License -// along with The go-ethereum library. If not, see . +// along with the go-ethereum library. If not, see . package trie import ( - "bufio" "bytes" - "encoding/gob" "errors" - "fmt" - "io" "sync" "github.com/CortexFoundation/CortexTheseus/common" "github.com/CortexFoundation/CortexTheseus/core/types" - "github.com/CortexFoundation/CortexTheseus/ctxcdb" - "github.com/CortexFoundation/CortexTheseus/log" ) -var ErrCommitDisabled = errors.New("no database for committing") - -var stPool = sync.Pool{ - New: func() any { - return NewStackTrie(nil) - }, -} - -func stackTrieFromPool(db ctxcdb.KeyValueStore) *StackTrie { - st := stPool.Get().(*StackTrie) - st.db = db - return st -} +var ( + stPool = sync.Pool{New: func() any { return new(stNode) }} + bPool = newBytesPool(32, 100) + _ = types.TrieHasher((*StackTrie)(nil)) +) -func returnToPool(st *StackTrie) { - st.Reset() - stPool.Put(st) -} +// OnTrieNode is a callback method invoked when a trie node is committed +// by the stack trie. The node is only committed if it's considered complete. +// +// The caller should not modify the contents of the returned path and blob +// slice, and their contents may be changed after the call. It is up to the +// `onTrieNode` receiver function to deep-copy the data if it wants to retain +// it after the call ends. +type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - nodeType uint8 // node type (as in branch, ext, leaf) - val []byte // value contained by this node if it's a leaf - key []byte // key chunk covered by this (full|ext) node - children [16]*StackTrie // list of children (for fullnodes and exts) - - db ctxcdb.KeyValueStore // Pointer to the commit db, can be nil + root *stNode + h *hasher + last []byte + onTrieNode OnTrieNode + kBuf []byte // buf space used for hex-key during insertions + pBuf []byte // buf space used for path during insertions } -// NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(db ctxcdb.KeyValueStore) *StackTrie { +// NewStackTrie allocates and initializes an empty trie. The committed nodes +// will be discarded immediately if no callback is configured. +func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { return &StackTrie{ - nodeType: emptyNode, - db: db, + root: stPool.Get().(*stNode), + h: newHasher(false), + onTrieNode: onTrieNode, + kBuf: make([]byte, 64), + pBuf: make([]byte, 64), } } -// NewFromBinary initialises a serialized stacktrie with the given db. -func NewFromBinary(data []byte, db ctxcdb.KeyValueStore) (*StackTrie, error) { - var st StackTrie - if err := st.UnmarshalBinary(data); err != nil { - return nil, err +func (t *StackTrie) grow(key []byte) { + if cap(t.kBuf) < 2*len(key) { + t.kBuf = make([]byte, 2*len(key)) } - // If a database is used, we need to recursively add it to every child - if db != nil { - st.setDb(db) + if cap(t.pBuf) < 2*len(key) { + t.pBuf = make([]byte, 2*len(key)) } - return &st, nil } -// MarshalBinary implements encoding.BinaryMarshaler -func (st *StackTrie) MarshalBinary() (data []byte, err error) { - var ( - b bytes.Buffer - w = bufio.NewWriter(&b) - ) - if err := gob.NewEncoder(w).Encode(struct { - Nodetype uint8 - Val []byte - Key []byte - }{ - st.nodeType, - st.val, - st.key, - }); err != nil { - return nil, err +// Update inserts a (key, value) pair into the stack trie. +func (t *StackTrie) Update(key, value []byte) error { + if len(value) == 0 { + return errors.New("trying to insert empty (deletion)") } - for _, child := range st.children { - if child == nil { - w.WriteByte(0) - continue - } - w.WriteByte(1) - if childData, err := child.MarshalBinary(); err != nil { - return nil, err - } else { - w.Write(childData) - } + t.grow(key) + k := writeHexKey(t.kBuf, key) + if bytes.Compare(t.last, k) >= 0 { + return errors.New("non-ascending key order") + } + if t.last == nil { + t.last = append([]byte{}, k...) // allocate key slice + } else { + t.last = append(t.last[:0], k...) // reuse key slice } - w.Flush() - return b.Bytes(), nil + t.insert(t.root, k, value, t.pBuf[:0]) + return nil } -// UnmarshalBinary implements encoding.BinaryUnmarshaler -func (st *StackTrie) UnmarshalBinary(data []byte) error { - r := bytes.NewReader(data) - return st.unmarshalBinary(r) +// Reset resets the stack trie object to empty state. +func (t *StackTrie) Reset() { + t.root = stPool.Get().(*stNode) + t.last = nil } -func (st *StackTrie) unmarshalBinary(r io.Reader) error { - var dec struct { - Nodetype uint8 - Val []byte - Key []byte - } - if err := gob.NewDecoder(r).Decode(&dec); err != nil { - return err - } - st.nodeType = dec.Nodetype - st.val = dec.Val - st.key = dec.Key - - var hasChild = make([]byte, 1) - for i := range st.children { - if _, err := r.Read(hasChild); err != nil { - return err - } else if hasChild[0] == 0 { - continue - } - var child StackTrie - if err := child.unmarshalBinary(r); err != nil { - return err - } - st.children[i] = &child - } - return nil +// TrieKey returns the internal key representation for the given user key. +func (t *StackTrie) TrieKey(key []byte) []byte { + k := keybytesToHex(key) + k = k[:len(k)-1] // chop the termination flag + return k } -func (st *StackTrie) setDb(db ctxcdb.KeyValueStore) { - st.db = db - for _, child := range st.children { - if child != nil { - child.setDb(db) - } - } +// stNode represents a node within a StackTrie +type stNode struct { + typ uint8 // node type (as in branch, ext, leaf) + key []byte // key chunk covered by this (leaf|ext) node + val []byte // value contained by this node if it's a leaf + children [16]*stNode // list of children (for branch and exts) } -func newLeaf(key, val []byte, db ctxcdb.KeyValueStore) *StackTrie { - st := stackTrieFromPool(db) - st.nodeType = leafNode +// newLeaf constructs a leaf node with provided node key and value. The key +// will be deep-copied in the function and safe to modify afterwards, but +// value is not. +func newLeaf(key, val []byte) *stNode { + st := stPool.Get().(*stNode) + st.typ = leafNode st.key = append(st.key, key...) st.val = val return st } -func newExt(key []byte, child *StackTrie, db ctxcdb.KeyValueStore) *StackTrie { - st := stackTrieFromPool(db) - st.nodeType = extNode +// newExt constructs an extension node with provided node key and child. The +// key will be deep-copied in the function and safe to modify afterwards. +func newExt(key []byte, child *stNode) *stNode { + st := stPool.Get().(*stNode) + st.typ = extNode st.key = append(st.key, key...) st.children[0] = child return st } -// List all values that StackTrie#nodeType can hold +// List all values that stNode#nodeType can hold const ( emptyNode = iota branchNode @@ -185,56 +143,50 @@ const ( hashedNode ) -// TryUpdate inserts a (key, value) pair into the stack trie -func (st *StackTrie) TryUpdate(key, value []byte) error { - if len(value) == 0 { - return errors.New("trying to insert empty (deletion)") +func (n *stNode) reset() *stNode { + if n.typ == hashedNode { + // On hashnodes, we 'own' the val: it is guaranteed to be not held + // by external caller. Hence, when we arrive here, we can put it back + // into the pool + bPool.Put(n.val) } - k := keybytesToHex(key) - st.insert(k[:len(k)-1], value) - return nil -} - -func (st *StackTrie) Update(key, value []byte) { - if err := st.TryUpdate(key, value); err != nil { - log.Error(fmt.Sprintf("Unhandled trie error: %v", err)) + n.key = n.key[:0] + n.val = nil + for i := range n.children { + n.children[i] = nil } -} - -func (st *StackTrie) Reset() { - st.db = nil - st.key = st.key[:0] - st.val = nil - for i := range st.children { - st.children[i] = nil - } - st.nodeType = emptyNode + n.typ = emptyNode + return n } // Helper function that, given a full key, determines the index // at which the chunk pointed by st.keyOffset is different from // the same chunk in the full key. -func (st *StackTrie) getDiffIndex(key []byte) int { - for idx, nibble := range st.key { +func (n *stNode) getDiffIndex(key []byte) int { + for idx, nibble := range n.key { if nibble != key[idx] { return idx } } - return len(st.key) + return len(n.key) } -// Helper function to that inserts a (key, value) pair into -// the trie. -func (st *StackTrie) insert(key, value []byte) { - switch st.nodeType { +// Helper function to that inserts a (key, value) pair into the trie. +// +// - The key is not retained by this method, but always copied if needed. +// - The value is retained by this method, as long as the leaf that it represents +// remains unhashed. However: it is never modified. +// - The path is not retained by this method. +func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { + switch st.typ { case branchNode: /* Branch */ idx := int(key[0]) // Unresolve elder siblings for i := idx - 1; i >= 0; i-- { if st.children[i] != nil { - if st.children[i].nodeType != hashedNode { - st.children[i].hash() + if st.children[i].typ != hashedNode { + t.hash(st.children[i], append(path, byte(i))) } break } @@ -242,9 +194,9 @@ func (st *StackTrie) insert(key, value []byte) { // Add new child if st.children[idx] == nil { - st.children[idx] = newLeaf(key[1:], value, st.db) + st.children[idx] = newLeaf(key[1:], value) } else { - st.children[idx].insert(key[1:], value) + t.insert(st.children[idx], key[1:], value, append(path, key[0])) } case extNode: /* Ext */ @@ -259,41 +211,46 @@ func (st *StackTrie) insert(key, value []byte) { if diffidx == len(st.key) { // Ext key and key segment are identical, recurse into // the child node. - st.children[0].insert(key[diffidx:], value) + t.insert(st.children[0], key[diffidx:], value, append(path, key[:diffidx]...)) return } // Save the original part. Depending if the break is // at the extension's last byte or not, create an // intermediate extension or use the extension's child // node directly. - var n *StackTrie + var n *stNode if diffidx < len(st.key)-1 { - n = newExt(st.key[diffidx+1:], st.children[0], st.db) + // Break on the non-last byte, insert an intermediate + // extension. The path prefix of the newly-inserted + // extension should also contain the different byte. + n = newExt(st.key[diffidx+1:], st.children[0]) + t.hash(n, append(path, st.key[:diffidx+1]...)) } else { // Break on the last byte, no need to insert - // an extension node: reuse the current node + // an extension node: reuse the current node. + // The path prefix of the original part should + // still be same. n = st.children[0] + t.hash(n, append(path, st.key...)) } - // Convert to hash - n.hash() - var p *StackTrie + var p *stNode if diffidx == 0 { // the break is on the first byte, so // the current node is converted into // a branch node. st.children[0] = nil p = st - st.nodeType = branchNode + st.typ = branchNode } else { // the common prefix is at least one byte // long, insert a new intermediate branch // node. - st.children[0] = stackTrieFromPool(st.db) - st.children[0].nodeType = branchNode + st.children[0] = stPool.Get().(*stNode) + st.children[0].typ = branchNode p = st.children[0] } // Create a leaf for the inserted part - o := newLeaf(key[diffidx+1:], value, st.db) + o := newLeaf(key[diffidx+1:], value) // Insert both child leaves where they belong: origIdx := st.key[diffidx] @@ -319,18 +276,18 @@ func (st *StackTrie) insert(key, value []byte) { // Check if the split occurs at the first nibble of the // chunk. In that case, no prefix extnode is necessary. // Otherwise, create that - var p *StackTrie + var p *stNode if diffidx == 0 { // Convert current leaf into a branch - st.nodeType = branchNode + st.typ = branchNode p = st st.children[0] = nil } else { // Convert current node into an ext, // and insert a child branch node. - st.nodeType = extNode - st.children[0] = NewStackTrie(st.db) - st.children[0].nodeType = branchNode + st.typ = extNode + st.children[0] = stPool.Get().(*stNode) + st.children[0].typ = branchNode p = st.children[0] } @@ -338,11 +295,11 @@ func (st *StackTrie) insert(key, value []byte) { // value and another containing the new value. The child leaf // is hashed directly in order to free up some memory. origIdx := st.key[diffidx] - p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val, st.db) - p.children[origIdx].hash() + p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val) + t.hash(p.children[origIdx], append(path, st.key[:diffidx+1]...)) newIdx := key[diffidx] - p.children[newIdx] = newLeaf(key[diffidx+1:], value, st.db) + p.children[newIdx] = newLeaf(key[diffidx+1:], value) // Finally, cut off the key part that has been passed // over to the children. @@ -350,8 +307,8 @@ func (st *StackTrie) insert(key, value []byte) { st.val = nil case emptyNode: /* Empty */ - st.nodeType = leafNode - st.key = key + st.typ = leafNode + st.key = append(st.key, key...) // deep-copy the key as it's volatile st.val = value case hashedNode: @@ -373,142 +330,97 @@ func (st *StackTrie) insert(key, value []byte) { // - And the 'st.type' will be 'hashedNode' AGAIN // // This method also sets 'st.type' to hashedNode, and clears 'st.key'. -func (st *StackTrie) hash() { - h := newHasher(false) - defer returnHasherToPool(h) - - st.hashRec(h) -} - -func (st *StackTrie) hashRec(hasher *hasher) { - // The switch below sets this to the RLP-encoding of this node. - var encodedNode []byte - - switch st.nodeType { +func (t *StackTrie) hash(st *stNode, path []byte) { + var blob []byte // RLP-encoded node blob + switch st.typ { case hashedNode: return case emptyNode: st.val = types.EmptyRootHash.Bytes() st.key = st.key[:0] - st.nodeType = hashedNode + st.typ = hashedNode return case branchNode: - var nodes rawFullNode + var nodes fullnodeEncoder for i, child := range st.children { if child == nil { - nodes[i] = nilValueNode continue } + t.hash(child, append(path, byte(i))) + nodes.Children[i] = child.val + } + nodes.encode(t.h.encbuf) + blob = t.h.encodedBytes() - child.hashRec(hasher) - if len(child.val) < 32 { - nodes[i] = rawNode(child.val) - } else { - nodes[i] = hashNode(child.val) + for i, child := range st.children { + if child == nil { + continue } - - // Release child back to pool. st.children[i] = nil - returnToPool(child) + stPool.Put(child.reset()) // Release child back to pool. } - nodes.encode(hasher.encbuf) - encodedNode = hasher.encodedBytes() - case extNode: - st.children[0].hashRec(hasher) + // recursively hash and commit child as the first step + t.hash(st.children[0], append(path, st.key...)) - n := rawShortNode{Key: hexToCompactInPlace(st.key)} - if len(st.children[0].val) < 32 { - n.Val = rawNode(st.children[0].val) - } else { - n.Val = hashNode(st.children[0].val) + // encode the extension node + n := extNodeEncoder{ + Key: hexToCompactInPlace(st.key), + Val: st.children[0].val, } + n.encode(t.h.encbuf) + blob = t.h.encodedBytes() - n.encode(hasher.encbuf) - encodedNode = hasher.encodedBytes() - - // Release child back to pool. - returnToPool(st.children[0]) + stPool.Put(st.children[0].reset()) // Release child back to pool. st.children[0] = nil case leafNode: st.key = append(st.key, byte(16)) - n := rawShortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)} - - n.encode(hasher.encbuf) - encodedNode = hasher.encodedBytes() + n := leafNodeEncoder{ + Key: hexToCompactInPlace(st.key), + Val: st.val, + } + n.encode(t.h.encbuf) + blob = t.h.encodedBytes() default: panic("invalid node type") } - - st.nodeType = hashedNode + // Convert the node type to hashNode and reset the key slice. + st.typ = hashedNode st.key = st.key[:0] - if len(encodedNode) < 32 { - st.val = common.CopyBytes(encodedNode) + + st.val = nil // Release reference to potentially externally held slice. + + // Skip committing the non-root node if the size is smaller than 32 bytes + // as tiny nodes are always embedded in their parent except root node. + if len(blob) < 32 && len(path) > 0 { + st.val = bPool.GetWithSize(len(blob)) + copy(st.val, blob) return } - // Write the hash to the 'val'. We allocate a new val here to not mutate - // input values - st.val = hasher.hashData(encodedNode) - if st.db != nil { - // TODO! Is it safe to Put the slice here? - // Do all db implementations copy the value provided? - st.db.Put(st.val, encodedNode) - } -} - -// Hash returns the hash of the current node. -func (st *StackTrie) Hash() (h common.Hash) { - hasher := newHasher(false) - defer returnHasherToPool(hasher) - - st.hashRec(hasher) - if len(st.val) == 32 { - copy(h[:], st.val) - return h + // input values. + st.val = bPool.GetWithSize(32) + t.h.hashDataTo(st.val, blob) + + // Invoke the callback it's provided. Notably, the path and blob slices are + // volatile, please deep-copy the slices in callback if the contents need + // to be retained. + if t.onTrieNode != nil { + t.onTrieNode(path, common.BytesToHash(st.val), blob) } - - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed, and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing. - hasher.sha.Reset() - hasher.sha.Write(st.val) - hasher.sha.Read(h[:]) - return h } -// Commit will firstly hash the entrie trie if it's still not hashed -// and then commit all nodes to the associated database. Actually most -// of the trie nodes MAY have been committed already. The main purpose -// here is to commit the root node. -// -// The associated database is expected, otherwise the whole commit -// functionality should be disabled. -func (st *StackTrie) Commit() (h common.Hash, err error) { - if st.db == nil { - return common.Hash{}, ErrCommitDisabled - } - - hasher := newHasher(false) - defer returnHasherToPool(hasher) - - st.hashRec(hasher) - if len(st.val) == 32 { - copy(h[:], st.val) - return h, nil - } - - // If the node's RLP isn't 32 bytes long, the node will not - // be hashed (and committed), and instead contain the rlp-encoding of the - // node. For the top level node, we need to force the hashing+commit. - hasher.sha.Reset() - hasher.sha.Write(st.val) - hasher.sha.Read(h[:]) - st.db.Put(h[:], st.val) - return h, nil +// Hash will firstly hash the entire trie if it's still not hashed and then commit +// all leftover nodes to the associated database. Actually most of the trie nodes +// have been committed already. The main purpose here is to commit the nodes on +// right boundary. +func (t *StackTrie) Hash() common.Hash { + n := t.root + t.hash(n, nil) + return common.BytesToHash(n.val) } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index 55901882f7..a9887b359d 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -174,7 +174,7 @@ func TestStackTrieInsertAndHash(t *testing.T) { st.Reset() for j := 0; j < l; j++ { kv := &test[j] - if err := st.TryUpdate(common.FromHex(kv.K), []byte(kv.V)); err != nil { + if err := st.Update(common.FromHex(kv.K), []byte(kv.V)); err != nil { t.Fatal(err) } } @@ -193,8 +193,8 @@ func TestSizeBug(t *testing.T) { leaf := common.FromHex("290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563") value := common.FromHex("94cf40d0d2b44f2b66e07cace1372ca42b73cf21a3") - nt.TryUpdate(leaf, value) - st.TryUpdate(leaf, value) + nt.Update(leaf, value) + st.Update(leaf, value) if nt.Hash() != st.Hash() { t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) @@ -218,8 +218,8 @@ func TestEmptyBug(t *testing.T) { } for _, kv := range kvs { - nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) - st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) } if nt.Hash() != st.Hash() { @@ -241,8 +241,8 @@ func TestValLength56(t *testing.T) { } for _, kv := range kvs { - nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) - st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) } if nt.Hash() != st.Hash() { @@ -263,8 +263,8 @@ func TestUpdateSmallNodes(t *testing.T) { {"65", "3000"}, // stacktrie.Update } for _, kv := range kvs { - nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) - st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) } if nt.Hash() != st.Hash() { t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) @@ -291,8 +291,8 @@ func TestUpdateVariableKeys(t *testing.T) { {"0x3330353463653239356131303167617430", "313131"}, } for _, kv := range kvs { - nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) - st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V)) + nt.Update(common.FromHex(kv.K), common.FromHex(kv.V)) + st.Update(common.FromHex(kv.K), common.FromHex(kv.V)) } if nt.Hash() != st.Hash() { t.Fatalf("error %x != %x", st.Hash(), nt.Hash()) @@ -309,7 +309,7 @@ func TestStacktrieNotModifyValues(t *testing.T) { value := make([]byte, 1, 100) value[0] = 0x2 want := common.CopyBytes(value) - st.TryUpdate([]byte{0x01}, value) + st.Update([]byte{0x01}, value) st.Hash() if have := value; !bytes.Equal(have, want) { t.Fatalf("tiny trie: have %#x want %#x", have, want) @@ -330,7 +330,7 @@ func TestStacktrieNotModifyValues(t *testing.T) { for i := 0; i < 1000; i++ { key := common.BigToHash(keyB) value := getValue(i) - st.TryUpdate(key.Bytes(), value) + st.Update(key.Bytes(), value) vals = append(vals, value) keyB = keyB.Add(keyB, keyDelta) keyDelta.Add(keyDelta, common.Big1) @@ -346,48 +346,3 @@ func TestStacktrieNotModifyValues(t *testing.T) { } } - -// TestStacktrieSerialization tests that the stacktrie works well if we -// serialize/unserialize it a lot -func TestStacktrieSerialization(t *testing.T) { - var ( - st = NewStackTrie(nil) - nt, _ = New(TrieID(common.Hash{}), NewDatabase(memorydb.New())) - keyB = big.NewInt(1) - keyDelta = big.NewInt(1) - vals [][]byte - keys [][]byte - ) - getValue := func(i int) []byte { - if i%2 == 0 { // large - return crypto.Keccak256(big.NewInt(int64(i)).Bytes()) - } else { //small - return big.NewInt(int64(i)).Bytes() - } - } - for i := 0; i < 10; i++ { - vals = append(vals, getValue(i)) - keys = append(keys, common.BigToHash(keyB).Bytes()) - keyB = keyB.Add(keyB, keyDelta) - keyDelta.Add(keyDelta, common.Big1) - } - for i, k := range keys { - nt.TryUpdate(k, common.CopyBytes(vals[i])) - } - - for i, k := range keys { - blob, err := st.MarshalBinary() - if err != nil { - t.Fatal(err) - } - newSt, err := NewFromBinary(blob, nil) - if err != nil { - t.Fatal(err) - } - st = newSt - st.TryUpdate(k, common.CopyBytes(vals[i])) - } - if have, want := st.Hash(), nt.Hash(); have != want { - t.Fatalf("have %#x want %#x", have, want) - } -} diff --git a/trie/trie_test.go b/trie/trie_test.go index ebfe2a84f2..59061b5d6f 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -843,96 +843,6 @@ func TestCommitSequenceRandomBlobs(t *testing.T) { } } -func TestCommitSequenceStackTrie(t *testing.T) { - for count := 1; count < 200; count++ { - prng := rand.New(rand.NewSource(int64(count))) - // This spongeDb is used to check the sequence of disk-db-writes - s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"} - db := NewDatabase(s) - trie, _ := New(TrieID(common.Hash{}), db) - // Another sponge is used for the stacktrie commits - stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(stackTrieSponge) - // Fill the trie with elements - for i := 1; i < count; i++ { - // For the stack trie, we need to do inserts in proper order - key := make([]byte, 32) - binary.BigEndian.PutUint64(key, uint64(i)) - var val []byte - // 50% short elements, 50% large elements - if prng.Intn(2) == 0 { - val = make([]byte, 1+prng.Intn(32)) - } else { - val = make([]byte, 1+prng.Intn(1024)) - } - prng.Read(val) - trie.TryUpdate(key, val) - stTrie.TryUpdate(key, val) - } - // Flush trie -> database - root, _ := trie.Commit(nil) - // Flush memdb -> disk (sponge) - db.Commit(root, false) - // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } - if stRoot != root { - t.Fatalf("root wrong, got %x exp %x", stRoot, root) - } - if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) { - // Show the journal - t.Logf("Expected:") - for i, v := range s.journal { - t.Logf("op %d: %v", i, v) - } - t.Logf("Stacktrie:") - for i, v := range stackTrieSponge.journal { - t.Logf("op %d: %v", i, v) - } - t.Fatalf("test %d, disk write sequence wrong:\ngot %x exp %x\n", count, got, exp) - } - } -} - -// TestCommitSequenceSmallRoot tests that a trie which is essentially only a -// small (<32 byte) shortnode with an included value is properly committed to a -// database. -// This case might not matter, since in practice, all keys are 32 bytes, which means -// that even a small trie which contains a leaf will have an extension making it -// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do. -func TestCommitSequenceSmallRoot(t *testing.T) { - s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"} - db := NewDatabase(s) - trie, _ := New(TrieID(common.Hash{}), db) - // Another sponge is used for the stacktrie commits - stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"} - stTrie := NewStackTrie(stackTrieSponge) - // Add a single small-element to the trie(s) - key := make([]byte, 5) - key[0] = 1 - trie.TryUpdate(key, []byte{0x1}) - stTrie.TryUpdate(key, []byte{0x1}) - // Flush trie -> database - root, _ := trie.Commit(nil) - // Flush memdb -> disk (sponge) - db.Commit(root, false) - // And flush stacktrie -> disk - stRoot, err := stTrie.Commit() - if err != nil { - t.Fatalf("Failed to commit stack trie %v", err) - } - if stRoot != root { - t.Fatalf("root wrong, got %x exp %x", stRoot, root) - } - - t.Logf("root: %x\n", stRoot) - if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) { - t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp) - } -} - // BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie. // This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically, // storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple