From 371bc4440496bcd358f66e1e8f680f04619d500f Mon Sep 17 00:00:00 2001
From: will-2012 <117156346+will-2012@users.noreply.github.com>
Date: Mon, 29 Jan 2024 11:25:10 +0800
Subject: [PATCH] feat: sync inspect mpt tool from bsc (#52)

Co-authored-by: Fynn
Co-authored-by: will@2012
---
 cmd/geth/dbcmd.go    |  99 ++++++++++++
 trie/inspect_trie.go | 348 +++++++++++++++++++++++++++++++++++++++++++
 trie/trie.go         |  14 ++
 3 files changed, 461 insertions(+)
 create mode 100644 trie/inspect_trie.go

diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go
index b409b19260..e182b7ed08 100644
--- a/cmd/geth/dbcmd.go
+++ b/cmd/geth/dbcmd.go
@@ -30,6 +30,7 @@ import (
 	"github.com/ethereum/go-ethereum/cmd/utils"
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/common/hexutil"
+	"github.com/ethereum/go-ethereum/common/math"
 	"github.com/ethereum/go-ethereum/console/prompt"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/core/state/snapshot"
@@ -62,6 +63,7 @@ Remove blockchain and state databases`,
 			dbCompactCmd,
 			dbGetCmd,
 			dbDeleteCmd,
+			dbInspectTrieCmd,
 			dbPutCmd,
 			dbGetSlotsCmd,
 			dbDumpFreezerIndex,
@@ -81,6 +83,17 @@ Remove blockchain and state databases`,
 		Usage:       "Inspect the storage size for each type of data in the database",
 		Description: `This commands iterates the entire database. If the optional 'prefix' and 'start' arguments are provided, then the iteration is limited to the given subset of data.`,
 	}
+	dbInspectTrieCmd = &cli.Command{
+		Action:    inspectTrie,
+		Name:      "inspect-trie",
+		ArgsUsage: "<blocknum> <jobnum>",
+		Flags: []cli.Flag{
+			utils.DataDirFlag,
+			utils.SyncModeFlag,
+		},
+		Usage:       "Inspect the MPT tree of the account and contract.",
+		Description: `This command iterates the entire WorldState.`,
+	}
 	dbCheckStateContentCmd = &cli.Command{
 		Action:    checkStateContent,
 		Name:      "check-state-content",
@@ -255,6 +268,92 @@ func confirmAndRemoveDB(database string, kind string) {
 	}
 }
 
+// inspectTrie walks the state trie selected by Args[0] ("latest", "snapshot"
+// or a decimal block number) together with every storage trie it references,
+// and prints node statistics. Args[1] optionally bounds the number of
+// traversal goroutines and defaults to 1000.
+func inspectTrie(ctx *cli.Context) error {
+	if ctx.NArg() < 1 {
+		return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
+	}
+
+	if ctx.NArg() > 3 {
+		return fmt.Errorf("Max 3 arguments: %v", ctx.Command.ArgsUsage)
+	}
+
+	var (
+		blockNumber  uint64
+		trieRootHash common.Hash
+		jobnum       uint64
+	)
+
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, true)
+	defer db.Close()
+
+	switch arg := ctx.Args().Get(0); arg {
+	case "latest":
+		headerHash := rawdb.ReadHeadHeaderHash(db)
+		number := rawdb.ReadHeaderNumber(db, headerHash)
+		if number == nil {
+			return fmt.Errorf("head header number not found, hash: %v", headerHash)
+		}
+		blockNumber = *number
+	case "snapshot":
+		trieRootHash = rawdb.ReadSnapshotRoot(db)
+		blockNumber = math.MaxUint64
+	default:
+		var err error
+		blockNumber, err = strconv.ParseUint(arg, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to Parse blocknum, Args[0]: %v, err: %v", arg, err)
+		}
+	}
+
+	if ctx.NArg() == 1 {
+		jobnum = 1000
+	} else {
+		var err error
+		jobnum, err = strconv.ParseUint(ctx.Args().Get(1), 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to Parse jobnum, Args[1]: %v, err: %v", ctx.Args().Get(1), err)
+		}
+	}
+
+	// math.MaxUint64 marks the snapshot case, whose root is already known.
+	if blockNumber != math.MaxUint64 {
+		headerBlockHash := rawdb.ReadCanonicalHash(db, blockNumber)
+		if headerBlockHash == (common.Hash{}) {
+			return fmt.Errorf("ReadHeadBlockHash empty hash")
+		}
+		blockHeader := rawdb.ReadHeader(db, headerBlockHash, blockNumber)
+		if blockHeader == nil {
+			return fmt.Errorf("header not found, hash: %v, blocknum: %v", headerBlockHash, blockNumber)
+		}
+		trieRootHash = blockHeader.Root
+	}
+	if trieRootHash == (common.Hash{}) {
+		return fmt.Errorf("empty root hash")
+	}
+	fmt.Printf("ReadBlockHeader, root: %v, blocknum: %v\n", trieRootHash, blockNumber)
+
+	triedb := trie.NewDatabase(db)
+	theTrie, err := trie.New(trie.TrieID(trieRootHash), triedb)
+	if err != nil {
+		fmt.Printf("fail to new trie tree, err: %v, rootHash: %v\n", err, trieRootHash.String())
+		return err
+	}
+	theInspect, err := trie.NewInspector(theTrie, triedb, trieRootHash, blockNumber, jobnum)
+	if err != nil {
+		return err
+	}
+	theInspect.Run()
+	theInspect.DisplayResult()
+	return nil
+}
+
 func inspect(ctx *cli.Context) error {
 	var (
 		prefix []byte
diff --git a/trie/inspect_trie.go b/trie/inspect_trie.go
new file mode 100644
index 0000000000..f0f706f049
--- /dev/null
+++ b/trie/inspect_trie.go
@@ -0,0 +1,348 @@
+package trie
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"math/big"
+	"os"
+	"runtime"
+	"sort"
+	"strconv"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/olekukonko/tablewriter"
+
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"golang.org/x/sync/semaphore"
+)
+
+const (
+	// DEFAULT_TRIEDBCACHE_SIZE is the size limit, in bytes, handed to
+	// Database.Cap while a hash-scheme trie database is being traversed.
+	DEFAULT_TRIEDBCACHE_SIZE = 1024 * 1024 * 1024
+)
+
+// Account is the RLP layout decoded from the leaves of the account trie.
+type Account struct {
+	Nonce    uint64
+	Balance  *big.Int
+	Root     common.Hash // merkle root of the storage trie
+	CodeHash []byte
+}
+
+// Inspector walks an account trie and the storage tries it references and
+// collects node counts per trie and per level.
+type Inspector struct {
+	trie           *Trie // traverse trie
+	db             *Database
+	stateRootHash  common.Hash
+	blocknum       uint64
+	root           node   // root of triedb
+	totalNum       uint64 // number of visited nodes, updated atomically
+	wg             sync.WaitGroup
+	statLock       sync.RWMutex
+	result         map[string]*TrieTreeStat // keyed by owner hash, "" is the account trie
+	sem            *semaphore.Weighted      // bounds the spawned traversal goroutines
+	eoaAccountNums uint64                   // accounts with the empty code hash, updated atomically
+}
+
+// TrieTreeStat holds the node counters of a single trie.
+type TrieTreeStat struct {
+	isAccountTrie      bool
+	theNodeStatByLevel [15]NodeStat
+	totalNodeStat      NodeStat
+}
+
+// NodeStat counts trie nodes by kind.
+type NodeStat struct {
+	ShortNodeCnt uint64
+	FullNodeCnt  uint64
+	ValueNodeCnt uint64
+}
+
+// AtomicAdd records theNode, found at level height, in both the per-level and
+// the total counters. It panics for any node kind other than short, full or
+// value nodes, and for a height outside theNodeStatByLevel.
+func (trieStat *TrieTreeStat) AtomicAdd(theNode node, height uint32) {
+	switch theNode.(type) {
+	case *shortNode:
+		atomic.AddUint64(&trieStat.totalNodeStat.ShortNodeCnt, 1)
+		atomic.AddUint64(&trieStat.theNodeStatByLevel[height].ShortNodeCnt, 1)
+	case *fullNode:
+		atomic.AddUint64(&trieStat.totalNodeStat.FullNodeCnt, 1)
+		atomic.AddUint64(&trieStat.theNodeStatByLevel[height].FullNodeCnt, 1)
+	case valueNode:
+		atomic.AddUint64(&trieStat.totalNodeStat.ValueNodeCnt, 1)
+		atomic.AddUint64(&trieStat.theNodeStatByLevel[height].ValueNodeCnt, 1)
+	default:
+		panic(errors.New("Invalid node type to statistics"))
+	}
+}
+
+// Display renders the per-level and total counters as a table on stdout. The
+// caption is treeType, followed by "-" and ownerAddress when one is given.
+func (trieStat *TrieTreeStat) Display(ownerAddress string, treeType string) {
+	table := tablewriter.NewWriter(os.Stdout)
+	table.SetHeader([]string{"-", "Level", "ShortNodeCnt", "FullNodeCnt", "ValueNodeCnt"})
+	if ownerAddress == "" {
+		table.SetCaption(true, fmt.Sprintf("%v", treeType))
+	} else {
+		table.SetCaption(true, fmt.Sprintf("%v-%v", treeType, ownerAddress))
+	}
+	table.SetAlignment(1)
+	for i := 0; i < len(trieStat.theNodeStatByLevel); i++ {
+		nodeStat := trieStat.theNodeStatByLevel[i]
+		if nodeStat.FullNodeCnt == 0 && nodeStat.ShortNodeCnt == 0 && nodeStat.ValueNodeCnt == 0 {
+			break
+		}
+		table.AppendBulk([][]string{
+			{"-", strconv.Itoa(i), nodeStat.ShortNodeCount(), nodeStat.FullNodeCount(), nodeStat.ValueNodeCount()},
+		})
+	}
+	table.AppendBulk([][]string{
+		{"Total", "-", trieStat.totalNodeStat.ShortNodeCount(), trieStat.totalNodeStat.FullNodeCount(), trieStat.totalNodeStat.ValueNodeCount()},
+	})
+	table.Render()
+}
+
+// Uint64ToString formats cnt in base 10.
+func Uint64ToString(cnt uint64) string {
+	return strconv.FormatUint(cnt, 10)
+}
+
+// ShortNodeCount returns ShortNodeCnt as a decimal string.
+func (nodeStat *NodeStat) ShortNodeCount() string {
+	return Uint64ToString(nodeStat.ShortNodeCnt)
+}
+
+// FullNodeCount returns FullNodeCnt as a decimal string.
+func (nodeStat *NodeStat) FullNodeCount() string {
+	return Uint64ToString(nodeStat.FullNodeCnt)
+}
+
+// ValueNodeCount returns ValueNodeCnt as a decimal string.
+func (nodeStat *NodeStat) ValueNodeCount() string {
+	return Uint64ToString(nodeStat.ValueNodeCnt)
+}
+
+// NewInspector returns an Inspector for tr. jobnum bounds the number of
+// goroutines the traversal spawns. An error is returned when tr or its root
+// node is nil.
+func NewInspector(tr *Trie, db *Database, stateRootHash common.Hash, blocknum uint64, jobnum uint64) (*Inspector, error) {
+	if tr == nil {
+		return nil, errors.New("trie is nil")
+	}
+
+	if tr.root == nil {
+		return nil, errors.New("trie root is nil")
+	}
+
+	ins := &Inspector{
+		trie:          tr,
+		db:            db,
+		stateRootHash: stateRootHash,
+		blocknum:      blocknum,
+		root:          tr.root,
+		result:        make(map[string]*TrieTreeStat),
+		sem:           semaphore.NewWeighted(int64(jobnum)),
+	}
+
+	return ins, nil
+}
+
+// Run traverses the account trie and every storage trie reachable from it,
+// and returns once all traversal goroutines have finished.
+func (inspect *Inspector) Run() {
+	accountTrieStat := &TrieTreeStat{
+		isAccountTrie: true,
+	}
+	if inspect.db.Scheme() == rawdb.HashScheme {
+		// Cap the trie database cache every 30s while the traversal runs.
+		// Closing done on return ends the goroutine; ranging over ticker.C
+		// alone would never terminate, because Stop does not close the
+		// channel.
+		done := make(chan struct{})
+		defer close(done)
+		go func() {
+			ticker := time.NewTicker(30 * time.Second)
+			defer ticker.Stop()
+			for {
+				select {
+				case <-ticker.C:
+					inspect.db.Cap(DEFAULT_TRIEDBCACHE_SIZE)
+				case <-done:
+					return
+				}
+			}
+		}()
+	}
+
+	if _, ok := inspect.result[""]; !ok {
+		inspect.result[""] = accountTrieStat
+	}
+	log.Info("Find Account Trie Tree", "rootHash: ", inspect.trie.Hash().String(), "BlockNum: ", inspect.blocknum)
+
+	inspect.ConcurrentTraversal(inspect.trie, accountTrieStat, inspect.root, 0, []byte{})
+	inspect.wg.Wait()
+}
+
+// SubConcurrentTraversal is the goroutine entry point of the traversal: it
+// runs ConcurrentTraversal and then marks one unit of the wait group as done,
+// so the caller must have called wg.Add(1) beforehand.
+func (inspect *Inspector) SubConcurrentTraversal(theTrie *Trie, theTrieTreeStat *TrieTreeStat, theNode node, height uint32, path []byte) {
+	defer inspect.wg.Done()
+	inspect.ConcurrentTraversal(theTrie, theTrieTreeStat, theNode, height, path)
+}
+
+// spawnOrTraverse walks theNode in a new goroutine when a semaphore slot is
+// free and in the calling goroutine otherwise. The slot is released when the
+// goroutine ends, so at most jobnum spawned goroutines run at any time.
+func (inspect *Inspector) spawnOrTraverse(theTrie *Trie, theTrieTreeStat *TrieTreeStat, theNode node, height uint32, path []byte) {
+	if !inspect.sem.TryAcquire(1) {
+		inspect.ConcurrentTraversal(theTrie, theTrieTreeStat, theNode, height, path)
+		return
+	}
+	// The caller keeps appending to path's backing array, so the goroutine
+	// gets a private copy.
+	owned := make([]byte, len(path))
+	copy(owned, path)
+	inspect.wg.Add(1)
+	go func() {
+		defer inspect.sem.Release(1)
+		inspect.SubConcurrentTraversal(theTrie, theTrieTreeStat, theNode, height, owned)
+	}()
+}
+
+// ConcurrentTraversal counts theNode in theTrieTreeStat and walks everything
+// below it. Hash nodes are resolved through theTrie and are not counted
+// themselves. A value node that decodes as an Account with a non-empty storage
+// root starts the traversal of that storage trie under its own TrieTreeStat.
+// path is the hex-nibble path from the trie root to theNode.
+func (inspect *Inspector) ConcurrentTraversal(theTrie *Trie, theTrieTreeStat *TrieTreeStat, theNode node, height uint32, path []byte) {
+	// print process progress
+	total_num := atomic.AddUint64(&inspect.totalNum, 1)
+	if total_num%100000 == 0 {
+		fmt.Printf("Complete progress: %v, go routines Num: %v\n", total_num, runtime.NumGoroutine())
+	}
+
+	// nil node
+	if theNode == nil {
+		return
+	}
+
+	switch current := (theNode).(type) {
+	case *shortNode:
+		inspect.ConcurrentTraversal(theTrie, theTrieTreeStat, current.Val, height, append(path, current.Key...))
+	case *fullNode:
+		for idx, child := range current.Children {
+			if child == nil {
+				continue
+			}
+			inspect.spawnOrTraverse(theTrie, theTrieTreeStat, child, height+1, append(path, byte(idx)))
+		}
+	case hashNode:
+		n, err := theTrie.resolveWithoutTrack(current, path)
+		if err != nil {
+			fmt.Printf("Resolve HashNode error: %v, TrieRoot: %v, Height: %v, Path: %v\n", err, theTrie.Hash().String(), height+1, path)
+			return
+		}
+		inspect.ConcurrentTraversal(theTrie, theTrieTreeStat, n, height, path)
+		return
+	case valueNode:
+		if !hasTerm(path) {
+			break
+		}
+		var account Account
+		if err := rlp.Decode(bytes.NewReader(current), &account); err != nil {
+			break
+		}
+		if common.BytesToHash(account.CodeHash) == types.EmptyCodeHash {
+			atomic.AddUint64(&inspect.eoaAccountNums, 1)
+		}
+		if account.Root == (common.Hash{}) || account.Root == types.EmptyRootHash {
+			break
+		}
+		ownerAddress := common.BytesToHash(hexToCompact(path))
+		contractTrie, err := New(StorageTrieID(inspect.stateRootHash, ownerAddress, account.Root), inspect.db)
+		if err != nil {
+			fmt.Printf("New contract trie node: %v, error: %v, Height: %v, Path: %v\n", theNode, err, height, path)
+			break
+		}
+		contractTrie.tracer.reset()
+		trieStat := &TrieTreeStat{
+			isAccountTrie: false,
+		}
+
+		inspect.statLock.Lock()
+		if _, ok := inspect.result[ownerAddress.String()]; !ok {
+			inspect.result[ownerAddress.String()] = trieStat
+		}
+		inspect.statLock.Unlock()
+
+		inspect.spawnOrTraverse(contractTrie, trieStat, contractTrie.root, 0, []byte{})
+	default:
+		panic(errors.New("Invalid node type to traverse."))
+	}
+	theTrieTreeStat.AtomicAdd(theNode, height)
+}
+
+// DisplayResult prints the account trie statistics, the number of accounts
+// with the empty code hash, the five largest storage tries and the totals
+// over all storage tries. It must be called after Run has returned.
+func (inspect *Inspector) DisplayResult() {
+	// display root hash
+	if _, ok := inspect.result[""]; !ok {
+		log.Error("Display result error", "err", "missing account trie")
+		return
+	}
+	inspect.result[""].Display("", "AccountTrie")
+
+	type SortedTrie struct {
+		totalNum     uint64
+		ownerAddress string
+	}
+	// display contract trie
+	var sortedTriesByNums []SortedTrie
+	var totalContactsNodeStat NodeStat
+	var contractTrieCnt uint64
+
+	for ownerAddress, stat := range inspect.result {
+		if ownerAddress == "" {
+			continue
+		}
+		contractTrieCnt++
+		totalContactsNodeStat.ShortNodeCnt += stat.totalNodeStat.ShortNodeCnt
+		totalContactsNodeStat.FullNodeCnt += stat.totalNodeStat.FullNodeCnt
+		totalContactsNodeStat.ValueNodeCnt += stat.totalNodeStat.ValueNodeCnt
+		totalNodeCnt := stat.totalNodeStat.ShortNodeCnt + stat.totalNodeStat.ValueNodeCnt + stat.totalNodeStat.FullNodeCnt
+		sortedTriesByNums = append(sortedTriesByNums, SortedTrie{totalNum: totalNodeCnt, ownerAddress: ownerAddress})
+	}
+	// break ties by owner so the output does not depend on map iteration order
+	sort.Slice(sortedTriesByNums, func(i, j int) bool {
+		if sortedTriesByNums[i].totalNum != sortedTriesByNums[j].totalNum {
+			return sortedTriesByNums[i].totalNum > sortedTriesByNums[j].totalNum
+		}
+		return sortedTriesByNums[i].ownerAddress < sortedTriesByNums[j].ownerAddress
+	})
+	fmt.Println("EOA accounts num: ", atomic.LoadUint64(&inspect.eoaAccountNums))
+	// only display top 5
+	for i, t := range sortedTriesByNums {
+		if i >= 5 {
+			break
+		}
+		if stat, ok := inspect.result[t.ownerAddress]; !ok {
+			log.Error("Storage trie stat not found", "ownerAddress", t.ownerAddress)
+		} else {
+			stat.Display(t.ownerAddress, "ContractTrie")
+		}
+	}
+	fmt.Printf("Contract Trie, total trie num: %v, ShortNodeCnt: %v, FullNodeCnt: %v, ValueNodeCnt: %v\n",
+		contractTrieCnt, totalContactsNodeStat.ShortNodeCnt, totalContactsNodeStat.FullNodeCnt, totalContactsNodeStat.ValueNodeCnt)
+}
diff --git a/trie/trie.go b/trie/trie.go
index d98fe992d5..3a83c37aa7 100644
--- a/trie/trie.go
+++ b/trie/trie.go
@@ -544,6 +544,20 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
 	return mustDecodeNode(n, blob), nil
 }
 
+// resolveWithoutTrack loads the node behind a hash node through t.reader and
+// decodes it; it does not touch t.tracer. Any other kind of node is returned
+// unchanged.
+func (t *Trie) resolveWithoutTrack(n node, prefix []byte) (node, error) {
+	if n, ok := n.(hashNode); ok {
+		blob, err := t.reader.nodeBlob(prefix, common.BytesToHash(n))
+		if err != nil {
+			return nil, err
+		}
+		return mustDecodeNode(n, blob), nil
+	}
+	return n, nil
+}
+
 // Hash returns the root hash of the trie. It does not write to the
 // database and can be used even if the trie doesn't have one.
 func (t *Trie) Hash() common.Hash {