Skip to content

Commit

Permalink
perf: speedup pbss trienode read (bnb-chain#122)
Browse files Browse the repository at this point in the history
Co-authored-by: will@2012 <[email protected]>
  • Loading branch information
will-2012 and will@2012 authored Jul 4, 2024
1 parent 7dd5ec3 commit 685031a
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 12 deletions.
18 changes: 16 additions & 2 deletions eth/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ import (
"github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/rpc"
"github.com/ethereum/go-ethereum/trie/triedb/pathdb"
)

const (
Expand Down Expand Up @@ -139,8 +140,21 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
}
config.TrieDirtyCache = 0
}
log.Info("Allocated trie memory caches", "clean", common.StorageSize(config.TrieCleanCache)*1024*1024, "dirty", common.StorageSize(config.TrieDirtyCache)*1024*1024)

// Optimize memory distribution by reallocating surplus allowance from the
// dirty cache to the clean cache.
if config.StateScheme == rawdb.PathScheme && config.TrieDirtyCache > pathdb.MaxBufferSize/1024/1024 {
log.Info("Capped dirty cache size", "provided", common.StorageSize(config.TrieDirtyCache)*1024*1024,
"adjusted", common.StorageSize(pathdb.MaxBufferSize))
log.Info("Clean cache size", "provided", common.StorageSize(config.TrieCleanCache)*1024*1024,
"adjusted", common.StorageSize(config.TrieCleanCache+config.TrieDirtyCache-pathdb.MaxBufferSize/1024/1024)*1024*1024)
config.TrieCleanCache += config.TrieDirtyCache - pathdb.MaxBufferSize/1024/1024
config.TrieDirtyCache = pathdb.MaxBufferSize / 1024 / 1024
}
log.Info("Allocated memory caches",
"state_scheme", config.StateScheme,
"trie_clean_cache", common.StorageSize(config.TrieCleanCache)*1024*1024,
"trie_dirty_cache", common.StorageSize(config.TrieDirtyCache)*1024*1024,
"snapshot_cache", common.StorageSize(config.SnapshotCache)*1024*1024)
// Assemble the Ethereum object
chainDb, err := stack.OpenDatabaseWithFreezer(ChainData, config.DatabaseCache, config.DatabaseHandles,
config.DatabaseFreezer, ChainDBNamespace, false)
Expand Down
3 changes: 2 additions & 1 deletion internal/build/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ var DryRunFlag = flag.Bool("n", false, "dry run, don't execute commands")
// MustRun executes the given command and exits the host process for
// any error.
func MustRun(cmd *exec.Cmd) {
fmt.Println(">>>", printArgs(cmd.Args))
fmt.Println("start_cmd >>>", printArgs(cmd.Args))
if !*DryRunFlag {
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
if err := cmd.Run(); err != nil {
fmt.Println("return_err >>>", err)
log.Fatal(err)
}
}
Expand Down
17 changes: 8 additions & 9 deletions trie/triedb/pathdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ const (
// defaultCleanSize is the default memory allowance of clean cache.
defaultCleanSize = 16 * 1024 * 1024

// maxBufferSize is the maximum memory allowance of node buffer.
// MaxBufferSize is the maximum memory allowance of node buffer.
// Too large nodebuffer will cause the system to pause for a long
// time when write happens. Also, the largest batch that pebble can
// support is 4GB, node will panic if batch size exceeds this limit.
maxBufferSize = 256 * 1024 * 1024
MaxBufferSize = 256 * 1024 * 1024

// DefaultBufferSize is the default memory allowance of node buffer
// that aggregates the writes from above until it's flushed into the
Expand Down Expand Up @@ -118,10 +118,9 @@ type Config struct {
// unreasonable or unworkable.
func (c *Config) sanitize() *Config {
conf := *c
if conf.DirtyCacheSize > maxBufferSize {
conf.CleanCacheSize = conf.DirtyCacheSize - maxBufferSize
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize))
conf.DirtyCacheSize = maxBufferSize
if conf.DirtyCacheSize > MaxBufferSize {
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(MaxBufferSize))
conf.DirtyCacheSize = MaxBufferSize
}
return &conf
}
Expand Down Expand Up @@ -508,9 +507,9 @@ func (db *Database) SetBufferSize(size int) error {
db.lock.Lock()
defer db.lock.Unlock()

if size > maxBufferSize {
log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize))
size = maxBufferSize
if size > MaxBufferSize {
log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(MaxBufferSize))
size = MaxBufferSize
}
db.bufferSize = size
return db.tree.bottom().setBufferSize(db.bufferSize)
Expand Down
155 changes: 155 additions & 0 deletions trie/triedb/pathdb/difflayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,106 @@ import (
"github.com/ethereum/go-ethereum/trie/triestate"
)

type RefTrieNode struct {
refCount uint32
node *trienode.Node
}

type HashNodeCache struct {
lock sync.RWMutex
cache map[common.Hash]*RefTrieNode
}

func (h *HashNodeCache) length() int {
if h == nil {
return 0
}
h.lock.RLock()
defer h.lock.RUnlock()
return len(h.cache)
}

func (h *HashNodeCache) set(hash common.Hash, node *trienode.Node) {
if h == nil {
return
}
h.lock.Lock()
defer h.lock.Unlock()
if n, ok := h.cache[hash]; ok {
n.refCount++
} else {
h.cache[hash] = &RefTrieNode{1, node}
}
}

func (h *HashNodeCache) Get(hash common.Hash) *trienode.Node {
if h == nil {
return nil
}
h.lock.RLock()
defer h.lock.RUnlock()
if n, ok := h.cache[hash]; ok {
return n.node
}
return nil
}

func (h *HashNodeCache) del(hash common.Hash) {
if h == nil {
return
}
h.lock.Lock()
defer h.lock.Unlock()
n, ok := h.cache[hash]
if !ok {
return
}
if n.refCount > 0 {
n.refCount--
}
if n.refCount == 0 {
delete(h.cache, hash)
}
}

func (h *HashNodeCache) Add(ly layer) {
if h == nil {
return
}
dl, ok := ly.(*diffLayer)
if !ok {
return
}
beforeAdd := h.length()
for _, subset := range dl.nodes {
for _, node := range subset {
h.set(node.Hash, node)
}
}
diffHashCacheLengthGauge.Update(int64(h.length()))
log.Debug("Add difflayer to hash map", "root", ly.rootHash(), "block_number", dl.block, "map_len", h.length(), "add_delta", h.length()-beforeAdd)
}

func (h *HashNodeCache) Remove(ly layer) {
if h == nil {
return
}
dl, ok := ly.(*diffLayer)
if !ok {
return
}
go func() {
beforeDel := h.length()
for _, subset := range dl.nodes {
for _, node := range subset {
h.del(node.Hash)
}
}
diffHashCacheLengthGauge.Update(int64(h.length()))
log.Debug("Remove difflayer from hash map", "root", ly.rootHash(), "block_number", dl.block, "map_len", h.length(), "del_delta", beforeDel-h.length())
}()
}

// diffLayer represents a collection of modifications made to the in-memory tries
// along with associated state changes after running a block on top.
//
Expand All @@ -39,7 +139,10 @@ type diffLayer struct {
nodes map[common.Hash]map[string]*trienode.Node // Cached trie nodes indexed by owner and path
states *triestate.Set // Associated state change set for building history
memory uint64 // Approximate guess as to how much memory we use
cache *HashNodeCache // trienode cache by hash key. cache is immutable, but cache's item can be add/del.

// mutables
origin *diskLayer // The current difflayer corresponds to the underlying disklayer and is updated during cap.
parent layer // Parent layer modified by this one, never nil, **can be changed**
lock sync.RWMutex // Lock used to protect parent
}
Expand All @@ -58,6 +161,20 @@ func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes
states: states,
parent: parent,
}

switch l := parent.(type) {
case *diskLayer:
dl.origin = l
dl.cache = &HashNodeCache{
cache: make(map[common.Hash]*RefTrieNode),
}
case *diffLayer:
dl.origin = l.originDiskLayer()
dl.cache = l.cache
default:
panic("unknown parent type")
}

for _, subset := range nodes {
for path, n := range subset {
dl.memory += uint64(n.Size() + len(path))
Expand All @@ -75,6 +192,18 @@ func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes
return dl
}

func (dl *diffLayer) originDiskLayer() *diskLayer {
dl.lock.RLock()
defer dl.lock.RUnlock()
return dl.origin
}

func (dl *diffLayer) updateOriginDiskLayer(persistLayer *diskLayer) {
dl.lock.Lock()
defer dl.lock.Unlock()
dl.origin = persistLayer
}

// rootHash implements the layer interface, returning the root hash of
// corresponding state.
func (dl *diffLayer) rootHash() common.Hash {
Expand Down Expand Up @@ -133,6 +262,32 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, hash common.Hash, dept
// Node implements the layer interface, retrieving the trie node blob with the
// provided node information. No error will be returned if the node is not found.
func (dl *diffLayer) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) {
if n := dl.cache.Get(hash); n != nil {
// The query from the hash map is fastpath,
// avoiding recursive query of 128 difflayers.
diffHashCacheHitMeter.Mark(1)
diffHashCacheReadMeter.Mark(int64(len(n.Blob)))
return n.Blob, nil
}
diffHashCacheMissMeter.Mark(1)

persistLayer := dl.originDiskLayer()
if persistLayer != nil {
blob, err := persistLayer.Node(owner, path, hash)
if err != nil {
// This is a bad case with a very low probability.
// r/w the difflayer cache and r/w the disklayer are not in the same lock,
// so in extreme cases, both reading the difflayer cache and reading the disklayer may fail, eg, disklayer is stale.
// In this case, fallback to the original 128-layer recursive difflayer query path.
diffHashCacheSlowPathMeter.Mark(1)
log.Debug("Retry difflayer due to query origin failed", "owner", owner, "path", path, "hash", hash.String(), "error", err)
return dl.node(owner, path, hash, 0)
} else { // This is the fastpath.
return blob, nil
}
}
diffHashCacheSlowPathMeter.Mark(1)
log.Debug("Retry difflayer due to origin is nil", "owner", owner, "path", path, "hash", hash.String())
return dl.node(owner, path, hash, 0)
}

Expand Down
3 changes: 3 additions & 0 deletions trie/triedb/pathdb/disklayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
}
log.Debug("Pruned state history", "items", pruned, "tailid", oldest)
}

// The bottom has been eaten by disklayer, releasing the hash cache of bottom difflayer.
bottom.cache.Remove(bottom)
return ndl, nil
}

Expand Down
Loading

0 comments on commit 685031a

Please sign in to comment.