Skip to content

Commit

Permalink
core/state/snapshot: port changes from 29995 (ethereum#30040)
Browse files Browse the repository at this point in the history
ethereum#29995 has been reverted due to an unexpected flaw in the state snapshot
process.

Specifically, it attempts to stop the state snapshot generation, which
could potentially
cause the system to halt if the generation is not currently running.

This pull request ports the changes made in ethereum#29995 and fixes the flaw.
  • Loading branch information
rjl493456442 authored Sep 6, 2024
1 parent 88c8459 commit d718312
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 36 deletions.
23 changes: 23 additions & 0 deletions core/state/snapshot/disklayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ func (dl *diskLayer) Stale() bool {
return dl.stale
}

// markStale sets the stale flag as true.
func (dl *diskLayer) markStale() {
dl.lock.Lock()
defer dl.lock.Unlock()

dl.stale = true
}

// Account directly retrieves the account associated with a particular hash in
// the snapshot slim data format.
func (dl *diskLayer) Account(hash common.Hash) (*types.SlimAccount, error) {
Expand Down Expand Up @@ -175,3 +183,18 @@ func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro
func (dl *diskLayer) Update(blockHash common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
return newDiffLayer(dl, blockHash, destructs, accounts, storage)
}

// stopGeneration aborts the state snapshot generation if it is currently running.
func (dl *diskLayer) stopGeneration() {
dl.lock.RLock()
generating := dl.genMarker != nil
dl.lock.RUnlock()
if !generating {
return
}
if dl.genAbort != nil {
abort := make(chan *generatorStats)
dl.genAbort <- abort
<-abort
}
}
9 changes: 1 addition & 8 deletions core/state/snapshot/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,16 +631,10 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er
accMarker = nil
return nil
}
// Always reset the initial account range as 1 whenever recover from the
// interruption. TODO(rjl493456442) can we remove it?
var accountRange = accountCheckRange
if len(accMarker) > 0 {
accountRange = 1
}
origin := common.CopyBytes(accMarker)
for {
id := trie.StateTrieID(dl.root)
exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountRange, onAccount, types.FullAccountRLP)
exhausted, last, err := dl.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountCheckRange, onAccount, types.FullAccountRLP)
if err != nil {
return err // The procedure it aborted, either by external signal or internal error.
}
Expand All @@ -652,7 +646,6 @@ func generateAccounts(ctx *generatorContext, dl *diskLayer, accMarker []byte) er
ctx.removeStorageLeft()
break
}
accountRange = accountCheckRange
}
return nil
}
Expand Down
38 changes: 10 additions & 28 deletions core/state/snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,24 +258,11 @@ func (t *Tree) Disable() {
for _, layer := range t.layers {
switch layer := layer.(type) {
case *diskLayer:

layer.lock.RLock()
generating := layer.genMarker != nil
layer.lock.RUnlock()
if !generating {
// Generator is already aborted or finished
break
}
// If the base layer is generating, abort it
if layer.genAbort != nil {
abort := make(chan *generatorStats)
layer.genAbort <- abort
<-abort
}
// Layer should be inactive now, mark it as stale
layer.lock.Lock()
layer.stale = true
layer.lock.Unlock()
// TODO this function will hang if it's called twice. Will
// fix it in the following PRs.
layer.stopGeneration()
layer.markStale()
layer.Release()

case *diffLayer:
// If the layer is a simple diff, simply mark as stale
Expand Down Expand Up @@ -730,16 +717,11 @@ func (t *Tree) Rebuild(root common.Hash) {
for _, layer := range t.layers {
switch layer := layer.(type) {
case *diskLayer:
// If the base layer is generating, abort it and save
if layer.genAbort != nil {
abort := make(chan *generatorStats)
layer.genAbort <- abort
<-abort
}
// Layer should be inactive now, mark it as stale
layer.lock.Lock()
layer.stale = true
layer.lock.Unlock()
// TODO this function will hang if it's called twice. Will
// fix it in the following PRs.
layer.stopGeneration()
layer.markStale()
layer.Release()

case *diffLayer:
// If the layer is a simple diff, simply mark as stale
Expand Down

0 comments on commit d718312

Please sign in to comment.