Introduce frame snapshots (#3098)
With the introduction of layered frames, each database lookup may result
in hundreds of table lookups as the frame stack is traversed.

This change restores performance by introducing snapshots to limit the
lookup depth at the expense of slightly increased memory usage.

The snapshot contains the cumulative changes of all ancestors and
itself, allowing the lookup recursion to stop whenever a snapshot is
encountered.
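
As a minimal sketch of this bounded lookup - in Nim, with `Frame`,
`changes` and the string-keyed tables as simplified stand-ins for the
actual Aristo types:

import std/[options, tables]

type Frame = ref object
  parent: Frame                           # nil for the base frame
  changes: Table[string, string]          # this frame's own delta
  snapshot: Option[Table[string, string]] # cumulative changes of this
                                          # frame and all its ancestors

proc lookup(frame: Frame, key: string): Option[string] =
  # Walk from the newest frame towards the base; a snapshot already
  # holds all ancestor changes, so the walk can stop there
  var cur = frame
  while cur != nil:
    if key in cur.changes:
      return some(cur.changes[key])
    if cur.snapshot.isSome:
      if key in cur.snapshot.get:
        return some(cur.snapshot.get[key])
      break # snapshot is cumulative - no need to look further up
    cur = cur.parent
  none(string) # fall through to the on-disk database in the real code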

The number of snapshots to keep in memory is a tradeoff between lookup
performance and memory usage - this change starts with a simple strategy
of keeping snapshots for head frames (approximately).

The snapshot is created during checkpointing, ie after block validation,
to make sure that it's cheap to start verifying blocks - parent
snapshots are moved to the descendant as part of checkpointing, which
effectively means that head frames hold snapshots in most cases.
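
A rough sketch of that snapshot handoff at checkpoint time, reusing the
simplified `Frame` type from the sketch above (the real code works on
vertex and key tables rather than strings):

proc checkpoint(frame: Frame) =
  # Cheap when the parent already holds a snapshot (extending a head);
  # expensive when a new branch is created, since the cumulative set
  # must then be rebuilt from every ancestor
  var snap: Table[string, string]
  if frame.parent != nil and frame.parent.snapshot.isSome:
    snap = frame.parent.snapshot.get
    frame.parent.snapshot = none(Table[string, string]) # hand it over
  else:
    var ancestors: seq[Frame]
    var cur = frame.parent
    while cur != nil:
      ancestors.add cur
      cur = cur.parent
    while ancestors.len > 0: # apply oldest first so newer changes win
      for k, v in ancestors.pop().changes:
        snap[k] = v
  for k, v in frame.changes: # include the frame's own changes
    snap[k] = v
  frame.snapshot = some(snap)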

The outcome of this tradeoff is that applying a block to a known head is
fast while creating a new branch of history remains expensive.

Another consequence is that when persisting changes to disk, we must
re-traverse the stack of changes to build a cumulative set of changes to
be persisted.
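
A sketch of that re-traversal with the same simplified types - fold
every frame's delta, oldest first, into one set to write to disk:

proc collectForPersist(frame: Frame): Table[string, string] =
  var frames: seq[Frame]
  var cur = frame
  while cur != nil: # gather the whole stack down to the base
    frames.add cur
    cur = cur.parent
  while frames.len > 0: # oldest first, so newer frames overwrite
    for k, v in frames.pop().changes:
      result[k] = v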

A future strategy might be to keep additional "keyframes" along the way,
eg one per epoch - this would bound the "branch creation" cost to a
constant factor, but the memory overhead should first be considered.

Another strategy might be to avoid keeping snapshots for non-canonical
branches, especially when they become older and thus less likely to be
branched from.

* `level` is updated to work like a temporary serial number so that each
frame maintains its relative position in the sorting order as frames are
persisted
* a `snapshot` is added to some TxFrame instances - the snapshot
collects all ancestor changes up to and including the given frame.
`level` is used as a marker to prune the snapshot of changes that have
already been persisted (see the pruning sketch after this list).
* stack traversals for the purpose of lookup stop when they encounter a
snapshot - this bounds the lookup depth to the first encountered
snapshot
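
As a sketch of the pruning mentioned above - the entry layout is
hypothetical (the real `Snapshot` tuple carries a vertex, a hash key
and the level at which the entry was recorded):

proc pruneSnapshot(snap: var Table[string, (string, int)],
                   persistedLevel: int) =
  # Entries recorded at or below the already-persisted level are
  # covered by the database and can be dropped from the snapshot
  var stale: seq[string]
  for k, v in snap:
    if v[1] <= persistedLevel:
      stale.add k
  for k in stale:
    snap.del k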

After this PR, sync performance lands at about 2-3 blocks per second
(~10x improvement) - this is quite reasonable compared with block
import, which skips the expensive state root verification and thus
achieves ~20 blk/s on the same hardware. Additional work to bring live
syncing performance in line with disk-based block import would focus on
reducing state root verification cost.
arnetheduck authored Feb 28, 2025
1 parent 0f89c1f commit 4576727
Showing 17 changed files with 692 additions and 641 deletions.
4 changes: 4 additions & 0 deletions execution_chain/common/common.nim
@@ -147,8 +147,12 @@ proc initializeDb(com: CommonRef) =
txFrame.persistHeaderAndSetHead(com.genesisHeader,
startOfHistory=com.genesisHeader.parentHash).
expect("can persist genesis header")

doAssert(canonicalHeadHashKey().toOpenArray in txFrame)

txFrame.checkpoint(com.genesisHeader.number)
com.db.persist(txFrame)

# The database must at least contain the base and head pointers - the base
# is implicitly considered finalized
let
4 changes: 4 additions & 0 deletions execution_chain/core/chain/forked_chain.nim
@@ -143,8 +143,12 @@ proc validateBlock(c: ForkedChainRef,
c.writeBaggage(blk, blkHash, txFrame, receipts)

# Block fully written to txFrame, mark it as such
# Checkpoint creates a snapshot of ancestor changes in txFrame - it is an
# expensive operation, specially when creating a new branch (ie when blk
# is being applied to a block that is currently not a head)
txFrame.checkpoint(blk.header.number)


c.updateBranch(parent, blk, blkHash, txFrame, move(receipts))

for i, tx in blk.transactions:
4 changes: 2 additions & 2 deletions execution_chain/core/chain/persist_blocks.nim
@@ -102,7 +102,9 @@ proc checkpoint*(p: var Persister): Result[void, string] =
)

# Move in-memory state to disk
p.vmState.ledger.txFrame.checkpoint(p.parent.number, skipSnapshot = true)
p.com.db.persist(p.vmState.ledger.txFrame)

# Get a new frame since the DB assumes ownership
p.vmState.ledger.txFrame = p.com.db.baseTxFrame().txFrameBegin()

@@ -172,8 +174,6 @@ proc persistBlock*(p: var Persister, blk: Block): Result[void, string] =
p.stats.txs += blk.transactions.len
p.stats.gas += blk.header.gasUsed

txFrame.checkpoint(header.number)

assign(p.parent, header)

ok()
22 changes: 6 additions & 16 deletions execution_chain/db/aristo/aristo_compute.nim
@@ -15,7 +15,7 @@ import
chronicles,
eth/common/[accounts_rlp, base_rlp, hashes_rlp],
results,
"."/[aristo_desc, aristo_get, aristo_walk/persistent],
"."/[aristo_desc, aristo_get, aristo_tx_frame, aristo_walk/persistent],
./aristo_desc/desc_backend

type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64]
@@ -74,7 +74,7 @@ proc putKeyAtLevel(
## set (vertex data may have been committed to disk without computing the
## corresponding hash!)

if level == -2:
if level < db.db.baseTxFrame().level:
?batch.putVtx(db.db, rvid, vtx, key)

if batch.count mod batchSize == 0:
@@ -90,16 +90,6 @@

ok()

func maxLevel(cur, other: int): int =
# Compare two levels and return the topmost in the stack, taking into account
# the odd reversal of order around the zero point
if cur < 0:
max(cur, other) # >= 0 is always more topmost than <0
elif other < 0:
cur
else:
min(cur, other) # Here the order is reversed and 0 is the top layer

template encodeLeaf(w: var RlpWriter, pfx: NibblesBuf, leafData: untyped): HashKey =
w.startList(2)
w.append(pfx.toHexPrefix(isLeaf = true).data())
@@ -123,7 +113,7 @@ proc getKey(
db: AristoTxRef, rvid: RootedVertexID, skipLayers: static bool
): Result[((HashKey, VertexRef), int), AristoError] =
ok when skipLayers:
(?db.db.getKeyBe(rvid, {GetVtxFlag.PeekCache}), -2)
(?db.db.getKeyBe(rvid, {GetVtxFlag.PeekCache}), dbLevel)
else:
?db.getKeyRc(rvid, {})

@@ -178,7 +168,7 @@ proc computeKeyImpl(
keyvtxl[1],
skipLayers = skipLayers,
)
level = maxLevel(level, sl)
level = max(level, sl)
skey
else:
VOID_HASH_KEY
@@ -252,7 +242,7 @@ proc computeKeyImpl(
template writeBranch(w: var RlpWriter): HashKey =
w.encodeBranch(vtx):
if subvid.isValid:
level = maxLevel(level, keyvtxs[n][1])
level = max(level, keyvtxs[n][1])
keyvtxs[n][0][0]
else:
VOID_HASH_KEY
@@ -280,7 +270,7 @@
): Result[HashKey, AristoError] =
let (keyvtx, level) =
when skipLayers:
(?db.db.getKeyBe(rvid, {GetVtxFlag.PeekCache}), -2)
(?db.db.getKeyBe(rvid, {GetVtxFlag.PeekCache}), dbLevel)
else:
?db.getKeyRc(rvid, {})

71 changes: 38 additions & 33 deletions execution_chain/db/aristo/aristo_desc.nim
@@ -22,7 +22,7 @@
{.push raises: [].}

import
std/[hashes, sets, tables],
std/[hashes, sequtils, sets, tables],
eth/common/hashes,
results,
./aristo_constants,
@@ -73,6 +73,21 @@ type

blockNumber*: Opt[uint64] ## Block number set when checkpointing the frame

snapshot*: Table[RootedVertexID, Snapshot]
## Optional snapshot containing the cumulative changes from ancestors and
## the current frame
snapshotLevel*: Opt[int] # base level when the snapshot was taken

level*: int
## Ancestry level of frame, increases with age but otherwise meaningless -
## used to order data by age when working with layers.
## -1 = stored in database, where relevant though typically should be
## compared with the base layer level instead.

Snapshot* = (VertexRef, HashKey, int)
## Unlike sTab/kMap, snapshot contains both vertex and key since at the time
## of writing, it's primarily used in contexts where both are present

AristoDbRef* = ref object
## Backend interface.
getVtxFn*: GetVtxFn ## Read vertex record
@@ -88,7 +103,7 @@ type

closeFn*: CloseFn ## Generic destructor

txRef*: AristoTxRef ## Bottom-most in-memory frame

accLeaves*: LruCache[Hash32, VertexRef]
## Account path to payload cache - accounts are frequently accessed by
@@ -116,6 +131,8 @@ type
legs*: ArrayBuf[NibblesBuf.high + 1, Leg] ## Chain of vertices and IDs
tail*: NibblesBuf ## Portion of non completed path

const dbLevel* = -1

# ------------------------------------------------------------------------------
# Public helpers
# ------------------------------------------------------------------------------
@@ -181,52 +198,40 @@ func isValid*(sqv: HashSet[RootedVertexID]): bool =
# Public functions, miscellaneous
# ------------------------------------------------------------------------------

# Hash set helper
func hash*(db: AristoDbRef): Hash =
## Table/KeyedQueue/HashSet mixin
cast[pointer](db).hash
func hash*(db: AristoDbRef): Hash {.error.}
func hash*(db: AristoTxRef): Hash {.error.}

# ------------------------------------------------------------------------------
# Public helpers
# ------------------------------------------------------------------------------

iterator stack*(tx: AristoTxRef): AristoTxRef =
# Stack going from base to tx
var frames: seq[AristoTxRef]
iterator rstack*(tx: AristoTxRef, stopAtSnapshot = false): AristoTxRef =
# Stack in reverse order, ie going from tx to base
var tx = tx

while tx != nil:
frames.add tx
yield tx

if stopAtSnapshot and tx.snapshotLevel.isSome():
break

tx = tx.parent

iterator stack*(tx: AristoTxRef, stopAtSnapshot = false): AristoTxRef =
# Stack going from base to tx
var frames = toSeq(tx.rstack(stopAtSnapshot))

while frames.len > 0:
yield frames.pop()

iterator rstack*(tx: AristoTxRef): (AristoTxRef, int) =
# Stack in reverse order, ie going from tx to base
var tx = tx

var i = 0
while tx != nil:
let level = if tx.parent == nil: -1 else: i
yield (tx, level)
tx = tx.parent
i += 1

proc deltaAtLevel*(db: AristoTxRef, level: int): AristoTxRef =
if level == -2:
if level < db.db.txRef.level:
nil
elif level == -1:
db.db.txRef
else:
var
frame = db
level = level

while level > 0:
frame = frame.parent
level -= 1

frame
for frame in db.rstack():
if frame.level == level:
return frame
nil

# ------------------------------------------------------------------------------
# End
4 changes: 2 additions & 2 deletions execution_chain/db/aristo/aristo_get.nim
@@ -69,7 +69,7 @@ proc getVtxRc*(
else:
return err(GetVtxNotFound)

ok (?db.db.getVtxBe(rvid, flags), -2)
ok (?db.db.getVtxBe(rvid, flags), dbLevel)

proc getVtx*(db: AristoTxRef; rvid: RootedVertexID, flags: set[GetVtxFlag] = {}): VertexRef =
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
@@ -103,7 +103,7 @@ proc getKeyRc*(
# The vertex is to be deleted. So is the value key.
return err(GetKeyNotFound)

ok (?db.db.getKeyBe(rvid, flags), -2)
ok (?db.db.getKeyBe(rvid, flags), dbLevel)

proc getKey*(db: AristoTxRef; rvid: RootedVertexID): HashKey =
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
2 changes: 1 addition & 1 deletion execution_chain/db/aristo/aristo_init/init_common.nim
@@ -83,7 +83,7 @@ proc finishSession*(hdl: TypedPutHdlRef; db: TypedBackendRef) =

proc initInstance*(db: AristoDbRef): Result[void, AristoError] =
let vTop = ?db.getTuvFn()
db.txRef = AristoTxRef(db: db, vTop: vTop)
db.txRef = AristoTxRef(db: db, vTop: vTop, snapshotLevel: Opt.some(0))
db.accLeaves = LruCache[Hash32, VertexRef].init(ACC_LRU_SIZE)
db.stoLeaves = LruCache[Hash32, VertexRef].init(ACC_LRU_SIZE)
ok()
