Skip to content

Commit

Permalink
Aristo lazily delete larger subtrees (#2560)
Browse files Browse the repository at this point in the history
* Extract sub-tree deletion functions into separate sub-modules

* Move/rename `aristo_desc.accLruSize` => `aristo_constants.ACC_LRU_SIZE`

* Lazily delete sub-trees

why:
  This gives some control of the memory used to keep the deleted vertices
  in the cached layers. For larger sub-trees, keys and vertices might be
  on the persistent backend to a large extent. This would pull an amount
  of extra information from the backend into the cached layer.

  For lazy deleting it is enough to remember sub-trees by a small set of
  (at most 16) sub-roots to be processed when storing persistent data.
  Marking the tree root as deleted immediately lets most of the code
  base work as before.

* Comments and cosmetics

* No need to import all for `Aristo` here

* Kludge to make `chronicles` usage in sub-modules work with `fluffy`

why:
  That `fluffy` would not run with any logging in `core_deb` is a problem
  I have known for a while. Up to now, logging was only used for debugging.

  With the current `Aristo` PR, there are cases where logging might be
  wanted but this works only if `chronicles` runs without the
  `json[dynamic]` sinks.

  So this should be re-visited.

* More of a kludge
  • Loading branch information
mjfh committed Aug 14, 2024
1 parent e3908a7 commit ce713d9
Show file tree
Hide file tree
Showing 22 changed files with 655 additions and 102 deletions.
9 changes: 8 additions & 1 deletion fluffy/fluffy.nim.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
#
# For some reason `json[dynamic]` causes problems with subsequent modules from
# `Aristo` when compiling `fluffy`. There might be a `chronicles` import missing
# but it is not obvious where. -- jordan
#
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"

-d:"chronicles_sinks=textlines[dynamic]"
-d:"chronicles_runtime_filtering=on"
-d:"chronicles_disable_thread_id"

Expand Down
4 changes: 3 additions & 1 deletion fluffy/tools/beacon_lc_bridge/nim.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Use only `secp256k1` public key cryptography as an identity in LibP2P.
-d:"libp2p_pki_schemes=secp256k1"

-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
# See `fluffy.nim.cfg`
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic]"
4 changes: 3 additions & 1 deletion fluffy/tools/portal_bridge/nim.cfg
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
# See `fluffy.nim.cfg`
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic]"
7 changes: 1 addition & 6 deletions nimbus/db/aristo/TODO.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
* Re-visit `delTree()`. Suggestion is deleting small trees on the memory later,
otherwise only deleting the root vertex (so it becomes inaccessible) and
remember the follow up vertices which can travel through the tx-layers
to be picked up by the backend store.

* Some comletions migh be needed for the `aristo_part` module which is a
* Some completions might be needed for the `aristo_part` module which is a
re-implementation of the module supporting *proof-mode*/partial trees.
+ Complete `partMergeStorageData()`. This function might not be needed at
all unless *snap-sync* is really revived.
Expand Down
13 changes: 13 additions & 0 deletions nimbus/db/aristo/aristo_constants.nim
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ const
## functions with fixed assignments of the type of a state root (e.g. for
## a receipt or a transaction root.)

ACC_LRU_SIZE* = 1024 * 1024
## LRU cache size for accounts that have storage, see `.accLeaves` and
## `.stoLeaves` fields of the main descriptor.

DELETE_SUBTREE_VERTICES_MAX* = 25
## Maximum number of vertices for a tree to be deleted instantly. If the
## tree is larger, only the sub-tree root will be deleted immediately and
## subsequent entries will not be deleted until the cache layers are saved
## to the backend.
##
## Set to zero to disable in which case all sub-trees are deleted
## immediately.

static:
# must stay away from `VertexID(1)` and `VertexID(2)`
doAssert 2 < LEAST_FREE_VID
Expand Down
2 changes: 1 addition & 1 deletion nimbus/db/aristo/aristo_debug.nim
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ proc ppPayload(p: LeafPayload, db: AristoDbRef): string =
of AccountData:
result = "(" & p.account.ppAriAccount() & "," & p.stoID.ppVid & ")"
of StoData:
result = $p.stoData
result = ($p.stoData).squeeze

proc ppVtx(nd: VertexRef, db: AristoDbRef, rvid: RootedVertexID): string =
if not nd.isValid:
Expand Down
75 changes: 3 additions & 72 deletions nimbus/db/aristo/aristo_delete.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
## Aristo DB -- Patricia Trie delete functionality
## ===============================================
##
## Delete by `Hike` type chain of vertices.

{.push raises: [].}

import
std/typetraits,
eth/common,
results,
./aristo_delete/[delete_helpers, delete_subtree],
"."/[aristo_desc, aristo_fetch, aristo_get, aristo_hike, aristo_layers,
aristo_utils]

Expand All @@ -39,79 +39,10 @@ proc branchStillNeeded(vtx: VertexRef): Result[int,void] =
# Oops, degenerated branch node
err()

# -----------

proc disposeOfVtx(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex ID to clear
) =
# Remove entry
db.layersResVtx(rvid)
db.layersResKey(rvid)

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc delSubTreeImpl(
db: AristoDbRef; # Database, top layer
root: VertexID; # Root vertex
): Result[void,AristoError] =
## Implementation of *delete* sub-trie.
var
dispose = @[root]
(rootVtx, _) = db.getVtxRc((root, root)).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
follow = @[rootVtx]

# Collect list of nodes to delete
while 0 < follow.len:
var redo: seq[VertexRef]
for vtx in follow:
for vid in vtx.subVids:
# Exiting here leaves the tree as-is
let vtx = (? db.getVtxRc((root, vid)))[0]
redo.add vtx
dispose.add vid
redo.swap follow

# Mark collected vertices to be deleted
for vid in dispose:
db.disposeOfVtx((root, vid))

ok()

proc delStoTreeImpl(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Root vertex
accPath: Hash256;
stoPath: NibblesBuf;
): Result[void,AristoError] =
## Implementation of *delete* sub-trie.

let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)

case vtx.vType
of Branch:
for i in 0..15:
if vtx.bVid[i].isValid:
? db.delStoTreeImpl(
(rvid.root, vtx.bVid[i]), accPath,
stoPath & vtx.ePfx & NibblesBuf.nibble(byte i))

of Leaf:
let stoPath = Hash256(data: (stoPath & vtx.lPfx).getBytes())
db.layersPutStoLeaf(AccountKey.mixUp(accPath, stoPath), nil)

db.disposeOfVtx(rvid)

ok()

proc deleteImpl(
db: AristoDbRef; # Database, top layer
hike: Hike; # Fully expanded path
Expand Down Expand Up @@ -199,7 +130,7 @@ proc deleteAccountRecord*(

# Delete storage tree if present
if stoID.isValid:
? db.delStoTreeImpl((stoID.vid, stoID.vid), accPath, NibblesBuf())
? db.delStoTreeImpl((stoID.vid, stoID.vid), accPath)

?db.deleteImpl(hike)

Expand Down Expand Up @@ -322,7 +253,7 @@ proc deleteStorageTree*(
# Mark account path Merkle keys for update
db.updateAccountForHasher accHike

? db.delStoTreeImpl((stoID.vid, stoID.vid), accPath, NibblesBuf())
? db.delStoTreeImpl((stoID.vid, stoID.vid), accPath)

# De-register the deleted storage tree from the accounts record
let leaf = wpAcc.vtx.dup # Dup on modify
Expand Down
127 changes: 127 additions & 0 deletions nimbus/db/aristo/aristo_delete/delete_debug.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
std/[math, strformat, times],
chronicles,
".."/[aristo_desc, aristo_get, aristo_profile]

export
aristo_profile.toStr

type
  # Figures collected by a single `analyseSubTree()` run over one sub-tree.
  SubTreeStats* = tuple
    nVtxs: int                         ## Number of vertices in sub-tree
    nLeafs: int                        ## Number of leafs in sub-tree
    depthMax: int                      ## Maximal vertex path length (the
                                       ## sub-tree root counts as depth 1)
    nStoCache: int                     ## Size of storage leafs cache, i.e.
                                       ## `db.stoLeaves.len` when analysed
    elapsed: Duration                  ## Time spent analysing

  # Running sums over many `SubTreeStats` samples. The sums and the sums of
  # squares are what `stats()` needs to derive mean and standard deviation
  # without keeping the individual samples.
  SubTreeStatsAccu* = tuple
    count: int                         ## Number of entries
    sVtxs, qVtxs: float                ## Sum and square sum of `.nVtxs`
    sLeafs, qLeafs: float              ## Sum and square sum of `.nLeafs`
    sDepth, qDepth: float              ## Sum and square sum of `.depthMax`
    sElapsed: Duration                 ## Sum of `.elapsed`

  # Distribution summary as returned by `stats()` for a `SubTreeStatsAccu`.
  SubTreeDist* = tuple
    count: int                         ## Number of entries
    mVtxs, dVtxs: float                ## Mean and std deviation of `.nVtxs`
    mLeafs, dLeafs: float              ## Mean and std deviation of `.nLeafs`
    mDepth, dDepth: float              ## Mean and std deviation of `.depthMax`

# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------

proc analyseSubTreeImpl(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    depth: int;                        # Recursion depth
    stats: var SubTreeStats;           # Statistics
      ) =
  ## Recursively visit the vertex `rvid` and everything reachable below it,
  ## updating the vertex count, the leaf count and the maximal depth seen
  ## in `stats`. A vertex that cannot be fetched is silently ignored.
  let (node, _) = db.getVtxRc(rvid).valueOr:
    return

  inc stats.nVtxs
  stats.depthMax = max(stats.depthMax, depth)

  case node.vType
  of Leaf:
    inc stats.nLeafs
  of Branch:
    # Descend into every populated slot of the branch vertex
    for nibble in 0 .. 15:
      let subVid = node.bVid[nibble]
      if subVid.isValid:
        db.analyseSubTreeImpl((rvid.root, subVid), depth + 1, stats)


func evalDist(count: int; sum, sqSum: float): tuple[mean, stdDev: float] =
  ## Derive the mean and the (population) standard deviation of `count`
  ## samples from their sum `sum` and the sum of their squares `sqSum`.
  ##
  ## For an empty sample set (`count <= 0`) the result is `(0.0, 0.0)`
  ## rather than the `NaN` values that a division by zero would produce.
  if 0 < count:
    let
      n = count.float
      mean = sum / n
      sqMean = sqSum / n
      meanSq = mean * mean

      # Mathematically, `meanSq <= sqMean` but there might be rounding errors
      # if `meanSq` and `sqMean` are approximately the same. Clamping keeps
      # the variance non-negative so that `sqrt()` never sees a negative
      # argument.
      variance = sqMean - min(meanSq, sqMean)

    result = (mean: mean, stdDev: variance.sqrt)

# ------------------------------------------------------------------------------
# Public analysis tools
# ------------------------------------------------------------------------------

proc analyseSubTree*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    minVtxs: int;                      # Accumulate if `minVtxs` <= `.nVtxs`
    accu: var SubTreeStatsAccu;        # For accumulated statistics
      ): SubTreeStats =
  ## Walk the sub-tree rooted at `rvid` and return its statistics. The
  ## current size of the `db.stoLeaves` cache is recorded along the way.
  ##
  ## The counters are folded into `accu` only for sub-trees with at least
  ## `minVtxs` vertices. The elapsed time is always added to `accu`.
  let t0 = getTime()

  db.analyseSubTreeImpl(rvid, 1, result)  # the sub-tree root has depth 1
  result.nStoCache = db.stoLeaves.len

  if result.nVtxs >= minVtxs:
    let (nV, nL, dMax) = (result.nVtxs, result.nLeafs, result.depthMax)
    inc accu.count
    accu.sVtxs += nV.float
    accu.qVtxs += (nV * nV).float
    accu.sLeafs += nL.float
    accu.qLeafs += (nL * nL).float
    accu.sDepth += dMax.float
    accu.qDepth += (dMax * dMax).float

  result.elapsed = getTime() - t0
  accu.sElapsed += result.elapsed      # Unconditionally collected


func stats*(a: SubTreeStatsAccu): SubTreeDist =
  ## Turn the accumulated sums and square sums of `a` into mean and standard
  ## deviation figures for the vertex count, the leaf count and the depth.
  let
    vtxs = evalDist(a.count, a.sVtxs, a.qVtxs)
    leafs = evalDist(a.count, a.sLeafs, a.qLeafs)
    depth = evalDist(a.count, a.sDepth, a.qDepth)

  result.count = a.count
  result.mVtxs = vtxs.mean
  result.dVtxs = vtxs.stdDev
  result.mLeafs = leafs.mean
  result.dLeafs = leafs.stdDev
  result.mDepth = depth.mean
  result.dDepth = depth.stdDev

func strStats*(
    a: SubTreeStatsAccu;
      ): tuple[count, vtxs, leafs, depth, elapsed: string] =
  ## Render the accumulated statistics `a` as printable strings. The vertex,
  ## leaf and depth figures are formatted as `mean[stdDev]` with one decimal
  ## place each.
  let w = a.stats()
  (count: $w.count,
   vtxs: &"{w.mVtxs:.1f}[{w.dVtxs:.1f}]",
   leafs: &"{w.mLeafs:.1f}[{w.dLeafs:.1f}]",
   depth: &"{w.mDepth:.1f}[{w.dDepth:.1f}]",
   elapsed: a.sElapsed.toStr)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
25 changes: 25 additions & 0 deletions nimbus/db/aristo/aristo_delete/delete_helpers.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
".."/[aristo_desc, aristo_layers]


proc disposeOfVtx*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Vertex ID to clear
      ) =
  ## Clear the entry for `rvid` by resetting both its vertex record and its
  ## key record on the layers of `db`.
  layersResVtx(db, rvid)
  layersResKey(db, rvid)

# End
20 changes: 20 additions & 0 deletions nimbus/db/aristo/aristo_delete/delete_subtree.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

import ../aristo_constants

# Pick the sub-tree deletion implementation at compile time. The constant
# `DELETE_SUBTREE_VERTICES_MAX` is documented as disabling lazy deletion
# when set to zero; presumably `delete_subtree_now` then deletes sub-trees
# immediately while `delete_subtree_lazy` defers larger ones -- confirm
# against those modules.
when DELETE_SUBTREE_VERTICES_MAX == 0:
  import ./delete_subtree_now as del_sub
else:
  import ./delete_subtree_lazy as del_sub

# Re-export whichever implementation was selected under the common alias
export del_sub

# End
Loading

0 comments on commit ce713d9

Please sign in to comment.