lru cache updates
* replace the rocksdb row cache with larger rdb lru caches - these serve the
same purpose but are more efficient because they skip serialization,
locking and rocksdb layering
* don't append freshly written items to the cache (see the sketch below) -
appending them evicts existing items and replaces them with low-value
entries that might never be read - during write-heavy periods of
processing, the newly added entries were themselves evicted again during
the store loop
* allow tuning the rdb lru cache sizes at runtime
* add a (hidden) option to print lru stats at exit (replacing the
compile-time flag)
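
The caching policy, roughly, as a sketch (not the actual diff code; `Vid`, `Vtx` and `loadFromBackend` are made up for illustration, while `lruFetch`/`lruAppend` are the `stew/keyed_queue` calls used in the diff below): reads consult the LRU and populate it on a miss, writes go straight to the backend so they don't push hot read entries out.

```nim
# Sketch only - illustrates the read-path-only caching policy; Vid, Vtx and
# loadFromBackend are hypothetical stand-ins.
import results, stew/keyed_queue

type
  Vid = uint64
  Vtx = object
    data: string

var
  vtxLru: KeyedQueue[Vid, Vtx]
  vtxLruSize = 1024 # runtime-tunable, cf. --debug-rdb-vtx-cache-size below

proc loadFromBackend(vid: Vid): Vtx =
  Vtx(data: "vertex " & $vid) # stand-in for the rocksdb lookup

proc getVtx(vid: Vid): Vtx =
  # Read path: try the LRU first, fall back to the backend and cache the result
  let rc = vtxLru.lruFetch(vid)
  if rc.isOk:
    return rc.value()
  vtxLru.lruAppend(vid, loadFromBackend(vid), vtxLruSize)

proc putVtx(vid: Vid, vtx: Vtx) =
  # Write path: persist to the backend only - no lruAppend here, so existing
  # (likely-to-be-read-again) entries are not evicted by fresh writes
  discard
```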

pre:
```
INF 2024-09-03 15:07:01.136+02:00 Imported blocks
blockNumber=20012001 blocks=12000 importedSlot=9216851 txs=1837042
mgas=181911.265 bps=11.675 tps=1870.397 mgps=176.819 avgBps=10.288
avgTps=1574.889 avgMGps=155.952 elapsed=19m26s458ms
```

post:
```
INF 2024-09-03 13:54:26.730+02:00 Imported blocks
blockNumber=20012001 blocks=12000 importedSlot=9216851 txs=1837042
mgas=181911.265 bps=11.637 tps=1864.384 mgps=176.250 avgBps=11.202
avgTps=1714.920 avgMGps=169.818 elapsed=17m51s211ms
```

~9% import perf improvement at similar mem usage :)
arnetheduck committed Sep 3, 2024
1 parent 35cc78c commit 4604624
Showing 10 changed files with 164 additions and 93 deletions.
26 changes: 25 additions & 1 deletion nimbus/config.nim
@@ -410,6 +410,23 @@ type
defaultValueDesc: $defaultBlockCacheSize
name: "debug-rocksdb-block-cache-size".}: int

rdbKeyCacheSize {.
hidden
defaultValue: defaultRdbKeyCacheSize
defaultValueDesc: $defaultRdbKeyCacheSize
name: "debug-rdb-key-cache-size".}: int

rdbVtxCacheSize {.
hidden
defaultValue: defaultRdbVtxCacheSize
defaultValueDesc: $defaultRdbVtxCacheSize
name: "debug-rdb-vtx-cache-size".}: int

rdbPrintStats {.
hidden
desc: "Print RDB statistics at exit"
name: "debug-rdb-print-stats".}: bool

case cmd* {.
command
defaultValue: NimbusCmd.noCommand }: NimbusCmd
@@ -790,12 +807,19 @@ func era1Dir*(conf: NimbusConf): OutDir =
func eraDir*(conf: NimbusConf): OutDir =
conf.eraDirOpt.get(OutDir(conf.dataDir.string & "/era"))

func dbOptions*(conf: NimbusConf): DbOptions =
func dbOptions*(conf: NimbusConf, noKeyCache = false): DbOptions =
DbOptions.init(
maxOpenFiles = conf.rocksdbMaxOpenFiles,
writeBufferSize = conf.rocksdbWriteBufferSize,
rowCacheSize = conf.rocksdbRowCacheSize,
blockCacheSize = conf.rocksdbBlockCacheSize,
rdbKeyCacheSize =
if noKeyCache: 0 else: conf.rdbKeyCacheSize,
rdbVtxCacheSize =
# The import command does not use the key cache - better give it to vtx
if noKeyCache: conf.rdbKeyCacheSize + conf.rdbVtxCacheSize
else: conf.rdbVtxCacheSize,
rdbPrintStats = conf.rdbPrintStats,
)

# KLUDGE: The `load()` template does currently not work within any exception
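For clarity, the effect of the new `noKeyCache` switch in `dbOptions` above as a stand-alone sketch (the numbers are made up; the real defaults are defined elsewhere in the tree): during `import`, the hash-key cache is disabled and its budget is handed to the vertex cache instead.

```nim
# Illustration only - mirrors the dbOptions logic above with hypothetical sizes.
func splitBudget(keySize, vtxSize: int, noKeyCache: bool): tuple[key, vtx: int] =
  if noKeyCache: (key: 0, vtx: keySize + vtxSize)
  else: (key: keySize, vtx: vtxSize)

doAssert splitBudget(100, 200, noKeyCache = false) == (key: 100, vtx: 200)
doAssert splitBudget(100, 200, noKeyCache = true) == (key: 0, vtx: 300)
```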
6 changes: 4 additions & 2 deletions nimbus/db/aristo/aristo_init/persistent.nim
@@ -38,12 +38,13 @@ export

proc newAristoRdbDbRef(
basePath: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(AristoDbRef, seq[ColFamilyReadWrite]), AristoError]=
let
(be, oCfs) = ? rocksDbBackend(basePath, dbOpts, cfOpts, guestCFs)
(be, oCfs) = ? rocksDbBackend(basePath, opts, dbOpts, cfOpts, guestCFs)
vTop = block:
let rc = be.getTuvFn()
if rc.isErr:
@@ -62,14 +63,15 @@ proc init*(
T: type AristoDbRef;
B: type RdbBackendRef;
basePath: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(T, seq[ColFamilyReadWrite]), AristoError] =
## Generic constructor, `basePath` argument is ignored for memory backend
## databases (which also unconditionally succeed initialising.)
##
basePath.newAristoRdbDbRef dbOpts, cfOpts, guestCFs
basePath.newAristoRdbDbRef opts, dbOpts, cfOpts, guestCFs

proc activateWrTrigger*(
db: AristoDbRef;
3 changes: 2 additions & 1 deletion nimbus/db/aristo/aristo_init/rocks_db.nim
@@ -250,6 +250,7 @@ proc putBegHostingFn(db: RdbBackendRef): PutBegFn =

proc rocksDbBackend*(
path: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
@@ -259,7 +260,7 @@

# Initialise RocksDB
let oCfs = block:
let rc = db.rdb.init(path, dbOpts, cfOpts, guestCFs)
let rc = db.rdb.init(path, opts, dbOpts, cfOpts, guestCFs)
if rc.isErr:
when extraTraceMessages:
trace logTxt "constructor failed",
35 changes: 33 additions & 2 deletions nimbus/db/aristo/aristo_init/rocks_db/rdb_desc.nim
@@ -15,6 +15,7 @@

import
std/os,
std/concurrency/atomics,
eth/common,
rocksdb,
stew/[endians2, keyed_queue],
@@ -53,7 +54,9 @@ type
# handling of the longer key.)
#
rdKeyLru*: KeyedQueue[VertexID,HashKey] ## Read cache
rdKeySize*: int
rdVtxLru*: KeyedQueue[VertexID,VertexRef] ## Read cache
rdVtxSize*: int

basePath*: string ## Database directory
trgWriteEvent*: RdbWriteEventCb ## Database piggyback callback handler
@@ -64,13 +67,32 @@ type
VtxCF = "AriVtx" ## Vertex column family name
KeyCF = "AriKey" ## Hash key column family name

RdbLruCounter* = array[bool, Atomic[uint64]]

RdbStateType* = enum
Account
World

const
BaseFolder* = "nimbus" ## Same as for Legacy DB
DataFolder* = "aristo" ## Legacy DB has "data"
RdKeyLruMaxSize* = 80000
## Max size of read cache for keys - ~4 levels of MPT
RdVtxLruMaxSize* = 80000
## Max size of read cache for vertex IDs - ~4 levels of MPT
RdVtxLruMaxSize* = 1118481
## Max size of read cache for vertex IDs - ~5 levels of MPT - this should
## land at about 200mb of vertex data and probably another 200mb of overhead
## Notably, this cache is an important complement to the rocksdb block cache
## and has a similar effect as the rocksdb row cache, albeit with lower
## overhead
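
As a side note, the new default looks like a fully populated hexary trie cached down to five levels below the root: 16^0 + 16^1 + ... + 16^5 = 1118481, which at the quoted ~200mb works out to just under 200 bytes per cached vertex. A quick sanity check of that arithmetic (illustration only, not part of the diff):

```nim
import std/math

# 1118481 == 16^0 + 16^1 + ... + 16^5, i.e. a full 16-ary MPT down to 5 levels
var entries = 0
for level in 0 .. 5:
  entries += 16 ^ level
doAssert entries == 1118481
```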

var
# Hit/miss counters for LRU cache - global so as to integrate easily with
# nim-metrics and `uint64` to ensure that increasing them is fast - collection
# happens from a separate thread.
# TODO maybe turn this into more general framework for LRU reporting since
# we have lots of caches of this sort
rdbVtxLruStats*: array[RdbStateType, array[VertexType, RdbLruCounter]]
rdbKeyLruStats*: array[RdbStateType, RdbLruCounter]

# ------------------------------------------------------------------------------
# Public functions
@@ -93,6 +115,13 @@ func dataDir*(rdb: RdbInst): string =
template toOpenArray*(xid: AdminTabID): openArray[byte] =
xid.uint64.toBytesBE.toOpenArray(0,7)

template to*(v: RootedVertexID, T: type RdbStateType): RdbStateType =
if v.root == VertexID(1): RdbStateType.World else: RdbStateType.Account

template inc*(v: var RdbLruCounter, hit: bool) =
discard v[hit].fetchAdd(1, moRelaxed)

template get*(v: RdbLruCounter, hit: bool): uint64 =
v[hit].load(moRelaxed)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
86 changes: 9 additions & 77 deletions nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim
@@ -39,21 +39,6 @@ type
RdbVtxLruCounter = ref object of Counter
RdbKeyLruCounter = ref object of Counter

LruCounter = array[bool, Atomic[uint64]]

StateType = enum
Account
World

var
# Hit/miss counters for LRU cache - global so as to integrate easily with
# nim-metrics and `uint64` to ensure that increasing them is fast - collection
# happens from a separate thread.
# TODO maybe turn this into more general framework for LRU reporting since
# we have lots of caches of this sort
rdbVtxLruStats: array[StateType, array[VertexType, LruCounter]]
rdbKeyLruStats: array[StateType, LruCounter]

var
rdbVtxLruStatsMetric {.used.} = RdbVtxLruCounter.newCollector(
"aristo_rdb_vtx_lru_total",
@@ -64,21 +49,12 @@ var
"aristo_rdb_key_lru_total", "HashKey LRU lookup", labels = ["state", "hit"]
)

template to(v: RootedVertexID, T: type StateType): StateType =
if v.root == VertexID(1): StateType.World else: StateType.Account

template inc(v: var LruCounter, hit: bool) =
discard v[hit].fetchAdd(1, moRelaxed)

template get(v: LruCounter, hit: bool): uint64 =
v[hit].load(moRelaxed)

method collect*(collector: RdbVtxLruCounter, output: MetricHandler) =
let timestamp = collector.now()

# We don't care about synchronization between each type of metric or between
# the metrics thread and others since small differences like this don't matter
for state in StateType:
for state in RdbStateType:
for vtype in VertexType:
for hit in [false, true]:
output(
@@ -92,7 +68,7 @@ method collect*(collector: RdbVtxLruCounter, output: MetricHandler) =
method collect*(collector: RdbKeyLruCounter, output: MetricHandler) =
let timestamp = collector.now()

for state in StateType:
for state in RdbStateType:
for hit in [false, true]:
output(
name = "aristo_rdb_key_lru_total",
@@ -129,10 +105,10 @@ proc getKey*(
# Try LRU cache first
var rc = rdb.rdKeyLru.lruFetch(rvid.vid)
if rc.isOK:
rdbKeyLruStats[rvid.to(StateType)].inc(true)
rdbKeyLruStats[rvid.to(RdbStateType)].inc(true)
return ok(move(rc.value))

rdbKeyLruStats[rvid.to(StateType)].inc(false)
rdbKeyLruStats[rvid.to(RdbStateType)].inc(false)

# Otherwise fetch from backend database
# A threadvar is used to avoid allocating an environment for onData
@@ -153,7 +129,7 @@ proc getKey*(
return err((RdbHashKeyExpected,"")) # Parsing failed

# Update cache and return
ok rdb.rdKeyLru.lruAppend(rvid.vid, res.value(), RdKeyLruMaxSize)
ok rdb.rdKeyLru.lruAppend(rvid.vid, res.value(), rdb.rdKeySize)

proc getVtx*(
rdb: var RdbInst;
@@ -162,7 +138,7 @@ proc getVtx*(
# Try LRU cache first
var rc = rdb.rdVtxLru.lruFetch(rvid.vid)
if rc.isOK:
rdbVtxLruStats[rvid.to(StateType)][rc.value().vType].inc(true)
rdbVtxLruStats[rvid.to(RdbStateType)][rc.value().vType].inc(true)
return ok(move(rc.value))

# Otherwise fetch from backend database
@@ -179,61 +155,17 @@

if not gotData:
# As a hack, we count missing data as leaf nodes
rdbVtxLruStats[rvid.to(StateType)][VertexType.Leaf].inc(false)
rdbVtxLruStats[rvid.to(RdbStateType)][VertexType.Leaf].inc(false)
return ok(VertexRef(nil))

if res.isErr():
return err((res.error(), "Parsing failed")) # Parsing failed

rdbVtxLruStats[rvid.to(StateType)][res.value().vType].inc(false)
rdbVtxLruStats[rvid.to(RdbStateType)][res.value().vType].inc(false)

# Update cache and return
ok rdb.rdVtxLru.lruAppend(rvid.vid, res.value(), RdVtxLruMaxSize)
ok rdb.rdVtxLru.lruAppend(rvid.vid, res.value(), rdb.rdVtxSize)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

when defined(printStatsAtExit):
# Useful hack for printing exact metrics to compare runs with different
# settings
import std/[exitprocs, strformat]
addExitProc(
proc() =
block vtx:
var misses, hits: uint64
echo "vtxLru(", RdVtxLruMaxSize, ")"
echo " state vtype miss hit total hitrate"
for state in StateType:
for vtype in VertexType:
let
(miss, hit) = (
rdbVtxLruStats[state][vtype].get(false),
rdbVtxLruStats[state][vtype].get(true),
)
hitRate = float64(hit * 100) / (float64(hit + miss))
misses += miss
hits += hit
echo &"{state:>8} {vtype:>8} {miss:>10} {hit:>10} {miss+hit:>10} {hitRate:>6.2f}%"
let hitRate = float64(hits * 100) / (float64(hits + misses))
echo &" all all {misses:>10} {hits:>10} {misses+hits:>10} {hitRate:>6.2f}%"

block key:
var misses, hits: uint64
echo "keyLru(", RdKeyLruMaxSize, ") "

echo " state miss hit total hitrate"

for state in StateType:
let
(miss, hit) =
(rdbKeyLruStats[state].get(false), rdbKeyLruStats[state].get(true))
hitRate = float64(hit * 100) / (float64(hit + miss))
misses += miss
hits += hit

echo &"{state:>8} {miss:>10} {hit:>10} {miss+hit:>10} {hitRate:>5.2f}%"

let hitRate = float64(hits * 100) / (float64(hits + misses))
echo &" all {misses:>10} {hits:>10} {misses+hits:>10} {hitRate:>5.2f}%"
)
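
The compile-time `printStatsAtExit` block above is removed in favour of the hidden `--debug-rdb-print-stats` option; the code that honours the new `rdbPrintStats` flag lives in one of the changed files not shown here. A rough sketch of the idea (assuming the module defining `rdbVtxLruStats`, `rdbKeyLruStats`, `RdbStateType`, `VertexType` and the `get` template is imported - this is not the commit's actual code):

```nim
import std/[exitprocs, strformat]

proc printRdbLruStats() =
  echo "   state    vtype       miss        hit"
  for state in RdbStateType:
    for vtype in VertexType:
      let
        miss = rdbVtxLruStats[state][vtype].get(false)
        hit = rdbVtxLruStats[state][vtype].get(true)
      echo &"{state:>8} {vtype:>8} {miss:>10} {hit:>10}"

proc setupRdbStats(printStats: bool) =
  # Called once at startup; replaces the old compile-time switch
  if printStats:
    addExitProc(printRdbLruStats)
```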
