Disable vid reuse

In several places, the full free vid list is being copied - at block height 3.5m, there would be 26m entries in the list (and growing), so that copying the list takes more time than the rest of the processing.

Also, the RocksDB documentation mentions performance benefits of inserting already-sorted keys - this seems like a loose argument for using ever-increasing vid numbers.

~3x-6x perf improvement depending on where in the block history we are.

pre:
```
INF 2024-06-02 08:26:40.721+02:00 Imported blocks blockNumber=3500001 txs=1498120 gas=8890837 bps=28.796 tps=320.649 gps=1150.022 avgBps=29.002 avgTps=434.486 avgGps=2578.528 elapsed=57m28s28ms67us160ns
```

post:
```
INF 2024-06-02 21:37:12.146+02:00 Imported blocks blockNumber=3500001 blocks=100000 txs=1498120 gas=8890837 bps=155.409 tps=1730.486 gps=6206.475 avgBps=120.526 avgTps=1805.623 avgGps=10715.761 elapsed=13m49s697ms
```
status-im · Jun 2, 2024 · 3fd8bba · 3fd8bba
1 parent 7f76586
commit 3fd8bba
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 17 deletions.
diff --git a/nimbus/db/aristo/aristo_init/rocks_db.nim b/nimbus/db/aristo/aristo_init/rocks_db.nim
@@ -147,7 +147,14 @@ proc getIdgFn(db: RdbBackendRef): GetIdgFn =
         return ok w           # Compiler error with `ok(EmptyVidSeq)`
 
       # Decode data record
-      data.deblobify seq[VertexID]
+      # TODO vid reuse disabled, implementation too slow since list could have
+      #      millions of entries
+      data.deblobify(seq[VertexID]).map(proc (v: seq[VertexID]): seq[VertexID] =
+        if v.len > 1:
+          @[v[^1]]
+        else:
+          v
+      )
 
 proc getLstFn(db: RdbBackendRef): GetLstFn =
   result =

diff --git a/nimbus/db/aristo/aristo_vid.nim b/nimbus/db/aristo/aristo_vid.nim
@@ -45,19 +45,6 @@ proc vidFetch*(db: AristoDbRef; pristine = false): VertexID =
   doAssert LEAST_FREE_VID <= result.distinctBase
 
 
-proc vidPeek*(db: AristoDbRef): VertexID =
-  ## Like `new()` without consuming this *ID*. It will return the *ID* that
-  ## would be returned by the `new()` function.
-  ##
-  case db.vGen.len:
-  of 0:
-    VertexID(LEAST_FREE_VID)
-  of 1:
-    db.vGen[^1]
-  else:
-    db.vGen[^2]
-
-
 proc vidDispose*(db: AristoDbRef; vid: VertexID) =
   ## Recycle the argument `vtxID` which is useful after deleting entries from
   ## the vertex table to prevent the `VertexID` type key values small.
@@ -69,10 +56,13 @@ proc vidDispose*(db: AristoDbRef; vid: VertexID) =
       let topID = db.vGen[^1]
       # Only store smaller numbers: all numberts larger than `topID`
       # are free numbers
-      if vid < topID:
+      # TODO vid reuse disabled, implementation too slow since list could grow
+      #      to millions of entries
+      # if vid < topID:
+      #   db.top.final.vGen[^1] = vid
+      #   db.top.final.vGen.add topID
+      if vid == topID - 1: # no gap - can recycle
         db.top.final.vGen[^1] = vid
-        db.top.final.vGen.add topID
-
 
 proc vidReorg*(vGen: seq[VertexID]): seq[VertexID] =
   ## Return a compacted version of the argument vertex ID generator state