Skip to content

Commit

Permalink
Aristo db update delete functionality (#1621)
Browse files Browse the repository at this point in the history
* Fix missing branch checks in transcoder

why:
  Symmetry problem. `Blobify()` allowed for encoding degenerate branch
  vertices while `Deblobify()` rejected decoding wrongly encoded data.

* Update memory backend so that it rejects storing bogus vertices.

why:
  Error behaviour made similar to the rocks DB backend.

* Make sure that leaf vertex IDs are not repurposed

why:
  This makes it easier to record leaf node changes

* Update error return code for next()/right() traversal

why:
  Returning offending vertex ID (besides error code) helps debugging

* Update Merkle hasher for deleted nodes

why:
  Not implemented, yet

also:
  Provide cache & backend consistency check functions. This was
  partly re-implemented from `hashifyCheck()`

* Simplify some unit tests

* Fix delete function

why:
  Was conceptually wrong
  • Loading branch information
mjfh authored Jun 30, 2023
1 parent aa6d478 commit dd1c8ed
Show file tree
Hide file tree
Showing 25 changed files with 1,495 additions and 532 deletions.
93 changes: 93 additions & 0 deletions nimbus/db/aristo/aristo_check.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

## Aristo DB -- Consistency checks
## ===============================
##
{.push raises: [].}

import
std/[algorithm, sequtils, sets, tables],
eth/common,
stew/[interval_set, results],
./aristo_init/[aristo_memory, aristo_rocksdb],
"."/[aristo_desc, aristo_get, aristo_init, aristo_vid],
./aristo_hashify/hashify_helper,
./aristo_check/[check_be, check_cache]

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkCache*(
    db: AristoDb;                      # Database, top layer
    relax = false;                     # Check existing hashes only
      ): Result[void,(VertexID,AristoError)] =
  ## Verify that the cache structure is correct as it would be after `merge()`
  ## and `hashify()` operations. Unless `relaxed` is set `true` it would not
  ## fully check against the backend, which is typically not applicable after
  ## `delete()` operations.
  ##
  ## The following is verified:
  ##
  ## * Each `sTab[]` entry has a valid vertex which can be compiled as a node.
  ##   If `relax` is set `false`, the Merkle hashes are recompiled and must
  ##   match.
  ##
  ## * The hash table `kMap[]` and its inverse lookup table `pAmk[]` must
  ##   correspond.
  ##
  if relax:
    let rc = db.checkCacheRelaxed()
    if rc.isErr:
      return rc
  else:
    let rc = db.checkCacheStrict()
    if rc.isErr:
      return rc

  # Checks shared by both modes; its result is passed through as this
  # function's result.
  db.checkCacheCommon()


proc checkBE*(
    db: AristoDb;                      # Database, top layer
    relax = true;                      # Not re-compiling hashes if `true`
    cache = true;                      # Also verify cache
      ): Result[void,(VertexID,AristoError)] =
  ## Verify database backend structure. If the argument `relax` is set `false`,
  ## all necessary Merkle hashes are compiled and verified. If the argument
  ## `cache` is set `true`, the cache is also checked so that a `save()`
  ## operation will leave the backend consistent.
  ##
  ## The following is verified:
  ##
  ## * Each vertex ID on the structural table can be represented as a Merkle
  ##   patricia Tree node. If `relax` is set `false`, the Merkle hashes are
  ##   all recompiled and must match.
  ##
  ## * The set of free vertex IDs as potentially supplied by the ID generator
  ##   state is disjoint from the set of already used vertex IDs on the
  ##   database. Moreover, the union of both sets is equivalent to the set of
  ##   positive `uint64` numbers.
  ##
  if not db.backend.isNil:
    # Dispatch on the concrete backend type; `BackendNone` has nothing to
    # check on disk.
    let be = db.to(TypedBackendRef)
    case be.kind:
    of BackendMemory:
      return be.MemBackendRef.checkBE(db, cache=cache, relax=relax)
    of BackendRocksDB:
      return be.RdbBackendRef.checkBE(db, cache=cache, relax=relax)
    of BackendNone:
      discard
  ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
175 changes: 175 additions & 0 deletions nimbus/db/aristo/aristo_check/check_be.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
std/[algorithm, sequtils, sets, tables],
eth/common,
stew/interval_set,
../aristo_hashify/hashify_helper,
../aristo_init/[aristo_memory, aristo_rocksdb],
".."/[aristo_desc, aristo_get, aristo_vid]

const
Vid2 = @[VertexID(2)].toHashSet

# ------------------------------------------------------------------------------
# Private helper
# ------------------------------------------------------------------------------

proc invTo(s: IntervalSetRef[VertexID,uint64]; T: type HashSet[VertexID]): T =
  ## Render the intervals of the argument set `s` as a `HashSet` of vertex
  ## IDs, i.e. in the shape of a vertex ID generator state list.
  if high(uint64) <= s.total:
    # Degenerate full-range set -- enumerating it would not terminate.
    return
  for iv in s.increasing:
    if iv.maxPt != high(VertexID):
      for pt in iv.minPt .. iv.maxPt:
        result.incl pt
    else:
      # The topmost interval is open ended; it is represented by its
      # start point only.
      result.incl iv.minPt

proc toNodeBe(
    vtx: VertexRef;                    # Vertex to convert
    db: AristoDb;                      # Database, top layer
      ): Result[NodeRef,VertexID] =
  ## Compile the argument vertex `vtx` into a node, resolving all Merkle
  ## link keys against the backend only (unlike `toNode()` which also
  ## consults the cache.) On failure, the offending vertex ID is returned
  ## as error value.
  case vtx.vType:
  of Leaf:
    # Leaf vertices carry no links, nothing to resolve.
    return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
  of Branch:
    let node = NodeRef(vType: Branch, bVid: vtx.bVid)
    for n in 0 .. 15:
      let vid = vtx.bVid[n]
      if vid.isValid:
        let rc = db.getKeyBackend vid
        if rc.isOk and rc.value.isValid:
          node.key[n] = rc.value
        else:
          # Link key unavailable on the backend => node cannot be compiled
          return err(vid)
      else:
        node.key[n] = VOID_HASH_KEY
    return ok node
  of Extension:
    let
      vid = vtx.eVid
      rc = db.getKeyBackend vid
    if rc.isOk and rc.value.isValid:
      let node = NodeRef(vType: Extension, ePfx: vtx.ePfx, eVid: vid)
      node.key[0] = rc.value
      return ok node
    return err(vid)

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkBE*[T](
    be: T;                             # backend descriptor
    db: AristoDb;                      # Database, top layer
    relax: bool;                       # Not compiling hashes if `true`
    cache: bool;                       # Also verify cache
      ): Result[void,(VertexID,AristoError)] =
  ## Make sure that each vertex has a Merkle hash and vice versa. Also check
  ## the vertex ID generator state.
  # Start with the full range of potentially unused vertex IDs. Every ID
  # found on the backend is removed from this interval set below so that,
  # at the end, it mirrors the expected generator state.
  let vids = IntervalSetRef[VertexID,uint64].init()
  discard vids.merge Interval[VertexID,uint64].new(VertexID(1),high(VertexID))

  # Every stored vertex must have a Merkle key on the backend
  for (_,vid,vtx) in be.walkVtx:
    if not vtx.isValid:
      return err((vid,CheckBeVtxInvalid))
    let rc = db.getKeyBackend vid
    if rc.isErr or not rc.value.isValid:
      return err((vid,CheckBeKeyMissing))

  # Every stored Merkle key must have a compilable vertex on the backend
  for (_,vid,key) in be.walkKey:
    if not key.isValid:
      return err((vid,CheckBeKeyInvalid))
    let rc = db.getVtxBackend vid
    if rc.isErr or not rc.value.isValid:
      return err((vid,CheckBeVtxMissing))
    let rx = rc.value.toNodeBe db      # backend only
    if rx.isErr:
      return err((vid,CheckBeKeyCantCompile))
    if not relax:
      let expected = rx.value.toHashKey
      if expected != key:
        return err((vid,CheckBeKeyMismatch))
    discard vids.reduce Interval[VertexID,uint64].new(vid,vid)

  # Compare calculated state against database state
  block:
    # Extract vertex ID generator state
    var vGen: HashSet[VertexID]
    for (_,_,w) in be.walkIdg:
      vGen = vGen + w.toHashSet
    let
      vGenExpected = vids.invTo(HashSet[VertexID])
      delta = vGenExpected -+- vGen    # symmetric difference
    if 0 < delta.len:
      # Exclude fringe case when there is a single root vertex only
      if vGenExpected != Vid2 or 0 < vGen.len:
        return err((delta.toSeq.sorted[^1],CheckBeGarbledVGen))

  # Check cache against backend
  if cache:

    # Check structural table
    for (vid,vtx) in db.top.sTab.pairs:
      # A `kMap[]` entry must exist.
      if not db.top.kMap.hasKey vid:
        return err((vid,CheckBeCacheKeyMissing))
      if vtx.isValid:
        # Register existing vid against backend generator state
        discard vids.reduce Interval[VertexID,uint64].new(vid,vid)
      else:
        # Some vertex is to be deleted, the key must be empty
        let lbl = db.top.kMap.getOrVoid vid
        if lbl.isValid:
          return err((vid,CheckBeCacheKeyNonEmpty))
        # There must be a representation on the backend DB
        if db.getVtxBackend(vid).isErr:
          return err((vid,CheckBeCacheVidUnsynced))
        # Register deleted vid against backend generator state
        discard vids.merge Interval[VertexID,uint64].new(vid,vid)

    # Check key table
    for (vid,lbl) in db.top.kMap.pairs:
      let vtx = db.getVtx vid
      if not db.top.sTab.hasKey(vid) and not vtx.isValid:
        return err((vid,CheckBeCacheKeyDangling))
      if lbl.isValid and not relax:
        if not vtx.isValid:
          return err((vid,CheckBeCacheVtxDangling))
        let rc = vtx.toNode db         # compile cache first
        if rc.isErr:
          return err((vid,CheckBeCacheKeyCantCompile))
        let expected = rc.value.toHashKey
        if expected != lbl.key:
          return err((vid,CheckBeCacheKeyMismatch))

    # Check vGen -- the cached generator state must agree with what the
    # backend walk above predicted (after compacting via `vidReorg()`.)
    var tmp = AristoDb(top: AristoLayerRef(vGen: db.top.vGen))
    tmp.vidReorg()
    let
      vGen = tmp.top.vGen.toHashSet
      vGenExpected = vids.invTo(HashSet[VertexID])
      delta = vGenExpected -+- vGen    # symmetric difference
    if 0 < delta.len:
      # Exclude fringe case when there is a single root vertex only
      if vGenExpected != Vid2 or 0 < vGen.len:
        return err((delta.toSeq.sorted[^1],CheckBeCacheGarbledVGen))

  ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
126 changes: 126 additions & 0 deletions nimbus/db/aristo/aristo_check/check_cache.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
std/[sequtils, sets, tables],
eth/common,
stew/results,
../aristo_hashify/hashify_helper,
".."/[aristo_desc, aristo_get]

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkCacheStrict*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Strict cache check: every cached vertex must compile into a node,
  ## carry a valid Merkle label in `kMap[]`, and be consistently registered
  ## in the reverse lookup table `pAmk[]`.
  for (vid,vtx) in db.top.sTab.pairs:
    # The vertex must be compilable against the cache
    let node = block:
      let rc = vtx.toNode db
      if rc.isErr:
        return err((vid,CheckStkVtxIncomplete))
      rc.value

    # The Merkle label must exist and match the recompiled hash
    let lbl = db.top.kMap.getOrVoid vid
    if not lbl.isValid:
      return err((vid,CheckStkVtxKeyMissing))
    if lbl.key != node.toHashKey:
      return err((vid,CheckStkVtxKeyMismatch))

    # The reverse lookup must point back to this vertex ID
    let backLink = db.top.pAmk.getOrVoid lbl
    if not backLink.isValid:
      return err((vid,CheckStkRevKeyMissing))
    if backLink != vid:
      return err((vid,CheckStkRevKeyMismatch))

  if 0 < db.top.pAmk.len and db.top.pAmk.len < db.top.sTab.len:
    # Cannot have fewer reverse entries than cached vertices
    return err((VertexID(0),CheckStkVtxCountMismatch))

  ok()


proc checkCacheRelaxed*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Relaxed cache check, typically applicable after `delete()` operations
  ## when the cache need not be fully synchronised against the backend.
  if 0 < db.top.pPrf.len:
    # Only the locked (proof) vertices are fully verified
    for vid in db.top.pPrf:
      let vtx = db.top.sTab.getOrVoid vid
      if vtx.isValid:
        let rc = vtx.toNode db
        if rc.isErr:
          return err((vid,CheckRlxVtxIncomplete))

        let lbl = db.top.kMap.getOrVoid vid
        if not lbl.isValid:
          return err((vid,CheckRlxVtxKeyMissing))
        if lbl.key != rc.value.toHashKey:
          return err((vid,CheckRlxVtxKeyMismatch))

        let revVid = db.top.pAmk.getOrVoid lbl
        if not revVid.isValid:
          return err((vid,CheckRlxRevKeyMissing))
        if revVid != vid:
          return err((vid,CheckRlxRevKeyMismatch))
      else:
        # Must be a deleted entry: still on the backend while the cached
        # key entry exists but is void.
        let rc = db.getVtxBackend vid
        if rc.isErr:
          return err((vid,CheckRlxVidVtxBeMissing))
        if not db.top.kMap.hasKey vid:
          return err((vid,CheckRlxVtxEmptyKeyMissing))
        if db.top.kMap.getOrVoid(vid).isValid:
          return err((vid,CheckRlxVtxEmptyKeyExpected))
  else:
    for (vid,lbl) in db.top.kMap.pairs:
      if lbl.isValid:                  # otherwise to be deleted
        let vtx = db.getVtx vid
        if vtx.isValid:
          let rc = vtx.toNode db
          if rc.isOk:
            if lbl.key != rc.value.toHashKey:
              return err((vid,CheckRlxVtxKeyMismatch))

            let revVid = db.top.pAmk.getOrVoid lbl
            if not revVid.isValid:
              return err((vid,CheckRlxRevKeyMissing))
            # Bug fix: the original had this `revVid != vid` test twice,
            # with the first (live) copy returning `CheckRlxRevKeyMissing`
            # and the second (dead) copy returning the intended code.
            if revVid != vid:
              return err((vid,CheckRlxRevKeyMismatch))
  ok()


proc checkCacheCommon*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Cache checks shared by both the strict and the relaxed mode.
  # Count non-void `kMap[]` entries only -- void entries indicate backend
  # deletion and have no reverse `pAmk[]` mapping.
  var nValidKeys = 0
  for lbl in db.top.kMap.values:
    if lbl.isValid:
      nValidKeys.inc

  if db.top.pAmk.len != nValidKeys:
    # Mismatch detected -- find a more specific culprit before giving up
    var seen: HashSet[VertexID]
    for (key,vid) in db.top.pAmk.pairs:
      if not db.top.kMap.hasKey(vid):
        return err((vid,CheckAnyRevVtxMissing))
      if vid in seen:
        return err((vid,CheckAnyRevVtxDup))
      seen.incl vid
    return err((VertexID(0),CheckAnyRevCountMismatch)) # should not apply(!)

  # Every locked (proof) vertex must carry a key entry
  for vid in db.top.pPrf:
    if not db.top.kMap.hasKey(vid):
      return err((vid,CheckAnyVtxLockWithoutKey))
  ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

Loading

0 comments on commit dd1c8ed

Please sign in to comment.