Improve performance when extending the chain head

Also make an unrelated change which makes circleci happy, it uses an updated version of eth-utils.
ethereum · Dec 10, 2019 · 92e575b · 92e575b
1 parent 10df2a6
commit 92e575b
Show file tree

Hide file tree

Showing 4 changed files with 61 additions and 46 deletions.
diff --git a/eth/db/chain.py b/eth/db/chain.py
@@ -90,21 +90,11 @@ def get_block_uncles(self, uncles_hash: Hash32) -> Tuple[BlockHeaderAPI, ...]:
             return tuple(rlp.decode(encoded_uncles, sedes=rlp.sedes.CountableList(BlockHeader)))
 
     @classmethod
-    def _set_as_canonical_chain_head(
-            cls,
-            db: DatabaseAPI,
-            block_hash: Hash32,
-            genesis_parent_hash: Hash32,
-    ) -> Tuple[Tuple[BlockHeaderAPI, ...], Tuple[BlockHeaderAPI, ...]]:
-        try:
-            header = cls._get_block_header_by_hash(db, block_hash)
-        except HeaderNotFound:
-            raise ValueError(
-                f"Cannot use unknown block hash as canonical head: {header.hash}"
-            )
-
-        new_canonical_headers = tuple(reversed(
-            cls._find_new_ancestors(db, header, genesis_parent_hash)))
+    def _decanonicalize_old_headers(
+        cls,
+        db: DatabaseAPI,
+        new_canonical_headers: Tuple[BlockHeaderAPI, ...]
+    ) -> Tuple[BlockHeaderAPI, ...]:
         old_canonical_headers = []
 
         # remove transaction lookups for blocks that are no longer canonical
@@ -118,19 +108,16 @@ def _set_as_canonical_chain_head(
                 old_header = cls._get_block_header_by_hash(db, old_hash)
                 old_canonical_headers.append(old_header)
                 try:
-                    for transaction_hash in cls._get_block_transaction_hashes(db, old_header):
+                    transaction_hashes = cls._get_block_transaction_hashes(db, old_header)
+                    for transaction_hash in transaction_hashes:
                         cls._remove_transaction_from_canonical_chain(db, transaction_hash)
                 except MissingTrieNode:
-                    # If the transactions were never stored for the (now) non-canonical chain,
-                    #   then you don't need to remove them from the canonical chain lookup.
+                    # If the transactions were never stored for the (now) non-canonical
+                    # chain, then you don't need to remove them from the canonical chain
+                    # lookup.
                     pass
 
-        for h in new_canonical_headers:
-            cls._add_block_number_to_hash_lookup(db, h)
-
-        db.set(SchemaV1.make_canonical_head_hash_lookup_key(), header.hash)
-
-        return new_canonical_headers, tuple(old_canonical_headers)
+        return tuple(old_canonical_headers)
 
     #
     # Block API

diff --git a/eth/db/header.py b/eth/db/header.py
@@ -1,5 +1,5 @@
 import functools
-from typing import Iterable, Tuple
+from typing import cast, Iterable, Tuple
 
 import rlp
 
@@ -83,11 +83,15 @@ def get_canonical_head(self) -> BlockHeaderAPI:
 
     @classmethod
     def _get_canonical_head(cls, db: DatabaseAPI) -> BlockHeaderAPI:
+        canonical_head_hash = cls._get_canonical_head_hash(db)
+        return cls._get_block_header_by_hash(db, canonical_head_hash)
+
+    @classmethod
+    def _get_canonical_head_hash(cls, db: DatabaseAPI) -> Hash32:
         try:
-            canonical_head_hash = db[SchemaV1.make_canonical_head_hash_lookup_key()]
+            return Hash32(db[SchemaV1.make_canonical_head_hash_lookup_key()])
         except KeyError:
             raise CanonicalHeadNotFound("No canonical head set for this chain")
-        return cls._get_block_header_by_hash(db, Hash32(canonical_head_hash))
 
     #
     # Header API
@@ -173,7 +177,7 @@ def _persist_checkpoint_header(
         )
         previous_score = score - header.difficulty
         cls._set_hash_scores_to_db(db, header, previous_score)
-        cls._set_as_canonical_chain_head(db, header.hash, header.parent_hash)
+        cls._set_as_canonical_chain_head(db, header, header.parent_hash)
 
     @classmethod
     def _persist_header_chain(
@@ -226,21 +230,21 @@ def _persist_header_chain(
             score = cls._set_hash_scores_to_db(db, curr_chain_head, score)
 
         try:
-            previous_canonical_head = cls._get_canonical_head(db).hash
+            previous_canonical_head = cls._get_canonical_head_hash(db)
             head_score = cls._get_score(db, previous_canonical_head)
         except CanonicalHeadNotFound:
-            return cls._set_as_canonical_chain_head(db, curr_chain_head.hash, genesis_parent_hash)
+            return cls._set_as_canonical_chain_head(db, curr_chain_head, genesis_parent_hash)
 
         if score > head_score:
-            return cls._set_as_canonical_chain_head(db, curr_chain_head.hash, genesis_parent_hash)
+            return cls._set_as_canonical_chain_head(db, curr_chain_head, genesis_parent_hash)
 
         return tuple(), tuple()
 
     @classmethod
     def _set_as_canonical_chain_head(
         cls,
         db: DatabaseAPI,
-        block_hash: Hash32,
+        header: BlockHeaderAPI,
         genesis_parent_hash: Hash32,
     ) -> Tuple[Tuple[BlockHeaderAPI, ...], Tuple[BlockHeaderAPI, ...]]:
         """
@@ -251,14 +255,41 @@ def _set_as_canonical_chain_head(
             are no longer in the canonical chain
         """
         try:
-            header = cls._get_block_header_by_hash(db, block_hash)
-        except HeaderNotFound:
-            raise ValueError(
-                f"Cannot use unknown block hash as canonical head: {block_hash}"
+            current_canonical_head = cls._get_canonical_head_hash(db)
+        except CanonicalHeadNotFound:
+            current_canonical_head = None
+
+        new_canonical_headers: Tuple[BlockHeaderAPI, ...]
+        old_canonical_headers: Tuple[BlockHeaderAPI, ...]
+
+        if current_canonical_head and header.parent_hash == current_canonical_head:
+            # the calls to _find_new_ancestors and _decanonicalize_old_headers are
+            # relatively expensive, it's better to skip them in this case, where we're
+            # extending the canonical chain by a header
+            new_canonical_headers = (header,)
+            old_canonical_headers = ()
+        else:
+            new_canonical_headers = cast(
+                Tuple[BlockHeaderAPI, ...],
+                tuple(reversed(cls._find_new_ancestors(db, header, genesis_parent_hash)))
+            )
+            old_canonical_headers = cls._decanonicalize_old_headers(
+                db, new_canonical_headers
             )
 
-        new_canonical_headers = tuple(reversed(
-            cls._find_new_ancestors(db, header, genesis_parent_hash)))
+        for h in new_canonical_headers:
+            cls._add_block_number_to_hash_lookup(db, h)
+
+        db.set(SchemaV1.make_canonical_head_hash_lookup_key(), header.hash)
+
+        return new_canonical_headers, old_canonical_headers
+
+    @classmethod
+    def _decanonicalize_old_headers(
+        cls,
+        db: DatabaseAPI,
+        new_canonical_headers: Tuple[BlockHeaderAPI, ...]
+    ) -> Tuple[BlockHeaderAPI, ...]:
         old_canonical_headers = []
 
         for h in new_canonical_headers:
@@ -271,12 +302,7 @@ def _set_as_canonical_chain_head(
                 old_canonical_header = cls._get_block_header_by_hash(db, old_canonical_hash)
                 old_canonical_headers.append(old_canonical_header)
 
-        for h in new_canonical_headers:
-            cls._add_block_number_to_hash_lookup(db, h)
-
-        db.set(SchemaV1.make_canonical_head_hash_lookup_key(), header.hash)
-
-        return new_canonical_headers, tuple(old_canonical_headers)
+        return tuple(old_canonical_headers)
 
     @classmethod
     @to_tuple

diff --git a/eth/tools/_utils/normalization.py b/eth/tools/_utils/normalization.py
@@ -128,7 +128,7 @@ def normalize_to_address(value: AnyStr) -> Address:
         return CREATE_CONTRACT_ADDRESS
 
 
-robust_decode_hex = eth_utils.curried.hexstr_if_str(to_bytes)  # type: ignore  # https://github.com/ethereum/eth-utils/issues/156  # noqa: E501
+robust_decode_hex = eth_utils.curried.hexstr_if_str(to_bytes)
 
 
 #
@@ -247,7 +247,7 @@ def state_definition_to_dict(state_definition: GeneralState) -> AccountState:
         "nonce": normalize_int,
         "storage": normalize_storage
     }, required=[])),
-    eth_utils.curried.apply_formatter_if(  # type: ignore  # https://github.com/ethereum/eth-utils/issues/156  # noqa: E501
+    eth_utils.curried.apply_formatter_if(
         lambda s: isinstance(s, Iterable) and not isinstance(s, Mapping),
         state_definition_to_dict
     ),

diff --git a/newsfragments/1891.performance.rst b/newsfragments/1891.performance.rst
@@ -0,0 +1,2 @@
+Improve performance when importing a header which is a child of the current canonical
+chain tip.