Skip to content

Commit

Permalink
Minor optimizations to Bitcoin tx processing (kyuupichan#214)
Browse files Browse the repository at this point in the history
* Construct unpacking functions for byte format up front to save time
wasted on parsing format string every iteration.

* Store length ahead of time as the overhead adds up.

* Reduce object attribute lookups in hash functions.

* Clean up lib pkg API changes. Unit test new "public" API members.
Underscore prefix for internal hash module funcs. Make bytes.fromhex a
public function.

* Document recent performance findings.
  • Loading branch information
JustinTArthur authored and Neil committed Sep 9, 2017
1 parent 8cfa42e commit 0c2e5c6
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 24 deletions.
16 changes: 13 additions & 3 deletions docs/PERFORMANCE-NOTES
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Just some notes on performance with Python 3.5. I am taking this into
Just some notes on performance with Python 3.5. We are taking these into
account in the code.

- 60% faster to create lists with [] list comprehensions than tuples
Expand All @@ -16,11 +16,21 @@ account in the code.

- struct.pack, struct.unpack are over 60% faster than int.to_bytes and
int.from_bytes. They are faster little endian (presumably because
it matches the host) than big endian regardless of length.
it matches the host) than big endian regardless of length. Furthermore,
using stored packing and unpacking methods from Struct classes is faster
than using the flexible-format struct.[un]pack equivalents.

After storing the Struct('<Q').unpack_from function as unpack_uint64_from,
later calls to unpack_uint64_from(b, 0) are about 30% faster than calls to
unpack_from('<Q', b, 0).


- single-item list and tuple unpacking. Suppose b = (1, )

a, = b is about 0.4% faster than (a,) = b
and about 45% faster than a = b[0]

- multiple assignment is faster using tuples only for 3 or more items
- multiple assignment is faster using tuples only for 3 or more items

- retrieving a previously stored length of a bytes object can be over 200%
faster than a new call to len(b)
15 changes: 10 additions & 5 deletions lib/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,22 @@
import hashlib
import hmac

from lib.util import bytes_to_int, int_to_bytes
from lib.util import bytes_to_int, int_to_bytes, hex_to_bytes

_sha256 = hashlib.sha256
_sha512 = hashlib.sha512
_new_hash = hashlib.new
_new_hmac = hmac.new


def sha256(x):
'''Simple wrapper of hashlib sha256.'''
return hashlib.sha256(x).digest()
return _sha256(x).digest()


def ripemd160(x):
'''Simple wrapper of hashlib ripemd160.'''
h = hashlib.new('ripemd160')
h = _new_hash('ripemd160')
h.update(x)
return h.digest()

Expand All @@ -51,7 +56,7 @@ def double_sha256(x):

def hmac_sha512(key, msg):
'''Use SHA-512 to provide an HMAC.'''
return hmac.new(key, msg, hashlib.sha512).digest()
return _new_hmac(key, msg, _sha512).digest()


def hash160(x):
Expand All @@ -73,7 +78,7 @@ def hash_to_hex_str(x):

def hex_str_to_hash(x):
'''Convert a displayed hex string to a binary hash.'''
return bytes(reversed(bytes.fromhex(x)))
return bytes(reversed(hex_to_bytes(x)))


class Base58Error(Exception):
Expand Down
18 changes: 10 additions & 8 deletions lib/tx.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@


from collections import namedtuple
from struct import unpack_from

from lib.util import cachedproperty
from lib.hash import double_sha256, hash_to_str
from lib.util import (cachedproperty, unpack_int32_from, unpack_int64_from,
unpack_uint16_from, unpack_uint32_from,
unpack_uint64_from)


class Tx(namedtuple("Tx", "version inputs outputs locktime")):
Expand Down Expand Up @@ -78,6 +79,7 @@ class Deserializer(object):
def __init__(self, binary, start=0):
assert isinstance(binary, bytes)
self.binary = binary
self.binary_length = len(binary)
self.cursor = start

def read_tx(self):
Expand Down Expand Up @@ -131,7 +133,7 @@ def _read_byte(self):
def _read_nbytes(self, n):
cursor = self.cursor
self.cursor = end = cursor + n
assert len(self.binary) >= end
assert self.binary_length >= end
return self.binary[cursor:end]

def _read_varbytes(self):
Expand All @@ -149,27 +151,27 @@ def _read_varint(self):
return self._read_le_uint64()

def _read_le_int32(self):
result, = unpack_from('<i', self.binary, self.cursor)
result, = unpack_int32_from(self.binary, self.cursor)
self.cursor += 4
return result

def _read_le_int64(self):
result, = unpack_from('<q', self.binary, self.cursor)
result, = unpack_int64_from(self.binary, self.cursor)
self.cursor += 8
return result

def _read_le_uint16(self):
result, = unpack_from('<H', self.binary, self.cursor)
result, = unpack_uint16_from(self.binary, self.cursor)
self.cursor += 2
return result

def _read_le_uint32(self):
result, = unpack_from('<I', self.binary, self.cursor)
result, = unpack_uint32_from(self.binary, self.cursor)
self.cursor += 4
return result

def _read_le_uint64(self):
result, = unpack_from('<Q', self.binary, self.cursor)
result, = unpack_uint64_from(self.binary, self.cursor)
self.cursor += 8
return result

Expand Down
10 changes: 9 additions & 1 deletion lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import re
import sys
from collections import Container, Mapping
from struct import pack
from struct import pack, Struct


class LoggedClass(object):
Expand Down Expand Up @@ -309,3 +309,11 @@ def protocol_version(client_req, server_min, server_max):
result = None

return result

# Pre-bound unpack_from methods of compiled Struct objects.  Building them
# once here means the format string is not re-parsed on every call.  Each is
# called as f(buffer, offset) and returns a 1-tuple holding the decoded
# integer.  '<' selects little-endian byte order; i and q are signed 32- and
# 64-bit integers, H, I and Q are unsigned 16-, 32- and 64-bit integers.
unpack_int32_from = Struct('<i').unpack_from
unpack_int64_from = Struct('<q').unpack_from
unpack_uint16_from = Struct('<H').unpack_from
unpack_uint32_from = Struct('<I').unpack_from
unpack_uint64_from = Struct('<Q').unpack_from

# Public alias for bytes.fromhex: converts a string of hex digits to bytes.
hex_to_bytes = bytes.fromhex
4 changes: 2 additions & 2 deletions server/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ def assert_tx_hash(self, value):
'''Raise an RPCError if the value is not a valid transaction
hash.'''
try:
if len(bytes.fromhex(value)) == 32:
if len(util.hex_to_bytes(value)) == 32:
return
except Exception:
pass
Expand Down Expand Up @@ -898,7 +898,7 @@ async def utxo_get_address(self, tx_hash, index):
raw_tx = await self.daemon_request('getrawtransaction', tx_hash)
if not raw_tx:
return None
raw_tx = bytes.fromhex(raw_tx)
raw_tx = util.hex_to_bytes(raw_tx)
tx, tx_hash = self.coin.DESERIALIZER(raw_tx).read_tx()
if index >= len(tx.outputs):
return None
Expand Down
10 changes: 5 additions & 5 deletions server/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@

import aiohttp

import lib.util as util
from lib.util import LoggedClass, int_to_varint, hex_to_bytes
from lib.hash import hex_str_to_hash


class DaemonError(Exception):
'''Raised when the daemon returns an error in its results.'''


class Daemon(util.LoggedClass):
class Daemon(LoggedClass):
'''Handles connections to a daemon at the given URL.'''

WARMING_UP = -28
Expand Down Expand Up @@ -208,7 +208,7 @@ async def raw_blocks(self, hex_hashes):
params_iterable = ((h, False) for h in hex_hashes)
blocks = await self._send_vector('getblock', params_iterable)
# Convert hex string to bytes
return [bytes.fromhex(block) for block in blocks]
return [hex_to_bytes(block) for block in blocks]

async def mempool_hashes(self):
'''Update our record of the daemon's mempool hashes.'''
Expand Down Expand Up @@ -240,7 +240,7 @@ async def getrawtransactions(self, hex_hashes, replace_errs=True):
txs = await self._send_vector('getrawtransaction', params_iterable,
replace_errs=replace_errs)
# Convert hex strings to bytes
return [bytes.fromhex(tx) if tx else None for tx in txs]
return [hex_to_bytes(tx) if tx else None for tx in txs]

async def sendrawtransaction(self, params):
'''Broadcast a transaction to the network.'''
Expand Down Expand Up @@ -336,7 +336,7 @@ async def make_raw_block(self, b):
raw_block = header
num_txs = len(transactions)
if num_txs > 0:
raw_block += util.int_to_varint(num_txs)
raw_block += int_to_varint(num_txs)
raw_block += b''.join(transactions)
else:
raw_block += b'\x00'
Expand Down
20 changes: 20 additions & 0 deletions tests/lib/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def test_increment_byte_string():
assert util.increment_byte_string(b'\x01\x01') == b'\x01\x02'
assert util.increment_byte_string(b'\xff\xff') is None


def test_is_valid_hostname():
is_valid_hostname = util.is_valid_hostname
assert not is_valid_hostname('')
Expand Down Expand Up @@ -116,3 +117,22 @@ def test_protocol_version():
assert util.protocol_version(["0.8", "0.9"], "1.0", "1.1") is None
assert util.protocol_version(["1.1", "1.2"], "1.0", "1.1") == (1, 1)
assert util.protocol_version(["1.2", "1.3"], "1.0", "1.1") is None


def test_unpackers():
    '''Check each pre-built little-endian unpacker at several offsets.

    The buffer holds the bytes 0x00..0xff in order, so the expected value
    at any offset can be read straight off the byte positions.  Offsets
    near the end of the buffer have the top bit set; those are the cases
    that distinguish the signed unpackers from the unsigned ones.
    '''
    b = bytes(range(256))

    # Small values: signed and unsigned readers agree here.
    assert util.unpack_int32_from(b, 0) == (50462976,)
    assert util.unpack_int32_from(b, 42) == (757869354,)
    assert util.unpack_int64_from(b, 0) == (506097522914230528,)
    assert util.unpack_int64_from(b, 42) == (3544384782113450794,)

    assert util.unpack_uint16_from(b, 0) == (256,)
    assert util.unpack_uint16_from(b, 42) == (11050,)
    assert util.unpack_uint32_from(b, 0) == (50462976,)
    assert util.unpack_uint32_from(b, 42) == (757869354,)
    assert util.unpack_uint64_from(b, 0) == (506097522914230528,)
    assert util.unpack_uint64_from(b, 42) == (3544384782113450794,)

    # Top bit set: a signed reader must go negative while the unsigned
    # reader of the same width stays positive.  Without these, swapping
    # '<i' with '<I' (or '<q' with '<Q') would go unnoticed.
    assert util.unpack_int32_from(b, 252) == (-0x00010204,)
    assert util.unpack_uint32_from(b, 252) == (0xfffefdfc,)
    assert util.unpack_int64_from(b, 248) == (-0x0001020304050608,)
    assert util.unpack_uint64_from(b, 248) == (0xfffefdfcfbfaf9f8,)
    assert util.unpack_uint16_from(b, 254) == (0xfffe,)

def test_hex_transforms():
    '''hex_to_bytes turns a string of hex digits into the matching bytes.'''
    hex_digits = "AABBCCDDEEFF"
    expected = b'\xaa\xbb\xcc\xdd\xee\xff'
    assert util.hex_to_bytes(hex_digits) == expected

0 comments on commit 0c2e5c6

Please sign in to comment.