From 83605e476cbe8a2782af721c20c88eba19a2c744 Mon Sep 17 00:00:00 2001
From: Calvin Kim <calvin@kcalvinalvin.info>
Date: Tue, 22 Aug 2023 07:22:06 +0900
Subject: [PATCH 1/3] btcutil: reuse serialized tx during TxHash

btcutil.Block caches the serialized raw bytes of the block during IBD.
These serialized block bytes include the serialized txs. The current tx
hash generation re-serializes the deserialized tx to create the raw
bytes and only then hashes them.

This commit changes the code so that the re-serialization never happens,
saving tons of cpu and memory overhead.
---
 btcutil/block.go | 23 +++++++++++++++-
 btcutil/tx.go    | 68 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/btcutil/block.go b/btcutil/block.go
index 7d38abc4a0..52c909192b 100644
--- a/btcutil/block.go
+++ b/btcutil/block.go
@@ -154,12 +154,26 @@ func (b *Block) Transactions() []*Tx {
 		b.transactions = make([]*Tx, len(b.msgBlock.Transactions))
 	}
 
+	// Offset of each tx.  80 accounts for the block header size.
+	offset := 80 + wire.VarIntSerializeSize(uint64(len(b.msgBlock.Transactions)))
+
 	// Generate and cache the wrapped transactions for all that haven't
 	// already been done.
 	for i, tx := range b.transactions {
 		if tx == nil {
 			newTx := NewTx(b.msgBlock.Transactions[i])
 			newTx.SetIndex(i)
+
+			size := b.msgBlock.Transactions[i].SerializeSize()
+
+			// The block may not always have the serializedBlock.
+			if len(b.serializedBlock) > 0 {
+				// This allows for the reuse of the already serialized tx.
+				newTx.setBytes(b.serializedBlock[offset : offset+size])
+
+				// Increment offset for this block.
+				offset += size
+			}
 			b.transactions[i] = newTx
 		}
 	}
@@ -234,6 +248,9 @@ func NewBlockFromBytes(serializedBlock []byte) (*Block, error) {
 		return nil, err
 	}
 	b.serializedBlock = serializedBlock
+	// This initializes []btcutil.Tx to have the serialized raw transactions cached.
+	// Helps speed up things like generating the txhash.
+	b.Transactions()
 	return b, nil
 }
 
@@ -257,9 +274,13 @@ func NewBlockFromReader(r io.Reader) (*Block, error) {
 // NewBlockFromBlockAndBytes returns a new instance of a bitcoin block given
 // an underlying wire.MsgBlock and the serialized bytes for it.  See Block.
 func NewBlockFromBlockAndBytes(msgBlock *wire.MsgBlock, serializedBlock []byte) *Block {
-	return &Block{
+	b := &Block{
 		msgBlock:        msgBlock,
 		serializedBlock: serializedBlock,
 		blockHeight:     BlockHeightUnknown,
 	}
+	// This initializes []btcutil.Tx to have the serialized raw transactions cached.
+	// Helps speed up things like generating the txhash.
+	b.Transactions()
+	return b
 }
diff --git a/btcutil/tx.go b/btcutil/tx.go
index 5633fef90e..abc055acbe 100644
--- a/btcutil/tx.go
+++ b/btcutil/tx.go
@@ -27,6 +27,7 @@ type Tx struct {
 	txHashWitness *chainhash.Hash // Cached transaction witness hash
 	txHasWitness  *bool           // If the transaction has witness data
 	txIndex       int             // Position within a block or TxIndexUnknown
+	rawBytes      []byte          // Raw bytes for the tx in the raw block.
 }
 
 // MsgTx returns the underlying wire.MsgTx for the transaction.
@@ -37,22 +38,68 @@ func (t *Tx) MsgTx() *wire.MsgTx {
 
 // Hash returns the hash of the transaction.  This is equivalent to
 // calling TxHash on the underlying wire.MsgTx, however it caches the
-// result so subsequent calls are more efficient.
+// result so subsequent calls are more efficient.  If the Tx has the
+// raw bytes of the tx cached, it will use that and skip serialization.
 func (t *Tx) Hash() *chainhash.Hash {
 	// Return the cached hash if it has already been generated.
 	if t.txHash != nil {
 		return t.txHash
 	}
 
-	// Cache the hash and return it.
-	hash := t.msgTx.TxHash()
+	// If the rawBytes aren't available, call msgtx.TxHash.
+	if t.rawBytes == nil {
+		hash := t.msgTx.TxHash()
+		t.txHash = &hash
+		return &hash
+	}
+
+	// If we have the raw bytes, then don't call msgTx.TxHash as that has the
+	// overhead of serialization.
+	var hash chainhash.Hash
+	if t.HasWitness() {
+		// If the raw bytes contain the witness, we must strip it out before
+		// calculating the hash.
+		baseSize := t.msgTx.SerializeSizeStripped()
+		nonWitnessBytes := make([]byte, 0, baseSize)
+
+		// Append the version bytes.
+		offset := 4
+		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[:offset]...)
+
+		// Append the input and output bytes.  -8 to account for the
+		// version bytes and the locktime bytes.
+		//
+		// Skip the 2 bytes for the witness encoding.
+		offset += 2
+		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[offset:offset+baseSize-8]...)
+
+		// Append the last 4 bytes which are the locktime bytes.
+		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[len(t.rawBytes)-4:]...)
+
+		// We purposely call doublehashh here instead of doublehashraw as we don't have the
+		// serialization overhead and avoiding the 1 alloc is better in this case.
+		hash = chainhash.DoubleHashRaw(func(w io.Writer) error {
+			_, err := w.Write(nonWitnessBytes)
+			return err
+		})
+	} else {
+		// If the raw bytes don't have the witness, we can use it directly.
+		//
+		// We purposely call doublehashh here instead of doublehashraw as we don't have the
+		// serialization overhead and avoiding the 1 alloc is better in this case.
+		hash = chainhash.DoubleHashRaw(func(w io.Writer) error {
+			_, err := w.Write(t.rawBytes)
+			return err
+		})
+	}
 	t.txHash = &hash
 	return &hash
 }
 
 // WitnessHash returns the witness hash (wtxid) of the transaction.  This is
 // equivalent to calling WitnessHash on the underlying wire.MsgTx, however it
-// caches the result so subsequent calls are more efficient.
+// caches the result so subsequent calls are more efficient.  If the Tx has the
+// raw bytes of the tx cached, it will use that and skip serialization.
 func (t *Tx) WitnessHash() *chainhash.Hash {
 	// Return the cached hash if it has already been generated.
 	if t.txHashWitness != nil {
@@ -60,7 +107,13 @@ func (t *Tx) WitnessHash() *chainhash.Hash {
 	}
 
 	// Cache the hash and return it.
-	hash := t.msgTx.WitnessHash()
+	var hash chainhash.Hash
+	if len(t.rawBytes) > 0 {
+		hash = chainhash.DoubleHashH(t.rawBytes)
+	} else {
+		hash = t.msgTx.WitnessHash()
+	}
+
 	t.txHashWitness = &hash
 	return &hash
 }
@@ -99,6 +152,11 @@ func NewTx(msgTx *wire.MsgTx) *Tx {
 	}
 }
 
+// setBytes sets the raw bytes of the tx.
+func (t *Tx) setBytes(bytes []byte) {
+	t.rawBytes = bytes
+}
+
 // NewTxFromBytes returns a new instance of a bitcoin transaction given the
 // serialized bytes.  See Tx.
 func NewTxFromBytes(serializedTx []byte) (*Tx, error) {

From 56de9ca878e797869553efd720846087a88e7434 Mon Sep 17 00:00:00 2001
From: Olaoluwa Osuntokun <laolu32@gmail.com>
Date: Thu, 28 Dec 2023 17:47:11 -0800
Subject: [PATCH 2/3] btcutil: align new serialization caching logic w/
 codebase style

---
 btcutil/block.go | 30 ++++++++++++++++++++++--------
 btcutil/tx.go    | 44 ++++++++++++++++++++++++++++----------------
 2 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/btcutil/block.go b/btcutil/block.go
index 52c909192b..7f8d8786e3 100644
--- a/btcutil/block.go
+++ b/btcutil/block.go
@@ -155,7 +155,9 @@ func (b *Block) Transactions() []*Tx {
 	}
 
 	// Offset of each tx.  80 accounts for the block header size.
-	offset := 80 + wire.VarIntSerializeSize(uint64(len(b.msgBlock.Transactions)))
+	offset := 80 + wire.VarIntSerializeSize(
+		uint64(len(b.msgBlock.Transactions)),
+	)
 
 	// Generate and cache the wrapped transactions for all that haven't
 	// already been done.
@@ -168,12 +170,16 @@ func (b *Block) Transactions() []*Tx {
 
 			// The block may not always have the serializedBlock.
 			if len(b.serializedBlock) > 0 {
-				// This allows for the reuse of the already serialized tx.
-				newTx.setBytes(b.serializedBlock[offset : offset+size])
+				// This allows for the reuse of the already
+				// serialized tx.
+				newTx.setBytes(
+					b.serializedBlock[offset : offset+size],
+				)
 
 				// Increment offset for this block.
 				offset += size
 			}
+
 			b.transactions[i] = newTx
 		}
 	}
@@ -248,9 +254,12 @@ func NewBlockFromBytes(serializedBlock []byte) (*Block, error) {
 		return nil, err
 	}
 	b.serializedBlock = serializedBlock
-	// This initializes []btcutil.Tx to have the serialized raw transactions cached.
-	// Helps speed up things like generating the txhash.
+
+	// This initializes []btcutil.Tx to have the serialized raw
+	// transactions cached.  Helps speed up things like generating the
+	// txhash.
 	b.Transactions()
+
 	return b, nil
 }
 
@@ -273,14 +282,19 @@ func NewBlockFromReader(r io.Reader) (*Block, error) {
 
 // NewBlockFromBlockAndBytes returns a new instance of a bitcoin block given
 // an underlying wire.MsgBlock and the serialized bytes for it.  See Block.
-func NewBlockFromBlockAndBytes(msgBlock *wire.MsgBlock, serializedBlock []byte) *Block {
+func NewBlockFromBlockAndBytes(msgBlock *wire.MsgBlock,
+	serializedBlock []byte) *Block {
+
 	b := &Block{
 		msgBlock:        msgBlock,
 		serializedBlock: serializedBlock,
 		blockHeight:     BlockHeightUnknown,
 	}
-	// This initializes []btcutil.Tx to have the serialized raw transactions cached.
-	// Helps speed up things like generating the txhash.
+
+	// This initializes []btcutil.Tx to have the serialized raw
+	// transactions cached.  Helps speed up things like generating the
+	// txhash.
 	b.Transactions()
+
 	return b
 }
diff --git a/btcutil/tx.go b/btcutil/tx.go
index abc055acbe..4f26befe32 100644
--- a/btcutil/tx.go
+++ b/btcutil/tx.go
@@ -36,10 +36,10 @@ func (t *Tx) MsgTx() *wire.MsgTx {
 	return t.msgTx
 }
 
-// Hash returns the hash of the transaction.  This is equivalent to
-// calling TxHash on the underlying wire.MsgTx, however it caches the
-// result so subsequent calls are more efficient.  If the Tx has the
-// raw bytes of the tx cached, it will use that and skip serialization.
+// Hash returns the hash of the transaction.  This is equivalent to calling
+// TxHash on the underlying wire.MsgTx, however it caches the result so
+// subsequent calls are more efficient.  If the Tx has the raw bytes of the tx
+// cached, it will use that and skip serialization.
 func (t *Tx) Hash() *chainhash.Hash {
 	// Return the cached hash if it has already been generated.
 	if t.txHash != nil {
@@ -53,45 +53,57 @@ func (t *Tx) Hash() *chainhash.Hash {
 		return &hash
 	}
 
-	// If we have the raw bytes, then don't call msgTx.TxHash as that has the
-	// overhead of serialization.
+	// If we have the raw bytes, then don't call msgTx.TxHash as that has
+	// the overhead of serialization. Instead, we can take the existing
+	// serialized bytes and hash them to speed things up.
 	var hash chainhash.Hash
 	if t.HasWitness() {
-		// If the raw bytes contain the witness, we must strip it out before
-		// calculating the hash.
+		// If the raw bytes contain the witness, we must strip it out
+		// before calculating the hash.
 		baseSize := t.msgTx.SerializeSizeStripped()
 		nonWitnessBytes := make([]byte, 0, baseSize)
 
 		// Append the version bytes.
 		offset := 4
-		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[:offset]...)
+		nonWitnessBytes = append(
+			nonWitnessBytes, t.rawBytes[:offset]...,
+		)
 
 		// Append the input and output bytes.  -8 to account for the
 		// version bytes and the locktime bytes.
 		//
 		// Skip the 2 bytes for the witness encoding.
 		offset += 2
-		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[offset:offset+baseSize-8]...)
+		nonWitnessBytes = append(
+			nonWitnessBytes,
+			t.rawBytes[offset:offset+baseSize-8]...,
+		)
 
 		// Append the last 4 bytes which are the locktime bytes.
-		nonWitnessBytes = append(nonWitnessBytes, t.rawBytes[len(t.rawBytes)-4:]...)
+		nonWitnessBytes = append(
+			nonWitnessBytes, t.rawBytes[len(t.rawBytes)-4:]...,
+		)
 
-		// We purposely call doublehashh here instead of doublehashraw as we don't have the
-		// serialization overhead and avoiding the 1 alloc is better in this case.
+		// The stripped bytes are already assembled, so the callback
+		// handed to DoubleHashRaw only needs to write them out; no
+		// serialization work is repeated.
 		hash = chainhash.DoubleHashRaw(func(w io.Writer) error {
 			_, err := w.Write(nonWitnessBytes)
 			return err
 		})
 	} else {
-		// If the raw bytes don't have the witness, we can use it directly.
+		// If the raw bytes don't have the witness, we can use it
+		// directly.
 		//
-		// We purposely call doublehashh here instead of doublehashraw as we don't have the
-		// serialization overhead and avoiding the 1 alloc is better in this case.
+		// The callback handed to DoubleHashRaw simply writes the
+		// cached bytes to the supplied writer, so no serialization
+		// work is repeated.
 		hash = chainhash.DoubleHashRaw(func(w io.Writer) error {
 			_, err := w.Write(t.rawBytes)
 			return err
 		})
 	}
+
 	t.txHash = &hash
 	return &hash
 }

From e102a81268be375eb74092fb7416108269f50fc4 Mon Sep 17 00:00:00 2001
From: Olaoluwa Osuntokun <laolu32@gmail.com>
Date: Thu, 28 Dec 2023 18:07:03 -0800
Subject: [PATCH 3/3] btcutil: add benchmarks for Hash + WitnessHash

---
 btcutil/bench_test.go | 80 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 btcutil/bench_test.go

diff --git a/btcutil/bench_test.go b/btcutil/bench_test.go
new file mode 100644
index 0000000000..c1f52da5b7
--- /dev/null
+++ b/btcutil/bench_test.go
@@ -0,0 +1,80 @@
+package btcutil_test
+
+import (
+	"testing"
+
+	"github.com/btcsuite/btcd/btcutil"
+	"github.com/btcsuite/btcd/chaincfg/chainhash"
+)
+
+var (
+	bencHash *chainhash.Hash
+)
+
+// BenchmarkTxHash benchmarks the performance of calculating the hash of a
+// transaction.
+func BenchmarkTxHash(b *testing.B) {
+	// Make a new block from the test block, we'll then call the Bytes
+	// function to cache the serialized block. Afterwards we call
+	// Transactions to populate the serialization cache.
+	testBlock := btcutil.NewBlock(&Block100000)
+	_, _ = testBlock.Bytes()
+
+	// The second transaction in the block has no witness data. The first
+	// does however.
+	testTx := testBlock.Transactions()[1]
+	testTx2 := testBlock.Transactions()[0]
+
+	// Run a benchmark for the portion that needs to strip the witness
+	// data from the transaction.
+	b.Run("tx_hash_has_witness", func(b *testing.B) {
+		b.ResetTimer()
+		b.ReportAllocs()
+
+		var txHash *chainhash.Hash
+		for i := 0; i < b.N; i++ {
+			txHash = testTx2.Hash()
+		}
+
+		bencHash = txHash
+	})
+
+	// Next, run it for the portion that can just hash the bytes directly.
+	b.Run("tx_hash_no_witness", func(b *testing.B) {
+		b.ResetTimer()
+		b.ReportAllocs()
+
+		var txHash *chainhash.Hash
+		for i := 0; i < b.N; i++ {
+			txHash = testTx.Hash()
+		}
+
+		bencHash = txHash
+	})
+
+}
+
+// BenchmarkTxWitnessHash benchmarks the performance of calculating the
+// witness hash (wtxid) of a transaction.
+func BenchmarkTxWitnessHash(b *testing.B) {
+	// Make a new block from the test block, we'll then call the Bytes
+	// function to cache the serialized block. Afterwards we call
+	// Transactions to populate the serialization cache.
+	testBlock := btcutil.NewBlock(&Block100000)
+	_, _ = testBlock.Bytes()
+
+	// The first transaction in the block has been modified to have witness
+	// data.
+	testTx := testBlock.Transactions()[0]
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	var txHash *chainhash.Hash
+	for i := 0; i < b.N; i++ {
+		txHash = testTx.WitnessHash()
+	}
+
+	bencHash = txHash
+
+}