From 995b449fbc5d398dc793d647ee58b55df2dffdd7 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 18 Mar 2024 18:51:27 +0800 Subject: [PATCH] feat(pkg): introduce `blob.go` (#644) Co-authored-by: maskpp --- driver/txlist_fetcher/blob.go | 3 +- pkg/rpc/beaconclient.go | 3 +- pkg/rpc/blob.go | 291 +++++++++++++++++++ pkg/rpc/{tx_blob.go => blob_tx.go} | 91 +----- pkg/rpc/{tx_blob_test.go => blob_tx_test.go} | 31 +- 5 files changed, 306 insertions(+), 113 deletions(-) create mode 100644 pkg/rpc/blob.go rename pkg/rpc/{tx_blob.go => blob_tx.go} (65%) rename pkg/rpc/{tx_blob_test.go => blob_tx_test.go} (59%) diff --git a/driver/txlist_fetcher/blob.go b/driver/txlist_fetcher/blob.go index feabca6c5..a5b38efac 100644 --- a/driver/txlist_fetcher/blob.go +++ b/driver/txlist_fetcher/blob.go @@ -56,7 +56,8 @@ func (d *BlobFetcher) Fetch( sha256.New(), &commitment, ) == common.BytesToHash(meta.BlobHash[:]) { - return rpc.DecodeBlob(common.FromHex(sidecar.Blob)) + blob := rpc.Blob(common.FromHex(sidecar.Blob)) + return blob.ToData() } } diff --git a/pkg/rpc/beaconclient.go b/pkg/rpc/beaconclient.go index 4195cf90e..e231b7428 100644 --- a/pkg/rpc/beaconclient.go +++ b/pkg/rpc/beaconclient.go @@ -66,7 +66,8 @@ func (c *BeaconClient) GetBlobByHash(ctx context.Context, slot *big.Int, blobHas sha256.New(), &commitment, ) == blobHash { - return DecodeBlob(common.FromHex(sidecar.Blob)) + blob := Blob(common.FromHex(sidecar.Blob)) + return blob.ToData() } } diff --git a/pkg/rpc/blob.go b/pkg/rpc/blob.go new file mode 100644 index 000000000..ee46faf25 --- /dev/null +++ b/pkg/rpc/blob.go @@ -0,0 +1,291 @@ +package rpc + +import ( + "crypto/sha256" + "errors" + "fmt" + "reflect" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/crypto/kzg4844" +) + +// Taken from: +// https://github.com/ethereum-optimism/optimism/blob/develop/op-service/eth/blob.go +const ( + BlobSize = 4096 * 32 + MaxBlobDataSize = (4*31+3)*1024 
- 4 + EncodingVersion = 0 + VersionOffset = 1 // offset of the version byte in the blob encoding + Rounds = 1024 // number of encode/decode rounds + BlobTxHashVersion = 0x01 // Version byte of the commitment hash +) + +type Data = hexutil.Bytes + +var ( + ErrBlobInvalidFieldElement = errors.New("invalid field element") + ErrBlobInvalidEncodingVersion = errors.New("invalid encoding version") + ErrBlobInvalidLength = errors.New("invalid length for blob") + ErrBlobInputTooLarge = errors.New("too much data to encode in one blob") + ErrBlobExtraneousData = errors.New("non-zero data encountered where blob should be empty") + ErrBlobExtraneousDataFieldElement = errors.New("non-zero data encountered where field element should be empty") +) + +type Blob [BlobSize]byte + +func (b *Blob) KZGBlob() *kzg4844.Blob { + return (*kzg4844.Blob)(b) +} + +func (b *Blob) UnmarshalJSON(text []byte) error { + return hexutil.UnmarshalFixedJSON(reflect.TypeOf(b), text, b[:]) +} + +func (b *Blob) UnmarshalText(text []byte) error { + return hexutil.UnmarshalFixedText("Bytes32", text, b[:]) +} + +func (b *Blob) MarshalText() ([]byte, error) { + return hexutil.Bytes(b[:]).MarshalText() +} + +func (b *Blob) String() string { + return hexutil.Encode(b[:]) +} + +// TerminalString implements log.TerminalStringer, formatting a string for console +// output during logging. +func (b *Blob) TerminalString() string { + return fmt.Sprintf("%x..%x", b[:3], b[BlobSize-3:]) +} + +func (b *Blob) ComputeKZGCommitment() (kzg4844.Commitment, error) { + return kzg4844.BlobToCommitment(*b.KZGBlob()) +} + +// KZGToVersionedHash computes the "blob hash" (a.k.a. versioned-hash) of a blob-commitment, as used in a blob-tx. +// We implement it here because it is unfortunately not (currently) exposed by geth. 
+func KZGToVersionedHash(commitment kzg4844.Commitment) (out common.Hash) {
+	// EIP-4844 spec:
+	//	def kzg_to_versioned_hash(commitment: KZGCommitment) -> VersionedHash:
+	//		return VERSIONED_HASH_VERSION_KZG + sha256(commitment)[1:]
+	h := sha256.New()
+	h.Write(commitment[:]) // hash.Hash.Write never returns an error
+	_ = h.Sum(out[:0])     // Sum appends to out[:0], i.e. the digest is written into out itself
+	out[0] = BlobTxHashVersion
+	return out
+}
+
+// VerifyBlobProof verifies that the given blob and proof correspond to the given commitment,
+// returning the error reported by kzg4844.VerifyBlobProof if the verification fails.
+func VerifyBlobProof(blob *Blob, commitment kzg4844.Commitment, proof kzg4844.Proof) error {
+	return kzg4844.VerifyBlobProof(*blob.KZGBlob(), commitment, proof)
+}
+
+// FromData encodes the given input data into this blob. The encoding scheme is as follows:
+//
+// In each round we perform 7 reads of input of lengths (31,1,31,1,31,1,31) bytes respectively for
+// a total of 127 bytes. This data is encoded into the next 4 field elements of the output by
+// placing each of the 4x31 byte chunks into bytes [1:32] of its respective field element. The
+// three single byte chunks (24 bits) are split into 4x6-bit chunks, each of which is written into
+// the top most byte of its respective field element, leaving the top 2 bits of each field element
+// empty to avoid modulus overflow. This process is repeated for up to 1024 rounds until all data
+// is encoded.
+//
+// For only the very first output field, bytes [1:5] are used to encode the version and the length
+// of the data. Returns an error wrapping ErrBlobInputTooLarge if data exceeds MaxBlobDataSize.
+func (b *Blob) FromData(data Data) error {
+	if len(data) > MaxBlobDataSize {
+		return fmt.Errorf("%w: len=%d", ErrBlobInputTooLarge, len(data))
+	}
+	b.Clear()
+
+	readOffset := 0
+
+	// read 1 byte of input, 0 if there is no input left
+	read1 := func() byte {
+		if readOffset >= len(data) {
+			return 0
+		}
+		out := data[readOffset]
+		readOffset++
+		return out
+	}
+
+	writeOffset := 0
+	var buf31 [31]byte
+	var zero31 [31]byte
+
+	// Read up to 31 bytes of input (left-aligned), into buf31.
+	read31 := func() {
+		if readOffset >= len(data) {
+			copy(buf31[:], zero31[:])
+			return
+		}
+		n := copy(buf31[:], data[readOffset:]) // copy as much data as we can
+		copy(buf31[n:], zero31[:])             // pad with zeroes (since there might not be enough data)
+		readOffset += n
+	}
+	// Write a byte, updates the write-offset.
+	// Asserts that the write-offset matches encoding-algorithm expectations.
+	// Asserts that the value is 6 bits.
+	write1 := func(v byte) {
+		if writeOffset%32 != 0 {
+			panic(fmt.Errorf("blob encoding: invalid byte write offset: %d", writeOffset))
+		}
+		if v&0b1100_0000 != 0 {
+			panic(fmt.Errorf("blob encoding: invalid 6 bit value: 0b%b", v))
+		}
+		b[writeOffset] = v
+		writeOffset++
+	}
+	// Write buf31 to the blob, updates the write-offset.
+	// Asserts that the write-offset matches encoding-algorithm expectations.
+	write31 := func() {
+		if writeOffset%32 != 1 {
+			panic(fmt.Errorf("blob encoding: invalid bytes31 write offset: %d", writeOffset))
+		}
+		copy(b[writeOffset:], buf31[:])
+		writeOffset += 31
+	}
+
+	for round := 0; round < Rounds && readOffset < len(data); round++ {
+		// The first field element encodes the version and the length of the data in [1:5].
+		// This is a manual substitute for read31(), preparing the buf31.
+		if round == 0 {
+			buf31[0] = EncodingVersion
+			// Encode the length as big-endian uint24.
+			// The length check at the start above ensures we can always fit the length value into only 3 bytes.
+			ilen := uint32(len(data))
+			buf31[1] = byte(ilen >> 16)
+			buf31[2] = byte(ilen >> 8)
+			buf31[3] = byte(ilen)
+
+			readOffset += copy(buf31[4:], data[:])
+		} else {
+			read31()
+		}
+
+		x := read1()
+		A := x & 0b0011_1111
+		write1(A)
+		write31()
+
+		read31()
+		y := read1()
+		B := (y & 0b0000_1111) | ((x & 0b1100_0000) >> 2)
+		write1(B)
+		write31()
+
+		read31()
+		z := read1()
+		C := z & 0b0011_1111
+		write1(C)
+		write31()
+
+		read31()
+		D := ((z & 0b1100_0000) >> 2) | ((y & 0b1111_0000) >> 4)
+		write1(D)
+		write31()
+	}
+
+	if readOffset < len(data) {
+		panic(fmt.Errorf("expected to fit data but failed, read offset: %d, data: %d", readOffset, len(data)))
+	}
+	return nil
+}
+
+// ToData decodes the blob into raw byte data; see FromData above for the encoding format. Any
+// error wraps one of ErrBlobInvalidEncodingVersion, ErrBlobInvalidLength, ErrBlobInvalidFieldElement,
+// ErrBlobExtraneousDataFieldElement or ErrBlobExtraneousData, so callers can match it with errors.Is.
+func (b *Blob) ToData() (Data, error) {
+	// check the version
+	if b[VersionOffset] != EncodingVersion {
+		return nil, fmt.Errorf(
+			"%w: expected version %d, got %d", ErrBlobInvalidEncodingVersion, EncodingVersion, b[VersionOffset])
+	}
+
+	// decode the 3-byte big-endian length value into a 4-byte integer
+	outputLen := uint32(b[2])<<16 | uint32(b[3])<<8 | uint32(b[4])
+	if outputLen > MaxBlobDataSize {
+		return nil, fmt.Errorf("%w: got %d", ErrBlobInvalidLength, outputLen)
+	}
+
+	// round 0 is special cased to copy only the remaining 27 bytes of the first field element into
+	// the output due to version/length encoding already occupying its first 5 bytes.
+	output := make(Data, MaxBlobDataSize)
+	copy(output[0:27], b[5:])
+
+	// now process remaining 3 field elements to complete round 0
+	opos := 28 // current position into output buffer
+	ipos := 32 // current position into the input blob
+	var err error
+	encodedByte := make([]byte, 4) // buffer for the 4 6-bit chunks
+	encodedByte[0] = b[0]
+	for i := 1; i < 4; i++ {
+		encodedByte[i], opos, ipos, err = b.decodeFieldElement(opos, ipos, output)
+		if err != nil {
+			return nil, err
+		}
+	}
+	opos = reassembleBytes(opos, encodedByte, output)
+
+	// in each remaining round we decode 4 field elements (128 bytes) of the input into 127 bytes
+	// of output
+	for i := 1; i < Rounds && opos < int(outputLen); i++ {
+		for j := 0; j < 4; j++ {
+			// save the first byte of each field element for later re-assembly
+			encodedByte[j], opos, ipos, err = b.decodeFieldElement(opos, ipos, output)
+			if err != nil {
+				return nil, err
+			}
+		}
+		opos = reassembleBytes(opos, encodedByte, output)
+	}
+	for i := int(outputLen); i < len(output); i++ {
+		if output[i] != 0 {
+			return nil, fmt.Errorf("fe=%d: %w", opos/32, ErrBlobExtraneousDataFieldElement)
+		}
+	}
+	output = output[:outputLen]
+	for ; ipos < BlobSize; ipos++ {
+		if b[ipos] != 0 {
+			return nil, fmt.Errorf("pos=%d: %w", ipos, ErrBlobExtraneousData)
+		}
+	}
+	return output, nil
+}
+
+// decodeFieldElement decodes the next input field element by writing its lower 31 bytes into its
+// appropriate place in the output and checking the high order byte is valid. Returns an error
+// wrapping ErrBlobInvalidFieldElement if either of the element's two high order bits is set.
+func (b *Blob) decodeFieldElement(opos, ipos int, output []byte) (byte, int, int, error) {
+	// a field element's two highest order bits must be 0; ipos is a byte offset, hence ipos/32 below
+	if b[ipos]&0b1100_0000 != 0 {
+		return 0, 0, 0, fmt.Errorf("%w: field element: %d", ErrBlobInvalidFieldElement, ipos/32)
+	}
+	copy(output[opos:], b[ipos+1:ipos+32])
+	return b[ipos], opos + 32, ipos + 32, nil
+}
+
+// reassembleBytes takes the 4x6-bit chunks from encodedByte, reassembles them into 3 bytes of
+// output, and stores them in the gaps left between the 31-byte chunks. Returns the adjusted opos.
+func reassembleBytes(opos int, encodedByte []byte, output []byte) int {
+	opos-- // account for fact that we don't output a 128th byte
+	x := (encodedByte[0] & 0b0011_1111) | ((encodedByte[1] & 0b0011_0000) << 2)
+	y := (encodedByte[1] & 0b0000_1111) | ((encodedByte[3] & 0b0000_1111) << 4)
+	z := (encodedByte[2] & 0b0011_1111) | ((encodedByte[3] & 0b0011_0000) << 2)
+	// put the re-assembled bytes in their appropriate output locations
+	output[opos-32] = z
+	output[opos-(32*2)] = y
+	output[opos-(32*3)] = x
+	return opos
+}
+
+// Clear resets every byte of the blob to zero. FromData calls it before encoding so that bytes
+// left over from an earlier use of the same Blob cannot end up in the new encoding.
+func (b *Blob) Clear() {
+	*b = Blob{}
+}
diff --git a/pkg/rpc/tx_blob.go b/pkg/rpc/blob_tx.go
similarity index 65%
rename from pkg/rpc/tx_blob.go
rename to pkg/rpc/blob_tx.go
index f0d5696c0..2ba5ca856 100644
--- a/pkg/rpc/tx_blob.go
+++ b/pkg/rpc/blob_tx.go
@@ -13,10 +13,6 @@ import (
 	"github.com/holiman/uint256"
 )
 
-var (
-	preLenBlob = 32
-)
-
 var (
 	ErrBlobInvalid = errors.New("invalid blob encoding")
 )
@@ -118,7 +114,12 @@ func (c *EthClient) CreateBlobTx(
 
 // MakeSidecar makes a sidecar which only includes one blob with the given data.
func MakeSidecar(data []byte) (*types.BlobTxSidecar, error) { - sideCar := &types.BlobTxSidecar{Blobs: EncodeBlobs(data)} + var blob Blob + if err := blob.FromData(data); err != nil { + return nil, err + } + + sideCar := &types.BlobTxSidecar{Blobs: []kzg4844.Blob{*blob.KZGBlob()}} for _, blob := range sideCar.Blobs { commitment, err := kzg4844.BlobToCommitment(blob) if err != nil { @@ -133,83 +134,3 @@ func MakeSidecar(data []byte) (*types.BlobTxSidecar, error) { } return sideCar, nil } - -func encode(origin []byte) []byte { - var res = make([]byte, preLenBlob, len(origin)/31*32+32) - for ; len(origin) >= 31; origin = origin[31:] { - data := [32]byte{} - copy(data[1:], origin[:31]) - res = append(res, data[:]...) - } - if len(origin) > 0 { - data := make([]byte, len(origin)+1) - copy(data[1:], origin) - res = append(res, data...) - } - - // Add length prefix - blobLen := big.NewInt(int64(len(res))).Bytes() - copy(res[preLenBlob-len(blobLen):preLenBlob], blobLen) - - return res -} - -func decode(data []byte) ([]byte, error) { - blobLen := new(big.Int).SetBytes(data[:preLenBlob]) - var lenBytes = blobLen.Uint64() - if int(lenBytes) > len(data) { - return nil, ErrBlobInvalid - } - return data[preLenBlob:lenBytes], nil -} - -// EncodeBlobs encodes bytes into a EIP-4844 blob. -func EncodeBlobs(origin []byte) []kzg4844.Blob { - data := encode(origin) - var blobs []kzg4844.Blob - for ; len(data) >= BlobBytes; data = data[BlobBytes:] { - blob := kzg4844.Blob{} - copy(blob[:], data[:BlobBytes]) - blobs = append(blobs, blob) - } - if len(data) > 0 { - blob := kzg4844.Blob{} - copy(blob[:], data) - blobs = append(blobs, blob) - } - return blobs -} - -// DecodeBlob decodes the given blob data. 
-func DecodeBlob(blob []byte) (res []byte, err error) { - if len(blob) != BlobBytes { - return nil, ErrBlobInvalid - } - blob, err = decode(blob) - if err != nil { - return nil, err - } - - for ; len(blob) >= 32; blob = blob[32:] { - data := [31]byte{} - copy(data[:], blob[1:]) - res = append(res, data[:]...) - } - if len(blob) > 0 { - res = append(res, blob[1:]...) - } - return res, nil -} - -// DecodeBlobs decodes the given blobs. -func DecodeBlobs(blobs []kzg4844.Blob) ([]byte, error) { - var res []byte - for _, blob := range blobs { - data, err := DecodeBlob(blob[:]) - if err != nil { - return nil, err - } - res = append(res, data...) - } - return res, nil -} diff --git a/pkg/rpc/tx_blob_test.go b/pkg/rpc/blob_tx_test.go similarity index 59% rename from pkg/rpc/tx_blob_test.go rename to pkg/rpc/blob_tx_test.go index 8a467a182..9b180086a 100644 --- a/pkg/rpc/tx_blob_test.go +++ b/pkg/rpc/blob_tx_test.go @@ -2,16 +2,15 @@ package rpc import ( "context" - "encoding/json" "os" "testing" "time" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/rlp" "github.com/stretchr/testify/assert" "github.com/taikoxyz/taiko-client/internal/utils" @@ -61,34 +60,14 @@ func TestSendingBlobTx(t *testing.T) { } func TestMakeSideCar(t *testing.T) { - origin, err := os.ReadFile("./tx_blob.go") + origin, err := os.ReadFile("./blob_tx.go") assert.NoError(t, err) sideCar, mErr := MakeSidecar(origin) assert.NoError(t, mErr) - origin1, dErr := DecodeBlobs(sideCar.Blobs) + blob := Blob(sideCar.Blobs[0]) + origin1, dErr := blob.ToData() assert.NoError(t, dErr) - assert.Equal(t, origin, origin1) -} - -func TestSpecialEndWith0(t *testing.T) { - // nolint: lll - var txsData = ` 
-[{"type":"0x2","chainId":"0x28c59","nonce":"0x1cca","to":"0x0167001000000000000000000000000000010099","gas":"0x86b3","gasPrice":null,"maxPriorityFeePerGas":"0x59682f00","maxFeePerGas":"0x59682f02","value":"0x0","input":"0xa9059cbb00000000000000000000000001670010000000000000000000000000000100990000000000000000000000000000000000000000000000000000000000000001","accessList":[],"v":"0x0","r":"0x2d554e149d15575030f271403a3b359cd9d5df8acb47ae7df5845aadc54b1ee2","s":"0x39b7ce8e803c443d8fd33679948fbd0a485d88b6a55812a53d9a03a922142100","yParity":"0x0","hash":"0x27aa02a44ea343a72131fc67734c67d410ab6f65429637fbb17a08f781e77f7e"}] -` - - var txs types.Transactions - err := json.Unmarshal([]byte(txsData), &txs) - assert.NoError(t, err) - - origin, err := rlp.EncodeToBytes(txs) - assert.NoError(t, err) - - blobs := EncodeBlobs(origin) - - data, dErr := DecodeBlobs(blobs) - assert.NoError(t, dErr) - - assert.Equal(t, crypto.Keccak256Hash(origin), crypto.Keccak256Hash(data)) + assert.Equal(t, hexutil.Bytes(origin), origin1) }