From 7afa136a2d4915828cd42dd7ae1ee07864adfc6b Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Fri, 29 Jul 2022 12:58:41 -0600 Subject: [PATCH] Fix reorg race condition that can cause rare crashes Fixes issue 408. This bug was introduced by PR 393, which changed how txids are determined. That PR changed each call to the zcash getblock call into a pair of calls, the first to get the raw block data, the second to retrieve the txids in the block. (Unfortunately, you can't get both in a single getblock RPC.) But this ordering introduced a timing window in which the block at the given height can change, if a reorg occurred between the two calls. This PR reorders the getblock calls, so that the first call gets the transaction IDs, which also happens to return the block hash, so then the second getblock call can specify the block hash, rather than the height. This ensures that the two RPC calls return consistent data, definitely the same block. --- common/common.go | 72 ++++++----- common/common_test.go | 261 +++++++++++++------------------------- frontend/frontend_test.go | 22 ++-- 3 files changed, 144 insertions(+), 211 deletions(-) diff --git a/common/common.go b/common/common.go index 7de773b2..69fa8d11 100644 --- a/common/common.go +++ b/common/common.go @@ -151,7 +151,8 @@ type ( // reply to getblock verbose=1 (json includes txid list) ZcashRpcReplyGetblock1 struct { - Tx []string + Hash string + Tx []string } ) @@ -235,21 +236,51 @@ func GetLightdInfo() (*walletrpc.LightdInfo, error) { } func getBlockFromRPC(height int) (*walletrpc.CompactBlock, error) { + // `block.ParseFromSlice` correctly parses blocks containing v5 + // transactions, but incorrectly computes the IDs of the v5 transactions. + // We temporarily paper over this bug by fetching the correct txids via a + // verbose getblock RPC call, which returns the txids. + // + // Unfortunately, this RPC doesn't return the raw hex for the block, + // so a second getblock RPC (non-verbose) is needed (below). + // https://github.com/zcash/lightwalletd/issues/392 + params := make([]json.RawMessage, 2) heightJSON, err := json.Marshal(strconv.Itoa(height)) if err != nil { Log.Fatal("getBlockFromRPC bad height argument", height, err) } params[0] = heightJSON - params[1] = json.RawMessage("0") // non-verbose (raw hex) + // Fetch the block using the verbose option ("1") because it provides + // both the list of txids, which we're not yet able to compute for + // Orchard (V5) transactions, and the block hash (block ID), which + // we need to fetch the raw data format of the same block. Don't fetch + // by height in case a reorg occurs between the two getblock calls; + // using block has ensures that we're fetching the same block. + params[1] = json.RawMessage("1") result, rpcErr := RawRequest("getblock", params) - - // For some reason, the error responses are not JSON if rpcErr != nil { // Check to see if we are requesting a height the zcashd doesn't have yet if (strings.Split(rpcErr.Error(), ":"))[0] == "-8" { return nil, nil } + return nil, errors.Wrap(rpcErr, "error requesting verbose block") + } + var block1 ZcashRpcReplyGetblock1 + err = json.Unmarshal(result, &block1) + if err != nil { + return nil, err + } + blockHash, err := json.Marshal(block1.Hash) + if err != nil { + Log.Fatal("getBlockFromRPC bad block hash", block1.Hash) + } + params[0] = blockHash + params[1] = json.RawMessage("0") // non-verbose (raw hex) + result, rpcErr = RawRequest("getblock", params) + + // For some reason, the error responses are not JSON + if rpcErr != nil { return nil, errors.Wrap(rpcErr, "error requesting block") } @@ -272,36 +303,17 @@ func getBlockFromRPC(height int) (*walletrpc.CompactBlock, error) { if len(rest) != 0 { return nil, errors.New("received overlong message") } - if block.GetHeight() != height { return nil, errors.New("received unexpected height block") } - - // `block.ParseFromSlice` correctly parses blocks containing v5 transactions, but - // incorrectly computes the IDs of the v5 transactions. We temporarily paper over this - // bug by fetching the correct txids via a second getblock RPC call. - // https://github.com/zcash/lightwalletd/issues/392 - { - params[1] = json.RawMessage("1") // JSON with list of txids - result, rpcErr := RawRequest("getblock", params) - if rpcErr != nil { - return nil, errors.Wrap(rpcErr, "error requesting verbose block") - } - var block1 ZcashRpcReplyGetblock1 - err = json.Unmarshal(result, &block1) + for i, t := range block.Transactions() { + txid, err := hex.DecodeString(block1.Tx[i]) if err != nil { - return nil, err - } - for i, t := range block.Transactions() { - txid, err := hex.DecodeString(block1.Tx[i]) - if err != nil { - return nil, errors.Wrap(err, "error decoding getblock txid") - } - // convert from big-endian - t.SetTxID(parser.Reverse(txid)) + return nil, errors.Wrap(err, "error decoding getblock txid") } + // convert from big-endian + t.SetTxID(parser.Reverse(txid)) } - return block.ToCompact(), nil } @@ -369,7 +381,9 @@ func BlockIngestor(c *BlockCache, rep int) { var block *walletrpc.CompactBlock block, err = getBlockFromRPC(height) if err != nil { - Log.Fatal("getblock ", height, " failed, will retry: ", err) + Log.Info("getblock ", height, " failed, will retry: ", err) + Time.Sleep(8 * time.Second) + continue } if block != nil && c.HashMatch(block.PrevHash) { if err = c.Add(height, block); err != nil { diff --git a/common/common_test.go b/common/common_test.go index d618822e..ac1e633b 100644 --- a/common/common_test.go +++ b/common/common_test.go @@ -170,7 +170,18 @@ func checkSleepMethod(count int, duration time.Duration, expected string, method } // There are four test blocks, 0..3 +// To make the tests easier to understand, we fake out the hash of the block at height X +// by just prepending "0000" to X in string form. For example, the "hash" of block 380640 +// is "0000380640". It may be better to make the (fake) hashes 32 bytes (64 characters), +// and that may be required in the future, but for now this works okay. func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage, error) { + var arg string + if len(params) > 1 { + err := json.Unmarshal(params[0], &arg) + if err != nil { + testT.Fatal("could not unmarshal", method, "arg:", params[0]) + } + } step++ // request the first two blocks very quickly (syncing), // then next block isn't yet available @@ -182,19 +193,17 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage return r, nil case 2: checkSleepMethod(0, 0, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380640" { + if arg != "380640" { testT.Fatal("incorrect height requested") } // height 380640 - return blocks[0], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380640\"}"), nil case 3: checkSleepMethod(0, 0, "getblock", method) - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380640" { + testT.Fatal("incorrect hash requested") + } + return blocks[0], nil case 4: checkSleepMethod(0, 0, "getbestblockhash", method) // This hash doesn't matter, won't match anything @@ -202,19 +211,17 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage return r, nil case 5: checkSleepMethod(0, 0, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380641" { + if arg != "380641" { testT.Fatal("incorrect height requested") } // height 380641 - return blocks[1], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380641\"}"), nil case 6: checkSleepMethod(0, 0, "getblock", method) - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380641" { + testT.Fatal("incorrect hash requested") + } + return blocks[1], nil case 7: // Return the expected block hash, so we're synced, should // then sleep for 2 seconds, then another getbestblockhash @@ -234,19 +241,17 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage return r, nil case 10: checkSleepMethod(2, 4, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380642" { + if arg != "380642" { testT.Fatal("incorrect height requested") } // height 380642 - return blocks[2], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380642\"}"), nil case 11: checkSleepMethod(2, 4, "getblock", method) - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380642" { + testT.Fatal("incorrect hash requested") + } + return blocks[2], nil case 12: // Simulate still no new block, still synced, should // sleep for 2 seconds, then another getbestblockhash @@ -264,12 +269,7 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage // It thinks there may simply be a new block, but we'll say // there is no block at this height (380642 was replaced). checkSleepMethod(3, 6, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380643" { + if arg != "380643" { testT.Fatal("incorrect height requested") } return nil, errors.New("-8: Block height out of range") @@ -282,23 +282,21 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage case 16: // It should have backed up one block checkSleepMethod(3, 6, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380642" { + if arg != "380642" { testT.Fatal("incorrect height requested") } // height 380642 - return blocks[2], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380642\"}"), nil case 17: checkSleepMethod(3, 6, "getblock", method) - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380642" { + testT.Fatal("incorrect height requested") + } + return blocks[2], nil case 18: // We're back to the same state as case 9, and this time // we'll make it back up 2 blocks (rather than one) - checkSleepMethod(3, 6, "getbestblockhash", method) // XXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXX + checkSleepMethod(3, 6, "getbestblockhash", method) // hash doesn't matter, just something that doesn't match r, _ := json.Marshal("5656") return r, nil @@ -306,12 +304,7 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage // It thinks there may simply be a new block, but we'll say // there is no block at this height (380642 was replaced). checkSleepMethod(3, 6, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380643" { + if arg != "380643" { testT.Fatal("incorrect height requested") } return nil, errors.New("-8: Block height out of range") @@ -324,12 +317,7 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage // Like case 13, it should have backed up one block, but // this time we'll make it back up one more checkSleepMethod(3, 6, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380642" { + if arg != "380642" { testT.Fatal("incorrect height requested") } return nil, errors.New("-8: Block height out of range") @@ -341,18 +329,16 @@ func blockIngestorStub(method string, params []json.RawMessage) (json.RawMessage case 23: // It should have backed up one more checkSleepMethod(3, 6, "getblock", method) - var height string - err := json.Unmarshal(params[0], &height) - if err != nil { - testT.Fatal("could not unmarshal height") - } - if height != "380641" { + if arg != "380641" { testT.Fatal("incorrect height requested") } - return blocks[1], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380641\"}"), nil case 24: checkSleepMethod(3, 6, "getblock", method) - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380641" { + testT.Fatal("incorrect height requested") + } + return blocks[1], nil } testT.Error("blockIngestorStub called too many times") return nil, nil @@ -383,8 +369,8 @@ func getblockStub(method string, params []json.RawMessage) (json.RawMessage, err if method != "getblock" { testT.Error("unexpected method") } - var height string - err := json.Unmarshal(params[0], &height) + var arg string + err := json.Unmarshal(params[0], &arg) if err != nil { testT.Fatal("could not unmarshal height") } @@ -392,116 +378,28 @@ func getblockStub(method string, params []json.RawMessage) (json.RawMessage, err step++ switch step { case 1: - if height != "380640" { - testT.Error("unexpected height") + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380640\"}"), nil + case 2: + if arg != "0000380640" { + testT.Error("unexpected hash") } // Sunny-day return blocks[0], nil - case 2: - return []byte("{\"Tx\": [\"00\"]}"), nil case 3: - if height != "380641" { - testT.Error("unexpected height") + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380641\"}"), nil + case 4: + if arg != "0000380641" { + testT.Error("unexpected hash") } // Sunny-day return blocks[1], nil - case 4: - return []byte("{\"Tx\": [\"00\"]}"), nil case 5: - if height != "380642" { - testT.Error("unexpected height", height) + if arg != "380642" { + testT.Error("unexpected hash") } - // Simulate that we're synced (caught up); + // Simulate that we're synced (caught up, latest block 380641); // this should cause one 10s sleep (then retry). return nil, errors.New("-8: Block height out of range") - case 6: - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380642" { - testT.Error("unexpected height", height) - } - // Simulate that we're still caught up; this should cause a 1s - // wait then a check for reorg to shorter chain (back up one). - return nil, errors.New("-8: Block height out of range") - case 7: - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - // Back up to 41. - if height != "380641" { - testT.Error("unexpected height", height) - } - // Return the expected block (as normally happens, no actual reorg), - // ingestor will immediately re-request the next block (42). - return blocks[1], nil - case 8: - return []byte("{\"Tx\": [\"00\"]}"), nil - case 9: - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380642" { - testT.Error("unexpected height", height) - } - // Block 42 has now finally appeared, it will immediately ask for 43. - return blocks[2], nil - case 10: - return []byte("{\"Tx\": [\"00\"]}"), nil - case 11: - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380643" { - testT.Error("unexpected height", height) - } - // Simulate a reorg by modifying the block's hash temporarily, - // this causes a 1s sleep and then back up one block (to 42). - blocks[3][9]++ // first byte of the prevhash - return blocks[3], nil - case 12: - return []byte("{\"Tx\": [\"00\"]}"), nil - case 13: - blocks[3][9]-- // repair first byte of the prevhash - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380642" { - testT.Error("unexpected height ", height) - } - return blocks[2], nil - case 14: - return []byte("{\"Tx\": [\"00\"]}"), nil - case 15: - if sleepCount != 1 || sleepDuration != 2*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380643" { - testT.Error("unexpected height ", height) - } - // Instead of returning expected (43), simulate block unmarshal - // failure, should cause 10s sleep, retry - return nil, nil - case 16: - if sleepCount != 2 || sleepDuration != 12*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380643" { - testT.Error("unexpected height ", height) - } - // Back to sunny-day - return blocks[3], nil - case 17: - return []byte("{\"Tx\": [\"00\"]}"), nil - case 18: - if sleepCount != 2 || sleepDuration != 12*time.Second { - testT.Error("unexpected sleeps", sleepCount, sleepDuration) - } - if height != "380644" { - testT.Error("unexpected height ", height) - } - // next block not ready - return nil, nil } testT.Error("getblockStub called too many times") return nil, nil @@ -538,7 +436,7 @@ func TestGetBlockRange(t *testing.T) { } } - // try to read in block 380642, but this will fail (see case 3 above) + // try to read in block 380642, but this will fail (see case 5 above) select { case err := <-errChan: // this will also catch context.DeadlineExceeded from the timeout @@ -546,47 +444,59 @@ func TestGetBlockRange(t *testing.T) { t.Fatal("unexpected error:", err) } case <-blockChan: - t.Fatal("reading height 22 should have failed") + t.Fatal("reading height 380642 should have failed") } + if step != 5 { + t.Fatal("unexpected step:", step) + } step = 0 os.RemoveAll(unitTestPath) } // There are four test blocks, 0..3 func getblockStubReverse(method string, params []json.RawMessage) (json.RawMessage, error) { - var height string - err := json.Unmarshal(params[0], &height) + var arg string + err := json.Unmarshal(params[0], &arg) if err != nil { - testT.Fatal("could not unmarshal height") + testT.Fatal("could not unmarshal arg") } step++ switch step { case 1: - if height != "380642" { + if arg != "380642" { testT.Error("unexpected height") } // Sunny-day - return blocks[2], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380642\"}"), nil case 2: - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380642" { + testT.Error("unexpected hash") + } + return blocks[2], nil case 3: - if height != "380641" { + if arg != "380641" { testT.Error("unexpected height") } // Sunny-day - return blocks[1], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380641\"}"), nil case 4: - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380641" { + testT.Error("unexpected hash") + } + return blocks[1], nil case 5: - if height != "380640" { + if arg != "380640" { testT.Error("unexpected height") } // Sunny-day - return blocks[0], nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380640\"}"), nil case 6: - return []byte("{\"Tx\": [\"00\"]}"), nil + if arg != "0000380640" { + testT.Error("unexpected hash") + } + return blocks[0], nil } testT.Error("getblockStub called too many times") return nil, nil @@ -635,6 +545,9 @@ func TestGetBlockRangeReverse(t *testing.T) { t.Fatal("unexpected Height:", cBlock.Height) } } + if step != 6 { + t.Fatal("unexpected step:", step) + } step = 0 os.RemoveAll(unitTestPath) } diff --git a/frontend/frontend_test.go b/frontend/frontend_test.go index 6c04b064..385a7f21 100644 --- a/frontend/frontend_test.go +++ b/frontend/frontend_test.go @@ -136,23 +136,29 @@ func TestGetTransaction(t *testing.T) { } func getblockStub(method string, params []json.RawMessage) (json.RawMessage, error) { + if method != "getblock" { + testT.Fatal("unexpected method:", method) + } step++ - var height string - err := json.Unmarshal(params[0], &height) + var arg string + err := json.Unmarshal(params[0], &arg) if err != nil { testT.Fatal("could not unmarshal height") } - if height != "380640" { - testT.Fatal("unexpected getblock height", height) - } // Test retry logic (for the moment, it's very simple, just one retry). switch step { case 1: - return blocks[0], nil - case 2: + if arg != "380640" { + testT.Fatal("unexpected getblock height", arg) + } // verbose mode (getblock height 1), return transaction list - return []byte("{\"Tx\": [\"00\"]}"), nil + return []byte("{\"Tx\": [\"00\"], \"Hash\": \"0000380640\"}"), nil + case 2: + if arg != "0000380640" { + testT.Fatal("unexpected getblock height", arg) + } + return blocks[0], nil case 3: return nil, errors.New("getblock test error, too many requests") }