From 3532d3d2ef5720f6e793f13fe14bd048355c1dfc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 1 Apr 2022 12:00:13 +1030
Subject: [PATCH] channeld: send warning, not error, if peer has old
 commitment number.

This is the minimal change to meet the desired outcome of
https://github.com/lightning/bolts/issues/934 which wants to give
obsolete-db nodes a chance to fix things up before we close the
channel.

We need to dance around a bit here, since we *will* close the channel
if we receive an ERROR, so we suppress that.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 channeld/channeld.c      | 14 ++++----
 tests/test_connection.py | 70 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/channeld/channeld.c b/channeld/channeld.c
index ddce25310914..3a1da34e6a1b 100644
--- a/channeld/channeld.c
+++ b/channeld/channeld.c
@@ -2966,12 +2966,14 @@ static void peer_reconnect(struct peer *peer,
 		}
 		retransmit_revoke_and_ack = true;
 	} else if (next_revocation_number < peer->next_index[LOCAL] - 1) {
-		peer_failed_err(peer->pps,
-				&peer->channel_id,
-				"bad reestablish revocation_number: %"PRIu64
-				" vs %"PRIu64,
-				next_revocation_number,
-				peer->next_index[LOCAL]);
+		/* Send a warning here! Because this is what it looks like if peer is
+		 * in the past, and they might still recover. */
+		peer_failed_warn(peer->pps,
+				 &peer->channel_id,
+				 "bad reestablish revocation_number: %"PRIu64
+				 " vs %"PRIu64,
+				 next_revocation_number,
+				 peer->next_index[LOCAL]);
 	} else if (next_revocation_number > peer->next_index[LOCAL] - 1) {
 		if (!check_extra_fields)
 			/* They don't support option_data_loss_protect or
diff --git a/tests/test_connection.py b/tests/test_connection.py
index 4e5eb8be1753..0d0436139d5c 100644
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@@ -2774,11 +2774,11 @@ def test_opener_simple_reconnect(node_factory, bitcoind):
 @unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3',
                  "sqlite3-specific DB rollback")
-@pytest.mark.developer("needs LIGHTNINGD_DEV_LOG_IO")
 @pytest.mark.openchannel('v1')
 @pytest.mark.openchannel('v2')
 def test_dataloss_protection(node_factory, bitcoind):
     l1 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'},
+                               allow_warning=True,
                                feerates=(7500, 7500, 7500, 7500))
     l2 = node_factory.get_node(may_reconnect=True, options={'log-level': 'io'},
                                feerates=(7500, 7500, 7500, 7500),
                                allow_broken_log=True)
@@ -2846,14 +2846,16 @@ def test_dataloss_protection(node_factory, bitcoind):
 
     # l2 should freak out!
     l2.daemon.wait_for_log("Peer permanent failure in CHANNELD_NORMAL: Awaiting unilateral close")
-    # l1 should drop to chain.
-    l1.wait_for_channel_onchain(l2.info['id'])
-
     # l2 must NOT drop to chain.
     l2.daemon.wait_for_log("Cannot broadcast our commitment tx: they have a future one")
     assert not l2.daemon.is_in_log('sendrawtx exit 0',
                                    start=l2.daemon.logsearch_start)
 
+    # l1 should drop to chain, but doesn't always receive ERROR before it sends warning.
+    # We have to reconnect once
+    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+    l1.wait_for_channel_onchain(l2.info['id'])
+
     closetxid = only_one(bitcoind.rpc.getrawmempool(False))
 
     # l2 should still recover something!
@@ -2872,6 +2874,66 @@ def test_dataloss_protection(node_factory, bitcoind):
     assert (closetxid, "confirmed") in set([(o['txid'], o['status'])
                                             for o in l2.rpc.listfunds()['outputs']])
 
+@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "sqlite3-specific DB rollback")
+@pytest.mark.openchannel('v1')
+@pytest.mark.openchannel('v2')
+@pytest.mark.developer("needs dev-disconnect, dev-no-reconnect")
+def test_dataloss_protection_no_broadcast(node_factory, bitcoind):
+    # If l2 sends an old version, but *doesn't* send an error, l1 should not broadcast tx.
+    # (https://github.com/lightning/bolts/issues/934)
+    l1 = node_factory.get_node(may_reconnect=True,
+                               feerates=(7500, 7500, 7500, 7500),
+                               allow_warning=True,
+                               options={'dev-no-reconnect': None})
+    l2 = node_factory.get_node(may_reconnect=True,
+                               feerates=(7500, 7500, 7500, 7500), allow_broken_log=True,
+                               disconnect=['-WIRE_ERROR'],
+                               options={'dev-no-reconnect': None})
+
+    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+    l1.fundchannel(l2, 10**6)
+    l2.stop()
+
+    # Save copy of the db.
+    dbpath = os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, "lightningd.sqlite3")
+    orig_db = open(dbpath, "rb").read()
+    l2.start()
+
+    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+    # After an htlc, we should get different results (two more commits)
+    l1.pay(l2, 200000000)
+
+    # Make sure both sides consider it completely settled (has received both
+    # REVOKE_AND_ACK)
+    l1.daemon.wait_for_logs(["peer_in WIRE_REVOKE_AND_ACK"] * 2)
+    l2.daemon.wait_for_logs(["peer_in WIRE_REVOKE_AND_ACK"] * 2)
+
+    # Now, move l2 back in time.
+    l2.stop()
+    # Save new db
+    new_db = open(dbpath, "rb").read()
+    # Overwrite with OLD db.
+    open(dbpath, "wb").write(orig_db)
+    l2.start()
+
+    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+    # l2 should freak out!
+    l2.daemon.wait_for_logs(["Peer permanent failure in CHANNELD_NORMAL: Awaiting unilateral close"])
+
+    # l1 should NOT drop to chain, since it didn't receive an error.
+    time.sleep(5)
+    assert bitcoind.rpc.getrawmempool(False) == []
+
+    # fix up l2.
+    l2.stop()
+    open(dbpath, "wb").write(new_db)
+    l2.start()
+
+    # All is forgiven
+    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
+    l1.pay(l2, 200000000)
+
+
 @pytest.mark.developer("needs dev_disconnect")
 def test_restart_multi_htlc_rexmit(node_factory, bitcoind, executor):
     # l1 disables commit timer once we send first htlc, dies on commit
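
For context, the reestablish check this patch changes can be sketched as
follows. This is an illustrative Python model of the if/else chain in
peer_reconnect() (channeld/channeld.c), not code from the patch: the name
my_next_index stands in for peer->next_index[LOCAL], the return strings are
invented labels, and the first (retransmit) branch is assumed from the
context lines of the hunk above rather than shown in full by this patch.

def classify_reestablish(next_revocation_number, my_next_index):
    """Classify the peer's channel_reestablish against our local state.

    next_revocation_number: the peer's claim of the next commitment
    number it expects us to reveal a revocation secret for.
    my_next_index: our next local commitment index.
    """
    if next_revocation_number == my_next_index - 2:
        # Peer missed our last revoke_and_ack: retransmit it.
        return "retransmit revoke_and_ack"
    elif next_revocation_number < my_next_index - 1:
        # Peer is in the past (e.g. restored from a stale db backup).
        # Before this patch we sent an error, forcing a unilateral close;
        # now we send a warning so the peer can still fix up its db.
        return "send warning: bad reestablish revocation_number"
    elif next_revocation_number > my_next_index - 1:
        # We are the ones behind: with option_data_loss_protect we must
        # NOT broadcast our stale commitment; await their unilateral close.
        return "await their unilateral close"
    else:
        # Numbers line up: nothing to retransmit.
        return "in sync"

# Example: our next index is 5, so a peer reporting 2 is in the past and,
# after this patch, gets a warning instead of an error.
assert classify_reestablish(2, 5) == "send warning: bad reestablish revocation_number"
assert classify_reestablish(4, 5) == "in sync"

This also illustrates why test_dataloss_protection_no_broadcast holds back
the ERROR with disconnect=['-WIRE_ERROR']: a warning alone must not make l1
drop to chain, since the stale peer may yet recover.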