From 447403f8e36800f7cb360c687ea77072c6c96c56 Mon Sep 17 00:00:00 2001 From: Ibrahim Kettaneh Date: Fri, 13 Sep 2024 16:42:59 -0400 Subject: [PATCH] raft: refortify followers if they are not fortified This commit makes the leader refortify followers that need fortification. It keeps checking on every heartbeat timeout. Moreover, the leader now skips sending fortification messages to followers whose stores don't provide support in the store-liveness-fabric. --- pkg/raft/BUILD.bazel | 1 + pkg/raft/raft.go | 5 + pkg/raft/raft_paper_test.go | 53 ++-- .../async_storage_writes_append_aba_race.txt | 19 +- pkg/raft/testdata/checkquorum.txt | 48 +++- pkg/raft/testdata/fortification_basic.txt | 1 - pkg/raft/testdata/refortification_basic.txt | 231 ++++++++++++++++++ .../snapshot_succeed_via_app_resp.txt | 10 +- 8 files changed, 343 insertions(+), 25 deletions(-) create mode 100644 pkg/raft/testdata/refortification_basic.txt diff --git a/pkg/raft/BUILD.bazel b/pkg/raft/BUILD.bazel index 4061909348c0..1761144ae87a 100644 --- a/pkg/raft/BUILD.bazel +++ b/pkg/raft/BUILD.bazel @@ -58,6 +58,7 @@ go_test( "//pkg/raft/rafttest", "//pkg/raft/tracker", "//pkg/settings/cluster", + "//pkg/testutils", "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 96350dcba212..9fe0ad5282d6 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1028,6 +1028,11 @@ func (r *raft) tickHeartbeat() { if err := r.Step(pb.Message{From: r.id, Type: pb.MsgBeat}); err != nil { r.logger.Debugf("error occurred during checking sending heartbeat: %v", err) } + + // Try to refortify any followers that don't currently support us. + r.maybeBcastFortify() + // TODO(ibrahim): add/call maybeUnpauseAndBcastAppend() here. 
+ } } diff --git a/pkg/raft/raft_paper_test.go b/pkg/raft/raft_paper_test.go index 866e51450853..86b8651154e2 100644 --- a/pkg/raft/raft_paper_test.go +++ b/pkg/raft/raft_paper_test.go @@ -35,6 +35,7 @@ import ( "testing" pb "github.com/cockroachdb/cockroach/pkg/raft/raftpb" + "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -105,23 +106,45 @@ func TestStartAsFollower(t *testing.T) { func TestLeaderBcastBeat(t *testing.T) { // heartbeat interval hi := 1 - r := newTestRaft(1, 10, hi, newTestMemoryStorage(withPeers(1, 2, 3))) - r.becomeCandidate() - r.becomeLeader() - for i := 0; i < 10; i++ { - mustAppendEntry(r, pb.Entry{Index: uint64(i) + 1}) - } - for i := 0; i < hi; i++ { - r.tick() - } + testutils.RunTrueAndFalse(t, "store-liveness-enabled", + func(t *testing.T, storeLivenessEnabled bool) { + var r *raft + if storeLivenessEnabled { + r = newTestRaft(1, 10, hi, + newTestMemoryStorage(withPeers(1, 2, 3))) + } else { + r = newTestRaft(1, 10, hi, + newTestMemoryStorage(withPeers(1, 2, 3)), withFortificationDisabled()) + } - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - assert.Equal(t, []pb.Message{ - {From: 1, To: 2, Term: 1, Type: pb.MsgHeartbeat}, - {From: 1, To: 3, Term: 1, Type: pb.MsgHeartbeat}, - }, msgs) + r.becomeCandidate() + r.becomeLeader() + + for i := 0; i < 10; i++ { + mustAppendEntry(r, pb.Entry{Index: uint64(i) + 1}) + } + + for i := 0; i < hi; i++ { + r.tick() + } + + msgs := r.readMessages() + sort.Sort(messageSlice(msgs)) + if storeLivenessEnabled { + assert.Equal(t, []pb.Message{ + {From: 1, To: 2, Term: 1, Type: pb.MsgFortifyLeader}, + {From: 1, To: 3, Term: 1, Type: pb.MsgFortifyLeader}, + {From: 1, To: 2, Term: 1, Type: pb.MsgHeartbeat}, + {From: 1, To: 3, Term: 1, Type: pb.MsgHeartbeat}, + }, msgs) + } else { + assert.Equal(t, []pb.Message{ + {From: 1, To: 2, Term: 1, Type: pb.MsgHeartbeat}, + {From: 1, To: 3, Term: 1, Type: pb.MsgHeartbeat}, + }, 
msgs) + } + }) } func TestFollowerStartElection(t *testing.T) { diff --git a/pkg/raft/testdata/async_storage_writes_append_aba_race.txt b/pkg/raft/testdata/async_storage_writes_append_aba_race.txt index c1f6e0a6ce79..32e7cba194ad 100644 --- a/pkg/raft/testdata/async_storage_writes_append_aba_race.txt +++ b/pkg/raft/testdata/async_storage_writes_append_aba_race.txt @@ -414,20 +414,32 @@ Messages: 4->5 MsgHeartbeat Term:3 Log:0/0 4->6 MsgHeartbeat Term:3 Log:0/0 4->7 MsgHeartbeat Term:3 Log:0/0 +4->1 MsgFortifyLeader Term:3 Log:0/0 +4->2 MsgFortifyLeader Term:3 Log:0/0 +4->3 MsgFortifyLeader Term:3 Log:0/0 +4->5 MsgFortifyLeader Term:3 Log:0/0 +4->6 MsgFortifyLeader Term:3 Log:0/0 +4->7 MsgFortifyLeader Term:3 Log:0/0 +4->AppendThread MsgStorageAppend Term:0 Log:0/0 Responses:[ + 4->4 MsgFortifyLeaderResp Term:3 Log:0/0 LeadEpoch:1 +] deliver-msgs 1 ---- 4->1 MsgHeartbeat Term:3 Log:0/0 INFO 1 [term: 2] received a MsgHeartbeat message with higher term from 4 [term: 3] INFO 1 became follower at term 3 +4->1 MsgFortifyLeader Term:3 Log:0/0 process-ready 1 ---- Ready MustSync=true: -HardState Term:3 Commit:11 Lead:4 LeadEpoch:0 +HardState Term:3 Commit:11 Lead:4 LeadEpoch:1 Messages: 1->4 MsgHeartbeatResp Term:3 Log:0/0 -1->AppendThread MsgStorageAppend Term:3 Log:0/0 Commit:11 Lead:4 +1->AppendThread MsgStorageAppend Term:3 Log:0/0 Commit:11 Lead:4 LeadEpoch:1 Responses:[ + 1->4 MsgFortifyLeaderResp Term:3 Log:0/0 LeadEpoch:1 +] deliver-msgs 4 ---- @@ -513,8 +525,9 @@ INFO mark (term,index)=(2,12) mismatched the last accepted term 3 in unstable lo process-append-thread 1 ---- Processing: -1->AppendThread MsgStorageAppend Term:3 Log:0/0 Commit:11 Lead:4 +1->AppendThread MsgStorageAppend Term:3 Log:0/0 Commit:11 Lead:4 LeadEpoch:1 Responses: +1->4 MsgFortifyLeaderResp Term:3 Log:0/0 LeadEpoch:1 raft-log 1 ---- diff --git a/pkg/raft/testdata/checkquorum.txt b/pkg/raft/testdata/checkquorum.txt index f642ce72ba47..4ac9dd920303 100644 --- a/pkg/raft/testdata/checkquorum.txt 
+++ b/pkg/raft/testdata/checkquorum.txt @@ -73,31 +73,57 @@ INFO 1 became follower at term 1 stabilize ---- > 1 handling Ready - Ready MustSync=false: + Ready MustSync=true: State:StateFollower + HardState Term:1 Vote:1 Commit:11 Lead:1 LeadEpoch:2 Messages: 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 > 2 receiving messages 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + INFO 2 [term: 2] ignored a MsgFortifyLeader message with lower term from 1 [term: 1] 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + INFO 2 [term: 2] ignored a MsgFortifyLeader message with lower term from 1 [term: 1] 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + INFO 2 [term: 2] ignored a MsgFortifyLeader message with lower term from 1 [term: 1] 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + INFO 2 [term: 2] ignored a MsgFortifyLeader message with lower term from 1 [term: 1] 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->2 MsgFortifyLeader Term:1 Log:0/0 + INFO 2 [term: 2] ignored a MsgFortifyLeader message with lower term from 1 [term: 1] > 3 receiving messages 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 
1->3 MsgFortifyLeader Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 > 2 handling Ready Ready MustSync=false: Messages: @@ -107,13 +133,19 @@ stabilize 2->1 MsgAppResp Term:2 Log:0/0 2->1 MsgAppResp Term:2 Log:0/0 > 3 handling Ready - Ready MustSync=false: + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:11 Lead:1 LeadEpoch:2 Messages: 3->1 MsgHeartbeatResp Term:1 Log:0/0 3->1 MsgHeartbeatResp Term:1 Log:0/0 3->1 MsgHeartbeatResp Term:1 Log:0/0 3->1 MsgHeartbeatResp Term:1 Log:0/0 3->1 MsgHeartbeatResp Term:1 Log:0/0 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 > 1 receiving messages 2->1 MsgAppResp Term:2 Log:0/0 INFO 1 [term: 1] received a MsgAppResp message with higher term from 2 [term: 2] @@ -132,6 +164,16 @@ stabilize INFO 1 [term: 2] ignored a MsgHeartbeatResp message with lower term from 3 [term: 1] 3->1 MsgHeartbeatResp Term:1 Log:0/0 INFO 1 [term: 2] ignored a MsgHeartbeatResp message with lower term from 3 [term: 1] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + INFO 1 [term: 2] ignored a MsgFortifyLeaderResp message with lower term from 3 [term: 1] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + INFO 1 [term: 2] ignored a MsgFortifyLeaderResp message with lower term from 3 [term: 1] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + INFO 1 [term: 2] ignored a MsgFortifyLeaderResp message with lower term from 3 [term: 1] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + INFO 1 [term: 2] ignored a MsgFortifyLeaderResp message with lower term from 3 [term: 1] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + INFO 1 [term: 2] ignored a MsgFortifyLeaderResp message with lower 
term from 3 [term: 1] > 1 handling Ready Ready MustSync=true: HardState Term:2 Commit:11 Lead:0 LeadEpoch:0 @@ -165,7 +207,7 @@ INFO 1 [logterm: 1, index: 11, vote: 0] cast MsgVote for 2 [logterm: 1, index: 1 deliver-msgs 3 ---- 2->3 MsgVote Term:3 Log:1/11 -INFO 3 [logterm: 1, index: 11, vote: 1] ignored MsgVote from 2 [logterm: 1, index: 11] at term 1: recently received communication from leader (remaining ticks: 3) +INFO 3 [logterm: 1, index: 11, vote: 1] ignored MsgVote from 2 [logterm: 1, index: 11] at term 1: recently received communication from leader (remaining ticks: 3) and supporting fortified leader 1 at epoch 2 stabilize ---- diff --git a/pkg/raft/testdata/fortification_basic.txt b/pkg/raft/testdata/fortification_basic.txt index 3c5ec1c3518a..43296dd0a87f 100644 --- a/pkg/raft/testdata/fortification_basic.txt +++ b/pkg/raft/testdata/fortification_basic.txt @@ -45,7 +45,6 @@ withdraw-support 3 1 2 2 1 1 3 x 1 1 - campaign 1 ---- INFO 1 is starting a new election at term 0 diff --git a/pkg/raft/testdata/refortification_basic.txt b/pkg/raft/testdata/refortification_basic.txt new file mode 100644 index 000000000000..ad516b498a3b --- /dev/null +++ b/pkg/raft/testdata/refortification_basic.txt @@ -0,0 +1,231 @@ +# Basic tests for leader refortification. + +log-level none +---- +ok + +add-nodes 3 voters=(1,2,3) index=2 +---- +ok + +log-level info +---- +ok + +# Muck around with StoreLiveness to make it somewhat interesting. 
+bump-epoch 1 +---- + 1 2 3 +1 2 1 1 +2 2 1 1 +3 2 1 1 + +withdraw-support 1 1 +---- + 1 2 3 +1 x 1 1 +2 2 1 1 +3 2 1 1 + +grant-support 1 1 +---- + 1 2 3 +1 3 1 1 +2 2 1 1 +3 2 1 1 + +withdraw-support 3 1 +---- + 1 2 3 +1 3 1 1 +2 2 1 1 +3 x 1 1 + +campaign 1 +---- +INFO 1 is starting a new election at term 0 +INFO 1 became candidate at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 3 at term 1 + +stabilize +---- +> 1 handling Ready + Ready MustSync=true: + State:StateCandidate + HardState Term:1 Vote:1 Commit:2 Lead:0 LeadEpoch:0 + Messages: + 1->2 MsgVote Term:1 Log:1/2 + 1->3 MsgVote Term:1 Log:1/2 + INFO 1 received MsgVoteResp from 1 at term 1 + INFO 1 has received 1 MsgVoteResp votes and 0 vote rejections +> 2 receiving messages + 1->2 MsgVote Term:1 Log:1/2 + INFO 2 [term: 0] received a MsgVote message with higher term from 1 [term: 1] + INFO 2 became follower at term 1 + INFO 2 [logterm: 1, index: 2, vote: 0] cast MsgVote for 1 [logterm: 1, index: 2] at term 1 +> 3 receiving messages + 1->3 MsgVote Term:1 Log:1/2 + INFO 3 [term: 0] received a MsgVote message with higher term from 1 [term: 1] + INFO 3 became follower at term 1 + INFO 3 [logterm: 1, index: 2, vote: 0] cast MsgVote for 1 [logterm: 1, index: 2] at term 1 +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:2 Lead:0 LeadEpoch:0 + Messages: + 2->1 MsgVoteResp Term:1 Log:0/0 +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:2 Lead:0 LeadEpoch:0 + Messages: + 3->1 MsgVoteResp Term:1 Log:0/0 +> 1 receiving messages + 2->1 MsgVoteResp Term:1 Log:0/0 + INFO 1 received MsgVoteResp from 2 at term 1 + INFO 1 has received 2 MsgVoteResp votes and 0 vote rejections + INFO 1 became leader at term 1 + 3->1 MsgVoteResp Term:1 Log:0/0 +> 1 handling Ready + Ready MustSync=true: + State:StateLeader + HardState Term:1 Vote:1 Commit:2 Lead:1 LeadEpoch:3 + Entries: + 1/3 EntryNormal "" + 
Messages: + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->2 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] +> 2 receiving messages + 1->2 MsgFortifyLeader Term:1 Log:0/0 + 1->2 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:2 Lead:1 LeadEpoch:2 + Entries: + 1/3 EntryNormal "" + Messages: + 2->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 2->1 MsgAppResp Term:1 Log:0/3 Commit:2 +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:2 Lead:1 LeadEpoch:0 + Entries: + 1/3 EntryNormal "" + Messages: + 3->1 MsgAppResp Term:1 Log:0/3 Commit:2 +> 1 receiving messages + 2->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:2 + 2->1 MsgAppResp Term:1 Log:0/3 Commit:2 + 3->1 MsgAppResp Term:1 Log:0/3 Commit:2 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:3 Lead:1 LeadEpoch:3 + CommittedEntries: + 1/3 EntryNormal "" + Messages: + 1->2 MsgApp Term:1 Log:1/3 Commit:3 + 1->3 MsgApp Term:1 Log:1/3 Commit:3 +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/3 Commit:3 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/3 Commit:3 +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:3 Lead:1 LeadEpoch:2 + CommittedEntries: + 1/3 EntryNormal "" + Messages: + 2->1 MsgAppResp Term:1 Log:0/3 Commit:3 +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:3 Lead:1 LeadEpoch:0 + CommittedEntries: + 1/3 EntryNormal "" + Messages: + 3->1 MsgAppResp Term:1 Log:0/3 Commit:3 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/3 Commit:3 + 3->1 MsgAppResp Term:1 Log:0/3 Commit:3 + +# On the next heartbeat, the leader still won't send a MsgFortifyLeader to +# follower 3 because it doesn't support it in the store liveness fabric. 
+tick-heartbeat 1 +---- +ok + +stabilize 1 +---- +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgHeartbeat Term:1 Log:0/0 + +grant-support 3 1 +---- + 1 2 3 +1 3 1 1 +2 2 1 1 +3 3 1 1 + +# Now that follower 3 supports the leader in the store liveness fabric, the +# leader will try to fortify it on the next heartbeat. +tick-heartbeat 1 +---- +ok + +stabilize 1 3 +---- +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 +> 3 receiving messages + 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:3 Lead:1 LeadEpoch:3 + Messages: + 3->1 MsgHeartbeatResp Term:1 Log:0/0 + 3->1 MsgHeartbeatResp Term:1 Log:0/0 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:3 +> 1 receiving messages + 3->1 MsgHeartbeatResp Term:1 Log:0/0 + 3->1 MsgHeartbeatResp Term:1 Log:0/0 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:3 + +# If the follower supports the leader at an older epoch, the leader will try +# to refortify it on the next heartbeat timeout. 
+withdraw-support 3 1 +---- + 1 2 3 +1 3 1 1 +2 2 1 1 +3 x 1 1 + +grant-support 3 1 +---- + 1 2 3 +1 4 1 1 +2 2 1 1 +3 4 1 1 + +tick-heartbeat 1 +---- +ok + +stabilize 1 +---- +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:3 Lead:1 LeadEpoch:4 + Messages: + 1->2 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgHeartbeat Term:1 Log:0/0 + 1->3 MsgFortifyLeader Term:1 Log:0/0 diff --git a/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt b/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt index 97a230c1364d..dc8501658067 100644 --- a/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt +++ b/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt @@ -68,6 +68,7 @@ Ready MustSync=false: Messages: 1->2 MsgHeartbeat Term:1 Log:0/0 1->3 MsgHeartbeat Term:1 Log:0/0 +1->3 MsgFortifyLeader Term:1 Log:0/0 # Iterate until no more work is done by the new peer. It receives the heartbeat # and responds. @@ -77,12 +78,14 @@ stabilize 3 1->3 MsgHeartbeat Term:1 Log:0/0 INFO 3 [term: 0] received a MsgHeartbeat message with higher term from 1 [term: 1] INFO 3 became follower at term 1 + 1->3 MsgFortifyLeader Term:1 Log:0/0 > 3 handling Ready Ready MustSync=true: - HardState Term:1 Commit:0 Lead:1 LeadEpoch:0 + HardState Term:1 Commit:0 Lead:1 LeadEpoch:1 Messages: 3->1 MsgHeartbeatResp Term:1 Log:0/0 - + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + # The leader in turn will realize that n3 needs a snapshot, which it initiates. 
stabilize 1 ---- @@ -90,6 +93,7 @@ stabilize 1 3->1 MsgHeartbeatResp Term:1 Log:0/0 DEBUG 1 [firstindex: 12, commit: 11] sent snapshot[index: 11, term: 1] to 3 [StateProbe match=0 next=11 sentCommit=10 matchCommit=0] DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=12 sentCommit=11 matchCommit=0 paused pendingSnap=11] + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 > 1 handling Ready Ready MustSync=false: Messages: @@ -117,7 +121,7 @@ stabilize 3 INFO 3 [commit: 11] restored snapshot [index: 11, term: 1] > 3 handling Ready Ready MustSync=true: - HardState Term:1 Commit:11 Lead:1 LeadEpoch:0 + HardState Term:1 Commit:11 Lead:1 LeadEpoch:1 Snapshot Index:11 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false Messages: 3->1 MsgAppResp Term:1 Log:0/11 Commit:11