Skip to content

Commit

Permalink
rafttest: reduce waitLeader flakiness
Browse files Browse the repository at this point in the history
There are a few tests requiring a stable leader. For example,
TestBasicProgress waits for a leader, submits 100 proposals, and expects
that all 100 proposals are committed. In rare cases, a leader is
elected, and the test proceeds, but in the meantime another node
campaigns and wins a higher-term election. After this, some proposals
end up not committed (legitimately), and the test fails.

This commit replaces the waitLeader function with waitStableLeader, which
uses a better heuristic for a stable leader. It now waits until the leader
has the highest term in the cluster, which makes it much less likely
(although not impossible) that an in-flight campaign is about to win.

Signed-off-by: Pavel Kalinnikov <[email protected]>
  • Loading branch information
pav-kv committed Apr 10, 2024
1 parent d9cb993 commit 5535384
Showing 1 changed file with 23 additions and 18 deletions.
41 changes: 23 additions & 18 deletions rafttest/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestBasicProgress(t *testing.T) {
nodes = append(nodes, n)
}

waitLeader(nodes)
waitStableLeader(nodes)

for i := 0; i < 100; i++ {
nodes[0].Propose(context.TODO(), []byte("somedata"))
Expand All @@ -59,7 +59,7 @@ func TestRestart(t *testing.T) {
nodes = append(nodes, n)
}

l := waitLeader(nodes)
l := waitStableLeader(nodes)
k1, k2 := (l+1)%5, (l+2)%5

for i := 0; i < 30; i++ {
Expand Down Expand Up @@ -97,7 +97,7 @@ func TestPause(t *testing.T) {
nodes = append(nodes, n)
}

waitLeader(nodes)
waitStableLeader(nodes)

for i := 0; i < 30; i++ {
nodes[0].Propose(context.TODO(), []byte("somedata"))
Expand All @@ -123,26 +123,31 @@ func TestPause(t *testing.T) {
}
}

func waitLeader(ns []*node) int {
var l map[uint64]struct{}
var lindex = -1

// waitStableLeader waits until there is a stable leader in the cluster. It
// heuristically assumes that there is a stable leader when there is a node in
// StateLeader among the highest-term nodes.
//
// Note that this function would not work properly in clusters with "network"
// partitions, in which a node can have the highest term, and yet never become a
// leader.
func waitStableLeader(ns []*node) int {
for {
l = make(map[uint64]struct{})

lead := -1
var maxTerm uint64
for i, n := range ns {
lead := n.Status().SoftState.Lead
if lead != 0 {
l[lead] = struct{}{}
if n.id == lead {
lindex = i
}
st := n.Status()
if st.Term > maxTerm {
lead = -1
maxTerm = st.Term
}
if st.RaftState == raft.StateLeader {
lead = i
}
}

if len(l) == 1 && lindex != -1 {
return lindex
if lead != -1 {
return lead
}
time.Sleep(time.Millisecond)
}
}

Expand Down

0 comments on commit 5535384

Please sign in to comment.