diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 476b1320bc31..a38e3cadae66 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -34,6 +34,7 @@ jobs: echo "${TARGET}" case "${TARGET}" in linux-amd64-e2e) + make gofail-enable VERBOSE=1 GOOS=linux GOARCH=amd64 CPU=4 EXPECT_DEBUG=true make test-e2e-release ;; linux-386-e2e) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 0a8e2967e85b..1d48fa6732cb 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -1827,6 +1827,8 @@ func (s *EtcdServer) apply( s.setTerm(e.Term) case raftpb.EntryConfChange: + // gofail: var beforeApplyOneConfChange struct{} + // We need to toApply all WAL entries on top of v2store // and only 'unapplied' (e.Index>backend.ConsistentIndex) on the backend. shouldApplyV3 := membership.ApplyV2storeOnly diff --git a/tests/e2e/ctl_v3_member_test.go b/tests/e2e/ctl_v3_member_test.go index ccdc01bdf4da..ece5308a07a2 100644 --- a/tests/e2e/ctl_v3_member_test.go +++ b/tests/e2e/ctl_v3_member_test.go @@ -15,12 +15,15 @@ package e2e import ( + "context" "encoding/json" "fmt" "io" "reflect" "strings" + "sync" "testing" + "time" "github.com/stretchr/testify/require" @@ -55,6 +58,81 @@ func TestCtlV3MemberUpdatePeerTLS(t *testing.T) { testCtl(t, memberUpdateTest, withCfg(*e2e.NewConfigPeerTLS())) } +func TestCtlV3ConsistentMemberList(t *testing.T) { + e2e.BeforeTest(t) + + epc, err := e2e.NewEtcdProcessCluster(context.TODO(), t, + e2e.WithClusterSize(1), + e2e.WithWaitClusterReadyTimeout(0), + e2e.WithEnvVars(map[string]string{"GOFAIL_FAILPOINTS": `beforeApplyOneConfChange=sleep("2s")`}), + ) + require.NoError(t, err, "failed to start etcd cluster: %v", err) + defer func() { + derr := epc.Close() + require.NoError(t, derr, "failed to close etcd cluster: %v", derr) + }() + + t.Log("Adding and then removing a learner") + resp, err := epc.Etcdctl().MemberAddAsLearner(context.TODO(), "newLearner", []string{fmt.Sprintf("http://localhost:%d", e2e.EtcdProcessBasePort+11)}) + require.NoError(t, err) + _, err = epc.Etcdctl().MemberRemove(context.TODO(), resp.Member.ID) + require.NoError(t, err) + t.Logf("Added and then removed a learner with ID: %x", resp.Member.ID) + + var wg sync.WaitGroup + wg.Add(2) + stopc := make(chan struct{}, 2) + + t.Log("Starting a goroutine to repeatedly restart etcdserver") + go func() { + defer func() { + stopc <- struct{}{} + wg.Done() + }() + for i := 0; i < 3; i++ { + select { + case <-stopc: + return + default: + } + + merr := epc.Procs[0].Restart(context.TODO()) + require.NoError(t, merr) + epc.WaitLeader(t) + + time.Sleep(100 * time.Millisecond) + } + }() + + t.Log("Starting a goroutine to repeated check the member list") + count := 0 + go func() { + defer func() { + stopc <- struct{}{} + wg.Done() + }() + + for { + select { + case <-stopc: + return + default: + } + + mresp, merr := epc.Etcdctl().MemberList(context.TODO(), true) + if merr != nil { + continue + } + + count++ + require.Equal(t, 1, len(mresp.Members)) + } + }() + + wg.Wait() + t.Logf("Checked the member list %d times", count) +} + func memberListTest(cx ctlCtx) { if err := ctlV3MemberList(cx); err != nil { cx.t.Fatalf("memberListTest ctlV3MemberList error (%v)", err) diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go index e9c50df0f0f4..94d2f2e99267 100644 --- a/tests/framework/e2e/cluster.go +++ b/tests/framework/e2e/cluster.go @@ -193,6 +193,8 @@ type EtcdProcessClusterConfig struct { ExperimentalWarningUnaryRequestDuration time.Duration PeerProxy bool WatchProcessNotifyInterval time.Duration + + WaitClusterReadyTimeout time.Duration } func DefaultConfig() *EtcdProcessClusterConfig { @@ -371,6 +373,14 @@ func WithWatchProcessNotifyInterval(interval time.Duration) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.WatchProcessNotifyInterval = interval } } +func WithWaitClusterReadyTimeout(readyTimeout time.Duration) EPClusterOption { + return func(c *EtcdProcessClusterConfig) { c.WaitClusterReadyTimeout = readyTimeout } +} + +func WithEnvVars(ev map[string]string) EPClusterOption { + return func(c *EtcdProcessClusterConfig) { c.EnvVars = ev } +} + func WithPeerProxy(enabled bool) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.PeerProxy = enabled } } @@ -610,6 +620,9 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in if cfg.WatchProcessNotifyInterval != 0 { args = append(args, "--experimental-watch-progress-notify-interval", cfg.WatchProcessNotifyInterval.String()) } + if cfg.WaitClusterReadyTimeout != 0 { + args = append(args, "--experimental-wait-cluster-ready-timeout", cfg.WaitClusterReadyTimeout.String()) + } if cfg.SnapshotCatchUpEntries != etcdserver.DefaultSnapshotCatchUpEntries { if cfg.Version == CurrentVersion || (cfg.Version == MinorityLastVersion && i <= cfg.ClusterSize/2) || (cfg.Version == QuorumLastVersion && i > cfg.ClusterSize/2) { args = append(args, "--experimental-snapshot-catchup-entries", fmt.Sprintf("%d", cfg.SnapshotCatchUpEntries))