From 3c3585caffaa06feb1e30a63164d58b745573f74 Mon Sep 17 00:00:00 2001 From: Paul Lorenz Date: Tue, 11 Feb 2025 20:50:46 -0500 Subject: [PATCH] Disconnect and event on non-member peers. Fixes #2742 --- controller/event/cluster.go | 20 +++++++++++--------- controller/raft/mesh/mesh.go | 19 +++++++++++++++++++ controller/raft/raft.go | 10 ++++++++++ zititest/models/smoke/actions/bootstrap.go | 2 +- 4 files changed, 41 insertions(+), 10 deletions(-) diff --git a/controller/event/cluster.go b/controller/event/cluster.go index 96d86fdff..1a08badd8 100644 --- a/controller/event/cluster.go +++ b/controller/event/cluster.go @@ -36,6 +36,7 @@ const ( ClusterIsLeaderless ClusterEventType = "state.is_leaderless" ClusterStateReadOnly ClusterEventType = "state.ro" ClusterStateReadWrite ClusterEventType = "state.rw" + ClusterPeerNotMember ClusterEventType = "peer.not_member" ) // A ClusterPeer represents a controller which is a member of the cluster. @@ -72,15 +73,16 @@ func (self *ClusterPeer) String() string { // A ClusterEvent marks a change to the controller HA cluster. // ClusterEvents can be of the following types: -// - peer.connected -// - peer.disconnected -// - members.changed -// - leadership.gained -// - leadership.lost -// - state.has_leader -// - state.is_leaderless -// - state.ro -// - state.rw +// - peer.connected - a peer connected +// - peer.disconnected - a peer disconnected +// - peer.not_member - a peer connected, but was not a member and didn't join the cluster +// - members.changed - A peer was added to or removed from the cluster +// - leadership.gained - The node become the cluster leader +// - leadership.lost - The node lost cluster leadership +// - state.has_leader - The cluster gained a leader +// - state.is_leaderless - The cluster became leaderless +// - state.ro - The cluster is not accepting state changes, likely due to version mismatches in cluster members +// - state.rw - The cluster is accepting state changes // // Example: Cluster Members Changed Event // diff --git a/controller/raft/mesh/mesh.go b/controller/raft/mesh/mesh.go index 275f12b32..8d128de2f 100644 --- a/controller/raft/mesh/mesh.go +++ b/controller/raft/mesh/mesh.go @@ -281,6 +281,8 @@ type Env interface { GetClusterId() string GetVersionProvider() versions.VersionProvider GetEventDispatcher() event.Dispatcher + IsPeerMember(id string) bool + IsLeader() bool } // Mesh provides the networking layer to raft @@ -823,6 +825,23 @@ func (self *impl) AcceptUnderlay(underlay channel.Underlay) error { binding.AddReceiveHandlerF(RaftConnectType, peer.handleReceiveConnect) binding.AddReceiveHandlerF(RaftDisconnectType, peer.handleReceiveDisconnect) binding.AddCloseHandler(peer) + + if self.env.IsLeader() && !self.env.IsPeerMember(id) { + time.AfterFunc(time.Minute, func() { + if !self.env.IsPeerMember(id) && !binding.GetChannel().IsClosed() { + logger := pfxlog.Logger().WithField("peer", peer.Id) + logger.Info("disconnecting non-member peer after 1 minute") + if err := binding.GetChannel().Close(); err != nil { + log.WithError(err).Error("error closing channel to non-member peer") + } + + evt := event.NewClusterEvent(event.ClusterPeerNotMember) + evt.Peers = self.GetEventPeerList(peer) + self.env.GetEventDispatcher().AcceptClusterEvent(evt) + } + }) + } + return self.PeerConnected(peer, false) }) diff --git a/controller/raft/raft.go b/controller/raft/raft.go index 160c97bb8..17b795893 100644 --- a/controller/raft/raft.go +++ b/controller/raft/raft.go @@ -170,6 +170,16 @@ func (self *Controller) GetEventDispatcher() event.Dispatcher { return self.env.GetEventDispatcher() } +func (self *Controller) IsPeerMember(id string) bool { + result := self.Fsm.GetCurrentState(self.Raft) + for _, srv := range result.Servers { + if string(srv.ID) == id { + return true + } + } + return false +} + func (self *Controller) GetListenerHeaders() map[int32][]byte { return map[int32][]byte{ mesh.ClusterIdHeader: []byte(self.clusterId.Load()), diff --git a/zititest/models/smoke/actions/bootstrap.go b/zititest/models/smoke/actions/bootstrap.go index 298e66fc3..9d57fce92 100644 --- a/zititest/models/smoke/actions/bootstrap.go +++ b/zititest/models/smoke/actions/bootstrap.go @@ -54,7 +54,7 @@ func (a *bootstrapAction) bind(m *model.Model) model.Action { workflow.AddAction(component.Start(".ctrl")) if isHA { - workflow.AddAction(semaphore.Sleep(2 * time.Second)) + workflow.AddAction(semaphore.Sleep(5 * time.Second)) workflow.AddAction(edge.InitRaftController("#ctrl1")) }