Skip to content

Commit

Permalink
add method (*EtcdServer) IsRaftLoopBlocked to support checking whethe…
Browse files Browse the repository at this point in the history
…r the raft loop is blocked

Signed-off-by: Benjamin Wang <[email protected]>
  • Loading branch information
ahrtr committed Oct 8, 2023
1 parent 01a0d8b commit 37e4060
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
17 changes: 17 additions & 0 deletions server/etcdserver/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ type raftNode struct {

stopped chan struct{}
done chan struct{}

// used by liveness probe to check whether the raftloop is blocked.
dummyc chan struct{}
}

type raftNodeConfig struct {
Expand Down Expand Up @@ -142,6 +145,7 @@ func newRaftNode(cfg raftNodeConfig) *raftNode {
applyc: make(chan toApply),
stopped: make(chan struct{}),
done: make(chan struct{}),
dummyc: make(chan struct{}),
}
if r.heartbeat == 0 {
r.ticker = &time.Ticker{}
Expand Down Expand Up @@ -322,6 +326,8 @@ func (r *raftNode) start(rh *raftReadyHandler) {
// notify etcdserver that raft has already been notified or advanced.
raftAdvancedC <- struct{}{}
}
case <-r.dummyc:
r.lg.Debug("Received dummy event")
case <-r.stopped:
return
}
Expand Down Expand Up @@ -413,6 +419,17 @@ func (r *raftNode) onStop() {
close(r.done)
}

func (r *raftNode) trySendDummyEvent(timeout time.Duration) error {
select {
case r.dummyc <- struct{}{}:
case <-r.done:
case <-time.After(timeout):
return fmt.Errorf("failed to send dummy event in %s", timeout.String())
}

return nil
}

// for testing
func (r *raftNode) pauseSending() {
p := r.transport.(rafthttp.Pausable)
Expand Down
11 changes: 11 additions & 0 deletions server/etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,17 @@ func (s *EtcdServer) Stop() {
s.HardStop()
}

// IsRaftLoopBlocked checks whether the raft loop has blocked for at least
// the duration specified by `timeout`, and it defaults to 2*ElectionTimeout,
// which is the maximum time to trigger a new leader election.
// If the returned error isn't nil, then it's blocked; otherwise not.
func (s *EtcdServer) IsRaftLoopBlocked(timeout time.Duration) error {
if timeout == 0 {
timeout = 2 * s.Cfg.ElectionTimeout()
}
return s.r.trySendDummyEvent(timeout)
}

// ReadyNotify returns a channel that will be closed when the server
// is ready to serve client requests
func (s *EtcdServer) ReadyNotify() <-chan struct{} { return s.readych }
Expand Down

0 comments on commit 37e4060

Please sign in to comment.