Skip to content

Commit

Permalink
add configurable no progress timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
SzyWilliam committed Jan 2, 2025
1 parent 320b207 commit 9f8abfc
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
10 changes: 10 additions & 0 deletions ratis-docs/src/site/markdown/configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ When bootstrapping a new peer, If the gap between the match index of the
peer and the leader's latest committed index is less than this gap, we
treat the peer as caught-up. Increase this number when write throughput is high.

---------------------------------------------------------------------------------
| **Property** | `raft.server.boostrap.timeout` |
|:----------------|:------------------------------------|
| **Description** | timeout of bootstrapping a new peer |
| **Type** | TimeDuration |
| **Default** | 3 times the RPC timeoutMax |

During the initialization of a new peer, the leader will classify the bootstrap process as "NO PROGRESS"
if it fails to receive any RPC responses from this peer within this specified timeout period.

---------------------------------------------------------------------------------
### ThreadPool - Configurations related to server thread pools.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@ static void setStagingCatchupGap(RaftProperties properties, int stagingCatchupGa
setInt(properties::setInt, STAGING_CATCHUP_GAP_KEY, stagingCatchupGap);
}

/**
 * Property key for the timeout of bootstrapping a new peer.
 * NOTE(review): the key segment is spelled "boostrap" (not "bootstrap"); the
 * documented property name uses the same spelling, so any rename has to change
 * both places together.
 */
String BOOTSTRAP_TIMEOUT_KEY = PREFIX + ".boostrap.timeout";

/**
 * No fixed default: {@code bootstrapTimeout(RaftProperties)} passes this null together with
 * a fallback of three times {@code Rpc.timeoutMax(...)}.
 */
TimeDuration BOOTSTRAP_TIMEOUT_DEFAULT = null;

/**
 * Reads the timeout used when bootstrapping a new peer.
 *
 * The lookup is made for {@link #BOOTSTRAP_TIMEOUT_KEY} with a null default, and is given
 * three times {@code Rpc.timeoutMax(properties, null)} as the fallback value.
 * NOTE(review): presumably the fallback value is returned when the key is not set
 * (the documentation states the default is 3 times timeoutMax) -- confirm against
 * the getTimeDuration helper.
 *
 * @param properties the properties to read from
 * @return the configured bootstrap timeout, or the fallback described above
 */
static TimeDuration bootstrapTimeout(RaftProperties properties) {
  // Fallback value: the RPC timeout max multiplied by 3.
  final TimeDuration tripleRpcTimeoutMax = Rpc.timeoutMax(properties, null).multiply(3);
  return getTimeDuration(
      // Values are parsed using the fallback's time unit.
      properties.getTimeDuration(tripleRpcTimeoutMax.getUnit()),
      BOOTSTRAP_TIMEOUT_KEY,
      BOOTSTRAP_TIMEOUT_DEFAULT,
      Rpc.TIMEOUT_MAX_KEY,
      tripleRpcTimeoutMax,
      getDefaultLog());
}
/**
 * Stores the given bootstrap timeout under {@link #BOOTSTRAP_TIMEOUT_KEY}.
 *
 * @param properties the properties to update
 * @param bootstrapTimeout the timeout value to store
 */
static void setBootstrapTimeout(RaftProperties properties, TimeDuration bootstrapTimeout) {
  setTimeDuration(
      properties::setTimeDuration,
      BOOTSTRAP_TIMEOUT_KEY,
      bootstrapTimeout);
}


interface ThreadPool {
String PREFIX = RaftServerConfigKeys.PREFIX + ".threadpool";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ boolean isApplied() {

private final boolean logMetadataEnabled;
// Initialized in the constructor from RaftServerConfigKeys.stagingCatchupGap(properties).
private final int stagingCatchupGap;
// Initialized in the constructor from RaftServerConfigKeys.bootstrapTimeout(properties);
// checkProgress compares a follower's last RPC response time against "now minus this duration".
private final TimeDuration bootstrapTimeout;
private final RaftServerMetricsImpl raftServerMetrics;
private final LogAppenderMetrics logAppenderMetrics;
private final long followerMaxGapThreshold;
Expand All @@ -365,6 +366,7 @@ boolean isApplied() {

final RaftProperties properties = server.getRaftServer().getProperties();
stagingCatchupGap = RaftServerConfigKeys.stagingCatchupGap(properties);
bootstrapTimeout = RaftServerConfigKeys.bootstrapTimeout(properties);

final ServerState state = server.getState();
this.raftLog = state.getLog();
Expand Down Expand Up @@ -791,7 +793,7 @@ public void run() {
private BootStrapProgress checkProgress(FollowerInfo follower, long committed) {
Preconditions.assertTrue(!isCaughtUp(follower));
final Timestamp progressTime = Timestamp.currentTime().addTimeMs(-server.getMaxTimeoutMs());
final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-3L * server.getMaxTimeoutMs());
final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-bootstrapTimeout.toLong(TimeUnit.MILLISECONDS));
if (follower.getLastRpcResponseTime().compareTo(timeoutTime) < 0) {
LOG.debug("{} detects a follower {} timeout ({}ms) for bootstrapping", this, follower,
follower.getLastRpcResponseTime().elapsedTimeMs());
Expand Down

0 comments on commit 9f8abfc

Please sign in to comment.