Skip to content

Commit

Permalink
Allow to disable the removal of maintenance mode when a SS outside of the maintenance zone fails (#11207)
Browse files: browse the repository at this point in the history
  • Loading branch information
johscheuer authored Feb 16, 2024
1 parent e8856e9 commit 8e434df
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 1 deletion.
1 change: 1 addition & 0 deletions fdbclient/ServerKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( DD_STORAGE_WIGGLE_PAUSE_THRESHOLD, 10 ); if( randomize && BUGGIFY ) DD_STORAGE_WIGGLE_PAUSE_THRESHOLD = 1000;
init( DD_STORAGE_WIGGLE_STUCK_THRESHOLD, 20 );
init( DD_BUILD_EXTRA_TEAMS_OVERRIDE, 10 ); if( randomize && BUGGIFY ) DD_BUILD_EXTRA_TEAMS_OVERRIDE = 2;
init( DD_REMOVE_MAINTENANCE_ON_FAILURE, true ); if( randomize && BUGGIFY ) DD_REMOVE_MAINTENANCE_ON_FAILURE = false;
init( ENABLE_STORAGE_QUEUE_AWARE_TEAM_SELECTION, false ); if( randomize && BUGGIFY ) ENABLE_STORAGE_QUEUE_AWARE_TEAM_SELECTION = true;
init( TRACE_STORAGE_QUEUE_AWARE_GET_TEAM_FOR_MANUAL_SPLIT_ONLY, true ); if (isSimulated) TRACE_STORAGE_QUEUE_AWARE_GET_TEAM_FOR_MANUAL_SPLIT_ONLY = false;
init( DD_TARGET_STORAGE_QUEUE_SIZE, TARGET_BYTES_PER_STORAGE_SERVER*0.35 ); if( randomize && BUGGIFY ) DD_TARGET_STORAGE_QUEUE_SIZE = TARGET_BYTES_PER_STORAGE_SERVER*0.035;
Expand Down
2 changes: 2 additions & 0 deletions fdbclient/ServerKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
double DD_FAILURE_TIME;
double DD_ZERO_HEALTHY_TEAM_DELAY;
int DD_BUILD_EXTRA_TEAMS_OVERRIDE; // build extra teams to allow data movement to progress. must be larger than 0
bool DD_REMOVE_MAINTENANCE_ON_FAILURE; // If set to true DD will remove the maintenance mode if another SS fails
// outside of the maintenance zone.

// Run the storage engine in a child process on the same machine as the storage process
bool REMOTE_KV_STORE;
Expand Down
3 changes: 2 additions & 1 deletion fdbserver/DDTeamCollection.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1684,7 +1684,8 @@ class DDTeamCollectionImpl {
.detail("ServerID", interf.id())
.detail("Status", status->toString());
status->isFailed = false;
} else if (self->clearHealthyZoneFuture.isReady()) {
} else if (SERVER_KNOBS->DD_REMOVE_MAINTENANCE_ON_FAILURE &&
self->clearHealthyZoneFuture.isReady()) {
self->clearHealthyZoneFuture = clearHealthyZone(self->cx);
TraceEvent("MaintenanceZoneCleared", self->distributorId).log();
self->healthyZone.set(Optional<Key>());
Expand Down

0 comments on commit 8e434df

Please sign in to comment.