Skip to content

Commit

Permalink
Always make use of the no_wait flag in the exclude controller to reduce the noise from exclusions that timeout (#2175)
Browse files Browse the repository at this point in the history
  • Loading branch information
johscheuer authored Nov 27, 2024
1 parent b388ae0 commit ab5148d
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 3 deletions.
8 changes: 6 additions & 2 deletions controllers/exclude_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
}

r.Recorder.Event(cluster, corev1.EventTypeNormal, "ExcludingProcesses", fmt.Sprintf("Excluding %v", fdbProcessesToExclude))
err = adminClient.ExcludeProcesses(fdbProcessesToExclude)
// We use the no_wait exclusion here to trigger the exclusion without waiting for the data movement to complete.
// There is no need to wait for the data movement to complete in this call as later calls will verify that the
// data is moved and the processes are fully excluded. Using the no_wait flag here will reduce the timeout errors
// as those are hit most of the time if at least one storage process is included in the exclusion list.
err = adminClient.ExcludeProcessesWithNoWait(fdbProcessesToExclude, true)
if err != nil {
return &requeue{curError: err, delayedRequeue: true}
}
Expand All @@ -206,7 +210,7 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
}
}

// Reset the SecondsSinceLastRecovered sine the operator just excluded some processes, which will cause a recovery.
// Reset the SecondsSinceLastRecovered since the operator just excluded some processes, which will cause a recovery.
status.Cluster.RecoveryState.SecondsSinceLastRecovered = 0.0

return nil
Expand Down
8 changes: 7 additions & 1 deletion fdbclient/admin_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,12 @@ func (client *cliAdminClient) ResetMaintenanceMode() error {

// ExcludeProcesses starts evacuating processes so that they can be removed from the database.
// It delegates to ExcludeProcessesWithNoWait, passing the value of the cluster's
// GetUseNonBlockingExcludes() as the noWait argument.
func (client *cliAdminClient) ExcludeProcesses(addresses []fdbv1beta2.ProcessAddress) error {
return client.ExcludeProcessesWithNoWait(addresses, client.Cluster.GetUseNonBlockingExcludes())
}

// ExcludeProcessesWithNoWait starts evacuating processes so that they can be removed from the database. If noWait is
// set to true, the exclude command will not block until all data is moved away from the processes.
func (client *cliAdminClient) ExcludeProcessesWithNoWait(addresses []fdbv1beta2.ProcessAddress, noWait bool) error {
if len(addresses) == 0 {
return nil
}
Expand All @@ -388,7 +394,7 @@ func (client *cliAdminClient) ExcludeProcesses(addresses []fdbv1beta2.ProcessAdd

var excludeCommand strings.Builder
excludeCommand.WriteString("exclude ")
if version.HasNonBlockingExcludes(client.Cluster.GetUseNonBlockingExcludes()) {
if version.HasNonBlockingExcludes(noWait) {
excludeCommand.WriteString("no_wait ")
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/fdbadminclient/admin_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ type AdminClient interface {
// from the database.
ExcludeProcesses(addresses []fdbv1beta2.ProcessAddress) error

// ExcludeProcessesWithNoWait starts evacuating processes so that they can be removed from the database. If noWait is
// set to true, the exclude command will not block until all data is moved away from the processes.
ExcludeProcessesWithNoWait(addresses []fdbv1beta2.ProcessAddress, noWait bool) error

// IncludeProcesses removes processes from the exclusion list and allows
// them to take on roles again.
IncludeProcesses(addresses []fdbv1beta2.ProcessAddress) error
Expand Down
6 changes: 6 additions & 0 deletions pkg/fdbadminclient/mock/admin_client_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,12 @@ func (client *AdminClient) ExcludeProcesses(addresses []fdbv1beta2.ProcessAddres
return nil
}

// ExcludeProcessesWithNoWait starts evacuating processes so that they can be removed from the database.
// The mock ignores the noWait argument (it is bound to the blank identifier) and forwards the
// addresses to ExcludeProcesses, returning its result unchanged.
func (client *AdminClient) ExcludeProcessesWithNoWait(addresses []fdbv1beta2.ProcessAddress, _ bool) error {
return client.ExcludeProcesses(addresses)
}

// IncludeProcesses removes processes from the exclusion list and allows
// them to take on roles again.
func (client *AdminClient) IncludeProcesses(addresses []fdbv1beta2.ProcessAddress) error {
Expand Down

0 comments on commit ab5148d

Please sign in to comment.