From 38c7db6af45bf5da397c7bdfd9b98bfc79c9e6c3 Mon Sep 17 00:00:00 2001 From: secwall Date: Mon, 22 Jan 2024 14:04:00 +0100 Subject: [PATCH] Skip active nodes update if local master node is offline --- internal/app/manager.go | 10 ++++++---- internal/app/repair.go | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/internal/app/manager.go b/internal/app/manager.go index a21deb0..3420113 100644 --- a/internal/app/manager.go +++ b/internal/app/manager.go @@ -65,7 +65,7 @@ func (app *App) stateManager() appState { return stateMaintenance } - app.repairLocalNode(master) + updateActive := app.repairLocalNode(master) var switchover Switchover if err := app.dcs.Get(pathCurrentSwitch, &switchover); err == nil { @@ -141,9 +141,11 @@ func (app *App) stateManager() appState { delete(app.nodeFailTime, master) app.repairShard(shardState, activeNodes, master) - err = app.updateActiveNodes(shardState, shardStateDcs, activeNodes, master) - if err != nil { - app.logger.Error("Failed to update active nodes in dcs", "error", err) + if updateActive { + err = app.updateActiveNodes(shardState, shardStateDcs, activeNodes, master) + if err != nil { + app.logger.Error("Failed to update active nodes in dcs", "error", err) + } } return stateManager diff --git a/internal/app/repair.go b/internal/app/repair.go index 08f136a..dae3751 100644 --- a/internal/app/repair.go +++ b/internal/app/repair.go @@ -107,7 +107,7 @@ func (app *App) repairReplica(node *redis.Node, masterState, state *HostState, m } } -func (app *App) repairLocalNode(master string) { +func (app *App) repairLocalNode(master string) bool { local := app.shard.Local() offline, err := local.IsOffline(app.ctx) @@ -129,24 +129,24 @@ func (app *App) repairLocalNode(master string) { } if !offline { - return + return true } shardState, err := app.getShardStateFromDB() if err != nil { app.logger.Error("Local repair: unable to get actual shard state", "error", err) - return + return false } state, ok := shardState[local.FQDN()] if !ok { app.logger.Error("Local repair: unable to find local node in shard state") - return + return true } if master == local.FQDN() && len(shardState) != 1 { activeNodes, err := app.GetActiveNodes() if err != nil { app.logger.Error("Unable to get active nodes for local node repair", "error", err) - return + return true } activeSet := make(map[string]struct{}, len(activeNodes)) for _, node := range activeNodes { @@ -166,22 +166,24 @@ func (app *App) repairLocalNode(master string) { } if aheadHosts != 0 { app.logger.Error(fmt.Sprintf("Not making local node online: %d nodes are ahead in replication history", aheadHosts)) - return + return false } } } else if state.ReplicaState == nil { err, rewriteErr := local.SetReadOnly(app.ctx, false) if err != nil { app.logger.Error("Unable to make local node read-only", "error", err) - return + return true } if rewriteErr != nil { app.logger.Error("Unable rewrite conf after making local node read-only", "error", rewriteErr) - return + return true } } err = local.SetOnline(app.ctx) if err != nil { app.logger.Error("Unable to set local node online", "error", err) + return false } + return true }