Skip to content

Commit

Permalink
Skip active nodes update if local master node is offline
Browse files: browse the repository at this point in the history
  • Loading branch information
secwall committed Jan 22, 2024
1 parent 5272e27 commit 3ef0d7e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 12 deletions.
10 changes: 6 additions & 4 deletions internal/app/manager.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,7 +65,7 @@ func (app *App) stateManager() appState {
return stateMaintenance
}

app.repairLocalNode(master)
updateActive := app.repairLocalNode(master)

var switchover Switchover
if err := app.dcs.Get(pathCurrentSwitch, &switchover); err == nil {
Expand Down Expand Up @@ -141,9 +141,11 @@ func (app *App) stateManager() appState {
delete(app.nodeFailTime, master)
app.repairShard(shardState, activeNodes, master)

err = app.updateActiveNodes(shardState, shardStateDcs, activeNodes, master)
if err != nil {
app.logger.Error("Failed to update active nodes in dcs", "error", err)
if updateActive {
err = app.updateActiveNodes(shardState, shardStateDcs, activeNodes, master)
if err != nil {
app.logger.Error("Failed to update active nodes in dcs", "error", err)
}
}

return stateManager
Expand Down
18 changes: 10 additions & 8 deletions internal/app/repair.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,7 +107,7 @@ func (app *App) repairReplica(node *redis.Node, masterState, state *HostState, m
}
}

func (app *App) repairLocalNode(master string) {
func (app *App) repairLocalNode(master string) bool {
local := app.shard.Local()

offline, err := local.IsOffline(app.ctx)
Expand All @@ -129,24 +129,24 @@ func (app *App) repairLocalNode(master string) {
}

if !offline {
return
return true
}

shardState, err := app.getShardStateFromDB()
if err != nil {
app.logger.Error("Local repair: unable to get actual shard state", "error", err)
return
return false
}
state, ok := shardState[local.FQDN()]
if !ok {
app.logger.Error("Local repair: unable to find local node in shard state")
return
return true
}
if master == local.FQDN() && len(shardState) != 1 {
activeNodes, err := app.GetActiveNodes()
if err != nil {
app.logger.Error("Unable to get active nodes for local node repair", "error", err)
return
return true
}
activeSet := make(map[string]struct{}, len(activeNodes))
for _, node := range activeNodes {
Expand All @@ -166,22 +166,24 @@ func (app *App) repairLocalNode(master string) {
}
if aheadHosts != 0 {
app.logger.Error(fmt.Sprintf("Not making local node online: %d nodes are ahead in replication history", aheadHosts))
return
return false
}
}
} else if state.ReplicaState == nil {
err, rewriteErr := local.SetReadOnly(app.ctx, false)
if err != nil {
app.logger.Error("Unable to make local node read-only", "error", err)
return
return true
}
if rewriteErr != nil {
app.logger.Error("Unable rewrite conf after making local node read-only", "error", rewriteErr)
return
return true
}
}
err = local.SetOnline(app.ctx)
if err != nil {
app.logger.Error("Unable to set local node online", "error", err)
return false
}
return true
}

0 comments on commit 3ef0d7e

Please sign in to comment.