Skip to content

Commit

Permalink
Reworked draining nodes logic and pingHost retries/delay
Browse files Browse the repository at this point in the history
Signed-off-by: Dimitar <[email protected]>
  • Loading branch information
cranzy committed Jan 24, 2025
1 parent a6efba8 commit abb12aa
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 7 deletions.
6 changes: 5 additions & 1 deletion pkg/product/mke/api/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/Mirantis/mcc/pkg/constant"
common "github.com/Mirantis/mcc/pkg/product/common/api"
Expand Down Expand Up @@ -267,7 +268,10 @@ func pingHost(h *Host, address string, waitgroup *sync.WaitGroup, errCh chan<- e
}
return nil
},
retry.Attempts(12), // last attempt should wait ~7min
retry.MaxJitter(time.Second*3),
retry.Delay(time.Second*30),
retry.DelayType(retry.FixedDelay),
retry.Attempts(10), // should try for ~5min
)
if err != nil {
errCh <- fmt.Errorf("MKE health check failed: %w", err)
Expand Down
32 changes: 26 additions & 6 deletions pkg/product/mke/phase/uninstall_mcr.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,32 @@ func (p *UninstallMCR) Title() string {

// Run drains the swarm nodes (workers, then non-leader managers, then the leader) and uninstalls the container runtime from each host.
func (p *UninstallMCR) Run() error {
workers := p.Config.Spec.Workers()
managers := p.Config.Spec.Managers()
swarmLeader := p.Config.Spec.SwarmLeader()

// Drain all workers
for _, h := range workers {
if err := mcr.DrainNode(swarmLeader, h); err != nil {
return fmt.Errorf("%s: drain worker node: %w", h, err)
}
}

// Drain all managers
for _, h := range managers {
if swarmLeader.Address() == h.Address() {
continue
}
if err := mcr.DrainNode(swarmLeader, h); err != nil {
return fmt.Errorf("%s: draining manager node: %w", h, err)
}
}

// Drain the leader
if err := mcr.DrainNode(swarmLeader, swarmLeader); err != nil {
return fmt.Errorf("%s: drain leader node: %w", swarmLeader, err)
}

if err := phase.RunParallelOnHosts(p.Config.Spec.Hosts, p.Config, p.uninstallMCR); err != nil {
return fmt.Errorf("uninstall container runtime: %w", err)
}
Expand All @@ -31,12 +57,6 @@ func (p *UninstallMCR) Run() error {
func (p *UninstallMCR) uninstallMCR(h *api.Host, config *api.ClusterConfig) error {
log.Infof("%s: uninstalling container runtime", h)

leader := config.Spec.SwarmLeader()

if err := mcr.DrainNode(leader, h); err != nil {
return fmt.Errorf("%s: drain node: %w", h, err)
}

uVolumeCmd := h.Configurer.DockerCommandf("volume prune -f")
log.Infof("%s: unmounted dangling volumes", h)

Expand Down

0 comments on commit abb12aa

Please sign in to comment.