Skip to content

Commit

Permalink
ccruntime: processCcRuntimeDeleteRequest split
Browse files Browse the repository at this point in the history
We are currently experiencing issues with finalizers hanging when
deleting ccruntime. Initial debugging has pinpointed the problem to the
processCcRuntimeDeleteRequest method.

This method is large and has a cyclomatic complexity of 21. We should
take this opportunity to use early returns to reduce nesting and
simplify control flow. Additionally, this is moving the finalizer
handling logic to its own method.

This refactor should not change the current logic, only improve
readability and maintainability.

Related to #391.

Signed-off-by: Beraldo Leal <[email protected]>
  • Loading branch information
beraldoleal committed Aug 14, 2024
1 parent 402e795 commit aa7b076
Showing 1 changed file with 90 additions and 74 deletions.
164 changes: 90 additions & 74 deletions controllers/ccruntime_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,13 @@ func (r *CcRuntimeReconciler) setCleanupNodeLabels() (ctrl.Result, error) {
}

func (r *CcRuntimeReconciler) processCcRuntimeDeleteRequest() (ctrl.Result, error) {
var result = ctrl.Result{}

// Create the uninstall DaemonSet
ds := r.processDaemonset(UninstallOperation)
if err := controllerutil.SetControllerReference(r.ccRuntime, ds, r.Scheme); err != nil {
r.Log.Error(err, "Failed setting ControllerReference for uninstall DS")
return ctrl.Result{}, err
}

foundDs := &appsv1.DaemonSet{}
err := r.Client.Get(context.TODO(), types.NamespacedName{Name: ds.Name, Namespace: ds.Namespace}, foundDs)
if err != nil && errors.IsNotFound(err) {
Expand All @@ -207,90 +206,107 @@ func (r *CcRuntimeReconciler) processCcRuntimeDeleteRequest() (ctrl.Result, erro
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 10}, err
}

if contains(r.ccRuntime.GetFinalizers(), RuntimeConfigFinalizer) {
// Check for nodes with label set by install DS prestop hook.
// If no nodes exist then remove finalizer and reconcile
err, nodes := r.getNodesWithLabels(r.ccRuntime.Spec.Config.UninstallDoneLabel)
if err != nil {
r.Log.Error(err, "Error in getting list of nodes with uninstallDoneLabel")
return ctrl.Result{}, err
}
finishedNodes := len(nodes.Items)
if !allNodesDone(finishedNodes, r) {
result, err = r.setCleanupNodeLabels()
if err != nil {
r.Log.Error(err, "updating the cleanup labels on nodes failed")
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, err
}
} else {
if r.ccRuntime.Spec.Config.PostUninstall.Image == "" {
if !contains(r.ccRuntime.GetFinalizers(), RuntimeConfigFinalizer) {
return ctrl.Result{}, err
}

// GetFinalizers contains RuntimeConfigFinalizer, lets handle in a different
// method.
return handleFinalizers(r)
}

func handleFinalizers(r *CcRuntimeReconciler) (ctrl.Result, error) {
var result = ctrl.Result{}

// Check for nodes with label set by install DS prestop hook.
// If no nodes exist then remove finalizer and reconcile
err, nodes := r.getNodesWithLabels(r.ccRuntime.Spec.Config.UninstallDoneLabel)
if err != nil {
r.Log.Error(err, "Error in getting list of nodes with uninstallDoneLabel")
return ctrl.Result{}, err
}

finishedNodes := len(nodes.Items)
if allNodesDone(finishedNodes, r) {
if r.ccRuntime.Spec.Config.PostUninstall.Image != "" {
// FXIME: This should be treated in a better way, as just having the sleep
// here won't do us any good in the future.
//
// What's basically happening, and forcing us to do this, is the
// fact that the Uninstall and postUninstall daemonsets are being
// started at exactly the same time, leading to a race condition
// when changing the containerd configuration.
//
// When looking at the kata-containers payload code, we see that the
// the label is only set after containerd is successfully reconfigured,
// and looking at this function we see we shouldn't reach this part
// before the label is set. However, that's not what we're facing ...
time.Sleep(time.Second * 60)

result, err = handlePostUninstall(r)
if !result.Requeue {
controllerutil.RemoveFinalizer(r.ccRuntime, RuntimeConfigFinalizer)
} else if r.ccRuntime.Spec.Config.PostUninstall.Image != "" {
// FXIME: This should be treated in a better way, as just having the sleep
// here won't do us any good in the future.
//
// What's basically happening, and forcing us to do this, is the
// fact that the Uninstall and postUninstall daemonsets are being
// started at exactly the same time, leading to a race condition
// when changing the containerd configuration.
//
// When looking at the kata-containers payload code, we see that the
// the label is only set after containerd is successfully reconfigured,
// and looking at this function we see we shouldn't reach this part
// before the label is set. However, that's not what we're facing ...
time.Sleep(time.Second * 60)

result, err = handlePostUninstall(r)
if !result.Requeue {
controllerutil.RemoveFinalizer(r.ccRuntime, RuntimeConfigFinalizer)
result, err = r.updateCcRuntime()
if err != nil {
return result, err
}
result, err = r.deleteUninstallDaemonsets()
prepostLabels := map[string]string{}
if r.ccRuntime.Spec.Config.PreInstall.Image != "" {
prepostLabels[PreInstallDoneLabel[0]] = PreInstallDoneLabel[1]
}
if r.ccRuntime.Spec.Config.PostUninstall.Image != "" {
prepostLabels[PostUninstallDoneLabel[0]] = PostUninstallDoneLabel[1]
result, err = r.updateCcRuntime()
if err != nil {
return result, err
}

}
err, nodes := r.getNodesWithLabels(prepostLabels)
if err != nil {
r.Log.Error(err, "an error occured when getting the list of nodes from which we want to"+
"remove preinstall/postuninstall labels")
return ctrl.Result{}, err
}
postuninstalledNodes := len(nodes.Items)
if !allNodesDone(postuninstalledNodes, r) {
return ctrl.Result{Requeue: true}, nil
} else {
if postuninstalledNodes > 0 {
result, err = r.removeNodeLabels(nodes)
if err != nil {
r.Log.Error(err, "removing the labels from nodes failed")
return ctrl.Result{}, err
}
result, err = r.deleteUninstallDaemonsets()
prepostLabels := map[string]string{}

if r.ccRuntime.Spec.Config.PreInstall.Image != "" {
prepostLabels[PreInstallDoneLabel[0]] = PreInstallDoneLabel[1]
}

if r.ccRuntime.Spec.Config.PostUninstall.Image != "" {
prepostLabels[PostUninstallDoneLabel[0]] = PostUninstallDoneLabel[1]

}

err, nodes := r.getNodesWithLabels(prepostLabels)
if err != nil {
r.Log.Error(err, "an error occured when getting the list of nodes from which we want to"+
"remove preinstall/postuninstall labels")
return ctrl.Result{}, err
}

postuninstalledNodes := len(nodes.Items)
if !allNodesDone(postuninstalledNodes, r) {
return ctrl.Result{Requeue: true}, nil
} else {
if postuninstalledNodes > 0 {
result, err = r.removeNodeLabels(nodes)
if err != nil {
r.Log.Error(err, "removing the labels from nodes failed")
return ctrl.Result{}, err
}
}
}
}
if err != nil {
return result, err
}
result, err = r.updateCcRuntime()
return result, err
}

result, err = r.updateUninstallationStatus(finishedNodes)
controllerutil.RemoveFinalizer(r.ccRuntime, RuntimeConfigFinalizer)

if err != nil {
r.Log.Info("Error from updateUninstallationStatus")
return result, err
}
result.Requeue = true
result.RequeueAfter = time.Second * 10
result, err = r.updateCcRuntime()
return result, err
}

result, err = r.setCleanupNodeLabels()
if err != nil {
r.Log.Error(err, "updating the cleanup labels on nodes failed")
return ctrl.Result{Requeue: true, RequeueAfter: time.Second * 5}, err
}

result, err = r.updateUninstallationStatus(finishedNodes)
if err != nil {
r.Log.Info("Error from updateUninstallationStatus")
return result, err
}
result.Requeue = true
result.RequeueAfter = time.Second * 10
return result, err
}

Expand Down

0 comments on commit aa7b076

Please sign in to comment.