Skip to content

Commit

Permalink
adding node selector to snr daemonset
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Shitrit <[email protected]>
  • Loading branch information
mshitrit committed Nov 23, 2023
1 parent e80b04d commit 72bd5a5
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 23 deletions.
55 changes: 32 additions & 23 deletions controllers/selfnoderemediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,13 @@ import (
)

const (
SNRFinalizer = "self-node-remediation.medik8s.io/snr-finalizer"
nhcTimeOutAnnotation = "remediation.medik8s.io/nhc-timed-out"
SNRFinalizer = "self-node-remediation.medik8s.io/snr-finalizer"
nhcTimeOutAnnotation = "remediation.medik8s.io/nhc-timed-out"
excludeRemediationLabel = "remediation.medik8s.io/exclude-from-remediation"

eventReasonRemediationCreated = "RemediationCreated"
eventReasonRemediationStopped = "RemediationStopped"
eventReasonRemediationSkipped = "RemediationSkipped"

//remediation
eventReasonAddFinalizer = "AddFinalizer"
Expand Down Expand Up @@ -232,12 +234,32 @@ func (r *SelfNodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl.
result := ctrl.Result{}
var err error

node, err := r.getNodeFromSnr(snr)
if err != nil {
if apiErrors.IsNotFound(err) {
r.logger.Info("couldn't find node matching remediation", "node name", snr.Name)
if err = r.updateConditions(remediationFinishedNodeNotFound, snr); err != nil {
return ctrl.Result{}, err
}
r.Recorder.Event(snr, eventTypeNormal, eventReasonRemediationStopped, "couldn't find node matching remediation")
return ctrl.Result{}, nil
}
r.logger.Error(err, "failed to get node", "node name", snr.Name)
return ctrl.Result{}, err
}

if node.Labels[excludeRemediationLabel] == "true" {
r.logger.Info("remediation skipped this node is excluded from remediation", "node name", node.Name)
r.Recorder.Event(snr, eventTypeNormal, eventReasonRemediationSkipped, "remediation skipped this node is excluded from remediation")
return ctrl.Result{}, nil
}

strategy := r.getRuntimeStrategy(snr.Spec.RemediationStrategy)
switch strategy {
case v1alpha1.ResourceDeletionRemediationStrategy:
result, err = r.remediateWithResourceDeletion(snr)
result, err = r.remediateWithResourceDeletion(snr, node)
case v1alpha1.OutOfServiceTaintRemediationStrategy:
result, err = r.remediateWithOutOfServiceTaint(snr)
result, err = r.remediateWithOutOfServiceTaint(snr, node)
default:
//this should never happen since we enforce valid values with kubebuilder
err := errors.New("unsupported remediation strategy")
Expand Down Expand Up @@ -312,8 +334,8 @@ func (r *SelfNodeRemediationReconciler) getPhase(snr *v1alpha1.SelfNodeRemediati
}
}

// remediateWithResourceDeletion handles the ResourceDeletion remediation strategy by
// delegating to remediateWithResourceRemoval, with deleteResourcesWrapper supplied as
// the step that removes the node's resources.
// NOTE(review): this hunk lists two signature/return pairs; the pair that takes node
// matches the Reconcile call site that passes (snr, node).
func (r *SelfNodeRemediationReconciler) remediateWithResourceDeletion(snr *v1alpha1.SelfNodeRemediation) (ctrl.Result, error) {
return r.remediateWithResourceRemoval(snr, r.deleteResourcesWrapper)
func (r *SelfNodeRemediationReconciler) remediateWithResourceDeletion(snr *v1alpha1.SelfNodeRemediation, node *v1.Node) (ctrl.Result, error) {
return r.remediateWithResourceRemoval(snr, node, r.deleteResourcesWrapper)
}

// deleteResourcesWrapper returns a zero duration and a nil error when the node's resources are deleted successfully
Expand All @@ -323,8 +345,8 @@ func (r *SelfNodeRemediationReconciler) deleteResourcesWrapper(node *v1.Node, _
return 0, resources.DeletePods(context.Background(), r.Client, node.Name)
}

// remediateWithOutOfServiceTaint handles the OutOfServiceTaint remediation strategy by
// delegating to remediateWithResourceRemoval, with useOutOfServiceTaint supplied as
// the step that removes the node's resources.
// NOTE(review): this hunk lists two signature/return pairs; the pair that takes node
// matches the Reconcile call site that passes (snr, node).
func (r *SelfNodeRemediationReconciler) remediateWithOutOfServiceTaint(snr *v1alpha1.SelfNodeRemediation) (ctrl.Result, error) {
return r.remediateWithResourceRemoval(snr, r.useOutOfServiceTaint)
func (r *SelfNodeRemediationReconciler) remediateWithOutOfServiceTaint(snr *v1alpha1.SelfNodeRemediation, node *v1.Node) (ctrl.Result, error) {
return r.remediateWithResourceRemoval(snr, node, r.useOutOfServiceTaint)
}

func (r *SelfNodeRemediationReconciler) useOutOfServiceTaint(node *v1.Node, snr *v1alpha1.SelfNodeRemediation) (time.Duration, error) {
Expand Down Expand Up @@ -352,23 +374,10 @@ func (r *SelfNodeRemediationReconciler) useOutOfServiceTaint(node *v1.Node, snr

// removeNodeResources is the signature of the strategy-specific step handed to
// remediateWithResourceRemoval; deleteResourcesWrapper and useOutOfServiceTaint are
// both passed as values of this type. It receives the node and its remediation CR.
// NOTE(review): the returned duration is presumably a wait/requeue interval (zero when
// nothing is left to wait for) — confirm against the callers.
type removeNodeResources func(*v1.Node, *v1alpha1.SelfNodeRemediation) (time.Duration, error)

func (r *SelfNodeRemediationReconciler) remediateWithResourceRemoval(snr *v1alpha1.SelfNodeRemediation, rmNodeResources removeNodeResources) (ctrl.Result, error) {
node, err := r.getNodeFromSnr(snr)
if err != nil {
if apiErrors.IsNotFound(err) {
r.logger.Info("couldn't find node matching remediation", "node name", snr.Name)
if err = r.updateConditions(remediationFinishedNodeNotFound, snr); err != nil {
return ctrl.Result{}, err
}
r.Recorder.Event(snr, eventTypeNormal, eventReasonRemediationStopped, "couldn't find node matching remediation")
return ctrl.Result{}, nil
}
r.logger.Error(err, "failed to get node", "node name", snr.Name)
return ctrl.Result{}, err
}

func (r *SelfNodeRemediationReconciler) remediateWithResourceRemoval(snr *v1alpha1.SelfNodeRemediation, node *v1.Node, rmNodeResources removeNodeResources) (ctrl.Result, error) {
result := ctrl.Result{}
phase := r.getPhase(snr)
var err error
switch phase {
case fencingStartedPhase:
result, err = r.handleFencingStartedPhase(node, snr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,28 @@ var _ = Describe("SNR Controller", func() {
verifyEvent("Normal", "RemediationStopped", "couldn't find node matching remediation")
})
})

When("Node has exclude form remediation label", func() {
BeforeEach(func() {
node := &v1.Node{}
Expect(k8sClient.Client.Get(context.TODO(), unhealthyNodeNamespacedName, node)).To(Succeed())
node.Labels["remediation.medik8s.io/exclude-from-remediation"] = "true"
Expect(k8sClient.Client.Update(context.TODO(), node)).To(Succeed())
DeferCleanup(
func() {
node := &v1.Node{}
Expect(k8sClient.Client.Get(context.TODO(), unhealthyNodeNamespacedName, node)).To(Succeed())
delete(node.Labels, "remediation.medik8s.io/exclude-from-remediation")
Expect(k8sClient.Client.Update(context.TODO(), node)).To(Succeed())
},
)
})

It("remediation should stop", func() {
time.Sleep(time.Second)
verifyEvent("Normal", "RemediationSkipped", "remediation skipped this node is excluded from remediation")
})
})
})

Context("Automatic strategy - OutOfServiceTaint selected", func() {
Expand Down
9 changes: 9 additions & 0 deletions install/self-node-remediation-deamonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ spec:
type: Directory
serviceAccountName: self-node-remediation-controller-manager
priorityClassName: system-node-critical
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: remediation.medik8s.io/exclude-from-remediation
operator: NotIn
values:
- "true"
containers:
- args:
- --is-manager=false
Expand Down

0 comments on commit 72bd5a5

Please sign in to comment.