Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove VA deletion #158

Merged
merged 3 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 2 additions & 60 deletions controllers/selfnoderemediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ import (
"time"

"github.com/go-logr/logr"
"github.com/medik8s/common/pkg/resources"
"github.com/pkg/errors"

v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -317,65 +317,7 @@ func (r *SelfNodeRemediationReconciler) remediateWithResourceDeletion(snr *v1alp
// if not, it will return a 'zero' time and non-nil error, which means exponential backoff is triggered
// SelfNodeRemediation is only used in order to match method signature required by remediateWithResourceRemoval
func (r *SelfNodeRemediationReconciler) deleteResourcesWrapper(node *v1.Node, _ *v1alpha1.SelfNodeRemediation) (time.Duration, error) {
return 0, r.deleteResources(node)
}

func (r *SelfNodeRemediationReconciler) deleteResources(node *v1.Node) error {
//fence
zero := int64(0)
backgroundDeletePolicy := metav1.DeletePropagationBackground

deleteOptions := &client.DeleteAllOfOptions{
ListOptions: client.ListOptions{
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}),
Namespace: "",
Limit: 0,
},
DeleteOptions: client.DeleteOptions{
GracePeriodSeconds: &zero,
PropagationPolicy: &backgroundDeletePolicy,
},
}

namespaces := v1.NamespaceList{}
if err := r.Client.List(context.Background(), &namespaces); err != nil {
r.logger.Error(err, "failed to list namespaces", err)
return err
}

r.logger.Info("starting to delete node resources", "node name", node.Name)

pod := &v1.Pod{}
for _, ns := range namespaces.Items {
deleteOptions.Namespace = ns.Name
err := r.Client.DeleteAllOf(context.Background(), pod, deleteOptions)
if err != nil {
r.logger.Error(err, "failed to delete pods of unhealthy node", "namespace", ns.Name)
return err
}
}

volumeAttachments := &storagev1.VolumeAttachmentList{}
if err := r.Client.List(context.Background(), volumeAttachments); err != nil {
r.logger.Error(err, "failed to get volumeAttachments list")
return err
}
forceDeleteOption := &client.DeleteOptions{
GracePeriodSeconds: &zero,
}
for _, va := range volumeAttachments.Items {
if va.Spec.NodeName == node.Name {
err := r.Client.Delete(context.Background(), &va, forceDeleteOption)
if err != nil {
r.logger.Error(err, "failed to delete volumeAttachment", "name", va.Name)
return err
}
}
}

r.logger.Info("done deleting node resources", "node name", node.Name)

return nil
return 0, resources.DeletePods(context.Background(), r.Client, node.Name)
}

func (r *SelfNodeRemediationReconciler) remediateWithOutOfServiceTaint(snr *v1alpha1.SelfNodeRemediation) (ctrl.Result, error) {
Expand Down
5 changes: 2 additions & 3 deletions controllers/tests/config/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,8 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

k8sClient = &shared.K8sClientWrapper{
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
VaFailureMessage: "simulation of client error for VA",
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
}
Expect(k8sClient).ToNot(BeNil())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,11 @@ var _ = Describe("SNR Controller", func() {

AfterEach(func() {
k8sClient.ShouldSimulateFailure = false
k8sClient.ShouldSimulateVaFailure = false
k8sClient.ShouldSimulatePodDeleteFailure = false
isAdditionalSetupNeeded = false
deleteRemediations()
deleteSelfNodeRemediationPod()
deleteVolumeAttachment(vaName, false)
verifyVaDeleted(vaName)
//clear node's state, this is important to remove taints, label etc.
Expect(k8sClient.Update(context.Background(), getNode(shared.UnhealthyNodeName)))
Expect(k8sClient.Update(context.Background(), getNode(shared.PeerNodeName)))
Expand Down Expand Up @@ -184,8 +183,6 @@ var _ = Describe("SNR Controller", func() {

verifySelfNodeRemediationPodDoesntExist()

verifyVaDeleted(vaName)

verifyEvent("Normal", "DeleteResources", "Remediation process - finished deleting unhealthy node resources")

verifyFinalizerExists()
Expand Down Expand Up @@ -219,7 +216,7 @@ var _ = Describe("SNR Controller", func() {
It("The snr agent attempts to keep deleting node resources during temporary api-server failure", func() {
node := verifyNodeIsUnschedulable()

k8sClient.ShouldSimulateVaFailure = true
k8sClient.ShouldSimulatePodDeleteFailure = true

addUnschedulableTaint(node)

Expand All @@ -229,16 +226,16 @@ var _ = Describe("SNR Controller", func() {

verifyNoWatchdogFood()

verifySelfNodeRemediationPodDoesntExist()

verifyVaNotDeleted(vaName)

// The kube-api calls for VA fail intentionally. In this case, we expect the snr agent to try
// to delete node resources again. So LastError is set to the error every time Reconcile()
// is triggered. If it becomes another error, it means something unintended happens.
verifyLastErrorKeepsApiErrorForVa()
verifyLastErrorKeepsApiError()

k8sClient.ShouldSimulateVaFailure = false
k8sClient.ShouldSimulatePodDeleteFailure = false

verifySelfNodeRemediationPodDoesntExist()

deleteVolumeAttachment(vaName, true)

Expand Down Expand Up @@ -391,20 +388,6 @@ func deleteVolumeAttachment(vaName string, verifyExist bool) {
ExpectWithOffset(1, err).To(Succeed())
}

func verifyVaDeleted(vaName string) {
razo7 marked this conversation as resolved.
Show resolved Hide resolved
vaKey := client.ObjectKey{
Namespace: shared.Namespace,
Name: vaName,
}

EventuallyWithOffset(1, func() bool {
va := &storagev1.VolumeAttachment{}
err := k8sClient.Get(context.Background(), vaKey, va)
return apierrors.IsNotFound(err)

}, 5*time.Second, 100*time.Millisecond).Should(BeTrue())
}

func verifyVaNotDeleted(vaName string) {
vaKey := client.ObjectKey{
Namespace: shared.Namespace,
Expand All @@ -419,15 +402,15 @@ func verifyVaNotDeleted(vaName string) {
}, 5*time.Second, 250*time.Millisecond).Should(BeFalse())
}

func verifyLastErrorKeepsApiErrorForVa() {
func verifyLastErrorKeepsApiError() {
By("Verify that LastError in SNR status has been kept kube-api error for VA")
snr := &v1alpha1.SelfNodeRemediation{}
ConsistentlyWithOffset(1, func() bool {
snrNamespacedName := client.ObjectKey{Name: shared.UnhealthyNodeName, Namespace: snrNamespace}
if err := k8sClient.Client.Get(context.Background(), snrNamespacedName, snr); err != nil {
return false
}
return snr.Status.LastError == k8sClient.VaFailureMessage
return snr.Status.LastError == k8sClient.SimulatedFailureMessage
}, 5*time.Second, 250*time.Millisecond).Should(BeTrue())
}

Expand Down
6 changes: 3 additions & 3 deletions controllers/tests/controller/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

k8sClient = &shared.K8sClientWrapper{
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
VaFailureMessage: "simulation of client error for VA",
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
SimulatedFailureMessage: "simulation of client error for delete when listing namespace",
}
Expect(k8sClient).ToNot(BeNil())

Expand Down
16 changes: 8 additions & 8 deletions controllers/tests/shared/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"errors"
"time"

storagev1 "k8s.io/api/storage/v1"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -20,10 +20,10 @@ const (

type K8sClientWrapper struct {
client.Client
Reader client.Reader
ShouldSimulateFailure bool
ShouldSimulateVaFailure bool
VaFailureMessage string
Reader client.Reader
ShouldSimulateFailure bool
ShouldSimulatePodDeleteFailure bool
SimulatedFailureMessage string
}

type MockCalculator struct {
Expand All @@ -34,9 +34,9 @@ type MockCalculator struct {
func (kcw *K8sClientWrapper) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
if kcw.ShouldSimulateFailure {
return errors.New("simulation of client error")
} else if kcw.ShouldSimulateVaFailure {
if _, ok := list.(*storagev1.VolumeAttachmentList); ok {
return errors.New(kcw.VaFailureMessage)
} else if kcw.ShouldSimulatePodDeleteFailure {
if _, ok := list.(*corev1.NamespaceList); ok {
return errors.New(kcw.SimulatedFailureMessage)
}
}
return kcw.Client.List(ctx, list, opts...)
Expand Down
40 changes: 1 addition & 39 deletions e2e/self_node_remediation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
gomegatypes "github.com/onsi/gomega/types"

v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -108,17 +107,14 @@ var _ = Describe("Self Node Remediation E2E", func() {

Context("Resource Deletion Strategy", func() {
var oldPodCreationTime time.Time
var va *storagev1.VolumeAttachment

BeforeEach(func() {
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
oldPodCreationTime = findSnrPod(node).CreationTimestamp.Time
va = createVolumeAttachment(node)
})

It("should delete pods and volume attachments", func() {
checkPodRecreated(node, oldPodCreationTime)
checkVaDeleted(va)
//Simulate NHC trying to delete SNR
deleteAndWait(snr)
snr = nil
Expand Down Expand Up @@ -167,17 +163,15 @@ var _ = Describe("Self Node Remediation E2E", func() {
// b) unhealthy
// - kill connectivity on one node
// - create SNR
// - verify node does reboot and and is deleted / re-created
// - verify node does reboot and is deleted / re-created

var snr *v1alpha1.SelfNodeRemediation
var va *storagev1.VolumeAttachment
var oldPodCreationTime time.Time

BeforeEach(func() {
killApiConnection(node, apiIPs, false)
snr = createSNR(node, v1alpha1.ResourceDeletionRemediationStrategy)
oldPodCreationTime = findSnrPod(node).CreationTimestamp.Time
va = createVolumeAttachment(node)
})

AfterEach(func() {
Expand All @@ -192,7 +186,6 @@ var _ = Describe("Self Node Remediation E2E", func() {
// - because the 2nd check has a small timeout only
checkReboot(node, oldBootTime)
checkPodRecreated(node, oldPodCreationTime)
checkVaDeleted(va)
if _, isExist := os.LookupEnv(skipLogsEnvVarName); !isExist {
// we can't check logs of unhealthy node anymore, check peer logs
peer := &workers.Items[1]
Expand Down Expand Up @@ -333,17 +326,14 @@ var _ = Describe("Self Node Remediation E2E", func() {

Context("Resource Deletion Strategy", func() {
var oldPodCreationTime time.Time
var va *storagev1.VolumeAttachment

BeforeEach(func() {
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
oldPodCreationTime = findSnrPod(controlPlaneNode).CreationTimestamp.Time
va = createVolumeAttachment(controlPlaneNode)
})

It("should delete pods and volume attachments", func() {
checkPodRecreated(controlPlaneNode, oldPodCreationTime)
checkVaDeleted(va)
//Simulate NHC trying to delete SNR
deleteAndWait(snr)
snr = nil
Expand All @@ -358,34 +348,6 @@ var _ = Describe("Self Node Remediation E2E", func() {
})
})

func checkVaDeleted(va *storagev1.VolumeAttachment) {
EventuallyWithOffset(1, func() bool {
newVa := &storagev1.VolumeAttachment{}
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(va), newVa)
return errors.IsNotFound(err)

}, 10*time.Second, 250*time.Millisecond).Should(BeTrue())
}

func createVolumeAttachment(node *v1.Node) *storagev1.VolumeAttachment {
foo := "foo"
va := &storagev1.VolumeAttachment{
ObjectMeta: metav1.ObjectMeta{
Name: "some-va",
Namespace: testNamespace,
},
Spec: storagev1.VolumeAttachmentSpec{
Attacher: foo,
Source: storagev1.VolumeAttachmentSource{},
NodeName: node.Name,
},
}

va.Spec.Source.PersistentVolumeName = &foo
ExpectWithOffset(1, k8sClient.Create(context.Background(), va)).To(Succeed())
return va
}

func checkPodRecreated(node *v1.Node, oldPodCreationTime time.Time) bool {
return EventuallyWithOffset(1, func() time.Time {
pod := findSnrPod(node)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/Masterminds/sprig v2.22.0+incompatible
github.com/go-logr/logr v1.2.3
github.com/go-ping/ping v1.1.0
github.com/medik8s/common v1.2.0
github.com/medik8s/common v1.9.0
github.com/onsi/ginkgo/v2 v2.9.1
github.com/onsi/gomega v1.27.4
github.com/openshift/api v0.0.0-20230414143018-3367bc7e6ac7 // release-4.13
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2 h1:hAHbPm5IJGijwng3PWk09JkG9WeqChjprR5s9bBZ+OM=
github.com/matttproud/golang_protobuf_extensions v1.0.2/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/medik8s/common v1.2.0 h1:xgQOijD3rEn+PfCd4lJuV3WFdt5QA6SIaqF01rRp2as=
github.com/medik8s/common v1.2.0/go.mod h1:ZT/3hfMXJLmZEcqmxRWB5LGC8Wl+qKGGQ4zM8hOE7PY=
github.com/medik8s/common v1.9.0 h1:nMMffmj+e0l0xRB0RfpsbdDpW9upXU2MFeGGADJlwyE=
github.com/medik8s/common v1.9.0/go.mod h1:ZT/3hfMXJLmZEcqmxRWB5LGC8Wl+qKGGQ4zM8hOE7PY=
github.com/mitchellh/copystructure v1.1.2 h1:Th2TIvG1+6ma3e/0/bopBKohOTY7s4dA8V2q4EUcBJ0=
github.com/mitchellh/copystructure v1.1.2/go.mod h1:EBArHfARyrSWO/+Wyr9zwEkc6XMFB9XyNgFNmRkZZU4=
github.com/mitchellh/reflectwalk v1.0.1 h1:FVzMWA5RllMAKIdUSC8mdWo3XtwoecrH79BY70sEEpE=
Expand Down
Loading