From df0a1037c119611a7190a3559b317b7551173e35 Mon Sep 17 00:00:00 2001 From: oraz <oraz@redhat.com> Date: Wed, 28 Feb 2024 17:50:55 +0200 Subject: [PATCH 1/4] Add Eviction Timeout Field for Maintenance Time Configurable drain/deletion timeout for the user. ATM it was set to 30s --- api/v1beta1/nodemaintenance_types.go | 8 ++++++++ .../node-maintenance-operator.clusterserviceversion.yaml | 6 ++++++ .../nodemaintenance.medik8s.io_nodemaintenances.yaml | 8 ++++++++ .../nodemaintenance.medik8s.io_nodemaintenances.yaml | 8 ++++++++ .../node-maintenance-operator.clusterserviceversion.yaml | 5 +++++ controllers/controllers_suite_test.go | 4 +++- controllers/nodemaintenance_controller.go | 7 +++---- 7 files changed, 41 insertions(+), 5 deletions(-) diff --git a/api/v1beta1/nodemaintenance_types.go b/api/v1beta1/nodemaintenance_types.go index 3ce977ed7..aad31a208 100644 --- a/api/v1beta1/nodemaintenance_types.go +++ b/api/v1beta1/nodemaintenance_types.go @@ -45,6 +45,14 @@ type NodeMaintenanceSpec struct { // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file + // EvictionTimeout is the timeout for pods eviction by drain/delete before giving up + // Zero means infinite + // Valid time units are "ms", "s", "m", "h". 
+ // +kubebuilder:default:="30s" + // +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$" + // +kubebuilder:validation:Type:=string + //+operator-sdk:csv:customresourcedefinitions:type=spec + EvictionTimeout metav1.Duration `json:"evictionTimeout,omitempty"` // Node name to apply maintanance on/off //+operator-sdk:csv:customresourcedefinitions:type=spec NodeName string `json:"nodeName"` diff --git a/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml b/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml index 88d42b6ed..fd631ebcc 100755 --- a/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml +++ b/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml @@ -11,6 +11,7 @@ metadata: "name": "nodemaintenance-sample" }, "spec": { + "evictionTimeout": "30s", "nodeName": "node02", "reason": "Test node maintenance" } @@ -43,6 +44,11 @@ spec: name: nodemaintenances version: v1beta1 specDescriptors: + - description: EvictionTimeout is the timeout for pods eviction by drain/delete + before giving up Zero means infinite Valid time units are "ms", "s", "m", + "h". + displayName: Eviction Timeout + path: evictionTimeout - description: Node name to apply maintanance on/off displayName: Node Name path: nodeName diff --git a/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml b/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml index 313065c41..4f3cf1bc0 100644 --- a/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml +++ b/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml @@ -43,6 +43,14 @@ spec: spec: description: NodeMaintenanceSpec defines the desired state of NodeMaintenance properties: + evictionTimeout: + default: 30s + description: |- + EvictionTimeout is the timeout for pods eviction by drain/delete before giving up + Zero means infinite + Valid time units are "ms", "s", "m", "h". 
+ pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$ + type: string nodeName: description: Node name to apply maintanance on/off type: string diff --git a/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml b/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml index 7e9312580..601fcf0de 100644 --- a/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml +++ b/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml @@ -41,6 +41,14 @@ spec: spec: description: NodeMaintenanceSpec defines the desired state of NodeMaintenance properties: + evictionTimeout: + default: 30s + description: |- + EvictionTimeout is the timeout for pods eviction by drain/delete before giving up + Zero means infinite + Valid time units are "ms", "s", "m", "h". + pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$ + type: string nodeName: description: Node name to apply maintanance on/off type: string diff --git a/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml b/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml index a3775f617..6bc9a5cd6 100644 --- a/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml @@ -28,6 +28,11 @@ spec: name: nodemaintenances version: v1beta1 specDescriptors: + - description: EvictionTimeout is the timeout for pods eviction by drain/delete + before giving up Zero means infinite Valid time units are "ms", "s", "m", + "h". 
+ displayName: Eviction Timeout + path: evictionTimeout - description: Node name to apply maintanance on/off displayName: Node Name path: nodeName diff --git a/controllers/controllers_suite_test.go b/controllers/controllers_suite_test.go index 1ab698470..54431f953 100644 --- a/controllers/controllers_suite_test.go +++ b/controllers/controllers_suite_test.go @@ -20,6 +20,7 @@ import ( "context" "path/filepath" "testing" + "time" "github.com/medik8s/common/pkg/lease" . "github.com/onsi/ginkgo/v2" @@ -95,7 +96,8 @@ var _ = BeforeSuite(func() { logger: ctrl.Log.WithName("unit test"), } ctx, cancel = context.WithCancel(ctrl.SetupSignalHandler()) - drainer, err = createDrainer(ctx, cfg) + evictionTimeout := time.Duration(30) + drainer, err = createDrainer(ctx, evictionTimeout, cfg) Expect(err).NotTo(HaveOccurred()) // in test pods are not evicted, so don't wait forever for them drainer.SkipWaitForDeleteTimeoutSeconds = 0 diff --git a/controllers/nodemaintenance_controller.go b/controllers/nodemaintenance_controller.go index 668f1c8ee..51eab5b12 100644 --- a/controllers/nodemaintenance_controller.go +++ b/controllers/nodemaintenance_controller.go @@ -118,7 +118,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ } // Add finalizer when object is created - drainer, err := createDrainer(ctx, r.MgrConfig) + drainer, err := createDrainer(ctx, nm.Spec.EvictionTimeout.Duration, r.MgrConfig) if err != nil { return emptyResult, err } @@ -229,7 +229,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ } // createDrainer creates a drain.Helper struct for external cordon and drain API -func createDrainer(ctx context.Context, mgrConfig *rest.Config) (*drain.Helper, error) { +func createDrainer(ctx context.Context, evicitonTimeout time.Duration, mgrConfig *rest.Config) (*drain.Helper, error) { drainer := &drain.Helper{} //Continue even if there are pods not managed by a ReplicationController, ReplicaSet, Job, DaemonSet 
or StatefulSet. @@ -254,9 +254,8 @@ func createDrainer(ctx context.Context, mgrConfig *rest.Config) (*drain.Helper, //Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used. drainer.GracePeriodSeconds = -1 - // TODO - add logical value or attach from the maintenance CR //The length of time to wait before giving up, zero means infinite - drainer.Timeout = DrainerTimeout + drainer.Timeout = evicitonTimeout cs, err := kubernetes.NewForConfig(mgrConfig) if err != nil { From 3f35c905b6181088f9d17e31495f1db9938bca02 Mon Sep 17 00:00:00 2001 From: oraz <oraz@redhat.com> Date: Wed, 28 Feb 2024 17:51:01 +0200 Subject: [PATCH 2/4] Update readme and example with EvictionTimeout field --- README.md | 3 +++ config/samples/nodemaintenance_v1beta1_nodemaintenance.yaml | 1 + 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index cd19a2538..ac220caa5 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,8 @@ Follow the instructions [here](https://sdk.operatorframework.io/docs/building-op To set maintenance on a node a `NodeMaintenance` custom resource should be created. The `NodeMaintenance` CR spec contains: + +- evictionTimeout: The timeout for pods eviction by drain/delete before giving up. Zero means infinite, and the default value is 30s. - nodeName: The name of the node which will be put into maintenance mode. - reason: The reason why the node will be under maintenance. 
@@ -58,6 +60,7 @@ kind: NodeMaintenance metadata: name: nodemaintenance-sample spec: + evictionTimeout: "30s" nodeName: node02 reason: "Test node maintenance" diff --git a/config/samples/nodemaintenance_v1beta1_nodemaintenance.yaml b/config/samples/nodemaintenance_v1beta1_nodemaintenance.yaml index b69f6b1a2..bc247ffd2 100644 --- a/config/samples/nodemaintenance_v1beta1_nodemaintenance.yaml +++ b/config/samples/nodemaintenance_v1beta1_nodemaintenance.yaml @@ -3,5 +3,6 @@ kind: NodeMaintenance metadata: name: nodemaintenance-sample spec: + evictionTimeout: "30s" nodeName: node02 reason: "Test node maintenance" From 6cb7520053fc6b2e77d8f90ac7d51b2fbe3e255e Mon Sep 17 00:00:00 2001 From: oraz <oraz@redhat.com> Date: Mon, 4 Mar 2024 18:49:26 +0200 Subject: [PATCH 3/4] Typo fix s/eviciton/eviction, s/maintanance/maintenance. Remove redundant space and unused DrainerTimeout const --- api/v1beta1/nodemaintenance_types.go | 4 ++-- .../node-maintenance-operator.clusterserviceversion.yaml | 4 ++-- .../nodemaintenance.medik8s.io_nodemaintenances.yaml | 4 ++-- .../bases/nodemaintenance.medik8s.io_nodemaintenances.yaml | 4 ++-- .../node-maintenance-operator.clusterserviceversion.yaml | 4 ++-- controllers/nodemaintenance_controller.go | 5 ++--- controllers/taint.go | 2 +- 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/api/v1beta1/nodemaintenance_types.go b/api/v1beta1/nodemaintenance_types.go index aad31a208..3057f9d05 100644 --- a/api/v1beta1/nodemaintenance_types.go +++ b/api/v1beta1/nodemaintenance_types.go @@ -53,10 +53,10 @@ type NodeMaintenanceSpec struct { // +kubebuilder:validation:Type:=string //+operator-sdk:csv:customresourcedefinitions:type=spec EvictionTimeout metav1.Duration `json:"evictionTimeout,omitempty"` - // Node name to apply maintanance on/off + // Node name to apply maintenance on/off //+operator-sdk:csv:customresourcedefinitions:type=spec NodeName string `json:"nodeName"` - // Reason for maintanance + // Reason for maintenance
//+operator-sdk:csv:customresourcedefinitions:type=spec Reason string `json:"reason,omitempty"` } diff --git a/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml b/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml index fd631ebcc..ea500c4f2 100755 --- a/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml +++ b/bundle/manifests/node-maintenance-operator.clusterserviceversion.yaml @@ -49,10 +49,10 @@ spec: "h". displayName: Eviction Timeout path: evictionTimeout - - description: Node name to apply maintanance on/off + - description: Node name to apply maintenance on/off displayName: Node Name path: nodeName - - description: Reason for maintanance + - description: Reason for maintenance displayName: Reason path: reason statusDescriptors: diff --git a/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml b/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml index 4f3cf1bc0..961793dda 100644 --- a/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml +++ b/bundle/manifests/nodemaintenance.medik8s.io_nodemaintenances.yaml @@ -52,10 +52,10 @@ spec: pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$ type: string nodeName: - description: Node name to apply maintanance on/off + description: Node name to apply maintenance on/off type: string reason: - description: Reason for maintanance + description: Reason for maintenance type: string required: - nodeName diff --git a/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml b/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml index 601fcf0de..9b82a4160 100644 --- a/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml +++ b/config/crd/bases/nodemaintenance.medik8s.io_nodemaintenances.yaml @@ -50,10 +50,10 @@ spec: pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$ type: string nodeName: - description: Node name to apply maintanance on/off + description: Node name to apply maintenance on/off type: string 
reason: - description: Reason for maintanance + description: Reason for maintenance type: string required: - nodeName diff --git a/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml b/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml index 6bc9a5cd6..d9739ad9c 100644 --- a/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/node-maintenance-operator.clusterserviceversion.yaml @@ -33,10 +33,10 @@ spec: "h". displayName: Eviction Timeout path: evictionTimeout - - description: Node name to apply maintanance on/off + - description: Node name to apply maintenance on/off displayName: Node Name path: nodeName - - description: Reason for maintanance + - description: Reason for maintenance displayName: Reason path: reason statusDescriptors: diff --git a/controllers/nodemaintenance_controller.go b/controllers/nodemaintenance_controller.go index 51eab5b12..f4a90ce1d 100644 --- a/controllers/nodemaintenance_controller.go +++ b/controllers/nodemaintenance_controller.go @@ -52,7 +52,6 @@ const ( //lease consts LeaseHolderIdentity = "node-maintenance" LeaseDuration = 3600 * time.Second - DrainerTimeout = 30 * time.Second ) // NodeMaintenanceReconciler reconciles a NodeMaintenance object @@ -229,7 +228,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ } // createDrainer creates a drain.Helper struct for external cordon and drain API -func createDrainer(ctx context.Context, evicitonTimeout time.Duration, mgrConfig *rest.Config) (*drain.Helper, error) { +func createDrainer(ctx context.Context, evictionTimeout time.Duration, mgrConfig *rest.Config) (*drain.Helper, error) { drainer := &drain.Helper{} //Continue even if there are pods not managed by a ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet. 
@@ -255,7 +254,7 @@ func createDrainer(ctx context.Context, evicitonTimeout time.Duration, mgrConfig drainer.GracePeriodSeconds = -1 //The length of time to wait before giving up, zero means infinite - drainer.Timeout = evicitonTimeout + drainer.Timeout = evictionTimeout cs, err := kubernetes.NewForConfig(mgrConfig) if err != nil { diff --git a/controllers/taint.go b/controllers/taint.go index a026366bd..208964e66 100644 --- a/controllers/taint.go +++ b/controllers/taint.go @@ -56,7 +56,7 @@ func AddOrRemoveTaint(clientset kubernetes.Interface, add bool, node *corev1.Nod return err } taintStr = "remove" - log.Infof("Maintenance taints will be removed from node %s", node.Name) + log.Infof("Maintenance taints will be removed from node %s", node.Name) patch = fmt.Sprintf(`{ "op": "replace", "path": "/spec/taints", "value": %s }`, string(removeTaints)) } From 410b51c555dc5b1419b43d7d82db6074c51856f3 Mon Sep 17 00:00:00 2001 From: oraz <oraz@redhat.com> Date: Mon, 4 Mar 2024 18:53:19 +0200 Subject: [PATCH 4/4] Add e2e test for different evictionTimeout values Using a 50s evictionTimeout to demonstrate a normal maintenance that won't reach the eviction timeout and a 10s evictionTimeout for a maintenance that will reach the eviction timeout --- test/e2e/e2e_suite_test.go | 11 +++-- test/e2e/node_maintenance_test.go | 69 +++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index a13a6d2bd..e078ad9c0 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -19,6 +19,7 @@ import ( "fmt" "os" "testing" + "time" .
"github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2/reporters" @@ -26,7 +27,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/medik8s/node-maintenance-operator/api/v1beta1" ) @@ -41,8 +42,10 @@ var ( // The ns for test deployments testNsName string testNamespace *corev1.Namespace - //namespace leases are created in + // namespace leases are created in leaseNs = "medik8s-leases" + // eviction timeout for e2e maintenances that are not expected to time out (50s, above the 30s CRD default) + evicitonTimeout = time.Second * 50 ) var _ = BeforeSuite(func() { @@ -52,7 +55,7 @@ var _ = BeforeSuite(func() { testNsName = os.Getenv("TEST_NAMESPACE") Expect(testNsName).ToNot(BeEmpty(), "TEST_NAMESPACE env var not set, can't start e2e test") testNamespace = &corev1.Namespace{ - ObjectMeta: v1.ObjectMeta{ + ObjectMeta: metav1.ObjectMeta{ Name: testNsName, }, } @@ -66,7 +69,7 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) // wait until webhooks are up and running by trying to create a CR and ignoring unexpected errors - testCR := getNodeMaintenance("webhook-test", "some-not-existing-node-name") + testCR := getNodeMaintenance("webhook-test", "some-not-existing-node-name", evicitonTimeout) _ = createCRIgnoreUnrelatedErrors(testCR) }) diff --git a/test/e2e/node_maintenance_test.go b/test/e2e/node_maintenance_test.go index 400f7bd6c..709db4b79 100644 --- a/test/e2e/node_maintenance_test.go +++ b/test/e2e/node_maintenance_test.go @@ -18,7 +18,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" nmo "github.com/medik8s/node-maintenance-operator/api/v1beta1" @@ -58,7 +58,7 @@ var _ = Describe("Starting Maintenance", func() { if controPlaneMaintenance == nil { // do this once only controlPlaneNode = controlPlaneNodes[0] - controPlaneMaintenance =
getNodeMaintenance(fmt.Sprintf("test-1st-control-plane-%s", controlPlaneNode), controlPlaneNode) + controPlaneMaintenance = getNodeMaintenance(fmt.Sprintf("test-1st-control-plane-%s", controlPlaneNode), controlPlaneNode, evicitonTimeout) err = createCRIgnoreUnrelatedErrors(controPlaneMaintenance) } }) @@ -107,7 +107,7 @@ var _ = Describe("Starting Maintenance", func() { time.Sleep(10 * time.Second) controlPlaneNode := controlPlaneNodes[1] - nodeMaintenance := getNodeMaintenance(fmt.Sprintf("test-2nd-control-plane-%s", controlPlaneNode), controlPlaneNode) + nodeMaintenance := getNodeMaintenance(fmt.Sprintf("test-2nd-control-plane-%s", controlPlaneNode), controlPlaneNode, evicitonTimeout) err := createCRIgnoreUnrelatedErrors(nodeMaintenance) Expect(err).To(HaveOccurred()) @@ -118,14 +118,14 @@ var _ = Describe("Starting Maintenance", func() { Context("for a not existing node", func() { It("should fail", func() { nodeName := "doesNotExist" - nodeMaintenance := getNodeMaintenance("test-unexisting", nodeName) + nodeMaintenance := getNodeMaintenance("test-unexisting", nodeName, evicitonTimeout) err := createCRIgnoreUnrelatedErrors(nodeMaintenance) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf(nmo.ErrorNodeNotExists, nodeName)), "Unexpected error message") }) }) - Context("for a worker node", func() { + Context("for a worker node with a high eviction timeout", func() { var maintenanceNodeName string var nodeMaintenance *nmo.NodeMaintenance @@ -135,9 +135,9 @@ var _ = Describe("Starting Maintenance", func() { // do this once only if nodeMaintenance == nil { startTime = time.Now() - createTestDeployment() + createTestDeployment(testDeployment) maintenanceNodeName = getTestDeploymentNodeName() - nodeMaintenance = getNodeMaintenance(testMaintenance, maintenanceNodeName) + nodeMaintenance = getNodeMaintenance(testMaintenance, maintenanceNodeName, evicitonTimeout) } }) @@ -147,7 +147,7 @@ var _ = Describe("Starting Maintenance", func() { }) 
It("should prevent creating another maintenance for the same node", func() { - nmDuplicate := getNodeMaintenance("test-duplicate", maintenanceNodeName) + nmDuplicate := getNodeMaintenance("test-duplicate", maintenanceNodeName, evicitonTimeout) err := createCRIgnoreUnrelatedErrors(nmDuplicate) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf(nmo.ErrorNodeMaintenanceExists, maintenanceNodeName)), "Unexpected error message") @@ -193,10 +193,10 @@ var _ = Describe("Starting Maintenance", func() { }, 300*time.Second, 10*time.Second).Should(BeTrue(), "maintenance did not succeed in time") }) - It("should have been reconciled with fixed duration at least once", func() { + It("shouldn't been reconciled with fixed duration", func() { // check operator log showing it reconciled with fixed duration because of drain timeout // it should be caused by the test deployment's termination graceperiod > drain timeout - Expect(getOperatorLogs()).To(ContainSubstring(nodemaintenance.FixedDurationReconcileLog)) + Expect(getOperatorLogs()).NotTo(ContainSubstring(nodemaintenance.FixedDurationReconcileLog)) }) It("should result in unschedulable and tainted node", func() { @@ -256,6 +256,42 @@ var _ = Describe("Starting Maintenance", func() { }) }) + Context("for a worker node with a low eviction timeout", func() { + + var maintenanceNodeName string + var nodeMaintenance *nmo.NodeMaintenance + + BeforeEach(func() { + createTestDeployment("test-deployment-2") + maintenanceNodeName = getTestDeploymentNodeName() + nodeMaintenance = getNodeMaintenance(testMaintenance, maintenanceNodeName, time.Second*10) + Expect(createCRIgnoreUnrelatedErrors(nodeMaintenance)).To(Succeed()) + DeferCleanup(Client.Delete, context.TODO(), nodeMaintenance) + }) + + When("evction timeout is low", func() { + It("should succeed and be reconciled with fixed duration at least once", func() { + By("verify CR has phase is succeeded") + Eventually(func() (bool, error) { + nm := 
&nmo.NodeMaintenance{} + if err := Client.Get(context.TODO(), types.NamespacedName{Name: nodeMaintenance.Name}, nm); err != nil { + return false, err + } + + if nm.Status.Phase != nmo.MaintenanceSucceeded { + logInfof("phase: %s\n", nm.Status.Phase) + return false, nil + } + + return true, nil + }, 300*time.Second, 10*time.Second).Should(BeTrue(), "maintenance did not succeed in time") + By("verify printed log") + // check operator log showing it reconciled with fixed duration because of drain timeout + // it should be caused by the test deployment's termination graceperiod > drain timeout + Expect(getOperatorLogs()).To(ContainSubstring(nodemaintenance.FixedDurationReconcileLog)) + }) + }) + }) }) func getNodes() ([]string, []string) { @@ -284,7 +320,7 @@ func getNodes() ([]string, []string) { return controlPlaneNodes, workers } -func getNodeMaintenance(name, nodeName string) *nmo.NodeMaintenance { +func getNodeMaintenance(name, nodeName string, evicitonTimeout time.Duration) *nmo.NodeMaintenance { return &nmo.NodeMaintenance{ TypeMeta: metav1.TypeMeta{ Kind: "NodeMaintenance", @@ -294,8 +330,9 @@ func getNodeMaintenance(name, nodeName string) *nmo.NodeMaintenance { Name: "nodemaintenance-" + name, }, Spec: nmo.NodeMaintenanceSpec{ - NodeName: nodeName, - Reason: "Set maintenance on node for e2e testing", + EvictionTimeout: metav1.Duration{Duration: evicitonTimeout}, + NodeName: nodeName, + Reason: "Set maintenance on node for e2e testing", }, } } @@ -321,7 +358,7 @@ func createCRIgnoreUnrelatedErrors(nm *nmo.NodeMaintenance) error { return err } -func createTestDeployment() { +func createTestDeployment(testDeployment string) { dep := &appsv1.Deployment{ TypeMeta: metav1.TypeMeta{ APIVersion: "apps/v1", @@ -332,7 +369,7 @@ func createTestDeployment() { Namespace: testNsName, }, Spec: appsv1.DeploymentSpec{ - Replicas: pointer.Int32Ptr(1), + Replicas: ptr.To[int32](1), Selector: &metav1.LabelSelector{ MatchLabels: podLabel, }, @@ -372,8 +409,6 @@ func 
createTestDeployment() { Command: []string{"/bin/sh"}, Args: []string{"-c", "while true; do echo hello; sleep 10;done"}, }}, - // make sure we run into the drain timeout at least once - TerminationGracePeriodSeconds: pointer.Int64Ptr(int64(nodemaintenance.DrainerTimeout.Seconds()) + 50), }, }, },