From a79360d6d54292decf6cbdd6036562c3b3c00c09 Mon Sep 17 00:00:00 2001 From: Maciej Zimnoch Date: Tue, 3 Oct 2023 15:18:52 +0200 Subject: [PATCH] Enable orphaned PV e2e test on envs with multiple Nodes The orphaned PV E2E test was skipped when the number of Nodes in the Kubernetes cluster was higher than 1. This is because the test logic required spawning a Pod that must land on the same Node where particular Scylla Pods land. To ensure this behavior in a multi-Node environment, we set a required PodAffinity on the ScyllaCluster Pods, which attracts them to the Node serving the PVC consumer Pod that drives the test logic. We have a special provisioner with 'hacky' logic in the test, due to the NodeAffinity on a PVC being immutable. Without it, we would need to actually delete Nodes during the test to trigger the logic under test, making the test disruptive, serial, and requiring at least 3 Nodes in the Kube cluster. Because such a node count requirement is resource-expensive and hard to manage in developer environments, I decided to stick with the provisioner. --- .../e2e/set/scyllacluster/scyllacluster_pv.go | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/test/e2e/set/scyllacluster/scyllacluster_pv.go b/test/e2e/set/scyllacluster/scyllacluster_pv.go index c83c2000e5f..716b5d2c0c0 100644 --- a/test/e2e/set/scyllacluster/scyllacluster_pv.go +++ b/test/e2e/set/scyllacluster/scyllacluster_pv.go @@ -76,22 +76,26 @@ var _ = g.Describe("ScyllaCluster Orphaned PV controller", func() { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) defer cancel() - nodes, err := f.KubeAdminClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - o.Expect(err).NotTo(o.HaveOccurred()) - - if len(nodes.Items) != 1 { - g.Skip( - "This test supports running at most 1 Node, as it relies on the fact that all Pods land on the same Node." 
+ - "Replace with actual Node removal logic when CI env is bigger", - ) - } - testStorageClassName := f.Namespace() sc := scyllafixture.BasicScyllaCluster.ReadOrFail() sc.Spec.AutomaticOrphanedNodeCleanup = true sc.Spec.Datacenter.Racks[0].Members = 3 sc.Spec.Datacenter.Racks[0].Storage.StorageClassName = pointer.Ptr(testStorageClassName) + sc.Spec.Datacenter.Racks[0].Placement = &scyllav1.PlacementSpec{ + PodAffinity: &corev1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + cloneLabelKey: f.Namespace(), + }, + }, + TopologyKey: corev1.LabelHostname, + }, + }, + }, + } // This test to trigger the orphaned PV cleanup is updating NodeAffinity of PV used by ScyllaCluster to not exiting Node. // The problem is, that local volume storage provisioners which we use in CI, may set this field, which is immutable. @@ -138,7 +142,7 @@ var _ = g.Describe("ScyllaCluster Orphaned PV controller", func() { pvcClone.Spec.StorageClassName = nil framework.Infof("Creating clone PVC for %q", pvc.Name) - pvcClone, err = f.KubeClient().CoreV1().PersistentVolumeClaims(f.Namespace()).Create(ctx, pvcClone, metav1.CreateOptions{}) + pvcClone, err := f.KubeClient().CoreV1().PersistentVolumeClaims(f.Namespace()).Create(ctx, pvcClone, metav1.CreateOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) framework.Infof("Creating PVC clone consumer Pod") @@ -146,6 +150,9 @@ var _ = g.Describe("ScyllaCluster Orphaned PV controller", func() { consumerPod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: consumerPodName, + Labels: map[string]string{ + cloneLabelKey: f.Namespace(), + }, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -256,7 +263,7 @@ var _ = g.Describe("ScyllaCluster Orphaned PV controller", func() { }() framework.By("Creating a ScyllaCluster") - sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{}) + sc, 
err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) framework.By("Waiting for the ScyllaCluster to rollout (RV=%s)", sc.ResourceVersion)