From d2bba26c003c543612de88883f936d54aceb4ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Hern=C3=A1ndez?= Date: Wed, 10 Jul 2024 00:34:55 +0200 Subject: [PATCH] MGMT-17893: Don't destroy cluster on detach (#6532) Currently the procedure to detach a cluster that was created using hosts from a late binding pool is to first delete the `ManagedCluster` object, then add the `preserveOnDelete: true` field to the `ClusterDeployment` and then delete that `ClusterDeployment`. But the cluster deployment controller doesn't look at the `preserveOnDelete` field at all. As a result the hosts of the cluster are returned to the pool and they are provisioned again, which effectively destroys the cluster. This patch changes the deployment manager controller so that in that case it will check the `preserveOnDelete` field and if it is `true` it will delete the corresponding `Agent` objects. The result of that is that the hosts will go back to the pool, but they will still be marked as provisioned and they will not be provisioned again, thus avoiding the destruction of the cluster. Note that the `BareMetalHosts` will not be removed, but they will stay detached. 
Related: https://issues.redhat.com/browse/MGMT-17893 Signed-off-by: Juan Hernandez --- .../clusterdeployments_controller.go | 29 +++++++++--- .../clusterdeployments_controller_test.go | 44 +++++++++++++++++++ 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/internal/controller/controllers/clusterdeployments_controller.go b/internal/controller/controllers/clusterdeployments_controller.go index e43e354d2bf..d97e639f88f 100644 --- a/internal/controller/controllers/clusterdeployments_controller.go +++ b/internal/controller/controllers/clusterdeployments_controller.go @@ -1541,7 +1541,7 @@ func (r *ClusterDeploymentsReconciler) deleteClusterInstall(ctx context.Context, return buildReply(err) } -func (r *ClusterDeploymentsReconciler) shouldDeleteAgentOnUnbind(ctx context.Context, agent aiv1beta1.Agent, clusterDeployment types.NamespacedName) bool { +func (r *ClusterDeploymentsReconciler) shouldDeleteAgentOnUnbind(ctx context.Context, agent aiv1beta1.Agent, clusterDeployment types.NamespacedName) (bool, error) { log := logutil.FromContext(ctx, r.Log).WithFields(logrus.Fields{ "cluster_deployment": clusterDeployment.Name, "cluster_deployment_namespace": clusterDeployment.Namespace, @@ -1549,23 +1549,36 @@ func (r *ClusterDeploymentsReconciler) shouldDeleteAgentOnUnbind(ctx context.Con infraEnvName, ok := agent.Labels[aiv1beta1.InfraEnvNameLabel] if !ok { log.Errorf("Failed to find infraEnv name for agent %s in namespace %s", agent.Name, agent.Namespace) - return false + return false, nil } infraEnv := &aiv1beta1.InfraEnv{} if err := r.Get(ctx, types.NamespacedName{Name: infraEnvName, Namespace: agent.Namespace}, infraEnv); err != nil { log.Errorf("Failed to get infraEnv %s in namespace %s", infraEnvName, agent.Namespace) - return false + return false, nil } if infraEnv.Spec.ClusterRef != nil && infraEnv.Spec.ClusterRef.Name == clusterDeployment.Name && infraEnv.Spec.ClusterRef.Namespace == clusterDeployment.Namespace { - return true + return true, nil } - 
return false + // If the agent came from a late binding pool (the infrastructure environment has no cluster + // reference) then we want to delete it only if the cluster deployment is marked to preserve + // on delete. This is necessary because otherwise the host will go back to the pool, and it + // will be provisioned again, effectively destroying the cluster. + if infraEnv.Spec.ClusterRef == nil { + clusterDeploymentObject := &hivev1.ClusterDeployment{} + err := r.Get(ctx, clusterDeployment, clusterDeploymentObject) + if err != nil { + return false, err + } + return clusterDeploymentObject.Spec.PreserveOnDelete, nil + } + + return false, nil } func (r *ClusterDeploymentsReconciler) unbindAgents(ctx context.Context, log logrus.FieldLogger, clusterDeployment types.NamespacedName) error { @@ -1578,7 +1591,11 @@ func (r *ClusterDeploymentsReconciler) unbindAgents(ctx context.Context, log log if clusterAgent.Spec.ClusterDeploymentName != nil && clusterAgent.Spec.ClusterDeploymentName.Name == clusterDeployment.Name && clusterAgent.Spec.ClusterDeploymentName.Namespace == clusterDeployment.Namespace { - if r.shouldDeleteAgentOnUnbind(ctx, clusterAgent, clusterDeployment) { + shouldDeleteAgent, err := r.shouldDeleteAgentOnUnbind(ctx, clusterAgent, clusterDeployment) + if err != nil { + return err + } + if shouldDeleteAgent { log.Infof("deleting agent %s in namespace %s", clusterAgent.Name, clusterAgent.Namespace) if err := r.Delete(ctx, &agents.Items[i]); err != nil { log.WithError(err).Errorf("failed to delete agent %s in namespace %s", clusterAgent.Name, clusterAgent.Namespace) diff --git a/internal/controller/controllers/clusterdeployments_controller_test.go b/internal/controller/controllers/clusterdeployments_controller_test.go index 1248e63c197..379a18a1a7a 100644 --- a/internal/controller/controllers/clusterdeployments_controller_test.go +++ b/internal/controller/controllers/clusterdeployments_controller_test.go @@ -4157,6 +4157,15 @@ var _ = 
Describe("unbindAgents", func() { } Expect(c.Create(ctx, infraEnv)).To(Succeed()) + clusterDeployment := &hivev1.ClusterDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterReference.Namespace, + Name: clusterReference.Name, + }, + Spec: hivev1.ClusterDeploymentSpec{}, + } + Expect(c.Create(ctx, clusterDeployment)).To(Succeed()) + cdName := types.NamespacedName{ Name: clusterReference.Name, Namespace: clusterReference.Namespace, @@ -4198,6 +4207,41 @@ var _ = Describe("unbindAgents", func() { err = c.Get(ctx, types.NamespacedName{Name: "test-agent2", Namespace: testNamespace}, agent) Expect(k8serrors.IsNotFound(err)).To(BeTrue()) }) + + It("deletes late bound agents when cluster deployment is preserved", func() { + infraEnv := &aiv1beta1.InfraEnv{ + ObjectMeta: metav1.ObjectMeta{ + Name: infraEnvName, + Namespace: testNamespace, + }, + Spec: aiv1beta1.InfraEnvSpec{ClusterRef: nil}, + } + Expect(c.Create(ctx, infraEnv)).To(Succeed()) + + clusterDeployment := &hivev1.ClusterDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterReference.Namespace, + Name: clusterReference.Name, + }, + Spec: hivev1.ClusterDeploymentSpec{ + PreserveOnDelete: true, + }, + } + Expect(c.Create(ctx, clusterDeployment)).To(Succeed()) + + cdName := types.NamespacedName{ + Name: clusterReference.Name, + Namespace: clusterReference.Namespace, + } + Expect(cr.unbindAgents(ctx, common.GetTestLog(), cdName)).To(Succeed()) + + agent := &aiv1beta1.Agent{} + err := c.Get(ctx, types.NamespacedName{Name: "test-agent1", Namespace: testNamespace}, agent) + Expect(k8serrors.IsNotFound(err)).To(BeTrue()) + agent = &aiv1beta1.Agent{} + err = c.Get(ctx, types.NamespacedName{Name: "test-agent2", Namespace: testNamespace}, agent) + Expect(k8serrors.IsNotFound(err)).To(BeTrue()) + }) }) var _ = Describe("day2 cluster", func() {