Skip to content

Commit

Permalink
MGMT-17893: Don't destroy cluster on detach (#6532)
Browse files Browse the repository at this point in the history
Currently the procedure to detach a cluster that was created using hosts
from a late binding pool is to first delete the `ManagedCluster` object,
then add the `preserveOnDelete: true` field to the `ClusterDeployment`
and then delete that `ClusterDeployment`. But the cluster deployment
controller doesn't look at the `preserveOnDelete` field at all. As a
result the hosts of the cluster are returned to the pool and they are
provisioned again, which effectively destroys the cluster.

This patch changes the deployment manager controller so that in that
case it will check the `preserveOnDelete` field and if it is `true` it
will delete the corresponding `Agent` objects. The result of that is
that the hosts will go back to the pool, but marked the will still be
marked as provisioned and they will not be provisioned again, thus
avoiding the destruction of the cluster.

Note that the `BareMetalHosts` will not be removed, but they will stay
detached.

Related: https://issues.redhat.com/browse/MGMT-17893

Signed-off-by: Juan Hernandez <[email protected]>
  • Loading branch information
jhernand authored Jul 9, 2024
1 parent 6bdc1a4 commit d2bba26
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 6 deletions.
29 changes: 23 additions & 6 deletions internal/controller/controllers/clusterdeployments_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1541,31 +1541,44 @@ func (r *ClusterDeploymentsReconciler) deleteClusterInstall(ctx context.Context,
return buildReply(err)
}

func (r *ClusterDeploymentsReconciler) shouldDeleteAgentOnUnbind(ctx context.Context, agent aiv1beta1.Agent, clusterDeployment types.NamespacedName) bool {
func (r *ClusterDeploymentsReconciler) shouldDeleteAgentOnUnbind(ctx context.Context, agent aiv1beta1.Agent, clusterDeployment types.NamespacedName) (bool, error) {
log := logutil.FromContext(ctx, r.Log).WithFields(logrus.Fields{
"cluster_deployment": clusterDeployment.Name,
"cluster_deployment_namespace": clusterDeployment.Namespace,
})
infraEnvName, ok := agent.Labels[aiv1beta1.InfraEnvNameLabel]
if !ok {
log.Errorf("Failed to find infraEnv name for agent %s in namespace %s", agent.Name, agent.Namespace)
return false
return false, nil
}

infraEnv := &aiv1beta1.InfraEnv{}
if err := r.Get(ctx, types.NamespacedName{Name: infraEnvName, Namespace: agent.Namespace}, infraEnv); err != nil {
log.Errorf("Failed to get infraEnv %s in namespace %s", infraEnvName, agent.Namespace)
return false
return false, nil
}

if infraEnv.Spec.ClusterRef != nil &&
infraEnv.Spec.ClusterRef.Name == clusterDeployment.Name &&
infraEnv.Spec.ClusterRef.Namespace == clusterDeployment.Namespace {

return true
return true, nil
}

return false
// If the agent came from a late binding pool (the infrastructure environment has no cluster
// reference) then we want to delete it only if the cluster deployment is marked to preserve
// on delete. This is necessary because otherwise the host will go back to the pool, and it
// will be provisioned again, effectively destroying the cluster.
if infraEnv.Spec.ClusterRef == nil {
clusterDeploymentObject := &hivev1.ClusterDeployment{}
err := r.Get(ctx, clusterDeployment, clusterDeploymentObject)
if err != nil {
return false, err
}
return clusterDeploymentObject.Spec.PreserveOnDelete, nil
}

return false, nil
}

func (r *ClusterDeploymentsReconciler) unbindAgents(ctx context.Context, log logrus.FieldLogger, clusterDeployment types.NamespacedName) error {
Expand All @@ -1578,7 +1591,11 @@ func (r *ClusterDeploymentsReconciler) unbindAgents(ctx context.Context, log log
if clusterAgent.Spec.ClusterDeploymentName != nil &&
clusterAgent.Spec.ClusterDeploymentName.Name == clusterDeployment.Name &&
clusterAgent.Spec.ClusterDeploymentName.Namespace == clusterDeployment.Namespace {
if r.shouldDeleteAgentOnUnbind(ctx, clusterAgent, clusterDeployment) {
shouldDeleteAgent, err := r.shouldDeleteAgentOnUnbind(ctx, clusterAgent, clusterDeployment)
if err != nil {
return err
}
if shouldDeleteAgent {
log.Infof("deleting agent %s in namespace %s", clusterAgent.Name, clusterAgent.Namespace)
if err := r.Delete(ctx, &agents.Items[i]); err != nil {
log.WithError(err).Errorf("failed to delete agent %s in namespace %s", clusterAgent.Name, clusterAgent.Namespace)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4157,6 +4157,15 @@ var _ = Describe("unbindAgents", func() {
}
Expect(c.Create(ctx, infraEnv)).To(Succeed())

clusterDeployment := &hivev1.ClusterDeployment{
ObjectMeta: metav1.ObjectMeta{
Namespace: clusterReference.Namespace,
Name: clusterReference.Name,
},
Spec: hivev1.ClusterDeploymentSpec{},
}
Expect(c.Create(ctx, clusterDeployment)).To(Succeed())

cdName := types.NamespacedName{
Name: clusterReference.Name,
Namespace: clusterReference.Namespace,
Expand Down Expand Up @@ -4198,6 +4207,41 @@ var _ = Describe("unbindAgents", func() {
err = c.Get(ctx, types.NamespacedName{Name: "test-agent2", Namespace: testNamespace}, agent)
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
})

It("deletes late bound agents when cluster deployment is preserved", func() {
infraEnv := &aiv1beta1.InfraEnv{
ObjectMeta: metav1.ObjectMeta{
Name: infraEnvName,
Namespace: testNamespace,
},
Spec: aiv1beta1.InfraEnvSpec{ClusterRef: nil},
}
Expect(c.Create(ctx, infraEnv)).To(Succeed())

clusterDeployment := &hivev1.ClusterDeployment{
ObjectMeta: metav1.ObjectMeta{
Namespace: clusterReference.Namespace,
Name: clusterReference.Name,
},
Spec: hivev1.ClusterDeploymentSpec{
PreserveOnDelete: true,
},
}
Expect(c.Create(ctx, clusterDeployment)).To(Succeed())

cdName := types.NamespacedName{
Name: clusterReference.Name,
Namespace: clusterReference.Namespace,
}
Expect(cr.unbindAgents(ctx, common.GetTestLog(), cdName)).To(Succeed())

agent := &aiv1beta1.Agent{}
err := c.Get(ctx, types.NamespacedName{Name: "test-agent1", Namespace: testNamespace}, agent)
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
agent = &aiv1beta1.Agent{}
err = c.Get(ctx, types.NamespacedName{Name: "test-agent2", Namespace: testNamespace}, agent)
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
})
})

var _ = Describe("day2 cluster", func() {
Expand Down

0 comments on commit d2bba26

Please sign in to comment.