Skip to content

Commit

Permalink
Handle Failed Deletion of Subinstallations (gardener#635)
Browse files Browse the repository at this point in the history
* Handle failed deletion of subinstallation (run-int-tests)

* Test (run-int-tests)

* Cleanup abort annotation and timestamp

* Test (run-int-tests)

* Unit test

* Unit test
  • Loading branch information
robertgraeff authored Nov 11, 2022
1 parent 82b358f commit 599edb2
Show file tree
Hide file tree
Showing 13 changed files with 487 additions and 63 deletions.
7 changes: 0 additions & 7 deletions pkg/landscaper/controllers/deployitem/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,6 @@ func (con *controller) writeAbortingTimeoutExceeded(ctx context.Context, di *lsv
logger = logger.WithValues(lc.KeyMethod, "writeAbortingTimeoutExceeded")
logger.Info("aborting timeout occurred")

lsv1alpha1helper.RemoveAbortOperationAndTimestamp(&di.ObjectMeta)

di.Status.DeployItemPhase = lsv1alpha1.DeployItemPhaseFailed
di.Status.JobIDFinished = di.Status.GetJobID()
di.Status.Phase = lsv1alpha1.ExecutionPhaseFailed
Expand All @@ -189,11 +187,6 @@ func (con *controller) writeAbortingTimeoutExceeded(ctx context.Context, di *lsv
return err
}

if err := con.Writer().UpdateDeployItem(ctx, read_write_layer.W000109, di); err != nil {
logger.Error(err, "unable to update deploy item")
return err
}

return nil
}

Expand Down
63 changes: 63 additions & 0 deletions pkg/landscaper/controllers/execution/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package execution_test
import (
"context"

"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -98,6 +100,67 @@ var _ = Describe("Reconcile", func() {
Expect(apierrors.IsNotFound(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(exec), exec))).To(BeTrue(), "expect the execution to be deleted")
})

It("should cleanup the abort annotation of deploy items", func() {
ctx := context.Background()

var err error
state, err = testenv.InitResources(ctx, "./testdata/test3")
testutils.ExpectNoError(err)
Expect(testutils.CreateExampleDefaultContext(ctx, testenv.Client, state.Namespace)).To(Succeed())

// Read execution
exec := state.Executions[state.Namespace+"/exec-1"]
Expect(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(exec), exec)).To(Succeed())

// Reconcile the execution
_ = testutils.ShouldNotReconcile(ctx, ctrl, testutils.RequestFromObject(exec))

// Check orphaned deploy item
di := state.DeployItems[state.Namespace+"/di-b"]
Expect(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(di), di)).To(Succeed())
Expect(di.ObjectMeta.Annotations).NotTo(HaveKeyWithValue(lsv1alpha1.OperationAnnotation, lsv1alpha1.AbortOperation))
Expect(di.ObjectMeta.Annotations).NotTo(HaveKey(lsv1alpha1.AbortTimestampAnnotation))
Expect(di.Status.JobID).To(Equal(exec.Status.JobID))

// Delete orphaned deploy item
Expect(state.Client.Delete(ctx, di)).To(Succeed())
controllerutil.RemoveFinalizer(di, lsv1alpha1.LandscaperFinalizer)
Expect(state.Client.Update(ctx, di)).To(Succeed())

// Reconcile the execution
_ = testutils.ShouldNotReconcile(ctx, ctrl, testutils.RequestFromObject(exec))

// Check deploy item
di = state.DeployItems[state.Namespace+"/di-a"]
Expect(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(di), di)).To(Succeed())
Expect(di.ObjectMeta.Annotations).NotTo(HaveKeyWithValue(lsv1alpha1.OperationAnnotation, lsv1alpha1.AbortOperation))
Expect(di.ObjectMeta.Annotations).NotTo(HaveKey(lsv1alpha1.AbortTimestampAnnotation))
Expect(di.Status.JobID).To(Equal(exec.Status.JobID))
})

It("should cleanup the abort annotation of deploy items during delete", func() {
ctx := context.Background()

var err error
state, err = testenv.InitResources(ctx, "./testdata/test4")
testutils.ExpectNoError(err)
Expect(testutils.CreateExampleDefaultContext(ctx, testenv.Client, state.Namespace)).To(Succeed())

// Read execution
exec := state.Executions[state.Namespace+"/exec-1"]
Expect(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(exec), exec)).To(Succeed())

// Reconcile the execution
_ = testutils.ShouldNotReconcile(ctx, ctrl, testutils.RequestFromObject(exec))

// Check deploy item
di := state.DeployItems[state.Namespace+"/di-a"]
Expect(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(di), di)).To(Succeed())

Expect(di.ObjectMeta.Annotations).NotTo(HaveKeyWithValue(lsv1alpha1.OperationAnnotation, lsv1alpha1.AbortOperation))
Expect(di.ObjectMeta.Annotations).NotTo(HaveKey(lsv1alpha1.AbortTimestampAnnotation))
})

Context("Context", func() {
It("should pass the context to the deploy item", func() {
ctx := context.Background()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2022 SAP SE or an SAP affiliate company and Gardener contributors
#
# SPDX-License-Identifier: Apache-2.0

apiVersion: landscaper.gardener.cloud/v1alpha1
kind: Execution
metadata:
name: exec-1
namespace: {{ .Namespace }}
generation: 2
finalizers:
- finalizer.landscaper.gardener.cloud
spec:

deployItems:
- name: a
type: landscaper.gardener.cloud/helm
config:
apiVersion: manifest.deployer.landscaper.gardener.cloud/v1alpha2
kind: ProviderConfiguration

status:
phase: Failed
jobID: job2
jobIDFinished: job1
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: 2022 SAP SE or an SAP affiliate company and Gardener contributors
#
# SPDX-License-Identifier: Apache-2.0

apiVersion: landscaper.gardener.cloud/v1alpha1
kind: DeployItem
metadata:
name: di-a
namespace: {{ .Namespace }}
annotations:
landscaper.gardener.cloud/abort-time: "2022-11-10T17:12:34+01:00"
landscaper.gardener.cloud/operation: abort
finalizers:
- finalizer.landscaper.gardener.cloud
generation: 2
labels:
execution.landscaper.gardener.cloud/managed-by: exec-1
execution.landscaper.gardener.cloud/name: a
spec:
type: landscaper.gardener.cloud/helm
config:
apiVersion: manifest.deployer.landscaper.gardener.cloud/v1alpha2
kind: ProviderConfiguration
my-val: val1

status:
deployItemPhase: Failed
phase: Failed
jobID: job1
jobIDFinished: job1
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: 2022 SAP SE or an SAP affiliate company and Gardener contributors
#
# SPDX-License-Identifier: Apache-2.0

apiVersion: landscaper.gardener.cloud/v1alpha1
kind: DeployItem
metadata:
name: di-b
namespace: {{ .Namespace }}
annotations:
landscaper.gardener.cloud/abort-time: "2022-11-10T17:12:34+01:00"
landscaper.gardener.cloud/operation: abort
finalizers:
- finalizer.landscaper.gardener.cloud
generation: 2
labels:
execution.landscaper.gardener.cloud/managed-by: exec-1
execution.landscaper.gardener.cloud/name: b
spec:
type: landscaper.gardener.cloud/helm
config:
apiVersion: manifest.deployer.landscaper.gardener.cloud/v1alpha2
kind: ProviderConfiguration
my-val: val1

status:
deployItemPhase: Failed
phase: Failed
jobID: job1
jobIDFinished: job1
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: 2022 SAP SE or an SAP affiliate company and Gardener contributors
#
# SPDX-License-Identifier: Apache-2.0

apiVersion: landscaper.gardener.cloud/v1alpha1
kind: Execution
metadata:
name: exec-1
namespace: {{ .Namespace }}
generation: 2
deletionTimestamp: "2022-11-10T13:59:00Z"
finalizers:
- finalizer.landscaper.gardener.cloud
spec:

deployItems:
- name: a
type: landscaper.gardener.cloud/helm
config:
apiVersion: manifest.deployer.landscaper.gardener.cloud/v1alpha2
kind: ProviderConfiguration

status:
phase: DeleteFailed
jobID: job2
jobIDFinished: job1

observedGeneration: 2

deployItemRefs:
- name: a
ref:
name: di-a
namespace: test1
observedGeneration: 2

execGenerations:
- name: a
observedGeneration: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2022 SAP SE or an SAP affiliate company and Gardener contributors
#
# SPDX-License-Identifier: Apache-2.0

apiVersion: landscaper.gardener.cloud/v1alpha1
kind: DeployItem
metadata:
name: di-a
namespace: {{ .Namespace }}
annotations:
landscaper.gardener.cloud/abort-time: "2022-11-10T17:12:34+01:00"
landscaper.gardener.cloud/operation: abort
deletionTimestamp: "2022-11-10T13:59:00Z"
finalizers:
- finalizer.landscaper.gardener.cloud
generation: 2
labels:
execution.landscaper.gardener.cloud/managed-by: exec-1
execution.landscaper.gardener.cloud/name: a
spec:
type: landscaper.gardener.cloud/helm
config:
apiVersion: manifest.deployer.landscaper.gardener.cloud/v1alpha2
kind: ProviderConfiguration
my-val: val1

status:
deployItemPhase: Failed
phase: Failed
jobID: job1
jobIDFinished: job1
67 changes: 47 additions & 20 deletions pkg/landscaper/controllers/installations/reconcile_delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ import (

var (
SiblingImportError = errors.New("a sibling still imports some of the exports")
SiblingDeleteError = errors.New("deletion of a sibling failed")
)

func (c *Controller) handleDeletionPhaseInit(ctx context.Context, inst *lsv1alpha1.Installation) (fatalError lserrors.LsError, normalError lserrors.LsError) {
op := "handleDeletionPhaseInit"

if err := c.deleteAllowed(ctx, inst); err != nil {
return nil, lserrors.NewWrappedError(err, op, "deleteAllowed", err.Error())
fatalError, normalError = c.deleteAllowed(ctx, inst)
if fatalError != nil || normalError != nil {
return fatalError, normalError
}

exec, err := executions.GetExecutionForInstallation(ctx, c.Client(), inst)
Expand Down Expand Up @@ -177,42 +179,67 @@ func (c *Controller) handleDeletionPhaseDeleting(ctx context.Context, inst *lsv1
return true, false, nil
}

func (c *Controller) deleteAllowed(ctx context.Context, inst *lsv1alpha1.Installation) lserrors.LsError {
func (c *Controller) deleteAllowed(ctx context.Context, inst *lsv1alpha1.Installation) (fatalError lserrors.LsError, normalError lserrors.LsError) {
op := "DeleteInstallationAllowed"

v, ok := inst.GetAnnotations()[lsv1alpha1.DeleteIgnoreSuccessors]
if ok && v == "true" {
return nil
return nil, nil
}

_, siblings, err := installations.GetParentAndSiblings(ctx, c.Client(), inst)
if err != nil {
return lserrors.NewWrappedError(err,
return nil, lserrors.NewWrappedError(err,
op, "CalculateInstallationContext", err.Error(), lsv1alpha1.ErrorInternalProblem)
}

// check if suitable for deletion
if checkIfSiblingImports(inst, installations.CreateInternalInstallationBases(siblings...)) {
return lserrors.NewWrappedError(SiblingImportError,
op, "SiblingImport", SiblingImportError.Error())
}

return nil
return checkIfSiblingImports(inst, installations.CreateInternalInstallationBases(siblings...))
}

// checkIfSiblingImports checks if a sibling imports any of the installations exports.
func checkIfSiblingImports(inst *lsv1alpha1.Installation, siblings []*installations.InstallationAndImports) bool {
func checkIfSiblingImports(inst *lsv1alpha1.Installation, siblings []*installations.InstallationAndImports) (fatalError lserrors.LsError, normalError lserrors.LsError) {
for _, sibling := range siblings {
for _, dataImport := range inst.Spec.Exports.Data {
if sibling.IsImportingData(dataImport.DataRef) {
return true
}
if isSuccessor(inst, sibling) {
return checkSuccessorSibling(inst, sibling)
}
for _, targetImport := range inst.Spec.Exports.Targets {
if sibling.IsImportingData(targetImport.Target) {
return true
}
}

return nil, nil
}

// isSuccessor determines whether the given sibling imports any DataObject or Target that the given inst exports.
func isSuccessor(inst *lsv1alpha1.Installation, sibling *installations.InstallationAndImports) bool {
for _, dataImport := range inst.Spec.Exports.Data {
if sibling.IsImportingData(dataImport.DataRef) {
return true
}
}

for _, targetImport := range inst.Spec.Exports.Targets {
if sibling.IsImportingData(targetImport.Target) {
return true
}
}

return false
}

// checkSuccessorSibling is called during the deletion of an installation (parameter "inst")
// for each successor sibling that still exists (parameter "sibling"). There are two cases:
// - If the deletion of "sibling" has failed (with same jobID), the deletion of "inst" must also fail.
// This is achieved by a fatal error.
// - Otherwise, the existence of "sibling" means that "inst" cannot yet be deleted, but must be checked again later.
// This is achieved by a normal error.
func checkSuccessorSibling(inst *lsv1alpha1.Installation, sibling *installations.InstallationAndImports) (fatalError lserrors.LsError, normalError lserrors.LsError) {
op := "CheckSuccessorSibling"

if inst.Status.JobID == sibling.GetInstallation().Status.JobIDFinished &&
sibling.GetInstallation().Status.InstallationPhase == lsv1alpha1.InstallationPhaseDeleteFailed {
return lserrors.NewWrappedError(SiblingDeleteError,
op, "SiblingDeleteError", SiblingDeleteError.Error()), nil
}

return nil, lserrors.NewWrappedError(SiblingImportError,
op, "SiblingImport", SiblingImportError.Error())
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,29 @@ var _ = Describe("Delete", func() {
Expect(ann).To(Equal("true"))

})

It("should fail if a successor has failed", func() {
ctx := context.Background()

var err error
state, err = testenv.InitResources(ctx, "./testdata/state/test8")
Expect(err).ToNot(HaveOccurred())
Expect(testutils.CreateExampleDefaultContext(ctx, testenv.Client, state.Namespace)).To(Succeed())

inst := state.Installations[state.Namespace+"/a"]
_ = testutils.ShouldNotReconcile(ctx, ctrl, testutils.RequestFromObject(inst))

testutils.ExpectNoError(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(inst), inst))
Expect(lsutils.IsInstallationPhase(inst, lsv1alpha1.InstallationPhaseDeleteFailed)).To(BeTrue())
Expect(lsutils.IsInstallationJobIDsIdentical(inst)).To(BeTrue())

inst = state.Installations[state.Namespace+"/root"]
_ = testutils.ShouldNotReconcile(ctx, ctrl, testutils.RequestFromObject(inst))

testutils.ExpectNoError(testenv.Client.Get(ctx, kutil.ObjectKeyFromObject(inst), inst))
Expect(lsutils.IsInstallationPhase(inst, lsv1alpha1.InstallationPhaseDeleteFailed)).To(BeTrue())
Expect(lsutils.IsInstallationJobIDsIdentical(inst)).To(BeTrue())
})
})

Context("Controller", func() {
Expand Down Expand Up @@ -216,5 +239,4 @@ var _ = Describe("Delete", func() {
}, 20*time.Second, 1*time.Second).Should(Succeed(), "the installation should be deleted")
})
})

})
Loading

0 comments on commit 599edb2

Please sign in to comment.