From 62e2602c3f073e781f2f1ef619d4c92e830c3032 Mon Sep 17 00:00:00 2001 From: Fabricio Aguiar Date: Wed, 22 May 2024 15:24:15 +0100 Subject: [PATCH] Report when backoff limit is exceeded Depends-On: https://github.com/openstack-k8s-operators/lib-common/pull/504 Signed-off-by: Fabricio Aguiar --- ...ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ api/v1beta1/openstack_ansibleee_types.go | 4 ++++ ...ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ controllers/openstack_ansibleee_controller.go | 12 ++++++++++-- docs/assemblies/openstack_ansibleee.adoc | 5 +++++ go.mod | 2 +- go.sum | 2 ++ .../kuttl/tests/run_failed_playbook/01-assert.yaml | 13 +++++++------ 8 files changed, 35 insertions(+), 9 deletions(-) diff --git a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/api/v1beta1/openstack_ansibleee_types.go b/api/v1beta1/openstack_ansibleee_types.go index f39bd2fd..80f7f6e9 100644 --- a/api/v1beta1/openstack_ansibleee_types.go +++ b/api/v1beta1/openstack_ansibleee_types.go @@ -135,6 +135,10 @@ type OpenStackAnsibleEEStatus struct { // JobStatus status of the executed job (Pending/Running/Succeeded/Failed) JobStatus string `json:"JobStatus,omitempty" optional:"true"` + // The number of pods which reached phase Failed. + // +optional + Failed int32 `json:"failed,omitempty"` + // ObservedGeneration - the most recent generation observed for this // service. If the observed generation is less than the spec generation, // then the controller has not processed the latest changes injected by diff --git a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/controllers/openstack_ansibleee_controller.go b/controllers/openstack_ansibleee_controller.go index 0fa7b2c7..6ea648b8 100644 --- a/controllers/openstack_ansibleee_controller.go +++ b/controllers/openstack_ansibleee_controller.go @@ -217,13 +217,21 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R } if err != nil { + var errorReason condition.Reason + errorReason = condition.ErrorReason + severity := condition.SeverityWarning + if ansibleeeJob.HasReachedLimit() { + errorReason = condition.JobReasonBackoffLimitExceeded + severity = condition.SeverityError + } instance.Status.Conditions.Set(condition.FalseCondition( condition.JobReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, + errorReason, + severity, condition.JobReadyErrorMessage, err.Error())) instance.Status.JobStatus = ansibleeev1.JobStatusFailed + instance.Status.Failed = ansibleeeJob.GetTotalFailedAttempts() return ctrl.Result{}, err } diff --git a/docs/assemblies/openstack_ansibleee.adoc b/docs/assemblies/openstack_ansibleee.adoc index 4c4aaa39..2ac12607 100644 --- a/docs/assemblies/openstack_ansibleee.adoc +++ b/docs/assemblies/openstack_ansibleee.adoc @@ -213,6 +213,11 @@ OpenStackAnsibleEEStatus defines the observed state of OpenStackAnsibleEE | string | false +| failed +| The number of pods which reached phase Failed. +| int64 +| false + | observedGeneration | ObservedGeneration - the most recent generation observed for this service. If the observed generation is less than the spec generation, then the controller has not processed the latest changes injected by the opentack-operator in the top-level CR (e.g. the ContainerImage) | int64 diff --git a/go.mod b/go.mod index c76df275..9ca4323f 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/onsi/ginkgo/v2 v2.17.2 github.com/onsi/gomega v1.33.0 - github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240429052447-09a614506ca6 + github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240522141801-d6e03083e82a github.com/openstack-k8s-operators/lib-common/modules/storage v0.3.1-0.20240429052447-09a614506ca6 github.com/openstack-k8s-operators/openstack-ansibleee-operator/api v0.0.0-00010101000000-000000000000 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 07033173..bc6d8575 100644 --- a/go.sum +++ b/go.sum @@ -96,6 +96,8 @@ github.com/openshift/api v3.9.0+incompatible h1:fJ/KsefYuZAjmrr3+5U9yZIZbTOpVkDD github.com/openshift/api v3.9.0+incompatible/go.mod h1:dh9o4Fs58gpFXGSYfnVxGR9PnV53I8TW84pQaJDdGiY= github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240429052447-09a614506ca6 h1:WLsG3Ko+phW5xZJjncypLWGASoLqKrt05qN9Zxsad6g= github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240429052447-09a614506ca6/go.mod h1:lYhFzul37AR/6gAhTAA1KKWbOlzB3F/7014lejn883c= +github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240522141801-d6e03083e82a h1:kcASVA9sZg9DtggyJlN6JZE6pIenJgXivFK6ry7WUVM= +github.com/openstack-k8s-operators/lib-common/modules/common v0.3.1-0.20240522141801-d6e03083e82a/go.mod h1:lYhFzul37AR/6gAhTAA1KKWbOlzB3F/7014lejn883c= github.com/openstack-k8s-operators/lib-common/modules/storage v0.3.1-0.20240429052447-09a614506ca6 h1:NNSOEpTZCa9RL5sZiF4ZOlB+agBrL7q7FB9pC58d4S8= github.com/openstack-k8s-operators/lib-common/modules/storage v0.3.1-0.20240429052447-09a614506ca6/go.mod h1:C/qUWW4lW3687riZxYd+YRCtOyHZKURu3Imv6S9OP7U= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml index 80894b87..7200de2e 100644 --- a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml +++ b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml @@ -23,17 +23,18 @@ spec: preserveJobs: true status: JobStatus: Failed + failed: 4 conditions: - - message: 'Job error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: Ready - - message: 'Job error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: JobReady ---