From 3fce341e341fd1378eeeab262702ada140154c23 Mon Sep 17 00:00:00 2001 From: Fabricio Aguiar Date: Thu, 23 May 2024 19:37:02 +0100 Subject: [PATCH] Report when backoff limit is exceeded Depends-On: https://github.com/openstack-k8s-operators/lib-common/pull/504 Signed-off-by: Fabricio Aguiar --- .../ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ api/v1beta1/openstack_ansibleee_types.go | 4 ++++ .../ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ controllers/openstack_ansibleee_controller.go | 12 ++++++++++-- docs/assemblies/openstack_ansibleee.adoc | 5 +++++ tests/kuttl/tests/run_failed_playbook/01-assert.yaml | 9 +++++---- 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..52d931e4 100644 --- a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int32 + type: integer hash: additionalProperties: type: string diff --git a/api/v1beta1/openstack_ansibleee_types.go b/api/v1beta1/openstack_ansibleee_types.go index f39bd2fd..80f7f6e9 100644 --- a/api/v1beta1/openstack_ansibleee_types.go +++ b/api/v1beta1/openstack_ansibleee_types.go @@ -135,6 +135,10 @@ type OpenStackAnsibleEEStatus struct { // JobStatus status of the executed job (Pending/Running/Succeeded/Failed) JobStatus string `json:"JobStatus,omitempty" optional:"true"` + // The number of pods which reached phase Failed. + // +optional + Failed int32 `json:"failed,omitempty"` + // ObservedGeneration - the most recent generation observed for this // service. If the observed generation is less than the spec generation, // then the controller has not processed the latest changes injected by diff --git a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..52d931e4 100644 --- a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int32 + type: integer hash: additionalProperties: type: string diff --git a/controllers/openstack_ansibleee_controller.go b/controllers/openstack_ansibleee_controller.go index 0fa7b2c7..6ea648b8 100644 --- a/controllers/openstack_ansibleee_controller.go +++ b/controllers/openstack_ansibleee_controller.go @@ -217,13 +217,21 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R } if err != nil { + var errorReason condition.Reason + errorReason = condition.ErrorReason + severity := condition.SeverityWarning + if ansibleeeJob.HasReachedLimit() { + errorReason = condition.JobReasonBackoffLimitExceeded + severity = condition.SeverityError + } instance.Status.Conditions.Set(condition.FalseCondition( condition.JobReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, + errorReason, + severity, condition.JobReadyErrorMessage, err.Error())) instance.Status.JobStatus = ansibleeev1.JobStatusFailed + instance.Status.Failed = ansibleeeJob.GetTotalFailedAttempts() return ctrl.Result{}, err } diff --git a/docs/assemblies/openstack_ansibleee.adoc b/docs/assemblies/openstack_ansibleee.adoc index 4c4aaa39..5bdb3996 100644 --- a/docs/assemblies/openstack_ansibleee.adoc +++ b/docs/assemblies/openstack_ansibleee.adoc @@ -213,6 +213,11 @@ OpenStackAnsibleEEStatus defines the observed state of OpenStackAnsibleEE | string | false +| failed +| The number of pods which reached phase Failed. +| int32 +| false + | observedGeneration | ObservedGeneration - the most recent generation observed for this service. If the observed generation is less than the spec generation, then the controller has not processed the latest changes injected by the opentack-operator in the top-level CR (e.g. the ContainerImage) | int64 diff --git a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml index aaa34029..0fe93fd4 100644 --- a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml +++ b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml @@ -23,17 +23,18 @@ spec: preserveJobs: true status: JobStatus: Failed + failed: 4 conditions: - message: 'Job error occurred Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: Ready - message: 'Job error occurred Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: JobReady ---