From 1dfa222adf7207fba2252b477fcb918d59e374e3 Mon Sep 17 00:00:00 2001 From: Fabricio Aguiar Date: Tue, 7 May 2024 14:06:33 +0100 Subject: [PATCH] Report when backoff limit is exceeded Depends-On: https://github.com/openstack-k8s-operators/lib-common/pull/504 Signed-off-by: Fabricio Aguiar --- ...ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ api/v1beta1/openstack_ansibleee_types.go | 4 ++++ ...ansibleee.openstack.org_openstackansibleees.yaml | 3 +++ controllers/openstack_ansibleee_controller.go | 11 +++++++++-- docs/assemblies/openstack_ansibleee.adoc | 5 +++++ .../kuttl/tests/run_failed_playbook/01-assert.yaml | 13 +++++++------ 6 files changed, 31 insertions(+), 8 deletions(-) diff --git a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/api/v1beta1/openstack_ansibleee_types.go b/api/v1beta1/openstack_ansibleee_types.go index f39bd2fd..80f7f6e9 100644 --- a/api/v1beta1/openstack_ansibleee_types.go +++ b/api/v1beta1/openstack_ansibleee_types.go @@ -135,6 +135,10 @@ type OpenStackAnsibleEEStatus struct { // JobStatus status of the executed job (Pending/Running/Succeeded/Failed) JobStatus string `json:"JobStatus,omitempty" optional:"true"` + // The number of pods which reached phase Failed. + // +optional + Failed int32 `json:"failed,omitempty"` + // ObservedGeneration - the most recent generation observed for this // service. If the observed generation is less than the spec generation, // then the controller has not processed the latest changes injected by diff --git a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/controllers/openstack_ansibleee_controller.go b/controllers/openstack_ansibleee_controller.go index 0fa7b2c7..a11c3471 100644 --- a/controllers/openstack_ansibleee_controller.go +++ b/controllers/openstack_ansibleee_controller.go @@ -217,13 +217,20 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R } if err != nil { + errorReason := condition.ErrorReason + severity := condition.SeverityWarning + if ansibleeeJob.HasReachedLimit() { + errorReason = condition.JobReasonBackoffLimitExceeded + severity = condition.SeverityError + } instance.Status.Conditions.Set(condition.FalseCondition( condition.JobReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, + errorReason, + severity, condition.JobReadyErrorMessage, err.Error())) instance.Status.JobStatus = ansibleeev1.JobStatusFailed + instance.Status.Failed = ansibleeeJob.GetTotalFailedAttempts() return ctrl.Result{}, err } diff --git a/docs/assemblies/openstack_ansibleee.adoc b/docs/assemblies/openstack_ansibleee.adoc index 4c4aaa39..2ac12607 100644 --- a/docs/assemblies/openstack_ansibleee.adoc +++ b/docs/assemblies/openstack_ansibleee.adoc @@ -213,6 +213,11 @@ OpenStackAnsibleEEStatus defines the observed state of OpenStackAnsibleEE | string | false +| failed +| The number of pods which reached phase Failed. +| int64 +| false + | observedGeneration | ObservedGeneration - the most recent generation observed for this service. If the observed generation is less than the spec generation, then the controller has not processed the latest changes injected by the opentack-operator in the top-level CR (e.g. the ContainerImage) | int64 diff --git a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml index 80894b87..7200de2e 100644 --- a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml +++ b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml @@ -23,17 +23,18 @@ spec: preserveJobs: true status: JobStatus: Failed + failed: 4 conditions: - - message: 'Job error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: Ready - - message: 'Job error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: JobReady ---