From 9bbeafdbbcbf3cae714829767306984746894e5a Mon Sep 17 00:00:00 2001 From: Fabricio Aguiar Date: Thu, 9 May 2024 14:58:42 +0100 Subject: [PATCH] Track retries and backoff limit Depends-On: https://github.com/openstack-k8s-operators/openstack-ansibleee-operator/pull/356 Signed-off-by: Fabricio Aguiar --- ...enstack.org_openstackdataplanedeployments.yaml | 4 ++++ api/v1beta1/openstackdataplanedeployment_types.go | 5 +++++ api/v1beta1/zz_generated.deepcopy.go | 5 +++++ ...enstack.org_openstackdataplanedeployments.yaml | 4 ++++ .../dataplane-operator.clusterserviceversion.yaml | 12 +++++++----- docs/assemblies/custom_resources.adoc | 5 +++++ pkg/deployment/deployment.go | 15 ++++++++++++--- pkg/util/ansible_execution.go | 1 + 8 files changed, 43 insertions(+), 8 deletions(-) diff --git a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index b06f69dd8..a79eeb071 100644 --- a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -52,6 +52,10 @@ spec: type: string ansibleTags: type: string + backoffLimit: + default: 6 + format: int32 + type: integer deploymentRequeueTime: default: 15 minimum: 1 diff --git a/api/v1beta1/openstackdataplanedeployment_types.go b/api/v1beta1/openstackdataplanedeployment_types.go index b1e149661..f280b519f 100644 --- a/api/v1beta1/openstackdataplanedeployment_types.go +++ b/api/v1beta1/openstackdataplanedeployment_types.go @@ -29,6 +29,11 @@ type OpenStackDataPlaneDeploymentSpec struct { // NodeSets is the list of NodeSets deployed NodeSets []string `json:"nodeSets"` + // BackoffLimit allows to define the maximum number of retried executions (defaults to 6). 
+ // +kubebuilder:default:=6 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:number"} + BackoffLimit *int32 `json:"backoffLimit,omitempty"` + // AnsibleTags for ansible execution // +kubebuilder:validation:Optional AnsibleTags string `json:"ansibleTags,omitempty"` diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 882eb9e20..236e3e56e 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -294,6 +294,11 @@ func (in *OpenStackDataPlaneDeploymentSpec) DeepCopyInto(out *OpenStackDataPlane *out = make([]string, len(*in)) copy(*out, *in) } + if in.BackoffLimit != nil { + in, out := &in.BackoffLimit, &out.BackoffLimit + *out = new(int32) + **out = **in + } if in.AnsibleExtraVars != nil { in, out := &in.AnsibleExtraVars, &out.AnsibleExtraVars *out = make(map[string]json.RawMessage, len(*in)) diff --git a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index b06f69dd8..a79eeb071 100644 --- a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -52,6 +52,10 @@ spec: type: string ansibleTags: type: string + backoffLimit: + default: 6 + format: int32 + type: integer deploymentRequeueTime: default: 15 minimum: 1 diff --git a/config/manifests/bases/dataplane-operator.clusterserviceversion.yaml b/config/manifests/bases/dataplane-operator.clusterserviceversion.yaml index 752014707..e27ea708f 100644 --- a/config/manifests/bases/dataplane-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/dataplane-operator.clusterserviceversion.yaml @@ -19,6 +19,13 @@ spec: displayName: OpenStack Data Plane Deployments kind: OpenStackDataPlaneDeployment name: openstackdataplanedeployments.dataplane.openstack.org + specDescriptors: 
+ - description: BackoffLimit allows to define the maximum number of retried executions + (defaults to 6). + displayName: Backoff Limit + path: backoffLimit + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number statusDescriptors: - description: Conditions displayName: Conditions @@ -72,11 +79,6 @@ spec: path: conditions x-descriptors: - urn:alm:descriptor:io.kubernetes.conditions - - description: Deployed - displayName: Deployed - path: deployed - x-descriptors: - - urn:alm:descriptor:com.tectonic.ui:booleanSwitch version: v1beta1 - description: OpenStackDataPlaneService is the Schema for the openstackdataplaneservices API OpenStackDataPlaneService name must be a valid RFC1123 as it is used in diff --git a/docs/assemblies/custom_resources.adoc b/docs/assemblies/custom_resources.adoc index 401999426..e4cf74cd9 100644 --- a/docs/assemblies/custom_resources.adoc +++ b/docs/assemblies/custom_resources.adoc @@ -653,6 +653,11 @@ OpenStackDataPlaneDeploymentSpec defines the desired state of OpenStackDataPlane | []string | true +| backoffLimit +| BackoffLimit allows to define the maximum number of retried executions (defaults to 6). 
+| *int32 +| false + | ansibleTags | AnsibleTags for ansible execution | string diff --git a/pkg/deployment/deployment.go b/pkg/deployment/deployment.go index 623822bfa..f782f950b 100644 --- a/pkg/deployment/deployment.go +++ b/pkg/deployment/deployment.go @@ -199,12 +199,22 @@ func (d *Deployer) ConditionalDeploy( } if ansibleEE.Status.JobStatus == ansibleeev1.JobStatusFailed { + severity := condition.SeverityWarning + errorReason := condition.ErrorReason + errorMsg := fmt.Sprintf("attempt #%d - execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Status.Failed, ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus) + ansibleCondition := ansibleEE.Status.Conditions.Get(condition.JobReadyCondition) + // NOTE(review): Get presumably returns nil when the condition is not set (confirm against lib-common), so guard before reading Reason. + if ansibleCondition != nil && ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded { + severity = condition.SeverityError + errorReason = condition.JobReasonBackoffLimitExceeded + errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus) + } log.Info(fmt.Sprintf("Condition %s error", readyCondition)) - err = fmt.Errorf("execution.name %s Execution.namespace %s Execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus) + err = fmt.Errorf("%s", errorMsg) nsConditions.Set(condition.FalseCondition( readyCondition, - condition.ErrorReason, - condition.SeverityError, + errorReason, + severity, readyErrorMessage, err.Error())) } diff --git a/pkg/util/ansible_execution.go b/pkg/util/ansible_execution.go index 4fd8902df..6672d47bb 100644 --- a/pkg/util/ansible_execution.go +++ b/pkg/util/ansible_execution.go @@ -107,6 +107,7 @@ func AnsibleExecution( if len(service.Spec.Playbook) > 0 { ansibleEE.Spec.Playbook = service.Spec.Playbook } + ansibleEE.Spec.BackoffLimit = deployment.Spec.BackoffLimit // If we have a service that ought to be deployed everywhere // substitute the existing play 
target with 'all'