Skip to content

Commit

Permalink
Add kuttl test for failed service
Browse files Browse the repository at this point in the history
Signed-off-by: Fabricio Aguiar <[email protected]>
  • Loading branch information
fao89 committed May 31, 2024
1 parent 2de1ba7 commit 428dfbb
Show file tree
Hide file tree
Showing 19 changed files with 437 additions and 170 deletions.
6 changes: 3 additions & 3 deletions api/v1beta1/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ const (
NodeSetDeploymentErrorMessage = "Deployment error occurred %s for NodeSet"

// NodeSetServiceDeploymentReadyMessage ready
NodeSetServiceDeploymentReadyMessage = "%s Deployment ready"
NodeSetServiceDeploymentReadyMessage = "Deployment ready for %s service"

// NodeSetServiceDeploymentReadyWaitingMessage not yet ready
NodeSetServiceDeploymentReadyWaitingMessage = "%s Deployment not yet ready"
NodeSetServiceDeploymentReadyWaitingMessage = "Deployment not yet ready for %s service"

// NodeSetServiceDeploymentErrorMessage error
NodeSetServiceDeploymentErrorMessage = "%s Deployment error occurred"
NodeSetServiceDeploymentErrorMessage = "Deployment error occurred in %s service"
)
51 changes: 26 additions & 25 deletions controllers/openstackdataplanedeployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,14 +214,12 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context,
condition.SeverityError,
dataplanev1.ServiceErrorMessage,
err.Error())
if len(instance.Spec.ServicesOverride) == 0 {
nsConditions.MarkFalse(
dataplanev1.NodeSetDeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
dataplanev1.ServiceErrorMessage,
err.Error())
}
nsConditions.MarkFalse(
dataplanev1.NodeSetDeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
dataplanev1.ServiceErrorMessage,
err.Error())
return ctrl.Result{}, err
}
if service.Spec.TLSCert != nil {
Expand All @@ -234,14 +232,12 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context,
condition.SeverityError,
condition.TLSInputErrorMessage,
err.Error())
if len(instance.Spec.ServicesOverride) == 0 {
nsConditions.MarkFalse(
dataplanev1.NodeSetDeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
condition.TLSInputErrorMessage,
err.Error())
}
nsConditions.MarkFalse(
dataplanev1.NodeSetDeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
condition.TLSInputErrorMessage,
err.Error())
return ctrl.Result{}, err
} else if (*result != ctrl.Result{}) {
return *result, nil // requeue here
Expand All @@ -253,10 +249,11 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context,

// All nodeSets successfully fetched.
// Mark InputReadyCondition=True
instance.Status.Conditions.MarkTrue(condition.InputReadyCondition, condition.ReadyMessage)
instance.Status.Conditions.MarkTrue(condition.InputReadyCondition, condition.InputReadyMessage)
shouldRequeue := false
haveError := false
deploymentErrMsg := ""
backoffLimitReached := false

globalInventorySecrets := map[string]string{}
globalSSHKeySecrets := map[string]string{}
Expand Down Expand Up @@ -347,12 +344,9 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context,
} else {
deploymentErrMsg = fmt.Sprintf("%s & %s", deploymentErrMsg, errMsg)
}
nsConditions.MarkFalse(
dataplanev1.NodeSetDeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
condition.DeploymentReadyErrorMessage,
err.Error())
nsConditions.Set(nsConditions.Mirror(dataplanev1.NodeSetDeploymentReadyCondition))
errorReason := nsConditions.Get(dataplanev1.NodeSetDeploymentReadyCondition).Reason
backoffLimitReached = errorReason == condition.JobReasonBackoffLimitExceeded
}

if deployResult != nil {
Expand All @@ -367,10 +361,17 @@ func (r *OpenStackDataPlaneDeploymentReconciler) Reconcile(ctx context.Context,
}

if haveError {
var reason condition.Reason
reason = condition.ErrorReason
severity := condition.SeverityWarning
if backoffLimitReached {
reason = condition.JobReasonBackoffLimitExceeded
severity = condition.SeverityError
}
instance.Status.Conditions.MarkFalse(
condition.DeploymentReadyCondition,
condition.ErrorReason,
condition.SeverityError,
reason,
severity,
condition.DeploymentReadyErrorMessage,
deploymentErrMsg)
return ctrl.Result{}, fmt.Errorf(deploymentErrMsg)
Expand Down
16 changes: 8 additions & 8 deletions docs/assemblies/interacting_with_ansible.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ retrieved from the API.
Sample output when using the default list of services:

NAME NETWORKATTACHMENTS STATUS MESSAGE
configure-network-edpm-compute True AnsibleExecutionJob complete
configure-os-edpm-compute True AnsibleExecutionJob complete
install-os-edpm-compute True AnsibleExecutionJob complete
libvirt-edpm-compute True AnsibleExecutionJob complete
nova-edpm-compute True AnsibleExecutionJob complete
run-os-edpm-compute True AnsibleExecutionJob complete
telemetry-edpm-compute True AnsibleExecutionJob complete
validate-network-edpm-compute True AnsibleExecutionJob complete
configure-network-edpm-compute True Job completed
configure-os-edpm-compute True Job completed
install-os-edpm-compute True Job completed
libvirt-edpm-compute True Job completed
nova-edpm-compute True Job completed
run-os-edpm-compute True Job completed
telemetry-edpm-compute True Job completed
validate-network-edpm-compute True Job completed

Querying for pods with the OpenStackAnsibleEE label

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ The following example output shows all services and their job condition for all
$ oc get openstackansibleee
NAME NETWORKATTACHMENTS STATUS MESSAGE
bootstrap-openstack-edpm ["ctlplane"] True AnsibleExecutionJob complete
download-cache-openstack-edpm ["ctlplane"] False AnsibleExecutionJob is running
repo-setup-openstack-edpm ["ctlplane"] True AnsibleExecutionJob complete
validate-network-another-osdpd ["ctlplane"] False AnsibleExecutionJob is running
bootstrap-openstack-edpm ["ctlplane"] True Job completed
download-cache-openstack-edpm ["ctlplane"] False Job in progress
repo-setup-openstack-edpm ["ctlplane"] True Job completed
validate-network-another-osdpd ["ctlplane"] False Job in progress
----

. Filter for the name and service for a specific deployment:
Expand All @@ -54,20 +54,19 @@ The following example filters the list to only show services and their job condi
$ oc get openstackansibleee -l openstackdataplanedeployment=openstack-edpm-ipam1
NAME NETWORKATTACHMENTS STATUS MESSAGE
bootstrap-openstack-edpm ["ctlplane"] True AnsibleExecutionJob complete
download-cache-openstack-edpm ["ctlplane"] False AnsibleExecutionJob is running
repo-setup-openstack-edpm ["ctlplane"] True AnsibleExecutionJob complete
bootstrap-openstack-edpm ["ctlplane"] True Job completed
download-cache-openstack-edpm ["ctlplane"] False Job in progress
repo-setup-openstack-edpm ["ctlplane"] True Job completed
----

.Job Condition Messages

AnsibleEE jobs have an associated condition message that indicates the current state of the service job. This condition message is displayed in the `MESSAGE` field of the `oc get openstackansibleee` command output. Jobs return one of the following conditions when queried:

* `AnsibleExecutionJob not started`: The job has not started.
* `AnsibleExecutionJob not found`: The job could not be found.
* `AnsibleExecutionJob is running`: The job is currently running.
* `AnsibleExecutionJob complete`: The job execution is complete.
* `AnsibleExecutionJob error occured <error_message>`: The job execution stopped unexpectedly. The `<error_message>` is replaced with a specific error message.
* `Job not started`: The job has not started.
* `Job in progress`: The job is currently running.
* `Job completed`: The job execution is complete.
* `Job error occurred <error_message>`: The job execution stopped unexpectedly. The `<error_message>` is replaced with a specific error message.

To further investigate a service displaying a particular job condition message, use the command `oc logs job/<service>` to display the logs associated with that service. For example, to display the logs for the `repo-setup-openstack-edpm` service, use the command `oc logs job/repo-setup-openstack-edpm`.

Expand Down
37 changes: 30 additions & 7 deletions pkg/deployment/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,26 @@ func (d *Deployer) Deploy(services []string) (*ctrl.Result, error) {
copy(aeeSpecMounts, d.AeeSpec.ExtraMounts)
// Deploy the composable services
for _, service := range services {
deployName = service
readyCondition = condition.Type(fmt.Sprintf("Service%sDeploymentReady", strcase.ToCamel(service)))
readyWaitingMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyWaitingMessage, deployName)
readyMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyMessage, deployName)
readyErrorMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentErrorMessage, deployName) + " error %s"

nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name]
log.Info("Deploying service", "service", service)
foundService, err := GetService(d.Ctx, d.Helper, service)
if err != nil {
nsConditions.Set(condition.FalseCondition(
readyCondition,
condition.ErrorReason,
condition.SeverityError,
readyErrorMessage,
err.Error()))
d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions
return &ctrl.Result{}, err
}
deployName = foundService.Name
readyCondition = condition.Type(fmt.Sprintf("Service%sDeploymentReady", strcase.ToCamel(service)))
readyWaitingMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyWaitingMessage, deployName)
readyMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentReadyMessage, deployName)
readyErrorMessage = fmt.Sprintf(dataplanev1.NodeSetServiceDeploymentErrorMessage, deployName)

containerImages := dataplaneutil.GetContainerImages(d.Version)
if containerImages.AnsibleeeImage != nil {
d.AeeSpec.OpenStackAnsibleEERunnerImage = *containerImages.AnsibleeeImage
Expand All @@ -94,6 +104,13 @@ func (d *Deployer) Deploy(services []string) (*ctrl.Result, error) {
copy(d.AeeSpec.ExtraMounts, aeeSpecMounts)
d.AeeSpec, err = d.addServiceExtraMounts(foundService)
if err != nil {
nsConditions.Set(condition.FalseCondition(
readyCondition,
condition.ErrorReason,
condition.SeverityError,
readyErrorMessage,
err.Error()))
d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions
return &ctrl.Result{}, err
}

Expand All @@ -103,6 +120,13 @@ func (d *Deployer) Deploy(services []string) (*ctrl.Result, error) {
d.AeeSpec, err = d.addCertMounts(services)
}
if err != nil {
nsConditions.Set(condition.FalseCondition(
readyCondition,
condition.ErrorReason,
condition.SeverityError,
readyErrorMessage,
err.Error()))
d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions
return &ctrl.Result{}, err
}
}
Expand All @@ -116,7 +140,6 @@ func (d *Deployer) Deploy(services []string) (*ctrl.Result, error) {
foundService,
)

nsConditions := d.Status.NodeSetConditions[d.NodeSet.Name]
if err != nil || !nsConditions.IsTrue(readyCondition) {
log.Info(fmt.Sprintf("Condition %s not ready", readyCondition))
return &ctrl.Result{}, err
Expand Down Expand Up @@ -206,7 +229,7 @@ func (d *Deployer) ConditionalDeploy(

if ansibleEE.Status.JobStatus == ansibleeev1.JobStatusFailed {
errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus)
ansibleCondition := ansibleEE.Status.Conditions.Get(condition.JobReadyCondition)
ansibleCondition := ansibleEE.Status.Conditions.Get(condition.ReadyCondition)
if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded {
errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.status.jobstatus: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.JobStatus)
}
Expand Down
Loading

0 comments on commit 428dfbb

Please sign in to comment.