ctrl: nrop: pack the common return args in a struct
The reconciliation steps return a common (and growing)
set of values; let's pack them in a struct, since we always
want to return the same tuple anyway, for consistency.

Signed-off-by: Francesco Romani <[email protected]>
ffromani committed Nov 19, 2024
1 parent 188c089 commit 9912e06
Showing 5 changed files with 101 additions and 43 deletions.
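
For context: this is the classic refactor of packing a growing return tuple into a single result struct. A minimal, self-contained sketch of the pattern (illustrative names only, not the operator's actual code):

```go
package main

import (
	"fmt"
	"time"
)

// Before: every step returns the same growing tuple, and each call site
// must keep the positional values in sync:
//
//	func step() (done bool, requeueAfter time.Duration, reason string, err error)

// After: one struct carries the whole outcome, so all return sites stay
// uniform and new fields can be added without touching any signature.
type stepResult struct {
	RequeueAfter time.Duration
	Reason       string
	Err          error
}

func step() stepResult {
	// a successful step: nothing to requeue, no error
	return stepResult{Reason: "AsExpected"}
}

func main() {
	res := step()
	fmt.Printf("requeueAfter=%v reason=%q err=%v\n", res.RequeueAfter, res.Reason, res.Err)
}
```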
4 changes: 2 additions & 2 deletions config/manager/kustomization.yaml
@@ -12,5 +12,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 images:
 - name: controller
-  newName: quay.io/openshift-kni/numaresources-operator
-  newTag: 4.18.999-snapshot
+  newName: quay.io/fromani/numaresources-operator
+  newTag: 4.18.1001
72 changes: 62 additions & 10 deletions controllers/controllers.go
@@ -18,16 +18,70 @@ package controllers

 import (
 	"errors"
+	"time"

+	ctrl "sigs.k8s.io/controller-runtime"
+
 	"github.com/openshift-kni/numaresources-operator/pkg/status"
 )

+type reconcileStep struct {
+	Result        ctrl.Result
+	ConditionInfo conditionInfo
+	Error         error
+}

+// Done returns true if a reconciliation step is fully completed,
+// meaning all the substeps have been performed successfully; false otherwise
+func (rs reconcileStep) Done() bool {
+	return rs.ConditionInfo.Type == status.ConditionAvailable
+}
+
+// EarlyStop returns true if we should shortcut after a reconcile substep,
+// false otherwise
+func (rs reconcileStep) EarlyStop() bool {
+	return rs.ConditionInfo.Type != status.ConditionAvailable
+}
+
+// reconcileStepSuccess returns a reconcileStep which tells the caller
+// the reconciliation attempt was completed successfully
+func reconcileStepSuccess() reconcileStep {
+	return reconcileStep{
+		Result:        ctrl.Result{},
+		ConditionInfo: availableConditionInfo(),
+	}
+}
+
+// reconcileStepOngoing returns a reconcileStep which tells the caller
+// the reconciliation attempt needs to wait for an external condition (e.g.
+// MachineConfig updates) and to retry reconciling after the `after` interval.
+func reconcileStepOngoing(after time.Duration) reconcileStep {
+	return reconcileStep{
+		Result:        ctrl.Result{RequeueAfter: after},
+		ConditionInfo: progressingConditionInfo(),
+	}
+}
+
+// reconcileStepFailed returns a reconcileStep which tells the caller
+// the reconciliation attempt failed with the given error `err`, and
+// that the NUMAResourcesOperator condition should be Degraded.
+func reconcileStepFailed(err error) reconcileStep {
+	return reconcileStep{
+		Result:        ctrl.Result{},
+		ConditionInfo: degradedConditionInfoFromError(err),
+		Error:         err,
+	}
+}
+
 // conditionInfo holds all the data needed to build a Condition
 type conditionInfo struct {
 	Type    string // like metav1.Condition.Type
 	Reason  string
 	Message string
 }

+// WithReason returns a new conditionInfo with the reason set to the
+// given value, unless a reason was already set
 func (ci conditionInfo) WithReason(reason string) conditionInfo {
 	ret := ci
 	if ret.Reason == "" {
@@ -36,6 +90,8 @@ func (ci conditionInfo) WithReason(reason string) conditionInfo {
 	return ret
 }

+// WithMessage returns a new conditionInfo with the message set to the
+// given value, unless a message was already set
 func (ci conditionInfo) WithMessage(message string) conditionInfo {
 	ret := ci
 	if ret.Message == "" {
@@ -44,19 +100,25 @@ func (ci conditionInfo) WithMessage(message string) conditionInfo {
 	return ret
 }

+// availableConditionInfo returns a conditionInfo ready to build
+// an Available condition
 func availableConditionInfo() conditionInfo {
 	return conditionInfo{
 		Type:   status.ConditionAvailable,
 		Reason: "AsExpected",
 	}
 }

+// progressingConditionInfo returns a conditionInfo ready to build
+// a Progressing condition
 func progressingConditionInfo() conditionInfo {
 	return conditionInfo{
 		Type: status.ConditionProgressing,
 	}
 }

+// degradedConditionInfoFromError returns a conditionInfo ready to build
+// a Degraded condition with the given error `err`.
 func degradedConditionInfoFromError(err error) conditionInfo {
 	return conditionInfo{
 		Type: status.ConditionDegraded,
@@ -65,16 +127,6 @@ func degradedConditionInfoFromError(err error) conditionInfo {
 	}
 }

-func fillConditionInfoFromError(cond conditionInfo, err error) conditionInfo {
-	if cond.Message == "" {
-		cond.Message = messageFromError(err)
-	}
-	if cond.Reason == "" {
-		cond.Reason = reasonFromError(err)
-	}
-	return cond
-}
-
 func reasonFromError(err error) string {
 	if err == nil {
 		return "AsExpected"
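
Note how the pieces compose: the constructors pick the condition type, while the With* helpers only fill in a reason or message that is still empty. A hypothetical usage sketch (assumed to live in the same controllers package; not part of this commit):

```go
// exampleProgressingInfo is hypothetical: progressingConditionInfo() leaves
// Reason and Message empty, so both helpers fill their fields here; on an
// already-populated conditionInfo they would leave the existing values alone.
func exampleProgressingInfo() conditionInfo {
	return progressingConditionInfo().
		WithReason("WaitingForMCP").
		WithMessage("waiting for MachineConfigPool updates to complete")
}
```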
63 changes: 33 additions & 30 deletions controllers/numaresourcesoperator_controller.go
@@ -165,14 +165,14 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr

 	curStatus := instance.Status.DeepCopy()

-	result, err := r.reconcileResource(ctx, instance, trees)
+	step := r.reconcileResource(ctx, instance, trees)

-	if condition == status.ConditionAvailable && multiMCPsErr != nil {
-		_, _ = r.degradeStatus(ctx, instance, validation.NodeGroupsError, multiMCPsErr)
+	if step.Done() && multiMCPsErr != nil {
+		return r.degradeStatus(ctx, instance, validation.NodeGroupsError, multiMCPsErr)
 	}

 	if !status.IsUpdatedNUMAResourcesOperator(curStatus, &instance.Status) {
-		return result, err
+		return step.Result, step.Error
 	}

 	updErr := r.Client.Status().Update(ctx, instance)
@@ -181,7 +181,7 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
 		return ctrl.Result{}, fmt.Errorf("could not update status for object %s: %w", client.ObjectKeyFromObject(instance), updErr)
 	}

-	return result, err
+	return step.Result, step.Error
 }

 // updateStatusConditionsIfNeeded returns true if conditions were updated.
@@ -216,27 +216,27 @@ func (r *NUMAResourcesOperatorReconciler) degradeStatus(ctx context.Context, ins
 	return ctrl.Result{}, nil
 }

-func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) (bool, ctrl.Result, conditionInfo, error) {
+func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) reconcileStep {
 	applied, err := r.syncNodeResourceTopologyAPI(ctx)
 	if err != nil {
 		r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedCRDInstall", "Failed to install Node Resource Topology CRD: %v", err)
 		err = fmt.Errorf("FailedAPISync: %w", err)
-		return true, ctrl.Result{}, degradedConditionInfoFromError(err), err
+		return reconcileStepFailed(err)
 	}
 	if applied {
 		r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulCRDInstall", "Node Resource Topology CRD installed")
 	}
-	return false, ctrl.Result{}, availableConditionInfo(), nil
+	return reconcileStepSuccess()
 }

-func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) (bool, ctrl.Result, conditionInfo, error) {
+func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) reconcileStep {
 	// we need to sync machine configs first and wait for the MachineConfigPool updates
 	// before checking additional components for updates
 	mcpUpdatedFunc, err := r.syncMachineConfigs(ctx, instance, trees)
 	if err != nil {
 		r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedMCSync", "Failed to set up machine configuration for worker nodes: %v", err)
 		err = fmt.Errorf("failed to sync machine configs: %w", err)
-		return true, ctrl.Result{}, degradedConditionInfoFromError(err), err
+		return reconcileStepFailed(err)
 	}
 	r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulMCSync", "Enabled machine configuration for worker nodes")

@@ -247,22 +247,22 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con

 	if !allMCPsUpdated {
 		// the Machine Config Pool still did not apply the machine config, wait for one minute
-		return true, ctrl.Result{RequeueAfter: numaResourcesRetryPeriod}, progressingConditionInfo(), nil
+		return reconcileStepOngoing(numaResourcesRetryPeriod)
 	}
 	instance.Status.MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses(instance.Status.MachineConfigPools, trees)

-	return false, ctrl.Result{}, availableConditionInfo(), nil
+	return reconcileStepSuccess()
 }

-func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) ([]poolDaemonSet, bool, ctrl.Result, conditionInfo, error) {
+func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) ([]poolDaemonSet, reconcileStep) {
 	daemonSetsInfoPerMCP, err := r.syncNUMAResourcesOperatorResources(ctx, instance, trees)
 	if err != nil {
 		r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create Resource-Topology-Exporter DaemonSets: %v", err)
 		err = fmt.Errorf("FailedRTESync: %w", err)
-		return nil, true, ctrl.Result{}, degradedConditionInfoFromError(err), err
+		return nil, reconcileStepFailed(err)
 	}
 	if len(daemonSetsInfoPerMCP) == 0 {
-		return nil, false, ctrl.Result{}, availableConditionInfo(), nil
+		return nil, reconcileStepSuccess()
 	}

 	r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulRTECreate", "Created Resource-Topology-Exporter DaemonSets")
@@ -271,40 +271,43 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context
 	instance.Status.DaemonSets = dssWithReadyStatus
 	instance.Status.RelatedObjects = relatedobjects.ResourceTopologyExporter(r.Namespace, dssWithReadyStatus)
 	if err != nil {
-		return nil, true, ctrl.Result{}, degradedConditionInfoFromError(err), err
+		return nil, reconcileStepFailed(err)
 	}
 	if !allDSsUpdated {
-		return nil, true, ctrl.Result{RequeueAfter: 5 * time.Second}, progressingConditionInfo(), nil
+		return nil, reconcileStepOngoing(5 * time.Second)
 	}

-	return daemonSetsInfoPerMCP, false, ctrl.Result{}, availableConditionInfo(), nil
+	return daemonSetsInfoPerMCP, reconcileStepSuccess()
 }

-func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) (ctrl.Result, error) {
-	if done, res, cond, err := r.reconcileResourceAPI(ctx, instance, trees); done {
-		updateStatusConditionsIfNeeded(instance, fillConditionInfoFromError(cond, err))
-		return res, err
+func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) reconcileStep {
+	if step := r.reconcileResourceAPI(ctx, instance, trees); step.EarlyStop() {
+		updateStatusConditionsIfNeeded(instance, step.ConditionInfo)
+		return step
 	}

 	if r.Platform == platform.OpenShift {
-		if done, res, cond, err := r.reconcileResourceMachineConfig(ctx, instance, trees); done {
-			updateStatusConditionsIfNeeded(instance, fillConditionInfoFromError(cond, err))
-			return res, err
+		if step := r.reconcileResourceMachineConfig(ctx, instance, trees); step.EarlyStop() {
+			updateStatusConditionsIfNeeded(instance, step.ConditionInfo)
+			return step
 		}
 	}

-	dsPerMCP, done, res, cond, err := r.reconcileResourceDaemonSet(ctx, instance, trees)
-	if done {
-		updateStatusConditionsIfNeeded(instance, fillConditionInfoFromError(cond, err))
-		return res, err
+	dsPerMCP, step := r.reconcileResourceDaemonSet(ctx, instance, trees)
+	if step.EarlyStop() {
+		updateStatusConditionsIfNeeded(instance, step.ConditionInfo)
+		return step
 	}

 	// all fields of NodeGroupStatus are required, so publish the status only when all daemonsets and MCPs are
 	// updated, which is certain if we got to this point; otherwise the function would have returned already
 	instance.Status.NodeGroups = syncNodeGroupsStatus(instance, dsPerMCP)

 	updateStatusConditionsIfNeeded(instance, availableConditionInfo())
-	return ctrl.Result{}, nil
+	return reconcileStep{
+		Result:        ctrl.Result{},
+		ConditionInfo: availableConditionInfo(),
+	}
 }

 func (r *NUMAResourcesOperatorReconciler) syncDaemonSetsStatuses(ctx context.Context, rd client.Reader, daemonSetsInfo []poolDaemonSet) ([]nropv1.NamespacedName, bool, error) {
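
As a reminder of the controller-runtime contract behind the Result/Error pair: a non-zero RequeueAfter with a nil error re-runs Reconcile after that delay, while a non-nil error triggers a rate-limited requeue. That is why reconcileStep can carry both and the caller can end with a plain `return step.Result, step.Error`. A minimal sketch of that shape (hypothetical reconciler and names, not from this repo):

```go
package main

import (
	"context"
	"fmt"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// stepLike mimics the shape of reconcileStep for illustration.
type stepLike struct {
	Result ctrl.Result
	Error  error
}

type myReconciler struct{}

func (r *myReconciler) doWork(ctx context.Context) stepLike {
	// pretend we are waiting on an external condition
	return stepLike{Result: ctrl.Result{RequeueAfter: time.Minute}}
}

func (r *myReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	step := r.doWork(ctx)
	// controller-runtime requeues after Result.RequeueAfter when the error
	// is nil, and with exponential backoff when the error is non-nil.
	return step.Result, step.Error
}

func main() {
	res, err := (&myReconciler{}).Reconcile(context.Background(), ctrl.Request{})
	fmt.Println(res.RequeueAfter, err)
}
```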
3 changes: 2 additions & 1 deletion controllers/numaresourcesoperator_controller_test.go
@@ -188,7 +188,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
 	})

 	Context("with node group with MCP selector that matches more than one MCP", func() {
-		It("should update the CR condition to degraded when annotation is not enabled but still creat all needed objects", func() {
+		It("should update the CR condition to degraded when annotation is not enabled but still create all needed objects", func() {
 			mcpName1 := "test1"
 			label1 := map[string]string{
 				"test1": "test1",
@@ -336,6 +336,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
 			Expect(availableCondition.Status).To(Equal(metav1.ConditionTrue))
 		})
 	})
+
 	Context("with correct NRO and SELinuxPolicyConfigAnnotation not set", func() {
 		It("should create all objects, RTE daemonsets and MCPs will get updated from the first reconcile iteration", func() {
 			mcpName := "test1"
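
The new helpers are also easy to unit-test in isolation. A sketch of such a test (hypothetical, not part of this commit; plain `testing` style rather than the Ginkgo style used above, assuming it lives in the controllers package):

```go
package controllers

import (
	"errors"
	"testing"
	"time"
)

func TestReconcileStepOutcomes(t *testing.T) {
	// success: Done, not an early stop
	ok := reconcileStepSuccess()
	if !ok.Done() || ok.EarlyStop() {
		t.Errorf("success step must be Done and must not early-stop")
	}
	// ongoing (Progressing): an early stop
	if !reconcileStepOngoing(time.Minute).EarlyStop() {
		t.Errorf("ongoing step must early-stop")
	}
	// failed (Degraded): an early stop carrying the error
	failed := reconcileStepFailed(errors.New("boom"))
	if !failed.EarlyStop() || failed.Error == nil {
		t.Errorf("failed step must early-stop and carry the error")
	}
}
```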
2 changes: 2 additions & 0 deletions doc/examples/nrop.yaml
@@ -2,6 +2,8 @@ apiVersion: nodetopology.openshift.io/v1
 kind: NUMAResourcesOperator
 metadata:
   name: numaresourcesoperator
+  annotations:
+    config.node.openshift-kni.io/selinux-policy: "custom"
 spec:
   nodeGroups:
   - config:
