Skip to content

Commit

Permalink
Merge pull request #1130 from openshift-kni/streamline-metrics
Browse files Browse the repository at this point in the history
rte: metrics: merge manifests into existing manifests handling
  • Loading branch information
openshift-merge-bot[bot] authored Jan 2, 2025
2 parents 11961f9 + 1fc6921 commit 7d57ff8
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 161 deletions.
78 changes: 16 additions & 62 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ import (
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
"github.com/k8stopologyawareschedwg/deployer/pkg/manifests"
apimanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests/api"
rtemanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests/rte"
k8swgrteupdate "github.com/k8stopologyawareschedwg/deployer/pkg/objectupdate/rte"
nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1/helper/nodegroup"
Expand All @@ -61,11 +60,8 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
"github.com/openshift-kni/numaresources-operator/pkg/loglevel"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/merge"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -86,16 +82,15 @@ type poolDaemonSet struct {
// NUMAResourcesOperatorReconciler reconciles a NUMAResourcesOperator object
type NUMAResourcesOperatorReconciler struct {
client.Client
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
RTEMetricsManifests rtemetricsmanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtestate.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
}

// TODO: narrow down
Expand Down Expand Up @@ -528,30 +523,30 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx

// using a slice of poolDaemonSet instead of a map because Go maps assignment order is not consistent and non-deterministic
dsPoolPairs := []poolDaemonSet{}
err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy)
err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.Core.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy)
if err != nil {
return dsPoolPairs, err
}

err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.DaemonSet)
err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.Core.DaemonSet)
if err != nil {
return dsPoolPairs, err
}

err = loglevel.UpdatePodSpec(&r.RTEManifests.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel)
err = loglevel.UpdatePodSpec(&r.RTEManifests.Core.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel)
if err != nil {
return dsPoolPairs, err
}

// ConfigMap should be provided by the kubeletconfig reconciliation loop
if r.RTEManifests.ConfigMap != nil {
cmHash, err := hash.ComputeCurrentConfigMap(ctx, r.Client, r.RTEManifests.ConfigMap)
if r.RTEManifests.Core.ConfigMap != nil {
cmHash, err := hash.ComputeCurrentConfigMap(ctx, r.Client, r.RTEManifests.Core.ConfigMap)
if err != nil {
return dsPoolPairs, err
}
rteupdate.DaemonSetHashAnnotation(r.RTEManifests.DaemonSet, cmHash)
rteupdate.DaemonSetHashAnnotation(r.RTEManifests.Core.DaemonSet, cmHash)
}
rteupdate.SecurityContextConstraint(r.RTEManifests.SecurityContextConstraint, annotations.IsCustomPolicyEnabled(instance.Annotations))
rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, annotations.IsCustomPolicyEnabled(instance.Annotations))

processor := func(poolName string, gdm *rtestate.GeneratedDesiredManifest) error {
err := daemonsetUpdater(poolName, gdm)
Expand Down Expand Up @@ -583,29 +578,6 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx
}
}

for _, obj := range r.RTEMetricsManifests.ToObjects() {
// Check if the object already exists
existingObj := obj.DeepCopyObject().(client.Object)
err := r.Client.Get(ctx, client.ObjectKeyFromObject(obj), existingObj)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
if apierrors.IsNotFound(err) {
err := controllerutil.SetControllerReference(instance, obj, r.Scheme)
if err != nil {
return nil, fmt.Errorf("failed to set controller reference to %s %s: %w", obj.GetNamespace(), obj.GetName(), err)
}
err = r.Client.Create(ctx, obj)
if err != nil {
return nil, fmt.Errorf("failed to create %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
} else {
if err := updateIfNeeded(ctx, existingObj, obj, r.Client); err != nil {
return nil, err
}
}
}

if len(dsPoolPairs) < len(trees) {
klog.Warningf("daemonset and tree size mismatch: expected %d got in daemonsets %d", len(trees), len(dsPoolPairs))
}
Expand Down Expand Up @@ -808,21 +780,3 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
return nil, fmt.Errorf("unsupported platform")
}
}

func updateIfNeeded(ctx context.Context, existingObj, desiredObj client.Object, cli client.Client) error {
merged, err := merge.MetadataForUpdate(existingObj, desiredObj)
if err != nil {
return fmt.Errorf("could not merge object %s with existing: %w", desiredObj.GetName(), err)
}
isEqual, err := compare.Object(existingObj, merged)
if err != nil {
return fmt.Errorf("could not compare object %s with existing: %w", desiredObj.GetName(), err)
}
if !isEqual {
err = cli.Update(ctx, desiredObj)
if err != nil {
return fmt.Errorf("failed to update %s/%s: %w", desiredObj.GetNamespace(), desiredObj.GetName(), err)
}
}
return nil
}
49 changes: 24 additions & 25 deletions controllers/numaresourcesoperator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,15 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
recorder := record.NewFakeRecorder(bufferSize)

return &NUMAResourcesOperatorReconciler{
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
RTEMetricsManifests: rtemetricsmanifests,
Namespace: testNamespace,
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rte.Manifests{
Core: rteManifests,
Metrics: rtemetricsmanifests,
},
Namespace: testNamespace,
Images: images.Data{
Builtin: testImageSpec,
},
Expand Down Expand Up @@ -361,7 +363,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
BeforeEach(func() {
By("Create a new Daemonset with correct name but not owner reference")

ds := reconciler.RTEManifests.DaemonSet.DeepCopy()
ds := reconciler.RTEManifests.Core.DaemonSet.DeepCopy()
ds.Name = objectnames.GetComponentName(nro.Name, pn2)
ds.Namespace = testNamespace

Expand Down Expand Up @@ -902,7 +904,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
ds := &appsv1.DaemonSet{}
Expect(reconciler.Client.Get(context.TODO(), dsKey, ds)).To(Succeed())

Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.DaemonSet.Spec.Template.Spec.Tolerations), "mismatched DS default tolerations")
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.Core.DaemonSet.Spec.Template.Spec.Tolerations), "mismatched DS default tolerations")
})

It("should add the extra tolerations in the DS objects", func() {
Expand Down Expand Up @@ -1041,7 +1043,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(err).ToNot(HaveOccurred())

Expect(reconciler.Client.Get(context.TODO(), dsKey, ds)).To(Succeed())
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.DaemonSet.Spec.Template.Spec.Tolerations), "DS tolerations not restored to defaults")
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.Core.DaemonSet.Spec.Template.Spec.Tolerations), "DS tolerations not restored to defaults")

nroUpdated := &nropv1.NUMAResourcesOperator{}
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(nro), nroUpdated)).ToNot(HaveOccurred())
Expand Down Expand Up @@ -1481,33 +1483,30 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).ToNot(HaveOccurred())

By("Check All RTE metrics components are created")
for _, obj := range reconciler.RTEMetricsManifests.ToObjects() {
objectKey := client.ObjectKeyFromObject(obj)
switch obj.(type) {
case *corev1.Service:
service := &corev1.Service{}
Expect(reconciler.Client.Get(context.TODO(), objectKey, service)).ToNot(HaveOccurred())
default:
}
serKey := client.ObjectKey{
Name: "numaresources-rte-metrics-service", // TODO: staticize
Namespace: testNamespace,
}
ser := &corev1.Service{}
By("Check All RTE metrics components are created")
Expect(reconciler.Client.Get(context.TODO(), serKey, ser)).ToNot(HaveOccurred())
})
When("daemonsets are ready", func() {
var dsDesiredNumberScheduled int32
var dsNumReady int32
BeforeEach(func() {
dsDesiredNumberScheduled = reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled
dsNumReady = reconciler.RTEManifests.DaemonSet.Status.NumberReady
dsDesiredNumberScheduled = reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled
dsNumReady = reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady

reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled = int32(len(nro.Spec.NodeGroups))
reconciler.RTEManifests.DaemonSet.Status.NumberReady = reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled
reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled = int32(len(nro.Spec.NodeGroups))
reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady = reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled

_, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled = dsDesiredNumberScheduled
reconciler.RTEManifests.DaemonSet.Status.NumberReady = dsNumReady
reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled = dsDesiredNumberScheduled
reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady = dsNumReady

_, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expand Down
48 changes: 29 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import (
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/controlplane"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
schedupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/sched"
"github.com/openshift-kni/numaresources-operator/pkg/version"
Expand Down Expand Up @@ -229,8 +230,19 @@ func main() {
}
klog.InfoS("manifests loaded", "component", "RTE")

rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}
klog.InfoS("manifests loaded", "component", "RTEMetrics")

if params.renderMode {
os.Exit(manageRendering(params.render, clusterPlatform, apiManifests, rteManifests, namespace, params.enableScheduler))
rteMf := rtestate.Manifests{
Core: rteManifests,
Metrics: rteMetricsManifests,
}
os.Exit(manageRendering(params.render, clusterPlatform, apiManifests, rteMf, namespace, params.enableScheduler))
}

klog.InfoS("metrics server", "enabled", params.enableMetrics, "addr", params.metricsAddr)
Expand Down Expand Up @@ -269,24 +281,21 @@ func main() {
klog.ErrorS(err, "unable to render RTE manifests", "controller", "NUMAResourcesOperator")
os.Exit(1)
}
rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}

if err = (&controllers.NUMAResourcesOperatorReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
RTEMetricsManifests: rteMetricsManifests,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rtestate.Manifests{
Core: rteManifestsRendered,
Metrics: rteMetricsManifests,
},
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator")
os.Exit(1)
Expand Down Expand Up @@ -363,7 +372,7 @@ func manageIntrospection() int {
return 0
}

func manageRendering(render RenderParams, clusterPlatform platform.Platform, apiMf apimanifests.Manifests, rteMf rtemanifests.Manifests, namespace string, enableScheduler bool) int {
func manageRendering(render RenderParams, clusterPlatform platform.Platform, apiMf apimanifests.Manifests, rteMf rtestate.Manifests, namespace string, enableScheduler bool) int {
if render.NRTCRD {
if err := renderObjects(apiMf.ToObjects()); err != nil {
klog.ErrorS(err, "unable to render manifests")
Expand Down Expand Up @@ -398,12 +407,13 @@ func manageRendering(render RenderParams, clusterPlatform platform.Platform, api
User: render.Image.Exporter,
Builtin: images.SpecPath(),
}
mf, err := renderRTEManifests(rteMf, render.Namespace, imgs)
mf, err := renderRTEManifests(rteMf.Core, render.Namespace, imgs)
if err != nil {
klog.ErrorS(err, "unable to render RTE manifests")
return 1
}
objs = append(objs, mf.ToObjects()...)
objs = append(objs, rteMf.Metrics.ToObjects()...) // no rendering needed

if err := renderObjects(objs); err != nil {
klog.ErrorS(err, "unable to render manifests")
Expand Down
Loading

0 comments on commit 7d57ff8

Please sign in to comment.