Skip to content

Commit

Permalink
Set co/node-tuning Degraded when conflicting TuneD profiles exist
Browse files Browse the repository at this point in the history
It is possible to create Tuned CRs with TuneD profiles of the same name.
This is problematic when the duplicate TuneD profiles have different
content. While the problem is obvious when inspecting the operator
logs, this configuration issue is serious enough that it should be more
visible.

This change makes the node-tuning ClusterOperator object Degraded when
conflicting TuneD profiles are created.  This in turn creates an alert
for cluster administrators to deal with this misconfiguration.
  • Loading branch information
jmencak committed Nov 14, 2024
1 parent 5d3f56f commit 8658666
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 29 deletions.
6 changes: 6 additions & 0 deletions pkg/operator/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ type Controller struct {
// tracked as having kernel command-line conflict due to belonging
// to the same MCP.
bootcmdlineConflict map[string]bool

// tunedProfileConflict is the internal operator's cache of duplicate TuneD
// profile names with different profile content.
tunedProfileConflict []string
}

type wqKey struct {
Expand Down Expand Up @@ -118,6 +122,7 @@ func NewController() (*Controller, error) {
}

controller.bootcmdlineConflict = map[string]bool{}
controller.tunedProfileConflict = []string{}

// Initial event to bootstrap CR if it doesn't exist.
controller.workqueue.AddRateLimited(wqKey{kind: wqKindTuned, name: tunedv1.TunedDefaultResourceName})
Expand Down Expand Up @@ -624,6 +629,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
}

metrics.ProfileCalculated(profileMf.Name, computed.TunedProfileName)
c.tunedProfileConflict = computed.TuneDProfileConflict

profile, err := c.listers.TunedProfiles.Get(profileMf.Name)
if err != nil {
Expand Down
65 changes: 37 additions & 28 deletions pkg/operator/profilecalculator.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,13 @@ func (pc *ProfileCalculator) nodeChangeHandler(nodeName string) (bool, error) {
}

type ComputedProfile struct {
TunedProfileName string
AllProfiles []tunedv1.TunedProfile
Deferred util.DeferMode
MCLabels map[string]string
NodePoolName string
Operand tunedv1.OperandConfig
TunedProfileName string
AllProfiles []tunedv1.TunedProfile
Deferred util.DeferMode
MCLabels map[string]string
NodePoolName string
Operand tunedv1.OperandConfig
TuneDProfileConflict []string
}

type RecommendedProfile struct {
Expand All @@ -182,7 +183,7 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile,
return ComputedProfile{}, fmt.Errorf("failed to list Tuned: %v", err)
}

profilesAll := tunedProfiles(tunedList)
profilesAll, profileConflict := tunedProfiles(tunedList)
recommendAll := TunedRecommend(tunedList)
recommendProfile := func(nodeName string, iStart int) (int, RecommendedProfile, error) {
var i int
Expand Down Expand Up @@ -247,14 +248,16 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile,
_, err = pc.listers.TunedResources.Get(tunedv1.TunedDefaultResourceName)
if err != nil {
return ComputedProfile{
TunedProfileName: defaultProfile,
Operand: recommendedProfile.Config,
TunedProfileName: defaultProfile,
Operand: recommendedProfile.Config,
TuneDProfileConflict: profileConflict,
}, fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedDefaultResourceName, err)
}

return ComputedProfile{
TunedProfileName: defaultProfile,
Operand: recommendedProfile.Config,
TunedProfileName: defaultProfile,
Operand: recommendedProfile.Config,
TuneDProfileConflict: profileConflict,
}, fmt.Errorf("the default Tuned CR misses a catch-all profile selection")
}

Expand Down Expand Up @@ -292,11 +295,12 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile,
}

return ComputedProfile{
TunedProfileName: recommendedProfile.TunedProfileName,
AllProfiles: profilesAll,
Deferred: recommendedProfile.Deferred,
MCLabels: recommendedProfile.Labels,
Operand: recommendedProfile.Config,
TunedProfileName: recommendedProfile.TunedProfileName,
AllProfiles: profilesAll,
Deferred: recommendedProfile.Deferred,
MCLabels: recommendedProfile.Labels,
Operand: recommendedProfile.Config,
TuneDProfileConflict: profileConflict,
}, err
}

Expand Down Expand Up @@ -345,7 +349,7 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput
}
tunedList = append(tunedList, defaultTuned)

profilesAll := tunedProfiles(tunedList)
profilesAll, profileConflict := tunedProfiles(tunedList)
recommendAll := TunedRecommend(tunedList)
recommendProfile := func(nodeName string, iStart int) (int, HypershiftRecommendedProfile, error) {
var i int
Expand Down Expand Up @@ -385,9 +389,10 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput

if iStop == len(recommendAll) {
return ComputedProfile{
TunedProfileName: defaultProfile,
AllProfiles: profilesAll,
Operand: recommendedProfile.Config,
TunedProfileName: defaultProfile,
AllProfiles: profilesAll,
Operand: recommendedProfile.Config,
TuneDProfileConflict: profileConflict,
}, fmt.Errorf("the default Tuned CR misses a catch-all profile selection")
}

Expand Down Expand Up @@ -425,11 +430,12 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput
}

return ComputedProfile{
TunedProfileName: recommendedProfile.TunedProfileName,
AllProfiles: profilesAll,
Deferred: recommendedProfile.Deferred,
NodePoolName: recommendedProfile.NodePoolName,
Operand: recommendedProfile.Config,
TunedProfileName: recommendedProfile.TunedProfileName,
AllProfiles: profilesAll,
Deferred: recommendedProfile.Deferred,
NodePoolName: recommendedProfile.NodePoolName,
Operand: recommendedProfile.Config,
TuneDProfileConflict: profileConflict,
}, err
}

Expand Down Expand Up @@ -693,9 +699,11 @@ func (pc *ProfileCalculator) getNodePoolNameForNode(node *corev1.Node) (string,
}

// tunedProfiles returns a name-sorted TunedProfile slice out of
// a slice of Tuned objects.
func tunedProfiles(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedProfile {
// a slice of Tuned objects and a slice of duplicate TuneD profile
// names with different profile content.
func tunedProfiles(tunedSlice []*tunedv1.Tuned) ([]tunedv1.TunedProfile, []string) {
tunedProfiles := []tunedv1.TunedProfile{}
profileConflict := []string{}
m := map[string]tunedv1.TunedProfile{}

for _, tuned := range tunedSlice {
Expand All @@ -711,6 +719,7 @@ func tunedProfiles(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedProfile {
klog.Infof("duplicate profiles names %s but they have the same contents", *v.Name)
} else {
klog.Errorf("ERROR: duplicate profiles named %s with different contents found in Tuned CR %q", *v.Name, tuned.Name)
profileConflict = append(profileConflict, *v.Name)
}
}
m[*v.Name] = v
Expand All @@ -727,7 +736,7 @@ func tunedProfiles(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedProfile {
return *tunedProfiles[i].Name < *tunedProfiles[j].Name
})

return tunedProfiles
return tunedProfiles, profileConflict
}

type TunedRecommendInfo struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/profilecalculator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func TestTunedProfiles(t *testing.T) {
)

for i, tc := range tests {
tunedProfilesSorted := tunedProfiles(tc.input)
tunedProfilesSorted, _ := tunedProfiles(tc.input)

if !reflect.DeepEqual(tc.expectedOutput, tunedProfilesSorted) {
t.Errorf(
Expand Down
7 changes: 7 additions & 0 deletions pkg/operator/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
availableCondition.Message = fmt.Sprintf("%v/%v Profiles failed to be applied", numDegradedProfiles, len(profileList))
}

if len(c.tunedProfileConflict) > 0 {
klog.Infof(fmt.Sprintf("TuneD profile(s) %v with conflicting content exist", c.tunedProfileConflict))
degradedCondition.Status = configv1.ConditionTrue
degradedCondition.Reason = "ProfileConflict"
degradedCondition.Message = fmt.Sprintf("TuneD profile(s) %v with conflicting content exist", c.tunedProfileConflict)
}

numConflict := c.numProfilesWithBootcmdlineConflict(profileList)
if numConflict > 0 {
klog.Infof(fmt.Sprintf("%v/%v Profiles with bootcmdline conflict", numConflict, len(profileList)))
Expand Down

0 comments on commit 8658666

Please sign in to comment.