Skip to content

Commit

Permalink
fix: added new and grouping allowed sno pathological events
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff-roche committed Dec 10, 2024
1 parent 8737ae6 commit 678dab7
Showing 1 changed file with 37 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -479,10 +479,7 @@ func NewUniversalPathologicalEventMatchers(kubeConfig *rest.Config, finalInterva
topologyAwareMatcher := newTopologyAwareHintsDisabledDuringTaintTestsPathologicalEventMatcher(finalIntervals)
registry.AddPathologicalEventMatcherOrDie(topologyAwareMatcher)

singleNodeConnectionRefusedMatcher := newSingleNodeConnectionRefusedEventMatcher(finalIntervals)
singleNodeKubeAPIServerProgressingMatcher := newSingleNodeKubeAPIProgressingEventMatcher(finalIntervals)
registry.AddPathologicalEventMatcherOrDie(singleNodeConnectionRefusedMatcher)
registry.AddPathologicalEventMatcherOrDie(singleNodeKubeAPIServerProgressingMatcher)
addAllowedSingleNodeMatchers(registry, finalIntervals)

vsphereConfigurationTestsRollOutTooOftenMatcher := newVsphereConfigurationTestsRollOutTooOftenEventMatcher(finalIntervals)
registry.AddPathologicalEventMatcherOrDie(vsphereConfigurationTestsRollOutTooOftenMatcher)
Expand Down Expand Up @@ -774,6 +771,42 @@ func IsDuringAPIServerProgressingOnSNO(topology string, events monitorapi.Interv
}
}

// addAllowedSingleNodeMatchers registers every pathological event matcher that
// is known and tolerated on single node clusters.
//
// Two of the matchers are built here: they reuse the reason/message regexes of
// AllowBackOffRestartingFailedContainer, carry the single-replica topology, and
// are narrowed by namespace and pod locator regexes (tracked in OCPBUGS-45071).
// The other two come from the existing single node constructors, which receive
// finalIntervals.
//
// All matchers are constructed before any of them is registered, and they are
// registered in list order through AddPathologicalEventMatcherOrDie.
func addAllowedSingleNodeMatchers(registry *AllowedPathologicalEventRegistry, finalIntervals monitorapi.Intervals) {
	singleReplica := v1.SingleReplicaTopologyMode

	// backOffMatcher builds a back-off/restart matcher limited to one
	// namespace/pod pair; every matcher it returns points at the same
	// singleReplica topology value.
	backOffMatcher := func(matcherName, namespacePattern, podPattern string) *SimplePathologicalEventMatcher {
		return &SimplePathologicalEventMatcher{
			name:               matcherName,
			messageReasonRegex: AllowBackOffRestartingFailedContainer.messageReasonRegex,
			messageHumanRegex:  AllowBackOffRestartingFailedContainer.messageHumanRegex,
			topology:           &singleReplica,
			locatorKeyRegexes: map[monitorapi.LocatorKey]*regexp.Regexp{
				monitorapi.LocatorNamespaceKey: regexp.MustCompile(namespacePattern),
				monitorapi.LocatorPodKey:       regexp.MustCompile(podPattern),
			},
			jira: "https://issues.redhat.com/browse/OCPBUGS-45071",
		}
	}

	// Known and allowed single node events, in registration order.
	singleNodeMatchers := []EventMatcher{
		backOffMatcher(
			"AllowSingleNodeOperatorControllerBackOffRestartingFailedContainer",
			"openshift-operator-controller",
			"operator-controller-controller-manager",
		),
		backOffMatcher(
			"AllowSingleNodeCatalogdControllerBackOffRestartingFailedContainer",
			"openshift-catalogd",
			"catalogd-controller-manager",
		),
		newSingleNodeConnectionRefusedEventMatcher(finalIntervals),
		newSingleNodeKubeAPIProgressingEventMatcher(finalIntervals),
	}

	for i := range singleNodeMatchers {
		registry.AddPathologicalEventMatcherOrDie(singleNodeMatchers[i])
	}
}

func getInstallCompletionTime(kubeClientConfig *rest.Config) *metav1.Time {
configClient, err := configclient.NewForConfig(kubeClientConfig)
if err != nil {
Expand Down

0 comments on commit 678dab7

Please sign in to comment.