From 678dab7de111723253c5f67a05ded69317cd29c7 Mon Sep 17 00:00:00 2001
From: Jeff Roche
Date: Thu, 5 Dec 2024 10:55:06 -0500
Subject: [PATCH] fix: added new and grouping allowed sno pathological events

---
 .../duplicated_event_patterns.go              | 41 +++++++++++++++++--
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_event_patterns.go b/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_event_patterns.go
index dba3218f18d5..3255eae7c962 100644
--- a/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_event_patterns.go
+++ b/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_event_patterns.go
@@ -479,10 +479,7 @@ func NewUniversalPathologicalEventMatchers(kubeConfig *rest.Config, finalInterva
 	topologyAwareMatcher := newTopologyAwareHintsDisabledDuringTaintTestsPathologicalEventMatcher(finalIntervals)
 	registry.AddPathologicalEventMatcherOrDie(topologyAwareMatcher)
 
-	singleNodeConnectionRefusedMatcher := newSingleNodeConnectionRefusedEventMatcher(finalIntervals)
-	singleNodeKubeAPIServerProgressingMatcher := newSingleNodeKubeAPIProgressingEventMatcher(finalIntervals)
-	registry.AddPathologicalEventMatcherOrDie(singleNodeConnectionRefusedMatcher)
-	registry.AddPathologicalEventMatcherOrDie(singleNodeKubeAPIServerProgressingMatcher)
+	addAllowedSingleNodeMatchers(registry, finalIntervals)
 
 	vsphereConfigurationTestsRollOutTooOftenMatcher := newVsphereConfigurationTestsRollOutTooOftenEventMatcher(finalIntervals)
 	registry.AddPathologicalEventMatcherOrDie(vsphereConfigurationTestsRollOutTooOftenMatcher)
@@ -774,6 +771,42 @@ func IsDuringAPIServerProgressingOnSNO(topology string, events monitorapi.Interv
 	}
 }
 
+func addAllowedSingleNodeMatchers(registry *AllowedPathologicalEventRegistry, finalIntervals monitorapi.Intervals) {
+	snoTopology := v1.SingleReplicaTopologyMode
+
+	// Known, allowed single-node events: two BackOff matchers with topology set to SingleReplicaTopologyMode (OCPBUGS-45071) and the two pre-existing single-node matchers; the loop below registers each one.
+	allowedEvents := []EventMatcher{
+		&SimplePathologicalEventMatcher{
+			name:               "AllowSingleNodeOperatorControllerBackOffRestartingFailedContainer",
+			messageReasonRegex: AllowBackOffRestartingFailedContainer.messageReasonRegex,
+			messageHumanRegex:  AllowBackOffRestartingFailedContainer.messageHumanRegex,
+			topology:           &snoTopology,
+			locatorKeyRegexes: map[monitorapi.LocatorKey]*regexp.Regexp{
+				monitorapi.LocatorNamespaceKey: regexp.MustCompile("openshift-operator-controller"),
+				monitorapi.LocatorPodKey:       regexp.MustCompile("operator-controller-controller-manager"),
+			},
+			jira: "https://issues.redhat.com/browse/OCPBUGS-45071",
+		},
+		&SimplePathologicalEventMatcher{
+			name:               "AllowSingleNodeCatalogdControllerBackOffRestartingFailedContainer",
+			messageReasonRegex: AllowBackOffRestartingFailedContainer.messageReasonRegex,
+			messageHumanRegex:  AllowBackOffRestartingFailedContainer.messageHumanRegex,
+			topology:           &snoTopology,
+			locatorKeyRegexes: map[monitorapi.LocatorKey]*regexp.Regexp{
+				monitorapi.LocatorNamespaceKey: regexp.MustCompile("openshift-catalogd"),
+				monitorapi.LocatorPodKey:       regexp.MustCompile("catalogd-controller-manager"),
+			},
+			jira: "https://issues.redhat.com/browse/OCPBUGS-45071",
+		},
+		newSingleNodeConnectionRefusedEventMatcher(finalIntervals),
+		newSingleNodeKubeAPIProgressingEventMatcher(finalIntervals),
+	}
+
+	for _, event := range allowedEvents {
+		registry.AddPathologicalEventMatcherOrDie(event)
+	}
+}
+
 func getInstallCompletionTime(kubeClientConfig *rest.Config) *metav1.Time {
 	configClient, err := configclient.NewForConfig(kubeClientConfig)
 	if err != nil {