diff --git a/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest.go b/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest.go
index 3fbad8c81453..6fae8d748dc5 100644
--- a/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest.go
+++ b/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest.go
@@ -2,10 +2,16 @@ package operatorstateanalyzer
 
 import (
 	"context"
+	"fmt"
+	"path/filepath"
+	"sort"
+	"strconv"
 	"time"
 
 	"github.com/openshift/origin/pkg/monitortestframework"
 
+	"github.com/openshift/origin/pkg/dataloader"
 	"github.com/openshift/origin/pkg/monitor/monitorapi"
 	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
+	"github.com/sirupsen/logrus"
 	"k8s.io/client-go/rest"
@@ -14,6 +20,34 @@ import (
 type operatorStateChecker struct {
 }
 
+// Condition names read from the monitorapi.AnnotationCondition annotation of
+// operator-state intervals. Only these two conditions are aggregated.
+const (
+	conditionProgressing = "Progressing"
+	conditionDegraded    = "Degraded"
+)
+
+// OperatorStateMetrics summarizes how often, and for how long, a single
+// cluster operator was observed Progressing or Degraded.
+type OperatorStateMetrics struct {
+	// OperatorName is the cluster operator these totals belong to.
+	OperatorName string
+
+	// ProgressingCount is the number of Progressing intervals counted.
+	ProgressingCount int
+	// TotalProgressingSeconds is the summed length of those intervals.
+	TotalProgressingSeconds float64
+	// MaxIndividualProgressingSeconds is the longest single Progressing interval.
+	MaxIndividualProgressingSeconds float64
+
+	// DegradedCount is the number of Degraded intervals counted.
+	DegradedCount int
+	// TotalDegradedSeconds is the summed length of those intervals.
+	TotalDegradedSeconds float64
+	// MaxIndividualDegradedSeconds is the longest single Degraded interval.
+	MaxIndividualDegradedSeconds float64
+}
+
 func NewAnalyzer() monitortestframework.MonitorTest {
 	return &operatorStateChecker{}
 }
@@ -44,9 +78,128 @@ func (*operatorStateChecker) EvaluateTestsFromConstructedIntervals(ctx context.C
 }
 
+// WriteContentToStorage aggregates the Progressing/Degraded operator-state
+// intervals in finalIntervals and writes them as a dataloader data file under
+// storageDir. No file is written when there are no rows to report.
 func (*operatorStateChecker) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
+	rows := generateRowsFromMetrics(calculateOperatorStateMetrics(finalIntervals))
+	if len(rows) == 0 {
+		// Nothing was Progressing or Degraded, so skip writing an empty data file.
+		return nil
+	}
+
+	dataFile := dataloader.DataFile{
+		TableName: "operator_state_metrics",
+		Schema: map[string]dataloader.DataType{
+			"Operator":                     dataloader.DataTypeString,
+			"State":                        dataloader.DataTypeString,
+			"Count":                        dataloader.DataTypeInteger,
+			"TotalSeconds":                 dataloader.DataTypeFloat64,
+			"MaxIndividualDurationSeconds": dataloader.DataTypeFloat64,
+		},
+		Rows: rows,
+	}
+	fileName := filepath.Join(storageDir, fmt.Sprintf("operator-state-metrics%s-%s", timeSuffix, dataloader.AutoDataLoaderSuffix))
+	if err := dataloader.WriteDataFile(fileName, dataFile); err != nil {
+		return fmt.Errorf("failed to write operator state metrics: %w", err)
+	}
+	logrus.Infof("Write operator state metrics to %s successfully.", fileName)
+
 	return nil
 }
 
+// calculateOperatorStateMetrics aggregates the Progressing and Degraded
+// operator-state intervals in finalIntervals into totals keyed by operator
+// name. Intervals from another source, with a non-ClusterOperator locator, or
+// for any other condition are skipped, so every operator in the result has at
+// least one counted interval.
+func calculateOperatorStateMetrics(finalIntervals monitorapi.Intervals) map[string]*OperatorStateMetrics {
+	metrics := make(map[string]*OperatorStateMetrics)
+
+	for _, interval := range finalIntervals {
+		if interval.Source != monitorapi.SourceOperatorState {
+			continue
+		}
+		if interval.Locator.Type != monitorapi.LocatorTypeClusterOperator {
+			continue
+		}
+		condition := interval.Message.Annotations[monitorapi.AnnotationCondition]
+		if condition != conditionProgressing && condition != conditionDegraded {
+			// Skip before touching the map so an operator that was never
+			// Progressing or Degraded does not get an all-zero entry.
+			continue
+		}
+
+		operatorName := interval.Locator.Keys[monitorapi.LocatorClusterOperatorKey]
+		metric, ok := metrics[operatorName]
+		if !ok {
+			metric = &OperatorStateMetrics{OperatorName: operatorName}
+			metrics[operatorName] = metric
+		}
+
+		// A To earlier than From (for example an unset To) would subtract from
+		// the totals, so such an interval is counted with zero length.
+		duration := interval.To.Sub(interval.From).Seconds()
+		if duration < 0 {
+			duration = 0
+		}
+
+		switch condition {
+		case conditionProgressing:
+			metric.ProgressingCount++
+			metric.TotalProgressingSeconds += duration
+			if duration > metric.MaxIndividualProgressingSeconds {
+				metric.MaxIndividualProgressingSeconds = duration
+			}
+		case conditionDegraded:
+			metric.DegradedCount++
+			metric.TotalDegradedSeconds += duration
+			if duration > metric.MaxIndividualDegradedSeconds {
+				metric.MaxIndividualDegradedSeconds = duration
+			}
+		}
+	}
+	return metrics
+}
+
+// generateRowsFromMetrics flattens metrics into dataloader rows, one per
+// operator and state with a non-zero count. Rows are ordered by operator
+// name, Progressing before Degraded, and the returned slice is never nil.
+func generateRowsFromMetrics(metrics map[string]*OperatorStateMetrics) []map[string]string {
+	// Map iteration order is random; sort the names for a stable row order.
+	operatorNames := make([]string, 0, len(metrics))
+	for name := range metrics {
+		operatorNames = append(operatorNames, name)
+	}
+	sort.Strings(operatorNames)
+
+	// Each operator contributes at most two rows.
+	rows := make([]map[string]string, 0, 2*len(operatorNames))
+	for _, operatorName := range operatorNames {
+		metric := metrics[operatorName]
+		if metric.ProgressingCount > 0 {
+			rows = append(rows, newStateRow(operatorName, conditionProgressing,
+				metric.ProgressingCount, metric.TotalProgressingSeconds, metric.MaxIndividualProgressingSeconds))
+		}
+		if metric.DegradedCount > 0 {
+			rows = append(rows, newStateRow(operatorName, conditionDegraded,
+				metric.DegradedCount, metric.TotalDegradedSeconds, metric.MaxIndividualDegradedSeconds))
+		}
+	}
+	return rows
+}
+
+// newStateRow builds the row for one operator/state pair. All values are
+// strings; the two duration columns are formatted with six decimal places.
+func newStateRow(operatorName, state string, count int, totalSeconds, maxSeconds float64) map[string]string {
+	return map[string]string{
+		"Operator":                     operatorName,
+		"State":                        state,
+		"Count":                        strconv.Itoa(count),
+		"TotalSeconds":                 strconv.FormatFloat(totalSeconds, 'f', 6, 64),
+		"MaxIndividualDurationSeconds": strconv.FormatFloat(maxSeconds, 'f', 6, 64),
+	}
+}
+
 func (*operatorStateChecker) Cleanup(ctx context.Context) error {
 	// TODO wire up the start to a context we can kill here
 	return nil
diff --git a/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest_test.go b/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest_test.go
new file mode 100644
index 000000000000..c61b5714e798
--- /dev/null
+++ b/pkg/monitortests/clusterversionoperator/operatorstateanalyzer/monitortest_test.go
@@ -0,0 +1,238 @@
+package operatorstateanalyzer
+
+import (
+	"testing"
+	"time"
+
+	"github.com/openshift/origin/pkg/monitor/monitorapi"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestOperatorStateAnalyzer checks both the aggregation of intervals into
+// per-operator metrics and the conversion of those metrics into ordered rows.
+func TestOperatorStateAnalyzer(t *testing.T) {
+	tests := []struct {
+		name            string
+		intervals       monitorapi.Intervals
+		expectedMetrics map[string]*OperatorStateMetrics
+		expectedRows    []map[string]string
+	}{
+		{
+			name: "single operator, progressing and degraded",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-a", "Progressing", 10),
+				makeTestInterval("operator-a", "Progressing", 5),
+				makeTestInterval("operator-a", "Degraded", 15),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{
+				"operator-a": {
+					OperatorName:                    "operator-a",
+					ProgressingCount:                2,
+					TotalProgressingSeconds:         15,
+					MaxIndividualProgressingSeconds: 10,
+					DegradedCount:                   1,
+					TotalDegradedSeconds:            15,
+					MaxIndividualDegradedSeconds:    15,
+				},
+			},
+			expectedRows: []map[string]string{
+				{
+					"Operator":                     "operator-a",
+					"State":                        "Progressing",
+					"Count":                        "2",
+					"TotalSeconds":                 "15.000000",
+					"MaxIndividualDurationSeconds": "10.000000",
+				},
+				{
+					"Operator":                     "operator-a",
+					"State":                        "Degraded",
+					"Count":                        "1",
+					"TotalSeconds":                 "15.000000",
+					"MaxIndividualDurationSeconds": "15.000000",
+				},
+			},
+		},
+		{
+			name: "multiple operators",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-a", "Progressing", 10),
+				makeTestInterval("operator-b", "Degraded", 20),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{
+				"operator-a": {
+					OperatorName:                    "operator-a",
+					ProgressingCount:                1,
+					TotalProgressingSeconds:         10,
+					MaxIndividualProgressingSeconds: 10,
+				},
+				"operator-b": {
+					OperatorName:                 "operator-b",
+					DegradedCount:                1,
+					TotalDegradedSeconds:         20,
+					MaxIndividualDegradedSeconds: 20,
+				},
+			},
+			expectedRows: []map[string]string{
+				{
+					"Operator":                     "operator-a",
+					"State":                        "Progressing",
+					"Count":                        "1",
+					"TotalSeconds":                 "10.000000",
+					"MaxIndividualDurationSeconds": "10.000000",
+				},
+				{
+					"Operator":                     "operator-b",
+					"State":                        "Degraded",
+					"Count":                        "1",
+					"TotalSeconds":                 "20.000000",
+					"MaxIndividualDurationSeconds": "20.000000",
+				},
+			},
+		},
+		{
+			name:            "no relevant intervals",
+			intervals:       monitorapi.Intervals{},
+			expectedMetrics: map[string]*OperatorStateMetrics{},
+			expectedRows:    []map[string]string{},
+		},
+		{
+			name: "operator with only degraded state",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-c", "Degraded", 30),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{
+				"operator-c": {
+					OperatorName:                 "operator-c",
+					DegradedCount:                1,
+					TotalDegradedSeconds:         30,
+					MaxIndividualDegradedSeconds: 30,
+				},
+			},
+			expectedRows: []map[string]string{
+				{
+					"Operator":                     "operator-c",
+					"State":                        "Degraded",
+					"Count":                        "1",
+					"TotalSeconds":                 "30.000000",
+					"MaxIndividualDurationSeconds": "30.000000",
+				},
+			},
+		},
+		{
+			name: "other conditions are ignored",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-d", "Available", 10),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{},
+			expectedRows:    []map[string]string{},
+		},
+		{
+			name: "rows are ordered by operator name",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-z", "Degraded", 3),
+				makeTestInterval("operator-a", "Degraded", 4),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{
+				"operator-a": {
+					OperatorName:                 "operator-a",
+					DegradedCount:                1,
+					TotalDegradedSeconds:         4,
+					MaxIndividualDegradedSeconds: 4,
+				},
+				"operator-z": {
+					OperatorName:                 "operator-z",
+					DegradedCount:                1,
+					TotalDegradedSeconds:         3,
+					MaxIndividualDegradedSeconds: 3,
+				},
+			},
+			expectedRows: []map[string]string{
+				{
+					"Operator":                     "operator-a",
+					"State":                        "Degraded",
+					"Count":                        "1",
+					"TotalSeconds":                 "4.000000",
+					"MaxIndividualDurationSeconds": "4.000000",
+				},
+				{
+					"Operator":                     "operator-z",
+					"State":                        "Degraded",
+					"Count":                        "1",
+					"TotalSeconds":                 "3.000000",
+					"MaxIndividualDurationSeconds": "3.000000",
+				},
+			},
+		},
+		{
+			name: "interval ending before it starts adds no time",
+			intervals: monitorapi.Intervals{
+				makeTestInterval("operator-e", "Progressing", -5),
+			},
+			expectedMetrics: map[string]*OperatorStateMetrics{
+				"operator-e": {
+					OperatorName:     "operator-e",
+					ProgressingCount: 1,
+				},
+			},
+			expectedRows: []map[string]string{
+				{
+					"Operator":                     "operator-e",
+					"State":                        "Progressing",
+					"Count":                        "1",
+					"TotalSeconds":                 "0.000000",
+					"MaxIndividualDurationSeconds": "0.000000",
+				},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			// Test calculateOperatorStateMetrics
+			metrics := calculateOperatorStateMetrics(tc.intervals)
+			require.Equal(t, len(tc.expectedMetrics), len(metrics), "number of operators should match")
+			for op, expected := range tc.expectedMetrics {
+				actual, ok := metrics[op]
+				require.True(t, ok, "operator %s not found in metrics", op)
+				assert.Equal(t, expected.OperatorName, actual.OperatorName, "OperatorName should match")
+				assert.Equal(t, expected.ProgressingCount, actual.ProgressingCount, "ProgressingCount should match")
+				assert.InDelta(t, expected.TotalProgressingSeconds, actual.TotalProgressingSeconds, 0.001, "TotalProgressingSeconds should match")
+				assert.InDelta(t, expected.MaxIndividualProgressingSeconds, actual.MaxIndividualProgressingSeconds, 0.001, "MaxIndividualProgressingSeconds should match")
+				assert.Equal(t, expected.DegradedCount, actual.DegradedCount, "DegradedCount should match")
+				assert.InDelta(t, expected.TotalDegradedSeconds, actual.TotalDegradedSeconds, 0.001, "TotalDegradedSeconds should match")
+				assert.InDelta(t, expected.MaxIndividualDegradedSeconds, actual.MaxIndividualDegradedSeconds, 0.001, "MaxIndividualDegradedSeconds should match")
+			}
+
+			// Row order is deterministic, so compare with Equal rather than ElementsMatch.
+			rows := generateRowsFromMetrics(metrics)
+			assert.Equal(t, tc.expectedRows, rows, "generated rows should match expected rows")
+		})
+	}
+}
+
+// makeTestInterval builds an operator-state interval for operatorName whose
+// condition annotation is condition. It starts at a fixed instant and lasts
+// durationSeconds; a negative value yields a To that precedes From.
+func makeTestInterval(operatorName, condition string, durationSeconds float64) monitorapi.Interval {
+	from := time.Unix(1, 0)
+	to := from.Add(time.Duration(durationSeconds * float64(time.Second)))
+	return monitorapi.Interval{
+		Source: monitorapi.SourceOperatorState,
+		Condition: monitorapi.Condition{
+			Locator: monitorapi.Locator{
+				Type: monitorapi.LocatorTypeClusterOperator,
+				Keys: map[monitorapi.LocatorKey]string{
+					monitorapi.LocatorClusterOperatorKey: operatorName,
+				},
+			},
+			Message: monitorapi.Message{
+				Annotations: map[monitorapi.AnnotationKey]string{
+					monitorapi.AnnotationCondition: condition,
+				},
+			},
+		},
+		From: from,
+		To:   to,
+	}
+}