diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go index b680728784..0c91f32e67 100644 --- a/pkg/telemetry/telemetry.go +++ b/pkg/telemetry/telemetry.go @@ -5,6 +5,7 @@ package telemetry import ( "context" + "errors" "fmt" "maps" "os" @@ -18,6 +19,7 @@ import ( "github.com/microsoft/ApplicationInsights-Go/appinsights/contracts" "github.com/microsoft/retina/pkg/exporter" "github.com/microsoft/retina/pkg/log" + "github.com/prometheus/client_golang/prometheus" io_prometheus_client "github.com/prometheus/client_model/go" ) @@ -26,6 +28,8 @@ var ( version string mbShift uint64 = 20 + ErrorNilCombinedGatherer = errors.New("exporter.CombinedGatherer is nil") + // property keys kernelversion = "kernelversion" sysmem = "sysmem" @@ -178,7 +182,7 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) { t.trackWarning(err, "failed to get cpu usage") } - metricscardinality, err := metricsCardinality() + metricscardinality, err := metricsCardinality(exporter.CombinedGatherer) if err != nil { t.trackWarning(err, "failed to get metrics cardinality") } @@ -189,8 +193,12 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) { maps.Copy(props, t.profile.GetMemoryUsage()) t.TrackEvent("heartbeat", props) } -func metricsCardinality() (int, error) { - metricFamilies, err := exporter.CombinedGatherer.Gather() +func metricsCardinality(gatherer prometheus.Gatherer) (int, error) { + if gatherer == nil { + return 0, fmt.Errorf("failed to get metrics Gatherer: %w", ErrorNilCombinedGatherer) + } + + metricFamilies, err := gatherer.Gather() if err != nil { return 0, fmt.Errorf("failed to gather metrics: %w", err) } @@ -203,23 +211,44 @@ func metricsCardinality() (int, error) { case io_prometheus_client.MetricType_HISTOGRAM: metrics := mf.GetMetric() for _, m := range metrics { - metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count + histogram := m.GetHistogram() + if histogram != nil { + buckets := histogram.GetBucket() + if buckets != nil { + metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count + } + } } case io_prometheus_client.MetricType_GAUGE_HISTOGRAM: metrics := mf.GetMetric() for _, m := range metrics { - metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count + histogram := m.GetHistogram() + if histogram != nil { + buckets := histogram.GetBucket() + if buckets != nil { + metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count + } + } } case io_prometheus_client.MetricType_SUMMARY: metrics := mf.GetMetric() for _, m := range metrics { - metricscardinality += len(m.GetSummary().GetQuantile()) + 2 // +2 for _sum and _count + summary := m.GetSummary() + if summary != nil { + quantiles := summary.GetQuantile() + if quantiles != nil { + metricscardinality += len(quantiles) + 2 // +2 for _sum and _count + } + } } default: - metricscardinality += len(mf.GetMetric()) + metrics := mf.GetMetric() + if metrics != nil { + metricscardinality += len(metrics) + } } } diff --git a/pkg/telemetry/telemetry_test.go b/pkg/telemetry/telemetry_test.go index ae8c050dd9..6dfae581db 100644 --- a/pkg/telemetry/telemetry_test.go +++ b/pkg/telemetry/telemetry_test.go @@ -11,6 +11,8 @@ import ( "sync" "testing" + "github.com/prometheus/client_golang/prometheus" + io_prometheus_client "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -97,6 +99,175 @@ func TestHeartbeat(t *testing.T) { } } +func TestMetricsCardinality_nilCombinedGatherer(t *testing.T) { + var gatherer prometheus.Gatherer + + t.Run("Combined Gatherer is nil", func(t *testing.T) { + m, err := metricsCardinality(gatherer) + require.Error(t, err) + require.Equal(t, 0, m) + }) +} + +type tGatherer struct { + mf []*io_prometheus_client.MetricFamily +} + +func (t *tGatherer) Gather() ([]*io_prometheus_client.MetricFamily, error) { + return t.mf, nil +} + +func TestMetricsCardinality(t *testing.T) { + counterType := io_prometheus_client.MetricType_COUNTER + histogramType := io_prometheus_client.MetricType_HISTOGRAM + gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM + summaryType := io_prometheus_client.MetricType_SUMMARY + + simpleCounter := prometheus.NewCounter(prometheus.CounterOpts{ //nolint:promlinter // its a test + Name: "test_counter", + Help: "test counter", + }) + + histogram3buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test + Name: "3buckets_histogram", + Help: "test histogram", + Buckets: []float64{1, 2, 3}, + }) + + histogram5buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test + Name: "5buckets_histogram", + Help: "test histogram", + Buckets: []float64{1, 2, 3, 4, 5}, + }) + summary := prometheus.NewSummary(prometheus.SummaryOpts{ //nolint:promlinter // its a test + Name: "test_summary", + Help: "test summary", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }) + + outputCounter := io_prometheus_client.Metric{} + err := simpleCounter.Write(&outputCounter) + require.NoError(t, err) + testCounter := io_prometheus_client.MetricFamily{ + Type: &counterType, + Metric: []*io_prometheus_client.Metric{ + &outputCounter, + }, + } + + outputHistogram3Buckets := io_prometheus_client.Metric{} + err = histogram3buckets.Write(&outputHistogram3Buckets) + require.NoError(t, err) + test3BucketsHistogram := io_prometheus_client.MetricFamily{ + Type: &histogramType, + Metric: []*io_prometheus_client.Metric{ + &outputHistogram3Buckets, + }, + } + + outputHistogram5Buckets := io_prometheus_client.Metric{} + err = histogram5buckets.Write(&outputHistogram5Buckets) + require.NoError(t, err) + test5BucketsGaugeHistogram := io_prometheus_client.MetricFamily{ + Type: &gaugeHistogramType, + Metric: []*io_prometheus_client.Metric{ + &outputHistogram5Buckets, + }, + } + + outputSummary := io_prometheus_client.Metric{} + err = summary.Write(&outputSummary) + require.NoError(t, err) + testSummary := io_prometheus_client.MetricFamily{ + Type: &summaryType, + Metric: []*io_prometheus_client.Metric{ + &outputSummary, + }, + } + + testcases := []struct { + name string + mf []*io_prometheus_client.MetricFamily + expectedCardinality int + }{ + { + "Simple Counter", + []*io_prometheus_client.MetricFamily{&testCounter}, + 1, + }, + { + "3 Buckets Histogram", + []*io_prometheus_client.MetricFamily{&test3BucketsHistogram}, + 6, + }, + { + "5 Buckets Gauge Histogram", + []*io_prometheus_client.MetricFamily{&test5BucketsGaugeHistogram}, + 8, + }, + { + "Summary", + []*io_prometheus_client.MetricFamily{&testSummary}, + 5, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + testGatherer := tGatherer{ + mf: tc.mf, + } + gotCardinality, err := metricsCardinality(&testGatherer) + require.NoError(t, err) + require.Equal(t, tc.expectedCardinality, gotCardinality) + }) + } +} + +func TestMetricsCardinality_NilHistogram(t *testing.T) { + histogramType := io_prometheus_client.MetricType_HISTOGRAM + gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM + summaryType := io_prometheus_client.MetricType_SUMMARY + + nilHistogram := io_prometheus_client.MetricFamily{ + Type: &histogramType, + Metric: []*io_prometheus_client.Metric{ + { + Histogram: nil, + }, + }, + } + nilGaugeHistogram := io_prometheus_client.MetricFamily{ + Type: &gaugeHistogramType, + Metric: []*io_prometheus_client.Metric{ + { + Histogram: nil, + }, + }, + } + nilSummary := io_prometheus_client.MetricFamily{ + Type: &summaryType, + Metric: []*io_prometheus_client.Metric{ + { + Summary: nil, + }, + }, + } + + testGatherer := tGatherer{ + mf: []*io_prometheus_client.MetricFamily{ + &nilHistogram, + &nilGaugeHistogram, + &nilSummary, + }, + } + + t.Run("Nil Histogram", func(t *testing.T) { + m, err := metricsCardinality(&testGatherer) + require.NoError(t, err) + require.Equal(t, 0, m) + }) +} + func TestTelemetryClient_StopPerf(t *testing.T) { type fields struct { properties map[string]string