Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(heartbeat): add nil check in heartbeat's metricscardinality #1293

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions pkg/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package telemetry

import (
"context"
"errors"
"fmt"
"maps"
"os"
Expand All @@ -18,6 +19,7 @@ import (
"github.com/microsoft/ApplicationInsights-Go/appinsights/contracts"
"github.com/microsoft/retina/pkg/exporter"
"github.com/microsoft/retina/pkg/log"
"github.com/prometheus/client_golang/prometheus"
io_prometheus_client "github.com/prometheus/client_model/go"
)

Expand All @@ -26,6 +28,8 @@ var (
version string
mbShift uint64 = 20

ErrorNilCombinedGatherer = errors.New("exporter.CombinedGatherer is nil")

// property keys
kernelversion = "kernelversion"
sysmem = "sysmem"
Expand Down Expand Up @@ -178,7 +182,7 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) {
t.trackWarning(err, "failed to get cpu usage")
}

metricscardinality, err := metricsCardinality()
metricscardinality, err := metricsCardinality(exporter.CombinedGatherer)
if err != nil {
t.trackWarning(err, "failed to get metrics cardinality")
}
Expand All @@ -189,8 +193,12 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) {
maps.Copy(props, t.profile.GetMemoryUsage())
t.TrackEvent("heartbeat", props)
}
func metricsCardinality() (int, error) {
metricFamilies, err := exporter.CombinedGatherer.Gather()
func metricsCardinality(gatherer prometheus.Gatherer) (int, error) {
if gatherer == nil {
return 0, fmt.Errorf("failed to get metrics Gatherer: %w", ErrorNilCombinedGatherer)
}

metricFamilies, err := gatherer.Gather()
if err != nil {
return 0, fmt.Errorf("failed to gather metrics: %w", err)
}
Expand All @@ -203,23 +211,44 @@ func metricsCardinality() (int, error) {
case io_prometheus_client.MetricType_HISTOGRAM:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count
histogram := m.GetHistogram()
if histogram != nil {
buckets := histogram.GetBucket()
if buckets != nil {
metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count
}
}
}

case io_prometheus_client.MetricType_GAUGE_HISTOGRAM:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count
histogram := m.GetHistogram()
if histogram != nil {
buckets := histogram.GetBucket()
if buckets != nil {
metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count
}
}
}

case io_prometheus_client.MetricType_SUMMARY:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetSummary().GetQuantile()) + 2 // +2 for _sum and _count
summary := m.GetSummary()
if summary != nil {
quantiles := summary.GetQuantile()
if quantiles != nil {
metricscardinality += len(quantiles) + 2 // +2 for _sum and _count
}
}
}

default:
metricscardinality += len(mf.GetMetric())
metrics := mf.GetMetric()
if metrics != nil {
metricscardinality += len(metrics)
}

}
}
Expand Down
171 changes: 171 additions & 0 deletions pkg/telemetry/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"sync"
"testing"

"github.com/prometheus/client_golang/prometheus"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -97,6 +99,175 @@ func TestHeartbeat(t *testing.T) {
}
}

func TestMetricsCardinality_nilCombinedGatherer(t *testing.T) {
var gatherer prometheus.Gatherer

t.Run("Combined Gatherer is nil", func(t *testing.T) {
m, err := metricsCardinality(gatherer)
require.Error(t, err)
require.Equal(t, 0, m)
})
}

type tGatherer struct {
mf []*io_prometheus_client.MetricFamily
}

func (t *tGatherer) Gather() ([]*io_prometheus_client.MetricFamily, error) {
return t.mf, nil
}

func TestMetricsCardinality(t *testing.T) {
counterType := io_prometheus_client.MetricType_COUNTER
histogramType := io_prometheus_client.MetricType_HISTOGRAM
gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM
summaryType := io_prometheus_client.MetricType_SUMMARY

simpleCounter := prometheus.NewCounter(prometheus.CounterOpts{ //nolint:promlinter // its a test
Name: "test_counter",
Help: "test counter",
})

histogram3buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test
Name: "3buckets_histogram",
Help: "test histogram",
Buckets: []float64{1, 2, 3},
})

histogram5buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test
Name: "5buckets_histogram",
Help: "test histogram",
Buckets: []float64{1, 2, 3, 4, 5},
})
summary := prometheus.NewSummary(prometheus.SummaryOpts{ //nolint:promlinter // its a test
Name: "test_summary",
Help: "test summary",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})

outputCounter := io_prometheus_client.Metric{}
err := simpleCounter.Write(&outputCounter)
require.NoError(t, err)
testCounter := io_prometheus_client.MetricFamily{
Type: &counterType,
Metric: []*io_prometheus_client.Metric{
&outputCounter,
},
}

outputHistogram3Buckets := io_prometheus_client.Metric{}
err = histogram3buckets.Write(&outputHistogram3Buckets)
require.NoError(t, err)
test3BucketsHistogram := io_prometheus_client.MetricFamily{
Type: &histogramType,
Metric: []*io_prometheus_client.Metric{
&outputHistogram3Buckets,
},
}

outputHistogram5Buckets := io_prometheus_client.Metric{}
err = histogram5buckets.Write(&outputHistogram5Buckets)
require.NoError(t, err)
test5BucketsGaugeHistogram := io_prometheus_client.MetricFamily{
Type: &gaugeHistogramType,
Metric: []*io_prometheus_client.Metric{
&outputHistogram5Buckets,
},
}

outputSummary := io_prometheus_client.Metric{}
err = summary.Write(&outputSummary)
require.NoError(t, err)
testSummary := io_prometheus_client.MetricFamily{
Type: &summaryType,
Metric: []*io_prometheus_client.Metric{
&outputSummary,
},
}

testcases := []struct {
name string
mf []*io_prometheus_client.MetricFamily
expectedCardinality int
}{
{
"Simple Counter",
[]*io_prometheus_client.MetricFamily{&testCounter},
1,
},
{
"3 Buckets Histogram",
[]*io_prometheus_client.MetricFamily{&test3BucketsHistogram},
6,
},
{
"5 Buckets Gauge Histogram",
[]*io_prometheus_client.MetricFamily{&test5BucketsGaugeHistogram},
8,
},
{
"Summary",
[]*io_prometheus_client.MetricFamily{&testSummary},
5,
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
testGatherer := tGatherer{
mf: tc.mf,
}
gotCardinality, err := metricsCardinality(&testGatherer)
require.NoError(t, err)
require.Equal(t, tc.expectedCardinality, gotCardinality)
})
}
}

func TestMetricsCardinality_NilHistogram(t *testing.T) {
histogramType := io_prometheus_client.MetricType_HISTOGRAM
gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM
summaryType := io_prometheus_client.MetricType_SUMMARY

nilHistogram := io_prometheus_client.MetricFamily{
Type: &histogramType,
Metric: []*io_prometheus_client.Metric{
{
Histogram: nil,
},
},
}
nilGaugeHistogram := io_prometheus_client.MetricFamily{
Type: &gaugeHistogramType,
Metric: []*io_prometheus_client.Metric{
{
Histogram: nil,
},
},
}
nilSummary := io_prometheus_client.MetricFamily{
Type: &summaryType,
Metric: []*io_prometheus_client.Metric{
{
Summary: nil,
},
},
}

testGatherer := tGatherer{
mf: []*io_prometheus_client.MetricFamily{
&nilHistogram,
&nilGaugeHistogram,
&nilSummary,
},
}

t.Run("Nil Histogram", func(t *testing.T) {
m, err := metricsCardinality(&testGatherer)
require.NoError(t, err)
require.Equal(t, 0, m)
})
}

func TestTelemetryClient_StopPerf(t *testing.T) {
type fields struct {
properties map[string]string
Expand Down
Loading