Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(heartbeat): add nil check in heartbeat's metricscardinality #1293

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
fix(heartbeat): add nil check in heartbeat's metricscardinality
Signed-off-by: Alex Castilio dos Santos <[email protected]>
alexcastilio committed Jan 31, 2025

Verified

This commit was signed with the committer’s verified signature.
nsrip-dd Nick Ripley
commit ccbaf9fe77347532b3294480c33d15cd1af3b964
43 changes: 36 additions & 7 deletions pkg/telemetry/telemetry.go
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ package telemetry

import (
"context"
"errors"
"fmt"
"maps"
"os"
@@ -18,6 +19,7 @@ import (
"github.com/microsoft/ApplicationInsights-Go/appinsights/contracts"
"github.com/microsoft/retina/pkg/exporter"
"github.com/microsoft/retina/pkg/log"
"github.com/prometheus/client_golang/prometheus"
io_prometheus_client "github.com/prometheus/client_model/go"
)

@@ -26,6 +28,8 @@ var (
version string
mbShift uint64 = 20

ErrorNilCombinedGatherer = errors.New("exporter.CombinedGatherer is nil")

// property keys
kernelversion = "kernelversion"
sysmem = "sysmem"
@@ -178,7 +182,7 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) {
t.trackWarning(err, "failed to get cpu usage")
}

metricscardinality, err := metricsCardinality()
metricscardinality, err := metricsCardinality(exporter.CombinedGatherer)
if err != nil {
t.trackWarning(err, "failed to get metrics cardinality")
}
@@ -189,8 +193,12 @@ func (t *TelemetryClient) heartbeat(ctx context.Context) {
maps.Copy(props, t.profile.GetMemoryUsage())
t.TrackEvent("heartbeat", props)
}
func metricsCardinality() (int, error) {
metricFamilies, err := exporter.CombinedGatherer.Gather()
func metricsCardinality(gatherer prometheus.Gatherer) (int, error) {
if gatherer == nil {
return 0, fmt.Errorf("failed to get metrics Gatherer: %w", ErrorNilCombinedGatherer)
}

metricFamilies, err := gatherer.Gather()
if err != nil {
return 0, fmt.Errorf("failed to gather metrics: %w", err)
}
@@ -203,23 +211,44 @@ func metricsCardinality() (int, error) {
case io_prometheus_client.MetricType_HISTOGRAM:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count
histogram := m.GetHistogram()
if histogram != nil {
buckets := histogram.GetBucket()
if buckets != nil {
metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count
}
}
}

case io_prometheus_client.MetricType_GAUGE_HISTOGRAM:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetHistogram().GetBucket()) + 3 // +3 for le="+Inf", _sum and _count
histogram := m.GetHistogram()
if histogram != nil {
buckets := histogram.GetBucket()
if buckets != nil {
metricscardinality += len(buckets) + 3 // +3 for le="+Inf", _sum and _count
}
}
}

case io_prometheus_client.MetricType_SUMMARY:
metrics := mf.GetMetric()
for _, m := range metrics {
metricscardinality += len(m.GetSummary().GetQuantile()) + 2 // +2 for _sum and _count
summary := m.GetSummary()
if summary != nil {
quantiles := summary.GetQuantile()
if quantiles != nil {
metricscardinality += len(quantiles) + 2 // +2 for _sum and _count
}
}
}

default:
metricscardinality += len(mf.GetMetric())
metrics := mf.GetMetric()
if metrics != nil {
metricscardinality += len(metrics)
}

}
}
171 changes: 171 additions & 0 deletions pkg/telemetry/telemetry_test.go
Original file line number Diff line number Diff line change
@@ -11,6 +11,8 @@ import (
"sync"
"testing"

"github.com/prometheus/client_golang/prometheus"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/require"
)

@@ -97,6 +99,175 @@ func TestHeartbeat(t *testing.T) {
}
}

func TestMetricsCardinality_nilCombinedGatherer(t *testing.T) {
var gatherer prometheus.Gatherer

t.Run("Combined Gatherer is nil", func(t *testing.T) {
m, err := metricsCardinality(gatherer)
require.Error(t, err)
require.Equal(t, 0, m)
})
}

type tGatherer struct {
mf []*io_prometheus_client.MetricFamily
}

func (t *tGatherer) Gather() ([]*io_prometheus_client.MetricFamily, error) {
return t.mf, nil
}

func TestMetricsCardinality(t *testing.T) {
counterType := io_prometheus_client.MetricType_COUNTER
histogramType := io_prometheus_client.MetricType_HISTOGRAM
gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM
summaryType := io_prometheus_client.MetricType_SUMMARY

simpleCounter := prometheus.NewCounter(prometheus.CounterOpts{ //nolint:promlinter // its a test
Name: "test_counter",
Help: "test counter",
})

histogram3buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test
Name: "3buckets_histogram",
Help: "test histogram",
Buckets: []float64{1, 2, 3},
})

histogram5buckets := prometheus.NewHistogram(prometheus.HistogramOpts{ //nolint:promlinter // its a test
Name: "5buckets_histogram",
Help: "test histogram",
Buckets: []float64{1, 2, 3, 4, 5},
})
summary := prometheus.NewSummary(prometheus.SummaryOpts{ //nolint:promlinter // its a test
Name: "test_summary",
Help: "test summary",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})

outputCounter := io_prometheus_client.Metric{}
err := simpleCounter.Write(&outputCounter)
require.NoError(t, err)
testCounter := io_prometheus_client.MetricFamily{
Type: &counterType,
Metric: []*io_prometheus_client.Metric{
&outputCounter,
},
}

outputHistogram3Buckets := io_prometheus_client.Metric{}
err = histogram3buckets.Write(&outputHistogram3Buckets)
require.NoError(t, err)
test3BucketsHistogram := io_prometheus_client.MetricFamily{
Type: &histogramType,
Metric: []*io_prometheus_client.Metric{
&outputHistogram3Buckets,
},
}

outputHistogram5Buckets := io_prometheus_client.Metric{}
err = histogram5buckets.Write(&outputHistogram5Buckets)
require.NoError(t, err)
test5BucketsGaugeHistogram := io_prometheus_client.MetricFamily{
Type: &gaugeHistogramType,
Metric: []*io_prometheus_client.Metric{
&outputHistogram5Buckets,
},
}

outputSummary := io_prometheus_client.Metric{}
err = summary.Write(&outputSummary)
require.NoError(t, err)
testSummary := io_prometheus_client.MetricFamily{
Type: &summaryType,
Metric: []*io_prometheus_client.Metric{
&outputSummary,
},
}

testcases := []struct {
name string
mf []*io_prometheus_client.MetricFamily
expectedCardinality int
}{
{
"Simple Counter",
[]*io_prometheus_client.MetricFamily{&testCounter},
1,
},
{
"3 Buckets Histogram",
[]*io_prometheus_client.MetricFamily{&test3BucketsHistogram},
6,
},
{
"5 Buckets Gauge Histogram",
[]*io_prometheus_client.MetricFamily{&test5BucketsGaugeHistogram},
8,
},
{
"Summary",
[]*io_prometheus_client.MetricFamily{&testSummary},
5,
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
testGatherer := tGatherer{
mf: tc.mf,
}
gotCardinality, err := metricsCardinality(&testGatherer)
require.NoError(t, err)
require.Equal(t, tc.expectedCardinality, gotCardinality)
})
}
}

func TestMetricsCardinality_NilHistogram(t *testing.T) {
histogramType := io_prometheus_client.MetricType_HISTOGRAM
gaugeHistogramType := io_prometheus_client.MetricType_GAUGE_HISTOGRAM
summaryType := io_prometheus_client.MetricType_SUMMARY

nilHistogram := io_prometheus_client.MetricFamily{
Type: &histogramType,
Metric: []*io_prometheus_client.Metric{
{
Histogram: nil,
},
},
}
nilGaugeHistogram := io_prometheus_client.MetricFamily{
Type: &gaugeHistogramType,
Metric: []*io_prometheus_client.Metric{
{
Histogram: nil,
},
},
}
nilSummary := io_prometheus_client.MetricFamily{
Type: &summaryType,
Metric: []*io_prometheus_client.Metric{
{
Summary: nil,
},
},
}

testGatherer := tGatherer{
mf: []*io_prometheus_client.MetricFamily{
&nilHistogram,
&nilGaugeHistogram,
&nilSummary,
},
}

t.Run("Nil Histogram", func(t *testing.T) {
m, err := metricsCardinality(&testGatherer)
require.NoError(t, err)
require.Equal(t, 0, m)
})
}

func TestTelemetryClient_StopPerf(t *testing.T) {
type fields struct {
properties map[string]string