diff --git a/.chloggen/prometheus-metric-types-help.yaml b/.chloggen/prometheus-metric-types-help.yaml new file mode 100644 index 000000000000..b8167777213a --- /dev/null +++ b/.chloggen/prometheus-metric-types-help.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: prometheusexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: reject metrics whose types have changed, use pre-existing descriptions when help strings change + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [28617] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/exporter/prometheusexporter/collector.go b/exporter/prometheusexporter/collector.go index be676ac7c9a5..42035add2fbc 100644 --- a/exporter/prometheusexporter/collector.go +++ b/exporter/prometheusexporter/collector.go @@ -7,13 +7,17 @@ import ( "encoding/hex" "fmt" "sort" + "sync" + "time" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" conventions "go.opentelemetry.io/collector/semconv/v1.25.0" "go.uber.org/zap" + "google.golang.org/protobuf/proto" prometheustranslator "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" ) @@ -30,6 +34,13 @@ type collector struct { addMetricSuffixes bool namespace string constLabels prometheus.Labels + metricFamilies sync.Map + metricExpiration time.Duration +} + +type metricFamily struct { + lastSeen time.Time + mf *dto.MetricFamily } func newCollector(config *Config, logger *zap.Logger) *collector { @@ -40,6 +51,7 @@ func newCollector(config *Config, logger *zap.Logger) *collector { sendTimestamps: config.SendTimestamps, constLabels: config.ConstLabels, addMetricSuffixes: config.AddMetricSuffixes, + metricExpiration: config.MetricExpiration, } } @@ -104,7 +116,13 @@ func (c *collector) convertMetric(metric pmetric.Metric, resourceAttrs pcommon.M return nil, errUnknownMetricType } -func (c *collector) getMetricMetadata(metric pmetric.Metric, attributes pcommon.Map, resourceAttrs pcommon.Map) (*prometheus.Desc, []string) { +func (c *collector) getMetricMetadata(metric pmetric.Metric, mType *dto.MetricType, attributes pcommon.Map, resourceAttrs pcommon.Map) (*prometheus.Desc, []string, error) { + name := prometheustranslator.BuildCompliantName(metric, c.namespace, c.addMetricSuffixes) + help, err := c.validateMetrics(name, metric.Description(), mType) + if err != nil { + return nil, nil, err + } + keys := make([]string, 0, attributes.Len()+2) // +2 for job and instance labels. values := make([]string, 0, attributes.Len()+2) @@ -123,18 +141,17 @@ func (c *collector) getMetricMetadata(metric pmetric.Metric, attributes pcommon. values = append(values, instance) } - return prometheus.NewDesc( - prometheustranslator.BuildCompliantName(metric, c.namespace, c.addMetricSuffixes), - metric.Description(), - keys, - c.constLabels, - ), values + return prometheus.NewDesc(name, help, keys, c.constLabels), values, nil } func (c *collector) convertGauge(metric pmetric.Metric, resourceAttrs pcommon.Map) (prometheus.Metric, error) { ip := metric.Gauge().DataPoints().At(0) - desc, attributes := c.getMetricMetadata(metric, ip.Attributes(), resourceAttrs) + desc, attributes, err := c.getMetricMetadata(metric, dto.MetricType_GAUGE.Enum(), ip.Attributes(), resourceAttrs) + if err != nil { + return nil, err + } + var value float64 switch ip.ValueType() { case pmetric.NumberDataPointValueTypeInt: @@ -162,11 +179,16 @@ func (c *collector) convertSum(metric pmetric.Metric, resourceAttrs pcommon.Map) ip := metric.Sum().DataPoints().At(0) metricType := prometheus.GaugeValue + mType := dto.MetricType_GAUGE.Enum() if metric.Sum().IsMonotonic() { metricType = prometheus.CounterValue + mType = dto.MetricType_COUNTER.Enum() } - desc, attributes := c.getMetricMetadata(metric, ip.Attributes(), resourceAttrs) + desc, attributes, err := c.getMetricMetadata(metric, mType, ip.Attributes(), resourceAttrs) + if err != nil { + return nil, err + } var value float64 switch ip.ValueType() { case pmetric.NumberDataPointValueTypeInt: @@ -182,7 +204,6 @@ func (c *collector) convertSum(metric pmetric.Metric, resourceAttrs pcommon.Map) } var m prometheus.Metric - var err error if metricType == prometheus.CounterValue && ip.StartTimestamp().AsTime().Unix() > 0 { m, err = prometheus.NewConstMetricWithCreatedTimestamp(desc, metricType, value, ip.StartTimestamp().AsTime(), attributes...) } else { @@ -218,9 +239,11 @@ func (c *collector) convertSummary(metric pmetric.Metric, resourceAttrs pcommon. quantiles[qvj.Quantile()] = qvj.Value() } - desc, attributes := c.getMetricMetadata(metric, point.Attributes(), resourceAttrs) + desc, attributes, err := c.getMetricMetadata(metric, dto.MetricType_SUMMARY.Enum(), point.Attributes(), resourceAttrs) + if err != nil { + return nil, err + } var m prometheus.Metric - var err error if point.StartTimestamp().AsTime().Unix() > 0 { m, err = prometheus.NewConstSummaryWithCreatedTimestamp(desc, point.Count(), point.Sum(), quantiles, point.StartTimestamp().AsTime(), attributes...) } else { @@ -237,7 +260,10 @@ func (c *collector) convertSummary(metric pmetric.Metric, resourceAttrs pcommon. func (c *collector) convertDoubleHistogram(metric pmetric.Metric, resourceAttrs pcommon.Map) (prometheus.Metric, error) { ip := metric.Histogram().DataPoints().At(0) - desc, attributes := c.getMetricMetadata(metric, ip.Attributes(), resourceAttrs) + desc, attributes, err := c.getMetricMetadata(metric, dto.MetricType_HISTOGRAM.Enum(), ip.Attributes(), resourceAttrs) + if err != nil { + return nil, err + } indicesMap := make(map[float64]int) buckets := make([]float64, 0, ip.BucketCounts().Len()) @@ -266,7 +292,6 @@ func (c *collector) convertDoubleHistogram(metric pmetric.Metric, resourceAttrs exemplars := convertExemplars(ip.Exemplars()) var m prometheus.Metric - var err error if ip.StartTimestamp().AsTime().Unix() > 0 { m, err = prometheus.NewConstHistogramWithCreatedTimestamp(desc, ip.Count(), ip.Sum(), points, ip.StartTimestamp().AsTime(), attributes...) } else { @@ -403,4 +428,50 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { ch <- m c.logger.Debug(fmt.Sprintf("metric served: %s", m.Desc().String())) } + c.cleanupMetricFamilies() +} + +func (c *collector) validateMetrics(name, description string, metricType *dto.MetricType) (help string, err error) { + now := time.Now() + v, exist := c.metricFamilies.Load(name) + if !exist { + c.metricFamilies.Store(name, metricFamily{ + lastSeen: now, + mf: &dto.MetricFamily{ + Name: proto.String(name), + Help: proto.String(description), + Type: metricType, + }, + }) + return description, nil + } + emf := v.(metricFamily) + if emf.mf.GetType() != *metricType { + return "", fmt.Errorf("instrument type conflict, using existing type definition. instrument: %s, existing: %s, dropped: %s", name, emf.mf.GetType(), *metricType) + } + emf.lastSeen = now + c.metricFamilies.Store(name, emf) + if emf.mf.GetHelp() != description { + c.logger.Info( + "Instrument description conflict, using existing", + zap.String("instrument", name), + zap.String("existing", emf.mf.GetHelp()), + zap.String("dropped", description), + ) + } + return emf.mf.GetHelp(), nil +} + +func (c *collector) cleanupMetricFamilies() { + expirationTime := time.Now().Add(-c.metricExpiration) + + c.metricFamilies.Range(func(key, value any) bool { + v := value.(metricFamily) + if expirationTime.After(v.lastSeen) { + c.logger.Debug("metric expired", zap.String("instrument", key.(string))) + c.metricFamilies.Delete(key) + return true + } + return true + }) } diff --git a/exporter/prometheusexporter/collector_test.go b/exporter/prometheusexporter/collector_test.go index 9b5d31d7efdb..f1b87d9fe10b 100644 --- a/exporter/prometheusexporter/collector_test.go +++ b/exporter/prometheusexporter/collector_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" io_prometheus_client "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" @@ -17,6 +18,7 @@ import ( conventions "go.opentelemetry.io/collector/semconv/v1.25.0" "go.uber.org/zap" "go.uber.org/zap/zapcore" + "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" prometheustranslator "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" @@ -66,25 +68,85 @@ func TestConvertInvalidDataType(t *testing.T) { } } -func TestConvertInvalidMetric(t *testing.T) { - for _, mType := range []pmetric.MetricType{ - pmetric.MetricTypeHistogram, - pmetric.MetricTypeSum, - pmetric.MetricTypeGauge, - } { - metric := pmetric.NewMetric() - switch mType { - case pmetric.MetricTypeGauge: - metric.SetEmptyGauge().DataPoints().AppendEmpty() - case pmetric.MetricTypeSum: - metric.SetEmptySum().DataPoints().AppendEmpty() - case pmetric.MetricTypeHistogram: - metric.SetEmptyHistogram().DataPoints().AppendEmpty() - } - c := collector{} +func TestConvertMetric(t *testing.T) { + tests := []struct { + description string + mName string + mType pmetric.MetricType + mapVals map[string]metricFamily + err bool + }{ + { + description: "invalid histogram metric", + mType: pmetric.MetricTypeHistogram, + err: true, + }, + { + description: "invalid sum metric", + mType: pmetric.MetricTypeSum, + err: true, + }, + { + description: "invalid gauge metric", + mType: pmetric.MetricTypeGauge, + err: true, + }, + { + description: "metric type conflict", + mName: "testgauge", + mType: pmetric.MetricTypeGauge, + mapVals: map[string]metricFamily{ + "testgauge": { + mf: &io_prometheus_client.MetricFamily{ + Name: proto.String("testgauge"), + Type: dto.MetricType_COUNTER.Enum(), + }, + }, + }, + err: true, + }, + { + description: "metric description conflict", + mName: "testgauge", + mType: pmetric.MetricTypeGauge, + mapVals: map[string]metricFamily{ + "testgauge": { + mf: &io_prometheus_client.MetricFamily{ + Name: proto.String("testgauge"), + Type: dto.MetricType_GAUGE.Enum(), + Help: proto.String("test help value"), + }, + }, + }, + err: false, + }, + } + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + metric := pmetric.NewMetric() + metric.SetName(tt.mName) + switch tt.mType { + case pmetric.MetricTypeGauge: + metric.SetEmptyGauge().DataPoints().AppendEmpty() + case pmetric.MetricTypeSum: + metric.SetEmptySum().DataPoints().AppendEmpty() + case pmetric.MetricTypeHistogram: + metric.SetEmptyHistogram().DataPoints().AppendEmpty() + } + c := collector{ + logger: zap.NewNop(), + } + for k, v := range tt.mapVals { + c.metricFamilies.Store(k, v) + } - _, err := c.convertMetric(metric, pcommon.NewMap()) - require.Error(t, err) + _, err := c.convertMetric(metric, pcommon.NewMap()) + if tt.err { + require.Error(t, err) + return + } + require.NoError(t, err) + }) } }