From 5338888062f4e9ca3ac2d807130c48c70a68807d Mon Sep 17 00:00:00 2001
From: keisku
Date: Mon, 17 Jun 2024 02:46:42 +0000
Subject: [PATCH] remove tag number limitation of ksm aggregator

---
 .../cluster/ksm/kubernetes_state.go           |    2 +-
 .../ksm/kubernetes_state_aggregators.go       |  241 +--
 .../ksm/kubernetes_state_aggregators_test.go  | 1474 ++++++++++++++---
 .../cluster/ksm/kubernetes_state_defaults.go  |  149 +-
 .../cluster/ksm/kubernetes_state_test.go      |   28 -
 ...gregator-metric-tags-d941cee0ebe64b1f.yaml |   13 +
 6 files changed, 1478 insertions(+), 429 deletions(-)
 create mode 100644 releasenotes/notes/expand-aggregator-metric-tags-d941cee0ebe64b1f.yaml

diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state.go
index a071948f460f0..c2fa9a4f3b037 100644
--- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state.go
+++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state.go
@@ -534,7 +534,7 @@ func (k *KSMCheck) processMetrics(sender sender.Sender, metrics map[string][]ksm
 	// First check for aggregator, because the check use _labels metrics to aggregate values.
 	if aggregator, found := k.metricAggregators[metricFamily.Name]; found {
 		for _, m := range metricFamily.ListMetrics {
-			aggregator.accumulate(m)
+			aggregator.accumulate(m, labelJoiner)
 		}
 		// Some metrics can be aggregated and consumed as-is or by a transformer.
 		// So, let’s continue the processing.
diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators.go
index 5a74e88cd2e51..09955cee74d9f 100644
--- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators.go
+++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators.go
@@ -8,7 +8,10 @@ package ksm

 import (
+	"cmp"
 	"fmt"
+	"slices"
+	"strings"

 	"github.com/DataDog/datadog-agent/pkg/aggregator/sender"
 	ksmstore "github.com/DataDog/datadog-agent/pkg/kubestatemetrics/store"
@@ -18,23 +21,61 @@ import (
 )

 type metricAggregator interface {
-	accumulate(ksmstore.DDMetric)
+	accumulate(ksmstore.DDMetric, *labelJoiner)
 	flush(sender.Sender, *KSMCheck, *labelJoiner)
 }

-// maxNumberOfAllowedLabels contains the maximum number of labels that can be used to aggregate metrics.
-// The only reason why there is a maximum is because the `accumulator` map is indexed on the label values
-// and GO accepts arrays as valid map key type, but not slices.
-// This hard coded limit is fine because the metrics to aggregate and the label list to use are hardcoded
-// in the code and cannot be arbitrarily set by the end-user.
-const maxNumberOfAllowedLabels = 4
+const accumulateDelimiter = "|"
+
+// accumulateKey is a key used to accumulate metrics in the aggregator.
+// Go does not allow slices or maps as map keys, so we join the label keys and values into strings instead.
+// E.g., for the labels {key:"a",value:"1"}, {key:"b",value:"2"}, {key:"c",value:"3"}, the accumulate key is keys "a|b|c" and values "1|2|3".
+type accumulateKey struct {
+	keys   string
+	values string
+}
+
+func makeAccumulateKey(labels []label) accumulateKey {
+	keys := make([]string, len(labels))
+	vals := make([]string, len(labels))
+	// Sort labels to ensure the order is deterministic.
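+	// Without a canonical order, one label set could map to different joined keys
+	// (e.g. keys "a|b" vs "b|a") and be accumulated under separate entries, since the
+	// input slice is typically built from map iteration and has no guaranteed order.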
+ slices.SortFunc(labels, func(a, b label) int { + v := cmp.Compare(a.key, b.key) + if v == 0 { + return cmp.Compare(a.value, b.value) + } + return v + }) + for i, l := range labels { + keys[i] = l.key + vals[i] = l.value + } + return accumulateKey{ + keys: strings.Join(keys, accumulateDelimiter), + values: strings.Join(vals, accumulateDelimiter), + } +} + +func (a accumulateKey) labels() map[string]string { + keys := strings.Split(a.keys, accumulateDelimiter) + values := strings.Split(a.values, accumulateDelimiter) + if len(keys) != len(values) { + log.Errorf("BUG in KSM metric aggregator: keys and values have different lengths") + return nil + } + labels := make(map[string]string, len(keys)) + for i := range keys { + labels[keys[i]] = values[i] + } + return labels + +} type counterAggregator struct { ddMetricName string ksmMetricName string - allowedLabels []string - accumulator map[[maxNumberOfAllowedLabels]string]float64 + accumulator map[accumulateKey]float64 } type sumValuesAggregator struct { @@ -51,15 +92,9 @@ type resourceAggregator struct { ddMetricPrefix string ddMetricSuffix string ksmMetricName string - allowedLabels []string allowedResources []string - accumulators map[string]map[[maxNumberOfAllowedLabels]string]float64 -} - -type cronJob struct { - namespace string - name string + accumulators map[string]map[accumulateKey]float64 } type cronJobState struct { @@ -68,7 +103,7 @@ type cronJobState struct { } type lastCronJobAggregator struct { - accumulator map[cronJob]cronJobState + accumulator map[accumulateKey]cronJobState } type lastCronJobCompleteAggregator struct { @@ -79,60 +114,36 @@ type lastCronJobFailedAggregator struct { aggregator *lastCronJobAggregator } -func newSumValuesAggregator(ddMetricName, ksmMetricName string, allowedLabels []string) metricAggregator { - if len(allowedLabels) > maxNumberOfAllowedLabels { - // `maxNumberOfAllowedLabels` is hardcoded to the maximum number of labels passed to this function from the metricsAggregators definition below. - // The only possibility to arrive here is to add a new aggregator in `metricAggregator` below and to forget to update `maxNumberOfAllowedLabels` accordingly. - log.Error("BUG in KSM metric aggregator") - return nil - } - +func newSumValuesAggregator(ddMetricName, ksmMetricName string) metricAggregator { return &sumValuesAggregator{ counterAggregator{ ddMetricName: ddMetricName, ksmMetricName: ksmMetricName, - allowedLabels: allowedLabels, - accumulator: make(map[[maxNumberOfAllowedLabels]string]float64), + accumulator: make(map[accumulateKey]float64), }, } } -func newCountObjectsAggregator(ddMetricName, ksmMetricName string, allowedLabels []string) metricAggregator { - if len(allowedLabels) > maxNumberOfAllowedLabels { - // `maxNumberOfAllowedLabels` is hardcoded to the maximum number of labels passed to this function from the metricsAggregators definition below. - // The only possibility to arrive here is to add a new aggregator in `metricAggregator` below and to forget to update `maxNumberOfAllowedLabels` accordingly. 
- log.Error("BUG in KSM metric aggregator") - return nil - } - +func newCountObjectsAggregator(ddMetricName, ksmMetricName string) metricAggregator { return &countObjectsAggregator{ counterAggregator{ ddMetricName: ddMetricName, ksmMetricName: ksmMetricName, - allowedLabels: allowedLabels, - accumulator: make(map[[maxNumberOfAllowedLabels]string]float64), + accumulator: make(map[accumulateKey]float64), }, } } -func newResourceValuesAggregator(ddMetricPrefix, ddMetricSuffix, ksmMetricName string, allowedLabels, allowedResources []string) metricAggregator { - if len(allowedLabels) > maxNumberOfAllowedLabels { - // `maxNumberOfAllowedLabels` is hardcoded to the maximum number of labels passed to this function from the metricsAggregators definition below. - // The only possibility to arrive here is to add a new aggregator in `metricAggregator` below and to forget to update `maxNumberOfAllowedLabels` accordingly. - log.Error("BUG in KSM metric aggregator") - return nil - } - - accumulators := make(map[string]map[[maxNumberOfAllowedLabels]string]float64) +func newResourceValuesAggregator(ddMetricPrefix, ddMetricSuffix, ksmMetricName string, allowedResources []string) metricAggregator { + accumulators := make(map[string]map[accumulateKey]float64) for _, allowedResource := range allowedResources { - accumulators[allowedResource] = make(map[[maxNumberOfAllowedLabels]string]float64) + accumulators[allowedResource] = make(map[accumulateKey]float64) } return &resourceAggregator{ ddMetricPrefix: ddMetricPrefix, ddMetricSuffix: ddMetricSuffix, ksmMetricName: ksmMetricName, - allowedLabels: allowedLabels, allowedResources: allowedResources, accumulators: accumulators, } @@ -140,69 +151,40 @@ func newResourceValuesAggregator(ddMetricPrefix, ddMetricSuffix, ksmMetricName s func newLastCronJobAggregator() *lastCronJobAggregator { return &lastCronJobAggregator{ - accumulator: make(map[cronJob]cronJobState), + accumulator: make(map[accumulateKey]cronJobState), } } -func (a *sumValuesAggregator) accumulate(metric ksmstore.DDMetric) { - var labelValues [maxNumberOfAllowedLabels]string - - for i, allowedLabel := range a.allowedLabels { - if allowedLabel == "" { - break - } - - labelValues[i] = metric.Labels[allowedLabel] - } - - a.accumulator[labelValues] += metric.Val +func (a *sumValuesAggregator) accumulate(metric ksmstore.DDMetric, lj *labelJoiner) { + a.accumulator[makeAccumulateKey(lj.getLabelsToAdd(metric.Labels))] += metric.Val } -func (a *countObjectsAggregator) accumulate(metric ksmstore.DDMetric) { - var labelValues [maxNumberOfAllowedLabels]string - - for i, allowedLabel := range a.allowedLabels { - if allowedLabel == "" { - break - } - - labelValues[i] = metric.Labels[allowedLabel] - } - - a.accumulator[labelValues]++ +func (a *countObjectsAggregator) accumulate(metric ksmstore.DDMetric, lj *labelJoiner) { + a.accumulator[makeAccumulateKey(lj.getLabelsToAdd(metric.Labels))]++ } -func (a *resourceAggregator) accumulate(metric ksmstore.DDMetric) { +func (a *resourceAggregator) accumulate(metric ksmstore.DDMetric, lj *labelJoiner) { resource := renameResource(metric.Labels["resource"]) if _, ok := a.accumulators[resource]; !ok { return } - var labelValues [maxNumberOfAllowedLabels]string - - for i, allowedLabel := range a.allowedLabels { - if allowedLabel == "" { - break - } - - labelValues[i] = metric.Labels[allowedLabel] - } - + ls := lj.getLabelsToAdd(metric.Labels) if _, ok := a.accumulators[resource]; ok { - a.accumulators[resource][labelValues] += metric.Val + 
a.accumulators[resource][makeAccumulateKey(ls)] += metric.Val } } -func (a *lastCronJobCompleteAggregator) accumulate(metric ksmstore.DDMetric) { - a.aggregator.accumulate(metric, servicecheck.ServiceCheckOK) +func (a *lastCronJobCompleteAggregator) accumulate(metric ksmstore.DDMetric, lj *labelJoiner) { + a.aggregator.accumulate(metric, servicecheck.ServiceCheckOK, lj) } -func (a *lastCronJobFailedAggregator) accumulate(metric ksmstore.DDMetric) { - a.aggregator.accumulate(metric, servicecheck.ServiceCheckCritical) +func (a *lastCronJobFailedAggregator) accumulate(metric ksmstore.DDMetric, lj *labelJoiner) { + a.aggregator.accumulate(metric, servicecheck.ServiceCheckCritical, lj) } -func (a *lastCronJobAggregator) accumulate(metric ksmstore.DDMetric, state servicecheck.ServiceCheckStatus) { +func (a *lastCronJobAggregator) accumulate(metric ksmstore.DDMetric, state servicecheck.ServiceCheckStatus, lj *labelJoiner) { if condition, found := metric.Labels["condition"]; !found || condition != "true" { return } @@ -210,7 +192,7 @@ func (a *lastCronJobAggregator) accumulate(metric ksmstore.DDMetric, state servi return } - namespace, found := metric.Labels["namespace"] + _, found := metric.Labels["namespace"] if !found { return } @@ -226,8 +208,9 @@ func (a *lastCronJobAggregator) accumulate(metric ksmstore.DDMetric, state servi return } - if lastCronJob, found := a.accumulator[cronJob{namespace: namespace, name: cronjobName}]; !found || lastCronJob.id < id { - a.accumulator[cronJob{namespace: namespace, name: cronjobName}] = cronJobState{ + key := makeAccumulateKey(lj.getLabelsToAdd(metric.Labels)) + if lastCronJob, found := a.accumulator[key]; !found || lastCronJob.id < id { + a.accumulator[key] = cronJobState{ id: id, state: state, } @@ -235,42 +218,21 @@ func (a *lastCronJobAggregator) accumulate(metric ksmstore.DDMetric, state servi } func (a *counterAggregator) flush(sender sender.Sender, k *KSMCheck, labelJoiner *labelJoiner) { - for labelValues, count := range a.accumulator { - - labels := make(map[string]string) - for i, allowedLabel := range a.allowedLabels { - if allowedLabel == "" { - break - } - - labels[allowedLabel] = labelValues[i] - } - - hostname, tags := k.hostnameAndTags(labels, labelJoiner, labelsMapperOverride(a.ksmMetricName)) - + for accumulatorKey, count := range a.accumulator { + hostname, tags := k.hostnameAndTags(accumulatorKey.labels(), labelJoiner, labelsMapperOverride(a.ksmMetricName)) sender.Gauge(ksmMetricPrefix+a.ddMetricName, count, hostname, tags) } - - a.accumulator = make(map[[maxNumberOfAllowedLabels]string]float64) + a.accumulator = make(map[accumulateKey]float64) } func (a *resourceAggregator) flush(sender sender.Sender, k *KSMCheck, labelJoiner *labelJoiner) { for _, resource := range a.allowedResources { metricName := fmt.Sprintf("%s%s.%s_%s", ksmMetricPrefix, a.ddMetricPrefix, resource, a.ddMetricSuffix) - for labelValues, count := range a.accumulators[resource] { - labels := make(map[string]string) - for i, allowedLabel := range a.allowedLabels { - if allowedLabel == "" { - break - } - - labels[allowedLabel] = labelValues[i] - } - - hostname, tags := k.hostnameAndTags(labels, labelJoiner, labelsMapperOverride(a.ksmMetricName)) + for key, count := range a.accumulators[resource] { + hostname, tags := k.hostnameAndTags(key.labels(), labelJoiner, labelsMapperOverride(a.ksmMetricName)) sender.Gauge(metricName, count, hostname, tags) } - a.accumulators[resource] = make(map[[maxNumberOfAllowedLabels]string]float64) + a.accumulators[resource] = 
make(map[accumulateKey]float64) } } @@ -283,20 +245,12 @@ func (a *lastCronJobFailedAggregator) flush(sender sender.Sender, k *KSMCheck, l } func (a *lastCronJobAggregator) flush(sender sender.Sender, k *KSMCheck, labelJoiner *labelJoiner) { - for cronjob, state := range a.accumulator { - hostname, tags := k.hostnameAndTags( - map[string]string{ - "namespace": cronjob.namespace, - "cronjob": cronjob.name, - }, - labelJoiner, - nil, - ) - + for accumulatorKey, state := range a.accumulator { + hostname, tags := k.hostnameAndTags(accumulatorKey.labels(), labelJoiner, map[string]string{"job_name": "cronjob"}) sender.ServiceCheck(ksmMetricPrefix+"cronjob.complete", state.state, hostname, tags, "") } - a.accumulator = make(map[cronJob]cronJobState) + a.accumulator = make(map[accumulateKey]cronJobState) } func defaultMetricAggregators() map[string]metricAggregator { @@ -306,97 +260,78 @@ func defaultMetricAggregators() map[string]metricAggregator { "kube_configmap_info": newCountObjectsAggregator( "configmap.count", "kube_configmap_info", - []string{"namespace"}, ), "kube_secret_info": newCountObjectsAggregator( "secret.count", "kube_secret_info", - []string{"namespace"}, ), "kube_apiservice_labels": newCountObjectsAggregator( "apiservice.count", "kube_apiservice_labels", - []string{}, ), "kube_customresourcedefinition_labels": newCountObjectsAggregator( "crd.count", "kube_customresourcedefinition_labels", - []string{}, ), "kube_persistentvolume_status_phase": newSumValuesAggregator( "persistentvolumes.by_phase", "kube_persistentvolume_status_phase", - []string{"storageclass", "phase"}, ), "kube_service_spec_type": newCountObjectsAggregator( "service.count", "kube_service_spec_type", - []string{"namespace", "type"}, ), "kube_namespace_status_phase": newSumValuesAggregator( "namespace.count", "kube_namespace_status_phase", - []string{"phase"}, ), "kube_replicaset_owner": newCountObjectsAggregator( "replicaset.count", "kube_replicaset_owner", - []string{"namespace", "owner_name", "owner_kind"}, ), "kube_job_owner": newCountObjectsAggregator( "job.count", "kube_job_owner", - []string{"namespace", "owner_name", "owner_kind"}, ), "kube_deployment_labels": newCountObjectsAggregator( "deployment.count", "kube_deployment_labels", - []string{"namespace"}, ), "kube_daemonset_labels": newCountObjectsAggregator( "daemonset.count", "kube_daemonset_labels", - []string{"namespace"}, ), "kube_statefulset_labels": newCountObjectsAggregator( "statefulset.count", "kube_statefulset_labels", - []string{"namespace"}, ), "kube_cronjob_labels": newCountObjectsAggregator( "cronjob.count", "kube_cronjob_labels", - []string{"namespace"}, ), "kube_endpoint_labels": newCountObjectsAggregator( "endpoint.count", "kube_endpoint_labels", - []string{"namespace"}, ), "kube_horizontalpodautoscaler_labels": newCountObjectsAggregator( "hpa.count", "kube_horizontalpodautoscaler_labels", - []string{"namespace"}, ), "kube_verticalpodautoscaler_labels": newCountObjectsAggregator( "vpa.count", "kube_verticalpodautoscaler_labels", - []string{"namespace"}, ), "kube_node_info": newCountObjectsAggregator( "node.count", "kube_node_info", - []string{"kubelet_version", "container_runtime_version", "kernel_version", "os_image"}, ), "kube_pod_info": newCountObjectsAggregator( "pod.count", "kube_pod_info", - []string{"node", "namespace", "created_by_kind", "created_by_name"}, ), "kube_ingress_labels": newCountObjectsAggregator( "ingress.count", "kube_ingress_labels", - []string{"namespace"}, ), "kube_job_complete": 
&lastCronJobCompleteAggregator{aggregator: cronJobAggregator}, "kube_job_failed": &lastCronJobFailedAggregator{aggregator: cronJobAggregator}, @@ -404,29 +339,25 @@ func defaultMetricAggregators() map[string]metricAggregator { "node", "allocatable.total", "kube_node_status_allocatable", - []string{}, []string{"cpu", "memory", "gpu", "mig"}, ), "kube_node_status_capacity": newResourceValuesAggregator( "node", "capacity.total", "kube_node_status_capacity", - []string{}, []string{"cpu", "memory", "gpu", "mig"}, ), "kube_pod_container_resource_with_owner_tag_requests": newResourceValuesAggregator( "container", "requested.total", "kube_pod_container_resource_with_owner_tag_requests", - []string{"namespace", "container", "owner_name", "owner_kind"}, []string{"cpu", "memory"}, ), "kube_pod_container_resource_with_owner_tag_limits": newResourceValuesAggregator( "container", "limit.total", "kube_pod_container_resource_with_owner_tag_limits", - []string{"namespace", "container", "owner_name", "owner_kind"}, - []string{"cpu", "memory", "gpu", "mig"}, + []string{"cpu", "memory"}, ), } } diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators_test.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators_test.go index ac7017e2348bc..075d558339559 100644 --- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators_test.go +++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_aggregators_test.go @@ -9,6 +9,7 @@ package ksm import ( "testing" + "time" "github.com/DataDog/datadog-agent/pkg/aggregator/mocksender" core "github.com/DataDog/datadog-agent/pkg/collector/corechecks" @@ -18,299 +19,1344 @@ import ( var _ metricAggregator = &sumValuesAggregator{} var _ metricAggregator = &countObjectsAggregator{} +var _ metricAggregator = &resourceAggregator{} var _ metricAggregator = &lastCronJobCompleteAggregator{} var _ metricAggregator = &lastCronJobFailedAggregator{} -func Test_counterAggregator(t *testing.T) { +func Test_MetricAggregators(t *testing.T) { tests := []struct { - name string - ddMetricName string - allowedLabels []string - metrics []ksmstore.DDMetric - expected []metricsExpected + name string + labelsAsTags map[string]map[string]string + ddMetricsFams []ksmstore.DDMetricsFam + expectedMetrics []metricsExpected + expectedServiceChecks []serviceCheck }{ { - name: "One allowed label", - ddMetricName: "my.count", - allowedLabels: []string{"foo"}, - metrics: []ksmstore.DDMetric{ - { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar1", - }, - Val: 1, + name: "sumValuesAggregator aggregates namespace.count", + labelsAsTags: map[string]map[string]string{ + "namespace": { + "test_label_1": "tag1", + }, + "pod": { + "test_label_2": "tag2", }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar2", - }, - Val: 2, + Name: "kube_namespace_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "namespace": "default", + "label_test_label_1": "value1", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar1", - }, - Val: 4, + Name: "kube_namespace_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "namespace": "default", + "phase": "Active", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar2", - }, - Val: 8, + Name: "kube_namespace_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "namespace": "test1", + "phase": "Active", + }, + Val: 1, + }}, 
+ }, + { + Name: "kube_namespace_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "namespace": "test2", + "phase": "Active", + }, + Val: 1, + }}, }, }, - expected: []metricsExpected{ + expectedMetrics: []metricsExpected{ { - name: "kubernetes_state.my.count", - val: 1 + 2, - tags: []string{"foo:foo1"}, + val: 1, + name: "kubernetes_state.namespace.count", + tags: []string{"phase:Active", "tag1:value1"}, + hostname: "", }, { - name: "kubernetes_state.my.count", - val: 4 + 8, - tags: []string{"foo:foo2"}, + val: 2, + name: "kubernetes_state.namespace.count", + tags: []string{"phase:Active"}, + hostname: "", }, }, }, { - name: "Two allowed labels", - ddMetricName: "my.count", - allowedLabels: []string{"foo", "bar"}, - metrics: []ksmstore.DDMetric{ - { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar1", - "baz": "baz1", - }, - Val: 1, + name: "sumValuesAggregator aggregates persistentvolumes.by_phase", + labelsAsTags: map[string]map[string]string{ + "persistentvolumes": { + "test_label_1": "tag1", }, + "persistentvolume": { + "test_label_2": "tag2", + }, + "node": { + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar1", - "baz": "baz2", - }, - Val: 2, + Name: "kube_persistentvolume_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-available-1", + "label_tags_datadoghq_com_env": "test", + "label_test_label_1": "value1", + "label_test_label_2": "value2", + "label_test_label_3": "value3", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar2", - "baz": "baz1", - }, - Val: 4, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-available-1", + "phase": "Available", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo1", - "bar": "bar2", - "baz": "baz2", - }, - Val: 8, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-available-2", + "phase": "Available", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar1", - "baz": "baz1", - }, - Val: 16, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-pending-1", + "phase": "Pending", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar1", - "baz": "baz2", - }, - Val: 32, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-released-1", + "phase": "Released", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar2", - "baz": "baz1", - }, - Val: 64, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-available-3", + "phase": "Available", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "foo": "foo2", - "bar": "bar2", - "baz": "baz2", - }, - Val: 128, + Name: "kube_persistentvolume_status_phase", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "persistentvolume": "pv-released-2", + "phase": "Released", + }, + Val: 1, + }}, }, }, - expected: []metricsExpected{ + expectedMetrics: []metricsExpected{ { - name: "kubernetes_state.my.count", - val: 1 + 2, - tags: []string{"foo:foo1", 
"bar:bar1"}, + val: 1, + name: "kubernetes_state.persistentvolumes.by_phase", + tags: []string{"phase:Available", "tag2:value2", "env:test"}, + hostname: "", }, { - name: "kubernetes_state.my.count", - val: 4 + 8, - tags: []string{"foo:foo1", "bar:bar2"}, + val: 2, + name: "kubernetes_state.persistentvolumes.by_phase", + tags: []string{"phase:Available"}, + hostname: "", }, { - name: "kubernetes_state.my.count", - val: 16 + 32, - tags: []string{"foo:foo2", "bar:bar1"}, + val: 1, + name: "kubernetes_state.persistentvolumes.by_phase", + tags: []string{"phase:Pending"}, + hostname: "", }, { - name: "kubernetes_state.my.count", - val: 64 + 128, - tags: []string{"foo:foo2", "bar:bar2"}, + val: 2, + name: "kubernetes_state.persistentvolumes.by_phase", + tags: []string{"phase:Released"}, + hostname: "", }, }, }, - } - - ksmCheck := newKSMCheck(core.NewCheckBase(CheckName), &KSMConfig{}) - - for _, tt := range tests { - s := mocksender.NewMockSender("ksm") - s.SetupAcceptAll() - - t.Run(tt.name, func(t *testing.T) { - agg := newSumValuesAggregator(tt.ddMetricName, "", tt.allowedLabels) - for _, metric := range tt.metrics { - agg.accumulate(metric) - } - - agg.flush(s, ksmCheck, newLabelJoiner(ksmCheck.instance.labelJoins)) - - s.AssertNumberOfCalls(t, "Gauge", len(tt.expected)) - for _, expected := range tt.expected { - s.AssertMetric(t, "Gauge", expected.name, expected.val, expected.hostname, expected.tags) - } - }) - } -} - -func Test_lastCronJobAggregator(t *testing.T) { - tests := []struct { - name string - metricsComplete []ksmstore.DDMetric - metricsFailed []ksmstore.DDMetric - expected *serviceCheck - }{ { - name: "Last job succeeded", - metricsComplete: []ksmstore.DDMetric{ + name: "countObjectsAggregator aggregates configmap.count", + labelsAsTags: map[string]map[string]string{ + "configmap": { + "test_label_1": "tag1", + "test_label_2": "tag2", + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-112", - "condition": "true", - }, - Val: 1, + Name: "kube_configmap_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "default-configmap-1", + "namespace": "default", + "label_test_label_1": "value1", + "label_test_label_2": "value2", + "label_test_label_3": "value3", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-114", - "condition": "true", - }, - Val: 1, + Name: "kube_configmap_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "default-configmap-2", + "namespace": "default", + "label_test_label_1": "value1", + "label_test_label_3": "value3", + }, + Val: 1, + }}, + }, + { + Name: "kube_configmap_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "test-configmap-2", + "namespace": "test", + "label_tags_datadoghq_com_env": "unittest", + "label_helm_sh_chart": "unittest", + }, + Val: 1, + }}, + }, + { + Name: "kube_configmap_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "default-configmap-1", + "namespace": "default", + }, + Val: 1, + }}, + }, + { + Name: "kube_configmap_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "default-configmap-2", + "namespace": "default", + }, + Val: 1, + }}, + }, + { + Name: "kube_configmap_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "default-configmap-3", + "namespace": "default", + 
}, + Val: 1, + }}, + }, + { + Name: "kube_configmap_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "test-configmap-1", + "namespace": "test", + }, + Val: 1, + }}, + }, + { + Name: "kube_configmap_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "configmap": "test-configmap-2", + "namespace": "test", + }, + Val: 1, + }}, }, }, - metricsFailed: []ksmstore.DDMetric{ + expectedMetrics: []metricsExpected{ { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-113", - "condition": "true", - }, - Val: 1, + val: 1, + name: "kubernetes_state.configmap.count", + tags: []string{"kube_namespace:default"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.configmap.count", + tags: []string{"kube_namespace:test"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.configmap.count", + tags: []string{"helm_chart:unittest", "env:unittest", "kube_namespace:test"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.configmap.count", + tags: []string{"kube_namespace:default", "tag1:value1", "tag2:value2", "tag3:value3"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.configmap.count", + tags: []string{"kube_namespace:default", "tag1:value1", "tag3:value3"}, + hostname: "", }, }, - expected: &serviceCheck{ - name: "kubernetes_state.cronjob.complete", - status: servicecheck.ServiceCheckOK, - tags: []string{"namespace:foo", "cronjob:bar"}, - message: "", + }, + { + name: "countObjectsAggregator aggregates pod.count", + labelsAsTags: map[string]map[string]string{ + "pod": { + "test_label_1": "tag1", + "test_label_2": "tag2", + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_pod_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-cptrq", + "namespace": "datadog", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + "label_tags_datadoghq_com_env": "test", + "label_tags_datadoghq_com_service": "datadog-agent", + "label_tags_datadoghq_com_version": "7", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-xrjxw", + "namespace": "datadog", + "uid": "4afcf55c-d399-433f-9a1e-64bb1dbefaa8", + "label_tags_datadoghq_com_env": "test", + "label_tags_datadoghq_com_service": "datadog-agent", + "label_tags_datadoghq_com_version": "7", + "label_test_label_2": "value2", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-sbzqv", + "namespace": "datadog", + "uid": "5afcf55c-d399-433f-9a1e-64bb1dbefaa8", + "label_tags_datadoghq_com_env": "test", + "label_tags_datadoghq_com_service": "datadog-agent", + "label_tags_datadoghq_com_version": "7", + "label_test_label_3": "value3", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-cptrq", + "namespace": "datadog", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + "hostip": "10.139.52.153", + "podip": "10.49.37.201", + "node": "nodeA", + "created_by_kind": "DaemonSet", + "created_by_name": "datadog-agent", + "priority_class": "", + "host_network": "false", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-xrjxw", + "namespace": "datadog", + "uid": 
"4afcf55c-d399-433f-9a1e-64bb1dbefaa8", + "hostip": "10.111.234.116", + "podip": "10.8.217.222", + "node": "nodeB", + "created_by_kind": "DaemonSet", + "created_by_name": "datadog-agent", + "priority_class": "", + "host_network": "false", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_info", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "datadog-agent-sbzqv", + "namespace": "datadog", + "uid": "5afcf55c-d399-433f-9a1e-64bb1dbefaa8", + "hostip": "10.161.24.133", + "podip": "10.209.11.206", + "node": "nodeC", + "created_by_kind": "DaemonSet", + "created_by_name": "datadog-agent", + "priority_class": "", + "host_network": "false", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.pod.count", + tags: []string{"kube_namespace:datadog", "tag1:value1", "env:test", "service:datadog-agent", "version:7", "kube_daemon_set:datadog-agent", "node:nodeA"}, + hostname: "nodeA", + }, + { + val: 1, + name: "kubernetes_state.pod.count", + tags: []string{"kube_namespace:datadog", "tag2:value2", "env:test", "service:datadog-agent", "version:7", "kube_daemon_set:datadog-agent", "node:nodeB"}, + hostname: "nodeB", + }, + { + val: 1, + name: "kubernetes_state.pod.count", + tags: []string{"kube_namespace:datadog", "tag3:value3", "env:test", "service:datadog-agent", "version:7", "kube_daemon_set:datadog-agent", "node:nodeC"}, + hostname: "nodeC", + }, }, }, { - name: "Last job failed", - metricsFailed: []ksmstore.DDMetric{ + name: "countObjectsAggregator aggregates job.count", + labelsAsTags: map[string]map[string]string{}, + ddMetricsFams: []ksmstore.DDMetricsFam{ { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-112", - "condition": "true", - }, - Val: 1, + Name: "kube_job_owner", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "a-1562319360", + "namespace": "test-ns-a", + // Why are the following tags empty? 
+ // See https://github.com/kubernetes/kube-state-metrics/issues/1919 + "owner_kind": "", + "owner_name": "", + "owner_is_controller": "", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_owner", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "a-1562319361", + "namespace": "test-ns-a", + "owner_kind": "", + "owner_name": "", + "owner_is_controller": "", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_owner", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "b-1562319360", + "namespace": "test-ns-b", + "owner_kind": "", + "owner_name": "", + "owner_is_controller": "", + }, + Val: 1, + }}, }, { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-114", - "condition": "true", + Name: "kube_job_owner", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "b-1562319361", + "namespace": "test-ns-b", + "owner_kind": "", + "owner_name": "", + "owner_is_controller": "", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 2, + name: "kubernetes_state.job.count", + tags: []string{"kube_namespace:test-ns-a"}, + hostname: "", + }, + { + val: 2, + name: "kubernetes_state.job.count", + tags: []string{"kube_namespace:test-ns-b"}, + hostname: "", + }, + }, + }, + { + name: "countObjectsAggregator aggregates cronjob.count", + labelsAsTags: map[string]map[string]string{}, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_cronjob_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "cronjob": "hello", + "namespace": "test-ns-a", + "label_tags_datadoghq_com_env": "test-env", + "label_tags_datadoghq_com_service": "hello-service", + "label_tags_datadoghq_com_version": "1.0.0", + "label_tag_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_cronjob_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "cronjob": "hello2", + "namespace": "test-ns-a", + "label_tags_datadoghq_com_env": "test-env", + "label_tags_datadoghq_com_service": "hello-service2", + "label_tags_datadoghq_com_version": "2.0.0", + "label_tag_1": "value1", + "label_tag_2": "value2", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.cronjob.count", + tags: []string{"kube_namespace:test-ns-a", "env:test-env", "service:hello-service", "version:1.0.0"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.cronjob.count", + tags: []string{"kube_namespace:test-ns-a", "env:test-env", "service:hello-service2", "version:2.0.0"}, + hostname: "", + }, + }, + }, + { + name: "countObjectsAggregator aggregates node.count", + labelsAsTags: map[string]map[string]string{}, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_node_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-a", + "label_tags_datadoghq_com_env": "test", + "label_app_kubernetes_io_name": "test-kubernetes-io-name", + "label_app_kubernetes_io_instance": "test-kubernetes-io-instance", + "label_app_kubernetes_io_version": "test-kubernetes-io-version", + "label_app_kubernetes_io_component": "test-kubernetes-io-component", + "label_app_kubernetes_io_part_of": "test-kubernetes-io-part-of", + "label_app_kubernetes_io_managed_by": "test-kubernetes-io-managed-by", + "label_topology_kubernetes_io_region": "test-topology-kubernetes-io-region", + "label_topology_kubernetes_io_zone": "test-topology-kubernetes-io-zone", + }, + Val: 1, + }}, + }, + { + Name: "kube_node_info", + ListMetrics: 
[]ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-a", + "kernel_version": "kernel-version-1", + "os_image": "os-image-1", + "container_runtime_version": "container-runtime-version-1", + "kubelet_version": "kubelet-version-1", + "kubeproxy_version": "kubelet-proxy-version-1", + "pod_cidr": "10.10.0.0/23", + "provider_id": "provider-id-1", + "system_uuid": "0190247a-0235-7df1-ba57-b3e3f55e69d9", + "internal_ip": "10.10.0.5", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.node.count", + tags: []string{ + "kube_app_instance:test-kubernetes-io-instance", + "kube_app_name:test-kubernetes-io-name", + "kernel_version:kernel-version-1", + "kubelet_version:kubelet-version-1", + "kube_zone:test-topology-kubernetes-io-zone", + "os_image:os-image-1", + "kube_app_managed_by:test-kubernetes-io-managed-by", + "kube_app_version:test-kubernetes-io-version", + "container_runtime_version:container-runtime-version-1", + "kube_app_component:test-kubernetes-io-component", + "kube_region:test-topology-kubernetes-io-region", + "kube_app_part_of:test-kubernetes-io-part-of", + "env:test", }, - Val: 1, + hostname: "", }, }, - metricsComplete: []ksmstore.DDMetric{ + }, + { + name: "countObjectsAggregator aggregates replicaset.count", + labelsAsTags: map[string]map[string]string{}, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_replicaset_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "replicaset": "squirtle-54cd675574", + "namespace": "pokemon", + "label_tags_datadoghq_com_env": "ddenv", + "label_tags_datadoghq_com_service": "ddservice", + "label_tags_datadoghq_com_version": "ddversion", + }, + Val: 1, + }}, + }, { - Labels: map[string]string{ - "namespace": "foo", - "job_name": "bar-113", - "condition": "true", + Name: "kube_replicaset_owner", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "replicaset": "squirtle-54cd675574", + "namespace": "pokemon", + "owner_kind": "Deployment", + "owner_name": "squirtle", + "owner_is_controller": "true", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.replicaset.count", + tags: []string{ + "kube_namespace:pokemon", + "kube_deployment:squirtle", + "owner_is_controller:true", + "env:ddenv", + "service:ddservice", + "version:ddversion", }, - Val: 1, + hostname: "", + }, + }, + }, + { + name: "resourceAggregator aggregates node.cpu_allocatable and node.cpu_allocatable.total", + labelsAsTags: map[string]map[string]string{}, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_node_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-a", + "label_tags_datadoghq_com_env": "test-env-a", + }, + Val: 1, + }}, + }, + { + Name: "kube_node_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-b", + "label_tags_datadoghq_com_env": "test-env-b", + }, + Val: 1, + }}, + }, + { + Name: "kube_node_status_allocatable", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-a", + "resource": "cpu", + "unit": "cpu", + }, + Val: 0.5, + }}, + }, + { + Name: "kube_node_status_allocatable", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-b", + "resource": "cpu", + "unit": "cpu", + }, + Val: 1.5, + }}, + }, + { + Name: "kube_node_status_allocatable", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-c", + "resource": "cpu", + "unit": 
"cpu", + }, + Val: 0.75, + }}, + }, + { + Name: "kube_node_status_allocatable", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "node": "node-d", + "resource": "cpu", + "unit": "cpu", + }, + Val: 0.8, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 0.5, + name: "kubernetes_state.node.cpu_allocatable", + tags: []string{"env:test-env-a", "resource:cpu", "unit:cpu", "node:node-a"}, + hostname: "node-a", + }, + { + val: 1.5, + name: "kubernetes_state.node.cpu_allocatable", + tags: []string{"env:test-env-b", "resource:cpu", "unit:cpu", "node:node-b"}, + hostname: "node-b", + }, + { + val: 0.75, + name: "kubernetes_state.node.cpu_allocatable", + tags: []string{"resource:cpu", "unit:cpu", "node:node-c"}, + hostname: "node-c", + }, + { + val: 0.8, + name: "kubernetes_state.node.cpu_allocatable", + tags: []string{"resource:cpu", "unit:cpu", "node:node-d"}, + hostname: "node-d", + }, + { + val: 0.5, + name: "kubernetes_state.node.cpu_allocatable.total", + tags: []string{"env:test-env-a"}, + hostname: "", + }, + { + val: 1.5, + name: "kubernetes_state.node.cpu_allocatable.total", + tags: []string{"env:test-env-b"}, + hostname: "", + }, + { + val: 1.55, + name: "kubernetes_state.node.cpu_allocatable.total", + tags: []string{}, + hostname: "", }, }, - expected: &serviceCheck{ - name: "kubernetes_state.cronjob.complete", - status: servicecheck.ServiceCheckCritical, - tags: []string{"namespace:foo", "cronjob:bar"}, - message: "", + }, + { + name: "resourceAggregator aggregates container.memory_requested.total", + labelsAsTags: map[string]map[string]string{ + "pod": { + "test_label_1": "tag1", + "test_label_2": "tag2", + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_pod_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "pod": "abc-797f764658-458wg", + "namespace": "ns-a", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + "label_tags_datadoghq_com_env": "test", + "label_tags_datadoghq_com_service": "abc-service", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + "label_test_label_2": "value2", + }, + Val: 1, + }}, + }, + { + Name: "kube_pod_container_resource_with_owner_tag_requests", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "container": "container-abc", + "namespace": "ns-a", + "node": "node-a", + "resource": "memory", + "unit": "byte", + "owner_kind": "Deployment", + "owner_name": "deployment-abc", + "pod": "abc-797f764658-458wg", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + }, + Val: 10, + }}, + }, + { + Name: "kube_pod_container_resource_with_owner_tag_requests", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "container": "container-def", + "namespace": "ns-a", + "node": "node-a", + "resource": "memory", + "unit": "byte", + "owner_kind": "Deployment", + "owner_name": "deployment-abc", + "pod": "abc-797f764658-458wg", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + }, + Val: 10, + }}, + }, + { + Name: "kube_pod_container_resource_with_owner_tag_requests", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "container": "container-hij", + "namespace": "ns-a", + "node": "node-a", + "resource": "memory", + "unit": "byte", + "owner_kind": "Deployment", + "owner_name": "deployment-abc", + "pod": "abc-797f764658-458wg", + "uid": "3bfcf55c-d399-433f-9a1e-64bb1dbefaa8", + }, + Val: 10, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + // We don't expect kube_container_name here because this 
is a pod-level metric. + { + val: 30, + name: "kubernetes_state.container.memory_requested.total", + tags: []string{"tag1:value1", "tag2:value2", "env:test", "service:abc-service", "kube_namespace:ns-a", "node:node-a", "resource:memory", "unit:byte", "version:1.0.0", "kube_deployment:deployment-abc", "pod_name:abc-797f764658-458wg"}, + hostname: "node-a", + }, + }, + }, + { + name: "lastCronJobCompleteAggregator aggregates job.completion.succeeded and job.complete", + labelsAsTags: map[string]map[string]string{ + "job": { + "test_label_1": "tag1", + "test_label_2": "tag2", + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319360", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319361", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562391362", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "konnichiwa-1562133906", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-konnichiwa", + "label_tags_datadoghq_com_version": "1.2.0", + "label_test_label_2": "value2", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "bonjour-1562134910", + "namespace": "ns-test-1", + "label_tags_datadoghq_com_version": "2.0.0", + "label_test_label_3": "value3", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_complete", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319360", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_complete", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319361", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_complete", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562391362", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_complete", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "konnichiwa-1562133906", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_complete", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "bonjour-1562134910", + "namespace": "ns-test-1", + "condition": "true", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.job.completion.succeeded", 
+ tags: []string{"kube_job:hello-1562319360", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.succeeded", + tags: []string{"kube_job:hello-1562319361", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.succeeded", + tags: []string{"kube_job:hello-1562391362", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.succeeded", + tags: []string{"kube_job:konnichiwa-1562133906", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag2:value2", "service:service-konnichiwa", "version:1.2.0", "kube_cronjob:konnichiwa"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.succeeded", + tags: []string{"kube_job:bonjour-1562134910", "kube_namespace:ns-test-1", "kube_namespace:ns-test-1", "condition:true", "tag3:value3", "version:2.0.0", "kube_cronjob:bonjour"}, + hostname: "", + }, + }, + expectedServiceChecks: []serviceCheck{ + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckOK, + hostname: "", + tags: []string{"kube_job:hello-1562319360", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckOK, + hostname: "", + tags: []string{"kube_job:hello-1562319361", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckOK, + hostname: "", + tags: []string{"kube_job:hello-1562391362", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckOK, + hostname: "", + tags: []string{"kube_job:konnichiwa-1562133906", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag2:value2", "service:service-konnichiwa", "version:1.2.0", "kube_cronjob:konnichiwa"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckOK, + hostname: "", + tags: []string{"kube_job:bonjour-1562134910", "kube_namespace:ns-test-1", "kube_namespace:ns-test-1", "condition:true", "tag3:value3", "version:2.0.0", "kube_cronjob:bonjour"}, + message: "", + }, + }, + }, + { + name: "lastCronJobFailedAggregator aggregates job.completion.failure and job.complete", + labelsAsTags: map[string]map[string]string{ + "job": { + "test_label_1": "tag1", + "test_label_2": "tag2", + "test_label_3": "tag3", + }, + }, + ddMetricsFams: []ksmstore.DDMetricsFam{ + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319360", + "namespace": "ns-test", + 
"label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319361", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562391362", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-hello", + "label_tags_datadoghq_com_version": "1.0.0", + "label_test_label_1": "value1", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "konnichiwa-1562133906", + "namespace": "ns-test", + "label_tags_datadoghq_com_env": "env-test", + "label_tags_datadoghq_com_service": "service-konnichiwa", + "label_tags_datadoghq_com_version": "1.2.0", + "label_test_label_2": "value2", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_labels", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "bonjour-1562134910", + "namespace": "ns-test-1", + "label_tags_datadoghq_com_version": "2.0.0", + "label_test_label_3": "value3", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_failed", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319360", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_failed", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562319361", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_failed", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "hello-1562391362", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_failed", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "konnichiwa-1562133906", + "namespace": "ns-test", + "condition": "true", + }, + Val: 1, + }}, + }, + { + Name: "kube_job_failed", + ListMetrics: []ksmstore.DDMetric{{ + Labels: map[string]string{ + "job_name": "bonjour-1562134910", + "namespace": "ns-test-1", + "condition": "true", + }, + Val: 1, + }}, + }, + }, + expectedMetrics: []metricsExpected{ + { + val: 1, + name: "kubernetes_state.job.completion.failed", + tags: []string{"kube_job:hello-1562319360", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.failed", + tags: []string{"kube_job:hello-1562319361", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.failed", + tags: []string{"kube_job:hello-1562391362", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + hostname: "", + }, + 
{ + val: 1, + name: "kubernetes_state.job.completion.failed", + tags: []string{"kube_job:konnichiwa-1562133906", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag2:value2", "service:service-konnichiwa", "version:1.2.0", "kube_cronjob:konnichiwa"}, + hostname: "", + }, + { + val: 1, + name: "kubernetes_state.job.completion.failed", + tags: []string{"kube_job:bonjour-1562134910", "kube_namespace:ns-test-1", "kube_namespace:ns-test-1", "condition:true", "tag3:value3", "version:2.0.0", "kube_cronjob:bonjour"}, + hostname: "", + }, + }, + expectedServiceChecks: []serviceCheck{ + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckCritical, + hostname: "", + tags: []string{"kube_job:hello-1562319360", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckCritical, + hostname: "", + tags: []string{"kube_job:hello-1562319361", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckCritical, + hostname: "", + tags: []string{"kube_job:hello-1562391362", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag1:value1", "service:service-hello", "version:1.0.0", "kube_cronjob:hello"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckCritical, + hostname: "", + tags: []string{"kube_job:konnichiwa-1562133906", "kube_namespace:ns-test", "kube_namespace:ns-test", "env:env-test", "condition:true", "tag2:value2", "service:service-konnichiwa", "version:1.2.0", "kube_cronjob:konnichiwa"}, + message: "", + }, + { + name: "kubernetes_state.job.complete", + status: servicecheck.ServiceCheckCritical, + hostname: "", + tags: []string{"kube_job:bonjour-1562134910", "kube_namespace:ns-test-1", "kube_namespace:ns-test-1", "condition:true", "tag3:value3", "version:2.0.0", "kube_cronjob:bonjour"}, + message: "", + }, }, }, } - ksmCheck := newKSMCheck(core.NewCheckBase(CheckName), &KSMConfig{}) - for _, tt := range tests { - s := mocksender.NewMockSender("ksm") - s.SetupAcceptAll() - t.Run(tt.name, func(t *testing.T) { - agg := newLastCronJobAggregator() - aggComplete := &lastCronJobCompleteAggregator{aggregator: agg} - aggFailed := &lastCronJobFailedAggregator{aggregator: agg} - - for _, metric := range tt.metricsComplete { - aggComplete.accumulate(metric) + s := mocksender.NewMockSender("ksm") + s.SetupAcceptAll() + if tt.labelsAsTags == nil { + tt.labelsAsTags = make(map[string]map[string]string) } - for _, metric := range tt.metricsFailed { - aggFailed.accumulate(metric) + ksmCheck := newKSMCheck( + core.NewCheckBase(CheckName), + &KSMConfig{ + LabelJoins: make(map[string]*JoinsConfigWithoutLabelsMapping), + LabelsAsTags: tt.labelsAsTags, + LabelsMapper: make(map[string]string), + }, + ) + ksmCheck.mergeLabelJoins(defaultLabelJoins()) + ksmCheck.processLabelJoins() + ksmCheck.processLabelsAsTags() + ksmCheck.mergeAnnotationsAsTags(defaultAnnotationsAsTags()) + ksmCheck.processAnnotationsAsTags() + ksmCheck.mergeLabelsMapper(defaultLabelsMapper()) + ddMetricsFamilies := map[string][]ksmstore.DDMetricsFam{ + "test": nil, } - - agg.flush(s, ksmCheck, 
newLabelJoiner(ksmCheck.instance.labelJoins))
-
- s.AssertServiceCheck(t, tt.expected.name, tt.expected.status, "", tt.expected.tags, tt.expected.message)
- s.AssertNumberOfCalls(t, "ServiceCheck", 1)
-
- // Ingest the metrics in the other order
- for _, metric := range tt.metricsFailed {
- aggFailed.accumulate(metric)
+ ddMetricsFamilies["test"] = append(ddMetricsFamilies["test"], tt.ddMetricsFams...)
+ lj := newLabelJoiner(ksmCheck.instance.labelJoins)
+ // NOTE: Metrics with a value of 0 are not inserted into the labelJoiner.
+ // See (*KSMCheck).metricFilter() for more details.
+ lj.insertFamilies(ddMetricsFamilies)
+ ksmCheck.processMetrics(s, ddMetricsFamilies, lj, time.Now())
+ for _, m := range tt.expectedMetrics {
+ s.AssertMetric(t, "Gauge", m.name, m.val, m.hostname, m.tags)
 }
- for _, metric := range tt.metricsComplete {
- aggComplete.accumulate(metric)
+ for _, sc := range tt.expectedServiceChecks {
+ s.AssertServiceCheck(t, sc.name, sc.status, sc.hostname, sc.tags, sc.message)
 }
-
- agg.flush(s, ksmCheck, newLabelJoiner(ksmCheck.instance.labelJoins))
-
- s.AssertServiceCheck(t, tt.expected.name, tt.expected.status, "", tt.expected.tags, tt.expected.message)
- s.AssertNumberOfCalls(t, "ServiceCheck", 2)
 })
 }
 }
diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_defaults.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_defaults.go
index 3cc481dda3fa7..9f9f7f407b864 100644
--- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_defaults.go
+++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_defaults.go
@@ -156,13 +156,69 @@ func defaultLabelJoins() map[string]*JoinsConfigWithoutLabelsMapping {
 }
 return map[string]*JoinsConfigWithoutLabelsMapping{
- "kube_pod_status_phase": {
- LabelsToMatch: getLabelToMatchForKind("pod"),
+ // Metrics definition: https://github.com/kubernetes/kube-state-metrics/tree/main/docs/metrics
+ // Cluster
+ "kube_namespace_status_phase": {
+ LabelsToMatch: getLabelToMatchForKind("namespace"),
 LabelsToGet: []string{"phase"},
 },
- "kube_pod_info": {
- LabelsToMatch: getLabelToMatchForKind("pod"),
- LabelsToGet: []string{"node", "created_by_kind", "created_by_name", "priority_class"},
+ "kube_node_labels": {
+ LabelsToMatch: getLabelToMatchForKind("node"),
+ LabelsToGet: []string{
+ "label_tags_datadoghq_com_env",
+ "label_app_kubernetes_io_name",
+ "label_app_kubernetes_io_instance",
+ "label_app_kubernetes_io_version",
+ "label_app_kubernetes_io_component",
+ "label_app_kubernetes_io_part_of",
+ "label_app_kubernetes_io_managed_by",
+ "label_topology_kubernetes_io_region", // k8s v1.17+
+ "label_topology_kubernetes_io_zone", // k8s v1.17+
+ "label_failure_domain_beta_kubernetes_io_region", // k8s < v1.17
+ "label_failure_domain_beta_kubernetes_io_zone", // k8s < v1.17
+ },
+ },
+ "kube_node_info": {
+ LabelsToMatch: getLabelToMatchForKind("node"),
+ LabelsToGet: []string{"kernel_version", "os_image", "container_runtime_version", "kubelet_version"},
+ },
+ "kube_node_status_allocatable": {
+ LabelsToMatch: getLabelToMatchForKind("node"),
+ LabelsToGet: []string{},
+ },
+ "kube_node_status_capacity": {
+ LabelsToMatch: getLabelToMatchForKind("node"),
+ LabelsToGet: []string{},
+ },
+ // Service
+ "kube_endpoint_labels": {
+ LabelsToMatch: getLabelToMatchForKind("endpoint"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_ingress_labels": {
+ LabelsToMatch: getLabelToMatchForKind("ingress"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_service_spec_type": {
+ LabelsToMatch: getLabelToMatchForKind("service"),
+ LabelsToGet: []string{"namespace", "type"},
+ },
+ // Storage
+ "kube_configmap_labels": {
+ LabelsToMatch: getLabelToMatchForKind("configmap"),
+ LabelsToGet: defaultStandardLabels,
+ },
+ "kube_configmap_info": {
+ LabelsToMatch: getLabelToMatchForKind("configmap"),
+ LabelsToGet: []string{"namespace"},
+ },
+ "kube_persistentvolume_labels": {
+ LabelsToMatch: getLabelToMatchForKind("persistentvolume"),
+ LabelsToGet: defaultStandardLabels,
+ },
+ "kube_persistentvolume_status_phase": {
+ LabelsToMatch: getLabelToMatchForKind("persistentvolume"),
+ LabelsToGet: []string{"phase"},
 },
 "kube_persistentvolume_info": {
 LabelsToMatch: getLabelToMatchForKind("persistentvolume"),
@@ -172,50 +228,79 @@ func defaultLabelJoins() map[string]*JoinsConfigWithoutLabelsMapping {
 LabelsToMatch: getLabelToMatchForKind("persistentvolumeclaim"),
 LabelsToGet: []string{"storageclass"},
 },
+ "kube_secret_info": {
+ LabelsToMatch: getLabelToMatchForKind("secret"),
+ LabelsToGet: []string{"secret", "namespace"},
+ },
+ // Workload
+ "kube_cronjob_labels": {
+ LabelsToMatch: getLabelToMatchForKind("cronjob"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_daemonset_labels": {
+ LabelsToMatch: getLabelToMatchForKind("daemonset"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_deployment_labels": {
+ LabelsToMatch: getLabelToMatchForKind("deployment"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_horizontalpodautoscaler_labels": {
+ LabelsToMatch: getLabelToMatchForKind("horizontalpodautoscaler"),
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
+ },
+ "kube_job_owner": {
+ LabelsToMatch: getLabelToMatchForKind("job"),
+ LabelsToGet: []string{"namespace", "owner_kind", "owner_name", "owner_is_controller"},
+ },
+ "kube_job_complete": {
+ LabelsToMatch: getLabelToMatchForKind("job"),
+ LabelsToGet: []string{"namespace", "condition"},
+ },
+ "kube_job_failed": {
+ LabelsToMatch: getLabelToMatchForKind("job"),
+ LabelsToGet: []string{"namespace", "condition"},
+ },
+ "kube_job_labels": {
+ LabelsToMatch: getLabelToMatchForKind("job"),
+ LabelsToGet: defaultStandardLabels,
+ },
 "kube_pod_labels": {
 LabelsToMatch: getLabelToMatchForKind("pod"),
 LabelsToGet: defaultStandardLabels,
 },
+ "kube_pod_status_phase": {
+ LabelsToMatch: getLabelToMatchForKind("pod"),
+ LabelsToGet: []string{"phase"},
+ },
 "kube_pod_status_reason": {
 LabelsToMatch: getLabelToMatchForKind("pod"),
 LabelsToGet: []string{"reason"},
 },
- "kube_deployment_labels": {
- LabelsToMatch: getLabelToMatchForKind("deployment"),
- LabelsToGet: defaultStandardLabels,
+ "kube_pod_info": {
+ LabelsToMatch: getLabelToMatchForKind("pod"),
+ LabelsToGet: []string{"node", "created_by_kind", "created_by_name", "priority_class", "namespace"},
 },
 "kube_replicaset_labels": {
 LabelsToMatch: getLabelToMatchForKind("replicaset"),
- LabelsToGet: defaultStandardLabels,
+ LabelsToGet: append(defaultStandardLabels, []string{"namespace"}...),
 },
- "kube_daemonset_labels": {
- LabelsToMatch: getLabelToMatchForKind("daemonset"),
- LabelsToGet: defaultStandardLabels,
+ "kube_replicaset_owner": {
+ LabelsToMatch: getLabelToMatchForKind("replicaset"),
+ LabelsToGet: []string{"namespace", "owner_kind", "owner_name", "owner_is_controller"},
 },
 "kube_statefulset_labels": {
 LabelsToMatch: getLabelToMatchForKind("statefulset"),
 LabelsToGet: defaultStandardLabels,
 },
- "kube_job_labels": {
- LabelsToMatch: getLabelToMatchForKind("job"),
- LabelsToGet: defaultStandardLabels,
- },
- "kube_cronjob_labels": {
- LabelsToMatch: getLabelToMatchForKind("cronjob"),
- LabelsToGet: defaultStandardLabels,
- },
- "kube_node_labels": {
- LabelsToMatch: getLabelToMatchForKind("node"),
- LabelsToGet: []string{
- "label_topology_kubernetes_io_region", // k8s v1.17+
- "label_topology_kubernetes_io_zone", // k8s v1.17+
- "label_failure_domain_beta_kubernetes_io_region", // k8s < v1.17
- "label_failure_domain_beta_kubernetes_io_zone", // k8s < v1.17
- },
+ // Custom metrics defined on the datadog-agent side
+ "kube_pod_container_resource_with_owner_tag_requests": {
+ LabelsToMatch: getLabelToMatchForKind("pod"),
+ LabelsToGet: []string{"pod", "node", "resource", "unit", "owner_kind", "owner_name", "namespace"},
 },
- "kube_node_info": {
- LabelsToMatch: getLabelToMatchForKind("node"),
- LabelsToGet: []string{"container_runtime_version", "kernel_version", "kubelet_version", "os_image"},
+ "kube_pod_container_resource_with_owner_tag_limits": {
+ LabelsToMatch: getLabelToMatchForKind("pod"),
+ LabelsToGet: []string{"pod", "node", "resource", "unit", "owner_kind", "owner_name", "namespace"},
 },
 }
 }
@@ -237,6 +322,8 @@ func getLabelToMatchForKind(kind string) []string {
 return []string{"node"}
 case "persistentvolume": // persistent volumes are not namespaced
 return []string{"persistentvolume"}
+ case "namespace": // namespaces are not namespaced
+ return []string{"namespace"}
 default:
 return []string{kind, "namespace"}
 }
diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_test.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_test.go
index 6b552d3654527..8447eed690a66 100644
--- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_test.go
+++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_test.go
@@ -487,34 +487,6 @@ func TestProcessMetrics(t *testing.T) {
 },
 },
 },
- {
- name: "phase tag for ns",
- config: &KSMConfig{LabelsMapper: defaultLabelsMapper()},
- metricsToProcess: map[string][]ksmstore.DDMetricsFam{
- "kube_namespace_status_phase": {
- {
- Type: "*v1.Namespace",
- Name: "kube_namespace_status_phase",
- ListMetrics: []ksmstore.DDMetric{
- {
- Labels: map[string]string{"namespace": "default", "phase": "Active"},
- Val: 1,
- },
- },
- },
- },
- },
- metricsToGet: []ksmstore.DDMetricsFam{},
- metricTransformers: defaultMetricTransformers(),
- expected: []metricsExpected{
- {
- name: "kubernetes_state.namespace.count",
- val: 1,
- tags: []string{"phase:Active"},
- hostname: "",
- },
- },
- },
 {
 name: "ingress metric",
 config: &KSMConfig{LabelsMapper: defaultLabelsMapper()},
diff --git a/releasenotes/notes/expand-aggregator-metric-tags-d941cee0ebe64b1f.yaml b/releasenotes/notes/expand-aggregator-metric-tags-d941cee0ebe64b1f.yaml
new file mode 100644
index 0000000000000..09afe36a6bf88
--- /dev/null
+++ b/releasenotes/notes/expand-aggregator-metric-tags-d941cee0ebe64b1f.yaml
@@ -0,0 +1,13 @@
+# Each section from every release note is combined when the
+# CHANGELOG.rst is rendered. So the text needs to be worded so that
+# it does not depend on any information only available in another
+# section. This may mean repeating some details, but each section
+# must be readable independently of the others.
+#
+# Each section note must be formatted as reStructuredText.
+---
+enhancements:
+  - |
+    ``kubernetes_state.*.count``, ``kubernetes_state.node.{cpu,memory,gpu,mig}_allocatable.total``,
+    ``kubernetes_state.job.complete``, ``kubernetes_state.job.completion.{succeeded,failed}``,
+    and other aggregated metrics now support standard labels and user-defined labels through the LabelsAsTags config.