From e2aad6063992918fb63b937c5cad15262cf0b61d Mon Sep 17 00:00:00 2001 From: csongnr <115833851+csongnr@users.noreply.github.com> Date: Fri, 19 Jul 2024 16:35:54 -0700 Subject: [PATCH] [nr-k8s-otel-collector] low data mode default to true, add host metrics to ldm (#1429) #### Is this a new chart #### What this PR does / why we need it: #### Which issue this PR fixes *(optional, in `fixes #(, fixes #, ...)` format, will close that issue when PR gets merged)* - fixes # #### Special notes for your reviewer: #### Checklist [Place an '[x]' (no spaces) in all applicable fields. Please remove unrelated fields.] - [x] Chart Version bumped - [x] Variables are documented in the README.md - [x] Title of the PR starts with chart name (e.g. `[mychartname]`) --- charts/nr-k8s-otel-collector/Chart.yaml | 2 +- charts/nr-k8s-otel-collector/README.md | 2 +- .../templates/daemonset-configmap.yaml | 85 ++++++++++++++++++- .../templates/deployment-configmap.yaml | 16 +++- charts/nr-k8s-otel-collector/values.yaml | 4 +- 5 files changed, 102 insertions(+), 7 deletions(-) diff --git a/charts/nr-k8s-otel-collector/Chart.yaml b/charts/nr-k8s-otel-collector/Chart.yaml index 010005021..7f2c47dc8 100644 --- a/charts/nr-k8s-otel-collector/Chart.yaml +++ b/charts/nr-k8s-otel-collector/Chart.yaml @@ -17,7 +17,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.6.0 +version: 0.7.0 dependencies: - name: common-library diff --git a/charts/nr-k8s-otel-collector/README.md b/charts/nr-k8s-otel-collector/README.md index f996465cc..286547202 100644 --- a/charts/nr-k8s-otel-collector/README.md +++ b/charts/nr-k8s-otel-collector/README.md @@ -105,7 +105,7 @@ daemonset: | kube-state-metrics.prometheusScrape | bool | `false` | Disable prometheus from auto-discovering KSM and potentially scraping duplicated data | | labels | object | `{}` | Additional labels for chart objects | | licenseKey | string | `""` | This set this license key to use. Can be configured also with `global.licenseKey` | -| lowDataMode | bool | `false` | Send only the metrics required to light up the NR kubernetes UI | +| lowDataMode | bool | `false` | Send only the metrics required to light up the NR kubernetes UI, this agent defaults to setting lowDataMode true, but if this setting is unset, lowDataMode will be set to false | | nodeSelector | object | `{}` | Sets all pods' node selector. Can be configured also with `global.nodeSelector` | | nrStaging | bool | `false` | Send the metrics to the staging backend. Requires a valid staging license key. Can be configured also with `global.nrStaging` | | podLabels | object | `{}` | Additional labels for chart pods | diff --git a/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml b/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml index 3377b65fb..88c8e9a60 100644 --- a/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml +++ b/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml @@ -305,6 +305,55 @@ data: - value: 'false' new_value: 'true' + metricstransform/hostmetrics: + transforms: + - include: process\.(cpu\.utilization|disk\.io|memory\.(usage|virtual)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: system\.cpu\.(utilization|load_average\.(15m|1m|5m)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: system\.disk\.(io_time|operation_time|operations) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: system\.(filesystem|memory)\.(usage|utilization) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: system\.network\.(errors|io|packets) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + filter/exclude_metrics_low_data_mode: metrics: metric: @@ -317,6 +366,22 @@ data: - truncate_all(attributes, 4095) - truncate_all(resource.attributes, 4095) + # group system.cpu metrics by cpu + metricstransform/hostmetrics_cpu: + transforms: + - include: system.cpu.utilization + action: update + operations: + - action: aggregate_labels + label_set: [ state ] + aggregation_type: mean + - include: system.paging.operations + action: update + operations: + - action: aggregate_labels + label_set: [ direction ] + aggregation_type: sum + # following system.% metrics reduce metrics reported by hostmetrics receiver filter/exclude_cpu_utilization: metrics: @@ -378,10 +443,23 @@ data: - key: type action: delete + resourcedetection/env: + detectors: ["env", "system"] + override: false + system: + hostname_sources: ["os"] + resource_attributes: + host.id: + enabled: true + resourcedetection/cloudproviders: - detectors: [env, gcp, eks, azure, aks, ec2, ecs] + detectors: [gcp, eks, azure, aks, ec2, ecs] timeout: 2s override: false + ec2: + resource_attributes: + host.name: + enabled: false resource: attributes: @@ -480,7 +558,7 @@ data: {{- if or .Values.receivers.hostmetrics.enabled (or .Values.receivers.kubeletstats.enabled .Values.receivers.prometheus.enabled) }} metrics: receivers: - {{- if and .Values.receivers.hostmetrics.enabled (include "newrelic.common.privileged" .) (not (include "newrelic.common.lowDataMode" .)) }} + {{- if and .Values.receivers.hostmetrics.enabled (include "newrelic.common.privileged" .) }} - hostmetrics {{- end }} {{- if .Values.receivers.kubeletstats.enabled }} @@ -495,8 +573,10 @@ data: - metricstransform/kubeletstats - metricstransform/cadvisor - metricstransform/kubelet + - metricstransform/hostmetrics - filter/exclude_metrics_low_data_mode {{- end }} + - metricstransform/hostmetrics_cpu - transform/truncate - filter/exclude_cpu_utilization - filter/exclude_memory_utilization @@ -508,6 +588,7 @@ data: - filter/exclude_system_paging - filter/exclude_network - attributes/exclude_system_paging + - resourcedetection/env - resourcedetection/cloudproviders - resource {{- if include "newrelic.common.lowDataMode" . }} diff --git a/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml b/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml index 5eab58430..bb76a470b 100644 --- a/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml +++ b/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml @@ -389,10 +389,23 @@ data: datapoint: - metric.name == "kube_pod_container_status" and value_double == 0.0 + resourcedetection/env: + detectors: ["env", "system"] + override: false + system: + hostname_sources: ["os"] + resource_attributes: + host.id: + enabled: true + resourcedetection/cloudproviders: - detectors: [env, gcp, eks, azure, aks, ec2, ecs] + detectors: [gcp, eks, azure, aks, ec2, ecs] timeout: 2s override: false + ec2: + resource_attributes: + host.name: + enabled: false resource/metrics: attributes: @@ -519,6 +532,7 @@ data: - resource/low_data_mode_inator {{- end }} - resource/metrics + - resourcedetection/env - resourcedetection/cloudproviders - batch - groupbyattrs diff --git a/charts/nr-k8s-otel-collector/values.yaml b/charts/nr-k8s-otel-collector/values.yaml index a1455fa8a..06b410bfb 100644 --- a/charts/nr-k8s-otel-collector/values.yaml +++ b/charts/nr-k8s-otel-collector/values.yaml @@ -142,6 +142,6 @@ receivers: # @default -- `true` enabled: true -# -- (bool) Send only the metrics required to light up the NR kubernetes UI +# -- (bool) Send only the metrics required to light up the NR kubernetes UI, this agent defaults to setting lowDataMode true, but if this setting is unset, lowDataMode will be set to false # @default -- `false` -lowDataMode: +lowDataMode: true