diff --git a/charts/nr-k8s-otel-collector/Chart.yaml b/charts/nr-k8s-otel-collector/Chart.yaml index cd9b8c742..010005021 100644 --- a/charts/nr-k8s-otel-collector/Chart.yaml +++ b/charts/nr-k8s-otel-collector/Chart.yaml @@ -17,7 +17,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.3 +version: 0.6.0 dependencies: - name: common-library diff --git a/charts/nr-k8s-otel-collector/README.md b/charts/nr-k8s-otel-collector/README.md index e38da4322..f996465cc 100644 --- a/charts/nr-k8s-otel-collector/README.md +++ b/charts/nr-k8s-otel-collector/README.md @@ -105,6 +105,7 @@ daemonset: | kube-state-metrics.prometheusScrape | bool | `false` | Disable prometheus from auto-discovering KSM and potentially scraping duplicated data | | labels | object | `{}` | Additional labels for chart objects | | licenseKey | string | `""` | This set this license key to use. Can be configured also with `global.licenseKey` | +| lowDataMode | bool | `false` | Send only the metrics required to light up the NR Kubernetes UI | | nodeSelector | object | `{}` | Sets all pods' node selector. Can be configured also with `global.nodeSelector` | | nrStaging | bool | `false` | Send the metrics to the staging backend. Requires a valid staging license key.
Can be configured also with `global.nrStaging` | | podLabels | object | `{}` | Additional labels for chart pods | @@ -114,14 +115,12 @@ daemonset: | rbac.create | bool | `true` | Specifies whether RBAC resources should be created | | receivers.filelog.enabled | bool | `true` | Specifies whether the `filelog` receiver is enabled | | receivers.hostmetrics.enabled | bool | `true` | Specifies whether the `hostmetrics` receiver is enabled | -| receivers.hostmetrics.scrapeInterval | string | `20s` | Sets the scrape interval for the `hostmetrics` receiver | -| receivers.k8sCluster.enabled | bool | `true` | Specifies whether the `k8s_cluster` receiver is enabled | -| receivers.k8sCluster.scrapeInterval | string | `20s` | Sets the scrape interval for the `k8s_cluster` receiver | +| receivers.hostmetrics.scrapeInterval | string | `1m` | Sets the scrape interval for the `hostmetrics` receiver | | receivers.k8sEvents.enabled | bool | `true` | Specifies whether the `k8s_events` receiver is enabled | | receivers.kubeletstats.enabled | bool | `true` | Specifies whether the `kubeletstats` receiver is enabled | -| receivers.kubeletstats.scrapeInterval | string | `20s` | Sets the scrape interval for the `kubeletstats` receiver | +| receivers.kubeletstats.scrapeInterval | string | `1m` | Sets the scrape interval for the `kubeletstats` receiver | | receivers.prometheus.enabled | bool | `true` | Specifies whether the `prometheus` receiver is enabled | -| receivers.prometheus.scrapeInterval | string | `20s` | Sets the scrape interval for the `prometheus` receiver | +| receivers.prometheus.scrapeInterval | string | `1m` | Sets the scrape interval for the `prometheus` receiver | | serviceAccount | object | See `values.yaml` | Settings controlling ServiceAccount creation | | serviceAccount.create | bool | `true` | Specifies whether a ServiceAccount should be created | | tolerations | list | `[]` | Sets all pods' tolerations to node taints. 
Can be configured also with `global.tolerations` | diff --git a/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml b/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml index 5d81b398f..3377b65fb 100644 --- a/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml +++ b/charts/nr-k8s-otel-collector/templates/daemonset-configmap.yaml @@ -205,6 +205,111 @@ data: to: resource["k8s.pod.uid"] processors: + metricstransform/ldm: + transforms: + - include: .* + match_type: regexp + action: update + operations: + - action: add_label + new_label: low.data.mode + new_value: 'false' + + metricstransform/kubeletstats: + transforms: + - include: container\.(cpu\.utilization|filesystem\.(capacity|usage)|memory\.usage) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: k8s\.node\.(cpu\.(time|utilization)|filesystem\.(capacity|usage)|memory\.(available|working_set)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: k8s\.pod\.(filesystem\.(available|capacity|usage)|memory\.(working_set)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + metricstransform/cadvisor: + transforms: + - include: container_cpu_(cfs_(periods_total|throttled_periods_total)|usage_seconds_total) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: container_memory_working_set_bytes + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: 
container_network_(working_set_bytes|receive_(bytes_total|errors_total)|transmit_(bytes_total|errors_total)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: container_spec_memory_limit_bytes + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + metricstransform/kubelet: + transforms: + - include: go_(goroutines|threads) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: process_resident_memory_bytes + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + filter/exclude_metrics_low_data_mode: + metrics: + metric: + - 'HasAttrOnDatapoint("low.data.mode", "false")' + transform/truncate: log_statements: - context: log @@ -295,6 +400,22 @@ data: - key: service_name action: delete + transform/low_data_mode_inator: + metric_statements: + - context: metric + statements: + - set(description, "") + - set(unit, "") + + resource/low_data_mode_inator: + attributes: + - key: http.scheme + action: delete + - key: net.host.name + action: delete + - key: net.host.port + action: delete + k8sattributes: auth_type: "serviceAccount" passthrough: false @@ -328,7 +449,15 @@ data: action: upsert from_attribute: namespace + memory_limiter: + check_interval: 1s + limit_percentage: 80 + spike_limit_percentage: 25 + batch: + send_batch_max_size: 1000 + timeout: 30s + send_batch_size : 800 exporters: otlphttp/newrelic: @@ -351,7 +480,7 @@ data: {{- if or .Values.receivers.hostmetrics.enabled (or .Values.receivers.kubeletstats.enabled .Values.receivers.prometheus.enabled) }} metrics: receivers: - {{- if and .Values.receivers.hostmetrics.enabled (include 
"newrelic.common.privileged" .) }} + {{- if and .Values.receivers.hostmetrics.enabled (include "newrelic.common.privileged" .) (not (include "newrelic.common.lowDataMode" .)) }} - hostmetrics {{- end }} {{- if .Values.receivers.kubeletstats.enabled }} @@ -361,7 +490,14 @@ data: - prometheus {{- end }} processors: - # - transform/truncate + {{- if include "newrelic.common.lowDataMode" . }} + - metricstransform/ldm + - metricstransform/kubeletstats + - metricstransform/cadvisor + - metricstransform/kubelet + - filter/exclude_metrics_low_data_mode + {{- end }} + - transform/truncate - filter/exclude_cpu_utilization - filter/exclude_memory_utilization - filter/exclude_memory_usage @@ -374,8 +510,13 @@ data: - attributes/exclude_system_paging - resourcedetection/cloudproviders - resource + {{- if include "newrelic.common.lowDataMode" . }} + - transform/low_data_mode_inator + - resource/low_data_mode_inator + {{- end }} - k8sattributes - attributes/self + - memory_limiter - batch exporters: - otlphttp/newrelic @@ -384,7 +525,11 @@ data: logs: receivers: - filelog - processors: [transform/truncate, resource, k8sattributes, batch] + processors: + - transform/truncate + - resource + - k8sattributes + - batch exporters: - otlphttp/newrelic {{- end }} \ No newline at end of file diff --git a/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml b/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml index 43c4578ec..5eab58430 100644 --- a/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml +++ b/charts/nr-k8s-otel-collector/templates/deployment-configmap.yaml @@ -122,152 +122,6 @@ data: target_label: job_label replacement: scheduler - k8s_cluster: - auth_type: serviceAccount - node_conditions_to_report: - - Ready - collection_interval: {{ .Values.receivers.k8sCluster.scrapeInterval }} - metrics: - k8s.container.cpu_request: - enabled: false - k8s.container.cpu_limit: - enabled: false - k8s.container.memory_request: - enabled: false - 
k8s.container.memory_limit: - enabled: false - k8s.container.storage_request: - enabled: false - k8s.container.storage_limit: - enabled: false - k8s.container.ephemeralstorage_request: - enabled: false - k8s.container.ephemeralstorage_limit: - enabled: false - k8s.container.restarts: - enabled: false - k8s.container.ready: - enabled: false - k8s.pod.phase: - enabled: false - k8s.pod.status_reason: - enabled: false - k8s.deployment.desired: - enabled: false - k8s.deployment.available: - enabled: false - k8s.cronjob.active_jobs: - enabled: false - k8s.daemonset.current_scheduled_nodes: - enabled: false - k8s.daemonset.desired_scheduled_nodes: - enabled: false - k8s.daemonset.misscheduled_nodes: - enabled: false - k8s.daemonset.ready_nodes: - enabled: false - k8s.hpa.max_replicas: - enabled: false - k8s.hpa.min_replicas: - enabled: false - k8s.hpa.current_replicas: - enabled: false - k8s.hpa.desired_replicas: - enabled: false - k8s.job.active_pods: - enabled: false - k8s.job.desired_successful_pods: - enabled: false - k8s.job.failed_pods: - enabled: false - k8s.job.max_parallel_pods: - enabled: false - k8s.job.successful_pods: - enabled: false - k8s.namespace.phase: - enabled: false - k8s.replicaset.desired: - enabled: false - k8s.replicaset.available: - enabled: false - k8s.replication_controller.desired: - enabled: false - k8s.replication_controller.available: - enabled: false - k8s.resource_quota.hard_limit: - enabled: false - k8s.resource_quota.used: - enabled: false - k8s.statefulset.desired_pods: - enabled: false - k8s.statefulset.ready_pods: - enabled: false - k8s.statefulset.current_pods: - enabled: false - k8s.statefulset.updated_pods: - enabled: false - k8s.node.condition: - enabled: false - resource_attributes: - k8s.namespace.uid: - enabled: false - k8s.namespace.name: - enabled: false - k8s.node.uid: - enabled: false - k8s.node.name: - enabled: false - container.id: - enabled: false - container.image.name: - enabled: false - container.image.tag: - 
enabled: false - k8s.container.name: - enabled: false - k8s.pod.name: - enabled: false - k8s.pod.uid: - enabled: false - k8s.pod.qos_class: - enabled: false - k8s.replicaset.name: - enabled: false - k8s.replicaset.uid: - enabled: false - k8s.replicationcontroller.name: - enabled: false - k8s.replicationcontroller.uid: - enabled: false - k8s.resourcequota.uid: - enabled: false - k8s.resourcequota.name: - enabled: false - k8s.statefulset.uid: - enabled: false - k8s.statefulset.name: - enabled: false - k8s.deployment.uid: - enabled: false - k8s.deployment.name: - enabled: false - k8s.cronjob.uid: - enabled: false - k8s.cronjob.name: - enabled: false - k8s.daemonset.name: - enabled: false - k8s.daemonset.uid: - enabled: false - k8s.hpa.uid: - enabled: false - k8s.hpa.name: - enabled: false - k8s.job.name: - enabled: false - k8s.job.uid: - enabled: false - processors: groupbyattrs: keys: @@ -317,7 +171,7 @@ data: metricstransform/k8s_cluster_info: transforms: - - include: k8s.node.condition_ready + - include: kubernetes_build_info action: update new_name: k8s.cluster.info @@ -348,6 +202,173 @@ data: new_label: container_phase new_value: terminated + metricstransform/ldm: + transforms: + - include: .* + match_type: regexp + action: update + operations: + - action: add_label + new_label: low.data.mode + new_value: 'false' + + metricstransform/k8s_cluster_info_ldm: + transforms: + - include: k8s.cluster.info + action: update + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + metricstransform/ksm: + transforms: + - include: kube_cronjob_(created|spec_suspend|status_(active|last_schedule_time)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: 
kube_daemonset_(created|status_(current_number_scheduled|desired_number_scheduled|updated_number_scheduled)|status_number_(available|misscheduled|ready|unavailable)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_deployment_(created|metadata_generation|spec_(replicas|strategy_rollingupdate_max_surge)|status_(condition|observed_generation|replicas)|status_replicas_(available|ready|unavailable|updated)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_horizontalpodautoscaler_(spec_(max_replicas|min_replicas)|status_(condition|current_replicas|desired_replicas)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_job_(complete|created|failed|spec_(active_deadline_seconds|completions|parallelism)|status_(active|completion_time|failed|start_time|succeeded)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_node_status_(allocatable|condition) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_persistentvolume_(capacity_bytes|created|info|status_phase) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_persistentvolumeclaim_(created|info|resource_requests_storage_bytes|status_phase) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - 
include: kube_pod_container_(resource_(limits|requests)|status_(phase|ready|restarts_total|waiting_reason)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_pod_(created|info|status_(phase|ready|ready_time|scheduled|scheduled_time)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_service_(annotations|created|info|labels|spec_type|status_load_balancer_ingress) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_statefulset_(created|persistentvolumeclaim_retention_policy|replicas|status_(current_revision|replicas)|status_replicas_(available|current|ready|updated)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + metricstransform/apiserver: + transforms: + - include: apiserver_storage_objects + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: go_(goroutines|threads) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: process_resident_memory_bytes + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + + filter/exclude_metrics_low_data_mode: + metrics: + metric: + - 'HasAttrOnDatapoint("low.data.mode", "false")' + filter/exclude_zero_value_kube_node_status_condition: metrics: datapoint: @@ -408,6 +429,22 @@ data: action: upsert value: 'true' + 
transform/low_data_mode_inator: + metric_statements: + - context: metric + statements: + - set(description, "") + - set(unit, "") + + resource/low_data_mode_inator: + attributes: + - key: http.scheme + action: delete + - key: net.host.name + action: delete + - key: net.host.port + action: delete + k8sattributes: auth_type: "serviceAccount" passthrough: false @@ -441,7 +478,15 @@ data: action: upsert from_attribute: namespace + memory_limiter: + check_interval: 1s + limit_percentage: 80 + spike_limit_percentage: 25 + batch: + send_batch_max_size: 1000 + timeout: 30s + send_batch_size : 800 exporters: otlphttp/newrelic: @@ -465,6 +510,14 @@ data: - filter/exclude_zero_value_kube_persistentvolumeclaim_status_phase - filter/exclude_zero_value_kube_pod_status_phase - filter/exclude_zero_value_kube_pod_container_status + {{- if include "newrelic.common.lowDataMode" . }} + - metricstransform/ldm + - metricstransform/k8s_cluster_info_ldm + - metricstransform/ksm + - filter/exclude_metrics_low_data_mode + - transform/low_data_mode_inator + - resource/low_data_mode_inator + {{- end }} - resource/metrics - resourcedetection/cloudproviders - batch @@ -473,20 +526,24 @@ data: exporters: - otlphttp/newrelic {{- end }} - {{- if or .Values.receivers.prometheus.enabled .Values.receivers.k8sCluster.enabled }} + {{- if .Values.receivers.prometheus.enabled }} metrics: receivers: - {{- if .Values.receivers.prometheus.enabled }} - prometheus - {{- end }} - {{- if .Values.receivers.k8sCluster.enabled }} - - k8s_cluster - {{- end }} processors: - metricstransform/k8s_cluster_info + {{- if include "newrelic.common.lowDataMode" . 
}} + - metricstransform/ldm + - metricstransform/k8s_cluster_info_ldm + - metricstransform/apiserver + - filter/exclude_metrics_low_data_mode + - transform/low_data_mode_inator + - resource/low_data_mode_inator + {{- end }} - resource/metrics - k8sattributes - attributes/self + - memory_limiter - batch exporters: - otlphttp/newrelic @@ -501,7 +558,9 @@ data: logs/events: receivers: - k8s_events - processors: [resource/events, batch] + processors: + - resource/events + - batch exporters: - otlphttp/newrelic {{- end }} diff --git a/charts/nr-k8s-otel-collector/values.yaml b/charts/nr-k8s-otel-collector/values.yaml index 76c7cd3ea..a1455fa8a 100644 --- a/charts/nr-k8s-otel-collector/values.yaml +++ b/charts/nr-k8s-otel-collector/values.yaml @@ -117,15 +117,8 @@ receivers: # @default -- `true` enabled: true # -- Sets the scrape interval for the `prometheus` receiver - # @default -- `20s` - scrapeInterval: 20s - k8sCluster: - # -- (bool) Specifies whether the `k8s_cluster` receiver is enabled - # @default -- `true` - enabled: true - # -- Sets the scrape interval for the `k8s_cluster` receiver - # @default -- `20s` - scrapeInterval: 20s + # @default -- `1m` + scrapeInterval: 1m k8sEvents: # -- (bool) Specifies whether the `k8s_events` receiver is enabled # @default -- `true` @@ -135,16 +128,20 @@ receivers: # @default -- `true` enabled: true # -- Sets the scrape interval for the `hostmetrics` receiver - # @default -- `20s` - scrapeInterval: 20s + # @default -- `1m` + scrapeInterval: 1m kubeletstats: # -- (bool) Specifies whether the `kubeletstats` receiver is enabled # @default -- `true` enabled: true # -- Sets the scrape interval for the `kubeletstats` receiver - # @default -- `20s` - scrapeInterval: 20s + # @default -- `1m` + scrapeInterval: 1m filelog: # -- (bool) Specifies whether the `filelog` receiver is enabled # @default -- `true` enabled: true + +# -- (bool) Send only the metrics required to light up the NR Kubernetes UI +# @default -- `false` +lowDataMode: