From fd2a906de372df383476ce0816e6858a4e653cb9 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Wed, 13 Nov 2024 12:00:48 +0100 Subject: [PATCH 1/2] Update Telemeter metrics allow-list --- configuration/telemeter/metrics.json | 6 ++++++ resources/services/telemeter-template.yaml | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/configuration/telemeter/metrics.json b/configuration/telemeter/metrics.json index 1908aeb316..b8324c3124 100644 --- a/configuration/telemeter/metrics.json +++ b/configuration/telemeter/metrics.json @@ -1,4 +1,9 @@ [ + "openshift_logging:log_forwarder_input_type:sum", + "openshift_logging:log_forwarder_output_type:sum", + "openshift_logging:log_forwarder_pipelines:sum", + "openshift_logging:log_forwarders:sum", + "openshift_logging:vector_component_received_bytes_total:rate5m", "{__name__=\":apiserver_v1_image_imports:sum\"}", "{__name__=\"ALERTS\",alertstate=\"firing\",severity=~\"critical|warning|info|none\"}", "{__name__=\"ALERTS\",alertstate=\"firing\"}", @@ -194,6 +199,7 @@ "{__name__=\"type:tempo_operator_tempostack_multi_tenancy:sum\",type=~\"enabled|disabled\"}", "{__name__=\"type:tempo_operator_tempostack_storage_backend:sum\",type=~\"azure|gcs|s3\"}", "{__name__=\"up\"}", + "{__name__=\"vendor_model:node_accelerator_cards:sum\",vendor=~\"NVIDIA\",model=~\"A100|RTX_A6000|RTX_4090|A40|V100\"}", "{__name__=\"visual_web_terminal_sessions_total\"}", "{__name__=\"workload:cpu_usage_cores:sum\"}", "{__name__=\"workload:memory_usage_bytes:sum\"}", diff --git a/resources/services/telemeter-template.yaml b/resources/services/telemeter-template.yaml index 4e231e6784..f74c84b612 100644 --- a/resources/services/telemeter-template.yaml +++ b/resources/services/telemeter-template.yaml @@ -93,6 +93,11 @@ objects: - --memcached=memcached-0.memcached.${NAMESPACE}.svc.cluster.local:11211 - --memcached=memcached-1.memcached.${NAMESPACE}.svc.cluster.local:11211 - --memcached=memcached-2.memcached.${NAMESPACE}.svc.cluster.local:11211 + - --whitelist=openshift_logging:log_forwarder_input_type:sum + - --whitelist=openshift_logging:log_forwarder_output_type:sum + - --whitelist=openshift_logging:log_forwarder_pipelines:sum + - --whitelist=openshift_logging:log_forwarders:sum + - --whitelist=openshift_logging:vector_component_received_bytes_total:rate5m - --whitelist={__name__=":apiserver_v1_image_imports:sum"} - --whitelist={__name__="alerts",alertstate="firing",severity=~"critical|warning|info|none"} - --whitelist={__name__="alerts",alertstate="firing"} @@ -288,6 +293,7 @@ objects: - --whitelist={__name__="type:tempo_operator_tempostack_multi_tenancy:sum",type=~"enabled|disabled"} - --whitelist={__name__="type:tempo_operator_tempostack_storage_backend:sum",type=~"azure|gcs|s3"} - --whitelist={__name__="up"} + - --whitelist={__name__="vendor_model:node_accelerator_cards:sum",vendor=~"NVIDIA",model=~"A100|RTX_A6000|RTX_4090|A40|V100"} - --whitelist={__name__="visual_web_terminal_sessions_total"} - --whitelist={__name__="workload:cpu_usage_cores:sum"} - --whitelist={__name__="workload:memory_usage_bytes:sum"} @@ -389,6 +395,11 @@ objects: - --memcached=memcached-0.memcached.${NAMESPACE}.svc.cluster.local:11211 - --memcached=memcached-1.memcached.${NAMESPACE}.svc.cluster.local:11211 - --memcached=memcached-2.memcached.${NAMESPACE}.svc.cluster.local:11211 + - --whitelist=openshift_logging:log_forwarder_input_type:sum + - --whitelist=openshift_logging:log_forwarder_output_type:sum + - --whitelist=openshift_logging:log_forwarder_pipelines:sum + - --whitelist=openshift_logging:log_forwarders:sum + - --whitelist=openshift_logging:vector_component_received_bytes_total:rate5m - --whitelist={__name__=":apiserver_v1_image_imports:sum"} - --whitelist={__name__="alerts",alertstate="firing",severity=~"critical|warning|info|none"} - --whitelist={__name__="alerts",alertstate="firing"} @@ -584,6 +595,7 @@ objects: - --whitelist={__name__="type:tempo_operator_tempostack_multi_tenancy:sum",type=~"enabled|disabled"} - --whitelist={__name__="type:tempo_operator_tempostack_storage_backend:sum",type=~"azure|gcs|s3"} - --whitelist={__name__="up"} + - --whitelist={__name__="vendor_model:node_accelerator_cards:sum",vendor=~"NVIDIA",model=~"A100|RTX_A6000|RTX_4090|A40|V100"} - --whitelist={__name__="visual_web_terminal_sessions_total"} - --whitelist={__name__="workload:cpu_usage_cores:sum"} - --whitelist={__name__="workload:memory_usage_bytes:sum"} From 336171b2f9bb6c23f71d677dd2e5a3670b5ac95b Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Wed, 13 Nov 2024 13:45:02 +0100 Subject: [PATCH 2/2] Fix generated files --- crds/loki.grafana.com_alertingrules.libsonnet | 4 ++-- crds/loki.grafana.com_recordingrules.libsonnet | 4 ++-- resources/crds/observatorium-logs-crds-template.yaml | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crds/loki.grafana.com_alertingrules.libsonnet b/crds/loki.grafana.com_alertingrules.libsonnet index bc42583674..5c7c08718d 100644 --- a/crds/loki.grafana.com_alertingrules.libsonnet +++ b/crds/loki.grafana.com_alertingrules.libsonnet @@ -8,11 +8,11 @@ }, creationTimestamp: null, labels: { - 'app.kubernetes.io/instance': 'loki-operator-v0.7.0', + 'app.kubernetes.io/instance': 'loki-operator-v0.7.1', 'app.kubernetes.io/managed-by': 'operator-lifecycle-manager', 'app.kubernetes.io/name': 'loki-operator', 'app.kubernetes.io/part-of': 'loki-operator', - 'app.kubernetes.io/version': '0.7.0', + 'app.kubernetes.io/version': '0.7.1', }, name: 'alertingrules.loki.grafana.com', }, diff --git a/crds/loki.grafana.com_recordingrules.libsonnet b/crds/loki.grafana.com_recordingrules.libsonnet index d45f30eaf4..8309b7d6de 100644 --- a/crds/loki.grafana.com_recordingrules.libsonnet +++ b/crds/loki.grafana.com_recordingrules.libsonnet @@ -8,11 +8,11 @@ }, creationTimestamp: null, labels: { - 'app.kubernetes.io/instance': 'loki-operator-v0.7.0', + 'app.kubernetes.io/instance': 'loki-operator-v0.7.1', 'app.kubernetes.io/managed-by': 'operator-lifecycle-manager', 'app.kubernetes.io/name': 'loki-operator', 'app.kubernetes.io/part-of': 'loki-operator', - 'app.kubernetes.io/version': '0.7.0', + 'app.kubernetes.io/version': '0.7.1', }, name: 'recordingrules.loki.grafana.com', }, diff --git a/resources/crds/observatorium-logs-crds-template.yaml b/resources/crds/observatorium-logs-crds-template.yaml index 79d11f3f1d..7c678487b6 100644 --- a/resources/crds/observatorium-logs-crds-template.yaml +++ b/resources/crds/observatorium-logs-crds-template.yaml @@ -11,11 +11,11 @@ objects: controller-gen.kubebuilder.io/version: v0.16.3 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.7.0 + app.kubernetes.io/instance: loki-operator-v0.7.1 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.7.0 + app.kubernetes.io/version: 0.7.1 name: alertingrules.loki.grafana.com spec: group: loki.grafana.com @@ -348,11 +348,11 @@ objects: controller-gen.kubebuilder.io/version: v0.16.3 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.7.0 + app.kubernetes.io/instance: loki-operator-v0.7.1 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.7.0 + app.kubernetes.io/version: 0.7.1 name: recordingrules.loki.grafana.com spec: group: loki.grafana.com