From 24db44516ae6eaeafa1a45460375f80d7a171fbe Mon Sep 17 00:00:00 2001
From: Chris Werner Rau
Date: Thu, 6 Mar 2025 15:53:22 +0100
Subject: [PATCH] feat(base-cluster/monitoring)!: migrate promtail to alloy
 (#1347)

- update loki to an up-to-date version
- also replace otel collector with alloy, saving resources!
---
 .github/trusted_registries.yaml               |   2 +
 charts/base-cluster/README.md.gotmpl          |  21 ++
 .../base-cluster/templates/ingress/nginx.yaml |   7 +-
 .../templates/kyverno/kyverno.yaml            |  14 +-
 .../kyverno-policies.yaml                     |   6 +-
 .../templates/monitoring/alloy.yaml           | 259 ++++++++++++++++++
 .../_prometheus_config.yaml                   |   5 +-
 .../monitoring/{loki => logs}/dashboard.yaml  |   4 +-
 .../monitoring/logs/grafana-loki.yaml         |  22 ++
 .../templates/monitoring/logs/loki.yaml       | 103 +++++++
 .../templates/monitoring/loki/loki.yaml       | 101 -------
 .../monitoring/tracing/grafana-tempo.yaml     |  21 ++
 .../tracing/opentelemetry-collector.yaml      |  57 ----
 charts/base-cluster/values.schema.json        |   4 -
 charts/base-cluster/values.yaml               |  11 +-
 15 files changed, 451 insertions(+), 186 deletions(-)
 create mode 100644 charts/base-cluster/templates/monitoring/alloy.yaml
 rename charts/base-cluster/templates/monitoring/{loki => logs}/dashboard.yaml (91%)
 create mode 100644 charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml
 create mode 100644 charts/base-cluster/templates/monitoring/logs/loki.yaml
 delete mode 100644 charts/base-cluster/templates/monitoring/loki/loki.yaml
 delete mode 100644 charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml

diff --git a/.github/trusted_registries.yaml b/.github/trusted_registries.yaml
index c47750ad7..90cbfa6bd 100644
--- a/.github/trusted_registries.yaml
+++ b/.github/trusted_registries.yaml
@@ -30,6 +30,8 @@ ghcr.io:
   aquasecurity: ALL_IMAGES
   kyverno: ALL_IMAGES
   teutonet: ALL_IMAGES
+  jimmidyson:
+    configmap-reload: ALL_TAGS
 quay.io:
   cilium: ALL_IMAGES
   jetstack: ALL_IMAGES
diff --git a/charts/base-cluster/README.md.gotmpl b/charts/base-cluster/README.md.gotmpl
index 55d9133b0..db0c0c4cd 100644
--- a/charts/base-cluster/README.md.gotmpl
+++ b/charts/base-cluster/README.md.gotmpl
@@ -315,4 +315,25 @@ upgrade, they will be recreated in version 6.
 This also makes kyverno HA, so be aware that kyverno will need more resources
 in your cluster.
 
+### 6.x.x -> 7.0.0
+
+This release allows the user to use the predefined k8s ClusterRoles
+(`admin`, `edit`, `view`, ...).
+
+This usage might clash with custom roles named `admin`, `edit`, `view`, ...,
+which therefore need to be adjusted.
+
+### 7.x.x -> 8.0.0
+
+This release migrates the now unsupported `loki-stack` to the normal `loki` helm
+chart.
+
+This is a breaking change because, apart from a new storage engine, the deployment
+also moves from the `loki` namespace to `monitoring` to keep in line with every
+other monitoring deployment, which in turn also deletes the `loki` namespace.
+
+This also replaces `promtail` and the `otel-collector` with `alloy`, reusing
+the existing `promtail` resource configuration, which
+makes this a drop-in change.
+ {{ .Files.Get "values.md" }} diff --git a/charts/base-cluster/templates/ingress/nginx.yaml b/charts/base-cluster/templates/ingress/nginx.yaml index e7f098ab2..7e40331ff 100644 --- a/charts/base-cluster/templates/ingress/nginx.yaml +++ b/charts/base-cluster/templates/ingress/nginx.yaml @@ -27,7 +27,8 @@ spec: serviceMonitor: enabled: {{ .Values.monitoring.prometheus.enabled }} additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 12 }} - {{- if .Values.monitoring.tracing.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }} + {{- if and $telemetryConf.enabled .Values.monitoring.prometheus.enabled }} opentelemetry: enabled: true {{- if and .Values.global.imageRegistry false }} @@ -40,10 +41,10 @@ spec: use-gzip: true enable-brotli: true enable-underscores-in-headers: true - {{- if .Values.monitoring.tracing.enabled }} + {{- if $telemetryConf.enabled }} enable-opentelemetry: true opentelemetry-operation-name: ingress - otlp-collector-host: open-telemetry-collector-opentelemetry-collector.monitoring + otlp-collector-host: {{ $telemetryConf.host }} {{- end }} service: annotations: diff --git a/charts/base-cluster/templates/kyverno/kyverno.yaml b/charts/base-cluster/templates/kyverno/kyverno.yaml index 2fc152ea1..1f956b2f7 100644 --- a/charts/base-cluster/templates/kyverno/kyverno.yaml +++ b/charts/base-cluster/templates/kyverno/kyverno.yaml @@ -62,25 +62,27 @@ spec: # this only works in version 3 admissionController: replicas: 3 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "jaeger" "serviceProtocol" "grpc") | fromYaml -}} + {{- $telemetryEnabled := and $telemetryConf.enabled .Values.monitoring.prometheus.enabled -}} + {{- if $telemetryEnabled }} tracing: &tracingConfig enabled: true - address: open-telemetry-collector-opentelemetry-collector.monitoring - port: 14250 # jaeger-grpc + address: {{ $telemetryConf.host }} + port: {{ $telemetryConf.port }} {{- end }} backgroundController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} reportsController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} cleanupController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} podDisruptionBudget: diff --git a/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml b/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml index 615242592..06a58c602 100644 --- a/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml +++ b/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml @@ -32,7 +32,7 @@ spec: namespaces: - kube-system - default - {{ $lokiPromtail := dict "resources" (dict "namespaces" (list "loki") "kinds" (list "Pod") "names" (list "loki-promtail-*")) -}} + {{ $alloy := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "alloy-*")) -}} {{- $syncEtcdSecret := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "sync-etcd-secret-*")) -}} {{- $nodeExporter := dict 
"resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "kube-prometheus-stack-prometheus-node-exporter-*")) -}} {{- $nfsServerProvisioner := dict "resources" (dict "namespaces" (list "nfs-server-provisioner") "kinds" (list "Pod") "names" (list "nfs-server-provisioner-0")) -}} @@ -46,8 +46,8 @@ spec: {{- $disallowHostPorts := list -}} {{- if .Values.monitoring.loki.enabled -}} - {{- $disallowHostPath = append $disallowHostPath $lokiPromtail -}} - {{- $runAsNonRoot = append $runAsNonRoot $lokiPromtail -}} + {{- $disallowHostPath = append $disallowHostPath $alloy -}} + {{- $runAsNonRoot = append $runAsNonRoot $alloy -}} {{- end -}} {{- if .Values.monitoring.prometheus.enabled -}} {{- $disallowHostPath = append $disallowHostPath $syncEtcdSecret -}} diff --git a/charts/base-cluster/templates/monitoring/alloy.yaml b/charts/base-cluster/templates/monitoring/alloy.yaml new file mode 100644 index 000000000..e07e6feb9 --- /dev/null +++ b/charts/base-cluster/templates/monitoring/alloy.yaml @@ -0,0 +1,259 @@ +{{- if and .Values.monitoring.prometheus.enabled (or .Values.monitoring.tracing.enabled .Values.monitoring.loki.enabled) -}} +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: alloy + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: alloy + app.kubernetes.io/part-of: monitoring +spec: + chart: + spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "alloy" "context" $) | nindent 6 }} + interval: 1h + driftDetection: + mode: enabled + install: + timeout: 10m0s + crds: Skip + upgrade: + timeout: 10m0s + crds: Skip + dependsOn: + - name: kube-prometheus-stack + namespace: monitoring + values: + {{- if .Values.global.imageRegistry }} + global: + image: + registry: {{ $.Values.global.imageRegistry }} + {{- end }} + alloy: + enableReporting: false + resources: {{- include "common.resources" .Values.monitoring.loki.promtail | nindent 8 }} + {{- if .Values.monitoring.loki.enabled }} + mounts: + varlog: true + {{- end }} + securityContext: + seccompProfile: + type: RuntimeDefault + configMap: + content: | + {{- if .Values.monitoring.loki.enabled }} + discovery.kubernetes "pods" { + role = "pod" + } + + discovery.relabel "pods" { + targets = discovery.kubernetes.pods.targets + + rule { + source_labels = ["__meta_kubernetes_pod_controller_name"] + regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?" 
+ target_label = "__tmp_controller_name" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "app" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "instance" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "component" + } + + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node_name" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["namespace", "app"] + separator = "/" + target_label = "job" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"] + separator = "/" + regex = "true/(.*)" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + } + + local.file_match "pods" { + path_targets = discovery.relabel.pods.output + } + + loki.source.file "pods" { + targets = local.file_match.pods.targets + forward_to = [loki.process.pods.receiver] + } + + loki.process "pods" { + forward_to = [loki.write.default.receiver] + + stage.cri { } + } + + loki.write "default" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + } + external_labels = {} + } + {{- end }} + + {{- if .Values.monitoring.tracing.enabled }} + otelcol.receiver.otlp "default" { + grpc { } + + http { } + + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.receiver.jaeger "default" { + protocols { + grpc { } + + thrift_http { } + + thrift_compact { + max_packet_size = "63KiB488B" + } + } + + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.receiver.zipkin "default" { + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.processor.k8sattributes "default" { + auth_type = "serviceAccount" + + extract { + metadata = ["k8s.namespace.name", "k8s.deployment.name", "k8s.statefulset.name", "k8s.daemonset.name", "k8s.cronjob.name", "k8s.job.name", "k8s.node.name", "k8s.pod.name", "k8s.pod.uid", "k8s.pod.start_time"] + } + + pod_association { + source { + from = "resource_attribute" + name = "k8s.pod.ip" + } + } + + pod_association { + source { + from = "resource_attribute" + name = "k8s.pod.uid" + } + } + + pod_association { + source { + from = "connection" + } + } + + output { + traces = [otelcol.processor.batch.default.input] + } + } + + otelcol.processor.batch "default" { + output { + traces = [otelcol.exporter.otlp.tempo.input] + } + } + + otelcol.exporter.otlp "tempo" { + client { + endpoint = "grafana-tempo-distributor:4317" + + tls { + insecure = true + } + } + } + {{- end }} + extraPorts: + - name: jaeger-compact + port: 6831 + protocol: UDP + 
targetPort: 6831 + - name: jaeger-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - name: jaeger-thrift + port: 14268 + protocol: TCP + targetPort: 14268 + - name: metrics + port: 8888 + protocol: TCP + targetPort: 8888 + - name: otlp + port: 4317 + appProtocol: grpc + protocol: TCP + targetPort: 4317 + - name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + appProtocol: http/protobuf + protocol: TCP + targetPort: 9411 + crds: + create: false + controller: + priorityClassName: monitoring-components + serviceMonitor: + enabled: true + additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml b/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml index 2a9a9bb25..5522148ec 100644 --- a/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml +++ b/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml @@ -65,11 +65,12 @@ prometheusSpec: - __address__ target_label: cluster replacement: {{ .Values.global.clusterName }} - {{- if .Values.monitoring.tracing.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }} + {{- if $telemetryConf.enabled }} tracingConfig: clientType: grpc samplingFraction: "0.1" insecure: true - endpoint: open-telemetry-collector-opentelemetry-collector.monitoring:4317 + endpoint: {{ printf "%s:%d" $telemetryConf.host $telemetryConf.port }} {{- end }} {{- end -}} diff --git a/charts/base-cluster/templates/monitoring/loki/dashboard.yaml b/charts/base-cluster/templates/monitoring/logs/dashboard.yaml similarity index 91% rename from charts/base-cluster/templates/monitoring/loki/dashboard.yaml rename to charts/base-cluster/templates/monitoring/logs/dashboard.yaml index 587a4c73d..8234158cd 100644 --- a/charts/base-cluster/templates/monitoring/loki/dashboard.yaml +++ b/charts/base-cluster/templates/monitoring/logs/dashboard.yaml @@ -3,10 +3,10 @@ apiVersion: v1 kind: ConfigMap metadata: name: grafana-dashboard-loki - namespace: loki + namespace: monitoring labels: {{- include "common.labels.standard" $ | nindent 4 }} grafana_dashboard: "1" app.kubernetes.io/component: loki app.kubernetes.io/part-of: monitoring data: {{- (.Files.Glob "dashboards/loki.json").AsConfig | nindent 2 }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml b/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml new file mode 100644 index 000000000..f7434fdd7 --- /dev/null +++ b/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasource-loki + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: loki + app.kubernetes.io/part-of: monitoring + grafana_datasource: "1" +data: + grafana-loki-datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Loki + type: loki + access: proxy + url: "http://loki:3100" + version: 1 + isDefault: false + uid: loki +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/logs/loki.yaml b/charts/base-cluster/templates/monitoring/logs/loki.yaml new file mode 100644 index 000000000..8e8927818 --- /dev/null +++ 
b/charts/base-cluster/templates/monitoring/logs/loki.yaml @@ -0,0 +1,103 @@ +{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: loki + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: loki + app.kubernetes.io/part-of: monitoring +spec: + chart: + spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "loki" "context" $) | nindent 6 }} + interval: 1h + driftDetection: + mode: enabled + install: + timeout: 10m0s + upgrade: + timeout: 10m0s + dependsOn: + - name: kube-prometheus-stack + namespace: monitoring + values: + {{- if .Values.global.imageRegistry }} + global: + registry: {{ $.Values.global.imageRegistry }} + {{- end }} + deploymentMode: SingleBinary + lokiCanary: + enabled: false + test: + enabled: false + chunksCache: + enabled: false + resultsCache: + enabled: false + gateway: + enabled: false + ruler: + enabled: false + sidecar: + rules: + enabled: false + loki: + enableServiceLinks: false + livenessProbe: + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 45 + containerSecurityContext: + seccompProfile: + type: RuntimeDefault + privileged: false + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + auth_enabled: false + storage: + type: filesystem + storage_config: + tsdb_shipper: + active_index_directory: /var/loki/tsdb-index + filesystem: + directory: /var/loki/chunks + schemaConfig: + configs: + - from: "2025-02-18" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + commonConfig: + replication_factor: 1 + compactor: + retention_enabled: true + delete_request_store: filesystem + ingester: + chunk_block_size: 524288 + chunk_target_size: 5242880 + tableManager: + retention_deletes_enabled: true + retention_period: 6w + backend: + replicas: 0 + read: + replicas: 0 + write: + replicas: 0 + singleBinary: + replicas: 1 + persistence: {{- include "common.storage.class" (dict "persistence" .Values.monitoring.loki.persistence "global" $.Values.global) | nindent 8 }} + enabled: true + size: {{ .Values.monitoring.loki.persistence.size }} + resources: {{- include "common.resources" .Values.monitoring.loki | nindent 8 }} + monitoring: + serviceMonitor: + enabled: true + additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/loki/loki.yaml b/charts/base-cluster/templates/monitoring/loki/loki.yaml deleted file mode 100644 index 10ecb3ba4..000000000 --- a/charts/base-cluster/templates/monitoring/loki/loki.yaml +++ /dev/null @@ -1,101 +0,0 @@ -{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: loki - namespace: loki - labels: {{- include "common.labels.standard" $ | nindent 4 }} - app.kubernetes.io/component: loki - app.kubernetes.io/part-of: monitoring -spec: - chart: - spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "loki-stack" "context" $) | nindent 6 }} - interval: 1h - driftDetection: - mode: enabled - install: - timeout: 10m0s - upgrade: - timeout: 10m0s - dependsOn: - - name: kube-prometheus-stack - namespace: monitoring - values: - test_pod: - image: {{ printf "%s/bats/bats:1.8.2" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - loki: - 
extraArgs: - # TODO: switch to compactor and boltdb-shipper - target: all,table-manager - isDefault: false - url: {{ `http://{{ (include "loki.serviceName" .) }}.{{ .Release.Namespace }}:{{ .Values.loki.service.port }}` }} - image: - repository: {{ printf "%s/grafana/loki" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - resources: {{- include "common.resources" .Values.monitoring.loki | nindent 8 }} - replicas: {{ .Values.monitoring.loki.replicas }} - persistence: {{- include "common.storage.class" (dict "persistence" .Values.monitoring.loki.persistence "global" $.Values.global) | nindent 8 }} - enabled: true - size: {{ .Values.monitoring.loki.persistence.size }} - containerSecurityContext: - seccompProfile: - type: RuntimeDefault - privileged: false - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - serviceMonitor: - enabled: true - additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} - config: - table_manager: - retention_deletes_enabled: true - retention_period: 6w - chunk_store_config: - max_look_back_period: 6w - ingester: - chunk_block_size: 524288 - chunk_target_size: 5242880 - storage_config: - boltdb: - directory: /data/loki/index - schema_config: - configs: - - from: "2018-04-15" - index: - period: 1w - prefix: index_ - object_store: filesystem - schema: v9 - store: boltdb - datasource: - uid: loki -{{/* TODO: Implement */}} -{{/* - from: 2022-09-15*/}} -{{/* store: boltdb-shipper*/}} -{{/* object_store: filesystem*/}} -{{/* schema: v12*/}} -{{/* index:*/}} -{{/* prefix: index_*/}} -{{/* period: 24h*/}} - promtail: - {{- if .Values.global.imageRegistry }} - image: - registry: {{ $.Values.global.imageRegistry }} - {{- end }} - resources: {{- include "common.resources" .Values.monitoring.loki.promtail | nindent 8 }} - containerSecurityContext: - seccompProfile: - type: RuntimeDefault - priorityClassName: monitoring-components - serviceMonitor: - enabled: true - labels: {{- toYaml .Values.monitoring.labels | nindent 10 }} - grafana: - enabled: false - sidecar: - datasources: - enabled: true - prometheus: - enabled: false - {{- end -}} diff --git a/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml b/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml index b665cf643..c0b1b40b7 100644 --- a/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml +++ b/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml @@ -22,6 +22,27 @@ spec: imageRegistry: {{ $.Values.global.imageRegistry }} {{- end }} ingester: {{- include "common.resourcesWithPreset" .Values.monitoring.tracing.ingester | nindent 6 }} + networkPolicy: + allowExternalEgress: false + compactor: + networkPolicy: + allowExternalEgress: false + distributor: + networkPolicy: + allowExternalEgress: false + metricsGenerator: + networkPolicy: + allowExternalEgress: false + querier: + networkPolicy: + allowExternalEgress: false + queryFrontend: + networkPolicy: + allowExternalEgress: false + vulture: + enabled: false + networkPolicy: + allowExternalEgress: false tempo: traces: jaeger: diff --git a/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml b/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml deleted file mode 100644 index 8dec42525..000000000 --- a/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml +++ /dev/null @@ -1,57 +0,0 @@ -{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled 
-}} -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: open-telemetry-collector - namespace: monitoring - labels: {{- include "common.labels.standard" $ | nindent 4 }} - app.kubernetes.io/component: tracing - app.kubernetes.io/part-of: monitoring -spec: - chart: - spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "open-telemetry" "chart" "opentelemetry-collector" "context" $) | nindent 6 }} - interval: 1h - driftDetection: - mode: enabled - dependsOn: - - name: kube-prometheus-stack - namespace: monitoring - upgrade: - timeout: 5m - values: - image: - repository: {{ printf "%s/otel/opentelemetry-collector-contrib" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - mode: daemonset - service: - enabled: true - config: - receivers: - prometheus: null - service: - pipelines: - logs: null - metrics: null - traces: - processors: - - batch - exporters: - - otlp - exporters: - otlp: - endpoint: grafana-tempo-distributor:4317 - tls: - insecure: true - presets: - kubernetesAttributes: - enabled: true - ports: - metrics: - enabled: true - podMonitor: - enabled: true - extraLabels: {{- toYaml .Values.monitoring.labels | nindent 8 }} - prometheusRule: - enabled: true - defaultRules: - enabled: true -{{- end -}} diff --git a/charts/base-cluster/values.schema.json b/charts/base-cluster/values.schema.json index 13219ec2a..aa876688e 100644 --- a/charts/base-cluster/values.schema.json +++ b/charts/base-cluster/values.schema.json @@ -869,10 +869,6 @@ }, "additionalProperties": false }, - "replicas": { - "type": "integer", - "minimum": 1 - }, "resourcesPreset": { "$ref": "#/$defs/resourcesPreset" }, diff --git a/charts/base-cluster/values.yaml b/charts/base-cluster/values.yaml index 05d9f3d51..a6a9b05e8 100644 --- a/charts/base-cluster/values.yaml +++ b/charts/base-cluster/values.yaml @@ -41,11 +41,6 @@ global: requests: cpu: 20m memory: 100Mi - loki: - condition: "{{ .Values.monitoring.loki.enabled }}" - additionalLabels: - app.kubernetes.io/component: loki - app.kubernetes.io/part-of: monitoring trivy: condition: "{{ .Values.monitoring.securityScanning.enabled }}" additionalLabels: @@ -112,8 +107,9 @@ global: grafana: url: https://grafana.github.io/helm-charts charts: - loki-stack: 2.x.x - condition: "{{ and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled }}" + loki: 6.x.x + alloy: 0.x.x + condition: "{{ and .Values.monitoring.prometheus.enabled (or .Values.monitoring.loki.enabled .Values.monitoring.tracing.enabled) }}" bitnami: url: oci://docker.io/bitnamicharts charts: @@ -311,7 +307,6 @@ monitoring: persistence: storageClass: "" size: 10Gi - replicas: 1 resourcesPreset: nano resources: requests:
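For context: the nginx, kyverno, and prometheus templates in this patch call a
`common.telemetry.conf` helper that is not defined here (it presumably lives in the
shared library chart). Below is only a rough sketch of the shape the call sites
appear to expect: a small YAML document with `enabled`, `host`, and `port` keys.
The helper name and keys are taken from the call sites; the `alloy.monitoring`
host, the hard-coded `enabled`, and the protocol-to-port mapping are assumptions
derived from the extraPorts defined in alloy.yaml, not the real implementation
(which also handles the `serviceProtocol` argument, omitted here).

{{- /* Hypothetical sketch of common.telemetry.conf; the shipped helper may differ. */ -}}
{{- define "common.telemetry.conf" -}}
{{- /* Pick the alloy service port matching the requested telemetry protocol
       (compare the extraPorts list in alloy.yaml). */ -}}
{{- $ports := dict "otlp" 4317 "jaeger" 14250 "zipkin" 9411 -}}
enabled: true # assumption: the real helper presumably derives this from the monitoring values
host: alloy.monitoring
port: {{ index $ports .protocol }}
{{- end -}}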