From 24db44516ae6eaeafa1a45460375f80d7a171fbe Mon Sep 17 00:00:00 2001
From: Chris Werner Rau
Date: Thu, 6 Mar 2025 15:53:22 +0100
Subject: [PATCH] feat(base-cluster/monitoring)!: migrate promtail to alloy
 (#1347)

- update loki to an up-to-date version
- also replace otel collector with alloy, saving resources!
---
 .github/trusted_registries.yaml               |   2 +
 charts/base-cluster/README.md.gotmpl          |  21 ++
 .../base-cluster/templates/ingress/nginx.yaml |   7 +-
 .../templates/kyverno/kyverno.yaml            |  14 +-
 .../kyverno-policies.yaml                     |   6 +-
 .../templates/monitoring/alloy.yaml           | 259 ++++++++++++++++++
 .../_prometheus_config.yaml                   |   5 +-
 .../monitoring/{loki => logs}/dashboard.yaml  |   4 +-
 .../monitoring/logs/grafana-loki.yaml         |  22 ++
 .../templates/monitoring/logs/loki.yaml       | 103 +++++++
 .../templates/monitoring/loki/loki.yaml       | 101 -------
 .../monitoring/tracing/grafana-tempo.yaml     |  21 ++
 .../tracing/opentelemetry-collector.yaml      |  57 ----
 charts/base-cluster/values.schema.json        |   4 -
 charts/base-cluster/values.yaml               |  11 +-
 15 files changed, 451 insertions(+), 186 deletions(-)
 create mode 100644 charts/base-cluster/templates/monitoring/alloy.yaml
 rename charts/base-cluster/templates/monitoring/{loki => logs}/dashboard.yaml (91%)
 create mode 100644 charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml
 create mode 100644 charts/base-cluster/templates/monitoring/logs/loki.yaml
 delete mode 100644 charts/base-cluster/templates/monitoring/loki/loki.yaml
 delete mode 100644 charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml

diff --git a/.github/trusted_registries.yaml b/.github/trusted_registries.yaml
index c47750ad7..90cbfa6bd 100644
--- a/.github/trusted_registries.yaml
+++ b/.github/trusted_registries.yaml
@@ -30,6 +30,8 @@ ghcr.io:
   aquasecurity: ALL_IMAGES
   kyverno: ALL_IMAGES
   teutonet: ALL_IMAGES
+  jimmidyson:
+    configmap-reload: ALL_TAGS
 quay.io:
   cilium: ALL_IMAGES
   jetstack: ALL_IMAGES
diff --git a/charts/base-cluster/README.md.gotmpl b/charts/base-cluster/README.md.gotmpl
index 55d9133b0..db0c0c4cd 100644
--- a/charts/base-cluster/README.md.gotmpl
+++ b/charts/base-cluster/README.md.gotmpl
@@ -315,4 +315,25 @@ upgrade, they will be recreated in version 6.
 This also makes kyverno HA, so be aware that kyverno will need more resources
 in your cluster.
 
+### 6.x.x -> 7.0.0
+
+This release allows the user to use the predefined k8s ClusterRoles
+(`admin`, `edit`, `view`, ...).
+
+This usage might clash with custom roles named `admin`, `edit`, `view`, ...,
+which therefore need to be adjusted.
+
+### 7.x.x -> 8.0.0
+
+This release migrates the now unsupported `loki-stack` to the normal `loki` helm
+chart.
+
+This is a breaking change because, apart from a new storage engine, the deployment
+also moves from the `loki` namespace to `monitoring` to keep in line with every
+other monitoring deployment, which in turn also deletes the `loki` namespace.
+
+This also replaces `promtail` and the `otel-collector` with `alloy`, reusing
+the existing `promtail` resource configuration, which
+makes this a drop-in change.
+ {{ .Files.Get "values.md" }} diff --git a/charts/base-cluster/templates/ingress/nginx.yaml b/charts/base-cluster/templates/ingress/nginx.yaml index e7f098ab2..7e40331ff 100644 --- a/charts/base-cluster/templates/ingress/nginx.yaml +++ b/charts/base-cluster/templates/ingress/nginx.yaml @@ -27,7 +27,8 @@ spec: serviceMonitor: enabled: {{ .Values.monitoring.prometheus.enabled }} additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 12 }} - {{- if .Values.monitoring.tracing.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }} + {{- if and $telemetryConf.enabled .Values.monitoring.prometheus.enabled }} opentelemetry: enabled: true {{- if and .Values.global.imageRegistry false }} @@ -40,10 +41,10 @@ spec: use-gzip: true enable-brotli: true enable-underscores-in-headers: true - {{- if .Values.monitoring.tracing.enabled }} + {{- if $telemetryConf.enabled }} enable-opentelemetry: true opentelemetry-operation-name: ingress - otlp-collector-host: open-telemetry-collector-opentelemetry-collector.monitoring + otlp-collector-host: {{ $telemetryConf.host }} {{- end }} service: annotations: diff --git a/charts/base-cluster/templates/kyverno/kyverno.yaml b/charts/base-cluster/templates/kyverno/kyverno.yaml index 2fc152ea1..1f956b2f7 100644 --- a/charts/base-cluster/templates/kyverno/kyverno.yaml +++ b/charts/base-cluster/templates/kyverno/kyverno.yaml @@ -62,25 +62,27 @@ spec: # this only works in version 3 admissionController: replicas: 3 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "jaeger" "serviceProtocol" "grpc") | fromYaml -}} + {{- $telemetryEnabled := and $telemetryConf.enabled .Values.monitoring.prometheus.enabled -}} + {{- if $telemetryEnabled }} tracing: &tracingConfig enabled: true - address: open-telemetry-collector-opentelemetry-collector.monitoring - port: 14250 # jaeger-grpc + address: {{ $telemetryConf.host }} + port: {{ $telemetryConf.port }} {{- end }} backgroundController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} reportsController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} cleanupController: replicas: 2 - {{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }} + {{- if $telemetryEnabled }} tracing: *tracingConfig {{- end }} podDisruptionBudget: diff --git a/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml b/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml index 615242592..06a58c602 100644 --- a/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml +++ b/charts/base-cluster/templates/kyverno/policies/kyverno-base-policies/kyverno-policies.yaml @@ -32,7 +32,7 @@ spec: namespaces: - kube-system - default - {{ $lokiPromtail := dict "resources" (dict "namespaces" (list "loki") "kinds" (list "Pod") "names" (list "loki-promtail-*")) -}} + {{ $alloy := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "alloy-*")) -}} {{- $syncEtcdSecret := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "sync-etcd-secret-*")) -}} {{- $nodeExporter := dict 
"resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "kube-prometheus-stack-prometheus-node-exporter-*")) -}} {{- $nfsServerProvisioner := dict "resources" (dict "namespaces" (list "nfs-server-provisioner") "kinds" (list "Pod") "names" (list "nfs-server-provisioner-0")) -}} @@ -46,8 +46,8 @@ spec: {{- $disallowHostPorts := list -}} {{- if .Values.monitoring.loki.enabled -}} - {{- $disallowHostPath = append $disallowHostPath $lokiPromtail -}} - {{- $runAsNonRoot = append $runAsNonRoot $lokiPromtail -}} + {{- $disallowHostPath = append $disallowHostPath $alloy -}} + {{- $runAsNonRoot = append $runAsNonRoot $alloy -}} {{- end -}} {{- if .Values.monitoring.prometheus.enabled -}} {{- $disallowHostPath = append $disallowHostPath $syncEtcdSecret -}} diff --git a/charts/base-cluster/templates/monitoring/alloy.yaml b/charts/base-cluster/templates/monitoring/alloy.yaml new file mode 100644 index 000000000..e07e6feb9 --- /dev/null +++ b/charts/base-cluster/templates/monitoring/alloy.yaml @@ -0,0 +1,259 @@ +{{- if and .Values.monitoring.prometheus.enabled (or .Values.monitoring.tracing.enabled .Values.monitoring.loki.enabled) -}} +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: alloy + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: alloy + app.kubernetes.io/part-of: monitoring +spec: + chart: + spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "alloy" "context" $) | nindent 6 }} + interval: 1h + driftDetection: + mode: enabled + install: + timeout: 10m0s + crds: Skip + upgrade: + timeout: 10m0s + crds: Skip + dependsOn: + - name: kube-prometheus-stack + namespace: monitoring + values: + {{- if .Values.global.imageRegistry }} + global: + image: + registry: {{ $.Values.global.imageRegistry }} + {{- end }} + alloy: + enableReporting: false + resources: {{- include "common.resources" .Values.monitoring.loki.promtail | nindent 8 }} + {{- if .Values.monitoring.loki.enabled }} + mounts: + varlog: true + {{- end }} + securityContext: + seccompProfile: + type: RuntimeDefault + configMap: + content: | + {{- if .Values.monitoring.loki.enabled }} + discovery.kubernetes "pods" { + role = "pod" + } + + discovery.relabel "pods" { + targets = discovery.kubernetes.pods.targets + + rule { + source_labels = ["__meta_kubernetes_pod_controller_name"] + regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?" 
+ target_label = "__tmp_controller_name" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "app" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "instance" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "component" + } + + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node_name" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["namespace", "app"] + separator = "/" + target_label = "job" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"] + separator = "/" + regex = "true/(.*)" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + } + + local.file_match "pods" { + path_targets = discovery.relabel.pods.output + } + + loki.source.file "pods" { + targets = local.file_match.pods.targets + forward_to = [loki.process.pods.receiver] + } + + loki.process "pods" { + forward_to = [loki.write.default.receiver] + + stage.cri { } + } + + loki.write "default" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + } + external_labels = {} + } + {{- end }} + + {{- if .Values.monitoring.tracing.enabled }} + otelcol.receiver.otlp "default" { + grpc { } + + http { } + + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.receiver.jaeger "default" { + protocols { + grpc { } + + thrift_http { } + + thrift_compact { + max_packet_size = "63KiB488B" + } + } + + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.receiver.zipkin "default" { + output { + traces = [otelcol.processor.k8sattributes.default.input] + } + } + + otelcol.processor.k8sattributes "default" { + auth_type = "serviceAccount" + + extract { + metadata = ["k8s.namespace.name", "k8s.deployment.name", "k8s.statefulset.name", "k8s.daemonset.name", "k8s.cronjob.name", "k8s.job.name", "k8s.node.name", "k8s.pod.name", "k8s.pod.uid", "k8s.pod.start_time"] + } + + pod_association { + source { + from = "resource_attribute" + name = "k8s.pod.ip" + } + } + + pod_association { + source { + from = "resource_attribute" + name = "k8s.pod.uid" + } + } + + pod_association { + source { + from = "connection" + } + } + + output { + traces = [otelcol.processor.batch.default.input] + } + } + + otelcol.processor.batch "default" { + output { + traces = [otelcol.exporter.otlp.tempo.input] + } + } + + otelcol.exporter.otlp "tempo" { + client { + endpoint = "grafana-tempo-distributor:4317" + + tls { + insecure = true + } + } + } + {{- end }} + extraPorts: + - name: jaeger-compact + port: 6831 + protocol: UDP + 
targetPort: 6831 + - name: jaeger-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - name: jaeger-thrift + port: 14268 + protocol: TCP + targetPort: 14268 + - name: metrics + port: 8888 + protocol: TCP + targetPort: 8888 + - name: otlp + port: 4317 + appProtocol: grpc + protocol: TCP + targetPort: 4317 + - name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + - name: zipkin + port: 9411 + appProtocol: http/protobuf + protocol: TCP + targetPort: 9411 + crds: + create: false + controller: + priorityClassName: monitoring-components + serviceMonitor: + enabled: true + additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml b/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml index 2a9a9bb25..5522148ec 100644 --- a/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml +++ b/charts/base-cluster/templates/monitoring/kube-prometheus-stack/_prometheus_config.yaml @@ -65,11 +65,12 @@ prometheusSpec: - __address__ target_label: cluster replacement: {{ .Values.global.clusterName }} - {{- if .Values.monitoring.tracing.enabled }} + {{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }} + {{- if $telemetryConf.enabled }} tracingConfig: clientType: grpc samplingFraction: "0.1" insecure: true - endpoint: open-telemetry-collector-opentelemetry-collector.monitoring:4317 + endpoint: {{ printf "%s:%d" $telemetryConf.host $telemetryConf.port }} {{- end }} {{- end -}} diff --git a/charts/base-cluster/templates/monitoring/loki/dashboard.yaml b/charts/base-cluster/templates/monitoring/logs/dashboard.yaml similarity index 91% rename from charts/base-cluster/templates/monitoring/loki/dashboard.yaml rename to charts/base-cluster/templates/monitoring/logs/dashboard.yaml index 587a4c73d..8234158cd 100644 --- a/charts/base-cluster/templates/monitoring/loki/dashboard.yaml +++ b/charts/base-cluster/templates/monitoring/logs/dashboard.yaml @@ -3,10 +3,10 @@ apiVersion: v1 kind: ConfigMap metadata: name: grafana-dashboard-loki - namespace: loki + namespace: monitoring labels: {{- include "common.labels.standard" $ | nindent 4 }} grafana_dashboard: "1" app.kubernetes.io/component: loki app.kubernetes.io/part-of: monitoring data: {{- (.Files.Glob "dashboards/loki.json").AsConfig | nindent 2 }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml b/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml new file mode 100644 index 000000000..f7434fdd7 --- /dev/null +++ b/charts/base-cluster/templates/monitoring/logs/grafana-loki.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasource-loki + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: loki + app.kubernetes.io/part-of: monitoring + grafana_datasource: "1" +data: + grafana-loki-datasource.yaml: |- + apiVersion: 1 + datasources: + - name: Loki + type: loki + access: proxy + url: "http://loki:3100" + version: 1 + isDefault: false + uid: loki +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/logs/loki.yaml b/charts/base-cluster/templates/monitoring/logs/loki.yaml new file mode 100644 index 000000000..8e8927818 --- /dev/null +++ 
b/charts/base-cluster/templates/monitoring/logs/loki.yaml @@ -0,0 +1,103 @@ +{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: loki + namespace: monitoring + labels: {{- include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/component: loki + app.kubernetes.io/part-of: monitoring +spec: + chart: + spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "loki" "context" $) | nindent 6 }} + interval: 1h + driftDetection: + mode: enabled + install: + timeout: 10m0s + upgrade: + timeout: 10m0s + dependsOn: + - name: kube-prometheus-stack + namespace: monitoring + values: + {{- if .Values.global.imageRegistry }} + global: + registry: {{ $.Values.global.imageRegistry }} + {{- end }} + deploymentMode: SingleBinary + lokiCanary: + enabled: false + test: + enabled: false + chunksCache: + enabled: false + resultsCache: + enabled: false + gateway: + enabled: false + ruler: + enabled: false + sidecar: + rules: + enabled: false + loki: + enableServiceLinks: false + livenessProbe: + httpGet: + path: /ready + port: http-metrics + initialDelaySeconds: 45 + containerSecurityContext: + seccompProfile: + type: RuntimeDefault + privileged: false + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + auth_enabled: false + storage: + type: filesystem + storage_config: + tsdb_shipper: + active_index_directory: /var/loki/tsdb-index + filesystem: + directory: /var/loki/chunks + schemaConfig: + configs: + - from: "2025-02-18" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + commonConfig: + replication_factor: 1 + compactor: + retention_enabled: true + delete_request_store: filesystem + ingester: + chunk_block_size: 524288 + chunk_target_size: 5242880 + tableManager: + retention_deletes_enabled: true + retention_period: 6w + backend: + replicas: 0 + read: + replicas: 0 + write: + replicas: 0 + singleBinary: + replicas: 1 + persistence: {{- include "common.storage.class" (dict "persistence" .Values.monitoring.loki.persistence "global" $.Values.global) | nindent 8 }} + enabled: true + size: {{ .Values.monitoring.loki.persistence.size }} + resources: {{- include "common.resources" .Values.monitoring.loki | nindent 8 }} + monitoring: + serviceMonitor: + enabled: true + additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} +{{- end -}} diff --git a/charts/base-cluster/templates/monitoring/loki/loki.yaml b/charts/base-cluster/templates/monitoring/loki/loki.yaml deleted file mode 100644 index 10ecb3ba4..000000000 --- a/charts/base-cluster/templates/monitoring/loki/loki.yaml +++ /dev/null @@ -1,101 +0,0 @@ -{{- if and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled -}} -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: loki - namespace: loki - labels: {{- include "common.labels.standard" $ | nindent 4 }} - app.kubernetes.io/component: loki - app.kubernetes.io/part-of: monitoring -spec: - chart: - spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "loki-stack" "context" $) | nindent 6 }} - interval: 1h - driftDetection: - mode: enabled - install: - timeout: 10m0s - upgrade: - timeout: 10m0s - dependsOn: - - name: kube-prometheus-stack - namespace: monitoring - values: - test_pod: - image: {{ printf "%s/bats/bats:1.8.2" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - loki: - 
extraArgs: - # TODO: switch to compactor and boltdb-shipper - target: all,table-manager - isDefault: false - url: {{ `http://{{ (include "loki.serviceName" .) }}.{{ .Release.Namespace }}:{{ .Values.loki.service.port }}` }} - image: - repository: {{ printf "%s/grafana/loki" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - resources: {{- include "common.resources" .Values.monitoring.loki | nindent 8 }} - replicas: {{ .Values.monitoring.loki.replicas }} - persistence: {{- include "common.storage.class" (dict "persistence" .Values.monitoring.loki.persistence "global" $.Values.global) | nindent 8 }} - enabled: true - size: {{ .Values.monitoring.loki.persistence.size }} - containerSecurityContext: - seccompProfile: - type: RuntimeDefault - privileged: false - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - serviceMonitor: - enabled: true - additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }} - config: - table_manager: - retention_deletes_enabled: true - retention_period: 6w - chunk_store_config: - max_look_back_period: 6w - ingester: - chunk_block_size: 524288 - chunk_target_size: 5242880 - storage_config: - boltdb: - directory: /data/loki/index - schema_config: - configs: - - from: "2018-04-15" - index: - period: 1w - prefix: index_ - object_store: filesystem - schema: v9 - store: boltdb - datasource: - uid: loki -{{/* TODO: Implement */}} -{{/* - from: 2022-09-15*/}} -{{/* store: boltdb-shipper*/}} -{{/* object_store: filesystem*/}} -{{/* schema: v12*/}} -{{/* index:*/}} -{{/* prefix: index_*/}} -{{/* period: 24h*/}} - promtail: - {{- if .Values.global.imageRegistry }} - image: - registry: {{ $.Values.global.imageRegistry }} - {{- end }} - resources: {{- include "common.resources" .Values.monitoring.loki.promtail | nindent 8 }} - containerSecurityContext: - seccompProfile: - type: RuntimeDefault - priorityClassName: monitoring-components - serviceMonitor: - enabled: true - labels: {{- toYaml .Values.monitoring.labels | nindent 10 }} - grafana: - enabled: false - sidecar: - datasources: - enabled: true - prometheus: - enabled: false - {{- end -}} diff --git a/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml b/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml index b665cf643..c0b1b40b7 100644 --- a/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml +++ b/charts/base-cluster/templates/monitoring/tracing/grafana-tempo.yaml @@ -22,6 +22,27 @@ spec: imageRegistry: {{ $.Values.global.imageRegistry }} {{- end }} ingester: {{- include "common.resourcesWithPreset" .Values.monitoring.tracing.ingester | nindent 6 }} + networkPolicy: + allowExternalEgress: false + compactor: + networkPolicy: + allowExternalEgress: false + distributor: + networkPolicy: + allowExternalEgress: false + metricsGenerator: + networkPolicy: + allowExternalEgress: false + querier: + networkPolicy: + allowExternalEgress: false + queryFrontend: + networkPolicy: + allowExternalEgress: false + vulture: + enabled: false + networkPolicy: + allowExternalEgress: false tempo: traces: jaeger: diff --git a/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml b/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml deleted file mode 100644 index 8dec42525..000000000 --- a/charts/base-cluster/templates/monitoring/tracing/opentelemetry-collector.yaml +++ /dev/null @@ -1,57 +0,0 @@ -{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled 
-}} -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: open-telemetry-collector - namespace: monitoring - labels: {{- include "common.labels.standard" $ | nindent 4 }} - app.kubernetes.io/component: tracing - app.kubernetes.io/part-of: monitoring -spec: - chart: - spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "open-telemetry" "chart" "opentelemetry-collector" "context" $) | nindent 6 }} - interval: 1h - driftDetection: - mode: enabled - dependsOn: - - name: kube-prometheus-stack - namespace: monitoring - upgrade: - timeout: 5m - values: - image: - repository: {{ printf "%s/otel/opentelemetry-collector-contrib" ($.Values.global.imageRegistry | default (include "base-cluster.defaultRegistry" (dict))) }} - mode: daemonset - service: - enabled: true - config: - receivers: - prometheus: null - service: - pipelines: - logs: null - metrics: null - traces: - processors: - - batch - exporters: - - otlp - exporters: - otlp: - endpoint: grafana-tempo-distributor:4317 - tls: - insecure: true - presets: - kubernetesAttributes: - enabled: true - ports: - metrics: - enabled: true - podMonitor: - enabled: true - extraLabels: {{- toYaml .Values.monitoring.labels | nindent 8 }} - prometheusRule: - enabled: true - defaultRules: - enabled: true -{{- end -}} diff --git a/charts/base-cluster/values.schema.json b/charts/base-cluster/values.schema.json index 13219ec2a..aa876688e 100644 --- a/charts/base-cluster/values.schema.json +++ b/charts/base-cluster/values.schema.json @@ -869,10 +869,6 @@ }, "additionalProperties": false }, - "replicas": { - "type": "integer", - "minimum": 1 - }, "resourcesPreset": { "$ref": "#/$defs/resourcesPreset" }, diff --git a/charts/base-cluster/values.yaml b/charts/base-cluster/values.yaml index 05d9f3d51..a6a9b05e8 100644 --- a/charts/base-cluster/values.yaml +++ b/charts/base-cluster/values.yaml @@ -41,11 +41,6 @@ global: requests: cpu: 20m memory: 100Mi - loki: - condition: "{{ .Values.monitoring.loki.enabled }}" - additionalLabels: - app.kubernetes.io/component: loki - app.kubernetes.io/part-of: monitoring trivy: condition: "{{ .Values.monitoring.securityScanning.enabled }}" additionalLabels: @@ -112,8 +107,9 @@ global: grafana: url: https://grafana.github.io/helm-charts charts: - loki-stack: 2.x.x - condition: "{{ and .Values.monitoring.prometheus.enabled .Values.monitoring.loki.enabled }}" + loki: 6.x.x + alloy: 0.x.x + condition: "{{ and .Values.monitoring.prometheus.enabled (or .Values.monitoring.loki.enabled .Values.monitoring.tracing.enabled) }}" bitnami: url: oci://docker.io/bitnamicharts charts: @@ -311,7 +307,6 @@ monitoring: persistence: storageClass: "" size: 10Gi - replicas: 1 resourcesPreset: nano resources: requests:
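For context: the nginx, kyverno, and prometheus templates in this patch call a
`common.telemetry.conf` helper that is not defined here (it presumably lives in the
shared library chart). Below is only a rough sketch of the shape the call sites
appear to expect: a small YAML document with `enabled`, `host`, and `port` keys.
The helper name and keys are taken from the call sites; the `alloy.monitoring`
host, the hard-coded `enabled`, and the protocol-to-port mapping are assumptions
derived from the extraPorts defined in alloy.yaml, not the real implementation
(which also handles the `serviceProtocol` argument, omitted here).

{{- /* Hypothetical sketch of common.telemetry.conf; the shipped helper may differ. */ -}}
{{- define "common.telemetry.conf" -}}
{{- /* Pick the alloy service port matching the requested telemetry protocol
       (compare the extraPorts list in alloy.yaml). */ -}}
{{- $ports := dict "otlp" 4317 "jaeger" 14250 "zipkin" 9411 -}}
enabled: true # assumption: the real helper presumably derives this from the monitoring values
host: alloy.monitoring
port: {{ index $ports .protocol }}
{{- end -}}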