From 867f58edf92e375b037d28aca1fadf8d95441234 Mon Sep 17 00:00:00 2001
From: Ben Wilson
Date: Wed, 3 Jul 2024 08:33:33 -0400
Subject: [PATCH 1/4] fix: update image versions for remote write

---
 k8s/operator/manifests/operator.yaml     |   2 +-
 operator/src/monitoring/opentelemetry.rs | 143 ++++-------------------
 operator/src/monitoring/otel-config.yaml |  82 +++++++++++++
 operator/src/monitoring/prom-config.yaml |   4 +
 operator/src/monitoring/prometheus.rs    |  55 +++++----
 operator/src/utils/mod.rs                |  54 ++++++++-
 6 files changed, 197 insertions(+), 143 deletions(-)
 create mode 100644 operator/src/monitoring/otel-config.yaml
 create mode 100644 operator/src/monitoring/prom-config.yaml

diff --git a/k8s/operator/manifests/operator.yaml b/k8s/operator/manifests/operator.yaml
index 1fb41be8..0056d34d 100644
--- a/k8s/operator/manifests/operator.yaml
+++ b/k8s/operator/manifests/operator.yaml
@@ -33,7 +33,7 @@ rules:
     resources: ["events"]
     verbs: ["create"]
   - apiGroups: ["rbac.authorization.k8s.io"]
-    resources: ["clusterroles", "clusterrolebindings"]
+    resources: ["clusterroles", "clusterrolebindings", "roles", "rolebindings"]
     verbs: ["create", "get", "patch"]
   - apiGroups: ["keramik.3box.io"]
    resources: ["networks", "networks/status", "simulations", "simulations/status"]
diff --git a/operator/src/monitoring/opentelemetry.rs b/operator/src/monitoring/opentelemetry.rs
index c995c844..020c9dd6 100644
--- a/operator/src/monitoring/opentelemetry.rs
+++ b/operator/src/monitoring/opentelemetry.rs
@@ -9,7 +9,7 @@ use k8s_openapi::{
             PodSpec, PodTemplateSpec, ResourceRequirements, ServicePort, ServiceSpec, Volume,
             VolumeMount,
         },
-        rbac::v1::{ClusterRole, ClusterRoleBinding, PolicyRule, RoleRef, Subject},
+        rbac::v1::{PolicyRule, RoleRef, Subject, Role, RoleBinding},
     },
     apimachinery::pkg::{
         api::resource::Quantity,
@@ -27,7 +27,7 @@ use crate::{
         resource_limits::ResourceLimitsConfig,
     },
     utils::{
-        apply_account, apply_cluster_role, apply_cluster_role_binding, apply_config_map,
+        apply_account, apply_namespaced_role, apply_namespaced_role_binding, apply_config_map,
         apply_service, apply_stateful_set, Clock, Context,
     },
 };
@@ -35,8 +35,8 @@
 
 pub const OTEL_APP: &str = "otel";
 pub const OTEL_SERVICE_NAME: &str = "otel";
-pub const OTEL_CR_BINDING: &str = "monitoring-cluster-role-binding";
-pub const OTEL_CR: &str = "monitoring-cluster-role";
+pub const OTEL_ROLE_BINDING: &str = "monitoring-role-binding";
+pub const OTEL_ROLE: &str = "monitoring-role";
 pub const OTEL_ACCOUNT: &str = "monitoring-service-account";
 
 pub const OTEL_CONFIG_MAP_NAME: &str = "otel-config";
@@ -52,14 +52,14 @@
     orefs: &[OwnerReference],
 ) -> Result<(), kube::error::Error> {
     apply_account(cx.clone(), ns, orefs.to_vec(), OTEL_ACCOUNT).await?;
-    apply_cluster_role(cx.clone(), ns, orefs.to_vec(), OTEL_CR, cluster_role()).await?;
-    apply_cluster_role_binding(
+    apply_namespaced_role(cx.clone(), ns, orefs.to_vec(), OTEL_ROLE, namespace_role()).await?;
+    apply_namespaced_role_binding(
         cx.clone(),
+        ns,
         orefs.to_vec(),
-        OTEL_CR_BINDING,
-        cluster_role_binding(ns),
-    )
-    .await?;
+        OTEL_ROLE_BINDING,
+        role_binding(ns),
+    ).await?;
     apply_config_map(
         cx.clone(),
         ns,
@@ -172,7 +172,7 @@ fn stateful_set_spec(config: &OtelConfig) -> StatefulSetSpec {
             }),
             containers: vec![Container {
                 name: "opentelemetry".to_owned(),
-                image: Some("public.ecr.aws/r5b3e0r5/3box/otelcol".to_owned()),
+                image: Some("otel/opentelemetry-collector-contrib:0.104.0".to_owned()),
                 args: Some(vec!["--config=/config/otel-config.yaml".to_owned()]),
                 ports: 
Some(vec![ ContainerPort { @@ -257,8 +257,14 @@ fn stateful_set_spec(config: &OtelConfig) -> StatefulSetSpec { } } -fn cluster_role() -> ClusterRole { - ClusterRole { + +fn config_map_data() -> BTreeMap { + let config_str = include_str!("./otel-config.yaml"); // Adjust the path as necessary + BTreeMap::from_iter(vec![("otel-config.yaml".to_owned(), config_str.to_owned())]) +} + +fn namespace_role() -> Role { + Role { rules: Some(vec![PolicyRule { api_groups: Some(vec!["".to_owned()]), resources: Some(vec!["pods".to_owned()]), @@ -269,11 +275,11 @@ fn cluster_role() -> ClusterRole { } } -fn cluster_role_binding(ns: &str) -> ClusterRoleBinding { - ClusterRoleBinding { +fn role_binding(ns: &str) -> RoleBinding { + RoleBinding { role_ref: RoleRef { - kind: "ClusterRole".to_owned(), - name: OTEL_CR.to_owned(), + kind: "Role".to_owned(), + name: OTEL_ROLE.to_owned(), api_group: "rbac.authorization.k8s.io".to_owned(), }, subjects: Some(vec![Subject { @@ -285,106 +291,3 @@ fn cluster_role_binding(ns: &str) -> ClusterRoleBinding { ..Default::default() } } - -fn config_map_data() -> BTreeMap { - // Include a config that will scrape pods in the network - BTreeMap::from_iter(vec![( - "otel-config.yaml".to_owned(), - r#"--- -receivers: - # Push based metrics - otlp: - protocols: - grpc: - endpoint: 0.0.0.0:4317 - # Pull based metrics - prometheus: - config: - scrape_configs: - - job_name: 'kubernetes-service-endpoints' - scrape_interval: 10s - scrape_timeout: 1s - - kubernetes_sd_configs: - - role: pod - - # Only container ports named `metrics` will be considered valid targets. - # - # Setup relabel rules to give meaning to the following k8s annotations: - # prometheus/path - URL path of the metrics endpoint - # - # Example: - # annotations: - # prometheus/path: "/api/v0/metrics" - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_port_name] - action: keep - regex: "metrics" - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: kubernetes_pod - - source_labels: [__meta_kubernetes_pod_container_name] - action: replace - target_label: kubernetes_container - -processors: - batch: - -exporters: - # This is unused but can be easily added for debugging. - logging: - # can be one of detailed | normal | basic - verbosity: detailed - # Log all messages, do not sample - sampling_initial: 1 - sampling_thereafter: 1 - otlp/jaeger: - endpoint: jaeger:4317 - tls: - insecure: true - prometheus: - endpoint: 0.0.0.0:9464 - # Keep stale metrics around for 1h before dropping - # This helps as simulation metrics are stale once the simulation stops. - metric_expiration: 1h - resource_to_telemetry_conversion: - enabled: true - prometheus/simulation: - endpoint: 0.0.0.0:9465 - # Keep stale metrics around for 1h before dropping - # This helps as simulation metrics are stale once the simulation stops. 
- metric_expiration: 1h - resource_to_telemetry_conversion: - enabled: true - -service: - pipelines: - traces: - receivers: [otlp] - processors: [batch] - exporters: [otlp/jaeger] - metrics: - receivers: [otlp,prometheus] - processors: [batch] - exporters: [prometheus] - metrics/simulation: - receivers: [otlp] - processors: [batch] - exporters: [prometheus/simulation] - # Enable telemetry on the collector itself - telemetry: - logs: - level: info - metrics: - level: detailed - address: 0.0.0.0:8888"# - .to_owned(), - )]) -} diff --git a/operator/src/monitoring/otel-config.yaml b/operator/src/monitoring/otel-config.yaml new file mode 100644 index 00000000..b876ba8a --- /dev/null +++ b/operator/src/monitoring/otel-config.yaml @@ -0,0 +1,82 @@ +--- +receivers: + # Push based metrics + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + # Pull based metrics + prometheus/scrape_configs: + config: + scrape_configs: + - job_name: 'kubernetes-service-endpoints' + scrape_interval: 10s + scrape_timeout: 1s + + kubernetes_sd_configs: + - role: pod + namespaces: + own_namespace: true + # Only container ports named `metrics` will be considered valid targets. + # + # Setup relabel rules to give meaning to the following k8s annotations: + # prometheus/path - URL path of the metrics endpoint + # + # Example: + # annotations: + # prometheus/path: "/api/v0/metrics" + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_port_name] + action: keep + regex: "metrics" + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod + - source_labels: [__meta_kubernetes_pod_container_name] + action: replace + target_label: kubernetes_container + +processors: + batch: + +exporters: + # This is unused but can be easily added for debugging. + logging: + # can be one of detailed | normal | basic + verbosity: detailed + # Log all messages, do not sample + sampling_initial: 1 + sampling_thereafter: 1 + prometheus/endpoint: + endpoint: 0.0.0.0:9464 + # Keep stale metrics around for 1h before dropping + # This helps as simulation metrics are stale once the simulation stops. 
+ metric_expiration: 1h + resource_to_telemetry_conversion: + enabled: true + # Remote write to prometheus + prometheusremotewrite: + endpoint: "http://prometheus:9090/api/v1/write" + tls: + insecure: true + +service: + pipelines: + metrics: + receivers: [otlp,prometheus/scrape_configs] + processors: [batch] + exporters: [prometheus/endpoint,prometheusremotewrite] + # Enable telemetry on the collector itself + telemetry: + logs: + level: debug + metrics: + level: detailed + address: 0.0.0.0:8888 diff --git a/operator/src/monitoring/prom-config.yaml b/operator/src/monitoring/prom-config.yaml new file mode 100644 index 00000000..9f58c20e --- /dev/null +++ b/operator/src/monitoring/prom-config.yaml @@ -0,0 +1,4 @@ +global: + scrape_interval: 10s + scrape_timeout: 5s + diff --git a/operator/src/monitoring/prometheus.rs b/operator/src/monitoring/prometheus.rs index 301df490..9ad5b441 100644 --- a/operator/src/monitoring/prometheus.rs +++ b/operator/src/monitoring/prometheus.rs @@ -4,27 +4,26 @@ use k8s_openapi::{ api::{ apps::v1::StatefulSetSpec, core::v1::{ - ConfigMapVolumeSource, Container, ContainerPort, PodSpec, PodTemplateSpec, - ResourceRequirements, Volume, VolumeMount, + ConfigMapVolumeSource, Container, ContainerPort, PodSpec, PodTemplateSpec, ResourceRequirements, ServicePort, ServiceSpec, Volume, VolumeMount }, }, apimachinery::pkg::{ api::resource::Quantity, - apis::meta::v1::ObjectMeta, - apis::meta::v1::{LabelSelector, OwnerReference}, + apis::meta::v1::{LabelSelector, ObjectMeta, OwnerReference}, util::intstr::IntOrString, }, }; use rand::RngCore; use crate::{ network::{ipfs_rpc::IpfsRpcClient, resource_limits::ResourceLimitsConfig}, - utils::{apply_config_map, apply_stateful_set, Clock, Context}, + utils::{apply_config_map, apply_service, apply_stateful_set, Clock, Context}, }; use crate::labels::selector_labels; pub const PROM_APP: &str = "prometheus"; pub const PROM_CONFIG_MAP_NAME: &str = "prom-config"; +pub const PROM_SERVICE_NAME: &str = "prometheus"; pub struct PrometheusConfig { pub dev_mode: bool, @@ -44,6 +43,14 @@ pub async fn apply( config_map_data(), ) .await?; + apply_service( + cx.clone(), + ns, + orefs.to_vec(), + PROM_SERVICE_NAME, + service_spec(), +) +.await?; apply_stateful_set( cx.clone(), ns, @@ -79,6 +86,23 @@ fn resource_requirements(dev_mode: bool) -> ResourceRequirements { } } +fn service_spec() -> ServiceSpec { + ServiceSpec { + ports: Some(vec![ + ServicePort { + name: Some("prometheus".to_owned()), + port: 9090, + protocol: Some("TCP".to_owned()), + target_port: Some(IntOrString::Int(9090)), + ..Default::default() + }, + ]), + selector: selector_labels(PROM_APP), + type_: Some("ClusterIP".to_owned()), + ..Default::default() + } +} + fn stateful_set_spec(dev_mode: bool) -> StatefulSetSpec { StatefulSetSpec { replicas: Some(1), @@ -94,10 +118,11 @@ fn stateful_set_spec(dev_mode: bool) -> StatefulSetSpec { spec: Some(PodSpec { containers: vec![Container { name: "prometheus".to_owned(), - image: Some("prom/prometheus:v2.42.0".to_owned()), + image: Some("prom/prometheus:v2.45.6".to_owned()), command: Some(vec![ "/bin/prometheus".to_owned(), "--web.enable-lifecycle".to_owned(), + "--web.enable-remote-write-receiver".to_owned(), "--config.file=/config/prom-config.yaml".to_owned(), ]), ports: Some(vec![ContainerPort { @@ -132,22 +157,10 @@ fn stateful_set_spec(dev_mode: bool) -> StatefulSetSpec { } fn config_map_data() -> BTreeMap { + let config_str = include_str!("./prom-config.yaml"); + BTreeMap::from_iter(vec![( "prom-config.yaml".to_owned(), - r#" - 
global: - scrape_interval: 10s - scrape_timeout: 5s - - scrape_configs: - - job_name: services - metrics_path: /metrics - honor_labels: true - static_configs: - - targets: - - 'localhost:9090' - - 'otel:9464' - - 'otel:8888'"# - .to_owned(), + config_str.to_owned(), )]) } diff --git a/operator/src/utils/mod.rs b/operator/src/utils/mod.rs index ca52b5ad..2f1c44d8 100644 --- a/operator/src/utils/mod.rs +++ b/operator/src/utils/mod.rs @@ -6,12 +6,13 @@ pub mod test; use std::{collections::BTreeMap, sync::Arc}; +use k8s_openapi::api::rbac::v1::RoleBinding; use k8s_openapi::{ api::{ apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetStatus}, batch::v1::{Job, JobSpec, JobStatus}, core::v1::{ConfigMap, EnvVar, Service, ServiceAccount, ServiceSpec, ServiceStatus}, - rbac::v1::{ClusterRole, ClusterRoleBinding}, + rbac::v1::{ClusterRole, ClusterRoleBinding, Role}, }, apimachinery::pkg::apis::meta::v1::OwnerReference, chrono::{DateTime, Utc}, @@ -255,6 +256,30 @@ pub async fn apply_cluster_role( Ok(role) } +/// Apply namespaced role +pub async fn apply_namespaced_role( + cx: Arc>, + ns: &str, + orefs: Vec, + name: &str, + r: Role, +) -> Result { + let serverside = PatchParams::apply(CONTROLLER_NAME); + let roles: Api = Api::namespaced(cx.k_client.clone(), ns); + // Server-side apply namespaced role + let role: Role = Role { + metadata: ObjectMeta { + name: Some(name.to_owned()), + owner_references: Some(orefs), + labels: managed_labels(), + ..r.metadata + }, + ..r + }; + let role = roles.patch(name, &serverside, &Patch::Apply(role)).await?; + Ok(role) +} + /// Apply cluster role binding pub async fn apply_cluster_role_binding( cx: Arc>, @@ -281,6 +306,32 @@ pub async fn apply_cluster_role_binding( Ok(role_binding) } +/// Apply namespaced role binding +pub async fn apply_namespaced_role_binding( + cx: Arc>, + ns: &str, + orefs: Vec, + name: &str, + rb: RoleBinding, +) -> Result { + let serverside = PatchParams::apply(CONTROLLER_NAME); + let roles: Api = Api::namespaced(cx.k_client.clone(), ns); + // Server-side apply namespaced role binding + let role_binding: RoleBinding = RoleBinding { + metadata: ObjectMeta { + name: Some(name.to_owned()), + owner_references: Some(orefs), + labels: managed_labels(), + ..rb.metadata + }, + ..rb + }; + let role_binding = roles + .patch(name, &serverside, &Patch::Apply(role_binding)) + .await?; + Ok(role_binding) +} + /// Apply a config map pub async fn apply_config_map( cx: Arc>, @@ -339,3 +390,4 @@ pub fn override_and_sort_env_vars( // Sort env vars so we can have stable tests env.sort_unstable_by(|a, b| a.name.cmp(&b.name)); } + From 8457e3d49b02d4e9031ac288aa2c355d2c9c20f1 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Wed, 3 Jul 2024 08:39:02 -0400 Subject: [PATCH 2/4] fix: UPDATE_EXPECT --- .../src/network/testdata/opentelemetry_config | 25 +++- .../src/network/testdata/opentelemetry_cr | 25 +--- .../src/network/testdata/opentelemetry_crb | 27 ++-- .../src/network/testdata/opentelemetry_sa | 72 ++++++++- .../network/testdata/opentelemetry_service | 41 +----- .../testdata/opentelemetry_stateful_set | 137 ++++-------------- operator/src/network/testdata/prom_config | 2 +- .../src/network/testdata/prom_stateful_set | 75 ++-------- 8 files changed, 156 insertions(+), 248 deletions(-) diff --git a/operator/src/network/testdata/opentelemetry_config b/operator/src/network/testdata/opentelemetry_config index 88773503..a1c74221 100644 --- a/operator/src/network/testdata/opentelemetry_config +++ b/operator/src/network/testdata/opentelemetry_config @@ -1,22 +1,31 @@ 
Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/configmaps/otel-config?&fieldManager=keramik", + uri: "/apis/rbac.authorization.k8s.io/v1/namespaces/keramik-test/rolebindings/monitoring-role-binding?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "v1", - "data": { - "otel-config.yaml": "---\nreceivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n # Pull based metrics\n prometheus:\n config:\n scrape_configs:\n - job_name: 'kubernetes-service-endpoints'\n scrape_interval: 10s\n scrape_timeout: 1s\n\n kubernetes_sd_configs:\n - role: pod\n\n # Only container ports named `metrics` will be considered valid targets.\n #\n # Setup relabel rules to give meaning to the following k8s annotations:\n # prometheus/path - URL path of the metrics endpoint\n #\n # Example:\n # annotations:\n # prometheus/path: \"/api/v0/metrics\"\n relabel_configs:\n - source_labels: [__meta_kubernetes_pod_container_port_name]\n action: keep\n regex: \"metrics\"\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path]\n action: replace\n target_label: __metrics_path__\n regex: (.+)\n - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: replace\n target_label: kubernetes_pod\n - source_labels: [__meta_kubernetes_pod_container_name]\n action: replace\n target_label: kubernetes_container\n\nprocessors:\n batch:\n\nexporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n otlp/jaeger:\n endpoint: jaeger:4317\n tls:\n insecure: true\n prometheus:\n endpoint: 0.0.0.0:9464\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n prometheus/simulation:\n endpoint: 0.0.0.0:9465\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n\nservice:\n pipelines:\n traces:\n receivers: [otlp]\n processors: [batch]\n exporters: [otlp/jaeger]\n metrics:\n receivers: [otlp,prometheus]\n processors: [batch]\n exporters: [prometheus]\n metrics/simulation:\n receivers: [otlp]\n processors: [batch]\n exporters: [prometheus/simulation]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: info\n metrics:\n level: detailed\n address: 0.0.0.0:8888" - }, - "kind": "ConfigMap", + "apiVersion": "rbac.authorization.k8s.io/v1", + "kind": "RoleBinding", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "otel-config", + "name": "monitoring-role-binding", "ownerReferences": [] - } + }, + "roleRef": { + "apiGroup": "rbac.authorization.k8s.io", + "kind": "Role", + "name": "monitoring-role" + }, + "subjects": [ + { + "kind": "ServiceAccount", + "name": "monitoring-service-account", + "namespace": "keramik-test" + } + ] }, } diff --git a/operator/src/network/testdata/opentelemetry_cr b/operator/src/network/testdata/opentelemetry_cr index eac369f9..b8154fd4 100644 --- a/operator/src/network/testdata/opentelemetry_cr +++ b/operator/src/network/testdata/opentelemetry_cr @@ -1,34 +1,19 @@ Request { method: 
"PATCH", - uri: "/apis/rbac.authorization.k8s.io/v1/clusterroles/monitoring-cluster-role?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/serviceaccounts/monitoring-service-account?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "rbac.authorization.k8s.io/v1", - "kind": "ClusterRole", + "apiVersion": "v1", + "kind": "ServiceAccount", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "monitoring-cluster-role", + "name": "monitoring-service-account", "ownerReferences": [] - }, - "rules": [ - { - "apiGroups": [ - "" - ], - "resources": [ - "pods" - ], - "verbs": [ - "get", - "list", - "watch" - ] - } - ] + } }, } diff --git a/operator/src/network/testdata/opentelemetry_crb b/operator/src/network/testdata/opentelemetry_crb index c825bbeb..ef09e58e 100644 --- a/operator/src/network/testdata/opentelemetry_crb +++ b/operator/src/network/testdata/opentelemetry_crb @@ -1,30 +1,33 @@ Request { method: "PATCH", - uri: "/apis/rbac.authorization.k8s.io/v1/clusterrolebindings/monitoring-cluster-role-binding?&fieldManager=keramik", + uri: "/apis/rbac.authorization.k8s.io/v1/namespaces/keramik-test/roles/monitoring-role?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { "apiVersion": "rbac.authorization.k8s.io/v1", - "kind": "ClusterRoleBinding", + "kind": "Role", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "monitoring-cluster-role-binding", + "name": "monitoring-role", "ownerReferences": [] }, - "roleRef": { - "apiGroup": "rbac.authorization.k8s.io", - "kind": "ClusterRole", - "name": "monitoring-cluster-role" - }, - "subjects": [ + "rules": [ { - "kind": "ServiceAccount", - "name": "monitoring-service-account", - "namespace": "keramik-test" + "apiGroups": [ + "" + ], + "resources": [ + "pods" + ], + "verbs": [ + "get", + "list", + "watch" + ] } ] }, diff --git a/operator/src/network/testdata/opentelemetry_sa b/operator/src/network/testdata/opentelemetry_sa index b8154fd4..3454fd83 100644 --- a/operator/src/network/testdata/opentelemetry_sa +++ b/operator/src/network/testdata/opentelemetry_sa @@ -1,19 +1,83 @@ Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/serviceaccounts/monitoring-service-account?&fieldManager=keramik", + uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/prometheus?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "v1", - "kind": "ServiceAccount", + "apiVersion": "apps/v1", + "kind": "StatefulSet", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "monitoring-service-account", + "name": "prometheus", "ownerReferences": [] + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "prometheus" + } + }, + "serviceName": "", + "template": { + "metadata": { + "labels": { + "app": "prometheus" + } + }, + "spec": { + "containers": [ + { + "command": [ + "/bin/prometheus", + "--web.enable-lifecycle", + "--web.enable-remote-write-receiver", + "--config.file=/config/prom-config.yaml" + ], + "image": "prom/prometheus:v2.45.6", + "name": "prometheus", + "ports": [ + { + "containerPort": 9090, + "name": "webui" + } + ], + "resources": { + "limits": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + }, + "requests": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + } + }, + "volumeMounts": [ + { + 
"mountPath": "/config", + "name": "config", + "readOnly": true + } + ] + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 493, + "name": "prom-config" + }, + "name": "config" + } + ] + } + } } }, } diff --git a/operator/src/network/testdata/opentelemetry_service b/operator/src/network/testdata/opentelemetry_service index 36fe3384..bc0e188e 100644 --- a/operator/src/network/testdata/opentelemetry_service +++ b/operator/src/network/testdata/opentelemetry_service @@ -1,51 +1,22 @@ Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/services/otel?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/configmaps/otel-config?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { "apiVersion": "v1", - "kind": "Service", + "data": { + "otel-config.yaml": "---\nreceivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n # Pull based metrics\n prometheus/scrape_configs:\n config:\n scrape_configs:\n - job_name: 'kubernetes-service-endpoints'\n scrape_interval: 10s\n scrape_timeout: 1s\n\n kubernetes_sd_configs:\n - role: pod\n namespaces:\n own_namespace: true\n # Only container ports named `metrics` will be considered valid targets.\n #\n # Setup relabel rules to give meaning to the following k8s annotations:\n # prometheus/path - URL path of the metrics endpoint\n #\n # Example:\n # annotations:\n # prometheus/path: \"/api/v0/metrics\"\n relabel_configs:\n - source_labels: [__meta_kubernetes_pod_container_port_name]\n action: keep\n regex: \"metrics\"\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path]\n action: replace\n target_label: __metrics_path__\n regex: (.+)\n - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: replace\n target_label: kubernetes_pod\n - source_labels: [__meta_kubernetes_pod_container_name]\n action: replace\n target_label: kubernetes_container\n\nprocessors:\n batch:\n\nexporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n prometheus/endpoint:\n endpoint: 0.0.0.0:9464\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n # Remote write to prometheus\n prometheusremotewrite:\n endpoint: \"http://prometheus:9090/api/v1/write\"\n tls:\n insecure: true\n\nservice:\n pipelines:\n metrics:\n receivers: [otlp,prometheus/scrape_configs]\n processors: [batch]\n exporters: [prometheus/endpoint,prometheusremotewrite]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: debug\n metrics:\n level: detailed\n address: 0.0.0.0:8888\n" + }, + "kind": "ConfigMap", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "otel", + "name": "otel-config", "ownerReferences": [] - }, - "spec": { - "ports": [ - { - "name": "otlp-receiver", - "port": 4317, - "protocol": "TCP", - "targetPort": 4317 - }, - { - "name": "all-metrics", - "port": 9464, - "protocol": "TCP", - "targetPort": 9464 - }, - { - "name": "sim-metrics", - "port": 9465, - "protocol": "TCP", - "targetPort": 9465 - }, - { - "name": "self-metrics", - "port": 8888, - "protocol": "TCP", - "targetPort": 8888 - } - ], - "selector": { 
- "app": "otel" - }, - "type": "ClusterIP" } }, } diff --git a/operator/src/network/testdata/opentelemetry_stateful_set b/operator/src/network/testdata/opentelemetry_stateful_set index 0e9e6214..36fe3384 100644 --- a/operator/src/network/testdata/opentelemetry_stateful_set +++ b/operator/src/network/testdata/opentelemetry_stateful_set @@ -1,126 +1,51 @@ Request { method: "PATCH", - uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/opentelemetry?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/services/otel?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "apps/v1", - "kind": "StatefulSet", + "apiVersion": "v1", + "kind": "Service", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "opentelemetry", + "name": "otel", "ownerReferences": [] }, "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "otel" - } - }, - "serviceName": "otel", - "template": { - "metadata": { - "labels": { - "app": "otel" - } + "ports": [ + { + "name": "otlp-receiver", + "port": 4317, + "protocol": "TCP", + "targetPort": 4317 }, - "spec": { - "containers": [ - { - "args": [ - "--config=/config/otel-config.yaml" - ], - "image": "public.ecr.aws/r5b3e0r5/3box/otelcol", - "name": "opentelemetry", - "ports": [ - { - "containerPort": 4317, - "name": "otlp-receiver" - }, - { - "containerPort": 9464, - "name": "all-metrics" - }, - { - "containerPort": 9465, - "name": "sim-metrics" - }, - { - "containerPort": 8888, - "name": "self-metrics" - } - ], - "resources": { - "limits": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - }, - "requests": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - } - }, - "volumeMounts": [ - { - "mountPath": "/config", - "name": "config", - "readOnly": true - }, - { - "mountPath": "/data", - "name": "otel-data", - "readOnly": false - } - ] - } - ], - "securityContext": { - "fsGroup": 2000 - }, - "serviceAccountName": "monitoring-service-account", - "volumes": [ - { - "configMap": { - "defaultMode": 493, - "name": "otel-config" - }, - "name": "config" - }, - { - "name": "otel-data", - "persistentVolumeClaim": { - "claimName": "otel-data" - } - } - ] - } - }, - "volumeClaimTemplates": [ { - "apiVersion": "v1", - "kind": "PersistentVolumeClaim", - "metadata": { - "name": "otel-data" - }, - "spec": { - "accessModes": [ - "ReadWriteOnce" - ], - "resources": { - "requests": { - "storage": "10Gi" - } - } - } + "name": "all-metrics", + "port": 9464, + "protocol": "TCP", + "targetPort": 9464 + }, + { + "name": "sim-metrics", + "port": 9465, + "protocol": "TCP", + "targetPort": 9465 + }, + { + "name": "self-metrics", + "port": 8888, + "protocol": "TCP", + "targetPort": 8888 } - ] + ], + "selector": { + "app": "otel" + }, + "type": "ClusterIP" } }, } diff --git a/operator/src/network/testdata/prom_config b/operator/src/network/testdata/prom_config index 164f445b..11311c40 100644 --- a/operator/src/network/testdata/prom_config +++ b/operator/src/network/testdata/prom_config @@ -8,7 +8,7 @@ Request { body: { "apiVersion": "v1", "data": { - "prom-config.yaml": "\n global:\n scrape_interval: 10s\n scrape_timeout: 5s\n\n scrape_configs:\n - job_name: services\n metrics_path: /metrics\n honor_labels: true\n static_configs:\n - targets:\n - 'localhost:9090'\n - 'otel:9464'\n - 'otel:8888'" + "prom-config.yaml": "global:\n scrape_interval: 10s\n scrape_timeout: 5s\n\n" }, "kind": "ConfigMap", "metadata": { diff --git 
a/operator/src/network/testdata/prom_stateful_set b/operator/src/network/testdata/prom_stateful_set index 26f48cf0..0ec61487 100644 --- a/operator/src/network/testdata/prom_stateful_set +++ b/operator/src/network/testdata/prom_stateful_set @@ -1,13 +1,13 @@ Request { method: "PATCH", - uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/prometheus?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/services/prometheus?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "apps/v1", - "kind": "StatefulSet", + "apiVersion": "v1", + "kind": "Service", "metadata": { "labels": { "managed-by": "keramik" @@ -16,67 +16,18 @@ Request { "ownerReferences": [] }, "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "prometheus" + "ports": [ + { + "name": "prometheus", + "port": 9090, + "protocol": "TCP", + "targetPort": 9090 } + ], + "selector": { + "app": "prometheus" }, - "serviceName": "", - "template": { - "metadata": { - "labels": { - "app": "prometheus" - } - }, - "spec": { - "containers": [ - { - "command": [ - "/bin/prometheus", - "--web.enable-lifecycle", - "--config.file=/config/prom-config.yaml" - ], - "image": "prom/prometheus:v2.42.0", - "name": "prometheus", - "ports": [ - { - "containerPort": 9090, - "name": "webui" - } - ], - "resources": { - "limits": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - }, - "requests": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - } - }, - "volumeMounts": [ - { - "mountPath": "/config", - "name": "config", - "readOnly": true - } - ] - } - ], - "volumes": [ - { - "configMap": { - "defaultMode": 493, - "name": "prom-config" - }, - "name": "config" - } - ] - } - } + "type": "ClusterIP" } }, } From ba09e758356f350ed86b5746a192b5907e087542 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Wed, 3 Jul 2024 14:47:36 -0400 Subject: [PATCH 3/4] chore: update tests --- operator/src/network/controller.rs | 20 ++- .../src/network/testdata/opentelemetry_config | 25 +--- .../src/network/testdata/opentelemetry_cr | 19 --- .../src/network/testdata/opentelemetry_rb | 31 ++++ .../{opentelemetry_crb => opentelemetry_role} | 0 .../src/network/testdata/opentelemetry_sa | 72 +-------- .../network/testdata/opentelemetry_service | 41 +++++- .../testdata/opentelemetry_stateful_set | 137 ++++++++++++++---- operator/src/network/testdata/prom_service | 33 +++++ .../src/network/testdata/prom_stateful_set | 76 ++++++++-- 10 files changed, 292 insertions(+), 162 deletions(-) delete mode 100644 operator/src/network/testdata/opentelemetry_cr create mode 100644 operator/src/network/testdata/opentelemetry_rb rename operator/src/network/testdata/{opentelemetry_crb => opentelemetry_role} (100%) create mode 100644 operator/src/network/testdata/prom_service diff --git a/operator/src/network/controller.rs b/operator/src/network/controller.rs index 067fbd0a..09e32bf3 100644 --- a/operator/src/network/controller.rs +++ b/operator/src/network/controller.rs @@ -3938,10 +3938,11 @@ mod tests { expect_file!["./testdata/jaeger_service"], expect_file!["./testdata/jaeger_stateful_set"], expect_file!["./testdata/prom_config"], + expect_file!["./testdata/prom_service"], expect_file!["./testdata/prom_stateful_set"], expect_file!["./testdata/opentelemetry_sa"], - expect_file!["./testdata/opentelemetry_cr"], - expect_file!["./testdata/opentelemetry_crb"], + expect_file!["./testdata/opentelemetry_role"], + expect_file!["./testdata/opentelemetry_rb"], 
expect_file!["./testdata/opentelemetry_config"], expect_file!["./testdata/opentelemetry_service"], expect_file!["./testdata/opentelemetry_stateful_set"], @@ -3974,10 +3975,11 @@ mod tests { expect_file!["./testdata/jaeger_service"], expect_file!["./testdata/jaeger_stateful_set"], expect_file!["./testdata/prom_config"], + expect_file!["./testdata/prom_service"], expect_file!["./testdata/prom_stateful_set"], expect_file!["./testdata/opentelemetry_sa"], - expect_file!["./testdata/opentelemetry_cr"], - expect_file!["./testdata/opentelemetry_crb"], + expect_file!["./testdata/opentelemetry_role"], + expect_file!["./testdata/opentelemetry_rb"], expect_file!["./testdata/opentelemetry_config"], expect_file!["./testdata/opentelemetry_service"], expect_file!["./testdata/opentelemetry_stateful_set"], @@ -4031,10 +4033,11 @@ mod tests { expect_file!["./testdata/jaeger_service"], expect_file!["./testdata/jaeger_stateful_set"], expect_file!["./testdata/prom_config"], + expect_file!["./testdata/prom_service"], expect_file!["./testdata/prom_stateful_set"], expect_file!["./testdata/opentelemetry_sa"], - expect_file!["./testdata/opentelemetry_cr"], - expect_file!["./testdata/opentelemetry_crb"], + expect_file!["./testdata/opentelemetry_role"], + expect_file!["./testdata/opentelemetry_rb"], expect_file!["./testdata/opentelemetry_config"], expect_file!["./testdata/opentelemetry_service"], expect_file!["./testdata/opentelemetry_stateful_set"], @@ -4087,10 +4090,11 @@ mod tests { expect_file!["./testdata/jaeger_service"], expect_file!["./testdata/jaeger_stateful_set"], expect_file!["./testdata/prom_config"], + expect_file!["./testdata/prom_service"], expect_file!["./testdata/prom_stateful_set"], expect_file!["./testdata/opentelemetry_sa"], - expect_file!["./testdata/opentelemetry_cr"], - expect_file!["./testdata/opentelemetry_crb"], + expect_file!["./testdata/opentelemetry_role"], + expect_file!["./testdata/opentelemetry_rb"], expect_file!["./testdata/opentelemetry_config"], expect_file!["./testdata/opentelemetry_service"], expect_file!["./testdata/opentelemetry_stateful_set"], diff --git a/operator/src/network/testdata/opentelemetry_config b/operator/src/network/testdata/opentelemetry_config index a1c74221..bc0e188e 100644 --- a/operator/src/network/testdata/opentelemetry_config +++ b/operator/src/network/testdata/opentelemetry_config @@ -1,31 +1,22 @@ Request { method: "PATCH", - uri: "/apis/rbac.authorization.k8s.io/v1/namespaces/keramik-test/rolebindings/monitoring-role-binding?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/configmaps/otel-config?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "rbac.authorization.k8s.io/v1", - "kind": "RoleBinding", + "apiVersion": "v1", + "data": { + "otel-config.yaml": "---\nreceivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n # Pull based metrics\n prometheus/scrape_configs:\n config:\n scrape_configs:\n - job_name: 'kubernetes-service-endpoints'\n scrape_interval: 10s\n scrape_timeout: 1s\n\n kubernetes_sd_configs:\n - role: pod\n namespaces:\n own_namespace: true\n # Only container ports named `metrics` will be considered valid targets.\n #\n # Setup relabel rules to give meaning to the following k8s annotations:\n # prometheus/path - URL path of the metrics endpoint\n #\n # Example:\n # annotations:\n # prometheus/path: \"/api/v0/metrics\"\n relabel_configs:\n - source_labels: [__meta_kubernetes_pod_container_port_name]\n 
action: keep\n regex: \"metrics\"\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path]\n action: replace\n target_label: __metrics_path__\n regex: (.+)\n - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: replace\n target_label: kubernetes_pod\n - source_labels: [__meta_kubernetes_pod_container_name]\n action: replace\n target_label: kubernetes_container\n\nprocessors:\n batch:\n\nexporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n prometheus/endpoint:\n endpoint: 0.0.0.0:9464\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n # Remote write to prometheus\n prometheusremotewrite:\n endpoint: \"http://prometheus:9090/api/v1/write\"\n tls:\n insecure: true\n\nservice:\n pipelines:\n metrics:\n receivers: [otlp,prometheus/scrape_configs]\n processors: [batch]\n exporters: [prometheus/endpoint,prometheusremotewrite]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: debug\n metrics:\n level: detailed\n address: 0.0.0.0:8888\n" + }, + "kind": "ConfigMap", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "monitoring-role-binding", + "name": "otel-config", "ownerReferences": [] - }, - "roleRef": { - "apiGroup": "rbac.authorization.k8s.io", - "kind": "Role", - "name": "monitoring-role" - }, - "subjects": [ - { - "kind": "ServiceAccount", - "name": "monitoring-service-account", - "namespace": "keramik-test" - } - ] + } }, } diff --git a/operator/src/network/testdata/opentelemetry_cr b/operator/src/network/testdata/opentelemetry_cr deleted file mode 100644 index b8154fd4..00000000 --- a/operator/src/network/testdata/opentelemetry_cr +++ /dev/null @@ -1,19 +0,0 @@ -Request { - method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/serviceaccounts/monitoring-service-account?&fieldManager=keramik", - headers: { - "accept": "application/json", - "content-type": "application/apply-patch+yaml", - }, - body: { - "apiVersion": "v1", - "kind": "ServiceAccount", - "metadata": { - "labels": { - "managed-by": "keramik" - }, - "name": "monitoring-service-account", - "ownerReferences": [] - } - }, -} diff --git a/operator/src/network/testdata/opentelemetry_rb b/operator/src/network/testdata/opentelemetry_rb new file mode 100644 index 00000000..a1c74221 --- /dev/null +++ b/operator/src/network/testdata/opentelemetry_rb @@ -0,0 +1,31 @@ +Request { + method: "PATCH", + uri: "/apis/rbac.authorization.k8s.io/v1/namespaces/keramik-test/rolebindings/monitoring-role-binding?&fieldManager=keramik", + headers: { + "accept": "application/json", + "content-type": "application/apply-patch+yaml", + }, + body: { + "apiVersion": "rbac.authorization.k8s.io/v1", + "kind": "RoleBinding", + "metadata": { + "labels": { + "managed-by": "keramik" + }, + "name": "monitoring-role-binding", + "ownerReferences": [] + }, + "roleRef": { + "apiGroup": "rbac.authorization.k8s.io", + "kind": "Role", + "name": "monitoring-role" + }, + "subjects": [ + { + "kind": "ServiceAccount", + "name": "monitoring-service-account", + "namespace": "keramik-test" + } + ] + }, +} diff --git a/operator/src/network/testdata/opentelemetry_crb 
b/operator/src/network/testdata/opentelemetry_role similarity index 100% rename from operator/src/network/testdata/opentelemetry_crb rename to operator/src/network/testdata/opentelemetry_role diff --git a/operator/src/network/testdata/opentelemetry_sa b/operator/src/network/testdata/opentelemetry_sa index 3454fd83..b8154fd4 100644 --- a/operator/src/network/testdata/opentelemetry_sa +++ b/operator/src/network/testdata/opentelemetry_sa @@ -1,83 +1,19 @@ Request { method: "PATCH", - uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/prometheus?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/serviceaccounts/monitoring-service-account?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "apps/v1", - "kind": "StatefulSet", + "apiVersion": "v1", + "kind": "ServiceAccount", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "prometheus", + "name": "monitoring-service-account", "ownerReferences": [] - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "prometheus" - } - }, - "serviceName": "", - "template": { - "metadata": { - "labels": { - "app": "prometheus" - } - }, - "spec": { - "containers": [ - { - "command": [ - "/bin/prometheus", - "--web.enable-lifecycle", - "--web.enable-remote-write-receiver", - "--config.file=/config/prom-config.yaml" - ], - "image": "prom/prometheus:v2.45.6", - "name": "prometheus", - "ports": [ - { - "containerPort": 9090, - "name": "webui" - } - ], - "resources": { - "limits": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - }, - "requests": { - "cpu": "250m", - "ephemeral-storage": "1Gi", - "memory": "1Gi" - } - }, - "volumeMounts": [ - { - "mountPath": "/config", - "name": "config", - "readOnly": true - } - ] - } - ], - "volumes": [ - { - "configMap": { - "defaultMode": 493, - "name": "prom-config" - }, - "name": "config" - } - ] - } - } } }, } diff --git a/operator/src/network/testdata/opentelemetry_service b/operator/src/network/testdata/opentelemetry_service index bc0e188e..36fe3384 100644 --- a/operator/src/network/testdata/opentelemetry_service +++ b/operator/src/network/testdata/opentelemetry_service @@ -1,22 +1,51 @@ Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/configmaps/otel-config?&fieldManager=keramik", + uri: "/api/v1/namespaces/keramik-test/services/otel?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { "apiVersion": "v1", - "data": { - "otel-config.yaml": "---\nreceivers:\n # Push based metrics\n otlp:\n protocols:\n grpc:\n endpoint: 0.0.0.0:4317\n # Pull based metrics\n prometheus/scrape_configs:\n config:\n scrape_configs:\n - job_name: 'kubernetes-service-endpoints'\n scrape_interval: 10s\n scrape_timeout: 1s\n\n kubernetes_sd_configs:\n - role: pod\n namespaces:\n own_namespace: true\n # Only container ports named `metrics` will be considered valid targets.\n #\n # Setup relabel rules to give meaning to the following k8s annotations:\n # prometheus/path - URL path of the metrics endpoint\n #\n # Example:\n # annotations:\n # prometheus/path: \"/api/v0/metrics\"\n relabel_configs:\n - source_labels: [__meta_kubernetes_pod_container_port_name]\n action: keep\n regex: \"metrics\"\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_path]\n action: replace\n target_label: __metrics_path__\n regex: (.+)\n - source_labels: [__meta_kubernetes_namespace]\n action: replace\n 
target_label: kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: replace\n target_label: kubernetes_pod\n - source_labels: [__meta_kubernetes_pod_container_name]\n action: replace\n target_label: kubernetes_container\n\nprocessors:\n batch:\n\nexporters:\n # This is unused but can be easily added for debugging.\n logging:\n # can be one of detailed | normal | basic\n verbosity: detailed\n # Log all messages, do not sample\n sampling_initial: 1\n sampling_thereafter: 1\n prometheus/endpoint:\n endpoint: 0.0.0.0:9464\n # Keep stale metrics around for 1h before dropping\n # This helps as simulation metrics are stale once the simulation stops.\n metric_expiration: 1h\n resource_to_telemetry_conversion:\n enabled: true\n # Remote write to prometheus\n prometheusremotewrite:\n endpoint: \"http://prometheus:9090/api/v1/write\"\n tls:\n insecure: true\n\nservice:\n pipelines:\n metrics:\n receivers: [otlp,prometheus/scrape_configs]\n processors: [batch]\n exporters: [prometheus/endpoint,prometheusremotewrite]\n # Enable telemetry on the collector itself\n telemetry:\n logs:\n level: debug\n metrics:\n level: detailed\n address: 0.0.0.0:8888\n" - }, - "kind": "ConfigMap", + "kind": "Service", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "otel-config", + "name": "otel", "ownerReferences": [] + }, + "spec": { + "ports": [ + { + "name": "otlp-receiver", + "port": 4317, + "protocol": "TCP", + "targetPort": 4317 + }, + { + "name": "all-metrics", + "port": 9464, + "protocol": "TCP", + "targetPort": 9464 + }, + { + "name": "sim-metrics", + "port": 9465, + "protocol": "TCP", + "targetPort": 9465 + }, + { + "name": "self-metrics", + "port": 8888, + "protocol": "TCP", + "targetPort": 8888 + } + ], + "selector": { + "app": "otel" + }, + "type": "ClusterIP" } }, } diff --git a/operator/src/network/testdata/opentelemetry_stateful_set b/operator/src/network/testdata/opentelemetry_stateful_set index 36fe3384..f9fb89d7 100644 --- a/operator/src/network/testdata/opentelemetry_stateful_set +++ b/operator/src/network/testdata/opentelemetry_stateful_set @@ -1,51 +1,126 @@ Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/services/otel?&fieldManager=keramik", + uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/opentelemetry?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "v1", - "kind": "Service", + "apiVersion": "apps/v1", + "kind": "StatefulSet", "metadata": { "labels": { "managed-by": "keramik" }, - "name": "otel", + "name": "opentelemetry", "ownerReferences": [] }, "spec": { - "ports": [ - { - "name": "otlp-receiver", - "port": 4317, - "protocol": "TCP", - "targetPort": 4317 - }, - { - "name": "all-metrics", - "port": 9464, - "protocol": "TCP", - "targetPort": 9464 - }, - { - "name": "sim-metrics", - "port": 9465, - "protocol": "TCP", - "targetPort": 9465 + "replicas": 1, + "selector": { + "matchLabels": { + "app": "otel" + } + }, + "serviceName": "otel", + "template": { + "metadata": { + "labels": { + "app": "otel" + } }, - { - "name": "self-metrics", - "port": 8888, - "protocol": "TCP", - "targetPort": 8888 + "spec": { + "containers": [ + { + "args": [ + "--config=/config/otel-config.yaml" + ], + "image": "otel/opentelemetry-collector-contrib:0.104.0", + "name": "opentelemetry", + "ports": [ + { + "containerPort": 4317, + "name": "otlp-receiver" + }, + { + "containerPort": 9464, + "name": "all-metrics" + }, + { + "containerPort": 9465, + "name": 
"sim-metrics" + }, + { + "containerPort": 8888, + "name": "self-metrics" + } + ], + "resources": { + "limits": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + }, + "requests": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + } + }, + "volumeMounts": [ + { + "mountPath": "/config", + "name": "config", + "readOnly": true + }, + { + "mountPath": "/data", + "name": "otel-data", + "readOnly": false + } + ] + } + ], + "securityContext": { + "fsGroup": 2000 + }, + "serviceAccountName": "monitoring-service-account", + "volumes": [ + { + "configMap": { + "defaultMode": 493, + "name": "otel-config" + }, + "name": "config" + }, + { + "name": "otel-data", + "persistentVolumeClaim": { + "claimName": "otel-data" + } + } + ] } - ], - "selector": { - "app": "otel" }, - "type": "ClusterIP" + "volumeClaimTemplates": [ + { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": { + "name": "otel-data" + }, + "spec": { + "accessModes": [ + "ReadWriteOnce" + ], + "resources": { + "requests": { + "storage": "10Gi" + } + } + } + } + ] } }, } diff --git a/operator/src/network/testdata/prom_service b/operator/src/network/testdata/prom_service new file mode 100644 index 00000000..0ec61487 --- /dev/null +++ b/operator/src/network/testdata/prom_service @@ -0,0 +1,33 @@ +Request { + method: "PATCH", + uri: "/api/v1/namespaces/keramik-test/services/prometheus?&fieldManager=keramik", + headers: { + "accept": "application/json", + "content-type": "application/apply-patch+yaml", + }, + body: { + "apiVersion": "v1", + "kind": "Service", + "metadata": { + "labels": { + "managed-by": "keramik" + }, + "name": "prometheus", + "ownerReferences": [] + }, + "spec": { + "ports": [ + { + "name": "prometheus", + "port": 9090, + "protocol": "TCP", + "targetPort": 9090 + } + ], + "selector": { + "app": "prometheus" + }, + "type": "ClusterIP" + } + }, +} diff --git a/operator/src/network/testdata/prom_stateful_set b/operator/src/network/testdata/prom_stateful_set index 0ec61487..3454fd83 100644 --- a/operator/src/network/testdata/prom_stateful_set +++ b/operator/src/network/testdata/prom_stateful_set @@ -1,13 +1,13 @@ Request { method: "PATCH", - uri: "/api/v1/namespaces/keramik-test/services/prometheus?&fieldManager=keramik", + uri: "/apis/apps/v1/namespaces/keramik-test/statefulsets/prometheus?&fieldManager=keramik", headers: { "accept": "application/json", "content-type": "application/apply-patch+yaml", }, body: { - "apiVersion": "v1", - "kind": "Service", + "apiVersion": "apps/v1", + "kind": "StatefulSet", "metadata": { "labels": { "managed-by": "keramik" @@ -16,18 +16,68 @@ Request { "ownerReferences": [] }, "spec": { - "ports": [ - { - "name": "prometheus", - "port": 9090, - "protocol": "TCP", - "targetPort": 9090 - } - ], + "replicas": 1, "selector": { - "app": "prometheus" + "matchLabels": { + "app": "prometheus" + } }, - "type": "ClusterIP" + "serviceName": "", + "template": { + "metadata": { + "labels": { + "app": "prometheus" + } + }, + "spec": { + "containers": [ + { + "command": [ + "/bin/prometheus", + "--web.enable-lifecycle", + "--web.enable-remote-write-receiver", + "--config.file=/config/prom-config.yaml" + ], + "image": "prom/prometheus:v2.45.6", + "name": "prometheus", + "ports": [ + { + "containerPort": 9090, + "name": "webui" + } + ], + "resources": { + "limits": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + }, + "requests": { + "cpu": "250m", + "ephemeral-storage": "1Gi", + "memory": "1Gi" + } + }, + "volumeMounts": [ + { + 
"mountPath": "/config", + "name": "config", + "readOnly": true + } + ] + } + ], + "volumes": [ + { + "configMap": { + "defaultMode": 493, + "name": "prom-config" + }, + "name": "config" + } + ] + } + } } }, } From 725cc5be3c99eaf6c3489c068faf0b0a3602cd79 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Wed, 3 Jul 2024 14:49:37 -0400 Subject: [PATCH 4/4] chore: cargo fmt --- operator/src/monitoring/opentelemetry.rs | 8 ++--- operator/src/monitoring/prometheus.rs | 41 +++++++++++------------- operator/src/utils/mod.rs | 1 - 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/operator/src/monitoring/opentelemetry.rs b/operator/src/monitoring/opentelemetry.rs index 020c9dd6..e6a0b73a 100644 --- a/operator/src/monitoring/opentelemetry.rs +++ b/operator/src/monitoring/opentelemetry.rs @@ -9,7 +9,7 @@ use k8s_openapi::{ PodSpec, PodTemplateSpec, ResourceRequirements, ServicePort, ServiceSpec, Volume, VolumeMount, }, - rbac::v1::{PolicyRule, RoleRef, Subject, Role, RoleBinding}, + rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject}, }, apimachinery::pkg::{ api::resource::Quantity, @@ -27,7 +27,7 @@ use crate::{ resource_limits::ResourceLimitsConfig, }, utils::{ - apply_account, apply_namespaced_role, apply_namespaced_role_binding, apply_config_map, + apply_account, apply_config_map, apply_namespaced_role, apply_namespaced_role_binding, apply_service, apply_stateful_set, Clock, Context, }, }; @@ -59,7 +59,8 @@ pub async fn apply( orefs.to_vec(), OTEL_ROLE_BINDING, role_binding(ns), - ).await?; + ) + .await?; apply_config_map( cx.clone(), ns, @@ -257,7 +258,6 @@ fn stateful_set_spec(config: &OtelConfig) -> StatefulSetSpec { } } - fn config_map_data() -> BTreeMap { let config_str = include_str!("./otel-config.yaml"); // Adjust the path as necessary BTreeMap::from_iter(vec![("otel-config.yaml".to_owned(), config_str.to_owned())]) diff --git a/operator/src/monitoring/prometheus.rs b/operator/src/monitoring/prometheus.rs index 9ad5b441..6c4e9397 100644 --- a/operator/src/monitoring/prometheus.rs +++ b/operator/src/monitoring/prometheus.rs @@ -4,12 +4,14 @@ use k8s_openapi::{ api::{ apps::v1::StatefulSetSpec, core::v1::{ - ConfigMapVolumeSource, Container, ContainerPort, PodSpec, PodTemplateSpec, ResourceRequirements, ServicePort, ServiceSpec, Volume, VolumeMount + ConfigMapVolumeSource, Container, ContainerPort, PodSpec, PodTemplateSpec, + ResourceRequirements, ServicePort, ServiceSpec, Volume, VolumeMount, }, }, apimachinery::pkg::{ api::resource::Quantity, - apis::meta::v1::{LabelSelector, ObjectMeta, OwnerReference}, util::intstr::IntOrString, + apis::meta::v1::{LabelSelector, ObjectMeta, OwnerReference}, + util::intstr::IntOrString, }, }; use rand::RngCore; @@ -44,13 +46,13 @@ pub async fn apply( ) .await?; apply_service( - cx.clone(), - ns, - orefs.to_vec(), - PROM_SERVICE_NAME, - service_spec(), -) -.await?; + cx.clone(), + ns, + orefs.to_vec(), + PROM_SERVICE_NAME, + service_spec(), + ) + .await?; apply_stateful_set( cx.clone(), ns, @@ -88,15 +90,13 @@ fn resource_requirements(dev_mode: bool) -> ResourceRequirements { fn service_spec() -> ServiceSpec { ServiceSpec { - ports: Some(vec![ - ServicePort { - name: Some("prometheus".to_owned()), - port: 9090, - protocol: Some("TCP".to_owned()), - target_port: Some(IntOrString::Int(9090)), - ..Default::default() - }, - ]), + ports: Some(vec![ServicePort { + name: Some("prometheus".to_owned()), + port: 9090, + protocol: Some("TCP".to_owned()), + target_port: Some(IntOrString::Int(9090)), + ..Default::default() + }]), selector: 
selector_labels(PROM_APP), type_: Some("ClusterIP".to_owned()), ..Default::default() @@ -159,8 +159,5 @@ fn stateful_set_spec(dev_mode: bool) -> StatefulSetSpec { fn config_map_data() -> BTreeMap { let config_str = include_str!("./prom-config.yaml"); - BTreeMap::from_iter(vec![( - "prom-config.yaml".to_owned(), - config_str.to_owned(), - )]) + BTreeMap::from_iter(vec![("prom-config.yaml".to_owned(), config_str.to_owned())]) } diff --git a/operator/src/utils/mod.rs b/operator/src/utils/mod.rs index 2f1c44d8..8f4db965 100644 --- a/operator/src/utils/mod.rs +++ b/operator/src/utils/mod.rs @@ -390,4 +390,3 @@ pub fn override_and_sort_env_vars( // Sort env vars so we can have stable tests env.sort_unstable_by(|a, b| a.name.cmp(&b.name)); } -