Merge pull request #732 from vishnuchalla/ws-enhancements

Enhancements for workers-scale
cloud-bulldozer · Nov 1, 2024 · 0fe179d · 0fe179d
2 parents e4e1f13 + 49740a1
commit 0fe179d
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 2 deletions.
diff --git a/workloads/kube-burner-ocp-wrapper/metrics-profiles/workers-scale/metrics-report.yml b/workloads/kube-burner-ocp-wrapper/metrics-profiles/workers-scale/metrics-report.yml
@@ -181,6 +181,26 @@
   metricName: max-memory-sum-workers
   instant: true
 
+- query: avg(avg_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:]))
+  metricName: cpu-infra
+  instant: true
+
+- query: max(max_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:]))
+  metricName: max-cpu-infra
+  instant: true
+
+- query: avg(avg_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)"))
+  metricName: memory-infra
+  instant: true
+
+- query: max(max_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)"))
+  metricName: max-memory-infra
+  instant: true
+
+- query: max_over_time(sum((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)"))[{{.elapsed}}:])
+  metricName: max-memory-sum-infra
+  instant: true
+
 # Monitoring
 
 - query: avg(avg_over_time(sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-monitoring", pod=~"prometheus-k8s.+"}[2m])) by (pod)[{{.elapsed}}:]))
@@ -197,4 +217,22 @@
 
 - query: max(max_over_time(sum(container_memory_rss{name!="", namespace="openshift-monitoring", pod=~"prometheus-k8s.+"}) by (pod)[{{.elapsed}}:]))
   metricName: max-memory-prometheus
+  instant: true
+
+# Machines
+
+- query: avg(avg_over_time(sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-machine-api"}[2m])) by (pod)[{{.elapsed}}:]))
+  metricName: cpu-openshift-machine-api
+  instant: true
+
+- query: max(max_over_time(sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-machine-api"}[2m])) by (pod)[{{.elapsed}}:]))
+  metricName: max-cpu-openshift-machine-api
+  instant: true
+
+- query: avg(avg_over_time(sum(container_memory_rss{name!="", namespace="openshift-machine-api"}) by (pod)[{{.elapsed}}:]))
+  metricName: memory-openshift-machine-api
+  instant: true
+
+- query: max(max_over_time(sum(container_memory_rss{name!="", namespace="openshift-machine-api"}) by (pod)[{{.elapsed}}:]))
+  metricName: max-memory-openshift-machine-api
   instant: true
diff --git a/workloads/kube-burner-ocp-wrapper/metrics-profiles/workers-scale/metrics.yml b/workloads/kube-burner-ocp-wrapper/metrics-profiles/workers-scale/metrics.yml
@@ -28,6 +28,9 @@
 - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0
   metricName: nodeCPU-Masters
 
+- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) > 0
+  metricName: nodeCPU-Infra
+
 # We compute memory utilization by subtracting available memory from the total
 
 - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")
@@ -36,6 +39,9 @@
 - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")
   metricName: nodeMemoryUtilization-Workers
 
+- query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")
+  metricName: nodeMemoryUtilization-Infra
+
 # Etcd metrics
 
 - query: sum(rate(etcd_server_leader_changes_seen_total[2m]))

diff --git a/workloads/kube-burner-ocp-wrapper/run.sh b/workloads/kube-burner-ocp-wrapper/run.sh
@@ -8,7 +8,7 @@ LOG_LEVEL=${LOG_LEVEL:-info}
 if [ "$KUBE_BURNER_VERSION" = "default" ]; then
     unset KUBE_BURNER_VERSION
 fi
-KUBE_BURNER_VERSION=${KUBE_BURNER_VERSION:-1.4.3}
+KUBE_BURNER_VERSION=${KUBE_BURNER_VERSION:-1.4.4}
 CHURN=${CHURN:-true}
 WORKLOAD=${WORKLOAD:?}
 QPS=${QPS:-20}
@@ -124,7 +124,6 @@ download_binary
 if [[ ${WORKLOAD} =~ "index" || ${WORKLOAD} =~ "workers-scale" ]]; then
   if [[ "$START_TIME" != 0 && "$END_TIME" != 0 ]]; then
     JOB_START=$(date -u -d "@$START_TIME" +"%Y-%m-%dT%H:%M:%SZ")
-    END_TIME=$(expr $END_TIME + 600)
     JOB_END=$(date -u -d "@$END_TIME" +"%Y-%m-%dT%H:%M:%SZ")
   fi
   cmd="${KUBE_DIR}/kube-burner-ocp ${WORKLOAD} --uuid=${UUID} --start=$START_TIME --end=$END_TIME --log-level ${LOG_LEVEL} --gc=${GC}"