From 8dd92576d556e1e5a2e04e62f613dce7a426e83a Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Sun, 22 Oct 2023 21:15:29 +0800 Subject: [PATCH 1/2] fix default ready pods only container query Signed-off-by: Zach Zhu --- config/manager/config.yaml | 4 ++-- pkg/metric/provider/prometheus/provider_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/manager/config.yaml b/config/manager/config.yaml index 6887f77..f3d0ded 100644 --- a/config/manager/config.yaml +++ b/config/manager/config.yaml @@ -16,7 +16,7 @@ data: resourceRules: cpu: containerQuery: sum by (<<.GroupBy>>) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m])) - readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (pod) group_left sum by (pod) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m]))) + readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m]))) resources: overrides: namespace: @@ -26,7 +26,7 @@ data: containerLabel: container memory: containerQuery: sum by (<<.GroupBy>>) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>}) - readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (pod) group_left sum by (pod) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>})) + readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left sum by (namespace, pod) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>})) resources: overrides: namespace: diff --git a/pkg/metric/provider/prometheus/provider_test.go 
b/pkg/metric/provider/prometheus/provider_test.go index fa0c87f..627ce02 100644 --- a/pkg/metric/provider/prometheus/provider_test.go +++ b/pkg/metric/provider/prometheus/provider_test.go @@ -63,7 +63,7 @@ var ( }, ContainerLabel: "container", }, - ReadyPodsOnlyContainerQuery: `sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (pod) group_left sum by (pod) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m])))`, + ReadyPodsOnlyContainerQuery: `sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left () sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m])))`, }, Memory: ResourceRule{ ResourceRule: promadaptercfg.ResourceRule{ @@ -76,7 +76,7 @@ var ( }, ContainerLabel: "container", }, - ReadyPodsOnlyContainerQuery: `sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (pod) group_left sum by (pod) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>}))`, + ReadyPodsOnlyContainerQuery: `sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left () sum by (namespace, pod) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>}))`, }, Window: prommodel.Duration(3 * time.Minute), }, From a97402e4207c48b93774eb977ebe65bcfab813e1 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Tue, 24 Oct 2023 17:57:40 +0800 Subject: [PATCH 2/2] update manifests for v0.2 Signed-off-by: Zach Zhu --- config/default/kustomization.yaml | 2 +- config/manager/config.yaml | 28 +++++++++++-- config/manager/kustomization.yaml | 1 + config/manager/manager.yaml | 7 ++++ config/manager/service.yaml | 19 +++++++++ config/rbac/algorithm_job_role.yaml | 44 +++++++++++++++++++++ config/rbac/algorithm_job_role_binding.yaml | 19 +++++++++ config/rbac/kustomization.yaml | 2 + 
config/rbac/service_account.yaml | 14 +++++++ 9 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 config/manager/service.yaml create mode 100644 config/rbac/algorithm_job_role.yaml create mode 100644 config/rbac/algorithm_job_role_binding.yaml diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 2f0c81d..671293e 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -28,7 +28,7 @@ patchesStrategicMerge: # Protect the /metrics endpoint by putting it behind auth. # If you want your controller-manager to expose the /metrics # endpoint w/o any authn/z, please comment the following line. -- manager_auth_proxy_patch.yaml +#- manager_auth_proxy_patch.yaml diff --git a/config/manager/config.yaml b/config/manager/config.yaml index f3d0ded..b8e5f79 100644 --- a/config/manager/config.yaml +++ b/config/manager/config.yaml @@ -15,8 +15,18 @@ data: prometheus-metrics-config.yaml: | resourceRules: cpu: - containerQuery: sum by (<<.GroupBy>>) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m])) - readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m]))) + containerQuery: |- + sum by (<<.GroupBy>>) ( + irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m]) + ) + readyPodsOnlyContainerQuery: |- + sum by (<<.GroupBy>>) ( + (kube_pod_status_ready{condition="true"} == 1) + * on (namespace, pod) group_left () + sum by (namespace, pod) ( + irate(container_cpu_usage_seconds_total{container!="",container!="POD",<<.LabelMatchers>>}[3m]) + ) + ) resources: overrides: namespace: @@ -25,8 +35,18 @@ data: resource: pod containerLabel: container memory: - containerQuery: sum by (<<.GroupBy>>) 
(container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>}) - readyPodsOnlyContainerQuery: sum by (<<.GroupBy>>) ((kube_pod_status_ready{condition="true"} == 1) * on (namespace, pod) group_left sum by (namespace, pod) (container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>})) + containerQuery: |- + sum by (<<.GroupBy>>) ( + container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>} + ) + readyPodsOnlyContainerQuery: |- + sum by (<<.GroupBy>>) ( + (kube_pod_status_ready{condition="true"} == 1) + * on (namespace, pod) group_left () + sum by (namespace, pod) ( + container_memory_working_set_bytes{container!="",container!="POD",<<.LabelMatchers>>} + ) + ) resources: overrides: namespace: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index ad54bd9..cfd1a1e 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,3 +1,4 @@ resources: - config.yaml - manager.yaml +- service.yaml diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 5d7fc66..8056abe 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -46,6 +46,9 @@ spec: - --zap-log-level=1 - --leader-elect - --prometheus-metrics-config=/etc/kapacity/prometheus-metrics-config.yaml + - --algorithm-job-namespace=kapacity-system + - --algorithm-job-default-service-account=kapacity-algorithm-job + - --algorithm-job-default-metrics-server-addr=kapacity-grpc-service:9090 image: controller:latest name: manager securityContext: @@ -65,6 +68,10 @@ spec: port: 8081 initialDelaySeconds: 5 periodSeconds: 10 + ports: + - containerPort: 9090 + name: grpc-server + protocol: TCP volumeMounts: - name: config mountPath: /etc/kapacity diff --git a/config/manager/service.yaml b/config/manager/service.yaml new file mode 100644 index 0000000..f7c1188 --- /dev/null +++ b/config/manager/service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: 
Service +metadata: + labels: + app.kubernetes.io/name: service + app.kubernetes.io/instance: grpc-service + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: kapacity + app.kubernetes.io/part-of: kapacity + app.kubernetes.io/managed-by: kustomize + name: grpc-service + namespace: system +spec: + ports: + - port: 9090 + protocol: TCP + targetPort: 9090 + selector: + control-plane: controller-manager diff --git a/config/rbac/algorithm_job_role.yaml b/config/rbac/algorithm_job_role.yaml new file mode 100644 index 0000000..6dc1927 --- /dev/null +++ b/config/rbac/algorithm_job_role.yaml @@ -0,0 +1,44 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: algorithm-job-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: kapacity + app.kubernetes.io/part-of: kapacity + app.kubernetes.io/managed-by: kustomize + name: algorithm-job-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - get + - list + - update + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - '*' + resources: + - '*/scale' + verbs: + - get +- apiGroups: + - autoscaling.kapacitystack.io + resources: + - horizontalportraits + verbs: + - get + - list + - watch diff --git a/config/rbac/algorithm_job_role_binding.yaml b/config/rbac/algorithm_job_role_binding.yaml new file mode 100644 index 0000000..1af9cb6 --- /dev/null +++ b/config/rbac/algorithm_job_role_binding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: clusterrolebinding + app.kubernetes.io/instance: algorithm-job-rolebinding + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: kapacity + app.kubernetes.io/part-of: kapacity + app.kubernetes.io/managed-by: kustomize + name: algorithm-job-rolebinding +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: algorithm-job-role +subjects: +- kind: ServiceAccount + name: algorithm-job + namespace: system diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 731832a..710383b 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -9,6 +9,8 @@ resources: - role_binding.yaml - leader_election_role.yaml - leader_election_role_binding.yaml +- algorithm_job_role.yaml +- algorithm_job_role_binding.yaml # Comment the following 4 lines if you want to disable # the auth proxy (https://github.com/brancz/kube-rbac-proxy) # which protects your /metrics endpoint. diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 51b9339..2006f90 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -10,3 +10,17 @@ metadata: app.kubernetes.io/managed-by: kustomize name: controller-manager namespace: system + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: serviceaccount + app.kubernetes.io/instance: algorithm-job + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: kapacity + app.kubernetes.io/part-of: kapacity + app.kubernetes.io/managed-by: kustomize + name: algorithm-job + namespace: system