diff --git a/Makefile b/Makefile index 80670bb7..457c211e 100644 --- a/Makefile +++ b/Makefile @@ -150,6 +150,7 @@ install-fake-crds: kubectl apply -f test/functional/resources/crds/manifestworks_crd.yaml kubectl apply -f test/functional/resources/crds/clusterversions_crd.yaml kubectl apply -f test/functional/resources/crds/servicemonitor_crd.yaml + kubectl apply -f test/functional/resources/crds/prometheusrules_crd.yaml @sleep 10 .PHONY: kind-cluster-setup diff --git a/overlays/deploy/kustomization.yaml b/overlays/deploy/kustomization.yaml index b1145cda..d2e1d514 100644 --- a/overlays/deploy/kustomization.yaml +++ b/overlays/deploy/kustomization.yaml @@ -8,6 +8,7 @@ bases: - ../../deploy resources: - servicemonitor.yaml +- prometheusrule.yaml images: - name: clusterlifecycle-state-metrics newName: quay.io/itdove/clusterlifecycle-state-metrics diff --git a/overlays/deploy/prometheusrule.yaml b/overlays/deploy/prometheusrule.yaml new file mode 100644 index 00000000..56655cc9 --- /dev/null +++ b/overlays/deploy/prometheusrule.yaml @@ -0,0 +1,15 @@ +# Copyright Contributors to the Open Cluster Management project + +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + name: clusterlifecycle-state-metrics + name: clusterlifecycle-state-metrics.rules + namespace: openshift-monitoring +spec: + groups: + - name: acm_managed_cluster_worker_cores.rules + rules: + - expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores) + record: 'acm_managed_cluster_worker_cores:sum' diff --git a/overlays/test/http/kustomization.yaml b/overlays/test/http/kustomization.yaml index 8f91e064..240a082a 100644 --- a/overlays/test/http/kustomization.yaml +++ b/overlays/test/http/kustomization.yaml @@ -3,6 +3,7 @@ bases: - ../../../deploy - servicemonitor.yaml +- prometheusrule.yaml patchesStrategicMerge: - deployment.yaml diff --git a/overlays/test/http/prometheusrule.yaml b/overlays/test/http/prometheusrule.yaml new file mode 100644 index 00000000..6cdf7451 --- /dev/null +++ b/overlays/test/http/prometheusrule.yaml @@ -0,0 +1,16 @@ +# Copyright Contributors to the Open Cluster Management project + +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + name: clusterlifecycle-state-metrics + release: prometheus + name: clusterlifecycle-state-metrics.rules + namespace: openshift-monitoring +spec: + groups: + - name: acm_managed_cluster_worker_cores.rules + rules: + - expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores) + record: 'acm_managed_cluster_worker_cores:sum' diff --git a/test/functional/managedcluster_test.go b/test/functional/managedcluster_test.go index 5b89dc30..4e5e6852 100644 --- a/test/functional/managedcluster_test.go +++ b/test/functional/managedcluster_test.go @@ -14,6 +14,7 @@ import ( "io/ioutil" "net/http" "reflect" + "regexp" "sort" "strings" "time" @@ -193,6 +194,30 @@ var _ = Describe("ManagedCluster Metrics", func() { }) } + assertRecordingRule := func(record, expr string) { + var re = regexp.MustCompile(`[0-9]{10}\.[0-9]{3}`) + By("Querying recording rule ...", func() { + Eventually(func() bool { + resp, recordBody, err := queryMetrics(record) + if err != nil || resp.StatusCode != http.StatusOK { + return false + } + recordBody = strings.Replace(recordBody, fmt.Sprintf("\"__name__\":\"%s\",", record), "", -1) + recordBody = re.ReplaceAllString(recordBody, `0000000000.000`) + klog.Infof("Querying record %s response: %s", record, recordBody) + + resp, exprBody, err := queryMetrics(expr) + if err != nil || resp.StatusCode != http.StatusOK { + return false + } + exprBody = re.ReplaceAllString(exprBody, `0000000000.000`) + klog.Infof("Querying with expr %s response: %s", expr, exprBody) + + return recordBody == exprBody + }).Should(BeTrue()) + }) + } + Context("acm_managed_cluster_info", func() { AfterEach(func() { By("Query metrics by sum(acm_managed_cluster_info) by (hub_cluster_id)", func() { @@ -510,6 +535,8 @@ var _ = Describe("ManagedCluster Metrics", func() { }) Context("acm_managed_cluster_worker_cores", func() { + ruleExpr := "sum(acm_managed_cluster_worker_cores)+by+(hub_cluster_id,managed_cluster_id)" + AfterEach(func() { By("Query metrics by count(acm_managed_cluster_worker_cores) by (hub_cluster_id)", func() { Eventually(func() error { @@ -543,6 +570,7 @@ var _ = Describe("ManagedCluster Metrics", func() { It("should get metric", func() { assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresResponse) + assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr) }) It("should reflect the change on the managed cluster", func() { @@ -555,6 +583,7 @@ var _ = Describe("ManagedCluster Metrics", func() { }) assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresUpdatedResponse) + assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr) }) }) }) diff --git a/test/functional/resources/crds/prometheusrules_crd.yaml b/test/functional/resources/crds/prometheusrules_crd.yaml new file mode 100644 index 00000000..493868a8 --- /dev/null +++ b/test/functional/resources/crds/prometheusrules_crd.yaml @@ -0,0 +1,130 @@ +# Copyright Contributors to the Open Cluster Management project + +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: prometheusrules.monitoring.coreos.com +spec: + conversion: + strategy: None + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: PrometheusRule + listKind: PrometheusRuleList + plural: prometheusrules + shortNames: + - promrule + singular: prometheusrule + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: PrometheusRule defines recording and alerting rules for a Prometheus + instance + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: Specification of desired alerting rule definitions for Prometheus. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + description: Interval determines how often rules in the group + are evaluated. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + limit: + description: Limit the number of alerts an alerting rule and + series a recording rule can produce. Limit is supported starting + with Prometheus >= 2.31 and Thanos Ruler >= 0.24. + type: integer + name: + description: Name of the rule group. + minLength: 1 + type: string + partial_response_strategy: + description: 'PartialResponseStrategy is only used by ThanosRuler + and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response' + pattern: ^(?i)(abort|warn)?$ + type: string + rules: + description: List of alerting and recording rules. + items: + description: 'Rule describes an alerting or recording rule + See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) + or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) + rule' + properties: + alert: + description: Name of the alert. Must be a valid label + value. Only one of `record` and `alert` must be set. + type: string + annotations: + additionalProperties: + type: string + description: Annotations to add to each alert. Only valid + for alerting rules. + type: object + expr: + anyOf: + - type: integer + - type: string + description: PromQL expression to evaluate. + x-kubernetes-int-or-string: true + for: + description: Alerts are considered firing once they have + been returned for this long. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + keep_firing_for: + description: KeepFiringFor defines how long an alert will + continue firing after the condition that triggered it + has cleared. + minLength: 1 + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + labels: + additionalProperties: + type: string + description: Labels to add or overwrite. + type: object + record: + description: Name of the time series to output to. Must + be a valid metric name. Only one of `record` and `alert` + must be set. + type: string + required: + - expr + type: object + type: array + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + required: + - spec + type: object + served: true + storage: true