Skip to content

Commit

Permalink
add recording rule
Browse files Browse the repository at this point in the history
Signed-off-by: Yang Le <[email protected]>
  • Loading branch information
elgnay committed May 27, 2024
1 parent 7b31080 commit 9878afb
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ install-fake-crds:
kubectl apply -f test/functional/resources/crds/manifestworks_crd.yaml
kubectl apply -f test/functional/resources/crds/clusterversions_crd.yaml
kubectl apply -f test/functional/resources/crds/servicemonitor_crd.yaml
kubectl apply -f test/functional/resources/crds/prometheusrules_crd.yaml
@sleep 10

.PHONY: kind-cluster-setup
Expand Down
1 change: 1 addition & 0 deletions overlays/deploy/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ bases:
- ../../deploy
resources:
- servicemonitor.yaml
- prometheusrule.yaml
images:
- name: clusterlifecycle-state-metrics
newName: quay.io/itdove/clusterlifecycle-state-metrics
Expand Down
15 changes: 15 additions & 0 deletions overlays/deploy/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright Contributors to the Open Cluster Management project

# PrometheusRule that defines a recording rule for the
# acm_managed_cluster_worker_cores metric.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    name: clusterlifecycle-state-metrics
  name: clusterlifecycle-state-metrics.rules
  namespace: openshift-monitoring
spec:
  groups:
  - name: acm_managed_cluster_worker_cores.rules
    rules:
    # Records the worker-core total per (hub_cluster_id, managed_cluster_id)
    # pair under the series name acm_managed_cluster_worker_cores:sum.
    - expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores)
      record: 'acm_managed_cluster_worker_cores:sum'
1 change: 1 addition & 0 deletions overlays/test/http/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
bases:
- ../../../deploy
- servicemonitor.yaml
- prometheusrule.yaml

patchesStrategicMerge:
- deployment.yaml
Expand Down
16 changes: 16 additions & 0 deletions overlays/test/http/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright Contributors to the Open Cluster Management project

# Test-overlay PrometheusRule that defines a recording rule for the
# acm_managed_cluster_worker_cores metric.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    name: clusterlifecycle-state-metrics
    # NOTE(review): presumably matched by the rule selector of the Prometheus
    # instance used in the functional tests - confirm.
    release: prometheus
  name: clusterlifecycle-state-metrics.rules
  namespace: openshift-monitoring
spec:
  groups:
  - name: acm_managed_cluster_worker_cores.rules
    rules:
    # Records the worker-core total per (hub_cluster_id, managed_cluster_id)
    # pair under the series name acm_managed_cluster_worker_cores:sum.
    - expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores)
      record: 'acm_managed_cluster_worker_cores:sum'
29 changes: 29 additions & 0 deletions test/functional/managedcluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"io/ioutil"
"net/http"
"reflect"
"regexp"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -193,6 +194,30 @@ var _ = Describe("ManagedCluster Metrics", func() {
})
}

// assertRecordingRule polls until querying the recorded series `record`
// returns the same response body as querying the raw expression `expr`.
//
// Both arguments are passed verbatim to queryMetrics. Before comparison the
// two bodies are normalised so that only the label sets and sample values
// have to agree:
//   - the `"__name__":"<record>",` label fragment is removed from the
//     recorded-series body;
//   - every timestamp-looking number is replaced by a fixed placeholder in
//     both bodies.
//
// A failed request or a non-200 status counts as "not equal yet", so the
// surrounding Eventually simply retries.
assertRecordingRule := func(record, expr string) {
	// Ten integer digits followed by exactly three fractional digits.
	// NOTE(review): a sample timestamp rendered without a three-digit
	// fraction would not be normalised by this pattern; confirm the
	// format returned by the queried Prometheus.
	timestampRe := regexp.MustCompile(`[0-9]{10}\.[0-9]{3}`)
	const timestampPlaceholder = `0000000000.000`

	By("Querying recording rule ...", func() {
		Eventually(func() bool {
			recordResp, recordBody, err := queryMetrics(record)
			if err != nil || recordResp.StatusCode != http.StatusOK {
				return false
			}
			// Drop the metric-name label so the recorded series can be
			// compared with the body returned for the raw expression.
			nameLabel := fmt.Sprintf("\"__name__\":\"%s\",", record)
			recordBody = strings.ReplaceAll(recordBody, nameLabel, "")
			recordBody = timestampRe.ReplaceAllString(recordBody, timestampPlaceholder)
			klog.Infof("Querying record %s response: %s", record, recordBody)

			exprResp, exprBody, err := queryMetrics(expr)
			if err != nil || exprResp.StatusCode != http.StatusOK {
				return false
			}
			exprBody = timestampRe.ReplaceAllString(exprBody, timestampPlaceholder)
			klog.Infof("Querying with expr %s response: %s", expr, exprBody)

			return recordBody == exprBody
		}).Should(BeTrue())
	})
}

Context("acm_managed_cluster_info", func() {
AfterEach(func() {
By("Query metrics by sum(acm_managed_cluster_info) by (hub_cluster_id)", func() {
Expand Down Expand Up @@ -510,6 +535,8 @@ var _ = Describe("ManagedCluster Metrics", func() {
})

Context("acm_managed_cluster_worker_cores", func() {
ruleExpr := "sum(acm_managed_cluster_worker_cores)+by+(hub_cluster_id,managed_cluster_id)"

AfterEach(func() {
By("Query metrics by count(acm_managed_cluster_worker_cores) by (hub_cluster_id)", func() {
Eventually(func() error {
Expand Down Expand Up @@ -543,6 +570,7 @@ var _ = Describe("ManagedCluster Metrics", func() {

It("should get metric", func() {
assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresResponse)
assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr)
})

It("should reflect the change on the managed cluster", func() {
Expand All @@ -555,6 +583,7 @@ var _ = Describe("ManagedCluster Metrics", func() {
})

assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresUpdatedResponse)
assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr)
})
})
})
Expand Down
130 changes: 130 additions & 0 deletions test/functional/resources/crds/prometheusrules_crd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright Contributors to the Open Cluster Management project

# CustomResourceDefinition for the PrometheusRule kind
# (group monitoring.coreos.com, version v1, namespaced).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: prometheusrules.monitoring.coreos.com
spec:
  conversion:
    strategy: None
  group: monitoring.coreos.com
  names:
    categories:
    - prometheus-operator
    kind: PrometheusRule
    listKind: PrometheusRuleList
    plural: prometheusrules
    shortNames:
    - promrule
    singular: prometheusrule
  scope: Namespaced
  versions:
  - name: v1
    schema:
      openAPIV3Schema:
        description: PrometheusRule defines recording and alerting rules for a Prometheus
          instance
        properties:
          apiVersion:
            description: 'APIVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
            type: string
          kind:
            description: 'Kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          spec:
            description: Specification of desired alerting rule definitions for Prometheus.
            properties:
              groups:
                description: Content of Prometheus rule file
                items:
                  description: RuleGroup is a list of sequentially evaluated recording
                    and alerting rules.
                  properties:
                    interval:
                      description: Interval determines how often rules in the group
                        are evaluated.
                      pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
                      type: string
                    limit:
                      description: Limit the number of alerts an alerting rule and
                        series a recording rule can produce. Limit is supported starting
                        with Prometheus >= 2.31 and Thanos Ruler >= 0.24.
                      type: integer
                    name:
                      description: Name of the rule group.
                      minLength: 1
                      type: string
                    partial_response_strategy:
                      description: 'PartialResponseStrategy is only used by ThanosRuler
                        and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response'
                      pattern: ^(?i)(abort|warn)?$
                      type: string
                    rules:
                      description: List of alerting and recording rules.
                      items:
                        description: 'Rule describes an alerting or recording rule
                          See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
                          or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules)
                          rule'
                        properties:
                          alert:
                            description: Name of the alert. Must be a valid label
                              value. Only one of `record` and `alert` must be set.
                            type: string
                          annotations:
                            additionalProperties:
                              type: string
                            description: Annotations to add to each alert. Only valid
                              for alerting rules.
                            type: object
                          expr:
                            anyOf:
                            - type: integer
                            - type: string
                            description: PromQL expression to evaluate.
                            x-kubernetes-int-or-string: true
                          for:
                            description: Alerts are considered firing once they have
                              been returned for this long.
                            pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
                            type: string
                          keep_firing_for:
                            description: KeepFiringFor defines how long an alert will
                              continue firing after the condition that triggered it
                              has cleared.
                            minLength: 1
                            pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
                            type: string
                          labels:
                            additionalProperties:
                              type: string
                            description: Labels to add or overwrite.
                            type: object
                          record:
                            description: Name of the time series to output to. Must
                              be a valid metric name. Only one of `record` and `alert`
                              must be set.
                            type: string
                        # Every rule must carry an expression.
                        required:
                        - expr
                        type: object
                      type: array
                  # Every rule group must be named.
                  required:
                  - name
                  type: object
                type: array
                # Groups form a map-type list keyed by name.
                x-kubernetes-list-map-keys:
                - name
                x-kubernetes-list-type: map
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true

0 comments on commit 9878afb

Please sign in to comment.