Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add recording rule #154

Merged
merged 1 commit into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ install-fake-crds:
kubectl apply -f test/functional/resources/crds/manifestworks_crd.yaml
kubectl apply -f test/functional/resources/crds/clusterversions_crd.yaml
kubectl apply -f test/functional/resources/crds/servicemonitor_crd.yaml
kubectl apply -f test/functional/resources/crds/prometheusrules_crd.yaml
@sleep 10

.PHONY: kind-cluster-setup
Expand Down
1 change: 1 addition & 0 deletions overlays/deploy/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ bases:
- ../../deploy
resources:
- servicemonitor.yaml
- prometheusrule.yaml
images:
- name: clusterlifecycle-state-metrics
newName: quay.io/itdove/clusterlifecycle-state-metrics
Expand Down
15 changes: 15 additions & 0 deletions overlays/deploy/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright Contributors to the Open Cluster Management project

# PrometheusRule manifest declaring a single recording rule: the sum of
# acm_managed_cluster_worker_cores grouped by (hub_cluster_id, managed_cluster_id)
# is stored as the series acm_managed_cluster_worker_cores:sum.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
name: clusterlifecycle-state-metrics
name: clusterlifecycle-state-metrics.rules
namespace: openshift-monitoring
spec:
groups:
# One rule group holding the worker-cores aggregation.
- name: acm_managed_cluster_worker_cores.rules
rules:
# expr is the PromQL that gets evaluated; record is the name of the resulting series.
- expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores)
record: 'acm_managed_cluster_worker_cores:sum'
1 change: 1 addition & 0 deletions overlays/test/http/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
bases:
- ../../../deploy
- servicemonitor.yaml
- prometheusrule.yaml

patchesStrategicMerge:
- deployment.yaml
Expand Down
16 changes: 16 additions & 0 deletions overlays/test/http/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright Contributors to the Open Cluster Management project

# PrometheusRule manifest declaring a single recording rule: the sum of
# acm_managed_cluster_worker_cores grouped by (hub_cluster_id, managed_cluster_id)
# is stored as the series acm_managed_cluster_worker_cores:sum.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
name: clusterlifecycle-state-metrics
# NOTE(review): presumably the label the test Prometheus instance uses to select rules — confirm.
release: prometheus
name: clusterlifecycle-state-metrics.rules
namespace: openshift-monitoring
spec:
groups:
# One rule group holding the worker-cores aggregation.
- name: acm_managed_cluster_worker_cores.rules
rules:
# expr is the PromQL that gets evaluated; record is the name of the resulting series.
- expr: sum by (hub_cluster_id, managed_cluster_id) (acm_managed_cluster_worker_cores)
record: 'acm_managed_cluster_worker_cores:sum'
29 changes: 29 additions & 0 deletions test/functional/managedcluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"io/ioutil"
"net/http"
"reflect"
"regexp"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -193,6 +194,30 @@ var _ = Describe("ManagedCluster Metrics", func() {
})
}

// assertRecordingRule waits until querying the recorded series `record` and
// querying `expr` directly produce the same response body.
//
// Before comparing, the `"__name__":"<record>",` fragment is removed from the
// record response and every match of the timestamp-like pattern
// (10 digits, a dot, 3 digits) is replaced by a fixed placeholder in both
// responses. A failed query or a non-200 status makes the attempt count as
// "not equal yet" so that Eventually retries.
assertRecordingRule := func(record, expr string) {
	// NOTE(review): the pattern presumably targets sample timestamps, which
	// differ between two separate queries — confirm against the query API output.
	const maskedTimestamp = `0000000000.000`
	timestampPattern := regexp.MustCompile(`[0-9]{10}\.[0-9]{3}`)

	// Only the record query result carries this fragment; it is stripped so
	// the two bodies can be compared as plain strings.
	nameLabel := fmt.Sprintf("\"__name__\":\"%s\",", record)

	// fetch runs a single query and reports whether it succeeded with HTTP 200.
	fetch := func(query string) (string, bool) {
		resp, body, err := queryMetrics(query)
		if err == nil && resp.StatusCode == http.StatusOK {
			return body, true
		}
		return "", false
	}

	// rulesAgree performs one comparison attempt; the record is queried (and
	// logged) first, and the expression is only queried if that succeeded.
	rulesAgree := func() bool {
		recorded, ok := fetch(record)
		if !ok {
			return false
		}
		recorded = strings.ReplaceAll(recorded, nameLabel, "")
		recorded = timestampPattern.ReplaceAllString(recorded, maskedTimestamp)
		klog.Infof("Querying record %s response: %s", record, recorded)

		computed, ok := fetch(expr)
		if !ok {
			return false
		}
		computed = timestampPattern.ReplaceAllString(computed, maskedTimestamp)
		klog.Infof("Querying with expr %s response: %s", expr, computed)

		return recorded == computed
	}

	By("Querying recording rule ...", func() {
		Eventually(rulesAgree).Should(BeTrue())
	})
}

Context("acm_managed_cluster_info", func() {
AfterEach(func() {
By("Query metrics by sum(acm_managed_cluster_info) by (hub_cluster_id)", func() {
Expand Down Expand Up @@ -510,6 +535,8 @@ var _ = Describe("ManagedCluster Metrics", func() {
})

Context("acm_managed_cluster_worker_cores", func() {
ruleExpr := "sum(acm_managed_cluster_worker_cores)+by+(hub_cluster_id,managed_cluster_id)"

AfterEach(func() {
By("Query metrics by count(acm_managed_cluster_worker_cores) by (hub_cluster_id)", func() {
Eventually(func() error {
Expand Down Expand Up @@ -543,6 +570,7 @@ var _ = Describe("ManagedCluster Metrics", func() {

It("should get metric", func() {
assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresResponse)
assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr)
})

It("should reflect the change on the managed cluster", func() {
Expand All @@ -555,6 +583,7 @@ var _ = Describe("ManagedCluster Metrics", func() {
})

assertGetMetrics("acm_managed_cluster_worker_cores", clusterWorkerCoresUpdatedResponse)
assertRecordingRule("acm_managed_cluster_worker_cores:sum", ruleExpr)
})
})
})
Expand Down
130 changes: 130 additions & 0 deletions test/functional/resources/crds/prometheusrules_crd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright Contributors to the Open Cluster Management project

# CustomResourceDefinition for the PrometheusRule kind in the
# monitoring.coreos.com group: namespaced, with a single version (v1).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: prometheusrules.monitoring.coreos.com
spec:
conversion:
strategy: None
group: monitoring.coreos.com
names:
categories:
- prometheus-operator
kind: PrometheusRule
listKind: PrometheusRuleList
plural: prometheusrules
shortNames:
- promrule
singular: prometheusrule
scope: Namespaced
# Only v1 is declared; it is both served and used as the storage version.
versions:
- name: v1
schema:
openAPIV3Schema:
description: PrometheusRule defines recording and alerting rules for a Prometheus
instance
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Specification of desired alerting rule definitions for Prometheus.
properties:
groups:
description: Content of Prometheus rule file
items:
description: RuleGroup is a list of sequentially evaluated recording
and alerting rules.
properties:
interval:
description: Interval determines how often rules in the group
are evaluated.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
limit:
description: Limit the number of alerts an alerting rule and
series a recording rule can produce. Limit is supported starting
with Prometheus >= 2.31 and Thanos Ruler >= 0.24.
type: integer
name:
description: Name of the rule group.
minLength: 1
type: string
partial_response_strategy:
description: 'PartialResponseStrategy is only used by ThanosRuler
and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response'
pattern: ^(?i)(abort|warn)?$
type: string
rules:
description: List of alerting and recording rules.
items:
description: 'Rule describes an alerting or recording rule
See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules)
rule'
properties:
alert:
description: Name of the alert. Must be a valid label
value. Only one of `record` and `alert` must be set.
type: string
annotations:
additionalProperties:
type: string
description: Annotations to add to each alert. Only valid
for alerting rules.
type: object
expr:
anyOf:
- type: integer
- type: string
description: PromQL expression to evaluate.
x-kubernetes-int-or-string: true
for:
description: Alerts are considered firing once they have
been returned for this long.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
keep_firing_for:
description: KeepFiringFor defines how long an alert will
continue firing after the condition that triggered it
has cleared.
minLength: 1
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
labels:
additionalProperties:
type: string
description: Labels to add or overwrite.
type: object
record:
description: Name of the time series to output to. Must
be a valid metric name. Only one of `record` and `alert`
must be set.
type: string
# expr is the only field this schema requires on a rule.
required:
- expr
type: object
type: array
# name is the only field this schema requires on a rule group.
required:
- name
type: object
type: array
# groups is declared as a map-type list keyed by the group name.
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
type: object
required:
- spec
type: object
served: true
storage: true
Loading