Skip to content

Commit

Permalink
Remove noisy SLO alerts rules/raw (#600)
Browse files Browse the repository at this point in the history
* remove noisy alerts for the rules/raw endpoint

* remove only the GET endpoint alerts
  • Loading branch information
coleenquadros authored Sep 20, 2023
1 parent 454c825 commit 3bbcaf0
Show file tree
Hide file tree
Showing 6 changed files with 0 additions and 1,047 deletions.
12 changes: 0 additions & 12 deletions configuration/observatorium/slo.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,18 +347,6 @@ func ObservatoriumSLOs(envName rhobsInstanceEnv, signal signal) []pyrrav1alpha1.
alertName: "APIRulesRawWriteAvailabilityErrorBudgetBurning",
sloType: sloTypeAvailability,
},
{
// Availability SLO entry for read (GET) requests handled by the "rules-raw" handler.
name: "api-rules-raw-read-availability-slo",
labels: map[string]string{
slo.PropagationLabelsPrefix + "service": "observatorium-api",
"instance": string(envName),
},
description: "API /rules/raw endpoint for reads is burning too much error budget to guarantee availability SLOs.",
// Error selector: GET requests on rules-raw whose status code matches 5xx, for the job selected by envName.
successOrErrorsExpr: "http_requests_total{job=\"" + apiJobSelector[envName] + "\", handler=\"rules-raw\", method=\"GET\", group=\"metricsv1\", code=~\"^5..$\"}",
// Total selector: the same matchers as above without the status-code filter.
totalExpr: "http_requests_total{job=\"" + apiJobSelector[envName] + "\", handler=\"rules-raw\", method=\"GET\", group=\"metricsv1\"}",
alertName: "APIRulesRawReadAvailabilityErrorBudgetBurning",
sloType: sloTypeAvailability,
},
{
name: "api-rules-read-availability-slo",
labels: map[string]string{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -980,213 +980,6 @@ spec:
labels:
slo: api-rules-raw-write-availability-slo
record: pyrra_errors_total
# Rule group evaluated every 2m30s: the 4-week request increase for GET
# requests on the rules-raw handler, plus an alert for a missing metric.
- interval: 2m30s
name: api-rules-raw-read-availability-slo-increase
rules:
# Records the increase of matching requests over the last 4 weeks, summed per status code.
- expr: sum by(code) (increase(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4w]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:increase4w
# Medium-severity alert: fires once no matching http_requests_total series
# has existed for 2 minutes.
- alert: SLOMetricAbsent
annotations:
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
message: API /rules/raw endpoint for reads is burning too much error budget
to guarantee availability SLOs.
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
expr: absent(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
== 1
for: 2m
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
severity: medium
slo: api-rules-raw-read-availability-slo
# Rule group evaluated every 30s: burn-rate recording rules for GET requests
# on the rules-raw handler, followed by the alerts that combine them.
- interval: 30s
name: api-rules-raw-read-availability-slo
rules:
# Each recording rule below divides the rate of 5xx responses by the rate of
# all responses over one window; only the window differs
# (5m, 30m, 1h, 2h, 6h, 1d, 4d).
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[5m]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[5m]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate5m
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[30m]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[30m]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate30m
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1h]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1h]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate1h
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[2h]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[2h]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate2h
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[6h]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[6h]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate6h
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1d]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[1d]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate1d
- expr: sum(rate(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4d]))
/ sum(rate(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}[4d]))
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
method: GET
service: observatorium-api
slo: api-rules-raw-read-availability-slo
record: http_requests:burnrate4d
# The four alerts below share one name and differ in window pair, threshold
# factor, hold time and severity. Each requires BOTH the short-window and the
# long-window burn rate to exceed factor * (1 - 0.99).
# High severity: 5m and 1h burn rates above 14 * (1 - 0.99), held for 2m.
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
annotations:
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
message: API /rules/raw endpoint for reads is burning too much error budget
to guarantee availability SLOs.
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
expr: http_requests:burnrate5m{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (14 * (1-0.99)) and http_requests:burnrate1h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (14 * (1-0.99))
for: 2m
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
long_burnrate_window: 1h
method: GET
service: observatorium-api
severity: high
short_burnrate_window: 5m
slo: api-rules-raw-read-availability-slo
# High severity: 30m and 6h burn rates above 7 * (1 - 0.99), held for 15m.
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
annotations:
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
message: API /rules/raw endpoint for reads is burning too much error budget
to guarantee availability SLOs.
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
expr: http_requests:burnrate30m{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (7 * (1-0.99)) and http_requests:burnrate6h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (7 * (1-0.99))
for: 15m
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
long_burnrate_window: 6h
method: GET
service: observatorium-api
severity: high
short_burnrate_window: 30m
slo: api-rules-raw-read-availability-slo
# Medium severity: 2h and 1d burn rates above 2 * (1 - 0.99), held for 1h.
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
annotations:
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
message: API /rules/raw endpoint for reads is burning too much error budget
to guarantee availability SLOs.
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
expr: http_requests:burnrate2h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (2 * (1-0.99)) and http_requests:burnrate1d{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (2 * (1-0.99))
for: 1h
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
long_burnrate_window: 1d
method: GET
service: observatorium-api
severity: medium
short_burnrate_window: 2h
slo: api-rules-raw-read-availability-slo
# Medium severity: 6h and 4d burn rates above 1 * (1 - 0.99), held for 3h.
- alert: APIRulesRawReadAvailabilityErrorBudgetBurning
annotations:
dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
message: API /rules/raw endpoint for reads is burning too much error budget
to guarantee availability SLOs.
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
expr: http_requests:burnrate6h{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (1 * (1-0.99)) and http_requests:burnrate4d{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET",slo="api-rules-raw-read-availability-slo"}
> (1 * (1-0.99))
for: 3h
labels:
group: metricsv1
handler: rules-raw
job: observatorium-observatorium-mst-api
long_burnrate_window: 4d
method: GET
service: observatorium-api
severity: medium
short_burnrate_window: 6h
slo: api-rules-raw-read-availability-slo
# Rule group evaluated every 30s: the pyrra_* series recorded for the
# api-rules-raw-read-availability-slo SLO.
- interval: 30s
name: api-rules-raw-read-availability-slo-generic
rules:
# Constant series holding the SLO objective (0.99), the same value used in the
# (1-0.99) alert thresholds.
- expr: "0.99"
labels:
slo: api-rules-raw-read-availability-slo
record: pyrra_objective
# Constant series holding the SLO window.
# NOTE(review): 2419200 presumably is seconds (28 days), matching the [4w]
# range used by http_requests:increase4w. Confirm.
- expr: 2419200
labels:
slo: api-rules-raw-read-availability-slo
record: pyrra_window
# Availability = 1 - (5xx increase / total increase) over the 4-week recording.
# "or vector(0)" makes the error sum 0 when no 5xx series exist.
- expr: 1 - sum(http_requests:increase4w{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}
or vector(0)) / sum(http_requests:increase4w{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
labels:
slo: api-rules-raw-read-availability-slo
record: pyrra_availability
# Sum of the raw request counter across all matching series.
- expr: sum(http_requests_total{group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"})
labels:
slo: api-rules-raw-read-availability-slo
record: pyrra_requests_total
# Sum of the raw 5xx request counter; falls back to 0 when no 5xx series exist.
- expr: sum(http_requests_total{code=~"^5..$",group="metricsv1",handler="rules-raw",job="observatorium-observatorium-mst-api",method="GET"}
or vector(0))
labels:
slo: api-rules-raw-read-availability-slo
record: pyrra_errors_total
- interval: 2m30s
name: api-rules-read-availability-slo-increase
rules:
Expand Down
Loading

0 comments on commit 3bbcaf0

Please sign in to comment.