Skip to content

Commit

Permalink
feat(magic): some features some fixes
Browse files Browse the repository at this point in the history
Mainly fixed duration issues.
Also added some features to devel: osko dashboards, kustomize for easier
deployment, and other cool stuff.
Modified the function responsible for creating the alerting rule when
magicAlerting is enabled, and added some basic mapping for Opsgenie,
PagerDuty, and a custom alerting tool. Currently not working as expected,
though.
  • Loading branch information
Hy3n4 committed Nov 30, 2024
1 parent 0f8b1b1 commit 38e56e7
Show file tree
Hide file tree
Showing 17 changed files with 904 additions and 76 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ $(ENVTEST): $(LOCALBIN)

.PHONY: deploydev
deploydev:
@$(KUBECTL) apply -R -f devel/
@$(KUBECTL) apply -k devel/
@echo "Waiting for services to come online for the port-forwards..."
@until [ "$$($(KUBECTL) get pods -l app=grafana -o jsonpath='{.items}')}" != "[]" ] && \
[ "$$($(KUBECTL) get pods -l app=grafana -o jsonpath='{.items[0].status.containerStatuses[0].ready}')" == "true" ]; do \
Expand Down
15 changes: 8 additions & 7 deletions api/osko/v1alpha1/mimirrule_types.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package v1alpha1

import (
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus/common/model"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
Expand Down Expand Up @@ -33,13 +34,13 @@ type RuleGroup struct {
}

type Rule struct {
Record string `json:"record,omitempty"`
Alert string `json:"alert,omitempty"`
Expr string `json:"expr"`
For model.Duration `json:"for,omitempty"`
KeepFiringFor model.Duration `json:"keep_firing_for,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
Record string `json:"record,omitempty"`
Alert string `json:"alert,omitempty"`
Expr string `json:"expr"`
For *monitoringv1.Duration `json:"for,omitempty"`
KeepFiringFor model.Duration `json:"keep_firing_for,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
}

//+kubebuilder:object:root=true
Expand Down
6 changes: 6 additions & 0 deletions api/osko/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions config/crd/bases/osko.dev_mimirrules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ spec:
type: string
for:
description: |-
Duration wraps time.Duration. It is used to parse the custom duration format
from YAML.
This type should not propagate beyond the scope of input/output processing.
format: int64
type: integer
Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function.
Supported units: y, w, d, h, m, s, ms
Examples: `30s`, `1m`, `1h20m15s`, `15d`
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
keep_firing_for:
description: |-
Duration wraps time.Duration. It is used to parse the custom duration format
Expand Down
5 changes: 4 additions & 1 deletion config/samples/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
## Append samples of your project ##
resources:
- osko_v1alpha1_alertmanagerconfig.yaml
- openslo_v1_datasource.yaml
- openslo_v1_slo.yaml
- config_secret.yaml
- osko_v1alpha1_alertmanagerconfig.yaml
# +kubebuilder:scaffold:manifestskustomizesamples
28 changes: 14 additions & 14 deletions config/samples/openslo_v1_slo.yaml
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
apiVersion: openslo.com/v1
kind: SLO
metadata:
name: mimir-ingestion-latency
labels:
label.osko.dev/team: "infrastructure"
label.osko.dev/system: "gatekeeper"
label.osko.dev/domain: "security"
label.osko.dev/team: "infra"
label.osko.dev/system: "monitoring"
label.osko.dev/domain: "observability"
label.osko.dev/service: "mimir"
annotations:
osko.dev/datasourceRef: "mimir-infra-ds"
osko.dev/magicAlerting: "true"
name: gatekeeper-webhook-response-time
spec:
budgetingMethod: Occurrences
description: 99% of Gatekeeper webhook requests return in less than 0.5s
description: 95% of all queries should have a latency of less than 300 milliseconds
indicator:
metadata:
name: gatekeeper-webhook-less-than-05s
name: distributor-query-success-latency
spec:
description: 99% of Gatekeeper webhook requests return in less than 0.5s
description: 95% of all queries should have a latency of less than 500 milliseconds
ratioMetric:
good:
metricSource:
metricSourceRef: mimir-infra-ds
type: Mimir
spec:
query: controller_runtime_webhook_latency_seconds_bucket{le="0.5", job="gatekeeper-metrics"}
query: cortex_distributor_query_duration_seconds_bucket{le="0.5", method="Distributor.QueryStream", status_code="200"}
total:
metricSource:
metricSourceRef: mimir-infra-ds
type: Mimir
spec:
query: controller_runtime_webhook_latency_seconds_count{job="gatekeeper-metrics"}
query: cortex_distributor_query_duration_seconds_count{method="Distributor.QueryStream"}
objectives:
- displayName: gatekeeper-webhook-less-than-05s
target: '0.99'
service: testing
- target: "0.99"
service: mimir
timeWindow:
- duration: 28d
isRolling: true
- duration: 28d
isRolling: true
11 changes: 10 additions & 1 deletion devel/grafana-agent/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,19 @@ data:
}
}
}
prometheus.scrape "static" {
prometheus.relabel "cluster" {
rule {
target_label = "cluster"
replacement = "local"
}
forward_to = [
prometheus.remote_write.local.receiver,
]
}
prometheus.scrape "static" {
forward_to = [
prometheus.relabel.cluster.receiver,
]
targets = [
{
"__address__" = "mimir-service:9009",
Expand Down
13 changes: 8 additions & 5 deletions devel/grafana-agent/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ spec:
- name: grafana-agent
image: grafana/agent:latest
args:
- 'run'
- '/etc/agent/agent.river'
- '--storage.path=/tmp/agent'
- '--server.http.listen-addr=127.0.0.1:80'
- '--server.http.ui-path-prefix=/'
- "run"
- "/etc/agent/agent.river"
- "--storage.path=/tmp/agent"
- "--server.http.listen-addr=127.0.0.1:12345"
- "--server.http.ui-path-prefix=/"
volumeMounts:
- name: config-volume
mountPath: /etc/agent
Expand All @@ -31,6 +31,9 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
ports:
- containerPort: 12345
name: http-agent

volumes:
- name: config-volume
Expand Down
Loading

0 comments on commit 38e56e7

Please sign in to comment.