Skip to content

Commit

Permalink
feat(slo): use record rules in PrometheusRules
Browse files Browse the repository at this point in the history
create rules from sli expressions and create labels based on the [adr](adr/0001_rule_outputs.md)
  • Loading branch information
Hy3n4 authored Oct 31, 2023
2 parents 88e3c0c + e694079 commit 2f91682
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 25 deletions.
134 changes: 109 additions & 25 deletions internal/controller/openslo/slo_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
sli.Spec.RatioMetric = slo.Spec.Indicator.Spec.RatioMetric
}
log.Info("SLI created", "SLI Name", sli.Name, "SLI Namespace", sli.Namespace, "SLI RatioMetric", sli.Spec.RatioMetric)
} else {
err = utils.UpdateStatus(
ctx,
slo,
r.Client,
"Ready",
metav1.ConditionFalse,
"SLIObjectNotFound",
"SLI Object not found",
)
if err != nil {
log.Error(err, "Failed to update SLO status")
return ctrl.Result{}, err
}
log.Error(err, "SLO has no SLI reference")
return ctrl.Result{}, err
}

// Check if this PrometheusRule already exists
Expand Down Expand Up @@ -140,21 +156,7 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
}
}

for _, rule := range promRule.Spec.Groups[0].Rules {
if rule.Expr != intstr.Parse(fmt.Sprintf("sum(rate(%s[%s])) / sum(rate(%s[%s]))",
sli.Spec.RatioMetric.Good.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
sli.Spec.RatioMetric.Total.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
)) {
promRule.Spec.Groups[0].Rules[0].Expr = intstr.Parse(fmt.Sprintf("sum(rate(%s[%s])) / sum(rate(%s[%s]))",
sli.Spec.RatioMetric.Good.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
sli.Spec.RatioMetric.Total.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
))
}
}
//TODO: Update the PrometheusRule object and write the result back if there are any changes, possibly using reflect.DeepEqual and reflect.Copy

if err := r.Update(ctx, promRule); err != nil {
if apierrors.IsNotFound(err) {
Expand Down Expand Up @@ -199,6 +201,95 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
}

func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.SLI) (*monitoringv1.PrometheusRule, error) {
var monitoringRules []monitoringv1.Rule
var totalRule monitoringv1.Rule
var goodRule monitoringv1.Rule
var badRule monitoringv1.Rule
var ratioRule monitoringv1.Rule
defaultRateWindow := "1m"
burnRateTimeWindows := []string{"1h", "6h", "3d"}
l := utils.LabelGeneratorParams{Slo: slo, Sli: sli}
m := utils.MetricLabelParams{Slo: slo, Sli: sli}

// for now, total and good are required. bad is optional and is calculated as (total - good) if not provided
// TODO: validate that the SLO budgeting method is Occurrences and that the SLIs are all ratio metrics in other case throw an error
totalRule.Record = fmt.Sprintf("osko_sli_ratio_total")
totalRule.Expr = intstr.Parse(fmt.Sprintf("sum(increase(%s[%s]))",
sli.Spec.RatioMetric.Total.MetricSource.Spec,
defaultRateWindow,
))
totalRule.Labels = l.NewMetricLabelGenerator()

monitoringRules = append(monitoringRules, totalRule)

goodRule.Record = fmt.Sprintf("osko_sli_ratio_good")
goodRule.Expr = intstr.Parse(fmt.Sprintf("sum(increase(%s[%s]))",
sli.Spec.RatioMetric.Good.MetricSource.Spec,
defaultRateWindow,
))
goodRule.Labels = l.NewMetricLabelGenerator()

monitoringRules = append(monitoringRules, goodRule)

basicRuleQuery := fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - (sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))",
slo.Spec.Objectives[0].Target,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
goodRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
)

if sli.Spec.RatioMetric.Bad != (openslov1.MetricSpec{}) {
badRule.Record = fmt.Sprint("osko_sli_ratio_bad")
badRule.Expr = intstr.Parse(fmt.Sprintf("sum(increase(%s[%s]))",
sli.Spec.RatioMetric.Bad.MetricSource.Spec,
defaultRateWindow,
))
badRule.Labels = l.NewMetricLabelGenerator()
basicRuleQuery = fmt.Sprintf("(1-%s) * sum(increase(%s{%s}[%s])) - sum(increase(%s{%s}[%s])))",
slo.Spec.Objectives[0].Target,
totalRule.Record,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
badRule.Expr.StrVal,
m.NewMetricLabelCompiler(),
slo.Spec.TimeWindow[0].Duration,
)
monitoringRules = append(monitoringRules, badRule)
}

mRule := monitoringv1.Rule{
Record: fmt.Sprint("osko_error_budget_available"),
Expr: intstr.Parse(fmt.Sprint(basicRuleQuery)),
Labels: l.NewMetricLabelGenerator(),
}

monitoringRules = append(monitoringRules, mRule)

// Calculate Error ratios for 1h, 6h, 3d
for _, timeWindow := range burnRateTimeWindows {
l.TimeWindow = timeWindow
ratioRule.Record = fmt.Sprintf("osko_sli_ratio")
ratioRule.Expr = intstr.Parse(fmt.Sprintf("(sum(increase(%s{%s}[%s]))-sum(increase(%s{%s}[%s])))/sum(increase(%s{%s}[%s]))",
totalRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
goodRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
totalRule.Record,
m.NewMetricLabelCompiler(),
timeWindow,
))
ratioRule.Labels = l.NewMetricLabelGenerator()
monitoringRules = append(monitoringRules, ratioRule)
}

rule := &monitoringv1.PrometheusRule{
TypeMeta: metav1.TypeMeta{
APIVersion: "monitoring.coreos.com/v1",
Expand All @@ -212,18 +303,11 @@ func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.
},
Spec: monitoringv1.PrometheusRuleSpec{
Groups: []monitoringv1.RuleGroup{{
Name: slo.Name,
Rules: []monitoringv1.Rule{{
Expr: intstr.Parse(fmt.Sprintf("sum(rate(%s[%s])) / sum(rate(%s[%s]))",
sli.Spec.RatioMetric.Good.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
sli.Spec.RatioMetric.Total.MetricSource.Spec,
slo.Spec.TimeWindow[0].Duration,
)),
}}}},
Name: slo.Name,
Rules: monitoringRules,
}},
},
}

// Set SLO instance as the owner and controller.
err := ctrl.SetControllerReference(slo, rule, r.Scheme)
if err != nil {
Expand Down
62 changes: 62 additions & 0 deletions internal/utils/common_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,23 @@ import (
openslov1 "github.com/oskoperator/osko/apis/openslo/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sort"
"strings"
"time"
)

// LabelGeneratorParams carries the inputs of NewMetricLabelGenerator, which
// turns them into the label set attached to a generated rule.
type LabelGeneratorParams struct {
// Slo supplies the "slo_name" and "service" label values and, when
// TimeWindow is empty, the "window" label value (duration of its first
// time window).
Slo *openslov1.SLO
// Sli supplies the "sli_name" label value.
Sli *openslov1.SLI
// TimeWindow, when non-empty, is used as the "window" label value instead
// of the SLO's first time window duration.
TimeWindow string
}

// MetricLabelParams carries the inputs of NewMetricLabelCompiler, which
// renders them as a comma-separated list of `name="value"` label matchers.
type MetricLabelParams struct {
// Slo supplies the slo_name and service matcher values and, when
// TimeWindow is empty, the window matcher value (duration of its first
// time window).
Slo *openslov1.SLO
// Sli supplies the sli_name matcher value.
Sli *openslov1.SLI
// TimeWindow, when non-empty, is used as the window matcher value instead
// of the SLO's first time window duration.
TimeWindow string
// Labels holds additional name/value pairs that are appended after the
// fixed matchers.
Labels map[string]string
}

// UpdateCondition checks whether a condition of the given type is already in the slice.
// If the condition already exists and has the same status, the conditions are returned unmodified.
// If the condition exists and has a different status, it is removed and the new one is added.
Expand Down Expand Up @@ -57,3 +71,51 @@ func UpdateStatus(ctx context.Context, slo *openslov1.SLO, r client.Client, cond
slo.Status.Ready = reason
return r.Status().Update(ctx, slo)
}

// ExtractMetricNameFromQuery returns everything in query that precedes the
// first "{". A query that contains no "{" at all yields the empty string,
// as does a query that starts with "{".
func ExtractMetricNameFromQuery(query string) string {
	if brace := strings.IndexByte(query, '{'); brace >= 0 {
		return query[:brace]
	}
	return ""
}

// NewMetricLabelCompiler renders the parameters as a comma-separated list of
// label matchers suitable for placing between the braces of a selector:
//
//	sli_name="...", slo_name="...", service="...", window="..."
//
// The window value is m.TimeWindow when it is non-empty, otherwise the
// duration of the SLO's first time window. Entries of m.Labels follow the
// fixed matchers in ascending key order. Sorting matters: ranging over the
// map directly yields an unspecified order, so identical parameters could
// render different strings on successive calls.
//
// Values are inserted verbatim; no escaping is applied. m.Slo and m.Sli must
// be non-nil and the SLO must have at least one time window.
func (m MetricLabelParams) NewMetricLabelCompiler() string {
	window := string(m.Slo.Spec.TimeWindow[0].Duration)
	if m.TimeWindow != "" {
		window = m.TimeWindow
	}

	var b strings.Builder
	b.WriteString(`sli_name="` + m.Sli.Name + `", slo_name="` + m.Slo.Name + `", service="` + m.Slo.Spec.Service + `", window="` + window + `"`)

	// Collect and sort the extra label names so the output is stable.
	keys := make([]string, 0, len(m.Labels))
	for k := range m.Labels {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	for _, k := range keys {
		b.WriteString(`, ` + k + `="` + m.Labels[k] + `"`)
	}

	return b.String()
}

// NewMetricLabelGenerator builds the label set for a generated rule with the
// keys "sli_name", "slo_name", "service" and "window". The "window" value is
// l.TimeWindow when that is non-empty and the duration of the SLO's first
// time window otherwise.
func (l LabelGeneratorParams) NewMetricLabelGenerator() map[string]string {
	labels := make(map[string]string, 4)

	// Start from the SLO's own window and let an explicit override replace it.
	labels["window"] = string(l.Slo.Spec.TimeWindow[0].Duration)
	if override := l.TimeWindow; override != "" {
		labels["window"] = override
	}

	labels["sli_name"] = l.Sli.Name
	labels["slo_name"] = l.Slo.Name
	labels["service"] = l.Slo.Spec.Service
	return labels
}

// MergeLabels combines any number of label maps into a newly allocated map.
// Maps are applied in argument order, so when a key occurs in more than one
// map the value from the last one wins. The inputs are not modified, nil
// maps are skipped, and the result is never nil.
func MergeLabels(ms ...map[string]string) map[string]string {
	merged := make(map[string]string)
	for i := range ms {
		for name, value := range ms[i] {
			merged[name] = value
		}
	}
	return merged
}

0 comments on commit 2f91682

Please sign in to comment.