From 1a1f78d605ee3294f6164f2887b87d07aa646e00 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Tue, 30 Jan 2024 11:01:27 +0100 Subject: [PATCH 01/11] init work --- apis/vshn/v1/dbaas_vshn_mariadb.go | 21 +++ apis/vshn/v1/dbaas_vshn_postgresql.go | 21 +++ apis/vshn/v1/dbaas_vshn_redis.go | 21 +++ apis/vshn/v1/vshn_minio.go | 21 +++ package/crossplane.yaml | 2 +- .../functions/common/non_SLA_prom_rules.go | 137 ++++++++++++++++++ .../functions/vshnmariadb/register.go | 10 +- .../functions/vshnminio/register.go | 10 +- .../functions/vshnpostgres/register.go | 4 + .../functions/vshnredis/register.go | 4 + 10 files changed, 248 insertions(+), 3 deletions(-) create mode 100644 pkg/comp-functions/functions/common/non_SLA_prom_rules.go diff --git a/apis/vshn/v1/dbaas_vshn_mariadb.go b/apis/vshn/v1/dbaas_vshn_mariadb.go index 2d6ba0b4d2..4a14a41159 100644 --- a/apis/vshn/v1/dbaas_vshn_mariadb.go +++ b/apis/vshn/v1/dbaas_vshn_mariadb.go @@ -2,6 +2,7 @@ package v1 import ( "fmt" + "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -224,3 +225,23 @@ func (v *VSHNMariaDB) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleS schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } + +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +func (mdb *VSHNMariaDB) GetInstanceNamespaceRegex() (string, []string, error) { + // from vshn-postgresql-customer-namespace-whatever + // make vshn-postgresql-(.+)-.+ + // required for Prometheus queries + instanceNamespace := mdb.GetInstanceNamespace() + // vshn- <- takes 5 letters, anything shorter that 7 makes no sense + if len(instanceNamespace) < 7 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") + } + + splitted := strings.Split(instanceNamespace, "-") + // at least [vshn, serviceName] should be present + if len(instanceNamespace) < 2 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") + } + + return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil +} diff --git a/apis/vshn/v1/dbaas_vshn_postgresql.go b/apis/vshn/v1/dbaas_vshn_postgresql.go index a27d2589ff..5d3314570e 100644 --- a/apis/vshn/v1/dbaas_vshn_postgresql.go +++ b/apis/vshn/v1/dbaas_vshn_postgresql.go @@ -2,6 +2,7 @@ package v1 import ( "fmt" + "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -327,3 +328,23 @@ type XVSHNPostgreSQLList struct { func (pg *VSHNPostgreSQL) GetInstanceNamespace() string { return fmt.Sprintf("vshn-postgresql-%s", pg.GetName()) } + +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +func (pg *VSHNPostgreSQL) GetInstanceNamespaceRegex() (string, []string, error) { + // from vshn-postgresql-customer-namespace-whatever + // make vshn-postgresql-(.+)-.+ + // required for Prometheus queries + instanceNamespace := pg.GetInstanceNamespace() + // vshn- <- takes 5 letters, anything shorter that 7 makes no sense + if len(instanceNamespace) < 7 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") + } + + splitted := strings.Split(instanceNamespace, "-") + // at least [vshn, serviceName] should be present + if len(instanceNamespace) < 2 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") + } + + return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil +} diff --git a/apis/vshn/v1/dbaas_vshn_redis.go b/apis/vshn/v1/dbaas_vshn_redis.go index 823ff08809..71931d9db2 100644 --- a/apis/vshn/v1/dbaas_vshn_redis.go +++ b/apis/vshn/v1/dbaas_vshn_redis.go @@ -2,6 +2,7 @@ package v1 import ( "fmt" + "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -248,3 +249,23 @@ func (v *VSHNRedis) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleSpe schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } + +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +func (redis *VSHNRedis) GetInstanceNamespaceRegex() (string, []string, error) { + // from vshn-postgresql-customer-namespace-whatever + // make vshn-postgresql-(.+)-.+ + // required for Prometheus queries + instanceNamespace := redis.GetInstanceNamespace() + // vshn- <- takes 5 letters, anything shorter that 7 makes no sense + if len(instanceNamespace) < 7 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") + } + + splitted := strings.Split(instanceNamespace, "-") + // at least [vshn, serviceName] should be present + if len(instanceNamespace) < 2 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") + } + + return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil +} diff --git a/apis/vshn/v1/vshn_minio.go b/apis/vshn/v1/vshn_minio.go index 5a67d66f8b..bf335a9f2f 100644 --- a/apis/vshn/v1/vshn_minio.go +++ b/apis/vshn/v1/vshn_minio.go @@ -2,6 +2,7 @@ package v1 import ( "fmt" + "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -186,3 +187,23 @@ func (v *VSHNMinio) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleSpe schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } + +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +func (minio *VSHNMinio) GetInstanceNamespaceRegex() (string, []string, error) { + // from vshn-postgresql-customer-namespace-whatever + // make vshn-postgresql-(.+)-.+ + // required for Prometheus queries + instanceNamespace := minio.GetInstanceNamespace() + // vshn- <- takes 5 letters, anything shorter that 7 makes no sense + if len(instanceNamespace) < 7 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") + } + + splitted := strings.Split(instanceNamespace, "-") + // at least [vshn, serviceName] should be present + if len(instanceNamespace) < 2 { + return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") + } + + return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil +} diff --git a/package/crossplane.yaml b/package/crossplane.yaml index a06a4971c1..44f503b842 100644 --- a/package/crossplane.yaml +++ b/package/crossplane.yaml @@ -4,6 +4,6 @@ kind: Function metadata: name: function-appcat spec: - image: ghcr.io/vshn/appcat:latest + image: ghcr.io/vshn/appcat:nonslaalerts crossplane: version: ">=1.14.0" diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go new file mode 100644 index 0000000000..15171348ca --- /dev/null +++ b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go @@ -0,0 +1,137 @@ +package common + +import ( + "context" + "fmt" + "reflect" + + fnproto "github.com/crossplane/function-sdk-go/proto/v1beta1" + promV1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var ( + SYN_TEAM string = "schedar" + SEVERITY_CRITICAL string = "critical" + MEMORY_CONTAINERS = map[string]string{ + "mariadb": "mariadb", + "minio": "minio", + "postgresql": "patroni", + "redis": "redis", + } +) + +type InstanceNamespacer interface { + GetInstanceNamespace() string + GetInstanceNamespaceRegex() (string, []string, error) +} + +func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *runtime.ServiceRuntime) *fnproto.Result { + return func(ctx context.Context, svc *runtime.ServiceRuntime) *fnproto.Result { + + log := controllerruntime.LoggerFrom(ctx) + log.Info("adding non SLA prometheus rules") + + log.V(1).Info("Transforming", "obj", svc) + + err := svc.GetObservedComposite(obj) + if err != nil { + return runtime.NewFatalResult(fmt.Errorf("Can't get composite: %w", err)) + } + elem, ok := obj.(InstanceNamespacer) + if !ok { + return runtime.NewWarningResult(fmt.Sprintf("Type %s doesn't implement Alerter interface", reflect.TypeOf(obj).String())) + } + + instanceNamespaceRegex, instanceNamespaceSplitted, err := elem.GetInstanceNamespaceRegex() + if err != nil { + return runtime.NewWarningResult("Instance namespace looks broken, " + err.Error()) + } + + err = generatePromeRules(elem.GetInstanceNamespace(), instanceNamespaceRegex, MEMORY_CONTAINERS[instanceNamespaceSplitted[1]], svc) + if err != nil { + return runtime.NewFatalResult(err) + } + + return nil + } +} + +func generatePromeRules(namespace, namespaceRegex, serviceName string, svc *runtime.ServiceRuntime) error { + var minuteInterval, hourInterval, twoHourInterval promV1.Duration + minuteInterval = "1m" + hourInterval = "1h" + twoHourInterval = "2h" + + prometheusRules := &promV1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nonSLORules", + Namespace: namespace, + }, + Spec: promV1.PrometheusRuleSpec{ + Groups: []promV1.RuleGroup{ + promV1.RuleGroup{ + Rules: []promV1.Rule{ + promV1.Rule{ + Alert: serviceName + "PersistentVolumeFillingUp", + Annotations: map[string]string{ + "description": "The volume claimed by the instance {{ $labels.name }} in namespace {{ $labels.label_appcat_vshn_io_claim_namespace }} is only {{ $value | humanizePercentage }} free.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup", + "summary": "PersistentVolume is filling up.", + }, + Expr: intstr.IntOrString{ + Type: intstr.String, + StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.03 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), + }, + For: minuteInterval, + Labels: map[string]string{ + "severity": SEVERITY_CRITICAL, + "syn_team": SYN_TEAM, + }, + }, + promV1.Rule{ + Alert: serviceName + "PersistentVolumeExpectedToFillUp", + Annotations: map[string]string{ + "description": "Based on recent sampling, the volume claimed by the instance {{ $labels.name }} in namespace {{ $labels.label_appcat_vshn_io_claim_namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup", + "summary": "PersistentVolume is expected to fill up.", + }, + Expr: intstr.IntOrString{ + Type: intstr.String, + StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0 unlesson(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), + }, + For: hourInterval, + Labels: map[string]string{ + "severity": SEVERITY_CRITICAL, + "syn_team": SYN_TEAM, + }, + }, + promV1.Rule{ + Alert: serviceName + "MemoryCritical", + Annotations: map[string]string{ + "description": "The memory claimed by the instance {{ $labels.name }} in namespace {{ $labels.label_appcat_vshn_io_claim_namespace }} has been over 85% for 2 hours.\n Please reducde the load of this instance, or increase the memory.", + "runbook_url": "https://hub.syn.tools/appcat/runbooks/vshn-generic.html#MemoryCritical", + "summary": "Memory usage critical.", + }, + Expr: intstr.IntOrString{ + Type: intstr.String, + StrVal: fmt.Sprintf("label_replace( topk(1, (max(container_memory_working_set_bytes{container=\"%s\"})without (name, id) / on(container,pod,namespace) kube_pod_container_resource_limits{resource=\"memory\"}* 100) > 85) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", serviceName, namespaceRegex), + }, + For: twoHourInterval, + Labels: map[string]string{ + "severity": SEVERITY_CRITICAL, + "syn_team": SYN_TEAM, + }, + }, + }, + }, + }, + }, + } + + return svc.SetDesiredKubeObject(prometheusRules, "non_sla_alerts") +} diff --git a/pkg/comp-functions/functions/vshnmariadb/register.go b/pkg/comp-functions/functions/vshnmariadb/register.go index 01cc29ed9f..98f02f3b19 100644 --- a/pkg/comp-functions/functions/vshnmariadb/register.go +++ b/pkg/comp-functions/functions/vshnmariadb/register.go @@ -1,6 +1,10 @@ package vshnmariadb -import "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" +import ( + vshnv1 "github.com/vshn/appcat/v4/apis/vshn/v1" + "github.com/vshn/appcat/v4/pkg/comp-functions/functions/common" + "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" +) func init() { runtime.RegisterService("mariadb", runtime.Service{ @@ -18,6 +22,10 @@ func init() { Name: "backup", Execute: AddBackupMariadb, }, + { + Name: "non-sla-prometheus-rules", + Execute: common.GenerateNonSLAPromRules(&vshnv1.VSHNMariaDB{}), + }, }, }) } diff --git a/pkg/comp-functions/functions/vshnminio/register.go b/pkg/comp-functions/functions/vshnminio/register.go index 91d700fdd5..771f80ef9a 100644 --- a/pkg/comp-functions/functions/vshnminio/register.go +++ b/pkg/comp-functions/functions/vshnminio/register.go @@ -1,6 +1,10 @@ package vshnminio -import "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" +import ( + vshnv1 "github.com/vshn/appcat/v4/apis/vshn/v1" + "github.com/vshn/appcat/v4/pkg/comp-functions/functions/common" + "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" +) func init() { runtime.RegisterService("minio", runtime.Service{ @@ -18,6 +22,10 @@ func init() { Name: "maintenance", Execute: AddMaintenanceJob, }, + { + Name: "non-sla-prometheus-rules", + Execute: common.GenerateNonSLAPromRules(&vshnv1.VSHNMinio{}), + }, }, }) } diff --git a/pkg/comp-functions/functions/vshnpostgres/register.go b/pkg/comp-functions/functions/vshnpostgres/register.go index 9e99427260..6565947cfc 100644 --- a/pkg/comp-functions/functions/vshnpostgres/register.go +++ b/pkg/comp-functions/functions/vshnpostgres/register.go @@ -61,6 +61,10 @@ func init() { Name: "delay-cluster-deployment", Execute: DelayClusterDeployment, }, + { + Name: "non-sla-prometheus-rules", + Execute: common.GenerateNonSLAPromRules(&vshnv1.VSHNPostgreSQL{}), + }, }, }) } diff --git a/pkg/comp-functions/functions/vshnredis/register.go b/pkg/comp-functions/functions/vshnredis/register.go index 8fedc51b90..5d94aded5d 100644 --- a/pkg/comp-functions/functions/vshnredis/register.go +++ b/pkg/comp-functions/functions/vshnredis/register.go @@ -49,6 +49,10 @@ func init() { Name: "user-alerting", Execute: common.AddUserAlerting(&vshnv1.VSHNRedis{}), }, + { + Name: "non-sla-prometheus-rules", + Execute: common.GenerateNonSLAPromRules(&vshnv1.VSHNRedis{}), + }, }, }) } From bf13f41e4db5bbf4923b75b168f6776a91f766ae Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Tue, 30 Jan 2024 11:35:36 +0100 Subject: [PATCH 02/11] fixing missing fields and typos --- .../functions/common/non_SLA_prom_rules.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go index 15171348ca..d670a61031 100644 --- a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go @@ -26,6 +26,7 @@ var ( ) type InstanceNamespacer interface { + GetName() string GetInstanceNamespace() string GetInstanceNamespaceRegex() (string, []string, error) } @@ -52,7 +53,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r return runtime.NewWarningResult("Instance namespace looks broken, " + err.Error()) } - err = generatePromeRules(elem.GetInstanceNamespace(), instanceNamespaceRegex, MEMORY_CONTAINERS[instanceNamespaceSplitted[1]], svc) + err = generatePromeRules(elem.GetName(), elem.GetInstanceNamespace(), instanceNamespaceRegex, MEMORY_CONTAINERS[instanceNamespaceSplitted[1]], svc) if err != nil { return runtime.NewFatalResult(err) } @@ -61,7 +62,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r } } -func generatePromeRules(namespace, namespaceRegex, serviceName string, svc *runtime.ServiceRuntime) error { +func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc *runtime.ServiceRuntime) error { var minuteInterval, hourInterval, twoHourInterval promV1.Duration minuteInterval = "1m" hourInterval = "1h" @@ -69,12 +70,13 @@ func generatePromeRules(namespace, namespaceRegex, serviceName string, svc *runt prometheusRules := &promV1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ - Name: "nonSLORules", + Name: serviceName + "-non-slo-rules", Namespace: namespace, }, Spec: promV1.PrometheusRuleSpec{ Groups: []promV1.RuleGroup{ promV1.RuleGroup{ + Name: serviceName + "-non-slo-rules", Rules: []promV1.Rule{ promV1.Rule{ Alert: serviceName + "PersistentVolumeFillingUp", @@ -102,7 +104,7 @@ func generatePromeRules(namespace, namespaceRegex, serviceName string, svc *runt }, Expr: intstr.IntOrString{ Type: intstr.String, - StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0 unlesson(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), + StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), }, For: hourInterval, Labels: map[string]string{ @@ -133,5 +135,5 @@ func generatePromeRules(namespace, namespaceRegex, serviceName string, svc *runt }, } - return svc.SetDesiredKubeObject(prometheusRules, "non_sla_alerts") + return svc.SetDesiredKubeObject(prometheusRules, name+"-non-sla-alerts") } From b37ba4dbe3bf0b2768c2b25989614bc5853fbda0 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Wed, 31 Jan 2024 15:53:16 +0100 Subject: [PATCH 03/11] fixing deadlock in Redis --- pkg/comp-functions/functions/vshnredis/redis_deploy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/comp-functions/functions/vshnredis/redis_deploy.go b/pkg/comp-functions/functions/vshnredis/redis_deploy.go index bd34723a50..a95184fb64 100644 --- a/pkg/comp-functions/functions/vshnredis/redis_deploy.go +++ b/pkg/comp-functions/functions/vshnredis/redis_deploy.go @@ -24,7 +24,7 @@ func DeployRedis(ctx context.Context, svc *runtime.ServiceRuntime) *xfnproto.Res err = common.BootstrapInstanceNs(ctx, comp, "redis", "namespace-conditions", svc) if err != nil { err = fmt.Errorf("cannot bootstrap instance namespace: %w", err) - return runtime.NewFatalResult(err) + return runtime.NewWarningResult(err.Error()) } return nil From 4c71f0248dd54990b31435acb155f2aaa656267d Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Thu, 1 Feb 2024 16:52:55 +0100 Subject: [PATCH 04/11] add PR suggestions --- pkg/comp-functions/functions/common/interfaces.go | 1 + .../functions/common/non_SLA_prom_rules.go | 10 ++-------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pkg/comp-functions/functions/common/interfaces.go b/pkg/comp-functions/functions/common/interfaces.go index 612c3cb51a..05a7b88417 100644 --- a/pkg/comp-functions/functions/common/interfaces.go +++ b/pkg/comp-functions/functions/common/interfaces.go @@ -9,4 +9,5 @@ type InfoGetter interface { GetBackupSchedule() string GetBackupRetention() vshnv1.K8upRetentionPolicy GetServiceName() string + GetInstanceNamespaceRegex() (string, []string, error) } diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go index d670a61031..9eb38e61e0 100644 --- a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go @@ -25,12 +25,6 @@ var ( } ) -type InstanceNamespacer interface { - GetName() string - GetInstanceNamespace() string - GetInstanceNamespaceRegex() (string, []string, error) -} - func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *runtime.ServiceRuntime) *fnproto.Result { return func(ctx context.Context, svc *runtime.ServiceRuntime) *fnproto.Result { @@ -43,7 +37,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r if err != nil { return runtime.NewFatalResult(fmt.Errorf("Can't get composite: %w", err)) } - elem, ok := obj.(InstanceNamespacer) + elem, ok := obj.(InfoGetter) if !ok { return runtime.NewWarningResult(fmt.Sprintf("Type %s doesn't implement Alerter interface", reflect.TypeOf(obj).String())) } @@ -55,7 +49,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r err = generatePromeRules(elem.GetName(), elem.GetInstanceNamespace(), instanceNamespaceRegex, MEMORY_CONTAINERS[instanceNamespaceSplitted[1]], svc) if err != nil { - return runtime.NewFatalResult(err) + return runtime.NewWarningResult("can't create prometheus rules: " + err.Error()) } return nil From ef94cec5063208352a0bffebb685f6790662790d Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 10:08:26 +0100 Subject: [PATCH 05/11] add PR suggestions && test case --- apis/vshn/v1/dbaas_vshn_mariadb.go | 21 --------- apis/vshn/v1/dbaas_vshn_postgresql.go | 21 --------- apis/vshn/v1/dbaas_vshn_redis.go | 21 --------- apis/vshn/v1/vshn_minio.go | 21 --------- .../functions/common/interfaces.go | 1 - .../functions/common/non_SLA_prom_rules.go | 33 +++++++++++++- .../common/non_SLA_prom_rules_test.go | 43 +++++++++++++++++++ 7 files changed, 74 insertions(+), 87 deletions(-) create mode 100644 pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go diff --git a/apis/vshn/v1/dbaas_vshn_mariadb.go b/apis/vshn/v1/dbaas_vshn_mariadb.go index 4a14a41159..2d6ba0b4d2 100644 --- a/apis/vshn/v1/dbaas_vshn_mariadb.go +++ b/apis/vshn/v1/dbaas_vshn_mariadb.go @@ -2,7 +2,6 @@ package v1 import ( "fmt" - "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -225,23 +224,3 @@ func (v *VSHNMariaDB) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleS schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } - -// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary -func (mdb *VSHNMariaDB) GetInstanceNamespaceRegex() (string, []string, error) { - // from vshn-postgresql-customer-namespace-whatever - // make vshn-postgresql-(.+)-.+ - // required for Prometheus queries - instanceNamespace := mdb.GetInstanceNamespace() - // vshn- <- takes 5 letters, anything shorter that 7 makes no sense - if len(instanceNamespace) < 7 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") - } - - splitted := strings.Split(instanceNamespace, "-") - // at least [vshn, serviceName] should be present - if len(instanceNamespace) < 2 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") - } - - return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil -} diff --git a/apis/vshn/v1/dbaas_vshn_postgresql.go b/apis/vshn/v1/dbaas_vshn_postgresql.go index 5d3314570e..a27d2589ff 100644 --- a/apis/vshn/v1/dbaas_vshn_postgresql.go +++ b/apis/vshn/v1/dbaas_vshn_postgresql.go @@ -2,7 +2,6 @@ package v1 import ( "fmt" - "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -328,23 +327,3 @@ type XVSHNPostgreSQLList struct { func (pg *VSHNPostgreSQL) GetInstanceNamespace() string { return fmt.Sprintf("vshn-postgresql-%s", pg.GetName()) } - -// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary -func (pg *VSHNPostgreSQL) GetInstanceNamespaceRegex() (string, []string, error) { - // from vshn-postgresql-customer-namespace-whatever - // make vshn-postgresql-(.+)-.+ - // required for Prometheus queries - instanceNamespace := pg.GetInstanceNamespace() - // vshn- <- takes 5 letters, anything shorter that 7 makes no sense - if len(instanceNamespace) < 7 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") - } - - splitted := strings.Split(instanceNamespace, "-") - // at least [vshn, serviceName] should be present - if len(instanceNamespace) < 2 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") - } - - return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil -} diff --git a/apis/vshn/v1/dbaas_vshn_redis.go b/apis/vshn/v1/dbaas_vshn_redis.go index 71931d9db2..823ff08809 100644 --- a/apis/vshn/v1/dbaas_vshn_redis.go +++ b/apis/vshn/v1/dbaas_vshn_redis.go @@ -2,7 +2,6 @@ package v1 import ( "fmt" - "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -249,23 +248,3 @@ func (v *VSHNRedis) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleSpe schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } - -// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary -func (redis *VSHNRedis) GetInstanceNamespaceRegex() (string, []string, error) { - // from vshn-postgresql-customer-namespace-whatever - // make vshn-postgresql-(.+)-.+ - // required for Prometheus queries - instanceNamespace := redis.GetInstanceNamespace() - // vshn- <- takes 5 letters, anything shorter that 7 makes no sense - if len(instanceNamespace) < 7 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") - } - - splitted := strings.Split(instanceNamespace, "-") - // at least [vshn, serviceName] should be present - if len(instanceNamespace) < 2 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") - } - - return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil -} diff --git a/apis/vshn/v1/vshn_minio.go b/apis/vshn/v1/vshn_minio.go index bf335a9f2f..5a67d66f8b 100644 --- a/apis/vshn/v1/vshn_minio.go +++ b/apis/vshn/v1/vshn_minio.go @@ -2,7 +2,6 @@ package v1 import ( "fmt" - "strings" xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" v1 "github.com/vshn/appcat/v4/apis/v1" @@ -187,23 +186,3 @@ func (v *VSHNMinio) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleSpe schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } - -// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary -func (minio *VSHNMinio) GetInstanceNamespaceRegex() (string, []string, error) { - // from vshn-postgresql-customer-namespace-whatever - // make vshn-postgresql-(.+)-.+ - // required for Prometheus queries - instanceNamespace := minio.GetInstanceNamespace() - // vshn- <- takes 5 letters, anything shorter that 7 makes no sense - if len(instanceNamespace) < 7 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace is way too short") - } - - splitted := strings.Split(instanceNamespace, "-") - // at least [vshn, serviceName] should be present - if len(instanceNamespace) < 2 { - return "", nil, fmt.Errorf("giveMeNamespaceRegex: instance namespace broken during splitting") - } - - return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil -} diff --git a/pkg/comp-functions/functions/common/interfaces.go b/pkg/comp-functions/functions/common/interfaces.go index 05a7b88417..612c3cb51a 100644 --- a/pkg/comp-functions/functions/common/interfaces.go +++ b/pkg/comp-functions/functions/common/interfaces.go @@ -9,5 +9,4 @@ type InfoGetter interface { GetBackupSchedule() string GetBackupRetention() vshnv1.K8upRetentionPolicy GetServiceName() string - GetInstanceNamespaceRegex() (string, []string, error) } diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go index 9eb38e61e0..741eb3548e 100644 --- a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_SLA_prom_rules.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "strings" fnproto "github.com/crossplane/function-sdk-go/proto/v1beta1" promV1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -35,14 +36,14 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r err := svc.GetObservedComposite(obj) if err != nil { - return runtime.NewFatalResult(fmt.Errorf("Can't get composite: %w", err)) + return runtime.NewFatalResult(fmt.Errorf("can't get composite: %w", err)) } elem, ok := obj.(InfoGetter) if !ok { return runtime.NewWarningResult(fmt.Sprintf("Type %s doesn't implement Alerter interface", reflect.TypeOf(obj).String())) } - instanceNamespaceRegex, instanceNamespaceSplitted, err := elem.GetInstanceNamespaceRegex() + instanceNamespaceRegex, instanceNamespaceSplitted, err := getInstanceNamespaceRegex(elem.GetInstanceNamespace()) if err != nil { return runtime.NewWarningResult("Instance namespace looks broken, " + err.Error()) } @@ -131,3 +132,31 @@ func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc return svc.SetDesiredKubeObject(prometheusRules, name+"-non-sla-alerts") } + +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +func getInstanceNamespaceRegex(instanceNamespace string) (string, []string, error) { + // from instance namespace, f.e. vshn-postgresql-customer-namespace-whatever + // make vshn-postgresql-(.+)-.+ + // vshn-redis-(.+)-.+ + // vshn-minio-(.+)-.+ + // required for Prometheus queries + + // vshn- <- takes 5 letters, anything shorter that 7 makes no sense + if len(instanceNamespace) < 7 { + return "", nil, fmt.Errorf("GetInstanceNamespaceRegex: instance namespace is way too short") + } + + splitted := strings.Split(instanceNamespace, "-") + // at least [vshn, serviceName] should be present + if len(splitted) < 3 { + return "", nil, fmt.Errorf("GetInstanceNamespaceRegex: instance namespace broken during splitting") + } + + for _, val := range splitted { + if len(val) == 0 { + return "", nil, fmt.Errorf("GetInstanceNamespaceRegex: broken instance namespace, name ending with hyphen: %s", val) + } + } + + return fmt.Sprintf("%s-%s-(.+)-.+", splitted[0], splitted[1]), splitted, nil +} diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go b/pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go new file mode 100644 index 0000000000..d97fa32523 --- /dev/null +++ b/pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go @@ -0,0 +1,43 @@ +package common + +import ( + "regexp" + "testing" +) + +func TestMain(t *testing.T) { + checkValidNamespaceRegex := regexp.MustCompile(`[a-z]*-[a-z]*-\(\.\+\)\-\.\+`) + goodTestCases := []string{ + "vshn-postgresql-development-app1", + "vshn-postgresql-prod-app2", + "vshn-minio-main-cluster-prod", + "vshn-mariadb-prd", + "vshn-kafka-with-very-long-but-valid-name-including-many-separators-because-we-can", + } + + brokenCases := []string{ + "", + "vshn-postgresql", + "vshn-postgresql-", + "a-b-a", + "vshnpostgresqlnoseparator", + "vshn-redi1s-sadfasd", + } + + for _, val := range goodTestCases { + regex, _, err := getInstanceNamespaceRegex(val) + if err != nil && !checkValidNamespaceRegex.MatchString(regex) { + t.Logf("Failed goodTestCases test case for: %s, with error: %v", val, err) + t.FailNow() + } + } + + for _, val := range brokenCases { + regex, d1, err := getInstanceNamespaceRegex(val) + if err == nil && checkValidNamespaceRegex.MatchString(regex) { + t.Logf("Failed brokenCases test case for: %s, with error: %v", val, err) + t.Log(regex, d1) + t.FailNow() + } + } +} From 535fbbfaf1e9efe5d2911021f55264a50e6b70ff Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 11:31:33 +0100 Subject: [PATCH 06/11] defaulting package image version --- package/crossplane.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/crossplane.yaml b/package/crossplane.yaml index 44f503b842..a06a4971c1 100644 --- a/package/crossplane.yaml +++ b/package/crossplane.yaml @@ -4,6 +4,6 @@ kind: Function metadata: name: function-appcat spec: - image: ghcr.io/vshn/appcat:nonslaalerts + image: ghcr.io/vshn/appcat:latest crossplane: version: ">=1.14.0" From e33f77c774df2ed3d1bd7f993cb2b9bc0a1cbb87 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 13:04:49 +0100 Subject: [PATCH 07/11] renaming files --- ...LA_prom_rules.go => non_sla_prom_rules.go} | 41 ++++++++++--------- ...les_test.go => non_sla_prom_rules_test.go} | 0 2 files changed, 22 insertions(+), 19 deletions(-) rename pkg/comp-functions/functions/common/{non_SLA_prom_rules.go => non_sla_prom_rules.go} (86%) rename pkg/comp-functions/functions/common/{non_SLA_prom_rules_test.go => non_sla_prom_rules_test.go} (100%) diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go b/pkg/comp-functions/functions/common/non_sla_prom_rules.go similarity index 86% rename from pkg/comp-functions/functions/common/non_SLA_prom_rules.go rename to pkg/comp-functions/functions/common/non_sla_prom_rules.go index 741eb3548e..c806242aeb 100644 --- a/pkg/comp-functions/functions/common/non_SLA_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_sla_prom_rules.go @@ -16,9 +16,9 @@ import ( ) var ( - SYN_TEAM string = "schedar" - SEVERITY_CRITICAL string = "critical" - MEMORY_CONTAINERS = map[string]string{ + synTeam string = "schedar" + severityCritical string = "critical" + memoryContainers = map[string]string{ "mariadb": "mariadb", "minio": "minio", "postgresql": "patroni", @@ -43,12 +43,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r return runtime.NewWarningResult(fmt.Sprintf("Type %s doesn't implement Alerter interface", reflect.TypeOf(obj).String())) } - instanceNamespaceRegex, instanceNamespaceSplitted, err := getInstanceNamespaceRegex(elem.GetInstanceNamespace()) - if err != nil { - return runtime.NewWarningResult("Instance namespace looks broken, " + err.Error()) - } - - err = generatePromeRules(elem.GetName(), elem.GetInstanceNamespace(), instanceNamespaceRegex, MEMORY_CONTAINERS[instanceNamespaceSplitted[1]], svc) + err = generatePromeRules(elem, svc) if err != nil { return runtime.NewWarningResult("can't create prometheus rules: " + err.Error()) } @@ -57,12 +52,20 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r } } -func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc *runtime.ServiceRuntime) error { +func generatePromeRules(elem InfoGetter, svc *runtime.ServiceRuntime) error { var minuteInterval, hourInterval, twoHourInterval promV1.Duration minuteInterval = "1m" hourInterval = "1h" twoHourInterval = "2h" + instanceNamespaceRegex, instanceNamespaceSplitted, err := getInstanceNamespaceRegex(elem.GetInstanceNamespace()) + if err != nil { + return fmt.Errorf("getInstanceNamespaceRegex func failed to parse instance namespace: %s, with err: %s", elem.GetInstanceNamespace(), err.Error()) + } + + name := elem.GetName() + namespace := elem.GetInstanceNamespace() + serviceName := memoryContainers[instanceNamespaceSplitted[1]] prometheusRules := &promV1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: serviceName + "-non-slo-rules", @@ -82,12 +85,12 @@ func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc }, Expr: intstr.IntOrString{ Type: intstr.String, - StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.03 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), + StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.03 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", instanceNamespaceRegex), }, For: minuteInterval, Labels: map[string]string{ - "severity": SEVERITY_CRITICAL, - "syn_team": SYN_TEAM, + "severity": severityCritical, + "syn_team": synTeam, }, }, promV1.Rule{ @@ -99,12 +102,12 @@ func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc }, Expr: intstr.IntOrString{ Type: intstr.String, - StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", namespaceRegex), + StrVal: fmt.Sprintf("label_replace( bottomk(1, (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}) < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace,persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}== 1) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", instanceNamespaceRegex), }, For: hourInterval, Labels: map[string]string{ - "severity": SEVERITY_CRITICAL, - "syn_team": SYN_TEAM, + "severity": severityCritical, + "syn_team": synTeam, }, }, promV1.Rule{ @@ -116,12 +119,12 @@ func generatePromeRules(name, namespace, namespaceRegex, serviceName string, svc }, Expr: intstr.IntOrString{ Type: intstr.String, - StrVal: fmt.Sprintf("label_replace( topk(1, (max(container_memory_working_set_bytes{container=\"%s\"})without (name, id) / on(container,pod,namespace) kube_pod_container_resource_limits{resource=\"memory\"}* 100) > 85) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", serviceName, namespaceRegex), + StrVal: fmt.Sprintf("label_replace( topk(1, (max(container_memory_working_set_bytes{container=\"%s\"})without (name, id) / on(container,pod,namespace) kube_pod_container_resource_limits{resource=\"memory\"}* 100) > 85) * on(namespace) group_left(label_appcat_vshn_io_claim_namespace)kube_namespace_labels, \"name\", \"$1\", \"namespace\",\"%s\")", serviceName, instanceNamespaceRegex), }, For: twoHourInterval, Labels: map[string]string{ - "severity": SEVERITY_CRITICAL, - "syn_team": SYN_TEAM, + "severity": severityCritical, + "syn_team": synTeam, }, }, }, diff --git a/pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go b/pkg/comp-functions/functions/common/non_sla_prom_rules_test.go similarity index 100% rename from pkg/comp-functions/functions/common/non_SLA_prom_rules_test.go rename to pkg/comp-functions/functions/common/non_sla_prom_rules_test.go From 658f019a42a74c427af6c702e86bdec662123328 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 15:40:52 +0100 Subject: [PATCH 08/11] fixing postgresql and minio --- apis/vshn/v1/dbaas_vshn_postgresql.go | 11 +++++++++++ apis/vshn/v1/vshn_minio.go | 11 +++++++++++ .../functions/common/non_sla_prom_rules.go | 8 +++++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/apis/vshn/v1/dbaas_vshn_postgresql.go b/apis/vshn/v1/dbaas_vshn_postgresql.go index a27d2589ff..de6073c726 100644 --- a/apis/vshn/v1/dbaas_vshn_postgresql.go +++ b/apis/vshn/v1/dbaas_vshn_postgresql.go @@ -327,3 +327,14 @@ type XVSHNPostgreSQLList struct { func (pg *VSHNPostgreSQL) GetInstanceNamespace() string { return fmt.Sprintf("vshn-postgresql-%s", pg.GetName()) } + +// GetBackupRetention returns the retention definition for this backup. +// !!! This is just a placeholder to satisfy InfoGetter interface +func (v *VSHNPostgreSQL) GetBackupRetention() K8upRetentionPolicy { + return K8upRetentionPolicy{} +} + +// GetServiceName returns the name of this service +func (v *VSHNPostgreSQL) GetServiceName() string { + return "postgresql" +} diff --git a/apis/vshn/v1/vshn_minio.go b/apis/vshn/v1/vshn_minio.go index 5a67d66f8b..ceeb242334 100644 --- a/apis/vshn/v1/vshn_minio.go +++ b/apis/vshn/v1/vshn_minio.go @@ -186,3 +186,14 @@ func (v *VSHNMinio) GetFullMaintenanceSchedule() VSHNDBaaSMaintenanceScheduleSpe schedule.TimeOfDay = v.GetMaintenanceTimeOfDay() return schedule } + +// GetBackupRetention returns the retention definition for this backup. +// !!! This is just a placeholder to satisfy InfoGetter interface !!! +func (v *VSHNMinio) GetBackupRetention() K8upRetentionPolicy { + return K8upRetentionPolicy{} +} + +// GetServiceName returns the name of this service +func (v *VSHNMinio) GetServiceName() string { + return "minio" +} diff --git a/pkg/comp-functions/functions/common/non_sla_prom_rules.go b/pkg/comp-functions/functions/common/non_sla_prom_rules.go index c806242aeb..0c038d667a 100644 --- a/pkg/comp-functions/functions/common/non_sla_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_sla_prom_rules.go @@ -3,7 +3,6 @@ package common import ( "context" "fmt" - "reflect" "strings" fnproto "github.com/crossplane/function-sdk-go/proto/v1beta1" @@ -30,7 +29,7 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r return func(ctx context.Context, svc *runtime.ServiceRuntime) *fnproto.Result { log := controllerruntime.LoggerFrom(ctx) - log.Info("adding non SLA prometheus rules") + log.Info("Satrting non SLA prometheus rules") log.V(1).Info("Transforming", "obj", svc) @@ -40,14 +39,17 @@ func GenerateNonSLAPromRules(obj client.Object) func(ctx context.Context, svc *r } elem, ok := obj.(InfoGetter) if !ok { - return runtime.NewWarningResult(fmt.Sprintf("Type %s doesn't implement Alerter interface", reflect.TypeOf(obj).String())) + return runtime.NewFatalResult(err) } err = generatePromeRules(elem, svc) if err != nil { + log.Info("broken addition") return runtime.NewWarningResult("can't create prometheus rules: " + err.Error()) } + log.Info("\n\n\n\nRules added successfully = " + elem.GetInstanceNamespace()) + return nil } } From f25debebff0181deeb264e4456d58e242c1b6944 Mon Sep 17 00:00:00 2001 From: wejdross Date: Fri, 2 Feb 2024 15:44:25 +0100 Subject: [PATCH 09/11] Update pkg/comp-functions/functions/common/non_sla_prom_rules.go Co-authored-by: Bigli <9610820+TheBigLee@users.noreply.github.com> --- pkg/comp-functions/functions/common/non_sla_prom_rules.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/comp-functions/functions/common/non_sla_prom_rules.go b/pkg/comp-functions/functions/common/non_sla_prom_rules.go index 0c038d667a..2c00e1bd7b 100644 --- a/pkg/comp-functions/functions/common/non_sla_prom_rules.go +++ b/pkg/comp-functions/functions/common/non_sla_prom_rules.go @@ -138,7 +138,7 @@ func generatePromeRules(elem InfoGetter, svc *runtime.ServiceRuntime) error { return svc.SetDesiredKubeObject(prometheusRules, name+"-non-sla-alerts") } -// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted insatnce namespace and error if necessary +// Get InstanceNamespaceRegex returns regex for prometheus rules, splitted instance namespace and error if necessary func getInstanceNamespaceRegex(instanceNamespace string) (string, []string, error) { // from instance namespace, f.e. vshn-postgresql-customer-namespace-whatever // make vshn-postgresql-(.+)-.+ From e3139f420e603bd69720990664c450211b6c32d1 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 16:55:46 +0100 Subject: [PATCH 10/11] fix rebase --- apis/vshn/v1/dbaas_vshn_postgresql.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apis/vshn/v1/dbaas_vshn_postgresql.go b/apis/vshn/v1/dbaas_vshn_postgresql.go index 1d98a04990..01a7acad57 100644 --- a/apis/vshn/v1/dbaas_vshn_postgresql.go +++ b/apis/vshn/v1/dbaas_vshn_postgresql.go @@ -332,6 +332,9 @@ func (pg *VSHNPostgreSQL) GetInstanceNamespace() string { return fmt.Sprintf("vshn-postgresql-%s", pg.GetName()) } +func (pg *XVSHNPostgreSQL) GetInstanceNamespace() string { + return fmt.Sprintf("vshn-postgresql-%s", pg.GetName()) +} // GetBackupRetention returns the retention definition for this backup. // !!! This is just a placeholder to satisfy InfoGetter interface From a9e4e7ae64b75344c356203099f0e2500ecf9b57 Mon Sep 17 00:00:00 2001 From: "lukasz.widera@vshn.ch" Date: Fri, 2 Feb 2024 16:56:26 +0100 Subject: [PATCH 11/11] fix linter --- pkg/comp-functions/functions/vshnpostgres/register.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/comp-functions/functions/vshnpostgres/register.go b/pkg/comp-functions/functions/vshnpostgres/register.go index d300560c85..7fd7dc0f27 100644 --- a/pkg/comp-functions/functions/vshnpostgres/register.go +++ b/pkg/comp-functions/functions/vshnpostgres/register.go @@ -64,8 +64,8 @@ func init() { { Name: "non-sla-prometheus-rules", Execute: common.GenerateNonSLAPromRules(&vshnv1.VSHNPostgreSQL{}), - }, - { + }, + { Name: "pgbouncer-settings", Execute: addPGBouncerSettings, },