Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP feat: add Incidents Detection backend #573

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,18 @@ spec:
- patch
- update
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- autoscaling
resources:
Expand Down Expand Up @@ -439,6 +451,16 @@ spec:
- create
- get
- update
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- delete
- get
- patch
- update
- apiGroups:
- monitoring.rhobs
resources:
Expand Down
1 change: 1 addition & 0 deletions cmd/operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ var defaultImages = map[string]string{
"ui-distributed-tracing": "quay.io/openshift-observability-ui/distributed-tracing-console-plugin:v0.2.0",
"ui-logging": "quay.io/openshift-logging/logging-view-plugin:6.0.0",
"korrel8r": "quay.io/korrel8r/korrel8r:0.7.2",
"incidents": "quay.io/afalossi/cluster-health-analyzer:latest",
}

func imagesUsed() []string {
Expand Down
22 changes: 22 additions & 0 deletions deploy/operator/observability-operator-cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- autoscaling
resources:
Expand Down Expand Up @@ -176,6 +188,16 @@ rules:
- create
- get
- update
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- delete
- get
- patch
- update
- apiGroups:
- monitoring.rhobs
resources:
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ require (
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus-community/prom-label-proxy v0.10.0 // indirect
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 // indirect
github.com/prometheus/alertmanager v0.27.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus-community/prom-label-proxy v0.10.0 h1:49S1uMRHuTUKoWU9Oj3hrKM2m8F3qkY3TPAOyKQnAVo=
github.com/prometheus-community/prom-label-proxy v0.10.0/go.mod h1:hUQJDmGpo2bVBA03jBur8vvicfA8dbJdDg5hCjPZPw8=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 h1:6UsAv+jAevuGO2yZFU/BukV4o9NKnFMOuoouSA4G0ns=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2/go.mod h1:XYrdZw5dW12Cjkt4ndbeNZZTBp4UCHtW0ccR9+sTtPU=
github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I=
github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
Expand Down
12 changes: 12 additions & 0 deletions pkg/controllers/uiplugin/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPlugin
}
}

isIncidentsEnabled := pluginInfo.IncidentsImage != ""
if isIncidentsEnabled {
serviceAccountName := plugin.Name + serviceAccountSuffix
// components = append(components, reconciler.NewUpdater(newIncidentsClusterRoleBinding(namespace, serviceAccountName, "cluster-monitoring-view", serviceAccountName), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+":system:auth-delegator"), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsPrometheusRole(namespace), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsPrometheusRoleBinding(namespace), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsService(namespace), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsDeployment(namespace, serviceAccountName, pluginInfo), plugin))
components = append(components, reconciler.NewUpdater(newIncidentsServiceMonitor(namespace), plugin))
}

return components
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/controllers/uiplugin/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ const (
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheuses/api,resourceNames=k8s,verbs=get;create;update
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers/api,resourceNames=main,verbs=get;list

// RBAC for Incidents Detection
//+kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create
//+kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;create;update;patch;delete

// RegisterWithManager registers the controller with Manager
func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
logger := ctrl.Log.WithName("observability-ui")
Expand Down
240 changes: 240 additions & 0 deletions pkg/controllers/uiplugin/incidents.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
package uiplugin

import (
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
)

const (
// incidentsName is the shared name given to the incidents backend objects built
// in this file (Service, Deployment, its container, and ServiceMonitor). It is
// also the value their selectors match on the "app.kubernetes.io/instance" label.
incidentsName = "cluster-health-analyzer"
// volumeMountName names the TLS volume and its mount in the Deployment. The same
// value is used as the Secret name, both in the volume source and in the
// Service's serving-cert annotation.
volumeMountName = incidentsName + "-tls"
)

// newIncidentsClusterRoleBinding builds a ClusterRoleBinding called crbName that
// binds the ClusterRole roleName to the ServiceAccount serviceAccountName living
// in namespace.
func newIncidentsClusterRoleBinding(namespace, serviceAccountName, roleName, crbName string) *rbacv1.ClusterRoleBinding {
	// The single subject of the binding: the ServiceAccount in the given namespace.
	subject := rbacv1.Subject{
		APIGroup:  corev1.SchemeGroupVersion.Group,
		Kind:      "ServiceAccount",
		Name:      serviceAccountName,
		Namespace: namespace,
	}

	// The ClusterRole being granted.
	roleRef := rbacv1.RoleRef{
		APIGroup: rbacv1.SchemeGroupVersion.Group,
		Kind:     "ClusterRole",
		Name:     roleName,
	}

	binding := &rbacv1.ClusterRoleBinding{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rbacv1.SchemeGroupVersion.String(),
			Kind:       "ClusterRoleBinding",
		},
		ObjectMeta: metav1.ObjectMeta{Name: crbName},
		Subjects:   []rbacv1.Subject{subject},
		RoleRef:    roleRef,
	}
	return binding
}

// newIncidentsPrometheusRole builds the "prometheus-k8s" Role in namespace. The
// Role allows get, list and watch on services, endpoints and pods of the core
// API group.
func newIncidentsPrometheusRole(namespace string) *rbacv1.Role {
	// Read-only access to the core resources listed below.
	readCoreObjects := rbacv1.PolicyRule{
		APIGroups: []string{""},
		Resources: []string{"services", "endpoints", "pods"},
		Verbs:     []string{"get", "list", "watch"},
	}

	return &rbacv1.Role{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rbacv1.SchemeGroupVersion.String(),
			Kind:       "Role",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "prometheus-k8s",
			Namespace: namespace,
		},
		Rules: []rbacv1.PolicyRule{readCoreObjects},
	}
}

// newIncidentsPrometheusRoleBinding builds the "prometheus-k8s" RoleBinding in
// namespace. It binds the Role of the same name to the "prometheus-k8s"
// ServiceAccount from the "openshift-monitoring" namespace.
func newIncidentsPrometheusRoleBinding(namespace string) *rbacv1.RoleBinding {
	// The binding and the Role it references share one name.
	const roleName = "prometheus-k8s"

	prometheusAccount := rbacv1.Subject{
		Kind:      "ServiceAccount",
		Name:      "prometheus-k8s",
		Namespace: "openshift-monitoring",
	}

	return &rbacv1.RoleBinding{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rbacv1.SchemeGroupVersion.String(),
			Kind:       "RoleBinding",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      roleName,
			Namespace: namespace,
		},
		RoleRef: rbacv1.RoleRef{
			APIGroup: rbacv1.SchemeGroupVersion.Group,
			Kind:     "Role",
			Name:     roleName,
		},
		Subjects: []rbacv1.Subject{prometheusAccount},
	}
}

// newIncidentsService builds the ClusterIP Service for the incidents backend in
// namespace. It exposes port 8443 under the name "metrics", forwarding to the
// container port of the same name, and carries the serving-cert annotation that
// points at the volumeMountName secret.
func newIncidentsService(namespace string) *corev1.Service {
	meta := metav1.ObjectMeta{
		Name:      incidentsName,
		Namespace: namespace,
		Annotations: map[string]string{
			"service.beta.openshift.io/serving-cert-secret-name": volumeMountName,
		},
		Labels: componentLabels(incidentsName),
	}

	metricsPort := corev1.ServicePort{
		Name:       "metrics",
		Port:       8443,
		TargetPort: intstr.FromString("metrics"),
	}

	return &corev1.Service{
		TypeMeta: metav1.TypeMeta{
			APIVersion: corev1.SchemeGroupVersion.String(),
			Kind:       "Service",
		},
		ObjectMeta: meta,
		Spec: corev1.ServiceSpec{
			Ports: []corev1.ServicePort{metricsPort},
			Selector: map[string]string{
				"app.kubernetes.io/instance": incidentsName,
			},
			Type: corev1.ServiceTypeClusterIP,
		},
	}
}

// newIncidentsDeployment builds the single-replica Deployment of the incidents
// backend in namespace. The pod runs as serviceAccountName, uses the image from
// pluginInfo.IncidentsImage, and mounts the volumeMountName secret read-only at
// /etc/tls/private, which is where the TLS certificate and key arguments point.
func newIncidentsDeployment(namespace string, serviceAccountName string, pluginInfo UIPluginInfo) *appsv1.Deployment {
	// Labels for the Deployment object and for the pod template are produced by
	// separate calls so the two objects never share one map.
	deploymentLabels := componentLabels(incidentsName)
	podLabels := componentLabels(incidentsName)

	// Secret-backed volume that holds the serving certificate and key.
	tlsVolume := corev1.Volume{
		Name: volumeMountName,
		VolumeSource: corev1.VolumeSource{
			Secret: &corev1.SecretVolumeSource{
				SecretName: volumeMountName,
			},
		},
	}

	hardening := &corev1.SecurityContext{
		RunAsNonRoot:             ptr.To(true),
		AllowPrivilegeEscalation: ptr.To(false),
		Capabilities: &corev1.Capabilities{
			Drop: []corev1.Capability{"ALL"},
		},
		SeccompProfile: &corev1.SeccompProfile{
			Type: corev1.SeccompProfileTypeRuntimeDefault,
		},
	}

	analyzer := corev1.Container{
		Name:            incidentsName,
		Image:           pluginInfo.IncidentsImage,
		ImagePullPolicy: corev1.PullAlways,
		Args: []string{
			"--tls-cert-file=/etc/tls/private/tls.crt",
			"--tls-private-key-file=/etc/tls/private/tls.key",
		},
		Env: []corev1.EnvVar{
			{
				Name:  "PROM_URL",
				Value: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091/",
			},
		},
		SecurityContext: hardening,
		Ports: []corev1.ContainerPort{
			{
				ContainerPort: 8443,
				Name:          "metrics",
			},
		},
		TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
		VolumeMounts: []corev1.VolumeMount{
			{
				MountPath: "/etc/tls/private",
				Name:      volumeMountName,
				ReadOnly:  true,
			},
		},
	}

	podTemplate := corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Labels: podLabels,
		},
		Spec: corev1.PodSpec{
			ServiceAccountName:           serviceAccountName,
			AutomountServiceAccountToken: ptr.To(true),
			Containers:                   []corev1.Container{analyzer},
			Volumes:                      []corev1.Volume{tlsVolume},
		},
	}

	return &appsv1.Deployment{
		TypeMeta: metav1.TypeMeta{
			APIVersion: appsv1.SchemeGroupVersion.String(),
			Kind:       "Deployment",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      incidentsName,
			Namespace: namespace,
			Labels:    deploymentLabels,
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: ptr.To[int32](1),
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/instance": incidentsName,
				},
			},
			Template: podTemplate,
		},
	}
}

// newIncidentsServiceMonitor builds the ServiceMonitor for the incidents backend
// in namespace. It selects objects labelled with the incidentsName instance and
// scrapes their "metrics" port over HTTPS every 30s, using the CA bundle and
// client certificate paths set below.
func newIncidentsServiceMonitor(namespace string) *monv1.ServiceMonitor {
	// Server name expected during the TLS handshake, built from the Service name
	// and namespace.
	serverName := incidentsName + "." + namespace + ".svc"

	scrapeTLS := &monv1.TLSConfig{
		SafeTLSConfig: monv1.SafeTLSConfig{
			ServerName: ptr.To(serverName),
		},
		CAFile:   "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt",
		CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt",
		KeyFile:  "/etc/prometheus/secrets/metrics-client-certs/tls.key",
	}

	metricsEndpoint := monv1.Endpoint{
		Interval:  "30s",
		Port:      "metrics",
		Scheme:    "https",
		TLSConfig: scrapeTLS,
	}

	return &monv1.ServiceMonitor{
		TypeMeta: metav1.TypeMeta{
			APIVersion: monv1.SchemeGroupVersion.String(),
			Kind:       "ServiceMonitor",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      incidentsName,
			Namespace: namespace,
		},
		Spec: monv1.ServiceMonitorSpec{
			Endpoints: []monv1.Endpoint{metricsEndpoint},
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/instance": incidentsName,
				},
			},
		},
	}
}
3 changes: 3 additions & 0 deletions pkg/controllers/uiplugin/plugin_info_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
type UIPluginInfo struct {
Image string
Korrel8rImage string
IncidentsImage string
LokiServiceNames map[string]string
Name string
ConsoleName string
Expand Down Expand Up @@ -132,6 +133,8 @@ func PluginInfoBuilder(ctx context.Context, k client.Client, plugin *uiv1alpha1.
return nil, err
}

pluginInfo.IncidentsImage = pluginConf.Images["incidents"]

pluginInfo.Korrel8rImage = pluginConf.Images["korrel8r"]
pluginInfo.LokiServiceNames[OpenshiftLoggingNs], err = getLokiServiceName(ctx, k, OpenshiftLoggingNs)
if err != nil {
Expand Down
Loading