diff --git a/Makefile b/Makefile index fbd70823c..cdd8d8567 100644 --- a/Makefile +++ b/Makefile @@ -297,3 +297,9 @@ redhat-certificated-image-build: redhat-certificated-image-push: ## Push the bundle image. echo $(OPERATOR_IMG) $(MAKE) image-push IMG=$(OPERATOR_IMG) + +##@ Generate the metrics documentation +.PHONY: generate-metricsdocs +generate-metricsdocs: + mkdir -p $(shell pwd)/docs/monitoring + go run -ldflags="${LDFLAGS}" ./pkg/monitoring/metricsdocs > docs/monitoring/metrics.md \ No newline at end of file diff --git a/charts/function-mesh-operator/templates/controller-manager-deployment.yaml b/charts/function-mesh-operator/templates/controller-manager-deployment.yaml index 276ad2231..d367dd24f 100644 --- a/charts/function-mesh-operator/templates/controller-manager-deployment.yaml +++ b/charts/function-mesh-operator/templates/controller-manager-deployment.yaml @@ -22,6 +22,10 @@ spec: app.kubernetes.io/name: {{ template "function-mesh-operator.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/component: controller-manager + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: "/metrics" + prometheus.io/port: "{{ .Values.controllerManager.metrics.port }}" spec: {{- if .Values.controllerManager.serviceAccount }} serviceAccount: {{ .Values.controllerManager.serviceAccount }} @@ -38,12 +42,15 @@ spec: resources: {{ toYaml .Values.controllerManager.resources | indent 12 }} {{- end }} - {{- if .Values.admissionWebhook.enabled }} ports: + {{- if .Values.admissionWebhook.enabled }} - containerPort: 9443 name: webhook-server protocol: TCP {{- end}} + - containerPort: {{ .Values.controllerManager.metrics.port }} + name: http-metrics + protocol: TCP command: - /manager args: @@ -63,6 +70,18 @@ spec: value: {{ .Values.admissionWebhook.enabled | quote }} volumeMounts: {{- include "function-mesh-operator.volumeMounts" . | nindent 8 }} + livenessProbe: + httpGet: + path: /healthz + port: {{ .Values.controllerManager.healthProbe.port }} + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: {{ .Values.controllerManager.healthProbe.port }} + initialDelaySeconds: 5 + periodSeconds: 10 {{- with .Values.controllerManager.nodeSelector }} nodeSelector: {{ toYaml . | indent 8 }} @@ -79,4 +98,4 @@ spec: priorityClassName: {{ .Values.controllerManager.priorityClassName }} {{- end }} volumes: - {{- include "function-mesh-operator.volumes" . | nindent 6 -}} + {{- include "function-mesh-operator.volumes" . | nindent 6 -}} \ No newline at end of file diff --git a/charts/function-mesh-operator/templates/controller-manager-metrics-service.yaml b/charts/function-mesh-operator/templates/controller-manager-metrics-service.yaml new file mode 100644 index 000000000..7944aae8c --- /dev/null +++ b/charts/function-mesh-operator/templates/controller-manager-metrics-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: function-mesh-controller-manager-metrics-service + namespace: {{ .Release.Namespace }} + labels: + {{- include "function-mesh-operator.labels" . | nindent 4 }} +spec: + ports: + - name: http-metrics + port: {{ .Values.controllerManager.metrics.port }} + protocol: TCP + targetPort: {{ .Values.controllerManager.metrics.port }} + selector: + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: controller-manager \ No newline at end of file diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 81827e995..89ee3068a 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -39,6 +39,9 @@ patchesStrategicMerge: # 'CERTMANAGER' needs to be enabled to use ca injection - webhookcainjection_patch.yaml +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +- manager_metrics_patch.yaml + # the following config is for teaching kustomize how to do var substitution vars: # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml new file mode 100644 index 000000000..54f7995b7 --- /dev/null +++ b/config/default/manager_metrics_patch.yaml @@ -0,0 +1,19 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: "/metrics" + prometheus.io/port: "8080" + spec: + containers: + - name: manager + ports: + - containerPort: 8080 + name: http-metrics + protocol: TCP diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index d025ce24e..5b123381c 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -42,5 +42,17 @@ spec: requests: cpu: 100m memory: 20Mi + livenessProbe: + httpGet: + path: /healthz + port: 8000 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/controllers/function_controller.go b/controllers/function_controller.go index c8a845f59..d6f09e1b4 100644 --- a/controllers/function_controller.go +++ b/controllers/function_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" v1 "k8s.io/api/batch/v1" "k8s.io/client-go/rest" @@ -63,6 +66,15 @@ type FunctionReconciler struct { func (r *FunctionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("function", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("function", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("function", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here function := &v1alpha1.Function{} err := r.Get(ctx, req.NamespacedName, function) diff --git a/controllers/sink_controller.go b/controllers/sink_controller.go index ebe22eec0..348105a64 100644 --- a/controllers/sink_controller.go +++ b/controllers/sink_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" v1 "k8s.io/api/batch/v1" "k8s.io/client-go/rest" @@ -62,6 +65,15 @@ type SinkReconciler struct { func (r *SinkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("sink", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("sink", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("sink", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here sink := &v1alpha1.Sink{} err := r.Get(ctx, req.NamespacedName, sink) diff --git a/controllers/source_controller.go b/controllers/source_controller.go index d14946905..a2d7dfcce 100644 --- a/controllers/source_controller.go +++ b/controllers/source_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" "github.com/go-logr/logr" "github.com/streamnative/function-mesh/api/compute/v1alpha1" @@ -62,6 +65,15 @@ type SourceReconciler struct { func (r *SourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("source", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("source", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("source", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here source := &v1alpha1.Source{} err := r.Get(ctx, req.NamespacedName, source) diff --git a/docs/monitoring/metrics.md b/docs/monitoring/metrics.md new file mode 100644 index 000000000..e0d9b15e7 --- /dev/null +++ b/docs/monitoring/metrics.md @@ -0,0 +1,13 @@ +# Operator Metrics +This document aims to help users that are not familiar with metrics exposed by this operator. +The metrics documentation is auto-generated by the utility tool "pkg/monitoring/metricsdocs" and reflects all of the metrics that are exposed by the operator. + +## Operator Metrics List +### function_mesh_reconcile_count +Number of reconcile operations Type: Counter. +### function_mesh_reconcile_latency +Latency of reconcile operations, bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. Type: Histogram. +## Developing new metrics +After developing new metrics or changing old ones, please run "make generate-metricsdocs" to regenerate this document. + +If you feel that the new metric doesn't follow these rules, please change "pkg/monitoring/metricsdocs" according to your needs. diff --git a/main.go b/main.go index 32e8a2c01..a58e12ebe 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,10 @@ import ( "os" "strconv" + "sigs.k8s.io/controller-runtime/pkg/healthz" + + "github.com/streamnative/function-mesh/pkg/monitoring" + "github.com/go-logr/logr" computev1alpha1 "github.com/streamnative/function-mesh/api/compute/v1alpha1" "github.com/streamnative/function-mesh/controllers" @@ -53,6 +57,8 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(computev1alpha1.AddToScheme(scheme)) + + monitoring.RegisterMetrics() // +kubebuilder:scaffold:scheme } @@ -203,6 +209,15 @@ func main() { } // +kubebuilder:scaffold:builder + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 18f03582c..7c79cd2d1 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -55,6 +55,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string forwardSourceMessageProperty: type: boolean funcConfig: @@ -256,6 +258,11 @@ spec: type: object logTopic: type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxMessageRetry: format: int32 type: integer @@ -3623,6 +3630,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string golang: properties: go: @@ -3817,6 +3826,13 @@ spec: required: - jar type: object + logTopic: + type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxMessageRetry: format: int32 type: integer @@ -6929,6 +6945,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string forwardSourceMessageProperty: type: boolean golang: @@ -7053,6 +7071,13 @@ spec: required: - jar type: object + logTopic: + type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxReplicas: format: int32 type: integer @@ -10300,6 +10325,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string forwardSourceMessageProperty: type: boolean funcConfig: @@ -10501,6 +10528,11 @@ spec: type: object logTopic: type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxMessageRetry: format: int32 type: integer @@ -13941,6 +13973,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string golang: properties: go: @@ -14135,6 +14169,13 @@ spec: required: - jar type: object + logTopic: + type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxMessageRetry: format: int32 type: integer @@ -17320,6 +17361,8 @@ spec: type: string downloaderImage: type: string + filebeatImage: + type: string forwardSourceMessageProperty: type: boolean golang: @@ -17444,6 +17487,13 @@ spec: required: - jar type: object + logTopic: + type: string + logTopicAgent: + enum: + - runtime + - sidecar + type: string maxReplicas: format: int32 type: integer diff --git a/pkg/monitoring/metrics.go b/pkg/monitoring/metrics.go new file mode 100644 index 000000000..3f1810105 --- /dev/null +++ b/pkg/monitoring/metrics.go @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package monitoring + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// MetricDescription is an exported struct that defines the metric description (Name, Help) +// as a new type named MetricDescription. +type MetricDescription struct { + Name string + Help string + Type string +} + +// metricsDescription is a map of string keys (metrics) to MetricDescription values (Name, Help). +var metricDescription = map[string]MetricDescription{ + "FunctionMeshControllerReconcileCount": { + Name: "function_mesh_reconcile_count", + Help: "Number of reconcile operations", + Type: "Counter", + }, + "FunctionMeshControllerReconcileLatency": { + Name: "function_mesh_reconcile_latency", + Help: "Latency of reconcile operations, bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s.", + Type: "Histogram", + }, +} + +var ( + // FunctionMeshControllerReconcileCount will count how many reconcile operations been done. + FunctionMeshControllerReconcileCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricDescription["FunctionMeshControllerReconcileCount"].Name, + Help: metricDescription["FunctionMeshControllerReconcileCount"].Help, + }, []string{"type", "name", "namespace"}, + ) + + // FunctionMeshControllerReconcileLatency will measure the latency of reconcile operations. + FunctionMeshControllerReconcileLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: metricDescription["FunctionMeshControllerReconcileLatency"].Name, + Help: metricDescription["FunctionMeshControllerReconcileLatency"].Help, + // Bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. + Buckets: []float64{ + 10, 100, 1000, 10000, 30000, 60000, + }, + }, []string{"type", "name", "namespace"}, + ) +) + +// RegisterMetrics will register metrics with the global prometheus registry +func RegisterMetrics() { + metrics.Registry.MustRegister(collectors.NewBuildInfoCollector()) + metrics.Registry.MustRegister(FunctionMeshControllerReconcileCount) + metrics.Registry.MustRegister(FunctionMeshControllerReconcileLatency) +} + +// ListMetrics will create a slice with the metrics available in metricDescription +func ListMetrics() []MetricDescription { + v := make([]MetricDescription, 0, len(metricDescription)) + // Insert value (Name, Help) for each metric + for _, value := range metricDescription { + v = append(v, value) + } + + return v +} diff --git a/pkg/monitoring/metricsdocs/metricsdocs.go b/pkg/monitoring/metricsdocs/metricsdocs.go new file mode 100644 index 000000000..1844628a6 --- /dev/null +++ b/pkg/monitoring/metricsdocs/metricsdocs.go @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package main + +import ( + "fmt" + "sort" + + "github.com/streamnative/function-mesh/pkg/monitoring" +) + +// please run "make generate-metricsdocs" to run this tool and update metrics documentation +const ( + title = "# Operator Metrics\n" + background = "This document aims to help users that are not familiar with metrics exposed by this operator.\n" + + "The metrics documentation is auto-generated by the utility tool \"pkg/monitoring/metricsdocs\" and reflects all of the metrics that are exposed by the operator.\n\n" + + KVSpecificMetrics = "## Operator Metrics List\n" + + opening = title + + background + + KVSpecificMetrics + + // footer + footerHeading = "## Developing new metrics\n" + footerContent = "After developing new metrics or changing old ones, please run \"make generate-metricsdocs\" to regenerate this document.\n\n" + + "If you feel that the new metric doesn't follow these rules, please change \"pkg/monitoring/metricsdocs\" according to your needs.\n" + + footer = footerHeading + footerContent +) + +// TODO: scaffolding these helpers with operator-lib: https://github.com/operator-framework/operator-lib. + +// metricList contains the name, description, and type for each metric. +func main() { + metricList := metricDescriptionListToMetricList(monitoring.ListMetrics()) + sort.Sort(metricList) + writeToStdOut(metricList) +} + +// writeToStdOut receives a list of metrics and prints them to STDOUT. +func writeToStdOut(metricsList metricList) { + fmt.Print(opening) + metricsList.writeOut() + fmt.Print(footer) +} + +// Metric is an exported struct that defines the metric +// name, description, and type as a new type named Metric. +type Metric struct { + name string + description string + metricType string +} + +func metricDescriptionToMetric(md monitoring.MetricDescription) Metric { + return Metric{ + name: md.Name, + description: md.Help, + metricType: md.Type, + } +} + +// writeOut receives a metric of type metric and prints +// the metric name, description, and type. +func (m Metric) writeOut() { + fmt.Println("###", m.name) + fmt.Println(m.description, "Type: "+m.metricType+".") +} + +// metricList is an array that contain metrics from type metric, +// as a new type named metricList. +type metricList []Metric + +// metricDescriptionListToMetricList collects the metrics exposed by the +// operator, and inserts them into the metricList array. +func metricDescriptionListToMetricList(mdl []monitoring.MetricDescription) metricList { + res := make([]Metric, len(mdl)) + for i, md := range mdl { + res[i] = metricDescriptionToMetric(md) + } + + return res +} + +// Len implements sort.Interface.Len +func (m metricList) Len() int { + return len(m) +} + +// Less implements sort.Interface.Less +func (m metricList) Less(i, j int) bool { + return m[i].name < m[j].name +} + +// Swap implements sort.Interface.Swap +func (m metricList) Swap(i, j int) { + m[i], m[j] = m[j], m[i] +} + +func (m metricList) writeOut() { + for _, met := range m { + met.writeOut() + } +}