diff --git a/Makefile b/Makefile index fbd70823c..cdd8d8567 100644 --- a/Makefile +++ b/Makefile @@ -297,3 +297,9 @@ redhat-certificated-image-build: redhat-certificated-image-push: ## Push the bundle image. echo $(OPERATOR_IMG) $(MAKE) image-push IMG=$(OPERATOR_IMG) + +##@ Generate the metrics documentation +.PHONY: generate-metricsdocs +generate-metricsdocs: + mkdir -p $(shell pwd)/docs/monitoring + go run -ldflags="${LDFLAGS}" ./pkg/monitoring/metricsdocs > docs/monitoring/metrics.md \ No newline at end of file diff --git a/controllers/function_controller.go b/controllers/function_controller.go index c8a845f59..d6f09e1b4 100644 --- a/controllers/function_controller.go +++ b/controllers/function_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" v1 "k8s.io/api/batch/v1" "k8s.io/client-go/rest" @@ -63,6 +66,15 @@ type FunctionReconciler struct { func (r *FunctionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("function", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("function", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("function", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here function := &v1alpha1.Function{} err := r.Get(ctx, req.NamespacedName, function) diff --git a/controllers/sink_controller.go b/controllers/sink_controller.go index ebe22eec0..348105a64 100644 --- a/controllers/sink_controller.go +++ b/controllers/sink_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" v1 "k8s.io/api/batch/v1" "k8s.io/client-go/rest" @@ -62,6 +65,15 @@ type SinkReconciler struct { func (r *SinkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("sink", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("sink", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("sink", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here sink := &v1alpha1.Sink{} err := r.Get(ctx, req.NamespacedName, sink) diff --git a/controllers/source_controller.go b/controllers/source_controller.go index d14946905..a2d7dfcce 100644 --- a/controllers/source_controller.go +++ b/controllers/source_controller.go @@ -19,6 +19,9 @@ package controllers import ( "context" + "time" + + "github.com/streamnative/function-mesh/pkg/monitoring" "github.com/go-logr/logr" "github.com/streamnative/function-mesh/api/compute/v1alpha1" @@ -62,6 +65,15 @@ type SourceReconciler struct { func (r *SourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.Log.WithValues("source", req.NamespacedName) + startTime := time.Now() + + defer func() { + monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("source", req.NamespacedName.Name, + req.NamespacedName.Namespace).Inc() + monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("source", req.NamespacedName.Name, + req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds())) + }() + // your logic here source := &v1alpha1.Source{} err := r.Get(ctx, req.NamespacedName, source) diff --git a/docs/monitoring/metrics.md b/docs/monitoring/metrics.md new file mode 100644 index 000000000..e0d9b15e7 --- /dev/null +++ b/docs/monitoring/metrics.md @@ -0,0 +1,13 @@ +# Operator Metrics +This document aims to help users that are not familiar with metrics exposed by this operator. +The metrics documentation is auto-generated by the utility tool "pkg/monitoring/metricsdocs" and reflects all of the metrics that are exposed by the operator. + +## Operator Metrics List +### function_mesh_reconcile_count +Number of reconcile operations Type: Counter. +### function_mesh_reconcile_latency +Latency of reconcile operations, bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. Type: Histogram. +## Developing new metrics +After developing new metrics or changing old ones, please run "make generate-metricsdocs" to regenerate this document. + +If you feel that the new metric doesn't follow these rules, please change "pkg/monitoring/metricsdocs" according to your needs. diff --git a/main.go b/main.go index 32e8a2c01..a58e12ebe 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,10 @@ import ( "os" "strconv" + "sigs.k8s.io/controller-runtime/pkg/healthz" + + "github.com/streamnative/function-mesh/pkg/monitoring" + "github.com/go-logr/logr" computev1alpha1 "github.com/streamnative/function-mesh/api/compute/v1alpha1" "github.com/streamnative/function-mesh/controllers" @@ -53,6 +57,8 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(computev1alpha1.AddToScheme(scheme)) + + monitoring.RegisterMetrics() // +kubebuilder:scaffold:scheme } @@ -203,6 +209,15 @@ func main() { } // +kubebuilder:scaffold:builder + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") diff --git a/pkg/monitoring/metrics.go b/pkg/monitoring/metrics.go new file mode 100644 index 000000000..60fe0cccd --- /dev/null +++ b/pkg/monitoring/metrics.go @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package monitoring + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// MetricDescription is an exported struct that defines the metric description (Name, Help) +// as a new type named MetricDescription. +type MetricDescription struct { + Name string + Help string + Type string +} + +// metricsDescription is a map of string keys (metrics) to MetricDescription values (Name, Help). +var metricDescription = map[string]MetricDescription{ + "FunctionMeshControllerReconcileCount": { + Name: "function_mesh_reconcile_count", + Help: "Number of reconcile operations", + Type: "Counter", + }, + "FunctionMeshControllerReconcileLatency": { + Name: "function_mesh_reconcile_latency", + Help: "Latency of reconcile operations, bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s.", + Type: "Histogram", + }, +} + +var ( + // FunctionMeshControllerReconcileCount will count how many reconcile operations been done. + FunctionMeshControllerReconcileCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricDescription["FunctionMeshControllerReconcileCount"].Name, + Help: metricDescription["FunctionMeshControllerReconcileCount"].Help, + }, []string{"type", "name", "namespace"}, + ) + + // FunctionMeshControllerReconcileLatency will measure the latency of reconcile operations. + FunctionMeshControllerReconcileLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: metricDescription["FunctionMeshControllerReconcileLatency"].Name, + Help: metricDescription["FunctionMeshControllerReconcileLatency"].Help, + // Bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. + Buckets: []float64{ + 10, 100, 1000, 10000, 30000, 60000, + }, + }, []string{"type", "name", "namespace"}, + ) +) + +// RegisterMetrics will register metrics with the global prometheus registry +func RegisterMetrics() { + // use new go collector + metrics.Registry.Unregister(prometheus.NewGoCollector()) + metrics.Registry.MustRegister(collectors.NewGoCollector(collectors.WithGoCollections(collectors.GoRuntimeMetricsCollection))) + metrics.Registry.MustRegister(FunctionMeshControllerReconcileCount) + metrics.Registry.MustRegister(FunctionMeshControllerReconcileLatency) +} + +// ListMetrics will create a slice with the metrics available in metricDescription +func ListMetrics() []MetricDescription { + v := make([]MetricDescription, 0, len(metricDescription)) + // Insert value (Name, Help) for each metric + for _, value := range metricDescription { + v = append(v, value) + } + + return v +} diff --git a/pkg/monitoring/metricsdocs/metricsdocs.go b/pkg/monitoring/metricsdocs/metricsdocs.go new file mode 100644 index 000000000..1844628a6 --- /dev/null +++ b/pkg/monitoring/metricsdocs/metricsdocs.go @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package main + +import ( + "fmt" + "sort" + + "github.com/streamnative/function-mesh/pkg/monitoring" +) + +// please run "make generate-metricsdocs" to run this tool and update metrics documentation +const ( + title = "# Operator Metrics\n" + background = "This document aims to help users that are not familiar with metrics exposed by this operator.\n" + + "The metrics documentation is auto-generated by the utility tool \"pkg/monitoring/metricsdocs\" and reflects all of the metrics that are exposed by the operator.\n\n" + + KVSpecificMetrics = "## Operator Metrics List\n" + + opening = title + + background + + KVSpecificMetrics + + // footer + footerHeading = "## Developing new metrics\n" + footerContent = "After developing new metrics or changing old ones, please run \"make generate-metricsdocs\" to regenerate this document.\n\n" + + "If you feel that the new metric doesn't follow these rules, please change \"pkg/monitoring/metricsdocs\" according to your needs.\n" + + footer = footerHeading + footerContent +) + +// TODO: scaffolding these helpers with operator-lib: https://github.com/operator-framework/operator-lib. + +// metricList contains the name, description, and type for each metric. +func main() { + metricList := metricDescriptionListToMetricList(monitoring.ListMetrics()) + sort.Sort(metricList) + writeToStdOut(metricList) +} + +// writeToStdOut receives a list of metrics and prints them to STDOUT. +func writeToStdOut(metricsList metricList) { + fmt.Print(opening) + metricsList.writeOut() + fmt.Print(footer) +} + +// Metric is an exported struct that defines the metric +// name, description, and type as a new type named Metric. +type Metric struct { + name string + description string + metricType string +} + +func metricDescriptionToMetric(md monitoring.MetricDescription) Metric { + return Metric{ + name: md.Name, + description: md.Help, + metricType: md.Type, + } +} + +// writeOut receives a metric of type metric and prints +// the metric name, description, and type. +func (m Metric) writeOut() { + fmt.Println("###", m.name) + fmt.Println(m.description, "Type: "+m.metricType+".") +} + +// metricList is an array that contain metrics from type metric, +// as a new type named metricList. +type metricList []Metric + +// metricDescriptionListToMetricList collects the metrics exposed by the +// operator, and inserts them into the metricList array. +func metricDescriptionListToMetricList(mdl []monitoring.MetricDescription) metricList { + res := make([]Metric, len(mdl)) + for i, md := range mdl { + res[i] = metricDescriptionToMetric(md) + } + + return res +} + +// Len implements sort.Interface.Len +func (m metricList) Len() int { + return len(m) +} + +// Less implements sort.Interface.Less +func (m metricList) Less(i, j int) bool { + return m[i].name < m[j].name +} + +// Swap implements sort.Interface.Swap +func (m metricList) Swap(i, j int) { + m[i], m[j] = m[j], m[i] +} + +func (m metricList) writeOut() { + for _, met := range m { + met.writeOut() + } +}