From d192587ab41117ddec3d0f854a7834ba78f0ae3d Mon Sep 17 00:00:00 2001
From: Marques Johansson
Date: Sat, 20 Jul 2024 17:49:08 -0400
Subject: [PATCH] feat: add metrics command line args

Signed-off-by: Marques Johansson
---
 cmd/provider/main.go        | 42 ++++++++++++++++++++++------
 internal/metrics/metrics.go | 56 +++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 9 deletions(-)
 create mode 100644 internal/metrics/metrics.go

diff --git a/cmd/provider/main.go b/cmd/provider/main.go
index 5cd3bb7..381a735 100644
--- a/cmd/provider/main.go
+++ b/cmd/provider/main.go
@@ -28,7 +28,9 @@ import (
 	"github.com/crossplane/crossplane-runtime/pkg/feature"
 	"github.com/crossplane/crossplane-runtime/pkg/logging"
 	"github.com/crossplane/crossplane-runtime/pkg/ratelimiter"
+	"github.com/crossplane/crossplane-runtime/pkg/reconciler/managed"
 	"github.com/crossplane/crossplane-runtime/pkg/resource"
+	"github.com/crossplane/crossplane-runtime/pkg/statemetrics"
 	upcontroller "github.com/crossplane/upjet/pkg/controller"
 	"github.com/crossplane/upjet/pkg/terraform"
 	"gopkg.in/alecthomas/kingpin.v2"
@@ -38,6 +40,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
 
 	"github.com/crossplane-contrib/provider-jet-equinix/apis"
 	"github.com/crossplane-contrib/provider-jet-equinix/apis/v1alpha1"
@@ -45,15 +48,18 @@ import (
 	"github.com/crossplane-contrib/provider-jet-equinix/internal/clients"
 	"github.com/crossplane-contrib/provider-jet-equinix/internal/controller"
 	"github.com/crossplane-contrib/provider-jet-equinix/internal/features"
+	equinixmetrics "github.com/crossplane-contrib/provider-jet-equinix/internal/metrics"
 )
 
 func main() {
 	var (
-		app              = kingpin.New(filepath.Base(os.Args[0]), "Terraform based Crossplane provider for Equinix").DefaultEnvars()
-		debug            = app.Flag("debug", "Run with debug logging.").Short('d').Bool()
-		syncInterval     = app.Flag("sync", "Sync interval controls how often all resources will be double checked for drift.").Short('s').Default("1h").Duration()
-		leaderElection   = app.Flag("leader-election", "Use leader election for the controller manager.").Short('l').Default("false").OverrideDefaultFromEnvar("LEADER_ELECTION").Bool()
-		maxReconcileRate = app.Flag("max-reconcile-rate", "The global maximum rate per second at which resources may checked for drift from the desired state.").Default("10").Int()
+		app                     = kingpin.New(filepath.Base(os.Args[0]), "Terraform based Crossplane provider for Equinix").DefaultEnvars()
+		debug                   = app.Flag("debug", "Run with debug logging.").Short('d').Bool()
+		syncInterval            = app.Flag("sync", "Sync interval controls how often all resources will be double checked for drift.").Short('s').Default("1h").Duration()
+		pollInterval            = app.Flag("poll", "Poll interval controls how often an individual resource should be checked for drift.").Default("10m").Duration()
+		leaderElection          = app.Flag("leader-election", "Use leader election for the controller manager.").Short('l').Default("false").OverrideDefaultFromEnvar("LEADER_ELECTION").Bool()
+		pollStateMetricInterval = app.Flag("poll-state-metric", "State metric recording interval").Default("5s").Duration()
+		maxReconcileRate        = app.Flag("max-reconcile-rate", "The global maximum rate per second at which resources may checked for drift from the desired state.").Default("10").Int()
 
 		namespace                  = app.Flag("namespace", "Namespace used to set as default scope in default secret store config.").Default("crossplane-system").Envar("POD_NAMESPACE").String()
 		enableExternalSecretStores = app.Flag("enable-external-secret-stores", "Enable support for ExternalSecretStores.").Default("false").Envar("ENABLE_EXTERNAL_SECRET_STORES").Bool()
@@ -71,12 +77,16 @@ func main() {
 		ctrl.SetLogger(zl)
 	}
 
-	log.Debug("Starting", "sync-period", syncInterval.String())
+	// currently, we configure the jitter to be the 5% of the poll interval
+	pollJitter := time.Duration(float64(*pollInterval) * 0.05)
+	log.Debug("Starting", "sync-interval", syncInterval.String(),
+		"poll-interval", pollInterval.String(), "poll-jitter", pollJitter, "max-reconcile-rate", *maxReconcileRate)
 
 	cfg, err := ctrl.GetConfig()
 	kingpin.FatalIfError(err, "Cannot get API server rest config")
+	kingpin.FatalIfError(equinixmetrics.SetupMetrics(), "Cannot setup Equinix metrics hook")
 
-	mgr, err := ctrl.NewManager(cfg, ctrl.Options{
+	mgr, err := ctrl.NewManager(ratelimiter.LimitRESTConfig(cfg, *maxReconcileRate), ctrl.Options{
 		LeaderElection:   *leaderElection,
 		LeaderElectionID: "crossplane-leader-election-provider-jet-equinix",
 		Cache: cache.Options{
@@ -87,8 +97,19 @@ func main() {
 		RenewDeadline: func() *time.Duration { d := 50 * time.Second; return &d }(),
 	})
 	kingpin.FatalIfError(err, "Cannot create controller manager")
+
+	mm := managed.NewMRMetricRecorder()
+	sm := statemetrics.NewMRStateMetrics()
+
+	metrics.Registry.MustRegister(mm)
+	metrics.Registry.MustRegister(sm)
 	kingpin.FatalIfError(apis.AddToScheme(mgr.GetScheme()), "Cannot add Equinix APIs to scheme")
 
+	mo := xpcontroller.MetricOptions{
+		PollStateMetricInterval: *pollStateMetricInterval,
+		MRMetrics:               mm,
+		MRStateMetrics:          sm,
+	}
 	ctx := context.Background()
 	provider, err := config.GetProvider(ctx, false)
 	kingpin.FatalIfError(err, "Cannot initialize the provider configuration")
@@ -96,15 +117,17 @@ func main() {
 		Options: xpcontroller.Options{
 			Logger:                  log,
 			GlobalRateLimiter:       ratelimiter.NewGlobal(*maxReconcileRate),
-			PollInterval:            1 * time.Minute,
-			MaxConcurrentReconciles: 1,
+			PollInterval:            *pollInterval,
+			MaxConcurrentReconciles: *maxReconcileRate,
 			Features:                &feature.Flags{},
+			MetricOptions:           &mo,
 		},
 		Provider: provider,
 		// use the following WorkspaceStoreOption to enable the shared gRPC mode
 		// terraform.WithProviderRunner(terraform.NewSharedProvider(log, os.Getenv("TERRAFORM_NATIVE_PROVIDER_PATH"), terraform.WithNativeProviderArgs("-debuggable")))
 		WorkspaceStore:        terraform.NewWorkspaceStore(log),
 		SetupFn:               clients.TerraformSetupBuilder(provider.TerraformProvider),
+		PollJitter:            pollJitter,
 		OperationTrackerStore: upcontroller.NewOperationStore(log),
 	}
 
@@ -128,6 +151,7 @@ func main() {
 
 	// Ensure default store config exists.
 	kingpin.FatalIfError(resource.Ignore(kerrors.IsAlreadyExists, mgr.GetClient().Create(ctx, &v1alpha1.StoreConfig{
+		TypeMeta: metav1.TypeMeta{},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "default",
 		},
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
new file mode 100644
index 0000000..609a159
--- /dev/null
+++ b/internal/metrics/metrics.go
@@ -0,0 +1,56 @@
+package metrics
+
+import (
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	k8smetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
+)
+
+var (
+	metricsEquinixAPIResponseCodesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "equinix_api_responses_total",
+		Help: "Number of Equinix API responses by return code and first 5 digits of the token",
+	}, []string{"service", "method", "code", "account"})
+
+	metricsEquinixAPIResponseCodesLast5m = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "equinix_api_responses_last_5m",
+		Help: "Number of Equinix API responses by return code and first 5 digits of the token, reset every 5 minutes",
+	}, []string{"service", "method", "code", "account"})
+)
+
+// SetupMetrics registers the Equinix API response counters with
+// controller-runtime's metrics registry and starts a background goroutine that
+// resets the 5-minute counter on every tick. A registration failure is
+// returned to the caller instead of panicking. The goroutine has no stop
+// signal and keeps running after SetupMetrics returns.
+func SetupMetrics() error {
+	for _, c := range []prometheus.Collector{
+		metricsEquinixAPIResponseCodesTotal,
+		metricsEquinixAPIResponseCodesLast5m,
+	} {
+		if err := k8smetrics.Registry.Register(c); err != nil {
+			return err
+		}
+	}
+
+	go func() {
+		// Reset the windowed counter every 5 minutes
+		ticker := time.NewTicker(5 * time.Minute)
+		for range ticker.C {
+			metricsEquinixAPIResponseCodesLast5m.Reset()
+		}
+	}()
+
+	return nil
+}
+
+// IncEquinixAPIResp increments both the equinix_api_responses_total and
+// equinix_api_responses_last_5m counters for the given service, method,
+// code, and account label values. It always returns nil.
+func IncEquinixAPIResp(service, method, code, account string) error {
+	metricsEquinixAPIResponseCodesLast5m.WithLabelValues(service, method, code, account).Inc()
+	metricsEquinixAPIResponseCodesTotal.WithLabelValues(service, method, code, account).Inc()
+	return nil
+}