From b094671cb123077ceda02abcb13a7626366ab85b Mon Sep 17 00:00:00 2001 From: Ales Stimec Date: Tue, 11 Jun 2024 11:15:46 +0200 Subject: [PATCH] Initial monitoring cleanup. --- internal/auth/oauth2.go | 38 +++++++- internal/jimm/watcher.go | 2 + internal/jujuapi/modelmanager.go | 17 ++-- internal/servermon/monitoring.go | 158 ++++--------------------------- internal/servermon/request.go | 29 ------ 5 files changed, 62 insertions(+), 182 deletions(-) delete mode 100644 internal/servermon/request.go diff --git a/internal/auth/oauth2.go b/internal/auth/oauth2.go index a02780767..daf9e355d 100644 --- a/internal/auth/oauth2.go +++ b/internal/auth/oauth2.go @@ -30,6 +30,7 @@ import ( "github.com/canonical/jimm/api/params" "github.com/canonical/jimm/internal/dbmodel" "github.com/canonical/jimm/internal/errors" + "github.com/canonical/jimm/internal/servermon" ) const ( @@ -305,8 +306,20 @@ func (as *AuthenticationService) MintSessionToken(email string, secretKey string } // VerifySessionToken calls the exported VerifySessionToken function. -func (as *AuthenticationService) VerifySessionToken(token string, secretKey string) (jwt.Token, error) { - return VerifySessionToken(token, secretKey) +func (as *AuthenticationService) VerifySessionToken(token string, secretKey string) (_ jwt.Token, err error) { + defer func() { + if err != nil { + servermon.AuthenticationFailCount.WithLabelValues("VerifySessionToken").Inc() + } else { + servermon.AuthenticationSuccessCount.WithLabelValues("VerifySessionToken").Inc() + } + }() + + jwt, err := VerifySessionToken(token, secretKey) + if err != nil { + return nil, errors.E(err) + } + return jwt, nil } // UpdateIdentity updates the database with the display name and access token set for the user. @@ -377,7 +390,15 @@ func VerifySessionToken(token string, secretKey string) (jwt.Token, error) { } // VerifyClientCredentials verifies the provided client ID and client secret. -func (as *AuthenticationService) VerifyClientCredentials(ctx context.Context, clientID string, clientSecret string) error { +func (as *AuthenticationService) VerifyClientCredentials(ctx context.Context, clientID string, clientSecret string) (err error) { + defer func() { + if err != nil { + servermon.AuthenticationFailCount.WithLabelValues("VerifyClientCredentials").Inc() + } else { + servermon.AuthenticationSuccessCount.WithLabelValues("VerifyClientCredentials").Inc() + } + }() + cfg := clientcredentials.Config{ ClientID: clientID, ClientSecret: clientSecret, @@ -386,7 +407,7 @@ func (as *AuthenticationService) VerifyClientCredentials(ctx context.Context, cl AuthStyle: oauth2.AuthStyle(as.oauthConfig.Endpoint.AuthStyle), } - _, err := cfg.Token(ctx) + _, err = cfg.Token(ctx) if err != nil { zapctx.Error(ctx, "client credential verification failed", zap.Error(err)) return errors.E(errors.CodeUnauthorized, "invalid client credentials") @@ -425,8 +446,15 @@ func (as *AuthenticationService) CreateBrowserSession( // AuthenticateBrowserSession updates the session for a browser, additionally // retrieving new access tokens upon expiry. If this cannot be done, the cookie // is deleted and an error is returned. -func (as *AuthenticationService) AuthenticateBrowserSession(ctx context.Context, w http.ResponseWriter, req *http.Request) (context.Context, error) { +func (as *AuthenticationService) AuthenticateBrowserSession(ctx context.Context, w http.ResponseWriter, req *http.Request) (_ context.Context, err error) { const op = errors.Op("auth.AuthenticationService.AuthenticateBrowserSession") + defer func() { + if err != nil { + servermon.AuthenticationFailCount.WithLabelValues("AuthenticateBrowserSession").Inc() + } else { + servermon.AuthenticationSuccessCount.WithLabelValues("AuthenticateBrowserSession").Inc() + } + }() session, err := as.sessionStore.Get(req, SessionName) if err != nil { diff --git a/internal/jimm/watcher.go b/internal/jimm/watcher.go index 2ec0e4bcc..e321d6082 100644 --- a/internal/jimm/watcher.go +++ b/internal/jimm/watcher.go @@ -17,6 +17,7 @@ import ( "github.com/canonical/jimm/internal/db" "github.com/canonical/jimm/internal/dbmodel" "github.com/canonical/jimm/internal/errors" + "github.com/canonical/jimm/internal/servermon" ) // Publisher defines the interface used by the Watcher @@ -353,6 +354,7 @@ func (w *Watcher) watchController(ctx context.Context, ctl *dbmodel.Controller) if err != nil { return errors.E(op, err) } + servermon.MonitorDeltasReceivedCount.WithLabelValues(ctl.UUID).Add(float64(len(deltas))) for _, d := range deltas { eid := d.Entity.EntityId() ctx := zapctx.WithFields(ctx, zap.String("model-uuid", eid.ModelUUID), zap.String("kind", eid.Kind), zap.String("id", eid.Id)) diff --git a/internal/jujuapi/modelmanager.go b/internal/jujuapi/modelmanager.go index b012fae61..3587db3fa 100644 --- a/internal/jujuapi/modelmanager.go +++ b/internal/jujuapi/modelmanager.go @@ -153,15 +153,16 @@ func (r *controllerRoot) CreateModel(ctx context.Context, args jujuparams.ModelC return jujuparams.ModelInfo{}, errors.E(op, err) } info, err := r.jimm.AddModel(ctx, r.user, &mca) - if err == nil { - servermon.ModelsCreatedCount.Inc() - if r.controllerUUIDMasking { - info.ControllerUUID = r.params.ControllerUUID - } - return *info, nil + if err != nil { + servermon.ModelsCreatedFailCount.Inc() + return jujuparams.ModelInfo{}, errors.E(op, err) + } + + servermon.ModelsCreatedCount.Inc() + if r.controllerUUIDMasking { + info.ControllerUUID = r.params.ControllerUUID } - servermon.ModelsCreatedFailCount.Inc() - return jujuparams.ModelInfo{}, errors.E(op, err) + return *info, nil } // DestroyModels implements the ModelManager facade's DestroyModels diff --git a/internal/servermon/monitoring.go b/internal/servermon/monitoring.go index 77fda74bf..01d29662a 100644 --- a/internal/servermon/monitoring.go +++ b/internal/servermon/monitoring.go @@ -5,171 +5,64 @@ package servermon import ( - "github.com/juju/mgomonitor" "github.com/prometheus/client_golang/prometheus" ) var ( + AuthenticationFailCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "jimm", + Name: "auth", + Help: "The number of failed authentications.", + }, []string{"method"}) + AuthenticationSuccessCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "jimm", + Name: "auth", + Help: "The number of successful authentications.", + }, []string{"method"}) QueryTimeAuditLogCleanUpHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{ - Namespace: "jem", + Namespace: "jimm", Name: "db_query_audit_clean_up_duration_seconds", Help: "Histogram of query time for audit_log clean up in seconds", Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, }) - AuthenticationFailCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "auth", - Name: "authentication_fail", - Help: "The number of failed authentications.", - }) - AuthenticationSuccessCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "auth", - Name: "authentication_success", - Help: "The number of successful authentications.", - }) - AuthenticatorPoolGet = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "auth", - Name: "pool_get", - Help: "The number of times an Authenticator has been retrieved from the pool.", - }) - AuthenticatorPoolNew = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "auth", - Name: "pool_new", - Help: "The number of times a new Authenticator has been created by the pool.", - }) - AuthenticatorPoolPut = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "auth", - Name: "pool_put", - Help: "The number of times an Authenticator has been replaced into the pool.", - }) ConcurrentWebsocketConnections = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "websocket", Name: "concurrent_connections", Help: "The number of concurrent websocket connections", }) - DatabaseFailCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "database", - Name: "fail_count", - Help: "The number of times a database error was considered fatal.", - }) - DeployedUnitCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "websocket", - Name: "deployed_unit_count", - Help: "The number of deployed units.", - }) - LoginFailCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "websocket", - Name: "login_fail_count", - Help: "The number of failed logins attempted.", - }) - LoginRedirectCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "websocket", - Name: "login_redirect_count", - Help: "The number of logins redirected to another controller.", - }) - LoginSuccessCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "websocket", - Name: "login_success_count", - Help: "The number of successful logins completed.", - }) - ModelLifetime = prometheus.NewHistogram(prometheus.HistogramOpts{ - Namespace: "jem", - Subsystem: "health", - Name: "model_lifetime", - Help: "The length of time (in hours) models had existed at the point they are destroyed.", - // Buckets are in hours for this histogram. - Buckets: []float64{ - 1.0 / 6, - 1.0 / 2, - 1, - 6, - 24, - 7 * 24, - 28 * 24, - 6 * 28 * 24, - 365 * 24, - 2 * 365 * 24, - 5 * 365 * 24, - }, - }) ModelsCreatedCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "websocket", Name: "models_created_count", Help: "The number of models created.", }) ModelsCreatedFailCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "websocket", Name: "models_created_fail_count", Help: "The number of fails attempting to create models.", }) - ModelsDestroyedCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "websocket", - Name: "models_destroyed_count", - Help: "The number of models destroyed.", - }) MonitorDeltasReceivedCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "monitor", Name: "deltas_received_count", Help: "The number of watcher deltas received.", }, []string{"controller"}) - MonitorDeltaBatchesReceivedCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "monitor", - Name: "delta_batches_received_count", - Help: "The number of watcher delta batches received.", - }, []string{"controller"}) MonitorErrorsCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "monitor", Name: "errors_count", Help: "The number of monitoring errors found.", }, []string{"controller"}) - MonitorLeaseGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "jem", - Subsystem: "monitor", - Name: "lease_gauge", - Help: "The number of current monitor leases held", - }, []string{"controller"}) - requestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: "jem", - Subsystem: "handler", - Name: "request_duration", - Help: "The duration of a web request in seconds.", - }, []string{"path_pattern"}) - StatsCollectFailCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "health", - Name: "stats_collect_fail_count", - Help: "The number of times we failed to collect stats from mongo.", - }) VaultConfigured = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "vault", Name: "configured", Help: "Indicator that a vault is configured.", }) - VaultSecretRefreshes = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "jem", - Subsystem: "vault", - Name: "secret_refreshes", - Help: "The number of times the secret has been refreshed.", - }) WebsocketRequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: "jem", + Namespace: "jimm", Subsystem: "websocket", Name: "request_duration", Help: "The duration of a websocket request in seconds.", @@ -179,26 +72,11 @@ var ( func init() { prometheus.MustRegister(AuthenticationFailCount) prometheus.MustRegister(AuthenticationSuccessCount) - prometheus.MustRegister(AuthenticatorPoolGet) - prometheus.MustRegister(AuthenticatorPoolNew) - prometheus.MustRegister(AuthenticatorPoolPut) prometheus.MustRegister(ConcurrentWebsocketConnections) - prometheus.MustRegister(DatabaseFailCount) - prometheus.MustRegister(DeployedUnitCount) - prometheus.MustRegister(LoginFailCount) - prometheus.MustRegister(LoginRedirectCount) - prometheus.MustRegister(LoginSuccessCount) - prometheus.MustRegister(ModelLifetime) prometheus.MustRegister(ModelsCreatedCount) prometheus.MustRegister(ModelsCreatedFailCount) prometheus.MustRegister(MonitorDeltasReceivedCount) - prometheus.MustRegister(MonitorDeltaBatchesReceivedCount) prometheus.MustRegister(MonitorErrorsCount) - prometheus.MustRegister(MonitorLeaseGauge) - prometheus.MustRegister(requestDuration) - prometheus.MustRegister(StatsCollectFailCount) prometheus.MustRegister(VaultConfigured) - prometheus.MustRegister(VaultSecretRefreshes) prometheus.MustRegister(WebsocketRequestDuration) - prometheus.MustRegister(mgomonitor.NewCollector("jem")) } diff --git a/internal/servermon/request.go b/internal/servermon/request.go deleted file mode 100644 index 1f9cba9ae..000000000 --- a/internal/servermon/request.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2016 Canonical Ltd. - -package servermon - -import ( - "time" -) - -// Request represents an API request that is being monitored. -// A request can only be used for a single API request at -// any one time. -type Request struct { - startTime time.Time - label string -} - -// Start should be called when an API request starts. -// The path holds the URL path to the API request. -func (r *Request) Start(path string) { - r.label = path - r.startTime = time.Now() -} - -// End should be called when the API request completes. -// The Request value may then be reused for another -// API request. -func (r *Request) End() { - requestDuration.WithLabelValues(r.label).Observe(float64(time.Since(r.startTime)) / float64(time.Second)) -}