From 1a8c6ce4cd98db83e5562ec4e96a116e0ee756bc Mon Sep 17 00:00:00 2001 From: Stephen O'Dwyer <67125983+angrieralien@users.noreply.github.com> Date: Fri, 28 Apr 2023 11:47:41 -0400 Subject: [PATCH] adds funcs to metric.go to handle audit metrics --- cmd/app.go | 61 +++++++++++++++++++++++++++---------- internal/metrics/metrics.go | 43 +++++++++++++++++++++----- 2 files changed, 81 insertions(+), 23 deletions(-) diff --git a/cmd/app.go b/cmd/app.go index 4d51eea9..fc6f1cda 100644 --- a/cmd/app.go +++ b/cmd/app.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "os" + "strconv" "time" "github.com/go-logr/zapr" @@ -47,16 +48,17 @@ const ( ) type appConfig struct { - bootID string - auditlogpath string - auditLogDirPath string - enableMetrics bool - enableHealthz bool - enableAuditMetrics bool - auditMetricsSecondsInterval int - httpServerReadTimeout time.Duration - httpServerReadHeaderTimeout time.Duration - logLevel zapcore.Level + bootID string + auditlogpath string + auditLogDirPath string + enableMetrics bool + enableHealthz bool + enableAuditMetrics bool + auditMetricsSecondsInterval int + auditLogWriteTimeSecondThreshold int + httpServerReadTimeout time.Duration + httpServerReadHeaderTimeout time.Duration + logLevel zapcore.Level } func parseFlags(osArgs []string) (*appConfig, error) { @@ -75,12 +77,20 @@ func parseFlags(osArgs []string) (*appConfig, error) { flagSet.BoolVar(&config.enableMetrics, "metrics", false, "Enable Prometheus HTTP /metrics server") flagSet.BoolVar(&config.enableHealthz, "healthz", false, "Enable HTTP health endpoints server") flagSet.BoolVar(&config.enableAuditMetrics, "auditMetrics", false, "Enable Prometheus audit.log metrics") - flagSet.IntVar(&config.auditMetricsSecondsInterval, "audit-seconds-interval", 15, "Number of seconds to collect audit metrics") - flagSet.IntVar(&config.auditLogLastModifyInterval, "audit-log-last-modify-interval", false, "Number of seconds to collect audit metrics") flagSet.DurationVar(&config.httpServerReadTimeout, 
"http-server-read-timeout", DefaultHTTPServerReadTimeout, "HTTP server read timeout") flagSet.DurationVar(&config.httpServerReadHeaderTimeout, "http-server-read-header-timeout", DefaultHTTPServerReadHeaderTimeout, "HTTP server read header timeout") + flagSet.IntVar( + &config.auditMetricsSecondsInterval, + "audit-seconds-interval", + 15, + "Number of seconds to collect audit metrics") + flagSet.IntVar( + &config.auditLogWriteTimeSecondThreshold, + "audit-log-last-modify-seconds-threshold", + 86400, + "maximum second diff between current date and last modify time") flagSet.Usage = func() { os.Stderr.WriteString(usage) @@ -177,6 +187,11 @@ func Run(ctx context.Context, osArgs []string, h *health.Health, optLoggerConfig logins := make(chan common.RemoteUserLogin) pprov := metrics.NewPrometheusMetricsProvider() + handleAuditLogMetrics(groupCtx, eg, + pprov, + appCfg.auditMetricsSecondsInterval, + appCfg.auditLogWriteTimeSecondThreshold, + ) runProcessorsForSSHLogins(groupCtx, logins, eg, distro, mid, nodename, appCfg.bootID, lastReadJournalTS, eventWriter, h, pprov) @@ -332,20 +347,34 @@ func lastReadJournalTimeStamp() uint64 { return lastRead } -func collectAuditLogMetrics( +func handleAuditLogMetrics( + ctx context.Context, eg *errgroup.Group, pprov *metrics.PrometheusMetricsProvider, auditMetricsSecondsInterval int, + auditLogWriteTimeSecondThreshold int, ) { + eg.Go(func() error { tickChan := time.NewTicker(time.Second * time.Duration(auditMetricsSecondsInterval)).C for { select { case <-tickChan: - + s, err := os.Stat("/var/log/audit/audit.log") + if err != nil { + return fmt.Errorf("error stat-ing /var/log/audit/audit.log") + } + + if time.Now().Sub(s.ModTime()).Seconds() > float64(auditLogWriteTimeSecondThreshold) { + pprov.SetAuditLogCheck(0, strconv.Itoa(auditLogWriteTimeSecondThreshold)) + } else { + pprov.SetAuditLogCheck(1, strconv.Itoa(auditLogWriteTimeSecondThreshold)) + } + + pprov.SetAuditLogModifyTime(float64(s.ModTime().Unix())) + case <-ctx.Done(): + 
return ctx.Err() } } - - return nil }) } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index b310b243..a666ce85 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -7,8 +7,10 @@ import ( // PrometheusMetricsProvider is a metrics provider that uses Prometheus. type PrometheusMetricsProvider struct { - remoteLogins *prometheus.CounterVec - errors *prometheus.CounterVec + auditLogCheck *prometheus.GaugeVec + auditLogModifyTime *prometheus.GaugeVec + errors *prometheus.CounterVec + remoteLogins *prometheus.CounterVec } // NewPrometheusMetricsProvider returns a new PrometheusMetricsProvider. @@ -28,14 +30,23 @@ func NewPrometheusMetricsProvider() *PrometheusMetricsProvider { // - For more information about the labels, see the `ErrorType` func NewPrometheusMetricsProviderForRegisterer(r prometheus.Registerer) *PrometheusMetricsProvider { p := &PrometheusMetricsProvider{ - remoteLogins: prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "remote_logins_total", + auditLogCheck: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "audit_log_check", Namespace: MetricsNamespace, - Help: "The total number of remote logins.", + Help: "Checks audit.log is being written to. 
0 for negative, 1 for positive", }, - []string{"method", "outcome"}, + []string{"threshold_time_in_seconds"}, ), + auditLogModifyTime: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "audit_log_modify_time", + Namespace: MetricsNamespace, + Help: "Sets audit.log last modify time", + }, + []string{}, + ), + errors: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "errors_total", @@ -44,6 +55,14 @@ func NewPrometheusMetricsProviderForRegisterer(r prometheus.Registerer) *Prometh }, []string{"type"}, ), + remoteLogins: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "remote_logins_total", + Namespace: MetricsNamespace, + Help: "The total number of remote logins.", + }, + []string{"method", "outcome"}, + ), } // This is variadic function so we can pass as many metrics as we want @@ -60,3 +79,13 @@ func (p *PrometheusMetricsProvider) IncLogins(loginType LoginType, outcome Outco func (p *PrometheusMetricsProvider) IncErrors(errorType ErrorType) { p.errors.WithLabelValues(string(errorType)).Inc() } + +// SetAuditLogCheck sets status of audit.log writes. 0 for negative, 1 for positive. +func (p *PrometheusMetricsProvider) SetAuditLogCheck(result float64, threshold string) { + p.auditLogCheck.WithLabelValues(threshold).Set(result) +} + +// SetAuditLogModifyTime sets last modify time in seconds. +func (p *PrometheusMetricsProvider) SetAuditLogModifyTime(result float64) { + p.auditLogModifyTime.WithLabelValues().Set(result) +}