From f05789c287c3832b1a4464ee568e4f928da1e72b Mon Sep 17 00:00:00 2001 From: Martin Hansen Date: Mon, 15 Jul 2024 11:36:59 +0200 Subject: [PATCH 1/4] rewriting event log metrics --- README.md | 8 +++---- internal/collector/client.go | 41 ++++++++++++++++++++++++++------- internal/collector/collector.go | 14 +++++------ internal/collector/metrics.go | 25 ++++++++++---------- internal/collector/model.go | 27 +++++++++++----------- internal/config/config.go | 26 +++++++++++++++++++++ internal/config/env.go | 4 +++- internal/config/model.go | 10 +++++++- sample-config.yml | 13 +++++++---- 9 files changed, 116 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 63e3a2c..18b95d0 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ metrics: system: true sensors: true power: true - sel: false + events: false storage: false memory: false network: false @@ -125,11 +125,11 @@ idrac_power_control_avg_consumed_watts{id="0",name="System Power Control"} 166 idrac_power_control_interval_in_minutes{id="0",name="System Power Control"} 1 ``` -### System Event Log -On iDRAC only, the system event log can also be exported. This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the unix timestamp for when the entry was created (as reported by iDRAC). +### Event Log +This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the unix timestamp for when the entry was created. ```text -idrac_sel_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",component="BaseOSBoot/InstallationStatus",severity="OK"} 1631175352 +idrac_log_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",severity="OK"} 1631175352 ``` ### Storage diff --git a/internal/collector/client.go b/internal/collector/client.go index 344db38..35fcd20 100644 --- a/internal/collector/client.go +++ b/internal/collector/client.go @@ -38,6 +38,7 @@ type Client struct { storagePath string memoryPath string networkPath string + eventPath string } func newHttpClient() *http.Client { @@ -137,6 +138,16 @@ func (client *Client) findAllEndpoints() error { } } + // Path for event log + switch client.vendor { + case DELL: + client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/Logs/Sel" + case LENOVO: + client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries" + case HPE: + client.eventPath = "/redfish/v1/Systems/1/LogServices/IML/Entries" + } + return nil } @@ -287,23 +298,37 @@ func (client *Client) RefreshPower(mc *Collector, ch chan<- prometheus.Metric) e return nil } -func (client *Client) RefreshIdracSel(mc *Collector, ch chan<- prometheus.Metric) error { - if client.vendor != DELL { +func (client *Client) RefreshEventLog(mc *Collector, ch chan<- prometheus.Metric) error { + if client.eventPath == "" { return nil } - resp := IdracSelResponse{} - err := client.redfishGet(redfishRootPath+"/Managers/iDRAC.Embedded.1/Logs/Sel", &resp) + resp := EventLogResponse{} + err := client.redfishGet(client.eventPath, &resp) if err != nil { return err } + level := config.Config.Event.SeverityLevel + maxage := config.Config.Event.MaxAgeSeconds + for _, e := range resp.Members { - st := string(e.SensorType) - if st == "" { - st = "Unknown" + t, err := time.Parse(time.RFC3339, e.Created) + if err != nil { + continue + } + + d := time.Since(t) + if d.Seconds() > maxage { + continue + } + + severity := health2value(e.Severity) + if severity < level { + continue } - ch <- mc.NewSelEntry(e.Id, e.Message, st, e.Severity, e.Created) + + ch <- mc.NewEventLogEntry(e.Id, e.Message, e.Severity, t) } return nil diff --git a/internal/collector/collector.go b/internal/collector/collector.go index 159f5df..071aabd 100644 --- a/internal/collector/collector.go +++ b/internal/collector/collector.go @@ -61,7 +61,7 @@ type Collector struct { PowerControlInterval *prometheus.Desc // System event log - SelEntry *prometheus.Desc + EventLogEntry *prometheus.Desc // Disk drives DriveInfo *prometheus.Desc @@ -210,10 +210,10 @@ func NewCollector() *Collector { "Interval for measurements of power control system", []string{"id", "name"}, nil, ), - SelEntry: prometheus.NewDesc( - prometheus.BuildFQName(prefix, "sel", "entry"), + EventLogEntry: prometheus.NewDesc( + prometheus.BuildFQName(prefix, "log", "entry"), "Entry from the system event log", - []string{"id", "message", "component", "severity"}, nil, + []string{"id", "message", "severity"}, nil, ), DriveInfo: prometheus.NewDesc( prometheus.BuildFQName(prefix, "drive", "info"), @@ -310,7 +310,7 @@ func (collector *Collector) Describe(ch chan<- *prometheus.Desc) { ch <- collector.PowerControlMaxConsumedWatts ch <- collector.PowerControlAvgConsumedWatts ch <- collector.PowerControlInterval - ch <- collector.SelEntry + ch <- collector.EventLogEntry ch <- collector.DriveInfo ch <- collector.DriveHealth ch <- collector.DriveCapacity @@ -372,10 +372,10 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) { }() } - if config.Config.Collect.SEL { + if config.Config.Collect.Events { wg.Add(1) go func() { - err := collector.client.RefreshIdracSel(collector, ch) + err := collector.client.RefreshEventLog(collector, ch) if err != nil { collector.errors.Add(1) } diff --git a/internal/collector/metrics.go b/internal/collector/metrics.go index cd253c4..1426869 100644 --- a/internal/collector/metrics.go +++ b/internal/collector/metrics.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func health2value(health string) float64 { +func health2value(health string) int { switch health { case "OK": return 0 @@ -20,7 +20,7 @@ func health2value(health string) float64 { return 10 } -func linkstatus2value(status string) float64 { +func linkstatus2value(status string) int { switch status { case "Up", "LinkUp": return 1 @@ -45,7 +45,7 @@ func (mc *Collector) NewSystemHealth(health string) prometheus.Metric { return prometheus.MustNewConstMetric( mc.SystemHealth, prometheus.GaugeValue, - value, + float64(value), health, ) } @@ -117,7 +117,7 @@ func (mc *Collector) NewSensorsFanHealth(id, name, health string) prometheus.Met return prometheus.MustNewConstMetric( mc.SensorsFanHealth, prometheus.GaugeValue, - value, + float64(value), id, name, health, @@ -140,7 +140,7 @@ func (mc *Collector) NewPowerSupplyHealth(health, id string) prometheus.Metric { return prometheus.MustNewConstMetric( mc.PowerSupplyHealth, prometheus.GaugeValue, - value, + float64(value), id, health, ) @@ -251,14 +251,13 @@ func (mc *Collector) NewPowerControlInterval(interval int, id, name string) prom ) } -func (mc *Collector) NewSelEntry(id string, message string, component string, severity string, created time.Time) prometheus.Metric { +func (mc *Collector) NewEventLogEntry(id string, message string, severity string, created time.Time) prometheus.Metric { return prometheus.MustNewConstMetric( - mc.SelEntry, + mc.EventLogEntry, prometheus.CounterValue, float64(created.Unix()), id, message, - component, severity, ) } @@ -292,7 +291,7 @@ func (mc *Collector) NewDriveHealth(id, health string) prometheus.Metric { return prometheus.MustNewConstMetric( mc.DriveHealth, prometheus.GaugeValue, - value, + float64(value), id, health, ) @@ -336,7 +335,7 @@ func (mc *Collector) NewMemoryModuleHealth(id, health string) prometheus.Metric return prometheus.MustNewConstMetric( mc.MemoryModuleHealth, prometheus.GaugeValue, - value, + float64(value), id, health, ) @@ -365,7 +364,7 @@ func (mc *Collector) NewNetworkInterfaceHealth(id, health string) prometheus.Met return prometheus.MustNewConstMetric( mc.NetworkInterfaceHealth, prometheus.GaugeValue, - value, + float64(value), id, health, ) @@ -376,7 +375,7 @@ func (mc *Collector) NewNetworkPortHealth(iface, id, health string) prometheus.M return prometheus.MustNewConstMetric( mc.NetworkPortHealth, prometheus.GaugeValue, - value, + float64(value), iface, id, health, @@ -398,7 +397,7 @@ func (mc *Collector) NewNetworkPortLinkUp(iface, id, status string) prometheus.M return prometheus.MustNewConstMetric( mc.NetworkPortLinkUp, prometheus.GaugeValue, - value, + float64(value), iface, id, status, diff --git a/internal/collector/model.go b/internal/collector/model.go index e5e2193..51d9bb5 100644 --- a/internal/collector/model.go +++ b/internal/collector/model.go @@ -2,7 +2,6 @@ package collector import ( "strconv" - "time" ) const ( @@ -472,21 +471,21 @@ func (psu *PowerSupplyUnit) GetOutputPower() float64 { return psu.LastPowerOutputWatts } -type IdracSelResponse struct { +type EventLogResponse struct { Name string `json:"Name"` Description string `json:"Description"` Members []struct { - Id string `json:"Id"` - Name string `json:"Name"` - Created time.Time `json:"Created"` - Description string `json:"Description"` - EntryCode xstring `json:"EntryCode"` - EntryType string `json:"EntryType"` - Message string `json:"Message"` - MessageArgs []any `json:"MessageArgs"` - MessageId string `json:"MessageId"` - SensorNumber int `json:"SensorNumber"` - SensorType xstring `json:"SensorType"` - Severity string `json:"Severity"` + Id string `json:"Id"` + Name string `json:"Name"` + Created string `json:"Created"` + Description string `json:"Description"` + EntryCode xstring `json:"EntryCode"` + EntryType string `json:"EntryType"` + Message string `json:"Message"` + MessageArgs []any `json:"MessageArgs"` + MessageId string `json:"MessageId"` + SensorNumber int `json:"SensorNumber"` + SensorType xstring `json:"SensorType"` + Severity string `json:"Severity"` } `json:"Members"` } diff --git a/internal/config/config.go b/internal/config/config.go index 122af28..b2353a6 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -3,6 +3,8 @@ package config import ( "math" "os" + "strings" + "time" "github.com/mrlhansen/idrac_exporter/internal/log" "gopkg.in/yaml.v2" @@ -54,6 +56,7 @@ func ReadConfig(filename string) { readConfigEnv() + // main section if Config.Address == "" { Config.Address = "0.0.0.0" } @@ -74,6 +77,7 @@ func ReadConfig(filename string) { Config.MetricsPrefix = "idrac" } + // hosts section if len(Config.Hosts) == 0 { log.Fatal("Invalid configuration: empty section: hosts") } @@ -87,4 +91,26 @@ func ReadConfig(filename string) { } v.Hostname = k } + + // events section + switch strings.ToLower(Config.Event.Severity) { + case "ok": + Config.Event.SeverityLevel = 0 + case "warning", "": + Config.Event.SeverityLevel = 1 + case "critical": + Config.Event.SeverityLevel = 2 + default: + log.Fatal("Invalid configuration: invalid value: %s", Config.Event.Severity) + } + + if Config.Event.MaxAge == "" { + Config.Event.MaxAge = "168h" + } + + t, err := time.ParseDuration(Config.Event.MaxAge) + if err != nil { + log.Fatal("Invalid configuration: unable to parse duration: %v", err) + } + Config.Event.MaxAgeSeconds = t.Seconds() } diff --git a/internal/config/env.go b/internal/config/env.go index aac5189..c131b91 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -49,6 +49,8 @@ func readConfigEnv() { getEnvString("CONFIG_METRICS_PREFIX", &Config.MetricsPrefix) getEnvString("CONFIG_DEFAULT_USERNAME", &username) getEnvString("CONFIG_DEFAULT_PASSWORD", &password) + getEnvString("CONFIG_EVENT_SEVERITY", &Config.Event.Severity) + getEnvString("CONFIG_EVENT_MAXAGE", &Config.Event.MaxAge) getEnvUint("CONFIG_PORT", &Config.Port) getEnvUint("CONFIG_TIMEOUT", &Config.Timeout) @@ -56,7 +58,7 @@ func readConfigEnv() { getEnvBool("CONFIG_METRICS_SYSTEM", &Config.Collect.System) getEnvBool("CONFIG_METRICS_SENSORS", &Config.Collect.Sensors) - getEnvBool("CONFIG_METRICS_SEL", &Config.Collect.SEL) + getEnvBool("CONFIG_METRICS_EVENTS", &Config.Collect.Events) getEnvBool("CONFIG_METRICS_POWER", &Config.Collect.Power) getEnvBool("CONFIG_METRICS_STORAGE", &Config.Collect.Storage) getEnvBool("CONFIG_METRICS_MEMORY", &Config.Collect.Memory) diff --git a/internal/config/model.go b/internal/config/model.go index fc93d5b..6303c22 100644 --- a/internal/config/model.go +++ b/internal/config/model.go @@ -11,19 +11,27 @@ type HostConfig struct { type CollectConfig struct { System bool `yaml:"system"` Sensors bool `yaml:"sensors"` - SEL bool `yaml:"sel"` + Events bool `yaml:"events"` Power bool `yaml:"power"` Storage bool `yaml:"storage"` Memory bool `yaml:"memory"` Network bool `yaml:"network"` } +type EventConfig struct { + Severity string `yaml:"severity"` + MaxAge string `yaml:"maxage"` + SeverityLevel int + MaxAgeSeconds float64 +} + type RootConfig struct { mutex sync.Mutex Address string `yaml:"address"` Port uint `yaml:"port"` MetricsPrefix string `yaml:"metrics_prefix"` Collect CollectConfig `yaml:"metrics"` + Event EventConfig `yaml:"events"` Timeout uint `yaml:"timeout"` Retries uint `yaml:"retries"` Hosts map[string]*HostConfig `yaml:"hosts"` diff --git a/sample-config.yml b/sample-config.yml index 18373b3..87d8e11 100644 --- a/sample-config.yml +++ b/sample-config.yml @@ -51,14 +51,19 @@ hosts: # The metrics section is used to select different groups of metrics. # See the README file for a detailed list of metrics in each group. # Each section can also be enabled using an environment variable. -# -# The "sel" section (System Event Log) only works on iDRAC, but -# the remaining sections work for all supported machine types. metrics: system: false # CONFIG_METRICS_SYSTEM=true sensors: false # CONFIG_METRICS_SENSORS=true power: false # CONFIG_METRICS_POWER=true - sel: false # CONFIG_METRICS_SEL=true + events: false # CONFIG_METRICS_EVENTS=true storage: false # CONFIG_METRICS_STORAGE=true memory: false # CONFIG_METRICS_MEMORY=true network: false # CONFIG_METRICS_NETWORK=true + +# The events section is used for filtering events and it is only needed when +# the "events" metrics are enabled. Events can be filtered based on minimum +# severity and maximum age. +# Severity must be one of "ok", "warning", "critical" +events: + severity: warning # CONFIG_EVENTS_SEVERITY=warning + maxage: 168h # CONFIG_EVENTS_MAXAGE=168h From 71f2da582675cd771dbaf4e2781e6e663f546926 Mon Sep 17 00:00:00 2001 From: Martin Hansen Date: Tue, 16 Jul 2024 12:55:12 +0200 Subject: [PATCH 2/4] various fixes --- README.md | 4 ++-- charts/idrac-exporter/values.yaml | 3 +-- go.mod | 1 + go.sum | 2 ++ idrac.yml.template | 3 +-- internal/collector/client.go | 21 +++++++++++---------- internal/config/config.go | 6 +++--- internal/config/env.go | 4 ++-- sample-config.yml | 7 +++---- 9 files changed, 26 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 18b95d0..d2b64d9 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ metrics: As shown in the above example, under `hosts` you can specify login information for individual hosts via their IP address, otherwise the exporter will attempt to use the login information under `default`. The login user only needs read-only permissions. Under `metrics` you can select what kind of metrics that should be returned, as described in more detail below. -For a detailed description of the configuration, please see the [sample-config.yml](sample-config.yml) file. +**For a detailed description of the configuration, please see the [sample-config.yml](sample-config.yml) file.** Because the metrics are collected on-demand it can take several minutes to scrape the metrics endpoint, depending on how many metrics groups are selected in the configuration file. For this reason you should carefully select the metrics of interest and make sure Prometheus is configured with a sufficiently high scrape timeout value. @@ -125,7 +125,7 @@ idrac_power_control_avg_consumed_watts{id="0",name="System Power Control"} 166 idrac_power_control_interval_in_minutes{id="0",name="System Power Control"} 1 ``` -### Event Log +### System Event Log This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the unix timestamp for when the entry was created. ```text diff --git a/charts/idrac-exporter/values.yaml b/charts/idrac-exporter/values.yaml index 5d5cc51..d45022a 100644 --- a/charts/idrac-exporter/values.yaml +++ b/charts/idrac-exporter/values.yaml @@ -86,7 +86,6 @@ idracConfig: | address: 0.0.0.0 # Running in a container, this makes sense port: 9348 # Listen port timeout: 60 # HTTP timeout (in seconds) for Redfish API calls - retries: 10 # Number of retries before a target is marked as unreachable hosts: default: username: IDRAC_USERNAME @@ -95,7 +94,7 @@ idracConfig: | system: true sensors: true power: true - sel: true # iDRAC only + events: true storage: true memory: true network: true diff --git a/go.mod b/go.mod index 0d215db..dde57d1 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/prometheus/client_golang v1.19.0 github.com/prometheus/common v0.48.0 + github.com/xhit/go-str2duration/v2 v2.1.0 gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index 8df546f..9f029db 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5E github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= +github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= diff --git a/idrac.yml.template b/idrac.yml.template index 65db758..b81cb99 100644 --- a/idrac.yml.template +++ b/idrac.yml.template @@ -1,7 +1,6 @@ address: 0.0.0.0 # Running in a container, this makes sense port: 9348 # Listen port timeout: 60 # HTTP timeout (in seconds) for Redfish API calls -retries: 10 # Number of retries before a target is marked as unreachable hosts: default: username: "$IDRAC_USERNAME" @@ -10,7 +9,7 @@ metrics: system: true sensors: true power: true - sel: true + events: true storage: true memory: true network: true diff --git a/internal/collector/client.go b/internal/collector/client.go index 35fcd20..ee67f73 100644 --- a/internal/collector/client.go +++ b/internal/collector/client.go @@ -124,6 +124,16 @@ func (client *Client) findAllEndpoints() error { client.vendor = H3C } + // Path for event log + switch client.vendor { + case DELL: + client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/Logs/Sel" + case LENOVO: + client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries" + case HPE: + client.eventPath = "/redfish/v1/Systems/1/LogServices/IML/Entries" + } + // Fix for Inspur bug if client.vendor == INSPUR { client.storagePath = strings.ReplaceAll(client.storagePath, "Storages", "Storage") @@ -134,20 +144,11 @@ func (client *Client) findAllEndpoints() error { if strings.Contains(root.Name, "HP RESTful") { client.memoryPath = "/redfish/v1/Systems/1/Memory/" client.storagePath = "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/" + client.eventPath = "" client.version = 4 } } - // Path for event log - switch client.vendor { - case DELL: - client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/Logs/Sel" - case LENOVO: - client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries" - case HPE: - client.eventPath = "/redfish/v1/Systems/1/LogServices/IML/Entries" - } - return nil } diff --git a/internal/config/config.go b/internal/config/config.go index b2353a6..2073a0a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -4,9 +4,9 @@ import ( "math" "os" "strings" - "time" "github.com/mrlhansen/idrac_exporter/internal/log" + "github.com/xhit/go-str2duration/v2" "gopkg.in/yaml.v2" ) @@ -105,10 +105,10 @@ func ReadConfig(filename string) { } if Config.Event.MaxAge == "" { - Config.Event.MaxAge = "168h" + Config.Event.MaxAge = "7d" } - t, err := time.ParseDuration(Config.Event.MaxAge) + t, err := str2duration.ParseDuration(Config.Event.MaxAge) if err != nil { log.Fatal("Invalid configuration: unable to parse duration: %v", err) } diff --git a/internal/config/env.go b/internal/config/env.go index c131b91..ab7a9d9 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -49,8 +49,8 @@ func readConfigEnv() { getEnvString("CONFIG_METRICS_PREFIX", &Config.MetricsPrefix) getEnvString("CONFIG_DEFAULT_USERNAME", &username) getEnvString("CONFIG_DEFAULT_PASSWORD", &password) - getEnvString("CONFIG_EVENT_SEVERITY", &Config.Event.Severity) - getEnvString("CONFIG_EVENT_MAXAGE", &Config.Event.MaxAge) + getEnvString("CONFIG_EVENTS_SEVERITY", &Config.Event.Severity) + getEnvString("CONFIG_EVENTS_MAXAGE", &Config.Event.MaxAge) getEnvUint("CONFIG_PORT", &Config.Port) getEnvUint("CONFIG_TIMEOUT", &Config.Timeout) diff --git a/sample-config.yml b/sample-config.yml index 87d8e11..3b35e0f 100644 --- a/sample-config.yml +++ b/sample-config.yml @@ -60,10 +60,9 @@ metrics: memory: false # CONFIG_METRICS_MEMORY=true network: false # CONFIG_METRICS_NETWORK=true -# The events section is used for filtering events and it is only needed when -# the "events" metrics are enabled. Events can be filtered based on minimum -# severity and maximum age. +# The events section is used for filtering events when the "events" metrics group +# is enabled. Events can be filtered based on minimum severity and maximum age. # Severity must be one of "ok", "warning", "critical" events: severity: warning # CONFIG_EVENTS_SEVERITY=warning - maxage: 168h # CONFIG_EVENTS_MAXAGE=168h + maxage: 7d # CONFIG_EVENTS_MAXAGE=7d From fc8b3125b9d35dd91cf49ede99fe5918443b257c Mon Sep 17 00:00:00 2001 From: Martin Hansen Date: Wed, 17 Jul 2024 09:17:41 +0200 Subject: [PATCH 3/4] updated dashboards --- README.md | 2 +- grafana/idrac.json | 27 +++++++-------------------- grafana/idrac_overview.json | 27 +++++++-------------------- internal/collector/collector.go | 2 +- internal/collector/model.go | 1 + 5 files changed, 17 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index d2b64d9..e9e8553 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ idrac_power_control_interval_in_minutes{id="0",name="System Power Control"} 1 This is not exactly an ordinary metric, but it is often convenient to be informed about new entries in the event log. The value of this metric is the unix timestamp for when the entry was created. ```text -idrac_log_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",severity="OK"} 1631175352 +idrac_events_log_entry{id="1",message="The process of installing an operating system or hypervisor is successfully completed",severity="OK"} 1631175352 ``` ### Storage diff --git a/grafana/idrac.json b/grafana/idrac.json index 567feb8..bab885e 100644 --- a/grafana/idrac.json +++ b/grafana/idrac.json @@ -25,12 +25,12 @@ "uid": "${datasource}" }, "enable": true, - "expr": "idrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", + "expr": "idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", "iconColor": "purple", "name": "SEL", - "tagKeys": "instance,component,severity", + "tagKeys": "instance,severity", "textFormat": "{{ message }}", - "titleFormat": "{{id}}:{{ component }}", + "titleFormat": "{{ id }}", "useValueForTime": "on" } ] @@ -2644,18 +2644,6 @@ } ] }, - { - "matcher": { - "id": "byName", - "options": "component" - }, - "properties": [ - { - "id": "custom.width", - "value": 217 - } - ] - }, { "matcher": { "id": "byName", @@ -2699,10 +2687,10 @@ }, "editorMode": "code", "exemplar": false, - "expr": "idrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", + "expr": "idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", "format": "table", "instant": true, - "legendFormat": "{{id}} {{component}}: {{message}}", + "legendFormat": "{{ id }}: {{ message }}", "range": false, "refId": "A" } @@ -2737,7 +2725,7 @@ "id": "filterFieldsByName", "options": { "include": { - "pattern": "message|id|component|severity|Value" + "pattern": "message|id|severity|Value" } } }, @@ -2747,10 +2735,9 @@ "excludeByName": {}, "indexByName": { "Value": 0, - "component": 4, "id": 2, "instance": 1, - "message": 5, + "message": 4, "severity": 3 }, "renameByName": { diff --git a/grafana/idrac_overview.json b/grafana/idrac_overview.json index 61eff1e..fc4b2ef 100644 --- a/grafana/idrac_overview.json +++ b/grafana/idrac_overview.json @@ -25,12 +25,12 @@ "uid": "${datasource}" }, "enable": true, - "expr": "idrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", + "expr": "idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", "iconColor": "purple", "name": "SEL", - "tagKeys": "instance,component,severity", + "tagKeys": "instance,severity", "textFormat": "{{ message }}", - "titleFormat": "{{id}}:{{ component }}", + "titleFormat": "{{ id }}", "useValueForTime": "on" } ] @@ -1828,18 +1828,6 @@ } ] }, - { - "matcher": { - "id": "byName", - "options": "component" - }, - "properties": [ - { - "id": "custom.width", - "value": 217 - } - ] - }, { "matcher": { "id": "byName", @@ -1881,10 +1869,10 @@ }, "editorMode": "code", "exemplar": false, - "expr": "idrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_sel_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", + "expr": "idrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 < ${__to} and \nidrac_events_log_entry{job=~\"$job\", instance=~\"$instance\"} * 1000 > ${__from}", "format": "table", "instant": true, - "legendFormat": "{{id}} {{component}}: {{message}}", + "legendFormat": "{{ id }}: {{ message }}", "range": false, "refId": "A" } @@ -1919,7 +1907,7 @@ "id": "filterFieldsByName", "options": { "include": { - "pattern": "instance|message|id|component|severity|Value" + "pattern": "instance|message|id|severity|Value" } } }, @@ -1929,10 +1917,9 @@ "excludeByName": {}, "indexByName": { "Value": 0, - "component": 4, "id": 2, "instance": 1, - "message": 5, + "message": 4, "severity": 3 }, "renameByName": { diff --git a/internal/collector/collector.go b/internal/collector/collector.go index 071aabd..d2faf94 100644 --- a/internal/collector/collector.go +++ b/internal/collector/collector.go @@ -211,7 +211,7 @@ func NewCollector() *Collector { []string{"id", "name"}, nil, ), EventLogEntry: prometheus.NewDesc( - prometheus.BuildFQName(prefix, "log", "entry"), + prometheus.BuildFQName(prefix, "events", "log_entry"), "Entry from the system event log", []string{"id", "message", "severity"}, nil, ), diff --git a/internal/collector/model.go b/internal/collector/model.go index 51d9bb5..5e53a7b 100644 --- a/internal/collector/model.go +++ b/internal/collector/model.go @@ -476,6 +476,7 @@ type EventLogResponse struct { Description string `json:"Description"` Members []struct { Id string `json:"Id"` + EventId string `json:"EventId"` Name string `json:"Name"` Created string `json:"Created"` Description string `json:"Description"` From e656d1caf19e715cdad1f4170d51f56a6e567227 Mon Sep 17 00:00:00 2001 From: Martin Hansen Date: Fri, 19 Jul 2024 13:29:04 +0200 Subject: [PATCH 4/4] final fixes --- internal/collector/client.go | 2 +- internal/collector/metrics.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/collector/client.go b/internal/collector/client.go index ee67f73..c33bec6 100644 --- a/internal/collector/client.go +++ b/internal/collector/client.go @@ -127,7 +127,7 @@ func (client *Client) findAllEndpoints() error { // Path for event log switch client.vendor { case DELL: - client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/Logs/Sel" + client.eventPath = "/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries" case LENOVO: client.eventPath = "/redfish/v1/Systems/1/LogServices/PlatformLog/Entries" case HPE: diff --git a/internal/collector/metrics.go b/internal/collector/metrics.go index 1426869..69ecaa7 100644 --- a/internal/collector/metrics.go +++ b/internal/collector/metrics.go @@ -257,7 +257,7 @@ func (mc *Collector) NewEventLogEntry(id string, message string, severity string prometheus.CounterValue, float64(created.Unix()), id, - message, + strings.TrimSpace(message), severity, ) }