diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index a2a03a791a93..379109d6732f 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -233,6 +233,7 @@ otherwise no tag is added. {issue}42208[42208] {pull}42403[42403] - Fixed errors in the `elasticsearch.index` metricset when index settings are missing. {issue}42424[42424] {pull}42426[42426] - Fixed panic caused by uninitialized meraki device wifi0 and wifi1 struct pointers in the device WiFi data fetching. {issue}42745[42745] {pull}42746[42746] - Only fetch cluster-level index stats summary {issue}36019[36019] {pull}42901[42901] +- Fixed an issue in Metricbeat's Windows module where data collection would fail if the data was unavailable. {issue}42802[42802] {pull}42803[42803] *Osquerybeat* diff --git a/metricbeat/module/windows/perfmon/reader.go b/metricbeat/module/windows/perfmon/reader.go index cc156f843e5b..a158be266670 100644 --- a/metricbeat/module/windows/perfmon/reader.go +++ b/metricbeat/module/windows/perfmon/reader.go @@ -96,13 +96,8 @@ func (re *Reader) Read() ([]mb.Event, error) { // Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue. // For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data). if err := re.query.CollectData(); err != nil { - // users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message, - //could be the case the specific services are started after and picked up by the next RefreshCounterPaths func - if err == pdh.PDH_NO_COUNTERS { //nolint:errorlint // Bad linter! This is always errno or nil. - re.log.Warnf("%s %v", collectFailedMsg, err) - } else { - return nil, fmt.Errorf("%v: %w", collectFailedMsg, err) - } + err = re.handleCollectDataError(err) + return nil, err } // Get the values. @@ -121,6 +116,19 @@ func (re *Reader) Read() ([]mb.Event, error) { return events, nil } +func (re *Reader) handleCollectDataError(err error) error { + // users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message, + //could be the case the specific services are started after and picked up by the next RefreshCounterPaths func + if err == pdh.PDH_NO_COUNTERS || err == pdh.PDH_NO_DATA { //nolint:errorlint // linter complains about comparing error using '==' operator but here error is always of type pdh.PdhErrno (or nil) so `errors.Is` is redundant here + re.log.Warnf("%s %v", collectFailedMsg, err) + + // Ensure the returned error is nil to prevent the Elastic Agent from transitioning to an UNHEALTHY state. + return nil + } + + return fmt.Errorf("%v: %w", collectFailedMsg, err) +} + func (re *Reader) getValues() (map[string][]pdh.CounterValue, error) { var val map[string][]pdh.CounterValue // Sleep for one second before collecting the second raw value- diff --git a/metricbeat/module/windows/perfmon/reader_test.go b/metricbeat/module/windows/perfmon/reader_test.go index 55f29d404f10..9a26b7c87285 100644 --- a/metricbeat/module/windows/perfmon/reader_test.go +++ b/metricbeat/module/windows/perfmon/reader_test.go @@ -20,11 +20,13 @@ package perfmon import ( + "errors" "testing" "github.com/stretchr/testify/assert" "github.com/elastic/beats/v7/metricbeat/helper/windows/pdh" + "github.com/elastic/elastic-agent-libs/logp" ) func TestGetCounter(t *testing.T) { @@ -153,3 +155,46 @@ func TestIsWildcard(t *testing.T) { result = isWildcard(queries, instance) assert.False(t, result) } + +func Test_handleCollectDataError(t *testing.T) { + tests := []struct { + name string + + mockErr error + expectedErrMsg string + }{ + { + name: "no counters error", + + mockErr: pdh.PDH_NO_COUNTERS, + expectedErrMsg: "", + }, + { + name: "no data error", + + mockErr: pdh.PDH_NO_DATA, + expectedErrMsg: "", + }, + { + name: "unexpected error", + + mockErr: errors.New("test error"), + expectedErrMsg: "failed collecting counter values: test error", + }, + } + + reader := &Reader{ + log: logp.NewLogger("perfmon"), + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := reader.handleCollectDataError(tt.mockErr) + if tt.expectedErrMsg == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tt.expectedErrMsg) + } + }) + } +}