Skip to content

Commit

Permalink
fix: don't fail merticbeat/windows/perfmon when no data is available (#…
Browse files Browse the repository at this point in the history
…42803) (#42951)

* fix: don't fail merticbeat/windows/perfmon when no data is available

* add CHANGELOG.next.asciidoc entry

* fix: linter issue

* fix: linter issue

* fix: fix code according to PR comments

* refactor: make CollectData error handling testable

* fix: add mossing imports to windows/perfmon/reader_test.go

* fix: add mossing imports to windows/perfmon/reader_test.go

---------

Co-authored-by: Pierre HILBERT <[email protected]>
(cherry picked from commit 0ea31fd)

Co-authored-by: stefans-elastic <[email protected]>
  • Loading branch information
mergify[bot] and stefans-elastic authored Feb 28, 2025
1 parent 30ac154 commit 160cce6
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ otherwise no tag is added. {issue}42208[42208] {pull}42403[42403]
- Fixed errors in the `elasticsearch.index` metricset when index settings are missing. {issue}42424[42424] {pull}42426[42426]
- Fixed panic caused by uninitialized meraki device wifi0 and wifi1 struct pointers in the device WiFi data fetching. {issue}42745[42745] {pull}42746[42746]
- Only fetch cluster-level index stats summary {issue}36019[36019] {pull}42901[42901]
- Fixed an issue in Metricbeat's Windows module where data collection would fail if the data was unavailable. {issue}42802[42802] {pull}42803[42803]

*Osquerybeat*

Expand Down
22 changes: 15 additions & 7 deletions metricbeat/module/windows/perfmon/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,8 @@ func (re *Reader) Read() ([]mb.Event, error) {
// Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue.
// For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data).
if err := re.query.CollectData(); err != nil {
// users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message,
//could be the case the specific services are started after and picked up by the next RefreshCounterPaths func
if err == pdh.PDH_NO_COUNTERS { //nolint:errorlint // Bad linter! This is always errno or nil.
re.log.Warnf("%s %v", collectFailedMsg, err)
} else {
return nil, fmt.Errorf("%v: %w", collectFailedMsg, err)
}
err = re.handleCollectDataError(err)
return nil, err
}

// Get the values.
Expand All @@ -121,6 +116,19 @@ func (re *Reader) Read() ([]mb.Event, error) {
return events, nil
}

func (re *Reader) handleCollectDataError(err error) error {
// users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message,
//could be the case the specific services are started after and picked up by the next RefreshCounterPaths func
if err == pdh.PDH_NO_COUNTERS || err == pdh.PDH_NO_DATA { //nolint:errorlint // linter complains about comparing error using '==' operator but here error is always of type pdh.PdhErrno (or nil) so `errors.Is` is redundant here
re.log.Warnf("%s %v", collectFailedMsg, err)

// Ensure the returned error is nil to prevent the Elastic Agent from transitioning to an UNHEALTHY state.
return nil
}

return fmt.Errorf("%v: %w", collectFailedMsg, err)
}

func (re *Reader) getValues() (map[string][]pdh.CounterValue, error) {
var val map[string][]pdh.CounterValue
// Sleep for one second before collecting the second raw value-
Expand Down
45 changes: 45 additions & 0 deletions metricbeat/module/windows/perfmon/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
package perfmon

import (
"errors"
"testing"

"github.com/stretchr/testify/assert"

"github.com/elastic/beats/v7/metricbeat/helper/windows/pdh"
"github.com/elastic/elastic-agent-libs/logp"
)

func TestGetCounter(t *testing.T) {
Expand Down Expand Up @@ -153,3 +155,46 @@ func TestIsWildcard(t *testing.T) {
result = isWildcard(queries, instance)
assert.False(t, result)
}

func Test_handleCollectDataError(t *testing.T) {
tests := []struct {
name string

mockErr error
expectedErrMsg string
}{
{
name: "no counters error",

mockErr: pdh.PDH_NO_COUNTERS,
expectedErrMsg: "",
},
{
name: "no data error",

mockErr: pdh.PDH_NO_DATA,
expectedErrMsg: "",
},
{
name: "unexpected error",

mockErr: errors.New("test error"),
expectedErrMsg: "failed collecting counter values: test error",
},
}

reader := &Reader{
log: logp.NewLogger("perfmon"),
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := reader.handleCollectDataError(tt.mockErr)
if tt.expectedErrMsg == "" {
assert.NoError(t, err)
} else {
assert.EqualError(t, err, tt.expectedErrMsg)
}
})
}
}

0 comments on commit 160cce6

Please sign in to comment.