From 4a7dbdea75d5c9060993e400bd88614066362731 Mon Sep 17 00:00:00 2001 From: zack olson Date: Fri, 17 Jan 2025 16:29:37 -0500 Subject: [PATCH 1/3] add timeout to osqueryinstance Healthy calls --- pkg/osquery/runtime/osqueryinstance.go | 50 ++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/pkg/osquery/runtime/osqueryinstance.go b/pkg/osquery/runtime/osqueryinstance.go index 5918c3201..9345b2a92 100644 --- a/pkg/osquery/runtime/osqueryinstance.go +++ b/pkg/osquery/runtime/osqueryinstance.go @@ -71,6 +71,9 @@ const ( // The maximum amount of time to wait for the osquery socket to be available -- overrides context deadline maxSocketWaitTime = 30 * time.Second + + // How long to wait for a single osqueryinstance healthcheck before forcibly returning error + healtcheckTimeout = 10 * time.Second ) // OsqueryInstanceOption is a functional option pattern for defining how an @@ -129,31 +132,40 @@ func (i *OsqueryInstance) Healthy() error { return errors.New("instance not started") } - for _, srv := range i.extensionManagerServers { - serverStatus, err := srv.Ping(context.TODO()) - if err != nil { - return fmt.Errorf("could not ping extension server: %w", err) + resultsChan := make(chan error) + gowrapper.Go(context.TODO(), i.slogger, func() { + for _, srv := range i.extensionManagerServers { + serverStatus, err := srv.Ping(context.TODO()) + if err != nil { + resultsChan <- fmt.Errorf("could not ping extension server: %w", err) + return + } + if serverStatus.Code != 0 { + resultsChan <- fmt.Errorf("ping extension server returned %d: %s", serverStatus.Code, serverStatus.Message) + return + } } - if serverStatus.Code != 0 { - return fmt.Errorf("ping extension server returned %d: %s", - serverStatus.Code, - serverStatus.Message) + clientStatus, err := i.extensionManagerClient.Ping() + if err != nil { + resultsChan <- fmt.Errorf("could not ping osquery extension client: %w", err) + return + } + if clientStatus.Code != 0 { + resultsChan <- fmt.Errorf("ping extension client returned %d: %s", clientStatus.Code, clientStatus.Message) + return } - } - clientStatus, err := i.extensionManagerClient.Ping() - if err != nil { - return fmt.Errorf("could not ping osquery extension client: %w", err) - } - if clientStatus.Code != 0 { - return fmt.Errorf("ping extension client returned %d: %s", - clientStatus.Code, - clientStatus.Message) + resultsChan <- nil + }) + // Wait until we either receive an error or nil result from the healthcheck goroutine, or exceed our timeout threshold + select { + case maybeErr := <-resultsChan: + return maybeErr + case <-time.After(healtcheckTimeout): + return fmt.Errorf("osqueryinstance healthcheck exceeded timeout of %s", healtcheckTimeout.String()) } - - return nil } func (i *OsqueryInstance) Query(query string) ([]map[string]string, error) { From aabbb57cf6597c5e07c7f3c54352cc690075cc68 Mon Sep 17 00:00:00 2001 From: zack olson Date: Tue, 21 Jan 2025 10:56:59 -0500 Subject: [PATCH 2/3] return explicit nil from error check in results chan --- pkg/osquery/runtime/osqueryinstance.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/osquery/runtime/osqueryinstance.go b/pkg/osquery/runtime/osqueryinstance.go index 9345b2a92..32bea39fb 100644 --- a/pkg/osquery/runtime/osqueryinstance.go +++ b/pkg/osquery/runtime/osqueryinstance.go @@ -162,7 +162,11 @@ func (i *OsqueryInstance) Healthy() error { // Wait until we either receive an error or nil result from the healthcheck goroutine, or exceed our timeout threshold select { case maybeErr := <-resultsChan: - return maybeErr + if maybeErr != nil { + return fmt.Errorf("encountered error during healthcheck: %w", maybeErr) + } + + return nil case <-time.After(healtcheckTimeout): return fmt.Errorf("osqueryinstance healthcheck exceeded timeout of %s", healtcheckTimeout.String()) } From 7de0b30c40b69ac4c40100e6a789d729523628e0 Mon Sep 17 00:00:00 2001 From: zack olson Date: Tue, 21 Jan 2025 11:42:28 -0500 Subject: [PATCH 3/3] PR feedback: typo in healthcheckTimeout --- pkg/osquery/runtime/osqueryinstance.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/osquery/runtime/osqueryinstance.go b/pkg/osquery/runtime/osqueryinstance.go index 32bea39fb..0f68968b5 100644 --- a/pkg/osquery/runtime/osqueryinstance.go +++ b/pkg/osquery/runtime/osqueryinstance.go @@ -73,7 +73,7 @@ const ( maxSocketWaitTime = 30 * time.Second // How long to wait for a single osqueryinstance healthcheck before forcibly returning error - healtcheckTimeout = 10 * time.Second + healthcheckTimeout = 10 * time.Second ) // OsqueryInstanceOption is a functional option pattern for defining how an @@ -167,8 +167,8 @@ func (i *OsqueryInstance) Healthy() error { } return nil - case <-time.After(healtcheckTimeout): - return fmt.Errorf("osqueryinstance healthcheck exceeded timeout of %s", healtcheckTimeout.String()) + case <-time.After(healthcheckTimeout): + return fmt.Errorf("osqueryinstance healthcheck exceeded timeout of %s", healthcheckTimeout.String()) } }