diff --git a/config/resources/defaults.yaml b/config/resources/defaults.yaml index bffddd719..9085f3cd6 100644 --- a/config/resources/defaults.yaml +++ b/config/resources/defaults.yaml @@ -42,6 +42,7 @@ Server: WebHost: "0.0.0.0" EnableUI: true RegistrationRetryInterval: 10s + StartupTimeout: 10s UILoginRateLimit: 1 Director: DefaultResponse: cache diff --git a/docs/parameters.yaml b/docs/parameters.yaml index a22e717e2..65ef6eaa8 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1719,6 +1719,14 @@ type: stringSlice default: none components: ["registry","origin","cache"] --- +name: Server.StartupTimeout +description: |+ + The amount of time the Pelican server will wait for its components and services to start up. + If the timeout is hit while waiting on a component, the server will shut down. +type: duration +default: 10s +components: ["origin", "cache", "registry", "director"] +--- name: Server.EnablePprof description: |+ A boolean to enable or disable the [pprof](https://pkg.go.dev/runtime/pprof) endpoints for debugging. 
diff --git a/param/parameters.go b/param/parameters.go index 2a56e3bc1..776558dea 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -357,6 +357,7 @@ var ( Origin_SelfTestInterval = DurationParam{"Origin.SelfTestInterval"} Registry_InstitutionsUrlReloadMinutes = DurationParam{"Registry.InstitutionsUrlReloadMinutes"} Server_RegistrationRetryInterval = DurationParam{"Server.RegistrationRetryInterval"} + Server_StartupTimeout = DurationParam{"Server.StartupTimeout"} Transport_DialerKeepAlive = DurationParam{"Transport.DialerKeepAlive"} Transport_DialerTimeout = DurationParam{"Transport.DialerTimeout"} Transport_ExpectContinueTimeout = DurationParam{"Transport.ExpectContinueTimeout"} diff --git a/param/parameters_struct.go b/param/parameters_struct.go index 4c134ac55..32a147760 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -245,6 +245,7 @@ type Config struct { Modules []string `mapstructure:"modules"` RegistrationRetryInterval time.Duration `mapstructure:"registrationretryinterval"` SessionSecretFile string `mapstructure:"sessionsecretfile"` + StartupTimeout time.Duration `mapstructure:"startuptimeout"` TLSCACertificateDirectory string `mapstructure:"tlscacertificatedirectory"` TLSCACertificateFile string `mapstructure:"tlscacertificatefile"` TLSCAKey string `mapstructure:"tlscakey"` @@ -536,6 +537,7 @@ type configWithType struct { Modules struct { Type string; Value []string } RegistrationRetryInterval struct { Type string; Value time.Duration } SessionSecretFile struct { Type string; Value string } + StartupTimeout struct { Type string; Value time.Duration } TLSCACertificateDirectory struct { Type string; Value string } TLSCACertificateFile struct { Type string; Value string } TLSCAKey struct { Type string; Value string } diff --git a/server_utils/server_utils.go b/server_utils/server_utils.go index 996c48aad..e13e91f5f 100644 --- a/server_utils/server_utils.go +++ b/server_utils/server_utils.go @@ -140,7 +140,7 @@ func 
GetTopologyJSON(ctx context.Context, includeDowned bool) (*TopologyNamespac // Logging messages emitted will refer to `server` (e.g., origin, cache, director) // Pass true to statusMismatch to allow a mismatch of expected status code and what's returned not fail immediately func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expectedStatus int, statusMismatch bool) error { - expiry := time.Now().Add(10 * time.Second) + expiry := time.Now().Add(param.Server_StartupTimeout.GetDuration()) ctx, cancel := context.WithDeadline(ctx, expiry) defer cancel() ticker := time.NewTicker(50 * time.Millisecond) @@ -215,7 +215,7 @@ func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expect } case <-ctx.Done(): if statusError != nil { - return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within 10s", reqUrl, expectedStatus) + return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within %s", reqUrl, expectedStatus, param.Server_StartupTimeout.GetDuration().String()) } return ctx.Err() } @@ -224,7 +224,7 @@ func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expect if statusError != nil { - return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within 10s", reqUrl, expectedStatus) + return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within %s", reqUrl, expectedStatus, param.Server_StartupTimeout.GetDuration().String()) } else { - return errors.Errorf("The %s server at %s either did not startup or did not respond quickly enough after 10s of waiting", server, reqUrl) + return errors.Errorf("The %s server at %s either did not startup or did not respond quickly enough after %s of waiting", server, reqUrl, param.Server_StartupTimeout.GetDuration().String()) } } diff --git a/server_utils/server_utils_test.go b/server_utils/server_utils_test.go index 3eba893f9..741f5854e 100644 --- a/server_utils/server_utils_test.go +++ b/server_utils/server_utils_test.go @@ -29,6 +29,7 @@ import ( "github.com/sirupsen/logrus" "github.com/sirupsen/logrus/hooks/test" +
"github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -136,4 +137,26 @@ func TestWaitUntilWorking(t *testing.T) { assert.Equal(t, logrus.InfoLevel, hook.LastEntry().Level) assert.Contains(t, hook.LastEntry().Message, expectedErrorMsg) }) + + t.Run("server-short-timeout", func(t *testing.T) { + // Restore the previous value on exit (defer evaluates viper.Get now) so the 1s override does not leak into other tests. + defer viper.Set("Server.StartupTimeout", viper.Get("Server.StartupTimeout")) + viper.Set("Server.StartupTimeout", "1s") + earlyCancelCtx, earlyCancel := context.WithCancel(ctx) + go func() { + <-time.After(1500 * time.Millisecond) + earlyCancel() + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // WaitUntilWorking has a 1s timeout, so we make sure to wait longer than that + <-time.After(2000 * time.Millisecond) + w.WriteHeader(http.StatusOK) // 200 + })) + defer server.Close() + + err := WaitUntilWorking(earlyCancelCtx, "GET", server.URL, "testServer", http.StatusOK, false) + require.Error(t, err) + expectedErrorMsg := fmt.Sprintf("The testServer server at %s either did not startup or did not respond quickly enough after 1s of waiting", server.URL) + assert.Equal(t, expectedErrorMsg, err.Error()) + }) }