Skip to content

Commit

Permalink
Make server startup timeout configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
bbockelm committed Jul 16, 2024
1 parent ef5e20e commit 250fb11
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 3 deletions.
1 change: 1 addition & 0 deletions config/resources/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Server:
WebHost: "0.0.0.0"
EnableUI: true
RegistrationRetryInterval: 10s
StartupTimeout: 10s
UILoginRateLimit: 1
Director:
DefaultResponse: cache
Expand Down
8 changes: 8 additions & 0 deletions docs/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1719,6 +1719,14 @@ type: stringSlice
default: none
components: ["registry","origin","cache"]
---
name: Server.StartupTimeout
description: |+
The amount of time the pelican server will wait for its components and services to start up.
If the timeout is hit while waiting on a component, the server will shut down.
type: duration
default: 10s
components: ["origin", "cache", "registry", "director"]
---
name: Server.EnablePprof
description: |+
A boolean to enable or disable the [pprof](https://pkg.go.dev/runtime/pprof) endpoints for debugging.
Expand Down
1 change: 1 addition & 0 deletions param/parameters.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions param/parameters_struct.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions server_utils/server_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func GetTopologyJSON(ctx context.Context, includeDowned bool) (*TopologyNamespac
// Logging messages emitted will refer to `server` (e.g., origin, cache, director)
// Pass true to statusMismatch so that a mismatch between the expected status code and the one returned does not fail immediately
func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expectedStatus int, statusMismatch bool) error {
expiry := time.Now().Add(10 * time.Second)
expiry := time.Now().Add(param.Server_StartupTimeout.GetDuration())
ctx, cancel := context.WithDeadline(ctx, expiry)
defer cancel()
ticker := time.NewTicker(50 * time.Millisecond)
Expand Down Expand Up @@ -215,7 +215,7 @@ func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expect
}
case <-ctx.Done():
if statusError != nil {
return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within 10s", reqUrl, expectedStatus)
return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within %s", reqUrl, expectedStatus, param.Server_StartupTimeout.GetDuration().String())
}
return ctx.Err()
}
Expand All @@ -224,7 +224,7 @@ func WaitUntilWorking(ctx context.Context, method, reqUrl, server string, expect
if statusError != nil {
return errors.Wrapf(statusError, "url %s didn't respond with the expected status code %d within 10s", reqUrl, expectedStatus)
} else {
return errors.Errorf("The %s server at %s either did not startup or did not respond quickly enough after 10s of waiting", server, reqUrl)
return errors.Errorf("The %s server at %s either did not startup or did not respond quickly enough after %s of waiting", server, reqUrl, param.Server_StartupTimeout.GetDuration().String())
}
}

Expand Down
21 changes: 21 additions & 0 deletions server_utils/server_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -136,4 +137,24 @@ func TestWaitUntilWorking(t *testing.T) {
assert.Equal(t, logrus.InfoLevel, hook.LastEntry().Level)
assert.Contains(t, hook.LastEntry().Message, expectedErrorMsg)
})

t.Run("server-short-timeout", func(t *testing.T) {
viper.Set("Server.StartupTimeout", "1s")
earlyCancelCtx, earlyCancel := context.WithCancel(ctx)
go func() {
<-time.After(1500 * time.Millisecond)
earlyCancel()
}()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// WaitUntilWorking has a 1s timeout, so we make sure to wait longer than that
<-time.After(2000 * time.Millisecond)
w.WriteHeader(http.StatusOK) // 200
}))
defer server.Close()

err := WaitUntilWorking(earlyCancelCtx, "GET", server.URL, "testServer", http.StatusOK, false)
require.Error(t, err)
expectedErrorMsg := fmt.Sprintf("The testServer server at %s either did not startup or did not respond quickly enough after 1s of waiting", server.URL)
assert.Equal(t, expectedErrorMsg, err.Error())
})
}

0 comments on commit 250fb11

Please sign in to comment.