Skip to content

Commit

Permalink
Merge pull request PelicanPlatform#1514 from bbockelm/retry_metadata
Browse files Browse the repository at this point in the history
Retry metadata lookup
  • Loading branch information
turetske authored Jul 25, 2024
2 parents 6e574c6 + 22caac1 commit e1b917f
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 13 deletions.
50 changes: 37 additions & 13 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,31 @@ func GetAllPrefixes() []ConfigPrefix {
return prefixes
}

// Helper function to start a metadata request.
//
// We see periodic timeouts when doing metadata lookup at sites.
// Adding a modest retry will, hopefully, reduce the number of errors
// that propagate out to users
func startMetadataQuery(ctx context.Context, httpClient *http.Client, discoveryUrl *url.URL) (result *http.Response, err error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, discoveryUrl.String(), nil)
if err != nil {
err = errors.Wrapf(err, "Failure when doing federation metadata request creation for %s", discoveryUrl)
return
}
req.Header.Set("User-Agent", "pelican/"+version)

result, err = httpClient.Do(req)
if err != nil {
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
err = MetadataTimeoutErr.Wrap(err)
} else {
err = NewMetadataError(err, "Error occurred when querying for metadata")
}
}
return
}

// This function is for discovering federations as specified by a url during a pelican:// transfer.
// this does not populate global fields and is more temporary per url
func DiscoverUrlFederation(ctx context.Context, federationDiscoveryUrl string) (metadata FederationDiscovery, err error) {
Expand Down Expand Up @@ -478,24 +503,23 @@ func DiscoverUrlFederation(ctx context.Context, federationDiscoveryUrl string) (
Transport: GetTransport(),
Timeout: time.Second * 5,
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, discoveryUrl.String(), nil)
if err != nil {
err = errors.Wrapf(err, "Failure when doing federation metadata request creation for %s", discoveryUrl)
return
}
req.Header.Set("User-Agent", "pelican/"+version)

result, err := httpClient.Do(req)
if err != nil {
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
err = MetadataTimeoutErr.Wrap(err)
return
var result *http.Response
for idx := 1; idx <= 3; idx++ {
result, err = startMetadataQuery(ctx, &httpClient, discoveryUrl)
if err == nil {
break
} else if errors.Is(err, MetadataTimeoutErr) && ctx.Err() == nil {
log.Warningln("Timeout occurred when querying discovery URL", discoveryUrl.String(), "for metadata;", 3-idx, "retries remaining")
time.Sleep(2 * time.Second)
} else {
err = NewMetadataError(err, "Error occurred when querying for metadata")
return
}
}
if errors.Is(err, MetadataTimeoutErr) {
log.Errorln("3 timeouts occurred when querying discovery URL", discoveryUrl.String())
return
}

if result.Body != nil {
defer result.Body.Close()
Expand Down
44 changes: 44 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -835,6 +836,49 @@ func TestDiscoverUrlFederation(t *testing.T) {
viper.Reset()
})

t.Run("TestMetadataDiscoveryTimeoutRetry", func(t *testing.T) {
viper.Set("tlsskipverify", true)
viper.Set("Logging.Level", "Debug")
viper.Set("Transport.ResponseHeaderTimeout", "300ms")
InitConfig()
err := InitClient()
require.NoError(t, err)

hook := test.NewGlobal()
logrus.SetLevel(logrus.WarnLevel)

// Create a server that sleeps for a longer duration than the timeout
ctr := 0
server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if ctr < 1 {
time.Sleep(2 * time.Second)
}

ctr += 1
w.WriteHeader(200)
_, err := w.Write([]byte("{\"director_endpoint\": \"https://osdf-director.osg-htc.org\", \"namespace_registration_endpoint\": \"https://osdf-registry.osg-htc.org\", \"jwks_uri\": \"https://osg-htc.org/osdf/public_signing_key.jwks\"}"))
assert.NoError(t, err)
}))
defer server.Close()

// Set a short timeout for the test
timeout := 5 * time.Second

// Create a context with the timeout
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

// Call the function with the server URL and the context
_, err = DiscoverUrlFederation(ctx, server.URL)

assert.Equal(t, log.WarnLevel, hook.LastEntry().Level)
assert.Equal(t, "Timeout occurred when querying discovery URL "+server.URL+"/.well-known/pelican-configuration for metadata; 2 retries remaining", hook.LastEntry().Message)

// Assert that the error is the expected metadata timeout error
assert.NoError(t, err)
viper.Reset()
})

t.Run("TestCanceledContext", func(t *testing.T) {
// Create a server that waits for the context to be canceled
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down

0 comments on commit e1b917f

Please sign in to comment.