From c8d012de5e0424d7e866ed339d330c257434568a Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Tue, 17 Sep 2024 18:20:47 +0000 Subject: [PATCH] Use the pelican_url package throughout Pelican With the scaffolding in the pelican_url package, this commit hooks up the rest of Pelican to the machinery. --- broker/token_utils.go | 2 +- client/acquire_token.go | 15 +- client/director.go | 30 ++- client/director_test.go | 24 +- client/errorAccum.go | 4 +- client/fed_linux_test.go | 43 ++-- client/fed_test.go | 181 +++++++------- client/handle_http.go | 169 ++----------- client/handle_http_test.go | 244 ------------------- client/main.go | 339 ++++++++------------------- client/main_test.go | 267 ++++++++++++++------- client/sharing_url.go | 71 +----- client/sharing_url_test.go | 126 ++-------- cmd/config_mgr.go | 18 +- cmd/object_copy.go | 5 - cmd/object_get.go | 5 - cmd/object_share.go | 5 +- cmd/plugin.go | 8 +- cmd/registry_client.go | 16 +- config/config.go | 208 +++------------- config/config_test.go | 219 +---------------- director/director.go | 27 ++- director/discovery.go | 13 +- director/discovery_test.go | 5 +- director/origin_api.go | 2 +- fed_test_utils/fed.go | 23 ++ launcher_utils/advertise.go | 4 +- launcher_utils/advertise_test.go | 7 +- launcher_utils/register_namespace.go | 10 +- local_cache/local_cache.go | 13 +- origin/origin_ui.go | 2 +- origin/reg_status.go | 2 +- origin/reg_status_test.go | 21 +- registry/registry.go | 8 +- registry/registry_test.go | 13 +- server_utils/registry.go | 2 +- token/token_verify.go | 2 +- utils/client.go | 79 ------- utils/client_test.go | 162 ------------- utils/utils.go | 37 --- 40 files changed, 645 insertions(+), 1786 deletions(-) delete mode 100644 utils/client.go diff --git a/broker/token_utils.go b/broker/token_utils.go index 332b1bc76..b2fc1c8bb 100644 --- a/broker/token_utils.go +++ b/broker/token_utils.go @@ -86,7 +86,7 @@ func getRegistryIssValue(prefix string) (iss string, err error) { if err != nil { return } - namespaceUrlStr := fedInfo.NamespaceRegistrationEndpoint + namespaceUrlStr := fedInfo.RegistryEndpoint if namespaceUrlStr == "" { err = errors.New("namespace URL is not set") return diff --git a/client/acquire_token.go b/client/acquire_token.go index 9183658e7..3b2190212 100644 --- a/client/acquire_token.go +++ b/client/acquire_token.go @@ -24,8 +24,6 @@ import ( "fmt" "io/fs" "net/http" - "net/url" - "os" "path" "path/filepath" "strconv" @@ -41,6 +39,7 @@ import ( "github.com/pelicanplatform/pelican/config" oauth2 "github.com/pelicanplatform/pelican/oauth2" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" ) @@ -60,7 +59,7 @@ type ( // of the process. tokenGenerator struct { DirResp *server_structs.DirectorResponse - Destination *url.URL + Destination *pelican_url.PelicanURL TokenLocation string TokenName string IsWrite bool @@ -79,7 +78,7 @@ type ( } ) -func newTokenGenerator(dest *url.URL, dirResp *server_structs.DirectorResponse, isWrite bool, enableAcquire bool) *tokenGenerator { +func newTokenGenerator(dest *pelican_url.PelicanURL, dirResp *server_structs.DirectorResponse, isWrite bool, enableAcquire bool) *tokenGenerator { return &tokenGenerator{ DirResp: dirResp, Destination: dest, @@ -549,7 +548,7 @@ func registerClient(dirResp server_structs.DirectorResponse) (*config.PrefixEntr // Given a URL and a director Response, attempt to acquire a valid // token for that URL. -func AcquireToken(destination *url.URL, dirResp server_structs.DirectorResponse, opts config.TokenGenerationOpts) (string, error) { +func AcquireToken(dest string, dirResp server_structs.DirectorResponse, opts config.TokenGenerationOpts) (string, error) { log.Debugln("Acquiring a token from configuration and OAuth2") @@ -621,7 +620,7 @@ func AcquireToken(destination *url.URL, dirResp server_structs.DirectorResponse, var acceptableToken *config.TokenEntry = nil acceptableUnexpiredToken := "" for idx, token := range prefixEntry.Tokens { - if !tokenIsAcceptable(token.AccessToken, destination.Path, dirResp, opts) { + if !tokenIsAcceptable(token.AccessToken, dest, dirResp, opts) { continue } if acceptableToken == nil { @@ -676,7 +675,7 @@ func AcquireToken(destination *url.URL, dirResp server_structs.DirectorResponse, } } - token, err := oauth2.AcquireToken(issuer, prefixEntry, dirResp, destination.Path, opts) + token, err := oauth2.AcquireToken(issuer, prefixEntry, dirResp, dest, opts) if errors.Is(err, oauth2.ErrUnknownClient) { // We use anonymously-registered clients; OA4MP can periodically garbage collect these to prevent DoS // In this case, we register a new client and try to acquire again. @@ -690,7 +689,7 @@ func AcquireToken(destination *url.URL, dirResp server_structs.DirectorResponse, log.Warningln("Failed to save new token to configuration file:", err) } - if token, err = oauth2.AcquireToken(issuer, prefixEntry, dirResp, destination.Path, opts); err != nil { + if token, err = oauth2.AcquireToken(issuer, prefixEntry, dirResp, dest, opts); err != nil { return "", err } } else if err != nil { diff --git a/client/director.go b/client/director.go index 2ad92faea..69da1823c 100644 --- a/client/director.go +++ b/client/director.go @@ -32,14 +32,22 @@ import ( log "github.com/sirupsen/logrus" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/utils" ) // Make a request to the director for a given verb/resource; return the // HTTP response object only if a 307 is returned. -func queryDirector(ctx context.Context, verb, sourcePath, directorUrl string) (resp *http.Response, err error) { - resourceUrl := directorUrl + sourcePath +func queryDirector(ctx context.Context, verb string, pUrl *pelican_url.PelicanURL) (resp *http.Response, err error) { + resourceUrl, err := url.Parse(pUrl.FedInfo.DirectorEndpoint) + if err != nil { + log.Errorln("Failed to parse the director URL:", err) + return nil, err + } + resourceUrl.Path = pUrl.Path + resourceUrl.RawQuery = pUrl.RawQuery + // Here we use http.Transport to prevent the client from following the director's // redirect. We use the Location url elsewhere (plus we still need to do the token // dance!) @@ -52,7 +60,7 @@ func queryDirector(ctx context.Context, verb, sourcePath, directorUrl string) (r }, } - req, err := http.NewRequestWithContext(ctx, verb, resourceUrl, nil) + req, err := http.NewRequestWithContext(ctx, verb, resourceUrl.String(), nil) if err != nil { log.Errorln("Failed to create an HTTP request:", err) return nil, err @@ -161,28 +169,26 @@ func parseServersFromDirectorResponse(resp *http.Response) (servers []*url.URL, } // Retrieve federation namespace information for a given URL. -func GetDirectorInfoForPath(ctx context.Context, resourcePath, directorUrl string, isPut bool, query string) (parsedResponse server_structs.DirectorResponse, err error) { - if directorUrl == "" { +func GetDirectorInfoForPath(ctx context.Context, pUrl *pelican_url.PelicanURL, isPut bool) (parsedResponse server_structs.DirectorResponse, err error) { + if pUrl.FedInfo.DirectorEndpoint == "" { return server_structs.DirectorResponse{}, - errors.Errorf("unable to retrieve information from a Director for object %s because no director URL was provided", resourcePath) + errors.Errorf("unable to retrieve information from a Director for object %s because none was found in pelican URL metadata.", pUrl.Path) } - log.Debugln("Will query director at", directorUrl, "for object", resourcePath) + log.Debugln("Will query director at", pUrl.FedInfo.DirectorEndpoint, "for object", pUrl.Path) verb := "GET" if isPut { verb = "PUT" } - if query != "" { - resourcePath += "?" + query - } + var dirResp *http.Response - dirResp, err = queryDirector(ctx, verb, resourcePath, directorUrl) + dirResp, err = queryDirector(ctx, verb, pUrl) if err != nil { if isPut && dirResp != nil && dirResp.StatusCode == 405 { err = errors.New("error 405: No writeable origins were found") return } else { - err = errors.Wrapf(err, "error while querying the director at %s", directorUrl) + err = errors.Wrapf(err, "error while querying the director at %s", pUrl.FedInfo.DirectorEndpoint) return } } diff --git a/client/director_test.go b/client/director_test.go index e9e0f0e5d..673e11c8f 100644 --- a/client/director_test.go +++ b/client/director_test.go @@ -21,6 +21,7 @@ package client import ( "bytes" "context" + "fmt" "io" "net/http" "net/http/httptest" @@ -28,6 +29,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/utils" ) @@ -102,8 +104,14 @@ func TestQueryDirector(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(handler)) defer server.Close() + pUrl := pelican_url.PelicanURL{ + FedInfo: pelican_url.FederationDiscovery{ + DirectorEndpoint: server.URL, + }, + Path: "/foo/bar", + } // Call QueryDirector with the test server URL and a source path - actualResp, err := queryDirector(context.Background(), "GET", "/foo/bar", server.URL) + actualResp, err := queryDirector(context.Background(), "GET", &pUrl) if err != nil { t.Fatal(err) } @@ -163,7 +171,7 @@ func TestGetDirectorInfoForPath(t *testing.T) { directorUrl: "", isPut: false, query: "", - expectedError: "unable to retrieve information from a Director for object /test because no director URL was provided", + expectedError: "unable to retrieve information from a Director for object /test because none was found in pelican URL metadata.", }, { name: "Successful GET request", @@ -194,7 +202,17 @@ func TestGetDirectorInfoForPath(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() - _, err := GetDirectorInfoForPath(ctx, tt.resourcePath, tt.directorUrl, tt.isPut, tt.query) + urlStr := fmt.Sprintf("pelican://foo%s", tt.resourcePath) + if tt.query != "" { + urlStr += "?" + tt.query + } + + pUrl, err := pelican_url.Parse(urlStr, nil, nil) + assert.NoError(t, err) + + pUrl.FedInfo.DirectorEndpoint = tt.directorUrl + + _, err = GetDirectorInfoForPath(ctx, pUrl, tt.isPut) if tt.expectedError != "" { assert.Error(t, err) assert.Contains(t, err.Error(), tt.expectedError) diff --git a/client/errorAccum.go b/client/errorAccum.go index 6ecd56609..35703e4e9 100644 --- a/client/errorAccum.go +++ b/client/errorAccum.go @@ -28,7 +28,7 @@ import ( grab "github.com/opensaucerer/grab/v3" - "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" ) type ( @@ -143,7 +143,7 @@ func IsRetryable(err error) bool { if errors.Is(err, grab.ErrBadLength) { return false } - if errors.Is(err, config.MetadataTimeoutErr) { + if errors.Is(err, pelican_url.MetadataTimeoutErr) { return true } // There's little a user can do about a TCP connection reset besides retry; if it diff --git a/client/fed_linux_test.go b/client/fed_linux_test.go index 3129605bd..fe43928be 100644 --- a/client/fed_linux_test.go +++ b/client/fed_linux_test.go @@ -23,6 +23,7 @@ package client_test import ( "fmt" "io/fs" + "net/url" "os" "path/filepath" "strconv" @@ -38,6 +39,7 @@ import ( config "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/fed_test_utils" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_utils" "github.com/pelicanplatform/pelican/token" "github.com/pelicanplatform/pelican/token_scopes" @@ -49,6 +51,8 @@ func TestRecursiveUploadsAndDownloads(t *testing.T) { server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, mixedAuthOriginCfg) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) te, err := client.NewTransferEngine(fed.Ctx) require.NoError(t, err) @@ -124,20 +128,20 @@ func TestRecursiveUploadsAndDownloads(t *testing.T) { // Set path for object to upload/download tempPath := tempDir dirName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadUrl := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", dirName) // Upload the file with PUT - transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadURL, true, client.WithTokenLocation(tempToken.Name())) + transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadUrl, true, client.WithTokenLocation(tempToken.Name())) require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsUpload) // Download the files we just uploaded var transferDetailsDownload []client.TransferResults if export.Capabilities.PublicReads { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), true) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, t.TempDir(), true) } else { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), true, client.WithTokenLocation(tempToken.Name())) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, t.TempDir(), true, client.WithTokenLocation(tempToken.Name())) } require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsDownload) @@ -151,22 +155,21 @@ func TestRecursiveUploadsAndDownloads(t *testing.T) { require.NoError(t, err) }() assert.NoError(t, err) + + oldHost, err := pelican_url.SetOsdfDiscoveryHost(discoveryUrl.String()) + require.NoError(t, err) + defer func() { + _, _ = pelican_url.SetOsdfDiscoveryHost(oldHost) + }() + for _, export := range fed.Exports { // Set path for object to upload/download tempPath := tempDir dirName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("osdf:///%s/%s/%s", export.FederationPrefix, "osdf_osdf", dirName) - hostname := fmt.Sprintf("%v:%v", param.Server_WebHost.GetString(), param.Server_WebPort.GetInt()) - - // Set our metadata values in config since that is what this url scheme - prefix combo does in handle_http - metadata, err := config.DiscoverUrlFederation(fed.Ctx, "https://"+hostname) - assert.NoError(t, err) - viper.Set("Federation.DirectorUrl", metadata.DirectorEndpoint) - viper.Set("Federation.RegistryUrl", metadata.NamespaceRegistrationEndpoint) - viper.Set("Federation.DiscoveryUrl", hostname) + uploadUrl := fmt.Sprintf("osdf://%s/%s/%s", export.FederationPrefix, "osdf_osdf", dirName) // Upload the file with PUT - transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadURL, true, client.WithTokenLocation(tempToken.Name())) + transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadUrl, true, client.WithTokenLocation(tempToken.Name())) require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsUpload) @@ -174,9 +177,9 @@ func TestRecursiveUploadsAndDownloads(t *testing.T) { tmpDir := t.TempDir() var transferDetailsDownload []client.TransferResults if export.Capabilities.PublicReads { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, tmpDir, true) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, tmpDir, true) } else { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, tmpDir, true, client.WithTokenLocation(tempToken.Name())) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, tmpDir, true, client.WithTokenLocation(tempToken.Name())) } require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsDownload) @@ -195,19 +198,19 @@ func TestRecursiveUploadsAndDownloads(t *testing.T) { // Set path for object to upload/download tempPath := tempDir dirName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadUrl := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", dirName) // Upload the file with PUT - transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadURL, true, client.WithTokenLocation(tempToken.Name())) + transferDetailsUpload, err := client.DoPut(fed.Ctx, tempDir, uploadUrl, true, client.WithTokenLocation(tempToken.Name())) require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsUpload) // Download the files we just uploaded var transferDetailsDownload []client.TransferResults if export.Capabilities.PublicReads { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), true) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, t.TempDir(), true) } else { - transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), true, client.WithTokenLocation(tempToken.Name())) + transferDetailsDownload, err = client.DoGet(fed.Ctx, uploadUrl, t.TempDir(), true, client.WithTokenLocation(tempToken.Name())) } require.NoError(t, err) verifySuccessfulTransfer(t, transferDetailsDownload) diff --git a/client/fed_test.go b/client/fed_test.go index 93e2176f4..623af7cd8 100644 --- a/client/fed_test.go +++ b/client/fed_test.go @@ -44,11 +44,11 @@ import ( "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/fed_test_utils" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_utils" "github.com/pelicanplatform/pelican/test_utils" "github.com/pelicanplatform/pelican/token" "github.com/pelicanplatform/pelican/token_scopes" - "github.com/pelicanplatform/pelican/utils" ) var ( @@ -108,6 +108,8 @@ func TestGetAndPutAuth(t *testing.T) { viper.Reset() server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, bothAuthOriginCfg) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + assert.NoError(t, err) // Other set-up items: testFileContent := "test file content" @@ -139,9 +141,8 @@ func TestGetAndPutAuth(t *testing.T) { for _, export := range fed.Exports { tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadURL := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", fileName) - // Upload the file with PUT transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) require.NoError(t, err) @@ -161,29 +162,30 @@ func TestGetAndPutAuth(t *testing.T) { require.NoError(t, err) }() assert.NoError(t, err) - viper.Set(param.Federation_DiscoveryUrl.GetName(), fmt.Sprintf("%s://%s:%s", "https", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()))) // Set path for object to upload/download for _, export := range fed.Exports { tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("%s/%s/%s", + uploadUrlStr := fmt.Sprintf("%s/%s/%s", export.FederationPrefix, "osdf_osdf", fileName) - - uploadURL, err := utils.UrlWithFederation(uploadURL) + uploadUrl, err := pelican_url.Parse(uploadUrlStr, nil, []pelican_url.DiscoveryOption{pelican_url.WithDiscoveryUrl(discoveryUrl)}) assert.NoError(t, err) // Upload the file with PUT - transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) - assert.NoError(t, err) - assert.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) + transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadUrl.String(), false, client.WithTokenLocation(tempToken.Name())) + require.NoError(t, err) + require.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) - queryURL := uploadURL + "?directread" + queryUrl, err := pelican_url.Parse(uploadUrlStr+"?directread", nil, []pelican_url.DiscoveryOption{pelican_url.WithDiscoveryUrl(discoveryUrl)}) + assert.NoError(t, err) tempDir := t.TempDir() // Download that same file with GET - transferResultsDownload, err := client.DoGet(fed.Ctx, queryURL, tempDir, false, client.WithTokenLocation(tempToken.Name())) - assert.NoError(t, err) - assert.Equal(t, transferResultsDownload[0].TransferredBytes, transferResultsUpload[0].TransferredBytes) + transferResultsDownload, err := client.DoGet(fed.Ctx, queryUrl.String(), tempDir, false, client.WithTokenLocation(tempToken.Name())) + + require.NoError(t, err) + require.Equal(t, transferResultsDownload[0].TransferredBytes, transferResultsUpload[0].TransferredBytes) + stats, err := os.Stat(filepath.Join(tempDir, fileName)) assert.NoError(t, err) assert.NotNil(t, stats) @@ -208,8 +210,8 @@ func TestGetAndPutAuth(t *testing.T) { // Upload the file with PUT transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithToken(tmpTkn)) - assert.NoError(t, err) - assert.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) + require.NoError(t, err) + require.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) // Download that same file with GET transferResultsDownload, err := client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false, client.WithToken(tmpTkn)) @@ -231,13 +233,13 @@ func TestGetAndPutAuth(t *testing.T) { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadURL := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", fileName) // Upload the file with PUT transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) - assert.NoError(t, err) - assert.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) + require.NoError(t, err) + require.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) // Download that same file with GET transferResultsDownload, err := client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false, client.WithTokenLocation(tempToken.Name())) @@ -255,28 +257,26 @@ func TestGetAndPutAuth(t *testing.T) { require.NoError(t, err) }() + oldHost, err := pelican_url.SetOsdfDiscoveryHost(discoveryUrl.String()) + require.NoError(t, err) + defer func() { + _, _ = pelican_url.SetOsdfDiscoveryHost(oldHost) + }() + for _, export := range fed.Exports { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) // Minimal fix of test as it is soon to be replaced - uploadURL := fmt.Sprintf("osdf://%s/%s", export.FederationPrefix, fileName) - hostname := fmt.Sprintf("%v:%v", param.Server_WebHost.GetString(), param.Server_WebPort.GetInt()) - - // Set our metadata values in config since that is what this url scheme - prefix combo does in handle_http - metadata, err := config.DiscoverUrlFederation(fed.Ctx, "https://"+hostname) - assert.NoError(t, err) - viper.Set("Federation.DirectorUrl", metadata.DirectorEndpoint) - viper.Set("Federation.RegistryUrl", metadata.NamespaceRegistrationEndpoint) - viper.Set("Federation.DiscoveryUrl", hostname) + uploadUrl := fmt.Sprintf("osdf://%s/%s", export.FederationPrefix, fileName) // Upload the file with PUT - transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) - assert.NoError(t, err) - assert.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) + transferResultsUpload, err := client.DoPut(fed.Ctx, tempFile.Name(), uploadUrl, false, client.WithTokenLocation(tempToken.Name())) + require.NoError(t, err) + require.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) // Download that same file with GET - transferResultsDownload, err := client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false, client.WithTokenLocation(tempToken.Name())) + transferResultsDownload, err := client.DoGet(fed.Ctx, uploadUrl, t.TempDir(), false, client.WithTokenLocation(tempToken.Name())) assert.NoError(t, err) assert.Equal(t, transferResultsDownload[0].TransferredBytes, transferResultsUpload[0].TransferredBytes) } @@ -292,6 +292,8 @@ func TestCopyAuth(t *testing.T) { viper.Reset() server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, bothAuthOriginCfg) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + assert.NoError(t, err) te, err := client.NewTransferEngine(fed.Ctx) require.NoError(t, err) @@ -325,7 +327,7 @@ func TestCopyAuth(t *testing.T) { for _, export := range fed.Exports { tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadURL := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", fileName) // Upload the file with COPY @@ -348,27 +350,26 @@ func TestCopyAuth(t *testing.T) { require.NoError(t, err) }() - viper.Set(param.Federation_DiscoveryUrl.GetName(), fmt.Sprintf("%s://%s:%s", "https", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()))) - // Set path for object to upload/download for _, export := range fed.Exports { tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("%s/%s/%s", + uploadUrlStr := fmt.Sprintf("%s/%s/%s", export.FederationPrefix, "osdf_osdf", fileName) - uploadURL, err := utils.UrlWithFederation(uploadURL) + uploadUrl, err := pelican_url.Parse(uploadUrlStr, nil, []pelican_url.DiscoveryOption{pelican_url.WithDiscoveryUrl(discoveryUrl)}) assert.NoError(t, err) // Upload the file with COPY - transferResultsUpload, err := client.DoCopy(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) + transferResultsUpload, err := client.DoCopy(fed.Ctx, tempFile.Name(), uploadUrl.String(), false, client.WithTokenLocation(tempToken.Name())) assert.NoError(t, err) assert.Equal(t, int64(17), transferResultsUpload[0].TransferredBytes) - queryURL := uploadURL + "?directread" + queryUrl, err := pelican_url.Parse(uploadUrlStr+"?directread", nil, []pelican_url.DiscoveryOption{pelican_url.WithDiscoveryUrl(discoveryUrl)}) + assert.NoError(t, err) tempDir := t.TempDir() // Download that same file with COPY - transferResultsDownload, err := client.DoCopy(fed.Ctx, queryURL, tempDir, false, client.WithTokenLocation(tempToken.Name())) + transferResultsDownload, err := client.DoCopy(fed.Ctx, queryUrl.String(), tempDir, false, client.WithTokenLocation(tempToken.Name())) assert.NoError(t, err) assert.Equal(t, int64(17), transferResultsDownload[0].TransferredBytes) stats, err := os.Stat(filepath.Join(tempDir, fileName)) @@ -390,7 +391,7 @@ func TestCopyAuth(t *testing.T) { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s/%s", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), + uploadURL := fmt.Sprintf("pelican://%s%s/%s/%s", discoveryUrl.Host, export.FederationPrefix, "osdf_osdf", fileName) // Upload the file with PUT @@ -414,31 +415,28 @@ func TestCopyAuth(t *testing.T) { require.NoError(t, err) }() + oldHost, err := pelican_url.SetOsdfDiscoveryHost(discoveryUrl.String()) + require.NoError(t, err) + defer func() { + _, _ = pelican_url.SetOsdfDiscoveryHost(oldHost) + }() + for _, export := range fed.Exports { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) // Minimal fix of test as it is soon to be replaced - uploadURL := fmt.Sprintf("osdf://%s/%s", export.FederationPrefix, fileName) - hostname := fmt.Sprintf("%v:%v", param.Server_WebHost.GetString(), param.Server_WebPort.GetInt()) - - // Set our metadata values in config since that is what this url scheme - prefix combo does in handle_http - metadata, err := config.DiscoverUrlFederation(fed.Ctx, "https://"+hostname) - assert.NoError(t, err) - viper.Set("Federation.DirectorUrl", metadata.DirectorEndpoint) - viper.Set("Federation.RegistryUrl", metadata.NamespaceRegistrationEndpoint) - viper.Set("Federation.DiscoveryUrl", hostname) + uploadUrl := fmt.Sprintf("osdf://%s/%s", export.FederationPrefix, fileName) // Upload the file with PUT - transferResultsUpload, err := client.DoCopy(fed.Ctx, tempFile.Name(), uploadURL, false, client.WithTokenLocation(tempToken.Name())) + transferResultsUpload, err := client.DoCopy(fed.Ctx, tempFile.Name(), uploadUrl, false, client.WithTokenLocation(tempToken.Name())) assert.NoError(t, err) assert.Equal(t, transferResultsUpload[0].TransferredBytes, int64(17)) // Download that same file with GET - transferResultsDownload, err := client.DoCopy(fed.Ctx, uploadURL, t.TempDir(), false, client.WithTokenLocation(tempToken.Name())) + transferResultsDownload, err := client.DoCopy(fed.Ctx, uploadUrl, t.TempDir(), false, client.WithTokenLocation(tempToken.Name())) assert.NoError(t, err) assert.Equal(t, transferResultsDownload[0].TransferredBytes, transferResultsUpload[0].TransferredBytes) - } }) t.Cleanup(func() { @@ -494,6 +492,8 @@ func TestObjectStat(t *testing.T) { defer server_utils.ResetOriginExports() defer viper.Reset() fed := fed_test_utils.NewFedTest(t, mixedAuthOriginCfg) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) // Other set-up items: testFileContent := "test file content" @@ -522,7 +522,7 @@ func TestObjectStat(t *testing.T) { // This tests object stat with no flags set t.Run("testPelicanObjectStatNoFlags", func(t *testing.T) { for _, export := range fed.Exports { - statUrl := fmt.Sprintf("pelican://%s:%d%s/hello_world.txt", param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), export.FederationPrefix) + statUrl := fmt.Sprintf("pelican://%s%s/hello_world.txt", discoveryUrl.Host, export.FederationPrefix) var got client.FileInfo if export.Capabilities.PublicReads { statInfo, err := client.DoStat(fed.Ctx, statUrl, client.WithTokenLocation("")) @@ -541,7 +541,7 @@ func TestObjectStat(t *testing.T) { // This tests object stat when used on a directory t.Run("testPelicanObjectStatOnDirectory", func(t *testing.T) { for _, export := range fed.Exports { - statUrl := fmt.Sprintf("pelican://%s:%s%s/test", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), export.FederationPrefix) + statUrl := fmt.Sprintf("pelican://%s%s/test", discoveryUrl.Host, export.FederationPrefix) if export.Capabilities.PublicReads { statInfo, err := client.DoStat(fed.Ctx, statUrl, client.WithTokenLocation("")) require.NoError(t, err) @@ -562,6 +562,13 @@ func TestObjectStat(t *testing.T) { _, err := config.SetPreferredPrefix(oldPref) require.NoError(t, err) }() + + oldHost, err := pelican_url.SetOsdfDiscoveryHost(discoveryUrl.String()) + require.NoError(t, err) + defer func() { + _, _ = pelican_url.SetOsdfDiscoveryHost(oldHost) + }() + testFileContent := "test file content" // Drop the testFileContent into the origin directory tempFile, err := os.Create(filepath.Join(fed.Exports[0].StoragePrefix, "test.txt")) @@ -574,16 +581,7 @@ func TestObjectStat(t *testing.T) { tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - statUrl := fmt.Sprintf("osdf://%s/%s", fed.Exports[0].FederationPrefix, fileName) - hostname := fmt.Sprintf("%v:%v", param.Server_WebHost.GetString(), param.Server_WebPort.GetInt()) - - // Set our metadata values in config since that is what this url scheme - prefix combo does in handle_http - metadata, err := config.DiscoverUrlFederation(fed.Ctx, "https://"+hostname) - assert.NoError(t, err) - viper.Set("Federation.DirectorUrl", metadata.DirectorEndpoint) - viper.Set("Federation.RegistryUrl", metadata.NamespaceRegistrationEndpoint) - viper.Set("Federation.DiscoveryUrl", hostname) // Stat the file statInfo, err := client.DoStat(fed.Ctx, statUrl) @@ -591,30 +589,6 @@ func TestObjectStat(t *testing.T) { assert.Equal(t, int64(17), int64(statInfo.Size)) assert.Equal(t, fmt.Sprintf("%s/%s", fed.Exports[0].FederationPrefix, fileName), statInfo.Name) }) - - // Ensure stat fails if it does not recognize the url scheme - t.Run("testObjectStatIncorrectScheme", func(t *testing.T) { - testFileContent := "test file content" - // Drop the testFileContent into the origin directory - tempFile, err := os.Create(filepath.Join(fed.Exports[0].StoragePrefix, "test.txt")) - assert.NoError(t, err, "Error creating temp file") - _, err = tempFile.WriteString(testFileContent) - assert.NoError(t, err, "Error writing to temp file") - tempFile.Close() - - viper.Set("Logging.DisableProgressBars", true) - - // Set path for object to upload/download - tempPath := tempFile.Name() - fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("some://incorrect/scheme/%s", fileName) - - // Stat the file - objStat, err := client.DoStat(fed.Ctx, uploadURL) - assert.Error(t, err) - assert.Nil(t, objStat) - assert.Contains(t, err.Error(), "Do not understand the destination scheme: some. Permitted values are file, osdf, pelican, stash, ") - }) } // Test the functionality of the direct reads feature (?directread) @@ -625,6 +599,8 @@ func TestDirectReads(t *testing.T) { server_utils.ResetOriginExports() viper.Set("Origin.EnableDirectReads", true) fed := fed_test_utils.NewFedTest(t, bothPublicOriginCfg) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) export := fed.Exports[0] testFileContent := "test file content" // Drop the testFileContent into the origin directory @@ -640,8 +616,7 @@ func TestDirectReads(t *testing.T) { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s?directread", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), - export.FederationPrefix, fileName) + uploadURL := fmt.Sprintf("pelican://%s%s/%s?directread", discoveryUrl.Host, export.FederationPrefix, fileName) // Download the file with GET. Shouldn't need a token to succeed transferResults, err := client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false) @@ -665,6 +640,8 @@ func TestDirectReads(t *testing.T) { viper.Reset() server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, pubOriginNoDirectRead) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) export := fed.Exports[0] testFileContent := "test file content" // Drop the testFileContent into the origin directory @@ -680,8 +657,7 @@ func TestDirectReads(t *testing.T) { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s?directread", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), - export.FederationPrefix, fileName) + uploadURL := fmt.Sprintf("pelican://%s%s/%s?directread", discoveryUrl.Host, export.FederationPrefix, fileName) // Download the file with GET. Shouldn't need a token to succeed _, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false) @@ -694,6 +670,8 @@ func TestDirectReads(t *testing.T) { viper.Reset() server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, pubExportNoDirectRead) + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) export := fed.Exports[0] export.Capabilities.DirectReads = false testFileContent := "test file content" @@ -710,8 +688,7 @@ func TestDirectReads(t *testing.T) { // Set path for object to upload/download tempPath := tempFile.Name() fileName := filepath.Base(tempPath) - uploadURL := fmt.Sprintf("pelican://%s:%s%s/%s?directread", param.Server_Hostname.GetString(), strconv.Itoa(param.Server_WebPort.GetInt()), - export.FederationPrefix, fileName) + uploadURL := fmt.Sprintf("pelican://%s%s/%s?directread", discoveryUrl.Host, export.FederationPrefix, fileName) // Download the file with GET. Shouldn't need a token to succeed _, err = client.DoGet(fed.Ctx, uploadURL, t.TempDir(), false) @@ -727,21 +704,25 @@ func TestNewTransferJob(t *testing.T) { server_utils.ResetOriginExports() defer server_utils.ResetOriginExports() fed := fed_test_utils.NewFedTest(t, mixedAuthOriginCfg) - + discoveryUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) + require.NoError(t, err) te, err := client.NewTransferEngine(fed.Ctx) require.NoError(t, err) + tr := config.GetTransport() + httpClient := &http.Client{Transport: tr} + // Test when we have a failure during namespace lookup (here we will get a 404) t.Run("testFailureToGetNamespaceInfo", func(t *testing.T) { tc, err := te.NewClient() assert.NoError(t, err) // have a file/namespace that does not exist - mockRemoteUrl, err := url.Parse("/first/something/file.txt") + mockRemoteUrl, err := pelican_url.Parse(fmt.Sprintf("pelican://%s/first/something/file.txt", discoveryUrl.Host), []pelican_url.ParseOption{pelican_url.ShouldDiscover(true)}, []pelican_url.DiscoveryOption{pelican_url.WithClient(httpClient)}) require.NoError(t, err) _, err = tc.NewTransferJob(context.Background(), mockRemoteUrl, "/dest", false, false) require.Error(t, err) - assert.Contains(t, err.Error(), "failed to get namespace information for remote URL /first/something/file.txt") + assert.Contains(t, err.Error(), "failed to get namespace information for remote URL") }) // Test when we fail to get a token on our auth required namespace @@ -750,11 +731,11 @@ func TestNewTransferJob(t *testing.T) { assert.NoError(t, err) // use our auth required namespace - mockRemoteUrl, err := url.Parse("/second/namespace/hello_world.txt") + mockRemoteUrl, err := pelican_url.Parse(fmt.Sprintf("pelican://%s/second/namespace/hello_world.txt", discoveryUrl.Host), []pelican_url.ParseOption{pelican_url.ShouldDiscover(true)}, []pelican_url.DiscoveryOption{pelican_url.WithClient(httpClient)}) require.NoError(t, err) _, err = tc.NewTransferJob(context.Background(), mockRemoteUrl, "/dest", false, false) require.Error(t, err) - assert.Contains(t, err.Error(), "failed to get token for transfer: failed to find or generate a token as required for /second/namespace/hello_world.txt") + assert.Contains(t, err.Error(), "failed to get token for transfer") }) // Test success @@ -762,7 +743,7 @@ func TestNewTransferJob(t *testing.T) { tc, err := te.NewClient() assert.NoError(t, err) - remoteUrl, err := url.Parse("/first/namespace/hello_world.txt") + remoteUrl, err := pelican_url.Parse(fmt.Sprintf("pelican://%s/first/namespace/hello_world.txt", discoveryUrl.Host), []pelican_url.ParseOption{pelican_url.ShouldDiscover(true)}, []pelican_url.DiscoveryOption{pelican_url.WithClient(httpClient)}) require.NoError(t, err) _, err = tc.NewTransferJob(context.Background(), remoteUrl, t.TempDir(), false, false) assert.NoError(t, err) diff --git a/client/handle_http.go b/client/handle_http.go index e33871488..16f6a66ee 100644 --- a/client/handle_http.go +++ b/client/handle_http.go @@ -41,7 +41,6 @@ import ( "github.com/VividCortex/ewma" "github.com/google/uuid" - "github.com/jellydator/ttlcache/v3" "github.com/lestrrat-go/option" "github.com/opensaucerer/grab/v3" "github.com/pkg/errors" @@ -49,12 +48,12 @@ import ( "github.com/studio-b12/gowebdav" "github.com/vbauerster/mpb/v8" "golang.org/x/sync/errgroup" - "golang.org/x/sync/singleflight" "golang.org/x/time/rate" "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/error_codes" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" ) @@ -62,42 +61,12 @@ var ( progressCtrOnce sync.Once progressCtr *mpb.Progress - successTTL = ttlcache.DefaultTTL - failureTTL = 5 * time.Minute - - loader = ttlcache.LoaderFunc[string, cacheItem]( - func(c *ttlcache.Cache[string, cacheItem], key string) *ttlcache.Item[string, cacheItem] { - ctx := context.Background() - // Note: setting this timeout mostly for unit tests - ctx, cancel := context.WithTimeout(ctx, param.Transport_ResponseHeaderTimeout.GetDuration()) - defer cancel() - urlFederation, err := config.DiscoverUrlFederation(ctx, key) - if err != nil { - // Set a shorter TTL for failures - item := c.Set(key, cacheItem{err: err}, failureTTL) - return item - } - // Set a longer TTL for successes - item := c.Set(key, cacheItem{ - url: pelicanUrl{ - directorUrl: urlFederation.DirectorEndpoint, - }, - }, successTTL) - return item - }, - ) - PelicanError error_codes.PelicanError ) type ( classAd string - cacheItem struct { - url pelicanUrl - err error - } - // Error type for when the transfer started to return data then completely stopped StoppedTransferError struct { BytesTransferred int64 @@ -226,7 +195,7 @@ type ( cancel context.CancelFunc callback TransferCallbackFunc uuid uuid.UUID - remoteURL *url.URL + remoteURL *pelican_url.PelicanURL lookupDone atomic.Bool lookupErr error activeXfer atomic.Int64 @@ -274,7 +243,7 @@ type ( ewmaVal atomic.Int64 ewmaCtr atomic.Int64 clientLock sync.RWMutex - pelicanURLCache *ttlcache.Cache[string, cacheItem] + pelicanUrlCache *pelican_url.Cache } TransferCallbackFunc = func(path string, downloaded int64, totalSize int64, completed bool) @@ -308,10 +277,6 @@ type ( NeedsToken bool PackOption string } - - pelicanUrl struct { - directorUrl string - } ) const ( @@ -557,92 +522,6 @@ func (tr TransferResults) ID() string { return tr.jobId.String() } -func (te *TransferEngine) newPelicanURL(remoteUrl *url.URL) (pelicanURL pelicanUrl, err error) { - scheme := remoteUrl.Scheme - if remoteUrl.Host != "" { - if scheme == "osdf" || scheme == "stash" { - // in the osdf/stash case, fix url's that have a hostname - joinedPath, err := url.JoinPath(remoteUrl.Host, remoteUrl.Path) - // Prefix with a / just in case - remoteUrl.Path = path.Join("/", joinedPath) - if err != nil { - log.Errorln("Failed to join remote destination url path:", err) - return pelicanUrl{}, err - } - } else if scheme == "pelican" { - // If we have a host and url is pelican, we need to extract federation data from the host - log.Debugln("Detected pelican:// url, getting federation metadata from specified host", remoteUrl.Host) - federationUrl := &url.URL{} - federationUrl.Scheme = "https" - federationUrl.Path = "" - federationUrl.Host = remoteUrl.Host - - // Check if cache has key of federationURL, if not, loader will add it: - pelicanUrlItem := te.pelicanURLCache.Get(federationUrl.String()) - if pelicanUrlItem.Value().err != nil { - return pelicanUrl{}, pelicanUrlItem.Value().err - } else { - pelicanURL = pelicanUrlItem.Value().url - } - } - } - - // With an osdf:// url scheme, we assume the user will be using the OSDF so load in our osdf metadata for our url - if scheme == "osdf" || scheme == "stash" { - // If we are using an osdf/stash binary, we discovered the federation already --> load into local url metadata - if config.GetPreferredPrefix() == config.OsdfPrefix { - log.Debugln("In OSDF mode with osdf:// url; populating metadata with OSDF defaults") - fedInfo, err := config.GetFederation(te.ctx) - if fedInfo.DirectorEndpoint == "" { - if err != nil { - return pelicanUrl{}, errors.Wrap(err, "no OSDF metadata available") - } - return pelicanUrl{}, fmt.Errorf("OSDF default metadata is not populated in config") - } else { - pelicanURL.directorUrl = fedInfo.DirectorEndpoint - } - } else if config.GetPreferredPrefix() == config.PelicanPrefix { - // We hit this case when we are using a pelican binary but an osdf:// url, therefore we need to discover the osdf federation - log.Debugln("In Pelican mode with an osdf:// url, populating metadata with OSDF defaults") - // Check if cache has key of federationURL, if not, loader will add it: - pelicanUrlItem := te.pelicanURLCache.Get("osg-htc.org") - if pelicanUrlItem.Value().err != nil { - err = pelicanUrlItem.Value().err - return - } else { - pelicanURL = pelicanUrlItem.Value().url - } - } - } else if scheme == "pelican" && remoteUrl.Host == "" { - // We hit this case when we do not have a hostname with a pelican:// url - if param.Federation_DiscoveryUrl.GetString() == "" { - return pelicanUrl{}, errors.Errorf("pelican url scheme without discovery-url detected, please provide a federation discovery-url " + - "(e.g. pelican://) within the hostname or with the -f flag") - } - // Check if cache has key of federationURL, if not, loader will add it: - pelicanUrlItem := te.pelicanURLCache.Get(param.Federation_DiscoveryUrl.GetString()) - if pelicanUrlItem != nil { - pelicanURL = pelicanUrlItem.Value().url - } else { - return pelicanUrl{}, errors.Errorf("issue getting metadata information from cache") - } - } else if scheme == "" { - // If we don't have a url scheme, then our metadata information should be in the config - log.Debugln("No url scheme detected, getting metadata information from configuration") - if fedInfo, err := config.GetFederation(te.ctx); err == nil { - pelicanURL.directorUrl = fedInfo.DirectorEndpoint - } else { - return pelicanUrl{}, errors.Wrap(err, "failed to lookup pelican metadata from configuration") - } - - // If the values do not exist, exit with failure - if pelicanURL.directorUrl == "" { - return pelicanUrl{}, errors.New("missing metadata information in config, ensure Federation DirectorUrl, RegistryUrl, and DiscoverUrl are all set") - } - } - return -} - // Returns a new transfer engine object whose lifetime is tied // to the provided context. Will launcher worker goroutines to // handle the underlying transfers @@ -657,15 +536,9 @@ func NewTransferEngine(ctx context.Context) (te *TransferEngine, err error) { work := make(chan *clientTransferJob, 5) files := make(chan *clientTransferFile) results := make(chan *clientTransferResults, 5) - suppressedLoader := ttlcache.NewSuppressedLoader(loader, new(singleflight.Group)) - pelicanURLCache := ttlcache.New( - ttlcache.WithTTL[string, cacheItem](30*time.Minute), - ttlcache.WithLoader(suppressedLoader), - ) - // Start our cache for url metadata - // This is stopped in the `Shutdown` method - go pelicanURLCache.Start() + // Start the URL cache to avoid repeated metadata queries + pelicanUrlCache := pelican_url.StartCache() te = &TransferEngine{ ctx: ctx, @@ -682,7 +555,7 @@ func NewTransferEngine(ctx context.Context) (te *TransferEngine, err error) { closeDoneChan: make(chan bool), ewmaTick: time.NewTicker(ewmaInterval), ewma: ewma.NewMovingAverage(), - pelicanURLCache: pelicanURLCache, + pelicanUrlCache: pelicanUrlCache, } workerCount := param.Client_WorkerCount.GetInt() if workerCount <= 0 { @@ -789,7 +662,7 @@ func (te *TransferEngine) Shutdown() error { te.Close() <-te.closeDoneChan te.ewmaTick.Stop() - te.pelicanURLCache.Stop() + te.pelicanUrlCache.Stop() te.cancel() err := te.egrp.Wait() @@ -1080,7 +953,7 @@ func (te *TransferEngine) runJobHandler() error { // // The returned object can be further customized as desired. // This function does not "submit" the job for execution. -func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL, localPath string, upload bool, recursive bool, options ...TransferOption) (tj *TransferJob, err error) { +func (tc *TransferClient) NewTransferJob(ctx context.Context, pUrl *pelican_url.PelicanURL, localPath string, upload bool, recursive bool, options ...TransferOption) (tj *TransferJob, err error) { id, err := uuid.NewV7() if err != nil { @@ -1089,14 +962,10 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL // See if we have a projectName defined project := searchJobAd(projectName) - - pelicanURL, err := tc.engine.newPelicanURL(remoteUrl) - if err != nil { - err = errors.Wrap(err, "error generating metadata for specified url") - return + copyUrl := *pUrl // Make a copy of the input URL to avoid concurrent issues. + if _, exists := copyUrl.Query()[pelican_url.QueryRecursive]; exists { + recursive = true } - - copyUrl := *remoteUrl // Make a copy of the input URL to avoid concurrent issues. tj = &TransferJob{ prefObjServers: tc.prefObjServers, recursive: recursive, @@ -1143,11 +1012,11 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL } } - tj.directorUrl = pelicanURL.directorUrl - dirResp, err := GetDirectorInfoForPath(tj.ctx, remoteUrl.Path, pelicanURL.directorUrl, upload, remoteUrl.RawQuery) + tj.directorUrl = copyUrl.FedInfo.DirectorEndpoint + dirResp, err := GetDirectorInfoForPath(tj.ctx, ©Url, upload) if err != nil { log.Errorln(err) - err = errors.Wrapf(err, "failed to get namespace information for remote URL %s", remoteUrl.String()) + err = errors.Wrapf(err, "failed to get namespace information for remote URL %s", pUrl.String()) return } tj.dirResp = dirResp @@ -1169,7 +1038,7 @@ func (tc *TransferClient) NewTransferJob(ctx context.Context, remoteUrl *url.URL } } - log.Debugf("Created new transfer job, ID %s client %s, for URL %s", tj.uuid.String(), tc.id.String(), remoteUrl.String()) + log.Debugf("Created new transfer job, ID %s client %s, for URL %s", tj.uuid.String(), tc.id.String(), copyUrl.String()) return } @@ -2521,14 +2390,14 @@ func (te *TransferEngine) walkDirUpload(job *clientTransferJob, transfers []tran } // This function performs the ls command by walking through the specified collections and printing the contents of the files -func listHttp(remoteObjectUrl *url.URL, dirResp server_structs.DirectorResponse, token *tokenGenerator) (fileInfos []FileInfo, err error) { +func listHttp(remoteUrl *pelican_url.PelicanURL, dirResp server_structs.DirectorResponse, token *tokenGenerator) (fileInfos []FileInfo, err error) { // Get our collection listing host collectionsUrl := dirResp.XPelNsHdr.CollectionsUrl log.Debugln("Collections URL: ", collectionsUrl.String()) project := searchJobAd(projectName) client := createWebDavClient(collectionsUrl, token, project) - remotePath := remoteObjectUrl.Path + remotePath := remoteUrl.Path infos, err := client.ReadDir(remotePath) if err != nil { @@ -2582,7 +2451,7 @@ func listHttp(remoteObjectUrl *url.URL, dirResp server_structs.DirectorResponse, // Otherwise, the first three caches are queried simultaneously. // For any of the queries, if the attempt with the proxy fails, a second attempt // is made without. -func statHttp(dest *url.URL, dirResp server_structs.DirectorResponse, token *tokenGenerator) (info FileInfo, err error) { +func statHttp(dest *pelican_url.PelicanURL, dirResp server_structs.DirectorResponse, token *tokenGenerator) (info FileInfo, err error) { statHosts := make([]url.URL, 0, 3) collectionsUrl := dirResp.XPelNsHdr.CollectionsUrl @@ -2608,7 +2477,7 @@ func statHttp(dest *url.URL, dirResp server_structs.DirectorResponse, token *tok for _, statUrl := range statHosts { client := gowebdav.NewAuthClient(statUrl.String(), auth) - destCopy := *dest + destCopy := *(dest.GetRawUrl()) destCopy.Host = statUrl.Host destCopy.Scheme = statUrl.Scheme diff --git a/client/handle_http_test.go b/client/handle_http_test.go index 0b321b30b..cb68c14a6 100644 --- a/client/handle_http_test.go +++ b/client/handle_http_test.go @@ -23,7 +23,6 @@ package client import ( "bytes" "context" - "encoding/json" "io/fs" "net" "net/http" @@ -44,7 +43,6 @@ import ( "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/error_codes" - "github.com/pelicanplatform/pelican/mock" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/test_utils" ) @@ -590,248 +588,6 @@ func TestProjInUserAgent(t *testing.T) { assert.Equal(t, "pelican-client/"+config.GetVersion()+" project/test", *server_test.user_agent) } -func TestNewPelicanURL(t *testing.T) { - // Set up our federation and context - ctx, cancel, egrp := test_utils.TestContext(context.Background(), t) - config.InitConfig() - - t.Run("TestOsdfOrStashSchemeWithOSDFPrefixNoError", func(t *testing.T) { - viper.Reset() - err := config.InitClient() - require.NoError(t, err) - _, err = config.SetPreferredPrefix(config.OsdfPrefix) - viper.Set("ConfigDir", t.TempDir()) - assert.NoError(t, err) - // Init config to get proper timeouts - config.InitConfig() - - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - - defer func() { - require.NoError(t, te.Shutdown()) - }() - - remoteObject := "osdf:///something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - // Instead of relying on osdf, let's just set our global metadata (osdf prefix does this for us) - viper.Set("Federation.DirectorUrl", "someDirectorUrl") - viper.Set("Federation.DiscoveryUrl", "someDiscoveryUrl") - - pelicanURL, err := te.newPelicanURL(remoteObjectURL) - assert.NoError(t, err) - - // Check pelicanURL properly filled out - assert.Equal(t, "someDirectorUrl", pelicanURL.directorUrl) - viper.Reset() - }) - - t.Run("TestOsdfOrStashSchemeWithOSDFPrefixWithError", func(t *testing.T) { - _, err := config.SetPreferredPrefix(config.OsdfPrefix) - require.NoError(t, err) - test_utils.InitClient(t, map[string]any{}) - - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - remoteObject := "osdf:///something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - // Instead of relying on osdf, let's just set our global metadata but don't set one piece - viper.Set("Federation.DiscoveryUrl", "someDiscoveryUrl") - - _, err = te.newPelicanURL(remoteObjectURL) - // Make sure we get an error - assert.Error(t, err) - viper.Reset() - }) - - t.Run("TestOsdfOrStashSchemeWithPelicanPrefixNoError", func(t *testing.T) { - test_utils.InitClient(t, map[string]any{}) - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - mock.MockOSDFDiscovery(t, config.GetTransport()) - _, err = config.SetPreferredPrefix(config.PelicanPrefix) - config.InitConfig() - assert.NoError(t, err) - remoteObject := "osdf:///something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - pelicanURL, err := te.newPelicanURL(remoteObjectURL) - assert.NoError(t, err) - - // Check pelicanURL properly filled out - assert.Equal(t, "https://osdf-director.osg-htc.org", pelicanURL.directorUrl) - viper.Reset() - // Note: can't really test this for an error since that would require osg-htc.org to be down - }) - - t.Run("TestStashSchemeWithPelicanPrefixNoError", func(t *testing.T) { - test_utils.InitClient(t, map[string]any{}) - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - mock.MockOSDFDiscovery(t, config.GetTransport()) - _, err = config.SetPreferredPrefix(config.PelicanPrefix) - config.InitConfig() - assert.NoError(t, err) - remoteObject := "stash:///something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - pelicanURL, err := te.newPelicanURL(remoteObjectURL) - assert.NoError(t, err) - - // Check pelicanURL properly filled out - assert.Equal(t, "https://osdf-director.osg-htc.org", pelicanURL.directorUrl) - viper.Reset() - // Note: can't really test this for an error since that would require osg-htc.org to be down - }) - - t.Run("TestPelicanSchemeNoError", func(t *testing.T) { - test_utils.InitClient(t, map[string]any{ - "TLSSkipVerify": true, - }) - - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - // Create a server that gives us a mock response - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // make our response: - response := config.FederationDiscovery{ - DirectorEndpoint: "director", - NamespaceRegistrationEndpoint: "registry", - JwksUri: "jwks", - BrokerEndpoint: "broker", - } - - responseJSON, err := json.Marshal(response) - if err != nil { - http.Error(w, "Internal Server Error", http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - _, err = w.Write(responseJSON) - assert.NoError(t, err) - })) - defer server.Close() - - serverURL, err := url.Parse(server.URL) - assert.NoError(t, err) - - remoteObject := "pelican://" + serverURL.Host + "/something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - pelicanURL, err := te.newPelicanURL(remoteObjectURL) - assert.NoError(t, err) - - // Check pelicanURL properly filled out - assert.Equal(t, "director", pelicanURL.directorUrl) - // Check to make sure it was populated in our cache - assert.True(t, te.pelicanURLCache.Has("https://"+serverURL.Host)) - viper.Reset() - }) - - t.Run("TestPelicanSchemeWithError", func(t *testing.T) { - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - err := config.InitClient() - require.NoError(t, err) - - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - remoteObject := "pelican://some-host/something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - _, err = te.newPelicanURL(remoteObjectURL) - assert.Error(t, err) - viper.Reset() - }) - - t.Run("TestPelicanSchemeMetadataTimeoutError", func(t *testing.T) { - test_utils.InitClient(t, map[string]any{ - "TLSSkipVerify": true, - "Transport.ResponseHeaderTimeout": time.Millisecond, - }) - - te, err := NewTransferEngine(ctx) - require.NoError(t, err) - defer func() { - require.NoError(t, te.Shutdown()) - }() - - // Create a server that gives us a mock response - sleepChan := make(chan bool) - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // make our response: - response := config.FederationDiscovery{ - DirectorEndpoint: "director", - NamespaceRegistrationEndpoint: "registry", - JwksUri: "jwks", - BrokerEndpoint: "broker", - } - - responseJSON, err := json.Marshal(response) - if err != nil { - http.Error(w, "Internal Server Error", http.StatusInternalServerError) - return - } - <-sleepChan - w.WriteHeader(http.StatusOK) - _, err = w.Write(responseJSON) - assert.NoError(t, err) - })) - defer server.Close() - defer close(sleepChan) - - serverURL, err := url.Parse(server.URL) - assert.NoError(t, err) - - remoteObject := "pelican://" + serverURL.Host + "/something/somewhere/thatdoesnotexist.txt" - remoteObjectURL, err := url.Parse(remoteObject) - assert.NoError(t, err) - - _, err = te.newPelicanURL(remoteObjectURL) - assert.Error(t, err) - assert.True(t, errors.Is(err, config.MetadataTimeoutErr)) - }) - - t.Cleanup(func() { - cancel() - if err := egrp.Wait(); err != nil && err != context.Canceled && err != http.ErrServerClosed { - require.NoError(t, err) - } - // Throw in a viper.Reset for good measure. Keeps our env squeaky clean! - viper.Reset() - }) -} - // The test should prove that the function getObjectServersToTry returns the correct number of servers, // and that any duplicates are removed func TestGetObjectServersToTry(t *testing.T) { diff --git a/client/main.go b/client/main.go index 26f556aa2..c85b6f65d 100644 --- a/client/main.go +++ b/client/main.go @@ -22,6 +22,7 @@ import ( "context" "fmt" "net" + "net/http" "net/url" "runtime/debug" "strings" @@ -36,8 +37,8 @@ import ( log "github.com/sirupsen/logrus" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" - "github.com/pelicanplatform/pelican/utils" ) // Number of caches to attempt to use in any invocation @@ -53,6 +54,58 @@ type FileInfo struct { IsCollection bool } +// Given a remote path, use the client's wisdom to parse it as a Pelican URL, including metadata discovery. +// +// This will handle setting up the URL cache, passing along contexts to discovery, and passing the client context/user agent. +// Calling this should return a fully populated PelicanURL object, including any metadata that was discovered. +func ParseRemoteAsPUrl(ctx context.Context, rp string) (*pelican_url.PelicanURL, error) { + rpUrl, err := url.Parse(rp) + if err != nil { + return nil, errors.Wrap(err, "failed to parse remote path") + } + + // Set up options that get passed from Parse --> PopulateFedInfo and may be used when querying the Director + client := &http.Client{Transport: config.GetTransport()} + pOptions := []pelican_url.ParseOption{pelican_url.ShouldDiscover(true), pelican_url.ValidateQueryParams(true)} + dOptions := []pelican_url.DiscoveryOption{pelican_url.UseCached(true), pelican_url.WithContext(ctx), pelican_url.WithClient(client), pelican_url.WithUserAgent(getUserAgent(""))} + + // If we have no scheme, we'll end up assuming a Pelican url. We need to figure out which discovery endpoint to use. + if rpUrl.Scheme == "" { + fedInfo, err := config.GetFederation(ctx) + if err != nil { + return nil, errors.Wrap(err, "failed to get configured federation info") + } + + // First try to use the configured discovery endpoint + if fedInfo.DiscoveryEndpoint != "" { + discoveryUrl, err := url.Parse(fedInfo.DiscoveryEndpoint) + if err != nil { + return nil, errors.Wrap(err, "failed to parse federation discovery endpoint") + } + + dOptions = append(dOptions, pelican_url.WithDiscoveryUrl(discoveryUrl)) + } else if fedInfo.DirectorEndpoint != "" { + discoveryUrl, err := url.Parse(fedInfo.DirectorEndpoint) + if err != nil { + return nil, errors.Wrap(err, "failed to parse federation discovery endpoint") + } + + dOptions = append(dOptions, pelican_url.WithDiscoveryUrl(discoveryUrl)) + } + } + + pUrl, err := pelican_url.Parse( + rp, + pOptions, + dOptions, + ) + if err != nil { + return nil, err + } + + return pUrl, nil +} + // Check the size of a remote file in an origin func DoStat(ctx context.Context, destination string, options ...TransferOption) (fileInfo *FileInfo, err error) { @@ -66,16 +119,9 @@ func DoStat(ctx context.Context, destination string, options ...TransferOption) } }() - destUri, err := url.Parse(destination) + pUrl, err := ParseRemoteAsPUrl(ctx, destination) if err != nil { - log.Errorln("Failed to parse destination URL") - return nil, err - } - - // Check if we understand the found url scheme - err = schemeUnderstood(destUri.Scheme) - if err != nil { - return nil, err + return } te, err := NewTransferEngine(ctx) @@ -89,17 +135,12 @@ func DoStat(ctx context.Context, destination string, options ...TransferOption) } }() - pelicanURL, err := te.newPelicanURL(destUri) - if err != nil { - return nil, errors.Wrap(err, "Failed to generate pelicanURL object") - } - - dirResp, err := GetDirectorInfoForPath(ctx, destUri.Path, pelicanURL.directorUrl, false, "") + dirResp, err := GetDirectorInfoForPath(ctx, pUrl, false) if err != nil { return nil, err } - token := newTokenGenerator(destUri, &dirResp, false, true) + token := newTokenGenerator(pUrl, &dirResp, false, true) for _, option := range options { switch option.Ident() { case identTransferOptionTokenLocation{}: @@ -118,7 +159,7 @@ func DoStat(ctx context.Context, destination string, options ...TransferOption) } } - if statInfo, err := statHttp(destUri, dirResp, token); err != nil { + if statInfo, err := statHttp(pUrl, dirResp, token); err != nil { return nil, errors.Wrap(err, "failed to do the stat") } else { return &statInfo, nil @@ -126,12 +167,11 @@ func DoStat(ctx context.Context, destination string, options ...TransferOption) } func GetObjectServerHostnames(ctx context.Context, testFile string) (urls []string, err error) { - fedInfo, err := config.GetFederation(ctx) + pUrl, err := ParseRemoteAsPUrl(ctx, testFile) if err != nil { return } - - parsedDirResp, err := GetDirectorInfoForPath(ctx, testFile, fedInfo.DirectorEndpoint, false, "") + parsedDirResp, err := GetDirectorInfoForPath(ctx, pUrl, false) if err != nil { return } @@ -211,31 +251,6 @@ func generateSortedObjServers(dirResp server_structs.DirectorResponse, preferred } -func correctURLWithUnderscore(sourceFile string) (string, string) { - schemeIndex := strings.Index(sourceFile, "://") - if schemeIndex == -1 { - return sourceFile, "" - } - - originalScheme := sourceFile[:schemeIndex] - if strings.Contains(originalScheme, "_") { - scheme := strings.ReplaceAll(originalScheme, "_", ".") - sourceFile = scheme + sourceFile[schemeIndex:] - } - return sourceFile, originalScheme -} - -func schemeUnderstood(scheme string) error { - understoodSchemes := []string{"file", "osdf", "pelican", "stash", ""} - - _, foundDest := find(understoodSchemes, scheme) - if !foundDest { - return errors.Errorf("Do not understand the destination scheme: %s. Permitted values are %s", - scheme, strings.Join(understoodSchemes, ", ")) - } - return nil -} - // Function for the object ls command, we get target information for our remote object and eventually print out the contents of the specified object func DoList(ctx context.Context, remoteObject string, options ...TransferOption) (fileInfos []FileInfo, err error) { // First, create a handler for any panics that occur @@ -248,18 +263,11 @@ func DoList(ctx context.Context, remoteObject string, options ...TransferOption) } }() - remoteObjectUrl, err := url.Parse(remoteObject) + pUrl, err := ParseRemoteAsPUrl(ctx, remoteObject) if err != nil { - return nil, errors.Wrap(err, "failed to parse remote object URL") + return nil, errors.Wrapf(err, "failed to parse remote path: %s", remoteObject) } - remoteObjectScheme := remoteObjectUrl.Scheme - - // Check if we understand the found url scheme - err = schemeUnderstood(remoteObjectScheme) - if err != nil { - return nil, err - } te, err := NewTransferEngine(ctx) if err != nil { return nil, err @@ -270,18 +278,13 @@ func DoList(ctx context.Context, remoteObject string, options ...TransferOption) } }() - pelicanURL, err := te.newPelicanURL(remoteObjectUrl) - if err != nil { - return nil, errors.Wrap(err, "failed to generate pelicanURL object") - } - - dirResp, err := GetDirectorInfoForPath(ctx, remoteObjectUrl.Path, pelicanURL.directorUrl, false, "") + dirResp, err := GetDirectorInfoForPath(ctx, pUrl, false) if err != nil { return nil, err } // Get our token if needed - token := newTokenGenerator(remoteObjectUrl, &dirResp, false, true) + token := newTokenGenerator(pUrl, &dirResp, false, true) for _, option := range options { switch option.Ident() { case identTransferOptionTokenLocation{}: @@ -300,7 +303,7 @@ func DoList(ctx context.Context, remoteObject string, options ...TransferOption) } } - fileInfos, err = listHttp(remoteObjectUrl, dirResp, token) + fileInfos, err = listHttp(pUrl, dirResp, token) if err != nil { return nil, errors.Wrap(err, "failed to do the list") } @@ -326,30 +329,9 @@ func DoPut(ctx context.Context, localObject string, remoteDestination string, re } }() - remoteDestination, remoteDestScheme := correctURLWithUnderscore(remoteDestination) - remoteDestUrl, err := url.Parse(remoteDestination) - if err != nil { - log.Errorln("Failed to parse remote destination URL:", err) - return nil, err - } - - // Check if we have a query and that it is understood - err = utils.CheckValidQuery(remoteDestUrl) - if err != nil { - return - } - if remoteDestUrl.Query().Has("recursive") { - recursive = true - } - - remoteDestUrl.Scheme = remoteDestScheme - - remoteDestScheme, _ = getTokenName(remoteDestUrl) - - // Check if we understand the found url scheme - err = schemeUnderstood(remoteDestScheme) + pUrl, err := ParseRemoteAsPUrl(ctx, remoteDestination) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "failed to parse remote object: %s", remoteDestination) } te, err := NewTransferEngine(ctx) @@ -366,7 +348,7 @@ func DoPut(ctx context.Context, localObject string, remoteDestination string, re if err != nil { return } - tj, err := client.NewTransferJob(context.Background(), remoteDestUrl, localObject, true, recursive) + tj, err := client.NewTransferJob(context.Background(), pUrl, localObject, true, recursive) if err != nil { return } @@ -403,40 +385,9 @@ func DoGet(ctx context.Context, remoteObject string, localDestination string, re } }() - // Parse the source with URL parse - remoteObject, remoteObjectScheme := correctURLWithUnderscore(remoteObject) - remoteObjectUrl, err := url.Parse(remoteObject) - if err != nil { - log.Errorln("Failed to parse source URL:", err) - return nil, err - } - - // Check if we have a query and that it is understood - err = utils.CheckValidQuery(remoteObjectUrl) - if err != nil { - return - } - if remoteObjectUrl.Query().Has("recursive") { - recursive = true - } - - remoteObjectUrl.Scheme = remoteObjectScheme - - // This is for condor cases: - remoteObjectScheme, _ = getTokenName(remoteObjectUrl) - - // Check if we understand the found url scheme - err = schemeUnderstood(remoteObjectScheme) + pUrl, err := ParseRemoteAsPUrl(ctx, remoteObject) if err != nil { - return nil, err - } - - if remoteObjectScheme == "osdf" || remoteObjectScheme == "pelican" { - remoteObject = remoteObjectUrl.Path - } - - if string(remoteObject[0]) != "/" { - remoteObject = "/" + remoteObject + return nil, errors.Wrapf(err, "failed to parse remote object: %s", remoteObject) } // get absolute path @@ -445,10 +396,11 @@ func DoGet(ctx context.Context, remoteObject string, localDestination string, re //Check if path exists or if its in a folder if destStat, err := os.Stat(localDestPath); os.IsNotExist(err) { localDestination = localDestPath - } else if destStat.IsDir() && remoteObjectUrl.Query().Get("pack") == "" { + } else if destStat.IsDir() && pUrl.Query().Get(pelican_url.QueryPack) == "" { // If we have an auto-pack request, it's OK for the destination to be a directory // Otherwise, get the base name of the source and append it to the destination dir. - remoteObjectFilename := path.Base(remoteObject) + // Note that we use the pUrl.Path, as this will have stripped any query params for us + remoteObjectFilename := path.Base(pUrl.Path) localDestination = path.Join(localDestPath, remoteObjectFilename) } @@ -468,7 +420,7 @@ func DoGet(ctx context.Context, remoteObject string, localDestination string, re if err != nil { return } - tj, err := tc.NewTransferJob(context.Background(), remoteObjectUrl, localDestination, false, recursive) + tj, err := tc.NewTransferJob(context.Background(), pUrl, localDestination, false, recursive) if err != nil { return } @@ -536,138 +488,39 @@ func DoCopy(ctx context.Context, sourceFile string, destination string, recursiv } }() - // Parse the source and destination with URL parse - sourceFile, source_scheme := correctURLWithUnderscore(sourceFile) - sourceURL, err := url.Parse(sourceFile) - if err != nil { - log.Errorln("Failed to parse source URL:", err) - return nil, err - } - // Check if we have a query and that it is understood - err = utils.CheckValidQuery(sourceURL) - if err != nil { - return - } - if sourceURL.Query().Has("recursive") { - recursive = true - } - - sourceURL.Scheme = source_scheme - - destination, dest_scheme := correctURLWithUnderscore(destination) - destURL, err := url.Parse(destination) + var isPut bool + // determine which direction we're headed + parsedDest, err := url.Parse(destination) if err != nil { log.Errorln("Failed to parse destination URL:", err) return nil, err } - - // Check if we have a query and that it is understood - err = utils.CheckValidQuery(destURL) + parsedSrc, err := url.Parse(sourceFile) if err != nil { - return - } - if destURL.Query().Has("recursive") { - recursive = true + log.Errorln("Failed to parse source URL:", err) + return nil, err } - destURL.Scheme = dest_scheme - - // Check for scheme here for when using condor - sourceScheme, _ := getTokenName(sourceURL) - destScheme, _ := getTokenName(destURL) - - isPut := destScheme == "stash" || destScheme == "osdf" || destScheme == "pelican" - var localPath string - var remoteURL *url.URL - if isPut { - // Verify valid scheme - if err = schemeUnderstood(destScheme); err != nil { - return nil, err - } - - log.Debugln("Detected object write to remote federation object", destURL.Path) - localPath = sourceFile - remoteURL = destURL + var remotePath string + if parsedDest.Scheme != "" && (parsedSrc.Scheme == "" || parsedSrc.Scheme == "file") { + isPut = true + log.Debugf("Detected a PUT from %s to %s", parsedSrc.Path, parsedDest.String()) + localPath = parsedSrc.Path + remotePath = parsedDest.String() + } else if (parsedDest.Scheme == "" || parsedDest.Scheme == "file") && parsedSrc.Scheme != "" { + isPut = false + log.Debugf("Detected a GET from %s to %s", parsedSrc.String(), parsedDest.Path) + localPath = parsedDest.Path + remotePath = parsedSrc.String() } else { - // Verify valid scheme - if err = schemeUnderstood(sourceScheme); err != nil { - return nil, err - } - - if destURL.Scheme == "file" { - destination = destURL.Path - } - - if sourceScheme == "stash" || sourceScheme == "osdf" || sourceScheme == "pelican" { - sourceFile = sourceURL.Path - } - - if string(sourceFile[0]) != "/" { - sourceFile = "/" + sourceFile - } - - // get absolute path - destPath, _ := filepath.Abs(destination) - - //Check if path exists or if its in a folder - if destStat, err := os.Stat(destPath); os.IsNotExist(err) { - destination = destPath - } else if destStat.IsDir() && sourceURL.Query().Get("pack") == "" { - // If we have an auto-pack request, it's OK for the destination to be a directory - // Otherwise, get the base name of the source and append it to the destination dir. - sourceFilename := path.Base(sourceFile) - destination = path.Join(destPath, sourceFilename) - } - localPath = destination - remoteURL = sourceURL + return nil, errors.New("unable to determine direction of transfer. Both source and destination are either local or remote") } - success := false - var downloaded int64 = 0 - - te, err := NewTransferEngine(ctx) - if err != nil { - return nil, err - } - - defer func() { - if err := te.Shutdown(); err != nil { - log.Errorln("Failure when shutting down transfer engine:", err) - } - }() - tc, err := te.NewClient(options...) - if err != nil { - return - } - tj, err := tc.NewTransferJob(context.Background(), remoteURL, localPath, isPut, recursive) - if err != nil { - return - } - if err = tc.Submit(tj); err != nil { - return - } - transferResults, err = tc.Shutdown() - if err == nil { - if tj.lookupErr == nil { - success = true - } else { - err = tj.lookupErr - } - } - - for _, result := range transferResults { - downloaded += result.TransferredBytes - if err == nil && result.Error != nil { - success = false - err = result.Error - } - } - - if success { - return transferResults, nil + if isPut { + return DoPut(ctx, localPath, remotePath, recursive, options...) } else { - return transferResults, err + return DoGet(ctx, remotePath, localPath, recursive, options...) } } diff --git a/client/main_test.go b/client/main_test.go index d5c637bc3..8914b239c 100644 --- a/client/main_test.go +++ b/client/main_test.go @@ -19,7 +19,11 @@ package client import ( + "context" + "fmt" "net" + "net/http" + "net/http/httptest" "net/url" "os" "path/filepath" @@ -33,7 +37,9 @@ import ( "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/mock" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" + "github.com/pelicanplatform/pelican/test_utils" ) // TestGetIps calls main.get_ips with a hostname, checking @@ -145,12 +151,12 @@ func TestGetToken(t *testing.T) { }, } - url, err := url.Parse("osdf:///user/foo") + pUrl, err := pelican_url.Parse("osdf:///user/foo", nil, nil) assert.NoError(t, err) // ENVs to test: BEARER_TOKEN, BEARER_TOKEN_FILE, XDG_RUNTIME_DIR/bt_u, TOKEN, _CONDOR_CREDS/scitoken.use, .condor_creds/scitokens.use os.Setenv("BEARER_TOKEN", "bearer_token_contents") - token := newTokenGenerator(url, &dirResp, true, false) + token := newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err := token.get() assert.NoError(t, err) assert.Equal(t, "bearer_token_contents", tokenContents) @@ -164,7 +170,7 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("BEARER_TOKEN_FILE", bearer_token_file) - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -177,7 +183,7 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("XDG_RUNTIME_DIR", tmpDir) - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -190,7 +196,7 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("TOKEN", bearer_token_file) - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -203,7 +209,7 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -217,14 +223,14 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) - renamedUrl, err := url.Parse("renamed+osdf:///user/ligo/frames") + pUrl, err = pelican_url.Parse("renamed+osdf:///user/ligo/frames", nil, nil) assert.NoError(t, err) ligoDirResp := server_structs.DirectorResponse{ XPelNsHdr: server_structs.XPelNs{ Namespace: "/user/ligo/frames", }, } - token = newTokenGenerator(renamedUrl, &ligoDirResp, false, false) + token = newTokenGenerator(pUrl, &ligoDirResp, false, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -239,10 +245,9 @@ func TestGetToken(t *testing.T) { assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) // Use a valid URL, then replace the scheme - renamedUrl, err = url.Parse("renamed.handle1+osdf:///user/ligo/frames") - renamedUrl.Scheme = "renamed_handle1+osdf" + pUrl, err = pelican_url.Parse("renamed.handle1+osdf:///user/ligo/frames", nil, nil) assert.NoError(t, err) - token = newTokenGenerator(renamedUrl, &ligoDirResp, false, false) + token = newTokenGenerator(pUrl, &ligoDirResp, false, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -256,9 +261,9 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) - renamedUrl, err = url.Parse("renamed.handle2+osdf:///user/ligo/frames") + pUrl.RawScheme = "renamed.handle2+osdf" assert.NoError(t, err) - token = newTokenGenerator(renamedUrl, &ligoDirResp, false, false) + token = newTokenGenerator(pUrl, &ligoDirResp, false, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -272,9 +277,9 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) - renamedUrl, err = url.Parse("renamed.handle3+osdf:///user/ligo/frames") + pUrl.RawScheme = "renamed.handle3+osdf" assert.NoError(t, err) - token = newTokenGenerator(renamedUrl, &ligoDirResp, false, false) + token = newTokenGenerator(pUrl, &ligoDirResp, false, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -288,9 +293,9 @@ func TestGetToken(t *testing.T) { err = os.WriteFile(bearer_token_file, tmpFile, 0644) assert.NoError(t, err) os.Setenv("_CONDOR_CREDS", tmpDir) - renamedUrl, err = url.Parse("/user/ligo/frames") + pUrl, err = pelican_url.Parse("osdf:///user/ligo/frames", nil, nil) assert.NoError(t, err) - token = newTokenGenerator(renamedUrl, &ligoDirResp, false, false) + token = newTokenGenerator(pUrl, &ligoDirResp, false, false) token.SetTokenName("renamed") tokenContents, err = token.get() assert.NoError(t, err) @@ -309,7 +314,7 @@ func TestGetToken(t *testing.T) { assert.NoError(t, err) err = os.Chdir(tmpDir) assert.NoError(t, err) - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) tokenContents, err = token.get() assert.NoError(t, err) assert.Equal(t, token_contents, tokenContents) @@ -317,7 +322,7 @@ func TestGetToken(t *testing.T) { assert.NoError(t, err) // Check that we haven't regressed on our error messages - token = newTokenGenerator(url, &dirResp, true, false) + token = newTokenGenerator(pUrl, &dirResp, true, false) _, err = token.get() assert.EqualError(t, err, "credential is required for osdf:///user/foo but was not discovered") } @@ -331,21 +336,19 @@ func TestGetTokenName(t *testing.T) { }{ {"osdf://blah+asdf", "", "osdf"}, {"stash://blah+asdf", "", "stash"}, - {"file://blah+asdf", "", "file"}, {"tokename+osdf://blah+asdf", "tokename", "osdf"}, {"tokename+stash://blah+asdf", "tokename", "stash"}, - {"tokename+file://blah+asdf", "tokename", "file"}, {"tokename+tokename2+osdf://blah+asdf", "tokename+tokename2", "osdf"}, {"token+tokename2+stash://blah+asdf", "token+tokename2", "stash"}, {"token.use+stash://blah+asdf", "token.use", "stash"}, {"token.blah.asdf+stash://blah+asdf", "token.blah.asdf", "stash"}, } for _, c := range cases { - url, err := url.Parse(c.url) + pUrl, err := pelican_url.Parse(c.url, nil, []pelican_url.DiscoveryOption{pelican_url.WithContext(context.Background())}) assert.NoError(t, err) - scheme, tokenName := getTokenName(url) + tokenName := pUrl.GetTokenName() assert.Equal(t, c.name, tokenName) - assert.Equal(t, c.scheme, scheme) + assert.Equal(t, c.scheme, pUrl.Scheme) } } @@ -364,77 +367,183 @@ func FuzzGetTokenName(f *testing.F) { return } assert.NoError(t, err) - _, tokenName := getTokenName(url) + pUrl, err := pelican_url.Parse(urlString, nil, nil) + assert.NoError(t, err) + tokenName := pUrl.GetTokenName() assert.Equal(t, strings.ToLower(orig), tokenName, "URL: "+urlString+"URL String: "+url.String()+" Scheme: "+url.Scheme) }) } -func TestCorrectURLWithUnderscore(t *testing.T) { +// Spin up a discovery server for testing purposes +func getTestDiscoveryServer(t *testing.T) *httptest.Server { + handler := func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/pelican-configuration" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(`{ + "director_endpoint": "https://director.com", + "namespace_registration_endpoint": "https://registration.com", + "broker_endpoint": "https://broker.com", + "jwks_uri": "https://tokens.com" + }`)) + assert.NoError(t, err) + } else { + http.NotFound(w, r) + } + } + server := httptest.NewTLSServer(http.HandlerFunc(handler)) + return server +} + +func assertPelicanURLEqual(t *testing.T, expected, actual *pelican_url.PelicanURL) { + assert.Equal(t, expected.Scheme, actual.Scheme, "Scheme mismatch") + assert.Equal(t, expected.Host, actual.Host, "Discovery Host mismatch") + assert.Equal(t, expected.Path, actual.Path, "Path mismatch") + assert.Equal(t, expected.FedInfo, actual.FedInfo, "Federation Info mismatch") +} + +func TestParseRemoteAsPUrl(t *testing.T) { + discServer := getTestDiscoveryServer(t) + defer discServer.Close() + discUrl, err := url.Parse(discServer.URL) + require.NoError(t, err) + + oldHost, err := pelican_url.SetOsdfDiscoveryHost(discUrl.Host) + t.Cleanup(func() { + _, _ = pelican_url.SetOsdfDiscoveryHost(oldHost) + }) + require.NoError(t, err) + tests := []struct { - name string - url string - expectedURL string - expectedScheme string + name string + rp string + discEP string // for setting global federation metadata + dirEP string // for setting global federation metadata + expected *pelican_url.PelicanURL + errMsg string }{ { - name: "LIGO URL with underscore", - url: "ligo_data://ligo.org/data/1", - expectedURL: "ligo.data://ligo.org/data/1", - expectedScheme: "ligo_data", + name: "test valid pelican url, no global metadata", + rp: fmt.Sprintf("pelican://%s/foo/bar", discUrl.Host), + discEP: "", + dirEP: "", + expected: &pelican_url.PelicanURL{Scheme: "pelican", Host: discUrl.Host, Path: "/foo/bar", FedInfo: pelican_url.FederationDiscovery{DirectorEndpoint: "https://director.com", RegistryEndpoint: "https://registration.com", BrokerEndpoint: "https://broker.com", JwksUri: "https://tokens.com"}}, + errMsg: "", + }, + { + name: "test valid osdf url, no global metadata", + rp: "osdf:///foo/bar", + discEP: "", + dirEP: "", + expected: &pelican_url.PelicanURL{Scheme: "osdf", Host: "", Path: "/foo/bar", FedInfo: pelican_url.FederationDiscovery{DirectorEndpoint: "https://director.com", RegistryEndpoint: "https://registration.com", BrokerEndpoint: "https://broker.com", JwksUri: "https://tokens.com"}}, + errMsg: "", + }, + { + name: "test valid stash url, no global metadata", + rp: "stash:///foo/bar", + discEP: "", + dirEP: "", + expected: &pelican_url.PelicanURL{Scheme: "stash", Host: "", Path: "/foo/bar", FedInfo: pelican_url.FederationDiscovery{DirectorEndpoint: "https://director.com", RegistryEndpoint: "https://registration.com", BrokerEndpoint: "https://broker.com", JwksUri: "https://tokens.com"}}, + errMsg: "", + }, + { + name: "test valid path with configured global discovery url", + rp: "/foo/bar", + discEP: discUrl.Host, + dirEP: "", + expected: &pelican_url.PelicanURL{Scheme: "pelican", Host: discUrl.Host, Path: "/foo/bar", FedInfo: pelican_url.FederationDiscovery{DirectorEndpoint: "https://director.com", RegistryEndpoint: "https://registration.com", BrokerEndpoint: "https://broker.com", JwksUri: "https://tokens.com"}}, + errMsg: "", }, { - name: "URL without underscore", - url: "http://example.com", - expectedURL: "http://example.com", - expectedScheme: "http", + name: "test valid path that falls back to configured director for discovery", + rp: "/foo/bar", + discEP: "", + dirEP: discUrl.Host, + expected: &pelican_url.PelicanURL{Scheme: "pelican", Host: discUrl.Host, Path: "/foo/bar", FedInfo: pelican_url.FederationDiscovery{DirectorEndpoint: "https://director.com", RegistryEndpoint: "https://registration.com", BrokerEndpoint: "https://broker.com", JwksUri: "https://tokens.com"}}, + errMsg: "", }, { - name: "URL with no scheme", - url: "example.com", - expectedURL: "example.com", - expectedScheme: "", + name: "test failure for path with no discovery metadata", + rp: "/foo/bar", + discEP: "", + dirEP: "", + expected: nil, + errMsg: "schemeless Pelican URLs must be used with a federation discovery URL", }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - actualURL, actualScheme := correctURLWithUnderscore(tt.url) - if actualURL != tt.expectedURL || actualScheme != tt.expectedScheme { - t.Errorf("correctURLWithUnderscore(%v) = %v, %v; want %v, %v", tt.url, actualURL, actualScheme, tt.expectedURL, tt.expectedScheme) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Set up global metadata if we have it. We do this because the function may + // fall back to configured discovery/director URLs if it can't find them in the URL + configOpts := map[string]any{"TLSSkipVerify": true} + if test.discEP != "" { + configOpts["Federation.DiscoveryUrl"] = test.discEP + } + if test.dirEP != "" { + configOpts["Federation.DirectorUrl"] = test.dirEP + } + + test_utils.InitClient(t, configOpts) + + pUrl, err := ParseRemoteAsPUrl(context.Background(), test.rp) + if test.errMsg == "" { + assert.NoError(t, err) + assertPelicanURLEqual(t, test.expected, pUrl) + } else { + assert.Error(t, err) + assert.Contains(t, err.Error(), test.errMsg) } }) } } -func TestSchemeUnderstood(t *testing.T) { - t.Run("TestProperSchemeOsdf", func(t *testing.T) { - scheme := "osdf" - err := schemeUnderstood(scheme) - assert.NoError(t, err) - }) - t.Run("TestProperSchemeStash", func(t *testing.T) { - scheme := "stash" - err := schemeUnderstood(scheme) - assert.NoError(t, err) - }) - t.Run("TestProperSchemePelican", func(t *testing.T) { - scheme := "pelican" - err := schemeUnderstood(scheme) - assert.NoError(t, err) - }) - t.Run("TestProperSchemeFile", func(t *testing.T) { - scheme := "file" - err := schemeUnderstood(scheme) - assert.NoError(t, err) - }) - t.Run("TestProperSchemeEmpty", func(t *testing.T) { - scheme := "" - err := schemeUnderstood(scheme) - assert.NoError(t, err) - }) - t.Run("TestImproperScheme", func(t *testing.T) { - scheme := "ThisSchemeDoesNotExistAndHopefullyNeverWill" - err := schemeUnderstood(scheme) - assert.Error(t, err) - }) -} +// func ParseRemoteAsPUrl(ctx context.Context, rp string) (*pelican_url.PelicanURL, error) { +// pUrl := &pelican_url.PelicanURL{} + +// rpUrl, err := url.Parse(rp) +// if err != nil { +// return nil, errors.Wrap(err, "failed to parse remote path") +// } + +// // Set up options that get passed from Parse --> PopulateFedInfo and may be used when querying the Director +// client := &http.Client{Transport: config.GetTransport()} +// pOptions := []pelican_url.ParseOption{pelican_url.ShouldDiscover(true)} +// dOptions := []pelican_url.DiscoveryOption{pelican_url.UseCached(true), pelican_url.WithContext(ctx), pelican_url.WithClient(client), pelican_url.WithUserAgent(getUserAgent(""))} + +// // If we have no scheme, we'll end up assuming a Pelican url. We need to figure out which discovery endpoint to use. +// if rpUrl.Scheme == "" { +// fedInfo, err := config.GetFederation(ctx) +// if err != nil { +// return nil, errors.Wrap(err, "failed to get configured federation info") +// } + +// // First try to use the configured discovery endpoint +// if fedInfo.DiscoveryEndpoint != "" { +// discoveryUrl, err := url.Parse(fedInfo.DiscoveryEndpoint) +// if err != nil { +// return nil, errors.Wrap(err, "failed to parse federation discovery endpoint") +// } +// dOptions = append(dOptions, pelican_url.WithDiscoveryUrl(discoveryUrl)) +// } else if fedInfo.DirectorEndpoint != "" { +// // If we don't have a discovery endpoint, try to use the director endpoint +// discoveryUrl, err := url.Parse(fedInfo.DiscoveryEndpoint) +// if err != nil { +// return nil, errors.Wrap(err, "failed to parse federation discovery endpoint") +// } + +// dOptions = append(dOptions, pelican_url.WithDiscoveryUrl(discoveryUrl)) +// } +// } + +// pUrl, err = pelican_url.Parse( +// rp, +// pOptions, +// dOptions, +// ) +// if err != nil { +// return nil, err +// } + +// return pUrl, nil +// } diff --git a/client/sharing_url.go b/client/sharing_url.go index 5149e3205..e4174892c 100644 --- a/client/sharing_url.go +++ b/client/sharing_url.go @@ -20,8 +20,6 @@ package client import ( "context" - "net/url" - "strings" "github.com/pkg/errors" log "github.com/sirupsen/logrus" @@ -29,81 +27,28 @@ import ( "github.com/pelicanplatform/pelican/config" ) -func getDirectorFromUrl(objectUrl *url.URL) (string, error) { - ctx := context.Background() - - fedInfo, err := config.GetFederation(ctx) - if err != nil { - return "", err - } - configDirectorUrl := fedInfo.DirectorEndpoint - var directorUrl string - if objectUrl.Scheme == "pelican" { - if objectUrl.Host == "" { - if configDirectorUrl == "" { - return "", errors.New("Must specify (or configure) the federation hostname with the pelican://-style URLs") - } - directorUrl = configDirectorUrl - } else { - discoveryUrl := url.URL{ - Scheme: "https", - Host: objectUrl.Host, - } - fedInfo, err := config.DiscoverUrlFederation(ctx, discoveryUrl.String()) - if err != nil { - return "", errors.Wrapf(err, "Failed to discover location of the director for the federation %s", objectUrl.Host) - } - if directorUrl = fedInfo.DirectorEndpoint; directorUrl == "" { - return "", errors.Errorf("Director for the federation %s not discovered", objectUrl.Host) - } - } - } else if objectUrl.Scheme == "osdf" && configDirectorUrl == "" { - if objectUrl.Host != "" { - objectUrl.Path = "/" + objectUrl.Host + objectUrl.Path - objectUrl.Host = "" - } - fedInfo, err := config.DiscoverUrlFederation(ctx, "https://osg-htc.org") - if err != nil { - return "", errors.Wrap(err, "Failed to discover director for the OSDF") - } - if directorUrl = fedInfo.DirectorEndpoint; directorUrl == "" { - return "", errors.Errorf("Director for the OSDF not discovered") - } - } else if objectUrl.Scheme == "" { - if configDirectorUrl == "" { - return "", errors.Errorf("Must provide a federation name for path %s (e.g., pelican://osg-htc.org/%s)", objectUrl.Path, objectUrl.Path) - } else { - directorUrl = configDirectorUrl - } - } else if objectUrl.Scheme != "osdf" { - return "", errors.Errorf("Unsupported scheme for pelican: %s://", objectUrl.Scheme) - } - return directorUrl, nil -} - -func CreateSharingUrl(ctx context.Context, objectUrl *url.URL, isWrite bool) (string, error) { - directorUrl, err := getDirectorFromUrl(objectUrl) +func CreateSharingUrl(ctx context.Context, object string, isWrite bool) (string, error) { + pUrl, err := ParseRemoteAsPUrl(ctx, object) if err != nil { - return "", err + return "", errors.Wrap(err, "Failed to parse remote path") } - objectUrl.Path = "/" + strings.TrimPrefix(objectUrl.Path, "/") - log.Debugln("Will query director for path", objectUrl.Path) - dirResp, err := queryDirector(ctx, "GET", objectUrl.Path, directorUrl) + log.Debugln("Will query director for path", pUrl.Path) + dirResp, err := queryDirector(ctx, "GET", pUrl) if err != nil { log.Errorln("Error while querying the Director:", err) - return "", errors.Wrapf(err, "Error while querying the director at %s", directorUrl) + return "", errors.Wrapf(err, "Error while querying the director at %s", pUrl.FedInfo.DirectorEndpoint) } parsedDirResp, err := ParseDirectorInfo(dirResp) if err != nil { - return "", errors.Wrapf(err, "Unable to parse response from director at %s", directorUrl) + return "", errors.Wrapf(err, "Unable to parse response from director at %s", pUrl.FedInfo.DirectorEndpoint) } opts := config.TokenGenerationOpts{Operation: config.TokenSharedRead} if isWrite { opts.Operation = config.TokenSharedWrite } - token, err := AcquireToken(objectUrl, parsedDirResp, opts) + token, err := AcquireToken(pUrl.Path, parsedDirResp, opts) if err != nil { err = errors.Wrap(err, "Failed to acquire token") } diff --git a/client/sharing_url_test.go b/client/sharing_url_test.go index 7861d0bb7..9d181bdf5 100644 --- a/client/sharing_url_test.go +++ b/client/sharing_url_test.go @@ -16,7 +16,7 @@ * ***************************************************************/ -package client +package client_test import ( "context" @@ -24,7 +24,6 @@ import ( "io" "net/http" "net/http/httptest" - "net/url" "os" "strings" "testing" @@ -34,108 +33,25 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/pelicanplatform/pelican/client" "github.com/pelicanplatform/pelican/config" ) -func TestDirectorGeneration(t *testing.T) { - returnError := false - returnErrorRef := &returnError +func TestSharingUrl(t *testing.T) { + // Construct a local server that we can poke with QueryDirector. Start with a placeholder handler + // so that we can update the server.URL with the actual server address in the handler we overwrite later. + log.SetLevel(log.DebugLevel) + // Placeholder handler + handler := func(w http.ResponseWriter, r *http.Request) {} - handler := func(w http.ResponseWriter, r *http.Request) { - discoveryConfig := `{"director_endpoint": "https://location.example.com", "namespace_registration_endpoint": "https://location.example.com/namespace", "jwks_uri": "https://location.example.com/jwks"}` - if *returnErrorRef { - w.WriteHeader(http.StatusInternalServerError) - } else { - w.WriteHeader(http.StatusOK) - _, err := w.Write([]byte(discoveryConfig)) - assert.NoError(t, err) - } - } server := httptest.NewTLSServer(http.HandlerFunc(handler)) defer server.Close() - serverURL, err := url.Parse(server.URL) - require.NoError(t, err) - - objectUrl := url.URL{ - Scheme: "pelican", - Host: serverURL.Host, - Path: "/test/foo", - } - - // Discovery works to get URL - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - viper.Set("TLSSkipVerify", true) - require.NoError(t, config.InitClient()) - err = config.InitClient() - require.NoError(t, err) - dUrl, err := getDirectorFromUrl(&objectUrl) - require.NoError(t, err) - assert.Equal(t, dUrl, "https://location.example.com") - - // Discovery URL overrides the federation config. - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - viper.Set("TLSSkipVerify", true) - require.NoError(t, config.InitClient()) - viper.Set("Federation.DirectorURL", "https://location2.example.com") - dUrl, err = getDirectorFromUrl(&objectUrl) - require.NoError(t, err) - assert.Equal(t, dUrl, "https://location.example.com") - - // Fallback to configuration if no discovery present - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - require.NoError(t, config.InitClient()) - viper.Set("Federation.DirectorURL", "https://location2.example.com") - objectUrl.Host = "" - dUrl, err = getDirectorFromUrl(&objectUrl) - require.NoError(t, err) - assert.Equal(t, dUrl, "https://location2.example.com") - - // Error if server has an error - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - viper.Set("TLSSkipVerify", true) - require.NoError(t, config.InitClient()) - returnError = true - objectUrl.Host = serverURL.Host - _, err = getDirectorFromUrl(&objectUrl) - require.Error(t, err) - - // Error if neither config nor hostname provided. - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - require.NoError(t, config.InitClient()) - objectUrl.Host = "" - _, err = getDirectorFromUrl(&objectUrl) - require.Error(t, err) - - // Error on unknown scheme - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - config.InitConfig() - require.NoError(t, config.InitClient()) - objectUrl.Scheme = "buzzard" - _, err = getDirectorFromUrl(&objectUrl) - require.Error(t, err) -} - -func TestSharingUrl(t *testing.T) { - // Construct a local server that we can poke with QueryDirector - myUrl := "http://redirect.com" - myUrlRef := &myUrl - log.SetLevel(log.DebugLevel) - handler := func(w http.ResponseWriter, r *http.Request) { - issuerLoc := *myUrlRef + "/issuer" + // Actual handler using the updated server.URL + handler = func(w http.ResponseWriter, r *http.Request) { + issuerLoc := server.URL + "/issuer" if strings.HasPrefix(r.URL.Path, "/test") { - w.Header().Set("Location", *myUrlRef) + w.Header().Set("Location", server.URL) w.Header().Set("X-Pelican-Namespace", "namespace=/test, require-token=true") w.Header().Set("X-Pelican-Authorization", fmt.Sprintf("issuer=%s", issuerLoc)) w.Header().Set("X-Pelican-Token-Generation", fmt.Sprintf("issuer=%s, base-path=/test, strategy=OAuth2, max-scope-depth=3", issuerLoc)) @@ -145,9 +61,11 @@ func TestSharingUrl(t *testing.T) { oidcConfig := fmt.Sprintf(`{"token_endpoint": "%s/token", "registration_endpoint": "%s/register", "grant_types_supported": ["urn:ietf:params:oauth:grant-type:device_code"], "device_authorization_endpoint": "%s/device_authz"}`, issuerLoc, issuerLoc, issuerLoc) _, err := w.Write([]byte(oidcConfig)) assert.NoError(t, err) + } else if r.URL.Path == "/.well-known/pelican-configuration" { // to serve discovery information + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(fmt.Sprintf(`{"director_endpoint": "%s"}`, server.URL))) + assert.NoError(t, err) } else if r.URL.Path == "/issuer/register" { - //requestBytes, err := io.ReadAll(r.Body) - //assert.NoError(t, err) clientConfig := `{"client_id": "client1", "client_secret": "secret", "client_secret_expires_at": 0}` w.WriteHeader(http.StatusCreated) _, err := w.Write([]byte(clientConfig)) @@ -169,28 +87,28 @@ func TestSharingUrl(t *testing.T) { w.WriteHeader(http.StatusInternalServerError) } } - server := httptest.NewServer(http.HandlerFunc(handler)) - defer server.Close() - myUrl = server.URL + // Restart the server with the updated handler + server.Config.Handler = http.HandlerFunc(handler) _, err := config.SetPreferredPrefix(config.PelicanPrefix) assert.NoError(t, err) viper.Set("ConfigDir", t.TempDir()) viper.Set("Logging.Level", "debug") + viper.Set("TLSSkipVerify", true) config.InitConfig() os.Setenv("PELICAN_SKIP_TERMINAL_CHECK", "password") defer os.Unsetenv("PELICAN_SKIP_TERMINAL_CHECK") - viper.Set("Federation.DirectorURL", myUrl) + viper.Set("Federation.DiscoveryUrl", server.URL) viper.Set("ConfigDir", t.TempDir()) err = config.InitClient() assert.NoError(t, err) // Call QueryDirector with the test server URL and a source path - testUrl, err := url.Parse("/test/foo/bar") + testObj := "/test/foo/bar" require.NoError(t, err) os.Setenv(config.GetPreferredPrefix().String()+"_SKIP_TERMINAL_CHECK", "true") - token, err := CreateSharingUrl(context.Background(), testUrl, true) + token, err := client.CreateSharingUrl(context.Background(), testObj, true) assert.NoError(t, err) assert.NotEmpty(t, token) fmt.Println(token) diff --git a/cmd/config_mgr.go b/cmd/config_mgr.go index cfb4abee1..c1099016c 100644 --- a/cmd/config_mgr.go +++ b/cmd/config_mgr.go @@ -20,7 +20,6 @@ package main import ( "fmt" - "net/url" "os" "github.com/spf13/cobra" @@ -28,6 +27,7 @@ import ( "github.com/pelicanplatform/pelican/client" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" ) var ( @@ -143,18 +143,16 @@ func addTokenSubcommands(tokenCmd *cobra.Command) { os.Exit(1) } - dest, err := url.Parse(args[1]) + pUrl, err := pelican_url.Parse( + args[1], + []pelican_url.ParseOption{pelican_url.ShouldDiscover(true)}, + nil, + ) if err != nil { fmt.Fprintln(os.Stderr, "Failed to parse URL:", err) os.Exit(1) } - fedInfo, err := config.DiscoverUrlFederation(cmd.Context(), dest.Host) - if err != nil { - fmt.Fprintln(os.Stderr, "Failed to discover federation metadata:", err) - os.Exit(1) - } - - dirResp, err := client.GetDirectorInfoForPath(cmd.Context(), dest.Path, fedInfo.DirectorEndpoint, isWrite, "") + dirResp, err := client.GetDirectorInfoForPath(cmd.Context(), pUrl, isWrite) if err != nil { fmt.Fprintln(os.Stderr, "Failed to get director info for path:", err) os.Exit(1) @@ -164,7 +162,7 @@ func addTokenSubcommands(tokenCmd *cobra.Command) { if isWrite { opts.Operation = config.TokenWrite } - token, err := client.AcquireToken(dest, dirResp, opts) + token, err := client.AcquireToken(pUrl.Path, dirResp, opts) if err != nil { fmt.Fprintln(os.Stderr, "Failed to get a token:", err) os.Exit(1) diff --git a/cmd/object_copy.go b/cmd/object_copy.go index 277d0cf32..211138c21 100644 --- a/cmd/object_copy.go +++ b/cmd/object_copy.go @@ -167,11 +167,6 @@ func copyMain(cmd *cobra.Command, args []string) { lastSrc := "" for _, src := range source { - src, result = utils.UrlWithFederation(src) - if result != nil { - lastSrc = src - break - } isRecursive, _ := cmd.Flags().GetBool("recursive") _, result = client.DoCopy(ctx, src, dest, isRecursive, client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), client.WithCaches(caches...)) if result != nil { diff --git a/cmd/object_get.go b/cmd/object_get.go index 15cfbf3e5..b7b3b4c55 100644 --- a/cmd/object_get.go +++ b/cmd/object_get.go @@ -121,11 +121,6 @@ func getMain(cmd *cobra.Command, args []string) { lastSrc := "" for _, src := range source { - src, result = utils.UrlWithFederation(src) - if result != nil { - lastSrc = src - break - } isRecursive, _ := cmd.Flags().GetBool("recursive") _, result = client.DoGet(ctx, src, dest, isRecursive, client.WithCallback(pb.callback), client.WithTokenLocation(tokenLocation), client.WithCaches(caches...)) if result != nil { diff --git a/cmd/object_share.go b/cmd/object_share.go index 50e4a3f19..22ecde102 100644 --- a/cmd/object_share.go +++ b/cmd/object_share.go @@ -60,12 +60,13 @@ func shareMain(cmd *cobra.Command, args []string) error { return errors.New("A URL must be specified to share") } - objectUrl, err := url.Parse(args[0]) + object := args[0] + objectUrl, err := url.Parse(object) if err != nil { return errors.Wrapf(err, "Failed to parse '%v' as a URL", args[0]) } - token, err := client.CreateSharingUrl(cmd.Context(), objectUrl, isWrite) + token, err := client.CreateSharingUrl(cmd.Context(), object, isWrite) if err != nil { return errors.Wrapf(err, "Failed to create a sharing URL for %v", objectUrl.String()) } diff --git a/cmd/plugin.go b/cmd/plugin.go index 2c6d49159..488b37d6e 100644 --- a/cmd/plugin.go +++ b/cmd/plugin.go @@ -41,6 +41,7 @@ import ( "github.com/pelicanplatform/pelican/classads" "github.com/pelicanplatform/pelican/client" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/utils" ) @@ -438,14 +439,13 @@ func runPluginWorker(ctx context.Context, upload bool, workChan <-chan PluginTra break } - // Check we have valid query parameters - err := utils.CheckValidQuery(transfer.url) + pUrl, err := client.ParseRemoteAsPUrl(ctx, transfer.url.String()) if err != nil { failTransfer(transfer.url.String(), transfer.localFile, results, upload, err) return err } - if transfer.url.Query().Has("recursive") { + if transfer.url.Query().Has(pelican_url.QueryRecursive) { recursive = true } else { recursive = false @@ -458,7 +458,7 @@ func runPluginWorker(ctx context.Context, upload bool, workChan <-chan PluginTra log.Debugln("Downloading:", transfer.url, "to", transfer.localFile) } - urlCopy := *transfer.url + urlCopy := *pUrl tj, err = tc.NewTransferJob(context.Background(), &urlCopy, transfer.localFile, upload, recursive, client.WithAcquireToken(false), client.WithCaches(caches...)) if err != nil { failTransfer(transfer.url.String(), transfer.localFile, results, upload, err) diff --git a/cmd/registry_client.go b/cmd/registry_client.go index b4af457ed..8bc03d910 100644 --- a/cmd/registry_client.go +++ b/cmd/registry_client.go @@ -49,23 +49,23 @@ var withIdentity bool var prefix string var pubkeyPath string -func getNamespaceEndpoint(ctx context.Context) (string, error) { +func getRegistryEndpoint(ctx context.Context) (string, error) { fedInfo, err := config.GetFederation(ctx) if err != nil { return "", err } - namespaceEndpoint := fedInfo.NamespaceRegistrationEndpoint - if namespaceEndpoint == "" { + registryEndpoint := fedInfo.RegistryEndpoint + if registryEndpoint == "" { return "", errors.New("No namespace registry specified; either give the federation name (-f) or specify the namespace API endpoint directly (e.g., --namespace-url=https://namespace.osg-htc.org/namespaces)") } - namespaceEndpointURL, err := url.Parse(namespaceEndpoint) + registryEndpointURL, err := url.Parse(registryEndpoint) if err != nil { return "", errors.Wrap(err, "Unable to parse namespace registry url") } // Return the string, as opposed to a pointer to the URL object - return namespaceEndpointURL.String(), nil + return registryEndpointURL.String(), nil } func registerANamespace(cmd *cobra.Command, args []string) { @@ -75,7 +75,7 @@ func registerANamespace(cmd *cobra.Command, args []string) { os.Exit(1) } - namespaceEndpoint, err := getNamespaceEndpoint(cmd.Context()) + namespaceEndpoint, err := getRegistryEndpoint(cmd.Context()) if err != nil { log.Errorln("Failed to get RegistryUrl from config: ", err) os.Exit(1) @@ -143,7 +143,7 @@ func deleteANamespace(cmd *cobra.Command, args []string) { os.Exit(1) } - namespaceEndpoint, err := getNamespaceEndpoint(cmd.Context()) + namespaceEndpoint, err := getRegistryEndpoint(cmd.Context()) if err != nil { log.Errorln("Failed to get RegistryUrl from config: ", err) os.Exit(1) @@ -168,7 +168,7 @@ func listAllNamespaces(cmd *cobra.Command, args []string) { os.Exit(1) } - namespaceEndpoint, err := getNamespaceEndpoint(cmd.Context()) + namespaceEndpoint, err := getRegistryEndpoint(cmd.Context()) if err != nil { log.Errorln("Failed to get RegistryUrl from config: ", err) os.Exit(1) diff --git a/config/config.go b/config/config.go index bf9db62cc..ade3a0615 100644 --- a/config/config.go +++ b/config/config.go @@ -50,6 +50,7 @@ import ( "golang.org/x/sync/errgroup" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" ) @@ -80,27 +81,13 @@ type ( } `yaml:"OSDF"` } - FederationDiscovery struct { - DirectorEndpoint string `json:"director_endpoint"` - NamespaceRegistrationEndpoint string `json:"namespace_registration_endpoint"` - JwksUri string `json:"jwks_uri"` - BrokerEndpoint string `json:"broker_endpoint"` - } - TokenOperation int TokenGenerationOpts struct { Operation TokenOperation } - // ServerType int // ServerType is a bit mask indicating which Pelican server(s) are running in the current process - ContextKey string - - MetadataErr struct { - msg string - innerErr error - } ) const ( @@ -152,7 +139,7 @@ var ( // Note the 'once' object is a pointer so we can reset the client multiple // times during unit tests fedDiscoveryOnce *sync.Once - globalFedInfo FederationDiscovery + globalFedInfo pelican_url.FederationDiscovery globalFedErr error // Global struct validator @@ -172,8 +159,6 @@ var ( RestartFlag = make(chan any) // A channel flag to restart the server instance that launcher listens to (including cache) - MetadataTimeoutErr *MetadataErr = &MetadataErr{msg: "Timeout when querying metadata"} - watermarkUnits = []byte{'k', 'm', 'g', 't'} validPrefixes = map[ConfigPrefix]bool{ PelicanPrefix: true, @@ -185,42 +170,6 @@ var ( clientInitialized = false ) -// This function creates a new MetadataError by wrapping the previous error -func NewMetadataError(err error, msg string) *MetadataErr { - return &MetadataErr{ - msg: msg, - innerErr: err, - } -} - -func (e *MetadataErr) Error() string { - // If the inner error is nil, we don't want to print out "" - if e.innerErr != nil { - return fmt.Sprintf("%s: %v", e.msg, e.innerErr) - } else { - return e.msg - } -} - -func (e *MetadataErr) Is(target error) bool { - // We want to verify we have a timeout error - if target, ok := target.(*MetadataErr); ok { - return e.msg == target.msg - } - return false -} - -func (e *MetadataErr) Wrap(err error) error { - return &MetadataErr{ - innerErr: err, - msg: e.msg, - } -} - -func (e *MetadataErr) Unwrap() error { - return e.innerErr -} - func init() { en := en.New() uni = ut.New(en, en) @@ -369,114 +318,18 @@ func GetAllPrefixes() []ConfigPrefix { return prefixes } -// Helper function to start a metadata request. -// -// We see periodic timeouts when doing metadata lookup at sites. -// Adding a modest retry will, hopefully, reduce the number of errors -// that propagate out to users -func startMetadataQuery(ctx context.Context, httpClient *http.Client, discoveryUrl *url.URL) (result *http.Response, err error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, discoveryUrl.String(), nil) - if err != nil { - err = errors.Wrapf(err, "Failure when doing federation metadata request creation for %s", discoveryUrl) - return - } - req.Header.Set("User-Agent", "pelican/"+version) - - result, err = httpClient.Do(req) - if err != nil { - var netErr net.Error - if errors.As(err, &netErr) && netErr.Timeout() { - err = MetadataTimeoutErr.Wrap(err) - } else { - err = NewMetadataError(err, "Error occurred when querying for metadata") - } +// We can't parse a schemeless hostname when there's a port, so check for a scheme and add one if non exists. +func wrapWithHttpsIfNeeded(urlStr string) string { + if len(urlStr) > 0 && !strings.HasPrefix(urlStr, "http://") && !strings.HasPrefix(urlStr, "https://") { + urlStr = "https://" + urlStr } - return + return urlStr } -// This function is for discovering federations as specified by a url during a pelican:// transfer. -// this does not populate global fields and is more temporary per url -func DiscoverUrlFederation(ctx context.Context, federationDiscoveryUrl string) (metadata FederationDiscovery, err error) { - log.Debugln("Performing federation service discovery for specified url against endpoint", federationDiscoveryUrl) - federationUrl, err := url.Parse(federationDiscoveryUrl) - if err != nil { - err = errors.Wrapf(err, "invalid federation value %s:", federationDiscoveryUrl) - return - } - federationUrl.Scheme = "https" - if len(federationUrl.Path) > 0 && len(federationUrl.Host) == 0 { - federationUrl.Host = federationUrl.Path - federationUrl.Path = "" - } - - discoveryUrl, err := url.Parse(federationUrl.String()) - if err != nil { - err = errors.Wrap(err, "unable to parse federation discovery URL") - return - } - discoveryUrl.Path, err = url.JoinPath(federationUrl.Path, ".well-known/pelican-configuration") - if err != nil { - err = errors.Wrap(err, "Unable to parse federation url because of invalid path") - return - } - - httpClient := http.Client{ - Transport: GetTransport(), - Timeout: time.Second * 5, - } - - var result *http.Response - for idx := 1; idx <= 3; idx++ { - result, err = startMetadataQuery(ctx, &httpClient, discoveryUrl) - if err == nil { - break - } else if errors.Is(err, MetadataTimeoutErr) && ctx.Err() == nil { - log.Warningln("Timeout occurred when querying discovery URL", discoveryUrl.String(), "for metadata;", 3-idx, "retries remaining") - time.Sleep(2 * time.Second) - } else { - return - } - } - if errors.Is(err, MetadataTimeoutErr) { - log.Errorln("3 timeouts occurred when querying discovery URL", discoveryUrl.String()) - return - } - - if result.Body != nil { - defer result.Body.Close() - } - - body, err := io.ReadAll(result.Body) - if err != nil { - return FederationDiscovery{}, errors.Wrapf(err, "Failure when doing federation metadata read to %s", discoveryUrl) - } - - if result.StatusCode != http.StatusOK { - truncatedMessage := string(body) - if len(body) > 1000 { - truncatedMessage = string(body[:1000]) - truncatedMessage += " [... remainder truncated ...]" - } - return FederationDiscovery{}, errors.Errorf("Federation metadata discovery failed with HTTP status %d. Error message: %s", result.StatusCode, truncatedMessage) - } - - metadata = FederationDiscovery{} - err = json.Unmarshal(body, &metadata) - if err != nil { - return FederationDiscovery{}, errors.Wrapf(err, "Failure when parsing federation metadata at %s", discoveryUrl) - } - - log.Debugln("Federation service discovery resulted in director URL", metadata.DirectorEndpoint) - log.Debugln("Federation service discovery resulted in registry URL", metadata.NamespaceRegistrationEndpoint) - log.Debugln("Federation service discovery resulted in JWKS URL", metadata.JwksUri) - log.Debugln("Federation service discovery resulted in broker URL", metadata.BrokerEndpoint) - - return metadata, nil -} // Global implementation of Discover Federation, outside any caching or // delayed discovery -func discoverFederationImpl(ctx context.Context) (fedInfo FederationDiscovery, err error) { +func discoverFederationImpl(ctx context.Context) (fedInfo pelican_url.FederationDiscovery, err error) { federationStr := param.Federation_DiscoveryUrl.GetString() externalUrlStr := param.Server_ExternalWebUrl.GetString() defer func() { @@ -484,8 +337,8 @@ func discoverFederationImpl(ctx context.Context) (fedInfo FederationDiscovery, e if fedInfo.DirectorEndpoint == "" && enabledServers.IsEnabled(server_structs.DirectorType) { fedInfo.DirectorEndpoint = externalUrlStr } - if fedInfo.NamespaceRegistrationEndpoint == "" && enabledServers.IsEnabled(server_structs.RegistryType) { - fedInfo.NamespaceRegistrationEndpoint = externalUrlStr + if fedInfo.RegistryEndpoint == "" && enabledServers.IsEnabled(server_structs.RegistryType) { + fedInfo.RegistryEndpoint = externalUrlStr } if fedInfo.JwksUri == "" && enabledServers.IsEnabled(server_structs.DirectorType) { fedInfo.JwksUri = externalUrlStr + "/.well-known/issuer.jwks" @@ -493,17 +346,24 @@ func discoverFederationImpl(ctx context.Context) (fedInfo FederationDiscovery, e if fedInfo.BrokerEndpoint == "" && enabledServers.IsEnabled(server_structs.BrokerType) { fedInfo.BrokerEndpoint = externalUrlStr } + + // Make sure any values in global federation metadata are url-parseable + fedInfo.DirectorEndpoint = wrapWithHttpsIfNeeded(fedInfo.DirectorEndpoint) + fedInfo.RegistryEndpoint = wrapWithHttpsIfNeeded(fedInfo.RegistryEndpoint) + fedInfo.JwksUri = wrapWithHttpsIfNeeded(fedInfo.JwksUri) + fedInfo.BrokerEndpoint = wrapWithHttpsIfNeeded(fedInfo.BrokerEndpoint) }() - log.Debugln("Federation URL:", federationStr) + log.Debugln("Configured federation URL:", federationStr) fedInfo.DirectorEndpoint = viper.GetString("Federation.DirectorUrl") - fedInfo.NamespaceRegistrationEndpoint = viper.GetString("Federation.RegistryUrl") + fedInfo.RegistryEndpoint = viper.GetString("Federation.RegistryUrl") fedInfo.JwksUri = viper.GetString("Federation.JwkUrl") fedInfo.BrokerEndpoint = viper.GetString("Federation.BrokerUrl") - if fedInfo.DirectorEndpoint != "" && fedInfo.NamespaceRegistrationEndpoint != "" && fedInfo.JwksUri != "" && fedInfo.BrokerEndpoint != "" { + if fedInfo.DirectorEndpoint != "" && fedInfo.RegistryEndpoint != "" && fedInfo.JwksUri != "" && fedInfo.BrokerEndpoint != "" { return } + federationStr = wrapWithHttpsIfNeeded(federationStr) federationUrl, err := url.Parse(federationStr) if err != nil { err = errors.Wrapf(err, "invalid federation value %s:", federationStr) @@ -516,19 +376,27 @@ func discoverFederationImpl(ctx context.Context) (fedInfo FederationDiscovery, e return } - federationUrl.Scheme = "https" if len(federationUrl.Path) > 0 && len(federationUrl.Host) == 0 { federationUrl.Host = federationUrl.Path federationUrl.Path = "" } + fedInfo.DiscoveryEndpoint = federationUrl.String() - var metadata FederationDiscovery + var metadata pelican_url.FederationDiscovery if federationStr == "" { log.Debugln("Federation URL is unset; skipping discovery") } else if federationStr == externalUrlStr { log.Debugln("Current web engine hosts the federation; skipping auto-discovery of services") } else { - metadata, err = DiscoverUrlFederation(ctx, federationStr) + tr := GetTransport() + httpClient := &http.Client{ + Transport: tr, + Timeout: time.Second * 5, + } + + // We can't really know the service here, so set to generic Pelican + ua := "pelican/" + GetVersion() + metadata, err = pelican_url.DiscoverFederation(ctx, httpClient, ua, federationUrl) if err != nil { err = errors.Wrapf(err, "invalid federation value (%s)", federationStr) return @@ -540,9 +408,9 @@ func discoverFederationImpl(ctx context.Context) (fedInfo FederationDiscovery, e log.Debugln("Setting global director url to", metadata.DirectorEndpoint) fedInfo.DirectorEndpoint = metadata.DirectorEndpoint } - if fedInfo.NamespaceRegistrationEndpoint == "" { - log.Debugln("Setting global registry url to", metadata.NamespaceRegistrationEndpoint) - fedInfo.NamespaceRegistrationEndpoint = metadata.NamespaceRegistrationEndpoint + if fedInfo.RegistryEndpoint == "" { + log.Debugln("Setting global registry url to", metadata.RegistryEndpoint) + fedInfo.RegistryEndpoint = metadata.RegistryEndpoint } if fedInfo.JwksUri == "" { log.Debugln("Setting global jwks url to", metadata.JwksUri) @@ -568,7 +436,7 @@ func ResetFederationForTest() { // long as this is invoked after `InitClient` / `InitServer`, it is thread-safe. // If invoked before things are configured, it must be done from a single-threaded // context. -func GetFederation(ctx context.Context) (FederationDiscovery, error) { +func GetFederation(ctx context.Context) (pelican_url.FederationDiscovery, error) { if fedDiscoveryOnce == nil { fedDiscoveryOnce = &sync.Once{} } @@ -579,9 +447,11 @@ func GetFederation(ctx context.Context) (FederationDiscovery, error) { } // Set the current global federation metadata -func SetFederation(fd FederationDiscovery) { +func SetFederation(fd pelican_url.FederationDiscovery) { + viper.Set("Federation.DiscoveryUrl", fd.DiscoveryEndpoint) viper.Set("Federation.DirectorUrl", fd.DirectorEndpoint) - viper.Set("Federation.RegistryUrl", fd.NamespaceRegistrationEndpoint) + viper.Set("Federation.RegistryUrl", fd.RegistryEndpoint) + viper.Set("Federation.BrokerUrl", fd.BrokerEndpoint) viper.Set("Federation.JwkUrl", fd.JwksUri) } diff --git a/config/config_test.go b/config/config_test.go index cf93a507a..5bc88ecf2 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -20,7 +20,6 @@ package config import ( "context" - "encoding/json" "fmt" "net/http" "net/http/httptest" @@ -31,15 +30,12 @@ import ( "testing" "time" - "github.com/pkg/errors" "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" "github.com/sirupsen/logrus/hooks/test" "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/pelicanplatform/pelican/mock" "github.com/pelicanplatform/pelican/param" "github.com/pelicanplatform/pelican/server_structs" ) @@ -465,87 +461,6 @@ func TestSetPreferredPrefix(t *testing.T) { }) } -func TestDiscoverFederation(t *testing.T) { - ctx := testConfigContext(t) - - viper.Reset() - // Server to be a "mock" federation - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // make our response: - response := FederationDiscovery{ - DirectorEndpoint: "director", - NamespaceRegistrationEndpoint: "registry", - JwksUri: "jwks", - BrokerEndpoint: "broker", - } - - responseJSON, err := json.Marshal(response) - if err != nil { - http.Error(w, "Internal Server Error", http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - _, err = w.Write(responseJSON) - assert.NoError(t, err) - })) - defer server.Close() - transport := GetTransport() - origClientConfig := transport.TLSClientConfig - transport.TLSClientConfig = server.Client().Transport.(*http.Transport).TLSClientConfig.Clone() - t.Cleanup(func() { - transport.TLSClientConfig = origClientConfig - }) - - t.Run("testInvalidDiscoveryUrlWithPath", func(t *testing.T) { - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - viper.Set("Federation.DiscoveryURL", server.URL+"/this/is/some/path") - InitConfig() - require.NoError(t, InitClient()) - _, err := GetFederation(ctx) - require.Error(t, err) - assert.Contains(t, err.Error(), "Invalid federation discovery url is set. No path allowed for federation discovery url. Provided url: ", - "Error returned does not contain the correct error") - viper.Reset() - }) - - t.Run("testValidDiscoveryUrl", func(t *testing.T) { - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - InitConfig() - require.NoError(t, InitClient()) - transport := GetTransport() - transport.TLSClientConfig = server.Client().Transport.(*http.Transport).TLSClientConfig.Clone() - viper.Set("Federation.DiscoveryUrl", server.URL) - fedInfo, err := GetFederation(ctx) - assert.NoError(t, err) - // Assert that the metadata matches expectations - assert.Equal(t, "director", fedInfo.DirectorEndpoint, "Unexpected DirectorEndpoint") - assert.Equal(t, "registry", fedInfo.NamespaceRegistrationEndpoint, "Unexpected NamespaceRegistrationEndpoint") - assert.Equal(t, "jwks", fedInfo.JwksUri, "Unexpected JwksUri") - assert.Equal(t, "broker", fedInfo.BrokerEndpoint, "Unexpected BrokerEndpoint") - viper.Reset() - }) - - t.Run("testOsgHtcUrl", func(t *testing.T) { - viper.Reset() - viper.Set("ConfigDir", t.TempDir()) - InitConfig() - require.NoError(t, InitClient()) - mock.MockOSDFDiscovery(t, GetTransport()) - viper.Set("Federation.DiscoveryUrl", "osg-htc.org") - fedInfo, err := GetFederation(ctx) - assert.NoError(t, err) - // Assert that the metadata matches expectations - assert.Equal(t, "https://osdf-director.osg-htc.org", fedInfo.DirectorEndpoint, "Unexpected DirectorEndpoint") - assert.Equal(t, "https://osdf-registry.osg-htc.org", fedInfo.NamespaceRegistrationEndpoint, "Unexpected NamespaceRegistrationEndpoint") - assert.Equal(t, "https://osg-htc.org/osdf/public_signing_key.jwks", fedInfo.JwksUri, "Unexpected JwksUri") - assert.Equal(t, "", fedInfo.BrokerEndpoint, "Unexpected BrokerEndpoint") - viper.Reset() - }) -} - func TestCheckWatermark(t *testing.T) { t.Parallel() @@ -755,7 +670,7 @@ func TestInitServerUrl(t *testing.T) { require.NoError(t, err) fedInfo, err := GetFederation(ctx) require.NoError(t, err) - assert.Equal(t, mockWebUrlWoPort, fedInfo.NamespaceRegistrationEndpoint) + assert.Equal(t, mockWebUrlWoPort, fedInfo.RegistryEndpoint) // If Server_ExternalWebUrl is explicitly set, Federation_RegistryUrl defaults to whatever it is // But 443 port is stripped if provided @@ -765,7 +680,7 @@ func TestInitServerUrl(t *testing.T) { require.NoError(t, err) fedInfo, err = GetFederation(ctx) require.NoError(t, err) - assert.Equal(t, mockWebUrlWoPort, fedInfo.NamespaceRegistrationEndpoint) + assert.Equal(t, mockWebUrlWoPort, fedInfo.RegistryEndpoint) initConfig() viper.Set("Server.ExternalWebUrl", mockWebUrlWoPort) @@ -774,7 +689,7 @@ func TestInitServerUrl(t *testing.T) { require.NoError(t, err) fedInfo, err = GetFederation(ctx) require.NoError(t, err) - assert.Equal(t, "https://example-registry.com", fedInfo.NamespaceRegistrationEndpoint) + assert.Equal(t, "https://example-registry.com", fedInfo.RegistryEndpoint) }) t.Run("broker-url-default-to-web-url", func(t *testing.T) { @@ -810,131 +725,3 @@ func TestInitServerUrl(t *testing.T) { assert.Equal(t, "https://example-registry.com", fedInfo.BrokerEndpoint) }) } -func TestDiscoverUrlFederation(t *testing.T) { - t.Run("TestMetadataDiscoveryTimeout", func(t *testing.T) { - viper.Set("tlsskipverify", true) - err := InitClient() - assert.NoError(t, err) - // Create a server that sleeps for a longer duration than the timeout - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - time.Sleep(2 * time.Second) - })) - defer server.Close() - - // Set a short timeout for the test - timeout := 1 * time.Second - - // Create a context with the timeout - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - // Call the function with the server URL and the context - _, err = DiscoverUrlFederation(ctx, server.URL) - - // Assert that the error is the expected metadata timeout error - assert.Error(t, err) - assert.True(t, errors.Is(err, MetadataTimeoutErr)) - viper.Reset() - }) - - t.Run("TestMetadataDiscoveryTimeoutRetry", func(t *testing.T) { - viper.Set("tlsskipverify", true) - viper.Set("Logging.Level", "Debug") - viper.Set("Transport.ResponseHeaderTimeout", "300ms") - InitConfig() - err := InitClient() - require.NoError(t, err) - - hook := test.NewGlobal() - logrus.SetLevel(logrus.WarnLevel) - - // Create a server that sleeps for a longer duration than the timeout - ctr := 0 - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if ctr < 1 { - time.Sleep(2 * time.Second) - } - - ctr += 1 - w.WriteHeader(200) - _, err := w.Write([]byte("{\"director_endpoint\": \"https://osdf-director.osg-htc.org\", \"namespace_registration_endpoint\": \"https://osdf-registry.osg-htc.org\", \"jwks_uri\": \"https://osg-htc.org/osdf/public_signing_key.jwks\"}")) - assert.NoError(t, err) - })) - defer server.Close() - - // Set a short timeout for the test - timeout := 5 * time.Second - - // Create a context with the timeout - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - // Call the function with the server URL and the context - _, err = DiscoverUrlFederation(ctx, server.URL) - - assert.Equal(t, log.WarnLevel, hook.LastEntry().Level) - assert.Equal(t, "Timeout occurred when querying discovery URL "+server.URL+"/.well-known/pelican-configuration for metadata; 2 retries remaining", hook.LastEntry().Message) - - // Assert that the error is the expected metadata timeout error - assert.NoError(t, err) - viper.Reset() - }) - - t.Run("TestCanceledContext", func(t *testing.T) { - // Create a server that waits for the context to be canceled - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - <-r.Context().Done() - })) - defer server.Close() - - // Create a context and cancel it immediately - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - // Call the function with the server URL and the canceled context - _, err := DiscoverUrlFederation(ctx, server.URL) - - // Assert that the error is the expected context cancel error - assert.Error(t, err) - assert.True(t, errors.Is(err, context.Canceled)) - }) - - t.Run("TestValidDiscovery", func(t *testing.T) { - viper.Set("tlsskipverify", true) - err := InitClient() - assert.NoError(t, err) - // Server to be a "mock" federation - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // make our response: - response := FederationDiscovery{ - DirectorEndpoint: "director", - NamespaceRegistrationEndpoint: "registry", - JwksUri: "jwks", - BrokerEndpoint: "broker", - } - - responseJSON, err := json.Marshal(response) - if err != nil { - http.Error(w, "Internal Server Error", http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - _, err = w.Write(responseJSON) - assert.NoError(t, err) - })) - defer server.Close() - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - metadata, err := DiscoverUrlFederation(ctx, server.URL) - assert.NoError(t, err) - - // Assert that the metadata matches expectations - assert.Equal(t, "director", metadata.DirectorEndpoint, "Unexpected DirectorEndpoint") - assert.Equal(t, "registry", metadata.NamespaceRegistrationEndpoint, "Unexpected NamespaceRegistrationEndpoint") - assert.Equal(t, "jwks", metadata.JwksUri, "Unexpected JwksUri") - assert.Equal(t, "broker", metadata.BrokerEndpoint, "Unexpected BrokerEndpoint") - viper.Reset() - }) -} diff --git a/director/director.go b/director/director.go index 2ec3a29a2..0951ffab6 100644 --- a/director/director.go +++ b/director/director.go @@ -39,6 +39,7 @@ import ( "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/metrics" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" "github.com/pelicanplatform/pelican/token" @@ -166,9 +167,9 @@ func getRequestParameters(req *http.Request) (requestParams url.Values) { timeout = timeoutHeader[0] } - directRead := req.URL.Query().Has(utils.QueryDirectRead.String()) - skipStat := req.URL.Query().Has(utils.QuerySkipStat.String()) - preferCached := req.URL.Query().Has(utils.QueryPreferCached.String()) + directRead := req.URL.Query().Has(pelican_url.QueryDirectRead) + skipStat := req.URL.Query().Has(pelican_url.QuerySkipStat) + preferCached := req.URL.Query().Has(pelican_url.QueryPreferCached) // url.Values.Encode will help us escape all them if authz != "" { @@ -178,13 +179,13 @@ func getRequestParameters(req *http.Request) (requestParams url.Values) { requestParams.Add("pelican.timeout", timeout) } if skipStat { - requestParams.Add(utils.QuerySkipStat.String(), "") + requestParams.Add(pelican_url.QuerySkipStat, "") } if preferCached { - requestParams.Add(utils.QueryPreferCached.String(), "") + requestParams.Add(pelican_url.QueryPreferCached, "") } if directRead { - requestParams.Add(utils.QueryDirectRead.String(), "") + requestParams.Add(pelican_url.QueryDirectRead, "") } return } @@ -312,8 +313,8 @@ func versionCompatCheck(reqVer *version.Version, service string) error { // Check and validate the director-specific query params from the redirect request func checkRedirectQuery(query url.Values) error { - _, hasDirectRead := query[utils.QueryDirectRead.String()] - _, hasPreferCached := query[utils.QueryPreferCached.String()] + _, hasDirectRead := query[pelican_url.QueryDirectRead] + _, hasPreferCached := query[pelican_url.QueryPreferCached] if hasDirectRead && hasPreferCached { return errors.New("cannot have both directread and prefercached query parameters") @@ -556,11 +557,11 @@ func redirectToOrigin(ginCtx *gin.Context) { reqParams := getRequestParameters(ginCtx.Request) // Skip the stat check for object availability if either disableStat or skipstat is set - skipStat := reqParams.Has(utils.QuerySkipStat.String()) || !param.Director_EnableStat.GetBool() + skipStat := reqParams.Has(pelican_url.QuerySkipStat) || !param.Director_EnableStat.GetBool() // Include caches in the response if Director.CachesPullFromCaches is enabled // AND prefercached query parameter is set - includeCaches := param.Director_CachesPullFromCaches.GetBool() && reqParams.Has(utils.QueryPreferCached.String()) + includeCaches := param.Director_CachesPullFromCaches.GetBool() && reqParams.Has(pelican_url.QueryPreferCached) namespaceAd, originAds, cacheAds := getAdsForPath(reqPath) // if GetAdsForPath doesn't find any ads because the prefix doesn't exist, we should @@ -631,7 +632,7 @@ func redirectToOrigin(ginCtx *gin.Context) { includeCaches && ginCtx.Request.Method != http.MethodPut && ginCtx.Request.Method != "PROPFIND" && - !reqParams.Has(utils.QueryDirectRead.String()) { + !reqParams.Has(pelican_url.QueryDirectRead) { if q == nil { q = NewObjectStat() } @@ -753,7 +754,7 @@ func redirectToOrigin(ginCtx *gin.Context) { // Any client that uses this api that doesn't set directreads can talk directly to an origin // Check if we are doing a DirectRead and if it is allowed - if reqParams.Has(utils.QueryDirectRead.String()) { + if reqParams.Has(pelican_url.QueryDirectRead) { for idx, originAd := range availableAds { if originAd.DirectReads && namespaceAd.Caps.DirectReads { redirectURL = getRedirectURL(reqPath, availableAds[idx], !namespaceAd.PublicRead) @@ -881,7 +882,7 @@ func ShortcutMiddleware(defaultResponse string) gin.HandlerFunc { } // Check for the DirectRead query paramater and redirect to the origin if it's set if the origin allows DirectReads - if c.Request.URL.Query().Has(utils.QueryDirectRead.String()) { + if c.Request.URL.Query().Has(pelican_url.QueryDirectRead) { log.Debugln("directread query parameter detected, redirecting to origin") // We'll redirect to origin here and the origin will decide if it can serve the request (if direct reads are enabled) redirectToOrigin(c) diff --git a/director/discovery.go b/director/discovery.go index 1ba8bb7ea..5f304dec0 100644 --- a/director/discovery.go +++ b/director/discovery.go @@ -28,6 +28,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" ) @@ -74,7 +75,7 @@ func federationDiscoveryHandler(ctx *gin.Context) { if directorUrl.Port() == "443" { directorUrl.Host = strings.TrimSuffix(directorUrl.Host, ":443") } - registryUrlStr := fedInfo.NamespaceRegistrationEndpoint + registryUrlStr := fedInfo.RegistryEndpoint if registryUrlStr == "" { log.Error("Bad server configuration: Federation.RegistryUrl is not set") ctx.JSON(http.StatusInternalServerError, server_structs.SimpleApiResp{ @@ -111,11 +112,11 @@ func federationDiscoveryHandler(ctx *gin.Context) { return } - rs := config.FederationDiscovery{ - DirectorEndpoint: directorUrl.String(), - NamespaceRegistrationEndpoint: registryUrl.String(), - JwksUri: jwksUri, - BrokerEndpoint: brokerUrl, + rs := pelican_url.FederationDiscovery{ + DirectorEndpoint: directorUrl.String(), + RegistryEndpoint: registryUrl.String(), + JwksUri: jwksUri, + BrokerEndpoint: brokerUrl, } jsonData, err := json.MarshalIndent(rs, "", " ") diff --git a/director/discovery_test.go b/director/discovery_test.go index 8d487986b..b83783972 100644 --- a/director/discovery_test.go +++ b/director/discovery_test.go @@ -31,6 +31,7 @@ import ( "github.com/stretchr/testify/require" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" ) @@ -152,11 +153,11 @@ func TestFederationDiscoveryHandler(t *testing.T) { require.Equal(t, tc.statusCode, w.Result().StatusCode) body, err := io.ReadAll(w.Result().Body) require.NoError(t, err) - dis := config.FederationDiscovery{} + dis := pelican_url.FederationDiscovery{} err = json.Unmarshal(body, &dis) require.NoError(t, err) assert.Equal(t, tc.expectedDir, dis.DirectorEndpoint) - assert.Equal(t, tc.expectedReg, dis.NamespaceRegistrationEndpoint) + assert.Equal(t, tc.expectedReg, dis.RegistryEndpoint) }) } } diff --git a/director/origin_api.go b/director/origin_api.go index 27dda935d..79d57cc19 100644 --- a/director/origin_api.go +++ b/director/origin_api.go @@ -122,7 +122,7 @@ func verifyAdvertiseToken(ctx context.Context, token, namespace string) (bool, e if err != nil { return false, err } - regUrlStr := fedInfo.NamespaceRegistrationEndpoint + regUrlStr := fedInfo.RegistryEndpoint approved, err := checkNamespaceStatus(namespace, regUrlStr) if err != nil { diff --git a/fed_test_utils/fed.go b/fed_test_utils/fed.go index 0166cbae7..b0e5a7683 100644 --- a/fed_test_utils/fed.go +++ b/fed_test_utils/fed.go @@ -26,6 +26,7 @@ import ( "fmt" "io" "net/http" + "net/http/httptest" "os" "path/filepath" "strings" @@ -84,6 +85,7 @@ func NewFedTest(t *testing.T, originConfig string) (ft *FedTest) { viper.Set("ConfigDir", tmpPath) viper.Set("Logging.Level", "debug") viper.Set("Logging.Cache.Pss", "debug") + viper.Set("TLSSkipVerify", true) config.InitConfig() @@ -157,6 +159,27 @@ func NewFedTest(t *testing.T, originConfig string) (ft *FedTest) { ft.Pids = append(ft.Pids, server.GetPids()...) } + // Set up discovery for federation metadata hosting. This needs to be done AFTER launching + // servers, because they populate the param values we use to set the metadata. + handler := func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/pelican-configuration" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(fmt.Sprintf(`{ + "director_endpoint": "%s", + "namespace_registration_endpoint": "%s", + "broker_endpoint": "%s", + "jwks_uri": "%s" + }`, param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString(), param.Server_ExternalWebUrl.GetString()))) + assert.NoError(t, err) + } else { + http.NotFound(w, r) + } + } + discoveryServer := httptest.NewTLSServer(http.HandlerFunc(handler)) + t.Cleanup(discoveryServer.Close) + viper.Set("Federation.DiscoveryUrl", discoveryServer.URL) + desiredURL := param.Server_ExternalWebUrl.GetString() + "/api/v1.0/health" err = server_utils.WaitUntilWorking(ctx, "GET", desiredURL, "director", 200, false) require.NoError(t, err) diff --git a/launcher_utils/advertise.go b/launcher_utils/advertise.go index 3207613e9..aa5400472 100644 --- a/launcher_utils/advertise.go +++ b/launcher_utils/advertise.go @@ -110,11 +110,11 @@ func getSitenameFromReg(ctx context.Context, prefix string) (sitename string, er if err != nil { return } - if fed.NamespaceRegistrationEndpoint == "" { + if fed.RegistryEndpoint == "" { err = fmt.Errorf("unable to fetch site name from the registry. Federation.RegistryUrl or Federation.DiscoveryUrl is unset") return } - requestUrl, err := url.JoinPath(fed.NamespaceRegistrationEndpoint, "api/v1.0/registry", prefix) + requestUrl, err := url.JoinPath(fed.RegistryEndpoint, "api/v1.0/registry", prefix) if err != nil { return } diff --git a/launcher_utils/advertise_test.go b/launcher_utils/advertise_test.go index a48a1c712..4d8831900 100644 --- a/launcher_utils/advertise_test.go +++ b/launcher_utils/advertise_test.go @@ -31,6 +31,7 @@ import ( "github.com/stretchr/testify/require" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" ) @@ -41,7 +42,7 @@ func TestGetSitenameFromReg(t *testing.T) { t.Run("no-registry-url", func(t *testing.T) { config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{}) + config.SetFederation(pelican_url.FederationDiscovery{}) sitename, err := getSitenameFromReg(context.Background(), "/foo") require.Error(t, err) assert.Equal(t, "unable to fetch site name from the registry. Federation.RegistryUrl or Federation.DiscoveryUrl is unset", err.Error()) @@ -55,7 +56,7 @@ func TestGetSitenameFromReg(t *testing.T) { })) defer ts.Close() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{NamespaceRegistrationEndpoint: ts.URL}) + config.SetFederation(pelican_url.FederationDiscovery{RegistryEndpoint: ts.URL}) sitename, err := getSitenameFromReg(context.Background(), "/foo") require.Error(t, err) assert.Contains(t, err.Error(), "replied with status code 404") @@ -78,7 +79,7 @@ func TestGetSitenameFromReg(t *testing.T) { })) defer ts.Close() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{NamespaceRegistrationEndpoint: ts.URL}) + config.SetFederation(pelican_url.FederationDiscovery{RegistryEndpoint: ts.URL}) sitename, err := getSitenameFromReg(context.Background(), "/foo") require.NoError(t, err) assert.Equal(t, "bar", sitename) diff --git a/launcher_utils/register_namespace.go b/launcher_utils/register_namespace.go index 63f1866fd..8dd86334e 100644 --- a/launcher_utils/register_namespace.go +++ b/launcher_utils/register_namespace.go @@ -199,7 +199,7 @@ func keyIsRegistered(privkey jwk.Key, registryUrlStr string, prefix string) (key } } -func registerNamespacePrep(ctx context.Context, prefix string) (key jwk.Key, registrationEndpointURL string, isRegistered bool, err error) { +func registerNamespacePrep(ctx context.Context, prefix string) (key jwk.Key, registrationUrl string, isRegistered bool, err error) { // TODO: We eventually want to be able to export multiple prefixes; at that point, we'll // refactor to loop around all the namespaces if prefix == "" { @@ -215,13 +215,13 @@ func registerNamespacePrep(ctx context.Context, prefix string) (key jwk.Key, reg if err != nil { return } - namespaceEndpoint := fedInfo.NamespaceRegistrationEndpoint - if namespaceEndpoint == "" { + registryEndpoint := fedInfo.RegistryEndpoint + if registryEndpoint == "" { err = errors.New("No namespace registry specified; try passing the `-f` flag specifying the federation name") return } - registrationEndpointURL, err = url.JoinPath(namespaceEndpoint, "api", "v1.0", "registry") + registrationUrl, err = url.JoinPath(registryEndpoint, "api", "v1.0", "registry") if err != nil { err = errors.Wrap(err, "Failed to construct registration endpoint URL: %v") return @@ -237,7 +237,7 @@ func registerNamespacePrep(ctx context.Context, prefix string) (key jwk.Key, reg return } } - keyStatus, err := keyIsRegistered(key, registrationEndpointURL, prefix) + keyStatus, err := keyIsRegistered(key, registrationUrl, prefix) if err != nil { err = errors.Wrap(err, "Failed to determine whether namespace is already registered") return diff --git a/local_cache/local_cache.go b/local_cache/local_cache.go index 40fb7af43..e18f5049c 100644 --- a/local_cache/local_cache.go +++ b/local_cache/local_cache.go @@ -43,6 +43,7 @@ import ( "github.com/pelicanplatform/pelican/client" "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/token_scopes" "github.com/pelicanplatform/pelican/utils" @@ -523,10 +524,14 @@ func (sc *LocalCache) runMux() error { } } - sourceURL := *sc.directorURL - sourceURL.Path = path.Join(sourceURL.Path, path.Clean(req.request.path)) - sourceURL.Scheme = "pelican" - tj, err := sc.tc.NewTransferJob(req.ctx, &sourceURL, localPath, false, false, client.WithToken(req.request.token)) + pUrl := pelican_url.PelicanURL{ + FedInfo: pelican_url.FederationDiscovery{ + DirectorEndpoint: sc.directorURL.String(), + }, + Path: path.Join(sc.directorURL.Path, path.Clean(req.request.path)), + Scheme: "pelican", + } + tj, err := sc.tc.NewTransferJob(req.ctx, &pUrl, localPath, false, false, client.WithToken(req.request.token)) if err != nil { ds := &downloadStatus{} ds.err.Store(&err) diff --git a/origin/origin_ui.go b/origin/origin_ui.go index 64f96dfc4..07a16dc25 100644 --- a/origin/origin_ui.go +++ b/origin/origin_ui.go @@ -121,7 +121,7 @@ func handleExports(ctx *gin.Context) { tc.Lifetime = 15 * time.Minute tc.Subject = issuerUrl tc.AddScopes(token_scopes.Registry_EditRegistration) - tc.AddAudiences(fed.NamespaceRegistrationEndpoint) + tc.AddAudiences(fed.RegistryEndpoint) token, err := tc.CreateToken() if err != nil { log.Errorf("Failed to create access token for editing registration %v", err) diff --git a/origin/reg_status.go b/origin/reg_status.go index 555807e80..d176d5ac4 100644 --- a/origin/reg_status.go +++ b/origin/reg_status.go @@ -85,7 +85,7 @@ func FetchRegStatus(prefixes []string) (*server_structs.CheckNamespaceCompleteRe if err != nil { return nil, err } - regUrlStr := fed.NamespaceRegistrationEndpoint + regUrlStr := fed.RegistryEndpoint reqUrl, err := url.JoinPath(regUrlStr, "/api/v1.0/registry/namespaces/check/status") if err != nil { diff --git a/origin/reg_status_test.go b/origin/reg_status_test.go index c3c8552cf..7ced7ab0b 100644 --- a/origin/reg_status_test.go +++ b/origin/reg_status_test.go @@ -31,6 +31,7 @@ import ( "github.com/stretchr/testify/require" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/server_utils" ) @@ -82,8 +83,8 @@ func TestFetchRegStatus(t *testing.T) { defer ts.Close() viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: ts.URL, + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: ts.URL, }) result, err := FetchRegStatus([]string{"/foo", "/bar"}) @@ -106,8 +107,8 @@ func TestFetchRegStatus(t *testing.T) { defer ts.Close() viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: ts.URL, + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: ts.URL, }) _, err := FetchRegStatus([]string{"/foo", "/bar"}) @@ -126,8 +127,8 @@ func TestFetchRegStatus(t *testing.T) { defer ts.Close() viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: ts.URL, + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: ts.URL, }) _, err := FetchRegStatus([]string{"/foo", "/bar"}) @@ -143,8 +144,8 @@ func TestWrapExportsByStatus(t *testing.T) { }) viper.Reset() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: "https://mock-registry.org", + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: "https://mock-registry.org", }) registrationsStatus.DeleteAll() @@ -194,8 +195,8 @@ func TestWrapExportsByStatus(t *testing.T) { ts := mockRegistryCheck(t) defer ts.Close() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: ts.URL, + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: ts.URL, }) registrationsStatus.DeleteAll() diff --git a/registry/registry.go b/registry/registry.go index 9e242852e..077ba917e 100644 --- a/registry/registry.go +++ b/registry/registry.go @@ -242,7 +242,7 @@ func keySignChallengeCommit(ctx *gin.Context, data *registrationData) (bool, map if err != nil { return false, nil, err } - registryUrl := fedInfo.NamespaceRegistrationEndpoint + registryUrl := fedInfo.RegistryEndpoint var rawkey interface{} // This is the raw key, like *rsa.PrivateKey or *ecdsa.PrivateKey if err := key.Raw(&rawkey); err != nil { @@ -1129,11 +1129,11 @@ func checkStatusHandler(ctx *gin.Context) { }) } if server_structs.IsCacheNS(prefix) { - complete.EditUrl = fmt.Sprintf("%s/view/registry/cache/edit/?id=%d", fed.NamespaceRegistrationEndpoint, ns.ID) + complete.EditUrl = fmt.Sprintf("%s/view/registry/cache/edit/?id=%d", fed.RegistryEndpoint, ns.ID) } else if server_structs.IsOriginNS(prefix) { - complete.EditUrl = fmt.Sprintf("%s/view/registry/origin/edit/?id=%d", fed.NamespaceRegistrationEndpoint, ns.ID) + complete.EditUrl = fmt.Sprintf("%s/view/registry/origin/edit/?id=%d", fed.RegistryEndpoint, ns.ID) } else { - complete.EditUrl = fmt.Sprintf("%s/view/registry/namespace/edit/?id=%d", fed.NamespaceRegistrationEndpoint, ns.ID) + complete.EditUrl = fmt.Sprintf("%s/view/registry/namespace/edit/?id=%d", fed.RegistryEndpoint, ns.ID) } err = config.GetValidate().Struct(ns) if err != nil { diff --git a/registry/registry_test.go b/registry/registry_test.go index 9fe438956..e3e47d6f5 100644 --- a/registry/registry_test.go +++ b/registry/registry_test.go @@ -34,6 +34,7 @@ import ( "github.com/stretchr/testify/require" "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/pelican_url" "github.com/pelicanplatform/pelican/server_structs" "github.com/pelicanplatform/pelican/test_utils" ) @@ -254,8 +255,8 @@ func TestCheckNamespaceCompleteHandler(t *testing.T) { resetNamespaceDB(t) viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: "https://registry.org", + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: "https://registry.org", }) mockJWKS, err := test_utils.GenerateJWKS() @@ -292,8 +293,8 @@ func TestCheckNamespaceCompleteHandler(t *testing.T) { resetNamespaceDB(t) viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: "https://registry.org", + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: "https://registry.org", }) mockJWKS, err := test_utils.GenerateJWKS() @@ -338,8 +339,8 @@ func TestCheckNamespaceCompleteHandler(t *testing.T) { resetNamespaceDB(t) viper.Reset() config.ResetFederationForTest() - config.SetFederation(config.FederationDiscovery{ - NamespaceRegistrationEndpoint: "https://registry.org", + config.SetFederation(pelican_url.FederationDiscovery{ + RegistryEndpoint: "https://registry.org", }) mockJWKS, err := test_utils.GenerateJWKS() diff --git a/server_utils/registry.go b/server_utils/registry.go index 6fbac8ced..6f3b95e72 100644 --- a/server_utils/registry.go +++ b/server_utils/registry.go @@ -41,7 +41,7 @@ func GetNSIssuerURL(prefix string) (string, error) { return "", errors.New(fmt.Sprintf("the prefix \"%s\" is invalid", prefix)) } fedInfo, err := config.GetFederation(context.Background()) - registryUrlStr := fedInfo.NamespaceRegistrationEndpoint + registryUrlStr := fedInfo.RegistryEndpoint if registryUrlStr == "" { if err != nil { return "", err diff --git a/token/token_verify.go b/token/token_verify.go index 6ae74e1f7..22df6d72e 100644 --- a/token/token_verify.go +++ b/token/token_verify.go @@ -281,7 +281,7 @@ func GetNSIssuerURL(prefix string) (string, error) { if err != nil { return "", err } - registryUrlStr := fedInfo.NamespaceRegistrationEndpoint + registryUrlStr := fedInfo.RegistryEndpoint if registryUrlStr == "" { return "", errors.New("federation registry URL is not set and was not discovered") } diff --git a/utils/client.go b/utils/client.go deleted file mode 100644 index 1ae46c045..000000000 --- a/utils/client.go +++ /dev/null @@ -1,79 +0,0 @@ -/*************************************************************** -* -* Copyright (C) 2024, Pelican Project, Morgridge Institute for Research -* -* Licensed under the Apache License, Version 2.0 (the "License"); you -* may not use this file except in compliance with the License. You may -* obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -***************************************************************/ - -package utils - -import ( - "net/url" - - "github.com/pkg/errors" - log "github.com/sirupsen/logrus" -) - -// The available client transfer URL query parameters -type ClientQueryName string - -const ( - QueryRecursive ClientQueryName = "recursive" - QueryPack ClientQueryName = "pack" - QueryDirectRead ClientQueryName = "directread" - QuerySkipStat ClientQueryName = "skipstat" - QueryPreferCached ClientQueryName = "prefercached" -) - -func (q ClientQueryName) String() string { - return string(q) -} - -// This function checks if we have a valid query (or no query) for the transfer URL -func CheckValidQuery(transferUrl *url.URL) (err error) { - query := transferUrl.Query() - recursive, hasRecursive := query[QueryRecursive.String()] - _, hasPack := query[QueryPack.String()] - directRead, hasDirectRead := query[QueryDirectRead.String()] - _, hasSkipStat := query[QuerySkipStat.String()] - _, hasPreferCached := query[QueryPreferCached.String()] - - // If we have both recursive and pack, we should return a failure - if hasRecursive && hasPack { - return errors.New("cannot have both recursive and pack query parameters") - } - - if hasDirectRead && hasPreferCached { - return errors.New("cannot have both directread and prefercached query parameters") - } - - // If there is an argument in the directread query param, inform the user this is deprecated and their argument will be ignored - if hasDirectRead && directRead[0] != "" { - log.Warnln("Arguments (true/false) for the ?directread query have been deprecated and will be disallowed in a future release. The argument provided will be ignored") - return nil - } - - // If there is an argument in the recursive query param, inform the user this is deprecated and their argument will be ignored - if hasRecursive && recursive[0] != "" { - log.Warnln("Arguments (true/false) for the ?recursive query have been deprecated and will be disallowed in a future release. The argument provided will be ignored") - return nil - } - - // If we have no query, or we have recursive or pack, we are good - if len(query) == 0 || hasRecursive || hasPack || hasDirectRead || hasSkipStat || hasPreferCached { - return nil - } - - return errors.New("invalid query parameter(s) " + transferUrl.RawQuery + " provided in url " + transferUrl.String()) -} diff --git a/utils/client_test.go b/utils/client_test.go index 38bef2ca4..437b5bf6c 100644 --- a/utils/client_test.go +++ b/utils/client_test.go @@ -19,129 +19,11 @@ package utils import ( - "fmt" - "net/url" "testing" - "github.com/spf13/viper" "github.com/stretchr/testify/assert" - - "github.com/pelicanplatform/pelican/param" ) -// Test the functionality of CheckValidQuery and all its edge cases -func TestValidQuery(t *testing.T) { - // Test recursive query passes - t.Run("testValidRecursive", func(t *testing.T) { - transferStr := "pelican://something/here?recursive=true" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - // Test directread query passes - t.Run("testValidDirectRead", func(t *testing.T) { - transferStr := "pelican://something/here?directread" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - // Test pack query passes - t.Run("testValidPack", func(t *testing.T) { - transferStr := "pelican://something/here?pack=tar.gz" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - // Test a typo/invalid query fails - t.Run("testInvalidQuery", func(t *testing.T) { - transferStr := "pelican://something/here?recrustive=true" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.Error(t, err) - assert.Contains(t, err.Error(), "invalid query parameter(s) recrustive=true provided in url pelican://something/here?recrustive=true") - }) - - // Test that both pack and recursive queries are not allowed together (only in plugin case) - t.Run("testBothPackAndRecursiveFailure", func(t *testing.T) { - transferStr := "pelican://something/here?pack=tar.gz&recursive=true" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.Error(t, err) - assert.Contains(t, err.Error(), "cannot have both recursive and pack query parameters") - }) - - // Test we pass with both pack and directread - t.Run("testBothPackAndDirectReadSuccess", func(t *testing.T) { - transferStr := "pelican://something/here?pack=tar.gz&directread" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - // Test we pass with both recursive and directread (just plugin case) - t.Run("testBothRecursiveAndDirectReadSuccess", func(t *testing.T) { - transferStr := "pelican://something/here?recursive=true&directread" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - // Test if we have a value assigned to directread, we fail - t.Run("testValueOnDirectReadNoFailure", func(t *testing.T) { - transferStr := "pelican://something/here?directread=false" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - t.Run("testValidSkipStat", func(t *testing.T) { - transferStr := "pelican://something/here?skipstat" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - t.Run("testValidPreferCached", func(t *testing.T) { - transferStr := "pelican://something/here?prefercached" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.NoError(t, err) - }) - - t.Run("testInvalidDirectReadAndPreferCached", func(t *testing.T) { - transferStr := "pelican://something/here?prefercached&directread" - transferUrl, err := url.Parse(transferStr) - assert.NoError(t, err) - - err = CheckValidQuery(transferUrl) - assert.Error(t, err) - assert.Equal(t, "cannot have both directread and prefercached query parameters", err.Error()) - }) -} - func TestApplyIPMask(t *testing.T) { t.Run("testValidIPv4", func(t *testing.T) { expectedMaskedIP := "192.168.1.0" @@ -242,47 +124,3 @@ func TestExtractVersionAndServiceFromUserAgent(t *testing.T) { assert.Equal(t, 0, len(service)) }) } - -func TestUrlWithFederation(t *testing.T) { - viper.Reset() - defer viper.Reset() - pelUrl := "pelican://somefederation.org/namespace/test.txt" - - t.Run("testNoFederation", func(t *testing.T) { - str, err := UrlWithFederation(pelUrl) - assert.NoError(t, err) - assert.Equal(t, pelUrl, str) - }) - - t.Run("testFederationNoHost", func(t *testing.T) { - viper.Set(param.Federation_DiscoveryUrl.GetName(), "somefederation.org") - namespaceOnly := "/namespace/test.txt" - str, err := UrlWithFederation(namespaceOnly) - assert.NoError(t, err) - assert.Equal(t, pelUrl, str) - }) - - t.Run("testFederationWithFedHost", func(t *testing.T) { - viper.Set(param.Federation_DiscoveryUrl.GetName(), "https://somefederation.org") - namespaceOnly := "/namespace/test.txt" - str, err := UrlWithFederation(namespaceOnly) - assert.NoError(t, err) - assert.Equal(t, pelUrl, str) - }) - - t.Run("testFederationWithPathComponent", func(t *testing.T) { - viper.Set(param.Federation_DiscoveryUrl.GetName(), "somefederation.org/path") - namespaceOnly := "/namespace/test.txt" - _, err := UrlWithFederation(namespaceOnly) - assert.Error(t, err) - assert.EqualError(t, err, fmt.Sprintf("provided federation url %s has a path component", param.Federation_DiscoveryUrl.GetString())) - }) - - t.Run("testFederationPathComponentWithHost", func(t *testing.T) { - viper.Set(param.Federation_DiscoveryUrl.GetName(), "https://somefederation.org/path") - namespaceOnly := "/namespace/test.txt" - _, err := UrlWithFederation(namespaceOnly) - assert.Error(t, err) - assert.EqualError(t, err, fmt.Sprintf("provided federation url %s has a path component", param.Federation_DiscoveryUrl.GetString())) - }) -} diff --git a/utils/utils.go b/utils/utils.go index a2d3464f1..0628e78d4 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -19,7 +19,6 @@ package utils import ( - "fmt" "net" "net/url" "regexp" @@ -30,8 +29,6 @@ import ( log "github.com/sirupsen/logrus" "golang.org/x/text/cases" "golang.org/x/text/language" - - "github.com/pelicanplatform/pelican/param" ) // snakeCaseToCamelCase converts a snake case string to camel case. @@ -130,37 +127,3 @@ func ExtractVersionAndServiceFromUserAgent(userAgent string) (reqVer, service st service = (strings.Split(userAgentSplit[0], "-"))[1] return reqVer, service } - -// Given a remote url with no host (a namespace path, essentially), then if the Federation.DiscoveryURL is -// set (either via a yaml file or via the command line flags), create a full pelican protocol URL by combining -// the two -func UrlWithFederation(remoteUrl string) (string, error) { - if param.Federation_DiscoveryUrl.IsSet() { - parsedUrl, err := url.Parse(remoteUrl) - if err != nil { - newErr := errors.New(fmt.Sprintf("error parsing source url: %s", err)) - return remoteUrl, newErr - } - if parsedUrl.Host != "" { - return remoteUrl, nil - } - parsedDiscUrl, err := url.Parse(param.Federation_DiscoveryUrl.GetString()) - if err != nil { - newErr := errors.New(fmt.Sprintf("error parsing discovery url: %s", err)) - return remoteUrl, newErr - } - if parsedDiscUrl.Scheme == "" { - parsedDiscUrl.Scheme = "https" - updatedDiscString := parsedDiscUrl.String() - parsedDiscUrl, _ = url.Parse(updatedDiscString) - } - if parsedDiscUrl.Path != "" { - newErr := errors.New(fmt.Sprintf("provided federation url %s has a path component", param.Federation_DiscoveryUrl.GetString())) - return remoteUrl, newErr - } - parsedUrl.Host = parsedDiscUrl.Host - parsedUrl.Scheme = "pelican" - return parsedUrl.String(), nil - } - return remoteUrl, nil -}