diff --git a/config/config.go b/config/config.go index 5314e12b3..da267e0b6 100644 --- a/config/config.go +++ b/config/config.go @@ -1000,30 +1000,21 @@ func InitServer(ctx context.Context, currentServers server_structs.ServerType) e // we want to be able to check if this is user-provided (which we can't do for defaults.yaml) viper.SetDefault("Origin.S3UrlStyle", "path") - if param.Cache_DataLocation.IsSet() { - log.Warningf("Deprecated configuration key %s is set. Please migrate to use %s instead", param.Cache_DataLocation.GetName(), param.Cache_LocalRoot.GetName()) - log.Warningf("Will attempt to use the value of %s as default for %s", param.Cache_DataLocation.GetName(), param.Cache_LocalRoot.GetName()) - } - if IsRootExecution() { if currentServers.IsEnabled(server_structs.OriginType) { viper.SetDefault("Origin.RunLocation", filepath.Join("/run", "pelican", "xrootd", "origin")) } if currentServers.IsEnabled(server_structs.CacheType) { - viper.SetDefault("Cache.RunLocation", filepath.Join("/run", "pelican", "xrootd", "cache")) - } - - // To ensure Cache.DataLocation still works, we default Cache.LocalRoot to Cache.DataLocation - // The logic is extracted from handleDeprecatedConfig as we manually set the default value here - viper.SetDefault(param.Cache_DataLocation.GetName(), "/run/pelican/cache") - viper.SetDefault(param.Cache_LocalRoot.GetName(), param.Cache_DataLocation.GetString()) - - if viper.IsSet("Cache.DataLocation") { - viper.SetDefault("Cache.DataLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "data")}) - viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "meta")}) - } else { - viper.SetDefault("Cache.DataLocations", []string{"/run/pelican/cache/data"}) - viper.SetDefault("Cache.MetaLocations", []string{"/run/pelican/cache/meta"}) + viper.SetDefault(param.Cache_RunLocation.GetName(), filepath.Join("/run", "pelican", "xrootd", "cache")) + // Several deprecated keys point to 
Cache.StorageLocation, and by the time here we've already mapped those + // keys in handleDeprecatedConfig(). To prevent overriding potentially-mapped deprecated keys, we only re-set + // the default here if this key is not set. + if !viper.IsSet(param.Cache_StorageLocation.GetName()) { + viper.SetDefault(param.Cache_StorageLocation.GetName(), filepath.Join("/run", "pelican", "cache")) + } + viper.SetDefault(param.Cache_NamespaceLocation.GetName(), filepath.Join(param.Cache_StorageLocation.GetString(), "namespace")) + viper.SetDefault(param.Cache_DataLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "data")}) + viper.SetDefault(param.Cache_MetaLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "meta")}) } viper.SetDefault("LocalCache.RunLocation", filepath.Join("/run", "pelican", "localcache")) @@ -1073,18 +1064,17 @@ func InitServer(ctx context.Context, currentServers server_structs.ServerType) e cleanupDirOnShutdown(ctx, runtimeDir) } viper.SetDefault(param.Origin_GlobusConfigLocation.GetName(), filepath.Join(runtimeDir, "xrootd", "origin", "globus")) - // To ensure Cache.DataLocation still works, we default Cache.LocalRoot to Cache.DataLocation - // The logic is extracted from handleDeprecatedConfig as we manually set the default value here - viper.SetDefault(param.Cache_DataLocation.GetName(), filepath.Join(runtimeDir, "cache")) - viper.SetDefault(param.Cache_LocalRoot.GetName(), param.Cache_DataLocation.GetString()) - - if viper.IsSet("Cache.DataLocation") { - viper.SetDefault("Cache.DataLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "data")}) - viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "meta")}) - } else { - viper.SetDefault("Cache.DataLocations", []string{filepath.Join(runtimeDir, "pelican/cache/data")}) - viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(runtimeDir, "pelican/cache/meta")}) + + 
// Several deprecated keys point to Cache.StorageLocation, and by the time here we've already mapped those + // keys in handleDeprecatedConfig(). To prevent overriding potentially-mapped deprecated keys, we only re-set + // the default here if this key is not set. + if !viper.IsSet(param.Cache_StorageLocation.GetName()) { + viper.SetDefault(param.Cache_StorageLocation.GetName(), filepath.Join("/run", "pelican", "cache")) } + viper.SetDefault(param.Cache_NamespaceLocation.GetName(), filepath.Join(param.Cache_StorageLocation.GetString(), "namespace")) + viper.SetDefault(param.Cache_DataLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "data")}) + viper.SetDefault(param.Cache_MetaLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "meta")}) + viper.SetDefault("LocalCache.RunLocation", filepath.Join(runtimeDir, "cache")) viper.SetDefault("Origin.Multiuser", false) } diff --git a/docs/parameters.yaml b/docs/parameters.yaml index b3e24a89b..8a0e47b38 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1066,59 +1066,61 @@ components: ["localcache"] ############################ # Cache-level configs # ############################ -name: Cache.DataLocation -description: |+ - [Deprecated] Cache.DataLocation is being deprecated and will be removed in a future release. It is replaced by a combination of Cache.DataLocations and Cache.MetaLocations -type: string -root_default: /run/pelican/xcache -default: $XDG_RUNTIME_DIR/pelican/xcache -deprecated: true -replacedby: Cache.LocalRoot -components: ["cache"] ---- -name: Cache.LocalRoot +name: Cache.StorageLocation description: |+ An absolute path to the directory where xrootd will create its default namespace, `meta`, and `data` directories. 
For example, - setting `Cache.LocalRoot=/run/pelican/cache` without specifying further `Cache.DataLocations` or `Cache.MetaLocations` + setting `Cache.StorageLocation=/run/pelican/cache` without specifying further `Cache.DataLocations` or `Cache.MetaLocations` values will result in the cache creating a directory structure like: ``` . └── /run/pelican/cache/ ├── data/ - │ ├── 00 #hexadecimal values + │ ├── 00 # hexadecimal name values │ ├── 01 │ ├── ... │ └── FF ├── meta/ - │ ├── 00 #hexadecimal values + │ ├── 00 # hexadecimal name values │ ├── 01 │ ├── ... │ └── FF - ├── namespace1/ - │ ├── foo1.txt --> /run/pelican/cache/data/00 - │ └── foo2.txt --> /run/pelican/cache/data/01 - └── namespace2/ - └── bar.txt --> /run/pelican/cache/data/FF + └── namespace/ + ├── namespace1/ + │ ├── foo1.txt --> /run/pelican/cache/data/00 + │ └── foo2.txt --> /run/pelican/cache/data/01 + └── namespace2/ + └── bar.txt --> /run/pelican/cache/data/FF ``` In this setup, actual data files live at `/run/pelican/cache/data` and are given hexadecimal names, while - references (symbolic links) to those files are stored in `/run/pelican/cache/namespace1`, `/run/pelican/cache/namespace2`, - etc. The `meta` directory is used for object metadata. Object requests to XRootD will be served from the namespace directories, and + references (symbolic links) to those files are stored in `/run/pelican/cache/namespace`. The `meta` directory + is used for object metadata. Object requests to XRootD will be served from the namespace directories, and resolve the underlying object through these symbolic links. - We recommend tying the `Cache.LocalRoot` to a fast storage device, such as an SSD, to ensure optimal cache performance. - + We recommend tying the `Cache.StorageLocation` to a fast storage device, such as an SSD, to ensure optimal cache performance. If this directory does not already exist, it will be created by Pelican. 
+ + WARNING: The default value of /run/pelican/cache should _never_ be used for production caches, as this directory is typically + cleared on system restarts, and may interfere with system services if it becomes full. Running a cache with the default value + set will generate a warning at cache startup. type: string root_default: /run/pelican/cache default: $XDG_RUNTIME_DIR/pelican/cache components: ["cache"] --- -name: Cache.ExportLocation +name: Cache.NamespaceLocation description: |+ - The location of the export directory. Everything under this directory will be exposed as part of the cache. This is - relative to the mount location. + A cache's namespace directory is used to duplicate/recreate the federation's namespace structure, and stores symbolic links from + object names to the actual data files (see `Cache.StorageLocation` for extra information). For example, requesting `/foo/bar.txt` from a + cache will check for the existence of a symbolic link at `${Cache.NamespaceLocation}/foo/bar.txt`, and if it exists, the cache will serve + the data file at the location the symbolic link points to. + + If this directory does not already exist, it will be created by Pelican. + + WARNING: It's important that any values for `Cache.DataLocations` and `Cache.MetaLocations` are NOT subdirectories of `Cache.NamespaceLocation`, + as this will make the raw data/meta files accessible through the cache's namespace structure, which is undefined behavior. type: string -default: / +default: ${Cache.StorageLocation}/namespace + components: ["cache"] --- name: Cache.DataLocations @@ -1132,9 +1134,11 @@ description: |+ as well as the [xrootd pfc documentation](https://xrootd.slac.stanford.edu/doc/dev56/pss_config.pdf) for the `pfc.spaces` directive. If this directory does not already exist, it will be created by Pelican. 
+ + WARNING: It's important that any values for `Cache.DataLocations` are NOT subdirectories of `Cache.NamespaceLocation`, + as this will make the raw data files accessible through the cache's namespace structure, which is undefined behavior. type: stringSlice -root_default: ["/run/pelican/cache/data"] -default: ["$XDG_RUNTIME_DIR/pelican/cache/data"] +default: ["${Cache.StorageLocation}/data"] components: ["cache"] --- name: Cache.MetaLocations @@ -1147,9 +1151,40 @@ description: |+ as well as the [xrootd pfc documentation](https://xrootd.slac.stanford.edu/doc/dev56/pss_config.pdf) for the `pfc.spaces` directive. If this directory does not already exist, it will be created by Pelican. + + WARNING: It's important that any values for `Cache.MetaLocations` are NOT subdirectories of `Cache.NamespaceLocation`, + as this will make the raw metadata files accessible through the cache's namespace structure, which is undefined behavior. type: stringSlice -root_default: ["/run/pelican/cache/meta"] -default: ["$XDG_RUNTIME_DIR/pelican/cache/meta"] +default: ["${Cache.StorageLocation}/meta"] +components: ["cache"] +--- +name: Cache.LocalRoot +description: |+ + [Deprecated] Cache.LocalRoot is deprecated and replaced by Cache.StorageLocation. +type: string +root_default: /run/pelican/cache +default: $XDG_RUNTIME_DIR/pelican/cache +deprecated: true +replacedby: "Cache.StorageLocation" +components: ["cache"] +--- +name: Cache.DataLocation +description: |+ + [Deprecated] Cache.DataLocation is being deprecated and will be removed in a future release. It is replaced by Cache.StorageLocation +type: string +root_default: /run/pelican/cache +default: $XDG_RUNTIME_DIR/pelican/cache +deprecated: true +replacedby: Cache.StorageLocation +components: ["cache"] +--- +name: Cache.ExportLocation +description: |+ + A path that's relative to the `Cache.NamespaceLocation` where the cache will expose its contents. This path can be used to + control which namespaces are available through the cache. 
For example, setting `Cache.ExportLocation: /foo` will only expose + the `/foo` namespace to clients. +type: string +default: / components: ["cache"] --- name: Cache.RunLocation diff --git a/param/parameters.go b/param/parameters.go index f00cb0909..80c14729a 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -51,7 +51,8 @@ type ObjectParam struct { func GetDeprecated() map[string][]string { return map[string][]string{ - "Cache.DataLocation": {"Cache.LocalRoot"}, + "Cache.DataLocation": {"Cache.StorageLocation"}, + "Cache.LocalRoot": {"Cache.StorageLocation"}, "Director.EnableStat": {"Director.CheckOriginPresence"}, "DisableHttpProxy": {"Client.DisableHttpProxy"}, "DisableProxyFallback": {"Client.DisableProxyFallback"}, @@ -147,8 +148,10 @@ var ( Cache_HighWaterMark = StringParam{"Cache.HighWaterMark"} Cache_LocalRoot = StringParam{"Cache.LocalRoot"} Cache_LowWatermark = StringParam{"Cache.LowWatermark"} + Cache_NamespaceLocation = StringParam{"Cache.NamespaceLocation"} Cache_RunLocation = StringParam{"Cache.RunLocation"} Cache_SentinelLocation = StringParam{"Cache.SentinelLocation"} + Cache_StorageLocation = StringParam{"Cache.StorageLocation"} Cache_Url = StringParam{"Cache.Url"} Cache_XRootDPrefix = StringParam{"Cache.XRootDPrefix"} Director_CacheSortMethod = StringParam{"Director.CacheSortMethod"} diff --git a/param/parameters_struct.go b/param/parameters_struct.go index 1e39835a2..e16cbb3e2 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -36,12 +36,14 @@ type Config struct { LocalRoot string `mapstructure:"localroot"` LowWatermark string `mapstructure:"lowwatermark"` MetaLocations []string `mapstructure:"metalocations"` + NamespaceLocation string `mapstructure:"namespacelocation"` PermittedNamespaces []string `mapstructure:"permittednamespaces"` Port int `mapstructure:"port"` RunLocation string `mapstructure:"runlocation"` SelfTest bool `mapstructure:"selftest"` SelfTestInterval time.Duration 
`mapstructure:"selftestinterval"` SentinelLocation string `mapstructure:"sentinellocation"` + StorageLocation string `mapstructure:"storagelocation"` Url string `mapstructure:"url"` XRootDPrefix string `mapstructure:"xrootdprefix"` } `mapstructure:"cache"` @@ -338,12 +340,14 @@ type configWithType struct { LocalRoot struct { Type string; Value string } LowWatermark struct { Type string; Value string } MetaLocations struct { Type string; Value []string } + NamespaceLocation struct { Type string; Value string } PermittedNamespaces struct { Type string; Value []string } Port struct { Type string; Value int } RunLocation struct { Type string; Value string } SelfTest struct { Type string; Value bool } SelfTestInterval struct { Type string; Value time.Duration } SentinelLocation struct { Type string; Value string } + StorageLocation struct { Type string; Value string } Url struct { Type string; Value string } XRootDPrefix struct { Type string; Value string } } diff --git a/xrootd/resources/xrootd-cache.cfg b/xrootd/resources/xrootd-cache.cfg index 2bb00e454..5195851c0 100644 --- a/xrootd/resources/xrootd-cache.cfg +++ b/xrootd/resources/xrootd-cache.cfg @@ -66,7 +66,7 @@ http.tlsrequiredprefix {{$Prefix}} throttle.throttle concurrency {{.Cache.Concurrency}} {{end}} pss.origin {{.Cache.PSSOrigin}} -oss.localroot {{.Cache.LocalRoot}} +oss.localroot {{.Cache.NamespaceLocation}} pfc.spaces data meta {{- range $value := .Cache.DataLocations}} oss.space data {{$value}} diff --git a/xrootd/xrootd_config.go b/xrootd/xrootd_config.go index 3b88fb218..b8c57f9c3 100644 --- a/xrootd/xrootd_config.go +++ b/xrootd/xrootd_config.go @@ -115,7 +115,7 @@ type ( RunLocation string DataLocations []string MetaLocations []string - LocalRoot string + NamespaceLocation string PSSOrigin string Concurrency int X509ClientAuthenticationPrefixes []string @@ -273,32 +273,32 @@ func CheckOriginXrootdEnv(exportPath string, server server_structs.XRootDServer, return nil } -func 
CheckCacheXrootdEnv(exportPath string, server server_structs.XRootDServer, uid int, gid int) (string, error) { - viper.Set("Xrootd.Mount", exportPath) - filepath.Join(exportPath, "/") - err := config.MkdirAll(exportPath, 0775, uid, gid) - if err != nil { - return "", errors.Wrapf(err, "Unable to create export directory %v", - filepath.Dir(exportPath)) +func CheckCacheXrootdEnv(server server_structs.XRootDServer, uid int, gid int) error { + storageLocation := param.Cache_StorageLocation.GetString() + if err := config.MkdirAll(storageLocation, 0775, uid, gid); err != nil { + return errors.Wrapf(err, "Unable to create the cache's storage directory '%s'", storageLocation) + } + // Setting Cache.StorageLocation to /run/pelican/cache is a default we use for testing, but it shouldn't ever be used + // in a production setting. If the user hasn't overridden the default, log a warning. + if storageLocation == filepath.Join("/run", "pelican", "cache") { + log.Warnf("%s is set to the default /run/pelican/cache. 
This default is to allow quick testing but should not be used in production.", param.Cache_StorageLocation.GetName()) } - localRoot := param.Cache_LocalRoot.GetString() - - localRoot = filepath.Clean(localRoot) - err = config.MkdirAll(localRoot, 0775, uid, gid) - - if err != nil { - return "", errors.Wrapf(err, "Unable to create local root %v", - filepath.Dir(localRoot)) + namespaceLocation := param.Cache_NamespaceLocation.GetString() + if err := config.MkdirAll(namespaceLocation, 0775, uid, gid); err != nil { + return errors.Wrapf(err, "Unable to create the cache's storage directory '%s'", storageLocation) } dataPaths := param.Cache_DataLocations.GetStringSlice() for _, dPath := range dataPaths { dataPath := filepath.Clean(dPath) - err = config.MkdirAll(dataPath, 0775, uid, gid) + // Data locations should never be below the namespace location + if strings.HasPrefix(dPath, namespaceLocation) { + return errors.Errorf("A configured data location '%s' is a subdirectory of the namespace location '%s'. Please ensure these directories are not nested.", dPath, namespaceLocation) + } - if err != nil { - return "", errors.Wrapf(err, "Unable to create data directory %v", + if err := config.MkdirAll(dataPath, 0775, uid, gid); err != nil { + return errors.Wrapf(err, "Unable to create data directory %v", filepath.Dir(dataPath)) } } @@ -306,17 +306,20 @@ func CheckCacheXrootdEnv(exportPath string, server server_structs.XRootDServer, metaPaths := param.Cache_MetaLocations.GetStringSlice() for _, mPath := range metaPaths { metaPath := filepath.Clean(mPath) - err = config.MkdirAll(metaPath, 0775, uid, gid) + // Similar to data locations, meta locations should never be below the namespace location + if strings.HasPrefix(mPath, namespaceLocation) { + return errors.Errorf("The configured meta location '%s' is a subdirectory of the namespace location '%s'. 
Please ensure these directories are not nested.", mPath, namespaceLocation) + } - if err != nil { - return "", errors.Wrapf(err, "Unable to create meta directory %v", + if err := config.MkdirAll(metaPath, 0775, uid, gid); err != nil { + return errors.Wrapf(err, "Unable to create meta directory %v", filepath.Dir(metaPath)) } } fedInfo, err := config.GetFederation(context.Background()) if err != nil { - return "", errors.Wrap(err, "Failed to pull information from the federation") + return errors.Wrap(err, "Failed to pull information from the federation") } if discoveryUrlStr := param.Federation_DiscoveryUrl.GetString(); discoveryUrlStr != "" { @@ -327,14 +330,14 @@ func CheckCacheXrootdEnv(exportPath string, server server_structs.XRootDServer, discoveryUrl.Host = discoveryUrl.Path discoveryUrl.Path = "" } else if discoveryUrl.Path != "" && discoveryUrl.Path != "/" { - return "", errors.New("The Federation.DiscoveryUrl's path is non-empty, ensure the Federation.DiscoveryUrl has the format :") + return errors.New("The Federation.DiscoveryUrl's path is non-empty, ensure the Federation.DiscoveryUrl has the format :") } discoveryUrl.Scheme = "pelican" discoveryUrl.Path = "" discoveryUrl.RawQuery = "" viper.Set("Cache.PSSOrigin", discoveryUrl.String()) } else { - return "", errors.Wrapf(err, "Failed to parse discovery URL %s", discoveryUrlStr) + return errors.Wrapf(err, "Failed to parse discovery URL %s", discoveryUrlStr) } } @@ -343,27 +346,27 @@ func CheckCacheXrootdEnv(exportPath string, server server_structs.XRootDServer, if err == nil { log.Debugln("Parsing director URL for 'pss.origin' setting:", directorUrlStr) if directorUrl.Path != "" && directorUrl.Path != "/" { - return "", errors.New("The Federation.DirectorUrl's path is non-empty, ensure the Federation.DirectorUrl has the format :") + return errors.New("The Federation.DirectorUrl's path is non-empty, ensure the Federation.DirectorUrl has the format :") } directorUrl.Scheme = "pelican" 
viper.Set("Cache.PSSOrigin", directorUrl.String()) } else { - return "", errors.Wrapf(err, "Failed to parse director URL %s", directorUrlStr) + return errors.Wrapf(err, "Failed to parse director URL %s", directorUrlStr) } } if viper.GetString("Cache.PSSOrigin") == "" { - return "", errors.New("One of Federation.DiscoveryUrl or Federation.DirectorUrl must be set to configure a cache") + return errors.New("One of Federation.DiscoveryUrl or Federation.DirectorUrl must be set to configure a cache") } if cacheServer, ok := server.(*cache.CacheServer); ok { err := WriteCacheScitokensConfig(cacheServer.GetNamespaceAds()) if err != nil { - return "", errors.Wrap(err, "Failed to create scitokens configuration for the cache") + return errors.Wrap(err, "Failed to create scitokens configuration for the cache") } } - return exportPath, nil + return nil } func CheckXrootdEnv(server server_structs.XRootDServer) error { @@ -445,7 +448,7 @@ func CheckXrootdEnv(server server_structs.XRootDServer) error { if server.GetServerType().IsEnabled(server_structs.OriginType) { err = CheckOriginXrootdEnv(exportPath, server, uid, gid, groupname) } else { - exportPath, err = CheckCacheXrootdEnv(exportPath, server, uid, gid) + err = CheckCacheXrootdEnv(server, uid, gid) } if err != nil { return err diff --git a/xrootd/xrootd_config_test.go b/xrootd/xrootd_config_test.go index 1e192451b..235ad53a3 100644 --- a/xrootd/xrootd_config_test.go +++ b/xrootd/xrootd_config_test.go @@ -38,6 +38,7 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + "github.com/pelicanplatform/pelican/cache" "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/origin" "github.com/pelicanplatform/pelican/param" @@ -626,6 +627,33 @@ func TestXrootDCacheConfig(t *testing.T) { assert.NoError(t, err) assert.NotContains(t, string(content), "http.tlsrequiredprefix") }) + + t.Run("TestNestedDataMetaNamespace", func(t *testing.T) { + testDir := t.TempDir() + 
viper.Set("Cache.StorageLocation", testDir) + namespaceLocation := filepath.Join(testDir, "namespace") + viper.Set("Cache.NamespaceLocation", namespaceLocation) + + cache := &cache.CacheServer{} + uid := os.Getuid() + gid := os.Getgid() + + // Data location test + nestedDataLocation := filepath.Join(namespaceLocation, "data") + viper.Set("Cache.DataLocations", []string{nestedDataLocation}) + err := CheckCacheXrootdEnv(cache, uid, gid) + require.Error(t, err) + require.Contains(t, err.Error(), "Please ensure these directories are not nested.") + // Now set to a valid location so we can hit the meta error in the next part of the test + viper.Set("Cache.DataLocations", []string{filepath.Join(testDir, "data")}) + + // Meta location test + nestedMetaLocation := filepath.Join(namespaceLocation, "meta") + viper.Set("Cache.MetaLocations", []string{nestedMetaLocation}) + err = CheckCacheXrootdEnv(cache, uid, gid) + require.Error(t, err) + require.Contains(t, err.Error(), "Please ensure these directories are not nested.") + }) } func TestUpdateAuth(t *testing.T) {