Skip to content

Commit

Permalink
Correct cache namespace/data/meta location behavior
Browse files Browse the repository at this point in the history
This commit introduces `Cache.StorageLocation` and `Cache.NamespaceLocation`,
two new variables used to correct some of the behavior around setting locations for
the cache's namespace, meta, and data directories.

One crucial change here is that the data/meta directories are no longer exported under
the namespace directory, which previously resulted in exposing these internal file
representations to users who knew to look for them.

This also ties the namespace/meta/data directories to `Cache.StorageLocation`, allowing
you to set all three to the same directory through a single config.
  • Loading branch information
jhiemstrawisc committed Nov 8, 2024
1 parent b0ecc1e commit 334adb5
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 94 deletions.
50 changes: 20 additions & 30 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,30 +1000,21 @@ func InitServer(ctx context.Context, currentServers server_structs.ServerType) e
// we want to be able to check if this is user-provided (which we can't do for defaults.yaml)
viper.SetDefault("Origin.S3UrlStyle", "path")

if param.Cache_DataLocation.IsSet() {
log.Warningf("Deprecated configuration key %s is set. Please migrate to use %s instead", param.Cache_DataLocation.GetName(), param.Cache_LocalRoot.GetName())
log.Warningf("Will attempt to use the value of %s as default for %s", param.Cache_DataLocation.GetName(), param.Cache_LocalRoot.GetName())
}

if IsRootExecution() {
if currentServers.IsEnabled(server_structs.OriginType) {
viper.SetDefault("Origin.RunLocation", filepath.Join("/run", "pelican", "xrootd", "origin"))
}
if currentServers.IsEnabled(server_structs.CacheType) {
viper.SetDefault("Cache.RunLocation", filepath.Join("/run", "pelican", "xrootd", "cache"))
}

// To ensure Cache.DataLocation still works, we default Cache.LocalRoot to Cache.DataLocation
// The logic is extracted from handleDeprecatedConfig as we manually set the default value here
viper.SetDefault(param.Cache_DataLocation.GetName(), "/run/pelican/cache")
viper.SetDefault(param.Cache_LocalRoot.GetName(), param.Cache_DataLocation.GetString())

if viper.IsSet("Cache.DataLocation") {
viper.SetDefault("Cache.DataLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "data")})
viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "meta")})
} else {
viper.SetDefault("Cache.DataLocations", []string{"/run/pelican/cache/data"})
viper.SetDefault("Cache.MetaLocations", []string{"/run/pelican/cache/meta"})
viper.SetDefault(param.Cache_RunLocation.GetName(), filepath.Join("/run", "pelican", "xrootd", "cache"))
// Several deprecated keys map to Cache.StorageLocation, and by the time we get here those keys have already
// been mapped in handleDeprecatedConfig(). To avoid overriding a value mapped from a deprecated key, we only
// set the default here if this key is not already set.
if !viper.IsSet(param.Cache_StorageLocation.GetName()) {
viper.SetDefault(param.Cache_StorageLocation.GetName(), filepath.Join("/run", "pelican", "cache"))
}
viper.SetDefault(param.Cache_NamespaceLocation.GetName(), filepath.Join(param.Cache_StorageLocation.GetString(), "namespace"))
viper.SetDefault(param.Cache_DataLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "data")})
viper.SetDefault(param.Cache_MetaLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "meta")})
}

viper.SetDefault("LocalCache.RunLocation", filepath.Join("/run", "pelican", "localcache"))
Expand Down Expand Up @@ -1073,18 +1064,17 @@ func InitServer(ctx context.Context, currentServers server_structs.ServerType) e
cleanupDirOnShutdown(ctx, runtimeDir)
}
viper.SetDefault(param.Origin_GlobusConfigLocation.GetName(), filepath.Join(runtimeDir, "xrootd", "origin", "globus"))
// To ensure Cache.DataLocation still works, we default Cache.LocalRoot to Cache.DataLocation
// The logic is extracted from handleDeprecatedConfig as we manually set the default value here
viper.SetDefault(param.Cache_DataLocation.GetName(), filepath.Join(runtimeDir, "cache"))
viper.SetDefault(param.Cache_LocalRoot.GetName(), param.Cache_DataLocation.GetString())

if viper.IsSet("Cache.DataLocation") {
viper.SetDefault("Cache.DataLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "data")})
viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(param.Cache_DataLocation.GetString(), "meta")})
} else {
viper.SetDefault("Cache.DataLocations", []string{filepath.Join(runtimeDir, "pelican/cache/data")})
viper.SetDefault("Cache.MetaLocations", []string{filepath.Join(runtimeDir, "pelican/cache/meta")})

// Several deprecated keys map to Cache.StorageLocation, and by the time we get here those keys have already
// been mapped in handleDeprecatedConfig(). To avoid overriding a value mapped from a deprecated key, we only
// set the default here if this key is not already set.
if !viper.IsSet(param.Cache_StorageLocation.GetName()) {
viper.SetDefault(param.Cache_StorageLocation.GetName(), filepath.Join("/run", "pelican", "cache"))
}
viper.SetDefault(param.Cache_NamespaceLocation.GetName(), filepath.Join(param.Cache_StorageLocation.GetString(), "namespace"))
viper.SetDefault(param.Cache_DataLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "data")})
viper.SetDefault(param.Cache_MetaLocations.GetName(), []string{filepath.Join(param.Cache_StorageLocation.GetString(), "meta")})

viper.SetDefault("LocalCache.RunLocation", filepath.Join(runtimeDir, "cache"))
viper.SetDefault("Origin.Multiuser", false)
}
Expand Down
97 changes: 66 additions & 31 deletions docs/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1066,59 +1066,61 @@ components: ["localcache"]
############################
# Cache-level configs #
############################
name: Cache.DataLocation
description: |+
[Deprecated] Cache.DataLocation is being deprecated and will be removed in a future release. It is replaced by a combination of Cache.DataLocations and Cache.MetaLocations
type: string
root_default: /run/pelican/xcache
default: $XDG_RUNTIME_DIR/pelican/xcache
deprecated: true
replacedby: Cache.LocalRoot
components: ["cache"]
---
name: Cache.LocalRoot
name: Cache.StorageLocation
description: |+
An absolute path to the directory where xrootd will create its default namespace, `meta`, and `data` directories. For example,
setting `Cache.LocalRoot=/run/pelican/cache` without specifying further `Cache.DataLocations` or `Cache.MetaLocations`
setting `Cache.StorageLocation=/run/pelican/cache` without specifying further `Cache.DataLocations` or `Cache.MetaLocations`
values will result in the cache creating a directory structure like:
```
.
└── /run/pelican/cache/
├── data/
│ ├── 00 #hexadecimal values
│ ├── 00 # hexadecimal name values
│ ├── 01
│ ├── ...
│ └── FF
├── meta/
│ ├── 00 #hexadecimal values
│ ├── 00 # hexadecimal name values
│ ├── 01
│ ├── ...
│ └── FF
├── namespace1/
│ ├── foo1.txt --> /run/pelican/cache/data/00
│ └── foo2.txt --> /run/pelican/cache/data/01
└── namespace2/
└── bar.txt --> /run/pelican/cache/data/FF
└── namespace/
├── namespace1/
│ ├── foo1.txt --> /run/pelican/cache/data/00
│ └── foo2.txt --> /run/pelican/cache/data/01
└── namespace2/
└── bar.txt --> /run/pelican/cache/data/FF
```
In this setup, actual data files live at `/run/pelican/cache/data` and are given hexadecimal names, while
references (symbolic links) to those files are stored in `/run/pelican/cache/namespace1`, `/run/pelican/cache/namespace2`,
etc. The `meta` directory is used for object metadata. Object requests to XRootD will be served from the namespace directories, and
references (symbolic links) to those files are stored in `/run/pelican/cache/namespace`. The `meta` directory
is used for object metadata. Object requests to XRootD will be served from the namespace directories, and
the underlying objects are resolved through these symbolic links.
We recommend tying the `Cache.LocalRoot` to a fast storage device, such as an SSD, to ensure optimal cache performance.
We recommend tying the `Cache.StorageLocation` to a fast storage device, such as an SSD, to ensure optimal cache performance.
If this directory does not already exist, it will be created by Pelican.
WARNING: The default value of /run/pelican/cache should _never_ be used for production caches, as this directory is typically
cleared on system restarts, and may interfere with system services if it becomes full. Running a cache with the default value
set will generate a warning at cache startup.
type: string
root_default: /run/pelican/cache
default: $XDG_RUNTIME_DIR/pelican/cache
components: ["cache"]
---
name: Cache.ExportLocation
name: Cache.NamespaceLocation
description: |+
The location of the export directory. Everything under this directory will be exposed as part of the cache. This is
relative to the mount location.
A cache's namespace directory is used to duplicate/recreate the federation's namespace structure, and stores symbolic links from
object names to the actual data files (see `Cache.StorageLocation` for extra information). For example, requesting `/foo/bar.txt` from a
cache will check for the existence of a symbolic link at `${Cache.NamespaceLocation}/foo/bar.txt`, and if it exists, the cache will serve
the data file at the location the symbolic link points to.
If this directory does not already exist, it will be created by Pelican.
WARNING: It's important that any values for `Cache.DataLocations` and `Cache.MetaLocations` are NOT subdirectories of `Cache.NamespaceLocation`,
as this will make the raw data/meta files accessible through the cache's namespace structure, which is undefined behavior.
type: string
default: /
default: ${Cache.StorageLocation}/namespace

components: ["cache"]
---
name: Cache.DataLocations
Expand All @@ -1132,9 +1134,11 @@ description: |+
as well as the [xrootd pfc documentation](https://xrootd.slac.stanford.edu/doc/dev56/pss_config.pdf) for the `pfc.spaces` directive.
If this directory does not already exist, it will be created by Pelican.
WARNING: It's important that any values for `Cache.DataLocations` are NOT subdirectories of `Cache.NamespaceLocation`,
as this will make the raw data files accessible through the cache's namespace structure, which is undefined behavior.
type: stringSlice
root_default: ["/run/pelican/cache/data"]
default: ["$XDG_RUNTIME_DIR/pelican/cache/data"]
default: ["${Cache.StorageLocation}/data"]
components: ["cache"]
---
name: Cache.MetaLocations
Expand All @@ -1147,9 +1151,40 @@ description: |+
as well as the [xrootd pfc documentation](https://xrootd.slac.stanford.edu/doc/dev56/pss_config.pdf) for the `pfc.spaces` directive.
If this directory does not already exist, it will be created by Pelican.
WARNING: It's important that any values for `Cache.MetaLocations` are NOT subdirectories of `Cache.NamespaceLocation`,
as this will make the raw metadata files accessible through the cache's namespace structure, which is undefined behavior.
type: stringSlice
root_default: ["/run/pelican/cache/meta"]
default: ["$XDG_RUNTIME_DIR/pelican/cache/meta"]
default: ["${Cache.StorageLocation}/meta"]
components: ["cache"]
---
name: Cache.LocalRoot
description: |+
[Deprecated] Cache.LocalRoot is deprecated and replaced by Cache.StorageLocation.
type: string
root_default: /run/pelican/cache
default: $XDG_RUNTIME_DIR/pelican/cache
deprecated: true
replacedby: "Cache.StorageLocation"
components: ["cache"]
---
name: Cache.DataLocation
description: |+
[Deprecated] Cache.DataLocation is being deprecated and will be removed in a future release. It is replaced by Cache.StorageLocation.
type: string
root_default: /run/pelican/cache
default: $XDG_RUNTIME_DIR/pelican/cache
deprecated: true
replacedby: Cache.StorageLocation
components: ["cache"]
---
name: Cache.ExportLocation
description: |+
A path that's relative to the `Cache.NamespaceLocation` where the cache will expose its contents. This path can be used to
control which namespaces are available through the cache. For example, setting `Cache.ExportLocation: /foo` will only expose
the `/foo` namespace to clients.
type: string
default: /
components: ["cache"]
---
name: Cache.RunLocation
Expand Down
5 changes: 4 additions & 1 deletion param/parameters.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions param/parameters_struct.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion xrootd/resources/xrootd-cache.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ http.tlsrequiredprefix {{$Prefix}}
throttle.throttle concurrency {{.Cache.Concurrency}}
{{end}}
pss.origin {{.Cache.PSSOrigin}}
oss.localroot {{.Cache.LocalRoot}}
oss.localroot {{.Cache.NamespaceLocation}}
pfc.spaces data meta
{{- range $value := .Cache.DataLocations}}
oss.space data {{$value}}
Expand Down
Loading

0 comments on commit 334adb5

Please sign in to comment.