From 7f72c4b0261989988bc69d02a679021879a48b05 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Wed, 27 Nov 2024 10:53:23 -0600 Subject: [PATCH 1/2] Make xrootd startup wait time a tunable value If xrootd is running under valgrind, the startup time may be much more than 10s. Provide a hidden tunable to give valgrind more time. --- config/resources/defaults.yaml | 1 + docs/parameters.yaml | 9 +++++++++ param/parameters.go | 1 + param/parameters_struct.go | 2 ++ xrootd/launch.go | 6 +++--- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/config/resources/defaults.yaml b/config/resources/defaults.yaml index 8f4d49653..cd49f4a5b 100644 --- a/config/resources/defaults.yaml +++ b/config/resources/defaults.yaml @@ -100,6 +100,7 @@ Shoveler: PortHigher: 9999 AMQPExchange: shoveled-xrd Xrootd: + MaxStartupWait: "10s" Mount: "" ManagerPort: 1213 DetailedMonitoringPort: 9930 diff --git a/docs/parameters.yaml b/docs/parameters.yaml index 028a661d2..acc62cc8a 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -2278,6 +2278,15 @@ type: string default: none components: ["origin"] --- +name: Xrootd.MaxStartupWait +description: |+ + The maximum amount of time pelican will wait for the xrootd daemons to + successfully start +type: duration +default: 10s +hidden: true +components: ["origin", "cache"] +--- ############################ # Monitoring-level configs # ############################ diff --git a/param/parameters.go b/param/parameters.go index f915935ef..cade9bfde 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -401,6 +401,7 @@ var ( Transport_ResponseHeaderTimeout = DurationParam{"Transport.ResponseHeaderTimeout"} Transport_TLSHandshakeTimeout = DurationParam{"Transport.TLSHandshakeTimeout"} Xrootd_AuthRefreshInterval = DurationParam{"Xrootd.AuthRefreshInterval"} + Xrootd_MaxStartupWait = DurationParam{"Xrootd.MaxStartupWait"} ) var ( diff --git a/param/parameters_struct.go b/param/parameters_struct.go index 8f9165ca1..b7b70845e 
100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -315,6 +315,7 @@ type Config struct { MacaroonsKeyFile string `mapstructure:"macaroonskeyfile" yaml:"MacaroonsKeyFile"` ManagerHost string `mapstructure:"managerhost" yaml:"ManagerHost"` ManagerPort int `mapstructure:"managerport" yaml:"ManagerPort"` + MaxStartupWait time.Duration `mapstructure:"maxstartupwait" yaml:"MaxStartupWait"` Mount string `mapstructure:"mount" yaml:"Mount"` Port int `mapstructure:"port" yaml:"Port"` RobotsTxtFile string `mapstructure:"robotstxtfile" yaml:"RobotsTxtFile"` @@ -619,6 +620,7 @@ type configWithType struct { MacaroonsKeyFile struct { Type string; Value string } ManagerHost struct { Type string; Value string } ManagerPort struct { Type string; Value int } + MaxStartupWait struct { Type string; Value time.Duration } Mount struct { Type string; Value string } Port struct { Type string; Value int } RobotsTxtFile struct { Type string; Value string } diff --git a/xrootd/launch.go b/xrootd/launch.go index 78de57ff6..b404c0e69 100644 --- a/xrootd/launch.go +++ b/xrootd/launch.go @@ -165,7 +165,7 @@ func LaunchDaemons(ctx context.Context, launchers []daemon.Launcher, egrp *errgr return } - ticker := time.NewTicker(10 * time.Second) + ticker := time.NewTicker(param.Xrootd_MaxStartupWait.GetDuration()) defer ticker.Stop() select { case <-ctx.Done(): @@ -180,8 +180,8 @@ func LaunchDaemons(ctx context.Context, launchers []daemon.Launcher, egrp *errgr portStartCallback(port) } case <-ticker.C: - log.Errorln("XRootD did not startup after 10s of waiting") - err = errors.New("XRootD did not startup after 10s of waiting") + log.Errorln("XRootD did not startup after", param.Xrootd_MaxStartupWait.GetDuration().String(), "of waiting") + err = errors.New("XRootD did not startup after " + param.Xrootd_MaxStartupWait.GetDuration().String() + " of waiting") return } From 62a2854a17b3573870004a1adeba438ceb00353a Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Wed, 27 Nov 2024 
10:55:22 -0600 Subject: [PATCH 2/2] Create a tunable for default cache timeout Under valgrind, it's trivial to make the default cache timeouts hit frequently - make this adjustable so stress tests can succeed. Additionally, add a few helper RPMs for debugging under valgrind. --- config/resources/defaults.yaml | 1 + docs/parameters.yaml | 10 ++++++++++ images/dev.Dockerfile | 1 + param/parameters.go | 1 + param/parameters_struct.go | 2 ++ xrootd/launch.go | 2 ++ 6 files changed, 17 insertions(+) diff --git a/config/resources/defaults.yaml b/config/resources/defaults.yaml index cd49f4a5b..2e5216874 100644 --- a/config/resources/defaults.yaml +++ b/config/resources/defaults.yaml @@ -60,6 +60,7 @@ Director: CachePresenceTTL: 1m CachePresenceCapacity: 10000 Cache: + DefaultCacheTimeout: "9.5s" Port: 8442 SelfTest: true SelfTestInterval: 15s diff --git a/docs/parameters.yaml b/docs/parameters.yaml index acc62cc8a..6e2b99a74 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1249,6 +1249,16 @@ type: int default: 0 components: ["cache"] --- +name: Cache.DefaultCacheTimeout +description: |+ + The default value of the cache operation timeout if one is not specified by the client. + + Newer clients should always specify a timeout; changing this default is rarely necessary. 
+type: duration +default: 9.5s +hidden: true +components: ["cache"] +--- ############################ # Director-level configs # ############################ diff --git a/images/dev.Dockerfile b/images/dev.Dockerfile index d048f6efb..7ca7cf8e1 100644 --- a/images/dev.Dockerfile +++ b/images/dev.Dockerfile @@ -56,6 +56,7 @@ RUN yum install -y --enablerepo=osg-testing goreleaser npm xrootd-devel xrootd-s xrdcl-http jq procps docker make curl-devel java-17-openjdk-headless git cmake3 gcc-c++ openssl-devel sqlite-devel libcap-devel sssd-client \ xrootd-multiuser \ zlib-devel \ + vim valgrind gdb gtest-devel \ && yum clean all # The ADD command with a api.github.com URL in the next couple of sections diff --git a/param/parameters.go b/param/parameters.go index cade9bfde..73f624ead 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -378,6 +378,7 @@ var ( ) var ( + Cache_DefaultCacheTimeout = DurationParam{"Cache.DefaultCacheTimeout"} Cache_SelfTestInterval = DurationParam{"Cache.SelfTestInterval"} Client_SlowTransferRampupTime = DurationParam{"Client.SlowTransferRampupTime"} Client_SlowTransferWindow = DurationParam{"Client.SlowTransferWindow"} diff --git a/param/parameters_struct.go b/param/parameters_struct.go index b7b70845e..9b64899fb 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -29,6 +29,7 @@ type Config struct { Concurrency int `mapstructure:"concurrency" yaml:"Concurrency"` DataLocation string `mapstructure:"datalocation" yaml:"DataLocation"` DataLocations []string `mapstructure:"datalocations" yaml:"DataLocations"` + DefaultCacheTimeout time.Duration `mapstructure:"defaultcachetimeout" yaml:"DefaultCacheTimeout"` EnableLotman bool `mapstructure:"enablelotman" yaml:"EnableLotman"` EnableOIDC bool `mapstructure:"enableoidc" yaml:"EnableOIDC"` EnableVoms bool `mapstructure:"enablevoms" yaml:"EnableVoms"` @@ -334,6 +335,7 @@ type configWithType struct { Concurrency struct { Type string; Value int } DataLocation struct { Type 
string; Value string } DataLocations struct { Type string; Value []string } + DefaultCacheTimeout struct { Type string; Value time.Duration } EnableLotman struct { Type string; Value bool } EnableOIDC struct { Type string; Value bool } EnableVoms struct { Type string; Value bool } diff --git a/xrootd/launch.go b/xrootd/launch.go index b404c0e69..78416b438 100644 --- a/xrootd/launch.go +++ b/xrootd/launch.go @@ -91,6 +91,8 @@ func makeUnprivilegedXrootdLauncher(daemonName string, configPath string, isCach if confDir := os.Getenv("XRD_PLUGINCONFDIR"); confDir != "" { result.ExtraEnv = append(result.ExtraEnv, "XRD_PLUGINCONFDIR="+confDir) } + result.ExtraEnv = append(result.ExtraEnv, "XRD_PELICANFEDERATIONMETADATATIMEOUT="+param.Cache_DefaultCacheTimeout.GetDuration().String()) + result.ExtraEnv = append(result.ExtraEnv, "XRD_PELICANDEFAULTHEADERTIMEOUT="+param.Cache_DefaultCacheTimeout.GetDuration().String()) } return }