From 64093c39049d97ed0243b1b45ef12e548dd112f9 Mon Sep 17 00:00:00 2001 From: Jeff Ohrstrom Date: Thu, 30 Nov 2023 16:14:37 -0500 Subject: [PATCH] fix slurm docs with a note about --export defaults --- .../installation/resource-manager/slurm.rst | 48 +++++++++++++------ .../files/submit-yml/basic-bc-options.rst | 10 ++-- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/source/installation/resource-manager/slurm.rst b/source/installation/resource-manager/slurm.rst index 3bfcfaa3..fcb9cf44 100644 --- a/source/installation/resource-manager/slurm.rst +++ b/source/installation/resource-manager/slurm.rst @@ -6,26 +6,38 @@ Slurm A YAML cluster configuration file for a Slurm resource manager on an HPC cluster looks like: +.. warning:: + Open OnDemand's Slurm support defaults to issuing CLI commands with + the ``--export`` flag set to ``NONE``, while Slurm's default is ``ALL``. + This can cause issues with jobs that require ``srun``. + + The current workaround is to ``export SLURM_EXPORT_ENV=ALL`` + in a :ref:`script_wrapper <script_wrapper>` before any job scripts run. + + Alternatively, you can use ``copy_environment`` below with the caveat + that the PUN's environment is very different from regular shell sessions. + .. code-block:: yaml :emphasize-lines: 8- # /etc/ood/config/clusters.d/my_cluster.yml --- v2: - metadata: - title: "My Cluster" - login: - host: "my_cluster.my_center.edu" - job: - adapter: "slurm" - cluster: "my_cluster" - bin: "/path/to/slurm/bin" - conf: "/path/to/slurm.conf" - # bin_overrides: - # sbatch: "/usr/local/bin/sbatch" - # squeue: "" - # scontrol: "" - # scancel: "" + metadata: + title: "My Cluster" + login: + host: "my_cluster.my_center.edu" + job: + adapter: "slurm" + cluster: "my_cluster" + bin: "/path/to/slurm/bin" + conf: "/path/to/slurm.conf" + # bin_overrides: + # sbatch: "/usr/local/bin/sbatch" + # squeue: "" + # scontrol: "" + # scancel: "" + copy_environment: false with the following configuration options: @@ -36,7 +48,7 @@ cluster .. 
warning:: Using the ``cluster`` option is discouraged. This is because maintenance - outages on the Slurm DB will propogate to Open OnDemand. Instead sites + outages on the Slurm database will propagate to Open OnDemand. Instead sites should use different ``conf`` files for each cluster to limit maintenance outages. bin The path to the Slurm client installation binaries. @@ -54,6 +66,12 @@ bin_overrides - `scontrol` - `scancel` +copy_environment + Copies the environment of the PUN when issuing CLI commands. Default behaviour + for Open OnDemand is to use the ``--export=NONE`` flag. Setting this to true will + cause Open OnDemand to issue CLI commands with ``--export=ALL``, though this may + cause issues as the PUN's environment is very different from a regular shell session. + .. note:: If you do not have a multi-cluster Slurm setup you can remove the ``cluster: diff --git a/source/reference/files/submit-yml/basic-bc-options.rst b/source/reference/files/submit-yml/basic-bc-options.rst index e2b31251..6cfc4bdf 100644 --- a/source/reference/files/submit-yml/basic-bc-options.rst +++ b/source/reference/files/submit-yml/basic-bc-options.rst @@ -119,20 +119,22 @@ Basic Batch Connect Options header: 'echo "all done at $(date)"' +.. _script_wrapper: + .. describe:: script_wrapper (String, "%s") - wrap the script ('%s' being the script content) with commands before - and after + Wrap the script (``%s`` being the script content) with commands before + and after. Default - the script has no wrapper + The script has no wrapper. .. code-block:: yaml script_wrapper: "%s" Example - load a module before the script and echo a statement after it + Load a module before the script and echo a statement after it. .. code-block:: yaml