Skip to content

Commit

Permalink
Merge pull request #169 from aws-samples/smhp-mask-slurm-deemons
Browse files Browse the repository at this point in the history
SMHP: mask Slurm daemons to prevent accidentally breaking the cluster
  • Loading branch information
verdimrc authored Mar 1, 2024
2 parents 14c0008 + 197eee4 commit fdcc61e
Showing 1 changed file with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,24 @@ main() {
echo "[INFO] START: Starting Slurm daemons"

if [[ $1 == "controller" ]]; then
echo "[INFO] This is a Controller node. Start slurm controller daemon..."
echo "[INFO] This is a Controller node. Start slurm controller daemon..."

systemctl enable --now slurmctld
fi
systemctl enable --now slurmctld

if [[ $1 == "compute" ]] || [[ $1 == "login" ]]; then
mv /etc/systemd/system/slurmd{,_DO_NOT_START_ON_CONTROLLER}.service \
|| { echo "Failed to mask slurmd, perhaps the AMI already masked it?" ; }
elif [[ $1 == "compute" ]] || [[ $1 == "login" ]]; then
echo "[INFO] Running on $1 node. Start slurm daemon..."

# Login nodes must still restart slurmd to fetch slurm.conf to /var/spool/slurmd/; however,
# slurmd won't run because slurm.conf does not contain login nodes.
SLURMD_OPTIONS="--conf-server $CONTROLLER_IP_VALUES" envsubst < /etc/systemd/system/slurmd.service > slurmd.service
mv slurmd.service /etc/systemd/system/

systemctl daemon-reload
systemctl enable slurmd
systemctl restart slurmd
systemctl enable --now slurmd

mv /etc/systemd/system/slurmctld{,_DO_NOT_START_ON_CONTROLLER}.service \
|| { echo "Failed to mask slurmctldd, perhaps the AMI already masked it?" ; }
fi

echo "[INFO] Start Slurm Script completed"
Expand Down

0 comments on commit fdcc61e

Please sign in to comment.