Skip to content

Commit

Permalink
[slurm] Add slurm plugin
Browse files Browse the repository at this point in the history
Slurm is a workload manager in the HPC space. This is an initial plugin
for it, and there may be further additions in the future.

Signed-off-by: Arif Ali <[email protected]>
  • Loading branch information
arif-ali authored and TurboTurtle committed Aug 9, 2023
1 parent 46794f4 commit 7545f7d
Showing 1 changed file with 128 additions and 0 deletions.
128 changes: 128 additions & 0 deletions sos/report/plugins/slurm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# This file is part of the sos project: https://github.com/sosreport/sos
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# version 2 of the GNU General Public License.
#
# See the LICENSE file in the source distribution for further information.

from sos.report.plugins import Plugin, UbuntuPlugin, RedHatPlugin
from sos.utilities import is_executable


class Slurm(Plugin, UbuntuPlugin, RedHatPlugin):
    """Collect configuration, state and logs for the Slurm workload manager.

    The plugin copies ``/etc/slurm/*.conf``, records the output of
    ``sinfo``, ``squeue`` and a set of ``scontrol show`` subcommands for
    each of those tools that ``is_executable`` reports as available, and
    collects the slurmd and slurmctld log files. The log locations are
    taken from the ``SlurmdLogFile`` and ``SlurmctldLogFile`` settings in
    ``/etc/slurm/slurm.conf``, falling back to ``/var/log/slurmd.log`` and
    ``/var/log/slurmctld.log``.

    Password settings in ``slurm.conf`` and ``slurmdbd.conf`` are masked
    during post-processing.
    """

    short_desc = "Slurm Workload Manager"

    plugin_name = 'slurm'
    profiles = ('hpc',)
    packages = (
        # Ubuntu
        'slurm-wlm',
        'slurmd',
        'slurmdbd',
        'slurmctld',
        # EL
        'slurm',
        'slurm-slurmctld',
        'slurm-slurmd',
        'slurm-slurmdbd',
    )
    services = (
        'slurmd',
        'slurmdbd',
        'slurmctld',
    )

    def setup(self):
        """Register the Slurm files and command output to collect."""
        self.add_copy_spec('/etc/slurm/*.conf')

        if is_executable('sinfo'):
            self.add_cmd_output([
                'sinfo --all --list-reasons --long',
                'sinfo --all --long',
            ])

        if is_executable('squeue'):
            self.add_cmd_output([
                'squeue --all --long',
            ])

        # Entities passed to "scontrol show <entity>".
        scontrol_cmds = [
            'aliases',
            'assoc_mgr',
            'bbstat',
            'burstBuffer',
            'config',
            'daemons',
            'dwstat',
            'federation',
            'frontend',
            'job',
            'licenses',
            'node',
            'partition',
            'reservation',
            'slurmd',
            'step',
            'topology',
        ]

        if is_executable('scontrol'):
            self.add_cmd_output(
                [f"scontrol show {i}" for i in scontrol_cmds]
            )

        slurmd_log_file, slurmctld_log_file = self._slurm_log_files(
            '/etc/slurm/slurm.conf'
        )

        if not self.get_option("all_logs"):
            self.add_copy_spec([
                slurmd_log_file,
                slurmctld_log_file,
            ])
        else:
            # With all_logs, a trailing glob also matches any file whose
            # name starts with the log file path.
            self.add_copy_spec([
                f"{slurmd_log_file}*",
                f"{slurmctld_log_file}*",
            ])

    def _slurm_log_files(self, config_file):
        """Return the ``(slurmd, slurmctld)`` log file paths.

        Reads ``SlurmdLogFile`` and ``SlurmctldLogFile`` from *config_file*.
        A setting that is absent, has no ``=`` or has an empty value leaves
        the corresponding default in place. If the file cannot be opened
        the error is logged and both defaults are returned.

        :param config_file: path of the slurm.conf file to parse
        :returns: tuple ``(slurmd_log_file, slurmctld_log_file)``
        """
        # Keyed by lower-cased parameter name: slurm.conf parameter names
        # are case-insensitive.
        log_files = {
            'slurmdlogfile': '/var/log/slurmd.log',
            'slurmctldlogfile': '/var/log/slurmctld.log',
        }

        try:
            # errors='replace' keeps this best-effort parse from raising
            # on bytes that are not valid UTF-8.
            with open(config_file, 'r', encoding='utf-8',
                      errors='replace') as conf:
                for line in conf:
                    # Text after '#' is a comment through end of line.
                    setting = line.split('#', 1)[0]
                    # partition() never raises on a line without '=' and
                    # keeps any further '=' characters in the value.
                    key, sep, value = setting.partition('=')
                    key = key.strip().lower()
                    value = value.strip()
                    # An empty value must not replace the default: it
                    # would turn the all_logs copy spec into a bare "*".
                    if sep and value and key in log_files:
                        log_files[key] = value
        except OSError as error:
            self._log_error(
                f'Could not open conf file {config_file}: {error}'
            )

        return log_files['slurmdlogfile'], log_files['slurmctldlogfile']

    def postproc(self):
        """Mask password values in the collected Slurm configuration."""
        conf_paths = "/etc/slurm"

        slurm_keys = [
            'AccountingStoragePass',
            'JobCompPass',
        ]
        # Group 1 captures the key and the '=' so the substitution keeps
        # them and replaces only the value that follows.
        slurm_keys_regex = r"(^\s*(%s)\s*=\s*)(.*)" % "|".join(slurm_keys)
        slurmdbd_key_regex = r'(^\s*(StoragePass)\s*=\s*)(.*)'

        sub = r'\1********'

        self.do_file_sub(
            f"{conf_paths}/slurm.conf",
            slurm_keys_regex, sub
        )
        self.do_file_sub(
            f"{conf_paths}/slurmdbd.conf",
            slurmdbd_key_regex, sub
        )

0 comments on commit 7545f7d

Please sign in to comment.