-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathslurm_sshare_collector.py
executable file
·61 lines (55 loc) · 1.98 KB
/
slurm_sshare_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python3
"""
slurm_sshare_diamond.py
A script that uses PySlurm to get the slurm sshare statistics.
"""
import sys,os,json,subprocess
import time
from os import path
import yaml
# Normalized absolute path of the directory that contains this script.
prefix = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# 'external' subdirectory next to the script.
# NOTE(review): presumably holds bundled third-party packages -- confirm.
external = os.path.join(prefix, 'external')
# Put both directories ahead of the existing module search path so the
# imports that follow look there first.
sys.path = [prefix, external, *sys.path]
from prometheus_client.core import GaugeMetricFamily, REGISTRY
from prometheus_client.registry import Collector
from prometheus_client import start_http_server
class SlurmSshareCollector(Collector):
    """Prometheus collector exposing per-user data reported by ``sshare``.

    Each collection runs ``sshare -ahP`` and publishes a single ``sshare``
    gauge family with the labels ``account``, ``user`` and ``field``, where
    ``field`` is one of ``rawshares``, ``normshares``, ``rawusage``,
    ``normusage`` or ``fairshare``.
    """

    # Columns requested from sshare, in the order they are unpacked below.
    _FORMAT = 'User,Account,RawShares,NormShares,RawUsage,NormUsage,Fairshare'

    def __init__(self):
        pass

    def collect(self):
        """Run ``sshare`` and yield the resulting ``sshare`` gauge family.

        Yields nothing when the ``sshare`` command cannot be started.
        Rows that do not have exactly seven ``|``-separated columns and
        individual values that are not numeric are skipped, so one odd
        line cannot break the whole scrape.
        """
        try:
            # run() waits for the child and closes the pipe, so no
            # unreaped process or open file descriptor is left behind.
            result = subprocess.run(
                ['sshare', '-ahP', '--format=' + self._FORMAT],
                stdout=subprocess.PIPE, universal_newlines=True)
        except OSError:
            # sshare is missing or not executable: best effort, report
            # no samples for this scrape.
            return

        sshare = GaugeMetricFamily(
            'sshare', 'Stats from sshare',
            labels=['account', 'user', 'field'])

        # RawShares of the most recent account-level row (a row without a
        # user); used for users whose own RawShares is 'parent'.
        account_raw_shares = None

        for line in result.stdout.splitlines():
            columns = line.strip().split('|')
            if len(columns) != 7:
                # Blank or malformed line.
                continue
            (user, account, raw_shares, norm_shares,
             raw_usage, norm_usage, fairshare) = columns
            account = account.replace(" ", "")
            user = user.replace(" ", "")

            # Need to deal with users that are set to parent for their
            # Shares: remember the account's value and substitute it.
            if user == "" and account:
                account_raw_shares = raw_shares
            if raw_shares == 'parent' and account_raw_shares is not None:
                raw_shares = account_raw_shares
            if norm_shares == "":
                norm_shares = 0

            if not (user and account):
                # Only user-level rows are exported.
                continue

            samples = (
                ('rawshares', raw_shares),
                ('normshares', norm_shares),
                ('rawusage', raw_usage),
                ('normusage', norm_usage),
                ('fairshare', fairshare),
            )
            for field, raw_value in samples:
                try:
                    value = float(raw_value)
                except ValueError:
                    # e.g. an empty column, or 'parent' inherited from an
                    # account that itself uses 'parent'.
                    continue
                sshare.add_metric([account, user, field], value)

        yield sshare
if __name__ == "__main__":
start_http_server(9003)
REGISTRY.register(SlurmSshareCollector())
while True:
# period between collection
time.sleep(30)