-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcreate_cluster
executable file
·104 lines (90 loc) · 2.93 KB
/
create_cluster
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
# SYNOPSIS
# --------
# A script to quickly create a cluster of containers to manually test ldms.
import os
import re
import pwd
import sys
import json
import time
import argparse
import TADA
import logging
from subprocess import run, STDOUT, PIPE
from distutils.spawn import find_executable
from LDMS_Test import LDMSDCluster, LDMSDContainer, process_args, \
add_common_args, jprint, parse_ldms_ls
# This file is a command-line script: everything below runs at module level
# (argument parsing, cluster creation), so refuse to be loaded via `import`.
if __name__ != "__main__":
    raise RuntimeError("This should not be imported as a module.")
class Debug:
    """Empty class; instances are plain attribute holders.

    The class defines no attributes or methods of its own.
    """


# Module-level Debug instance. Nothing else in this script reads or writes
# it -- presumably a scratch namespace for interactive debugging (confirm).
D = Debug()
# Root logger configuration: timestamp, logger name, level, then the message;
# INFO and above are emitted.
_LOG_FORMAT = "%(asctime)s %(name)s %(levelname)s %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Logger used for every message this script emits.
log = logging.getLogger(__name__)
# Run the file named by $PYTHONSTARTUP (if any) in this module's namespace.
# An unset *or empty* variable falls back to the null device, which reads as
# empty source, so nothing is executed.
# NOTE(review): this exec()s whatever file the environment points at; that is
# acceptable only because the variable is controlled by the invoking user.
with open(os.getenv("PYTHONSTARTUP") or os.devnull) as startup_file:
    # Read inside the `with` so the handle is closed deterministically
    # instead of being left to the garbage collector.
    startup_source = startup_file.read()
exec(startup_source)
#### default values #### ---------------------------------------------
def guess_prefix(executable, fallback="/opt/ovis"):
    """Return the installation prefix implied by the path of *executable*.

    The executable is assumed to live at ``<prefix>/<dir>/<name>`` (for
    example ``/opt/ovis/sbin/ldmsd``), so the prefix is that path with its
    last two components removed.

    Args:
        executable: Path to the executable, or a false value (``None`` or
            ``""``) when it was not found.
        fallback: Prefix returned when *executable* is false.

    Returns:
        The absolute prefix directory, or *fallback*.
    """
    if not executable:
        return fallback
    # Make the path absolute first: a bare or relative result such as
    # "ldmsd" or "sbin/ldmsd" has fewer than two "/" separators, so it
    # cannot be split into <prefix>/<dir>/<name> directly.
    return os.path.dirname(os.path.dirname(os.path.abspath(executable)))


# NOTE(review): find_executable is imported from distutils, which was removed
# from the standard library in Python 3.12 (PEP 632); shutil.which is the
# documented replacement.
sbin_ldmsd = find_executable("ldmsd")
default_prefix = guess_prefix(sbin_ldmsd)
#### argument parsing #### -------------------------------------------
ap = argparse.ArgumentParser(description="Create a cluster for manual test")

# Options shared with the other LDMS_Test-based scripts are registered by the
# imported helper; only the script-specific option is declared here.
add_common_args(ap)
ap.add_argument(
    "--num-compute",
    default=1,
    type=int,
    help="Number of compute nodes",
)

args = ap.parse_args()
# Hand the parsed namespace to the imported LDMS_Test helper before any of
# its attributes are read below.
process_args(args)
#### config variables #### ------------------------------
# Short aliases for the parsed options that the cluster spec is built from.
CLUSTERNAME = args.clustername  # becomes the spec's "name"
USER = args.user                # appears in the spec's "description"
PREFIX = args.prefix            # host path mounted at /opt/ovis
DB = args.data_root             # host path mounted at /db
#### spec #### -------------------------------------------------------
# The cluster description is assembled from named pieces and then combined
# into `spec`, which is what gets passed to LDMSDCluster.get().

# Template applied to every compute node through its "!extends" key.
compute_template = {
    "daemons": [
        {"name": "sshd", "type": "sshd"},
        {"name": "munged", "type": "munged"},
        {"name": "slurmd", "type": "slurmd"},
    ],
}

# The single head node runs slurmctld where the compute nodes run slurmd.
head_node = {
    "hostname": "headnode",
    "daemons": [
        {"name": "sshd", "type": "sshd"},
        {"name": "munged", "type": "munged"},
        {"name": "slurmctld", "type": "slurmctld"},
    ],
}

# node-1 .. node-N, where N is the --num-compute option.
compute_nodes = [
    {"hostname": "node-" + str(n), "!extends": "compute-node"}
    for n in range(1, args.num_compute + 1)
]

# The two fixed mounts come first, followed by the entries of args.mount.
bind_mounts = [
    "{}:/opt/ovis:rw".format(PREFIX),
    "{}:/db:rw".format(DB),
] + args.mount

spec = {
    "name": CLUSTERNAME,
    "description": "{}'s test_agg cluster".format(USER),
    "type": "NA",
    # generic template can apply to any object by "!extends"
    "templates": {"compute-node": compute_template},
    "nodes": [head_node] + compute_nodes,
    "cap_add": ["SYS_PTRACE", "SYS_ADMIN"],
    "image": args.image,
    "env": {"FOO": "BAR"},
    "mounts": bind_mounts,
}
log.info("-- Get or create the cluster --")
cluster = LDMSDCluster.get(spec["name"], spec=spec, create=True)

# Preparation steps, kept in their original order.
cluster.make_ssh_id()
cluster.start_daemons()
cluster.make_known_hosts()
log.info("DONE")

# Summary: the cluster name, then one line per container.
cluster_line = "cluster: {}".format(cluster.name)
log.info(cluster_line)
for cont in cluster.containers:
    container_line = " {} (on host {})".format(cont.name, cont.host)
    log.info(container_line)