# Dockerfile
# Forked from giovtorres/slurm-docker-cluster
# Base image: Rocky Linux 8.
FROM rockylinux:8

# Image metadata (OCI annotations first, then the legacy label-schema / maintainer keys).
LABEL org.opencontainers.image.title="slurm-docker-cluster" \
      org.opencontainers.image.description="Slurm Docker cluster on Rocky Linux 8" \
      org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \
      org.label-schema.docker.cmd="docker-compose up -d" \
      maintainer="Giovanni Torres"
# ---- Build arguments (override with --build-arg) ----

# Upstream versions: the gosu release to download and the Slurm git tag to build.
ARG GOSU_VERSION=1.11
ARG SLURM_TAG=slurm-21-08-6-1

# The bamboo account and its group, with fixed numeric ids.
ARG BAMBOO_USER=bamboo
ARG BAMBOO_GROUP=scitech
ARG BAMBOO_USER_ID=996
ARG BAMBOO_GROUP_ID=996

# Number of Slurm worker nodes and memory per node (MB) written into slurm.conf.
ARG SLURM_NUM_NODES=3
ARG SLURM_NODE_MEMORY=2000
#### ENV Variables For Packages ####
# PEGASUS_VERSION_NUM is the bare release number used in the download URL;
# PEGASUS_VERSION is the matching directory name under /opt.
# Two ENV instructions are required: a variable set in an ENV instruction is
# only visible to the instructions that follow it.
ENV PEGASUS_VERSION_NUM="5.0.8"
ENV PEGASUS_VERSION="pegasus-${PEGASUS_VERSION_NUM}"
RUN <<EOT
# Create the user ASAP so the uid/gid do not get used by other installed packages.
# Heredoc lines are not implicitly &&-chained, so -e is needed for a failed
# groupadd to abort the build instead of being ignored.
set -ex
groupadd -r --gid="$BAMBOO_GROUP_ID" "$BAMBOO_GROUP"
# NOTE(review): the hash is single-quoted, so the backslashes before each '$'
# reach useradd literally -- confirm that this is the intended password field.
# Also consider not baking a password hash into the image at all.
useradd -m -g "$BAMBOO_GROUP" --password '\$1\$INpOHe38\$RghIh80Eg41A4L/xsdsbxI/' --uid="$BAMBOO_USER_ID" "$BAMBOO_USER"
EOT
# OS packages: compiler toolchain, munge, MariaDB, Python 3 and assorted utilities.
# The powertools repository is enabled before the main install step.
# Every package name is followed by a space before the line continuation so
# that adjacent names can never be fused into one token when lines are joined.
# The dnf metadata cache is removed in the same layer that created it.
RUN set -ex \
    && dnf makecache \
    && dnf -y update \
    && dnf -y install dnf-plugins-core \
    && dnf config-manager --set-enabled powertools \
    && dnf -y install \
        bash-completion \
        bzip2 \
        gcc \
        gcc-c++ \
        git \
        gnupg \
        http-parser-devel \
        json-c-devel \
        make \
        mariadb-devel \
        mariadb-server \
        munge \
        munge-devel \
        perl \
        psmisc \
        python3 \
        python3-devel \
        python3-pip \
        rsync \
        vim-enhanced \
        wget \
    && dnf clean all \
    && rm -rf /var/cache/dnf
# Point the `python` alternative at python3.
RUN alternatives --set python /usr/bin/python3
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir Cython nose
# Install gosu: download the amd64 binary and its detached signature for
# $GOSU_VERSION, verify the signature with a throw-away GnuPG home, then
# smoke-test the binary by running `true` as the nobody user.
RUN <<EOT
set -ex
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64"
wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc"
export GNUPGHOME="$(mktemp -d)"
gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4
gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu
rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc
chmod +x /usr/local/bin/gosu
gosu nobody true
EOT
# Build and install Slurm from source, then prepare its runtime account,
# directories and state files. Everything happens in one layer so the source
# tree removed at the end never persists in the image.
RUN set -x \
# Shallow clone of only the branch/tag named by the SLURM_TAG build argument.
&& git clone -b ${SLURM_TAG} --single-branch --depth=1 https://github.com/SchedMD/slurm.git \
&& pushd slurm \
# Debug build installed under /usr with configuration in /etc/slurm.
&& ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
--with-mysql_config=/usr/bin --libdir=/usr/lib64 \
&& make install \
# Ship the example configuration files and the shell-completion script.
&& install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \
&& install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \
&& install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \
&& install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
&& popd \
&& rm -rf slurm \
# System account for the Slurm daemons, with a fixed uid/gid of 990.
&& groupadd -r --gid=990 slurm \
&& useradd -r -g slurm --uid=990 slurm \
# Runtime directories, plus /data (handed to the bamboo user later in this file).
&& mkdir /etc/sysconfig/slurm \
/var/spool/slurmd \
/var/run/slurmd \
/var/run/slurmdbd \
/var/lib/slurmd \
/var/log/slurm \
/data \
# Pre-create empty state files under /var/lib/slurmd.
&& touch /var/lib/slurmd/node_state \
/var/lib/slurmd/front_end_state \
/var/lib/slurmd/job_state \
/var/lib/slurmd/resv_state \
/var/lib/slurmd/trigger_state \
/var/lib/slurmd/assoc_mgr_state \
/var/lib/slurmd/assoc_usage \
/var/lib/slurmd/qos_usage \
/var/lib/slurmd/fed_mgr_state \
# The glob matches the /var/{spool,run,lib,log}/slurm* directories created above.
&& chown -R slurm:slurm /var/*/slurm* \
# Generate the munge key inside the image.
&& /sbin/create-munge-key
# Cluster configuration files from the build context.
COPY slurm.conf /etc/slurm/slurm.conf
# slurmdbd.conf must be owned by slurm with mode 600. Setting owner and mode on
# the COPY itself avoids a follow-up RUN chown/chmod layer that would store the
# file a second time.
COPY --chown=slurm:slurm --chmod=600 slurmdbd.conf /etc/slurm/slurmdbd.conf
#### Apply the node count and per-node memory build arguments to slurm.conf ####
# The first pass rewrites the NodeName line (node range and RealMemory) and
# keeps the untouched file as slurm.conf.bak; the second pass rewrites the node
# range on the "normal" partition line.
RUN <<EOT
set -e
perl -pi.bak -e "s/^NodeName=c\[1-2\] RealMemory=1000 State=UNKNOWN/NodeName=c\[1-$SLURM_NUM_NODES\] RealMemory=$SLURM_NODE_MEMORY State=UNKNOWN/" /etc/slurm/slurm.conf
perl -pi -e "s/^PartitionName=normal Default=yes Nodes=c\[1-2\]/PartitionName=normal Default=yes Nodes=c\[1-$SLURM_NUM_NODES\]/" /etc/slurm/slurm.conf
EOT
# Installs the SSH server, NFS client, Pegasus, globus-url-copy with CA
# certificates, and Montage, and hands /data and /nfs/$BAMBOO_USER to the
# bamboo user.
RUN <<EOT
# Heredoc lines are not implicitly &&-chained: without -e only the exit status
# of the very last command would decide whether this layer "succeeded".
set -e

#### Installing and configuring SSH server ####
dnf -y install openssh-server openssh-clients
# Public-key logins only; password authentication and PAM are switched off.
perl -pi -e 's/^#RSAAuthentication yes/RSAAuthentication yes/' /etc/ssh/sshd_config
perl -pi -e 's/^#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
perl -pi -e 's/^PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
perl -pi -e 's/^#UsePAM no/UsePAM no/' /etc/ssh/sshd_config
perl -pi -e 's/^UsePAM yes/#UsePAM yes/' /etc/ssh/sshd_config
# Generate any missing host keys at build time.
/usr/bin/ssh-keygen -A

#### Install NFS client and create the mount dir ####
dnf install -y nfs-utils
mkdir -p /nfs

#### Install libraries required for Condor BLAHP to work ####
dnf install -y libtool-ltdl python3

#### Setup Bamboo User ####
chown -R "$BAMBOO_USER:$BAMBOO_GROUP" /data
mkdir -p "/nfs/$BAMBOO_USER"
chown -R "$BAMBOO_USER:$BAMBOO_GROUP" "/nfs/$BAMBOO_USER"

#### Install Pegasus from tarball ####
# -f fails on HTTP errors instead of saving the error page, -L follows
# redirects, -S still prints the error. https matches the certificate
# download from the same host further down.
# NOTE(review): no checksum is verified for any of the downloaded archives.
curl -fSL -o "/opt/${PEGASUS_VERSION}.tar.gz" "https://download.pegasus.isi.edu/pegasus/${PEGASUS_VERSION_NUM}/pegasus-binary-${PEGASUS_VERSION_NUM}-x86_64_rhel_8.tar.gz"
tar -xzvf "/opt/${PEGASUS_VERSION}.tar.gz" -C /opt
rm -f "/opt/${PEGASUS_VERSION}.tar.gz"
chmod 755 -R "/opt/${PEGASUS_VERSION}"
# Version-independent path: /opt/pegasus -> /opt/$PEGASUS_VERSION
(cd /opt && ln -s "${PEGASUS_VERSION}" pegasus)

#### Install globus-url-copy and CA certificates ####
dnf -y install 'dnf-command(config-manager)'
dnf -y install https://downloads.globus.org/globus-connect-server/stable/installers/repo/rpm/globus-repo-latest.noarch.rpm
dnf -y install globus-gass-copy-progs
curl -fSL -o /tmp/certs.tgz https://download.pegasus.isi.edu/containers/certificates.tar.gz
mkdir -p /etc/grid-security
tar -zxvf /tmp/certs.tgz -C /etc/grid-security/
rm -f /tmp/certs.tgz

#### Install Montage from tarball ####
dnf install -y libnsl2-devel
mkdir -p /opt/software/montage
curl -fSL -o /opt/montage.tar.gz http://montage.ipac.caltech.edu/download/Montage_v6.0.tar.gz
tar -xzvf /opt/montage.tar.gz -C /opt/software/montage
rm -f /opt/montage.tar.gz
(cd /opt/software/montage/Montage && make)
# Rename the build tree to its version and expose it as "current".
(cd /opt/software/montage && mv Montage 6.0 && ln -s 6.0 current)
chmod 755 -R /opt/software/montage/current/bin/
dnf install -y freetype

# Drop the dnf caches in the same layer that filled them.
dnf clean all
rm -rf /var/cache/dnf
EOT
# Put the Pegasus tools and its Python/Perl libraries on the search paths.
# ${VAR:+:${VAR}} appends ":<old value>" only when VAR is already set, so an
# unset PYTHONPATH/PERL5LIB no longer leaves a trailing ":" (an empty entry).
ENV PATH="/opt/${PEGASUS_VERSION}/bin:${PATH}"
ENV PYTHONPATH="/opt/${PEGASUS_VERSION}/lib64/python3.6/site-packages:/opt/${PEGASUS_VERSION}/lib64/pegasus/externals/python${PYTHONPATH:+:${PYTHONPATH}}"
ENV PERL5LIB="/opt/${PEGASUS_VERSION}/lib64/pegasus/perl${PERL5LIB:+:${PERL5LIB}}"
#### Configure SSH for Bamboo User ####
USER $BAMBOO_USER
# ~/.ssh is private to the user (0700).
RUN mkdir -m 700 /home/$BAMBOO_USER/.ssh
# COPY ignores USER and would leave the keys owned by root, so ownership is set
# explicitly. Both public keys stay in ~/.ssh next to authorized_keys.
COPY --chown=$BAMBOO_USER:$BAMBOO_GROUP bamboo_slurm_id_rsa.pub workflow_id_rsa.pub /home/$BAMBOO_USER/.ssh/
# authorized_keys lists the bamboo key first, then the workflow key. It is a
# data file, so 0600 rather than an executable 0700.
RUN cat /home/$BAMBOO_USER/.ssh/bamboo_slurm_id_rsa.pub /home/$BAMBOO_USER/.ssh/workflow_id_rsa.pub \
        > /home/$BAMBOO_USER/.ssh/authorized_keys \
    && chmod 600 /home/$BAMBOO_USER/.ssh/authorized_keys
# Back to root for the remaining instructions and the container entrypoint.
USER root
# Container start-up: the entrypoint script is installed into /usr/local/bin
# and receives the CMD value as its arguments; "slurmdbd" is the default and
# can be replaced at `docker run` time.
COPY docker-entrypoint.sh /usr/local/bin/
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["slurmdbd"]