From f7955ab4e9d9a61526647a9551f77859fa7ab29f Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 21 Jul 2023 13:48:05 +0100 Subject: [PATCH 01/10] Set up reverse proxying + more NFS storage --- nfs/nfs.yaml | 2 +- nfs/pvc.yaml | 2 +- slurm-cluster-chart/files/ood-cluster-config.yml | 7 ++++++- .../files/{ood_portal.yaml => ood_portal.yml} | 7 +++---- slurm-cluster-chart/templates/ood-portal-configmap.yaml | 2 +- slurm-cluster-chart/templates/slurmd-deployment.yaml | 1 + slurm-cluster-chart/templates/slurmd-service.yaml | 3 +++ 7 files changed, 16 insertions(+), 8 deletions(-) rename slurm-cluster-chart/files/{ood_portal.yaml => ood_portal.yml} (99%) diff --git a/nfs/nfs.yaml b/nfs/nfs.yaml index 742fa34..7e1d048 100644 --- a/nfs/nfs.yaml +++ b/nfs/nfs.yaml @@ -10,7 +10,7 @@ spec: - ReadWriteMany resources: requests: - storage: 1Gi + storage: 5Gi --- apiVersion: nfs.rook.io/v1alpha1 kind: NFSServer diff --git a/nfs/pvc.yaml b/nfs/pvc.yaml index 7f0a3d7..42a251c 100644 --- a/nfs/pvc.yaml +++ b/nfs/pvc.yaml @@ -8,4 +8,4 @@ spec: - ReadWriteMany resources: requests: - storage: 10Gi + storage: 5Gi diff --git a/slurm-cluster-chart/files/ood-cluster-config.yml b/slurm-cluster-chart/files/ood-cluster-config.yml index cc0ab76..9331a79 100644 --- a/slurm-cluster-chart/files/ood-cluster-config.yml +++ b/slurm-cluster-chart/files/ood-cluster-config.yml @@ -8,4 +8,9 @@ v2: cluster: "linux" adapter: "slurm" bin: "/usr/bin" - conf: "/etc/slurm/slurm.conf" \ No newline at end of file + conf: "/etc/slurm/slurm.conf" + batch_connect: + basic: + script_wrapper: | + %s + \ No newline at end of file diff --git a/slurm-cluster-chart/files/ood_portal.yaml b/slurm-cluster-chart/files/ood_portal.yml similarity index 99% rename from slurm-cluster-chart/files/ood_portal.yaml rename to slurm-cluster-chart/files/ood_portal.yml index 9be3295..6740ed1 100644 --- a/slurm-cluster-chart/files/ood_portal.yaml +++ b/slurm-cluster-chart/files/ood_portal.yml @@ -149,21 +149,21 @@ ssl: # host_regex: '[\w.-]+\.example\.com' # Default: '[^/]+' (allow reverse proxying to all hosts, this allows external # hosts as well) -#host_regex: '[^/]+' +host_regex: 'slurmd-[0-1]' # Sub-uri used to reverse proxy to backend web server running on node that # knows the full URI path # Example: # node_uri: '/node' # Default: null (disable this feature) -#node_uri: null +node_uri: "/node" # Sub-uri used to reverse proxy to backend web server running on node that # ONLY uses *relative* URI paths # Example: # rnode_uri: '/rnode' # Default: null (disable this feature) -#rnode_uri: null +rnode_uri: "/rnode" # # Per-user NGINX Passenger apps @@ -239,7 +239,6 @@ ssl: # Default: null (display error to user if mapping fails) #register_root: null -host_regex: 'head' auth: - 'AuthType Basic' - 'AuthName "private"' diff --git a/slurm-cluster-chart/templates/ood-portal-configmap.yaml b/slurm-cluster-chart/templates/ood-portal-configmap.yaml index 6770d82..ec15af9 100644 --- a/slurm-cluster-chart/templates/ood-portal-configmap.yaml +++ b/slurm-cluster-chart/templates/ood-portal-configmap.yaml @@ -4,5 +4,5 @@ metadata: name: ood-portal-configmap data: ood_portal.yml: | - {{- .Files.Get "files/ood_portal.yaml" | nindent 4 -}} + {{- .Files.Get "files/ood_portal.yml" | nindent 4 -}} \ No newline at end of file diff --git a/slurm-cluster-chart/templates/slurmd-deployment.yaml b/slurm-cluster-chart/templates/slurmd-deployment.yaml index e973e3b..5497901 100644 --- a/slurm-cluster-chart/templates/slurmd-deployment.yaml +++ b/slurm-cluster-chart/templates/slurmd-deployment.yaml @@ -35,6 +35,7 @@ spec: name: slurmd ports: - containerPort: 6818 + - containerPort: 8888 resources: {} volumeMounts: - mountPath: /etc/slurm/slurm.conf diff --git a/slurm-cluster-chart/templates/slurmd-service.yaml b/slurm-cluster-chart/templates/slurmd-service.yaml index a182ffd..9306d93 100644 --- a/slurm-cluster-chart/templates/slurmd-service.yaml +++ b/slurm-cluster-chart/templates/slurmd-service.yaml @@ -11,6 +11,9 @@ spec: - name: "6818" port: 6818 targetPort: 6818 + - name: "jupyter" + port: 8888 + targetPort: 8888 selector: app.kubernetes.io/name: slurm app.kubernetes.io/component: slurmd From 96721d355c26dbeaca917006023cdeca113f07e5 Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 21 Jul 2023 16:51:58 +0100 Subject: [PATCH 02/10] Tweaks for easier jupyter deployments --- .../files/ood-cluster-config.yml | 16 -------------- .../files/slurm-cluster-config.yml | 21 +++++++++++++++++++ .../templates/cluster-config-configmap.yaml | 4 ++-- .../templates/login-deployment.yaml | 4 ++-- 4 files changed, 25 insertions(+), 20 deletions(-) delete mode 100644 slurm-cluster-chart/files/ood-cluster-config.yml create mode 100644 slurm-cluster-chart/files/slurm-cluster-config.yml diff --git a/slurm-cluster-chart/files/ood-cluster-config.yml b/slurm-cluster-chart/files/ood-cluster-config.yml deleted file mode 100644 index 9331a79..0000000 --- a/slurm-cluster-chart/files/ood-cluster-config.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- -v2: - metadata: - title: "Slurm Cluster" - login: - host: "localhost" - job: - cluster: "linux" - adapter: "slurm" - bin: "/usr/bin" - conf: "/etc/slurm/slurm.conf" - batch_connect: - basic: - script_wrapper: | - %s - \ No newline at end of file diff --git a/slurm-cluster-chart/files/slurm-cluster-config.yml b/slurm-cluster-chart/files/slurm-cluster-config.yml new file mode 100644 index 0000000..5cd10c0 --- /dev/null +++ b/slurm-cluster-chart/files/slurm-cluster-config.yml @@ -0,0 +1,21 @@ +--- +v2: + metadata: + title: "slurm-cluster" + login: + host: "localhost" + bc_queue: all + job: + cluster: "linux" + adapter: "slurm" + bin: "/usr/bin" + conf: "/etc/slurm/slurm.conf" + batch_connect: + template: "basic" + script: + native: + - "-N" + - "<%= bc_num_slots.blank? ? 1 : bc_num_slots.to_i %>" + - "-C" + - "c12" + \ No newline at end of file diff --git a/slurm-cluster-chart/templates/cluster-config-configmap.yaml b/slurm-cluster-chart/templates/cluster-config-configmap.yaml index 914a456..0b3fd68 100644 --- a/slurm-cluster-chart/templates/cluster-config-configmap.yaml +++ b/slurm-cluster-chart/templates/cluster-config-configmap.yaml @@ -3,5 +3,5 @@ kind: ConfigMap metadata: name: cluster-config data: - ood-cluster-config.yml: | - {{- .Files.Get "files/ood-cluster-config.yml" | nindent 4 -}} \ No newline at end of file + slurm-cluster.yml: | + {{- .Files.Get "files/slurm-cluster-config.yml" | nindent 4 -}} \ No newline at end of file diff --git a/slurm-cluster-chart/templates/login-deployment.yaml b/slurm-cluster-chart/templates/login-deployment.yaml index 7650d7a..73be219 100644 --- a/slurm-cluster-chart/templates/login-deployment.yaml +++ b/slurm-cluster-chart/templates/login-deployment.yaml @@ -58,8 +58,8 @@ spec: mountPath: /etc/httpd/conf/httpd.conf subPath: httpd.conf - name: cluster-config - mountPath: /etc/ood/config/clusters.d/ood-cluster-config.yml - subPath: ood-cluster-config.yml + mountPath: /etc/ood/config/clusters.d/slurm-cluster.yml + subPath: slurm-cluster.yml - name: host-keys mountPath: /tempmounts/etc/ssh resources: {} From 8bb7782b91f9c5fd6a40498bc2e2c9248fe39371 Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 8 Aug 2023 11:57:58 +0100 Subject: [PATCH 03/10] Opened ports for possible websocket connections --- slurm-cluster-chart/templates/slurmd-deployment.yaml | 1 + slurm-cluster-chart/templates/slurmd-service.yaml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/slurm-cluster-chart/templates/slurmd-deployment.yaml b/slurm-cluster-chart/templates/slurmd-deployment.yaml index 5497901..bea3ee0 100644 --- a/slurm-cluster-chart/templates/slurmd-deployment.yaml +++ b/slurm-cluster-chart/templates/slurmd-deployment.yaml @@ -36,6 +36,7 @@ spec: ports: - containerPort: 6818 - containerPort: 8888 + - containerPort: 8889 resources: {} volumeMounts: - mountPath: /etc/slurm/slurm.conf diff --git a/slurm-cluster-chart/templates/slurmd-service.yaml b/slurm-cluster-chart/templates/slurmd-service.yaml index 9306d93..1f8cab9 100644 --- a/slurm-cluster-chart/templates/slurmd-service.yaml +++ b/slurm-cluster-chart/templates/slurmd-service.yaml @@ -14,6 +14,9 @@ spec: - name: "jupyter" port: 8888 targetPort: 8888 + - name: "websockify" + port: 8889 + targetPort: 8889 selector: app.kubernetes.io/name: slurm app.kubernetes.io/component: slurmd From 42ae9786065bc28a009b9032630bf09fa475d833 Mon Sep 17 00:00:00 2001 From: Will Date: Wed, 9 Aug 2023 16:03:45 +0100 Subject: [PATCH 04/10] Added Jupyter app to login node --- docker-entrypoint.sh | 8 +++++++- slurm-cluster-chart/templates/slurmd-deployment.yaml | 1 - slurm-cluster-chart/templates/slurmd-service.yaml | 3 --- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 64084e7..0333c5b 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -119,7 +119,7 @@ then echo "" >> /home/rocky/.ssh/authorized_keys #Adding newline to avoid breaking authorized_keys file cat /home/rocky/.ssh/id_rsa.pub >> /home/rocky/.ssh/authorized_keys - echo "---> Starting Apache Server" + echo "---> Setting up Apache Server" mkdir --parents /etc/ood/config/apps/shell env > /etc/ood/config/apps/shell/env @@ -129,6 +129,12 @@ then mkdir --parents /opt/rh/httpd24/root/etc/httpd/ /usr/bin/htdbm -cb /opt/rh/httpd24/root/etc/httpd/.htpasswd.dbm rocky $ROCKY_OOD_PASS + + echo "---> Setting up Jupyter App" + mkdir --parents /var/www/ood/apps/sys/jupyter + git clone https://github.com/stackhpc/OOD-containerised-jupyter.git /var/www/ood/apps/sys/jupyter + + echo "---> Starting Apache server" /usr/sbin/httpd -k start -X -e debug fi diff --git a/slurm-cluster-chart/templates/slurmd-deployment.yaml b/slurm-cluster-chart/templates/slurmd-deployment.yaml index bea3ee0..5497901 100644 --- a/slurm-cluster-chart/templates/slurmd-deployment.yaml +++ b/slurm-cluster-chart/templates/slurmd-deployment.yaml @@ -36,7 +36,6 @@ spec: ports: - containerPort: 6818 - containerPort: 8888 - - containerPort: 8889 resources: {} volumeMounts: - mountPath: /etc/slurm/slurm.conf diff --git a/slurm-cluster-chart/templates/slurmd-service.yaml b/slurm-cluster-chart/templates/slurmd-service.yaml index 1f8cab9..9306d93 100644 --- a/slurm-cluster-chart/templates/slurmd-service.yaml +++ b/slurm-cluster-chart/templates/slurmd-service.yaml @@ -14,9 +14,6 @@ spec: - name: "jupyter" port: 8888 targetPort: 8888 - - name: "websockify" - port: 8889 - targetPort: 8889 selector: app.kubernetes.io/name: slurm app.kubernetes.io/component: slurmd From c8899cc5ef7662ebcf0381a37cd5bdc566a1cab5 Mon Sep 17 00:00:00 2001 From: Will Date: Wed, 9 Aug 2023 16:06:08 +0100 Subject: [PATCH 05/10] Updated tag --- slurm-cluster-chart/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index 1bef86e..22926f1 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -1,4 +1,4 @@ -sdcImage: ghcr.io/stackhpc/slurm-docker-cluster:3daa29f +sdcImage: ghcr.io/stackhpc/slurm-docker-cluster:42ae978 replicas: slurmd: 2 From c0b2f698dc8e59280b5e082a1de7f4b3f3d884df Mon Sep 17 00:00:00 2001 From: Will Date: Wed, 9 Aug 2023 16:47:18 +0100 Subject: [PATCH 06/10] Jupyter app repo now templated --- docker-entrypoint.sh | 2 +- slurm-cluster-chart/templates/login-deployment.yaml | 4 ++++ slurm-cluster-chart/values.yaml | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 0333c5b..beb6382 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -132,7 +132,7 @@ then echo "---> Setting up Jupyter App" mkdir --parents /var/www/ood/apps/sys/jupyter - git clone https://github.com/stackhpc/OOD-containerised-jupyter.git /var/www/ood/apps/sys/jupyter + git clone $JUPYTER_REPO --branch $JUPYTER_TAG /var/www/ood/apps/sys/jupyter echo "---> Starting Apache server" /usr/sbin/httpd -k start -X -e debug diff --git a/slurm-cluster-chart/templates/login-deployment.yaml b/slurm-cluster-chart/templates/login-deployment.yaml index 73be219..08c066b 100644 --- a/slurm-cluster-chart/templates/login-deployment.yaml +++ b/slurm-cluster-chart/templates/login-deployment.yaml @@ -32,6 +32,10 @@ spec: secretKeyRef: name: htdbm-secret key: password + - name: JUPYTER_REPO + value: {{ .Values.jupyterApp.repoURL }} + - name: JUPYTER_TAG + value: {{ .Values.jupyterApp.repoTag }} ports: - containerPort: 22 - containerPort: 80 diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index 22926f1..42e7270 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -21,4 +21,7 @@ configmaps: secrets: databaseAuth: database-auth-secret mungeKey: munge-key-secret - \ No newline at end of file + +jupyterApp: + repoURL: "https://github.com/stackhpc/OOD-containerised-jupyter.git" + repoTag: "v0.1" \ No newline at end of file From a66c8634dff46c8bb8d9ba196ffc01e9ddb89408 Mon Sep 17 00:00:00 2001 From: Will Date: Wed, 9 Aug 2023 16:51:42 +0100 Subject: [PATCH 07/10] Updated tag --- slurm-cluster-chart/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index 42e7270..43ed295 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -1,4 +1,4 @@ -sdcImage: ghcr.io/stackhpc/slurm-docker-cluster:42ae978 +sdcImage: ghcr.io/stackhpc/slurm-docker-cluster:c0b2f69 replicas: slurmd: 2 From 1b752279222c5a1a2f7583b8d3d6b97e569288fd Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 18 Aug 2023 09:43:04 +0100 Subject: [PATCH 08/10] Updated values, fixed hostport and removed reverse proxy host regex checking --- slurm-cluster-chart/files/ood_portal.yml | 2 +- slurm-cluster-chart/templates/slurmd.yaml | 3 ++- slurm-cluster-chart/values.yaml | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/slurm-cluster-chart/files/ood_portal.yml b/slurm-cluster-chart/files/ood_portal.yml index d9625db..b4ce7aa 100644 --- a/slurm-cluster-chart/files/ood_portal.yml +++ b/slurm-cluster-chart/files/ood_portal.yml @@ -150,7 +150,7 @@ ssl: # host_regex: '[\w.-]+\.example\.com' # Default: '[^/]+' (allow reverse proxying to all hosts, this allows external # hosts as well) -host_regex: 'slurmd-[0-1]' +#host_regex: 'slurmd-[0-1]' # Sub-uri used to reverse proxy to backend web server running on node that # knows the full URI path diff --git a/slurm-cluster-chart/templates/slurmd.yaml b/slurm-cluster-chart/templates/slurmd.yaml index dbf0c26..fee6018 100644 --- a/slurm-cluster-chart/templates/slurmd.yaml +++ b/slurm-cluster-chart/templates/slurmd.yaml @@ -36,7 +36,8 @@ spec: name: slurmd ports: - containerPort: 8888 - - hostPort: 6818 + - containerPort: 6818 + hostPort: 6818 resources: {} volumeMounts: - mountPath: /etc/slurm/ diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index e612407..cc5cf92 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -1,4 +1,4 @@ -sdcImage: ghcr.io/stackhpc/slurm-docker-cluster:c0b2f69 #OUTDATED, DON'T USE THIS COMMIT! +slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:6a1af7b login: # Deployment resource name @@ -73,14 +73,11 @@ sshPublicKey: # Secret resource names secrets: mungeKey: munge-key-secret -<<<<<<< HEAD jupyterApp: repoURL: "https://github.com/stackhpc/OOD-containerised-jupyter.git" repoTag: "v0.1" -======= openOnDemand: #Password for default Open OnDemand user 'rocky' password: password ->>>>>>> main From ac8fe57af30cac94883a64dfb95a5deba3b0c36a Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 18 Aug 2023 12:53:51 +0100 Subject: [PATCH 09/10] Updated to expose pod name to jupyter app --- image/docker-entrypoint.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/image/docker-entrypoint.sh b/image/docker-entrypoint.sh index 38ffacf..4532fe0 100755 --- a/image/docker-entrypoint.sh +++ b/image/docker-entrypoint.sh @@ -66,6 +66,9 @@ then ulimit -n 131072 ulimit -a + # Exposes for https://github.com/stackhpc/bc_containerised_jupyter/tree/v0.1.1 + echo "POD_NAME=$POD_NAME" >> /etc/environment + start_munge echo "---> Waiting for slurmctld to become active before starting slurmd..." From 3f40a217730434d0a85abd43186d80daa4f6d7ef Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 18 Aug 2023 13:23:41 +0100 Subject: [PATCH 10/10] Changed to forked repo and updated image tag --- slurm-cluster-chart/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index cc5cf92..64d5654 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -1,4 +1,4 @@ -slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:6a1af7b +slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:ac8fe57 login: # Deployment resource name @@ -75,8 +75,8 @@ secrets: mungeKey: munge-key-secret jupyterApp: - repoURL: "https://github.com/stackhpc/OOD-containerised-jupyter.git" - repoTag: "v0.1" + repoURL: "https://github.com/stackhpc/bc_containerised_jupyter.git" + repoTag: "v0.1.1" openOnDemand: #Password for default Open OnDemand user 'rocky'