From 570bc79285d1841de95d5b73ad72a47138b3dddb Mon Sep 17 00:00:00 2001
From: Diogo Castro
Date: Fri, 25 Aug 2023 13:11:22 +0200
Subject: [PATCH] HPC service integration

Exposes the shared HPC CephFS storage to user sessions, to allow
submission and retrieval of files.

The new config file is mounted as 4_swan_hpc_config.py, so it is loaded
after 3_swan_spark_config.py and can extend the SwanSparkPodHookHandler
defined there. The volume is only mounted when custom.hpc.enabled is
true and the user has the role configured in custom.hpc.role.
---
 swan-cern/files/swan_hpc_config.py      | 72 +++++++++++++++++++++++++
 swan-cern/templates/config.yaml         |  1 +
 swan-cern/templates/secrets.yaml        | 11 ++++
 swan-cern/templates/storageclasses.yaml | 38 +++++++++++++
 swan-cern/values.yaml                   | 20 +++++++
 5 files changed, 142 insertions(+)
 create mode 100644 swan-cern/files/swan_hpc_config.py
 create mode 100644 swan-cern/templates/storageclasses.yaml

diff --git a/swan-cern/files/swan_hpc_config.py b/swan-cern/files/swan_hpc_config.py
new file mode 100644
index 00000000..b9a7a73d
--- /dev/null
+++ b/swan-cern/files/swan_hpc_config.py
@@ -0,0 +1,72 @@
+from kubernetes_asyncio.client.models import (
+    V1Volume,
+    V1VolumeMount,
+    V1PersistentVolumeClaimVolumeSource
+)
+
+
+class SwanHPCPodHookHandler(SwanSparkPodHookHandler):
+
+    async def get_swan_user_pod(self):
+        await super().get_swan_user_pod()
+
+        if self._hpc_enabled():
+            self._init_hpc_volumes()
+
+        return self.pod
+
+    def _init_hpc_volumes(self):
+        """
+        Mount the HPC CephFS share in the user container
+        """
+        self.pod.spec.volumes.append(
+            V1Volume(
+                name='hpc-volume',
+                persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
+                    claim_name='hpc-volume-pvc'
+                )
+            )
+        )
+
+        notebook_container = self._get_pod_container('notebook')
+        mount_path = get_config('custom.hpc.mountPath', '/hpc')
+        notebook_container.volume_mounts.append(
+            V1VolumeMount(
+                name='hpc-volume',
+                mount_path=mount_path
+            )
+        )
+
+    def _hpc_enabled(self):
+        """
+        Check if HPC cluster access should be enabled for this user.
+        This is True if they belong to a special e-group and the
+        deployment is active
+        """
+
+        user_roles = self.spawner.user_roles
+        hpc_enabled = get_config('custom.hpc.enabled', False)
+        hpc_role = get_config('custom.hpc.role', None)
+
+        # TODO make this a form option?
+        if hpc_enabled and hpc_role in user_roles:
+            return True
+
+        return False
+
+
+def spark_modify_pod_hook(spawner, pod):
+    """
+    :param spawner: Swan Kubernetes Spawner
+    :type spawner: swanspawner.SwanKubeSpawner
+    :param pod: default pod definition set by jupyterhub
+    :type pod: V1Pod
+
+    :returns: dynamically customized pod specification for user session
+    :rtype: V1Pod
+    """
+    spark_pod_hook_handler = SwanHPCPodHookHandler(spawner, pod)
+    return spark_pod_hook_handler.get_swan_user_pod()
+
+
+c.SwanKubeSpawner.modify_pod_hook = spark_modify_pod_hook
diff --git a/swan-cern/templates/config.yaml b/swan-cern/templates/config.yaml
index 376c4195..24422a58 100644
--- a/swan-cern/templates/config.yaml
+++ b/swan-cern/templates/config.yaml
@@ -7,4 +7,5 @@ data:
   options_form_config.json: {{ .Values.optionsform | toJson }}
 {{ (.Files.Glob "files/swan_config_cern.py").AsConfig | indent 2 }}
 {{ (.Files.Glob "files/swan_spark_config.py").AsConfig | indent 2 }}
+{{ (.Files.Glob "files/swan_hpc_config.py").AsConfig | indent 2 }}
 {{ (.Files.Glob "files/private/side_container_tokens_perm.sh").AsConfig | indent 2 }}
diff --git a/swan-cern/templates/secrets.yaml b/swan-cern/templates/secrets.yaml
index f8ab2e6c..40df156e 100644
--- a/swan-cern/templates/secrets.yaml
+++ b/swan-cern/templates/secrets.yaml
@@ -8,3 +8,14 @@ data:
   eos.cred: {{ (required "helm --set swanCern.secrets.eos.cred=$(base64 -w0 <file>)" .Values.swanCern.secrets.eos.cred) }}
   hadoop.cred: {{ (required "helm --set swanCern.secrets.hadoop.cred=$(base64 -w0 <file>)" .Values.swanCern.secrets.hadoop.cred) }}
   sparkk8s.cred: {{ (required "helm --set swanCern.secrets.sparkk8s.cred=$(base64 -w0 <file>)" .Values.swanCern.secrets.sparkk8s.cred) }}
+{{ if .Values.hpc.enabled }}
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: hpc-volume-secret
+  namespace: {{ .Release.Namespace }}
+data:
+  userKey: {{ $.Values.hpc.user.key }}
+  userID: {{ $.Values.hpc.user.id }}
+{{ end }}
\ No newline at end of file
diff --git a/swan-cern/templates/storageclasses.yaml b/swan-cern/templates/storageclasses.yaml
new file mode 100644
index 00000000..9215acc5
--- /dev/null
+++ b/swan-cern/templates/storageclasses.yaml
@@ -0,0 +1,38 @@
+{{ if .Values.hpc.enabled }}
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: hpc-volume
+  namespace: {{ $.Release.Namespace }}
+spec:
+  accessModes:
+  - ReadWriteMany
+  capacity:
+    storage: 1Gi
+  csi:
+    driver: cephfs.csi.ceph.com
+    volumeHandle: hpc-volume
+    nodeStageSecretRef:
+      name: hpc-volume-secret
+      namespace: {{ $.Release.Namespace }}
+    nodePublishSecretRef:
+      name: hpc-volume-secret
+      namespace: {{ $.Release.Namespace }}
+    volumeAttributes:
+      monitors: {{ $.Values.hpc.monitors }}
+      rootPath: {{ $.Values.hpc.rootPath }}
+      provisionVolume: "false"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: hpc-volume-pvc
+  namespace: {{ $.Release.Namespace }}
+spec:
+  accessModes:
+  - ReadWriteMany
+  resources:
+    requests:
+      storage: 1Gi
+  volumeName: hpc-volume
+{{- end }}
diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml
index 36361f5c..e38e38d2 100644
--- a/swan-cern/values.yaml
+++ b/swan-cern/values.yaml
@@ -1,3 +1,16 @@
+hpc: &hpc
+  # When enabling and configuring hpc, make sure that
+  # jupyterhub.custom.hpc references this config,
+  # as done below with the *hpc anchor
+  enabled: false
+  # monitors:
+  # rootPath:
+  # mountPath:
+  # role:
+  # user:
+  #   id:
+  #   key:
+
 swan:
   cvmfs:
     deployDaemonSet: &cvmfsDeployDS true
@@ -92,6 +105,9 @@ swan:
         - name: swan-jh-cern
           mountPath: /usr/local/etc/jupyterhub/jupyterhub_config.d/3_swan_spark_config.py
           subPath: swan_spark_config.py
+        - name: swan-jh-cern
+          mountPath: /usr/local/etc/jupyterhub/jupyterhub_config.d/4_swan_hpc_config.py
+          subPath: swan_hpc_config.py
         - name: swan-secrets
           mountPath: /srv/jupyterhub/private/eos.cred
           subPath: eos.cred
@@ -124,6 +140,8 @@ swan:
               path: swan_config_cern.py
             - key: swan_spark_config.py
               path: swan_spark_config.py
+            - key: swan_hpc_config.py
+              path: swan_hpc_config.py
         - name: swan-secrets
           secret:
             secretName: swan-cern
@@ -191,6 +209,8 @@ swan:
             scopes: ['read:metrics']
             services: [prometheus-service-monitor]
       custom:
+        hpc:
+          <<: *hpc
         cull:
          # 4 hours
          timeout: 14400
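
Example values for enabling the integration: a minimal sketch, not part
of the patch, and every value below is a hypothetical placeholder.
monitors is the comma-separated list of Ceph monitor addresses passed to
the CSI driver, and user.id / user.key must be base64-encoded, since
secrets.yaml templates them directly into the Secret's data field
(mirroring the base64 -w0 convention of the chart's other secrets).

hpc:
  enabled: true
  monitors: "cephmon-1.example.ch:6789,cephmon-2.example.ch:6789"
  rootPath: /hpc/swan
  mountPath: /hpc
  role: swan-hpc
  user:
    id: aHBjLXN3YW4=            # base64("hpc-swan"); hypothetical CephX user
    key: QVFEaHlwb3RoZXRpY2Fs   # base64-encoded CephX key; hypothetical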
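
Design note: the PV/PVC pair is statically provisioned (provisionVolume
"false" with a fixed volumeHandle), so the cephfs CSI driver mounts an
existing share rather than creating one. ReadWriteMany lets every user
session pod mount the same share concurrently, which is what enables
exchanging files with the HPC cluster, and the nominal 1Gi capacity only
satisfies the Kubernetes API's requirement for a capacity/request; for a
pre-provisioned CephFS share it should not act as a quota.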