From 3088a15571c54903b889b81b9bfaeb3caf55505b Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Mon, 31 Jul 2023 12:23:00 +0200 Subject: [PATCH 01/16] Update to z2jh chart v2.0.0 CHANGED: rbac.enabled must as of version 2.0.0 be configured via rbac.create and .serviceAccount.create. CHANGED: hub.fsGid must as of version 2.0.0 be configured via hub.podSecurityContext.fsGroup. rbac.create is set to true by default in the upstream chart, so no need to set it here. A service account with name "hub" is also created automatically. --- swan/Chart.lock | 6 +++--- swan/Chart.yaml | 2 +- swan/values.yaml | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/swan/Chart.lock b/swan/Chart.lock index e759c125..6864259e 100644 --- a/swan/Chart.lock +++ b/swan/Chart.lock @@ -1,7 +1,7 @@ dependencies: - name: jupyterhub repository: https://jupyterhub.github.io/helm-chart/ - version: 1.2.0 + version: 2.0.0 - name: fusex repository: https://registry.cern.ch/chartrepo/eos version: 0.1.3 @@ -14,5 +14,5 @@ dependencies: - name: cvmfs-csi repository: http://registry.cern.ch/chartrepo/cern version: 0.1.0 -digest: sha256:33fccf759bbb9ad7bec12055009bad06339834c4e6abbf5aa54207103d07bd71 -generated: "2023-10-10T17:24:39.307933+02:00" +digest: sha256:c2e327e14eb54317eae923781209a3cda509d2b22cb3f6be910d45531fb2a8d4 +generated: "2023-10-16T11:22:35.427535188+02:00" diff --git a/swan/Chart.yaml b/swan/Chart.yaml index f14f503e..4b979022 100644 --- a/swan/Chart.yaml +++ b/swan/Chart.yaml @@ -13,7 +13,7 @@ icon: https://swan.docs.cern.ch/images/logos/logo_swan_letters.png # dependencies: - name: jupyterhub - version: 1.2.0 + version: 2.0.0 repository: https://jupyterhub.github.io/helm-chart/ - name: fusex diff --git a/swan/values.yaml b/swan/values.yaml index 7a909981..d7cf2419 100644 --- a/swan/values.yaml +++ b/swan/values.yaml @@ -118,7 +118,8 @@ jupyterhub: # placeholder for hub secret token secretToken: hub: - fsGid: 0 + podSecurityContext: + fsGroup: 0 
containerSecurityContext: runAsUser: 0 runAsGroup: 0 @@ -206,8 +207,6 @@ jupyterhub: # placeholder for hub cookieSecret # when empty, it generates a new randomly cookieSecret: - rbac: - enabled: true scheduling: userScheduler: enabled: false From 6dc4c50d0fa01d334066c7fd4e302cd928258826 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Mon, 31 Jul 2023 12:29:05 +0200 Subject: [PATCH 02/16] Adapt spawner-related hooks to async KubeSpawner These changes mainly imply the transformation of hooks into coroutines and the awaiting of calls to the kubernetes API to read / write objects. The post_stop_hooks can't be transformed into coroutines because they are never awaited from the upstream spawner code, and they need to be coroutines because they invoke the async k8s API. Therefore, they have been removed from here and placed in an override of the stop method in the SwanKubeSpawner, which is a coroutine. --- swan-cern/files/swan_config_cern.py | 123 +++++++++++++------------- swan-cern/files/swan_spark_config.py | 125 +++++++++++++-------------- swan/files/swan_config.py | 48 +++++----- 3 files changed, 148 insertions(+), 148 deletions(-) diff --git a/swan-cern/files/swan_config_cern.py b/swan-cern/files/swan_config_cern.py index 66fe2e2f..b1599fb8 100644 --- a/swan-cern/files/swan_config_cern.py +++ b/swan-cern/files/swan_config_cern.py @@ -1,7 +1,25 @@ import os, subprocess -from kubernetes import client -from kubernetes.client.rest import ApiException +import asyncio + +from kubernetes_asyncio.client.models import ( + V1EmptyDirVolumeSource, + V1EnvVar, + V1EnvVarSource, + V1ConfigMapVolumeSource, + V1Container, + V1KeyToPath, + V1ObjectFieldSelector, + V1ObjectMeta, + V1PodSecurityContext, + V1Secret, + V1SecretVolumeSource, + V1SELinuxOptions, + V1Volume, + V1VolumeMount, +) + +from kubernetes_asyncio.client.rest import ApiException """ Class handling KubeSpawner.modify_pod_hook(spawner,pod) call @@ -10,7 +28,7 @@ class 
SwanPodHookHandlerProd(SwanPodHookHandler): - def get_swan_user_pod(self): + async def get_swan_user_pod(self): super().get_swan_user_pod() # ATTENTION Spark requires this side container, so we need to create it!! @@ -19,14 +37,14 @@ def get_swan_user_pod(self): # not self.spawner.local_home: # get eos token - eos_secret_name = self._init_eos_secret() + eos_secret_name = await self._init_eos_secret() # init user containers (notebook and side-container) self._init_eos_containers(eos_secret_name) return self.pod - def _init_eos_secret(self): + async def _init_eos_secret(self): username = self.spawner.user.name user_uid = self.spawner.user_uid eos_secret_name ='eos-tokens-%s' % username @@ -39,34 +57,29 @@ def _init_eos_secret(self): except Exception as e: raise ValueError("Could not create required user credential") - # ITHADOOP-819 - Ports need to be opened using service creation, and later assigning allocated service nodeport to a pod # Create V1Secret with eos token - try: - secret_data = client.V1Secret() - - secret_meta = client.V1ObjectMeta() - secret_meta.name = eos_secret_name - secret_meta.namespace = swan_container_namespace - secret_meta.labels = { - "swan_user": username - } - secret_data.metadata = secret_meta - secret_data.data = {} - secret_data.data['krb5cc'] = eos_token_base64 + secret_data = V1Secret() + + secret_meta = V1ObjectMeta() + secret_meta.name = eos_secret_name + secret_meta.namespace = swan_container_namespace + secret_meta.labels = { + "swan_user": username + } + secret_data.metadata = secret_meta + secret_data.data = {} + secret_data.data['krb5cc'] = eos_token_base64 + try: + # eos-tokens secret is cleaned when user session ends, so try creating it + await self.spawner.api.create_namespaced_secret(swan_container_namespace, secret_data) + except ApiException: + # A secret with the same name exists, probably a remnant of a wrongly-terminated session, then replace it try: - self.spawner.api.read_namespaced_secret(eos_secret_name, 
swan_container_namespace) - exists = True - except ApiException: - exists = False - - if exists: - self.spawner.api.replace_namespaced_secret(eos_secret_name, swan_container_namespace, secret_data) - else: - self.spawner.api.create_namespaced_secret(swan_container_namespace, secret_data) - except ApiException as e: - raise Exception("Could not create required eos secret: %s\n" % e) + await self.spawner.api.replace_namespaced_secret(eos_secret_name, swan_container_namespace, secret_data) + except ApiException as e: + raise Exception("Could not create required eos secret: %s\n" % e) return eos_secret_name @@ -82,15 +95,15 @@ def _init_eos_containers(self, eos_secret_name): # Shared directory between notebook and side-container for tokens with correct privileges self.pod.spec.volumes.append( - client.V1Volume( + V1Volume( name='shared-pod-volume', - empty_dir=client.V1EmptyDirVolumeSource( + empty_dir=V1EmptyDirVolumeSource( medium='Memory' ) ) ) side_container_volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='shared-pod-volume', mount_path='/srv/notebook' ) @@ -98,7 +111,7 @@ def _init_eos_containers(self, eos_secret_name): # Mount shared tokens volume that contains tokens with correct permissions notebook_container.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='shared-pod-volume', mount_path='/srv/notebook' ) @@ -107,15 +120,15 @@ def _init_eos_containers(self, eos_secret_name): # pod volume to mount generated eos tokens and # side-container volume mount with generated tokens self.pod.spec.volumes.append( - client.V1Volume( + V1Volume( name=eos_secret_name, - secret=client.V1SecretVolumeSource( + secret=V1SecretVolumeSource( secret_name='eos-tokens-%s' % username, ) ) ) side_container_volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name=eos_secret_name, mount_path='/srv/side-container/eos' ) @@ -124,7 +137,7 @@ def _init_eos_containers(self, eos_secret_name): # define eos kerberos credentials path for Jupyter 
server in notebook container notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='KRB5CCNAME', value='/srv/notebook/tokens/krb5cc' ), @@ -133,7 +146,7 @@ def _init_eos_containers(self, eos_secret_name): # define eos kerberos credentials path for notebook and terminal processes in notebook container notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='KRB5CCNAME_NB_TERM', value='/srv/notebook/tokens/writable/krb5cc_nb_term' ), @@ -142,10 +155,10 @@ def _init_eos_containers(self, eos_secret_name): # Set server hostname of the pod running jupyterhub notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SERVER_HOSTNAME', - value_from=client.V1EnvVarSource( - field_ref=client.V1ObjectFieldSelector( + value_from=V1EnvVarSource( + field_ref=V1ObjectFieldSelector( field_path='spec.nodeName' ) ) @@ -155,12 +168,12 @@ def _init_eos_containers(self, eos_secret_name): # append as first (it will be first to spawn) side container which currently: # - refreshes the kerberos token and adjust permissions for the user self.pod.spec.volumes.append( - client.V1Volume( + V1Volume( name='side-container-scripts', - config_map=client.V1ConfigMapVolumeSource( + config_map=V1ConfigMapVolumeSource( name='swan-scripts-cern', items=[ - client.V1KeyToPath( + V1KeyToPath( key='side_container_tokens_perm.sh', path='side_container_tokens_perm.sh', ) @@ -170,7 +183,7 @@ def _init_eos_containers(self, eos_secret_name): ) ) side_container_volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='side-container-scripts', mount_path='/srv/side-container/side_container_tokens_perm.sh', sub_path='side_container_tokens_perm.sh', @@ -179,7 +192,7 @@ def _init_eos_containers(self, eos_secret_name): env = self.spawner.get_env() pod_spec_containers.append( - client.V1Container( + V1Container( 
name='side-container', image='cern/cc7-base:20181210', command=['/srv/side-container/side_container_tokens_perm.sh'], @@ -203,18 +216,18 @@ def _init_eos_containers(self, eos_secret_name): # This is defined in the configuration to allow overring iindependently # of which config file is loaded first # c.SwanKubeSpawner.modify_pod_hook = swan_pod_hook -def swan_pod_hook_prod(spawner, pod): +async def swan_pod_hook_prod(spawner, pod): """ :param spawner: Swan Kubernetes Spawner :type spawner: swanspawner.SwanKubeSpawner :param pod: default pod definition set by jupyterhub - :type pod: client.V1Pod + :type pod: V1Pod :returns: dynamically customized pod specification for user session - :rtype: client.V1Pod + :rtype: V1Pod """ pod_hook_handler = SwanPodHookHandlerProd(spawner, pod) - return pod_hook_handler.get_swan_user_pod() + return await pod_hook_handler.get_swan_user_pod() swan_cull_period = get_config('custom.cull.every', 600) @@ -223,15 +236,5 @@ def swan_pod_hook_prod(spawner, pod): c.SwanKubeSpawner.modify_pod_hook = swan_pod_hook_prod -def swan_cern_post_stop_hook(spawner): - # Delete Kubernetes Secret storing eos kerberos ticket of the user - username = spawner.user.name - eos_secret_name = f"eos-tokens-{username}" - swan_container_namespace = os.environ.get('POD_NAMESPACE', 'default') - spawner.log.info('Deleting secret %s', eos_secret_name) - spawner.api.delete_namespaced_secret(eos_secret_name, swan_container_namespace) - -c.SwanKubeSpawner.post_stop_hook = swan_cern_post_stop_hook - # Required for swan systemuser.sh c.SwanKubeSpawner.cmd = None diff --git a/swan-cern/files/swan_spark_config.py b/swan-cern/files/swan_spark_config.py index caf13ed7..4d29a52b 100644 --- a/swan-cern/files/swan_spark_config.py +++ b/swan-cern/files/swan_spark_config.py @@ -1,7 +1,21 @@ import os, subprocess, time, pwd, jwt -from kubernetes import client -from kubernetes.client.rest import ApiException +from kubernetes_asyncio.client.models import ( + V1EnvVar, + 
V1EnvVarSource, + V1ContainerPort, + V1ObjectMeta, + V1Secret, + V1SecretKeySelector, + V1SecretVolumeSource, + V1Service, + V1ServicePort, + V1ServiceSpec, + V1Volume, + V1VolumeMount, +) + +from kubernetes_asyncio.client.rest import ApiException import swanspawner @@ -12,22 +26,22 @@ class SwanSparkPodHookHandler(SwanPodHookHandlerProd): - def get_swan_user_pod(self): - super().get_swan_user_pod() + async def get_swan_user_pod(self): + await super().get_swan_user_pod() # get hadoop token hadoop_secret_name = None if self._spark_enabled(): # cern customisation for spark clusters - hadoop_secret_name = self._init_hadoop_secret() - self._init_spark(self.pod.metadata.labels) + hadoop_secret_name = await self._init_hadoop_secret() + await self._init_spark(self.pod.metadata.labels) # init user containers (notebook and side-container) self._init_spark_containers(hadoop_secret_name) return self.pod - def _init_hadoop_secret(self): + async def _init_hadoop_secret(self): cluster = self.spawner.user_options[self.spawner.spark_cluster_field] @@ -71,9 +85,9 @@ def _init_hadoop_secret(self): # Create V1Secret with eos token try: - secret_data = client.V1Secret() + secret_data = V1Secret() - secret_meta = client.V1ObjectMeta() + secret_meta = V1ObjectMeta() secret_meta.name = hadoop_secret_name secret_meta.namespace = swan_container_namespace secret_data.metadata = secret_meta @@ -82,15 +96,15 @@ def _init_hadoop_secret(self): secret_data.data['webhdfs.toks'] = webhdfs_token_base64 try: - self.spawner.api.read_namespaced_secret(hadoop_secret_name, swan_container_namespace) + await self.spawner.api.read_namespaced_secret(hadoop_secret_name, swan_container_namespace) exists = True except ApiException: exists = False if exists: - self.spawner.api.replace_namespaced_secret(hadoop_secret_name, swan_container_namespace, secret_data) + await self.spawner.api.replace_namespaced_secret(hadoop_secret_name, swan_container_namespace, secret_data) else: - 
self.spawner.api.create_namespaced_secret(swan_container_namespace, secret_data) + await self.spawner.api.create_namespaced_secret(swan_container_namespace, secret_data) except ApiException as e: raise Exception("Could not create required hadoop secret: %s\n" % e) @@ -112,15 +126,15 @@ def _init_spark_containers(self, hadoop_secret_name): # side-container volume mount with generated tokens self.pod.spec.volumes.append( # V1Secret for tokens without adjusted permissions - client.V1Volume( + V1Volume( name=hadoop_secret_name, - secret=client.V1SecretVolumeSource( + secret=V1SecretVolumeSource( secret_name=hadoop_secret_name, ) ) ) side_container.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name=hadoop_secret_name, mount_path='/srv/side-container/hadoop' ) @@ -129,31 +143,31 @@ def _init_spark_containers(self, hadoop_secret_name): # instruct sparkconnector to fetch delegation tokens from service notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SWAN_FETCH_HADOOP_TOKENS', value='true' ), ) notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SWAN_HADOOP_TOKEN_GENERATOR_URL', value='http://hadoop-token-generator:80' ), ) notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='KUBECONFIG', value='/srv/notebook/tokens/k8s-user.config' ), ) notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='WEBHDFS_TOKEN', - value_from=client.V1EnvVarSource( - secret_key_ref=client.V1SecretKeySelector( + value_from=V1EnvVarSource( + secret_key_ref=V1SecretKeySelector( key='webhdfs.toks', name=hadoop_secret_name ) @@ -190,7 +204,7 @@ def _spark_enabled(self): return True return False - def _init_spark(self, pod_labels): + async def _init_spark(self, pod_labels): """ Set cern related configuration for spark cluster and open 
ports """ @@ -209,21 +223,21 @@ def _init_spark(self, pod_labels): notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SPARK_CLUSTER_NAME', value=cluster ) ) notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SPARK_USER', value=username ) ) notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='MAX_MEMORY', value=max_mem ) @@ -238,7 +252,7 @@ def _init_spark(self, pod_labels): notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SPARK_AUTH_REQUIRED', value=auth_required ) @@ -253,7 +267,7 @@ def _init_spark(self, pod_labels): notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SPARK_CONFIG_SCRIPT', value=spark_conf_script ) @@ -268,18 +282,18 @@ def _init_spark(self, pod_labels): spark_ports_per_pod = 18 for port_id in range(1, spark_ports_per_pod + 1): service_template_ports.append( - client.V1ServicePort( + V1ServicePort( name="spark-port-" + str(port_id), port=port_id ) ) - service_template = client.V1Service( + service_template = V1Service( api_version="v1", kind="Service", - metadata=client.V1ObjectMeta( + metadata=V1ObjectMeta( name=spark_ports_service ), - spec=client.V1ServiceSpec( + spec=V1ServiceSpec( selector=pod_labels, # attach this service to the pod with label {spark_pod_label} ports=service_template_ports, type="NodePort" @@ -289,18 +303,21 @@ def _init_spark(self, pod_labels): # Create V1Service which allocates random ports for spark in k8s cluster try: # use existing if possible - self.spawner.api.delete_namespaced_service(spark_ports_service, swan_container_namespace) - service = self.spawner.api.read_namespaced_service(spark_ports_service, swan_container_namespace) + await 
self.spawner.api.delete_namespaced_service(spark_ports_service, swan_container_namespace) + service = await self.spawner.api.read_namespaced_service(spark_ports_service, swan_container_namespace) except ApiException: # not existing, create - service = self.spawner.api.create_namespaced_service(swan_container_namespace, service_template) + try: + service = await self.spawner.api.create_namespaced_service(swan_container_namespace, service_template) + except ApiException as e: + raise Exception("Could not create service that allocates random ports for Spark in k8s cluster: %s\n" % e) # Replace the service with allocated nodeports to map nodeport:targetport # and set these ports for the notebook container for port_id in range(len(service.spec.ports)): name = service.spec.ports[port_id].name node_port = service.spec.ports[port_id].node_port - service.spec.ports[port_id] = client.V1ServicePort( + service.spec.ports[port_id] = V1ServicePort( name=name, node_port=node_port, port=node_port, @@ -313,19 +330,20 @@ def _init_spark(self, pod_labels): # Open proper ports in the notebook container to map nodeport:targetport notebook_container.ports = self._add_or_replace_by_name( notebook_container.ports, - client.V1ContainerPort( + V1ContainerPort( name=name, container_port=node_port, # this is needed - hadoop-yarn webapp crashes on ApplicationProxy UI host_port=node_port, ) ) - self.spawner.api.replace_namespaced_service(spark_ports_service, swan_container_namespace, service) + + await self.spawner.api.replace_namespaced_service(spark_ports_service, swan_container_namespace, service) # Add ports env for spark notebook_container.env = self._add_or_replace_by_name( notebook_container.env, - client.V1EnvVar( + V1EnvVar( name='SPARK_PORTS', value=','.join(spark_ports_env) ) @@ -339,40 +357,13 @@ def spark_modify_pod_hook(spawner, pod): :param spawner: Swan Kubernetes Spawner :type spawner: swanspawner.SwanKubeSpawner :param pod: default pod definition set by jupyterhub - :type pod: 
client.V1Pod + :type pod: V1Pod :returns: dynamically customized pod specification for user session - :rtype: client.V1Pod + :rtype: V1Pod """ spark_pod_hook_handler = SwanSparkPodHookHandler(spawner, pod) return spark_pod_hook_handler.get_swan_user_pod() -def spark_post_stop_hook(spawner): - """ - :param spawner: Swan Kubernetes Spawner - :type spawner: swanspawner.SwanKubeSpawner - """ - - # Call the parent hook defined in the swan_config_cern.py config file - # This function is assumed to be available as a global, because the config files - # are concatenated before execution by the chart. - swan_cern_post_stop_hook(spawner) - - spark_cluster = spawner.user_options[spawner.spark_cluster_field] - if spark_cluster and spark_cluster != 'none': - username = spawner.user.name - swan_container_namespace = os.environ.get('POD_NAMESPACE', 'default') - - # Delete NodePort service opening ports for the user spark processes - spark_ports_service = f"spark-ports-{username}" - spawner.log.info('Deleting service %s', spark_ports_service) - spawner.api.delete_namespaced_service(spark_ports_service, swan_container_namespace) - - # Delete Kubernetes Secret with hadoop delegation tokens - hadoop_secret_name = f"hadoop-tokens-{username}" - spawner.log.info('Deleting secret %s', hadoop_secret_name) - spawner.api.delete_namespaced_secret(hadoop_secret_name, swan_container_namespace) - c.SwanKubeSpawner.modify_pod_hook = spark_modify_pod_hook -c.SwanKubeSpawner.post_stop_hook = spark_post_stop_hook diff --git a/swan/files/swan_config.py b/swan/files/swan_config.py index a1e32523..badd7f8d 100644 --- a/swan/files/swan_config.py +++ b/swan/files/swan_config.py @@ -1,7 +1,13 @@ import logging, os, subprocess -from kubernetes import client -from kubernetes.client.rest import ApiException +from kubernetes_asyncio.client.models import ( + V1EmptyDirVolumeSource, + V1EnvVar, + V1HostPathVolumeSource, + V1PersistentVolumeClaimVolumeSource, + V1Volume, + V1VolumeMount, +) """ Class handling 
KubeSpawner.modify_pod_hook(spawner,pod) call @@ -11,7 +17,7 @@ class SwanPodHookHandler: def __init__(self, spawner, pod): """ :type spawner: swanspawner.SwanKubeSpawner - :type pod: client.V1Pod + :type pod: V1Pod """ self.spawner = spawner self.pod = pod @@ -37,7 +43,7 @@ def get_swan_user_pod(self): def _get_pod_container(self, container_name): """ :returns: required container from pod spec - :rtype: client.V1Container + :rtype: V1Container """ for container in self.pod.spec.containers: if container.name == container_name: @@ -67,10 +73,10 @@ def swan_pod_hook(spawner, pod): :param spawner: Swan Kubernetes Spawner :type spawner: swanspawner.SwanKubeSpawner :param pod: default pod definition set by jupyterhub - :type pod: client.V1Pod + :type pod: V1Pod :returns: dynamically customized pod specification for user session - :rtype: client.V1Pod + :rtype: V1Pod """ pod_hook_handler = SwanPodHookHandler(spawner, pod) return pod_hook_handler.get_swan_user_pod() @@ -140,15 +146,15 @@ def swan_pod_hook(spawner, pod): # add /dev/shm (for pyTorch and others) c.SwanKubeSpawner.volumes.append( - client.V1Volume( + V1Volume( name='devshm', - empty_dir=client.V1EmptyDirVolumeSource( + empty_dir=V1EmptyDirVolumeSource( medium='Memory' ) ) ) c.SwanKubeSpawner.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='devshm', mount_path='/dev/shm', ) @@ -159,16 +165,16 @@ def swan_pod_hook(spawner, pod): # Access via bind-mount from the host logging.info("EOS access via DaemonSet") c.SwanKubeSpawner.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='eos', mount_path='/eos', mount_propagation='HostToContainer' ), ) c.SwanKubeSpawner.volumes.append( - client.V1Volume( + V1Volume( name='eos', - host_path=client.V1HostPathVolumeSource( + host_path=V1HostPathVolumeSource( path='/var/eos' ) ), @@ -178,16 +184,16 @@ def swan_pod_hook(spawner, pod): # Access via CSI driver (still a bind-mount in practical terms) logging.info("EOS access via CSI driver") 
c.SwanKubeSpawner.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='eos', mount_path='/eos', mount_propagation='HostToContainer' ), ) c.SwanKubeSpawner.volumes.append( - client.V1Volume( + V1Volume( name='eos', - host_path=client.V1HostPathVolumeSource( + host_path=V1HostPathVolumeSource( path='/var/eos' ) ), @@ -202,15 +208,15 @@ def swan_pod_hook(spawner, pod): # Access via bind-mount from the host logging.info("CVMFS access via DaemonSet") c.SwanKubeSpawner.volumes.append( - client.V1Volume( + V1Volume( name='cvmfs', - host_path=client.V1HostPathVolumeSource( + host_path=V1HostPathVolumeSource( path='/var/cvmfs' ) ) ) c.SwanKubeSpawner.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='cvmfs', mount_path='/cvmfs', mount_propagation='HostToContainer' @@ -224,15 +230,15 @@ def swan_pod_hook(spawner, pod): for cvmfs_repo_path in cvmfs_repos: cvmfs_repo_id = cvmfs_repo_path['mount'].replace('.', '-') c.SwanKubeSpawner.volumes.append( - client.V1Volume( + V1Volume( name='cvmfs-'+cvmfs_repo_id, - persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource( + persistent_volume_claim=V1PersistentVolumeClaimVolumeSource( claim_name='cvmfs-'+cvmfs_repo_id+'-pvc' ) ) ) c.SwanKubeSpawner.volume_mounts.append( - client.V1VolumeMount( + V1VolumeMount( name='cvmfs-'+cvmfs_repo_id, mount_path='/cvmfs/'+cvmfs_repo_path['mount'], read_only=True From e43f7c1038d25fb02ffcbbb6cb4ba20948eb07a9 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Wed, 16 Aug 2023 16:23:30 +0200 Subject: [PATCH 03/16] Update to z2jh chart v3.0.1 --- swan/Chart.lock | 6 +++--- swan/Chart.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/swan/Chart.lock b/swan/Chart.lock index 6864259e..fbdc46e0 100644 --- a/swan/Chart.lock +++ b/swan/Chart.lock @@ -1,7 +1,7 @@ dependencies: - name: jupyterhub repository: https://jupyterhub.github.io/helm-chart/ - version: 2.0.0 + version: 3.0.1 - name: fusex repository: 
https://registry.cern.ch/chartrepo/eos version: 0.1.3 @@ -14,5 +14,5 @@ dependencies: - name: cvmfs-csi repository: http://registry.cern.ch/chartrepo/cern version: 0.1.0 -digest: sha256:c2e327e14eb54317eae923781209a3cda509d2b22cb3f6be910d45531fb2a8d4 -generated: "2023-10-16T11:22:35.427535188+02:00" +digest: sha256:3a6eacfc264ce15e6e1d72032a6787dbb9733d8a16b3733aa829571820033f98 +generated: "2023-10-16T11:23:57.747939863+02:00" diff --git a/swan/Chart.yaml b/swan/Chart.yaml index 4b979022..c51d2f4a 100644 --- a/swan/Chart.yaml +++ b/swan/Chart.yaml @@ -13,7 +13,7 @@ icon: https://swan.docs.cern.ch/images/logos/logo_swan_letters.png # dependencies: - name: jupyterhub - version: 2.0.0 + version: 3.0.1 repository: https://jupyterhub.github.io/helm-chart/ - name: fusex From a1eca447e3e9d638ff3ca357ef9adfa45b23453d Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Wed, 16 Aug 2023 16:25:15 +0200 Subject: [PATCH 04/16] Remove CERN-specific configuration for auth from swan chart And place in swan-cern what belongs there. 
--- swan-cern/values.yaml | 1 + swan/values.yaml | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index 9372e7e8..7273b9ae 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -138,6 +138,7 @@ swan: config: KeyCloakAuthenticator: oidc_issuer: https://auth.cern.ch/auth/realms/cern + admin_role: swan-admins exchange_tokens: - eos-service - cernbox-service diff --git a/swan/values.yaml b/swan/values.yaml index d7cf2419..84ba930f 100644 --- a/swan/values.yaml +++ b/swan/values.yaml @@ -161,16 +161,13 @@ jupyterhub: KeyCloakAuthenticator: # Config missing oidc_issuer: - admin_role: swan-admins scope: - profile - email - offline_access - openid exchange_tokens: [] - logout_redirect_url: https://cern.ch/swan auto_login: True - username_key: preferred_username client_id: # placeholder, check secrets client_secret: # placeholder, check secrets oauth_callback_url: # placeholder, check secrets From 30134300a2499223d4d45694813d1f50ece11884 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Wed, 16 Aug 2023 16:29:37 +0200 Subject: [PATCH 05/16] Update renamed configuration parameter username_key got deprecated in OAuth authenticator v16. 
--- swan-cern/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index 7273b9ae..1b70a2cf 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -144,7 +144,7 @@ swan: - cernbox-service logout_redirect_url: https://cern.ch/swan auto_login: False - username_key: preferred_username + username_claim: preferred_username client_id: # placeholder, check secrets client_secret: # placeholder, check secrets oauth_callback_url: # placeholder, check secrets From fb867808a2a89585933704598d9f9df1abc5d34c Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Wed, 30 Aug 2023 17:55:15 +0200 Subject: [PATCH 06/16] Set read:metrics scope for prometheus-service-monitor service After the update to JH 4, the access to the hub metrics is now authenticated and requires a role with the scope read:metrics. We already had the prometheus-service-monitor service, for which the hub generates an API token. We inject such token into a Prometheus ServiceMonitor object that we create, called hub-metrics-servicemonitor, which configures Prometheus to use that token when scraping the metrics endpoint of the hub. Now, we just give the necessary permissions (read:metrics) to that service token for the scraping to succeed. 
--- swan-cern/values.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index 1b70a2cf..e5293731 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -186,7 +186,11 @@ swan: services: hadoop-token-generator: {} # apiToken is generated by the chart prometheus-service-monitor: {} # apiToken is generated by the chart - + loadRoles: + prometheus: + description: Access to hub Prometheus metrics + scopes: ['read:metrics'] + services: [prometheus-service-monitor] custom: cull: # 4 hours From d33cb7dffb09b9fcfb2710b1bb1b59aa58f130f3 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Wed, 30 Aug 2023 18:02:09 +0200 Subject: [PATCH 07/16] Prevent redirection from /metrics to /hub/metrics By configuring our ServiceMonitor object for the hub metrics endpoint to use the latter path. --- swan-cern/templates/monitoring.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swan-cern/templates/monitoring.yaml b/swan-cern/templates/monitoring.yaml index 822a82fd..61232ea7 100644 --- a/swan-cern/templates/monitoring.yaml +++ b/swan-cern/templates/monitoring.yaml @@ -19,7 +19,7 @@ spec: endpoints: - port: hub interval: 30s - path: /metrics + path: /hub/metrics bearerTokenSecret: name: hub - key: hub.services.prometheus-service-monitor.apiToken \ No newline at end of file + key: hub.services.prometheus-service-monitor.apiToken From c6cd3fefbd4d6838ef1f4d86ba2610380d827892 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Thu, 31 Aug 2023 13:42:52 +0200 Subject: [PATCH 08/16] Allow all authenticated users to log in After the upgrade to a new oauthenticator, by default two lists are checked to decide if an authenticated user is allowed to log in: allowed_users and allowed_groups. Those lists are empty by default, and we do not want to fill them with any list of users / groups, since we'd like to accept any user who authenticated successfully. 
This is what we achieve with allow_all: https://oauthenticator.readthedocs.io/en/stable/reference/api/gen/oauthenticator.auth0.html#oauthenticator.auth0.Auth0OAuthenticator.allow_all --- swan/values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/swan/values.yaml b/swan/values.yaml index 84ba930f..e6024ae8 100644 --- a/swan/values.yaml +++ b/swan/values.yaml @@ -168,6 +168,7 @@ jupyterhub: - openid exchange_tokens: [] auto_login: True + allow_all: True client_id: # placeholder, check secrets client_secret: # placeholder, check secrets oauth_callback_url: # placeholder, check secrets From f4f5997b3ac7e3b1d4c388cf9143bb371e941d0f Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Fri, 1 Sep 2023 10:36:05 +0200 Subject: [PATCH 09/16] Remove swan_user label This doesn't seem to be used anywhere in our code and there's already a label (hub.jupyter.org/username) that has the same effect --- swan/files/swan_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/swan/files/swan_config.py b/swan/files/swan_config.py index badd7f8d..5a449673 100644 --- a/swan/files/swan_config.py +++ b/swan/files/swan_config.py @@ -26,8 +26,7 @@ def get_swan_user_pod(self): # pod labels pod_labels = dict( - lcg_release = self.spawner.user_options[self.spawner.lcg_rel_field].split('/')[0], - swan_user = self.spawner.user.name + lcg_release = self.spawner.user_options[self.spawner.lcg_rel_field].split('/')[0] ) # update pod labels From ee62864b70224ac29307bbb550af52b15333f3b7 Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Fri, 1 Sep 2023 10:36:14 +0200 Subject: [PATCH 10/16] Cleanup --- swan-cern/files/swan_config_cern.py | 5 ----- swan-cern/files/swan_spark_config.py | 8 +------- swan/files/swan_config.py | 3 +-- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/swan-cern/files/swan_config_cern.py b/swan-cern/files/swan_config_cern.py index b1599fb8..ca78b3f8 100644 --- a/swan-cern/files/swan_config_cern.py +++ b/swan-cern/files/swan_config_cern.py 
@@ -1,7 +1,5 @@ import os, subprocess -import asyncio - from kubernetes_asyncio.client.models import ( V1EmptyDirVolumeSource, V1EnvVar, @@ -11,10 +9,8 @@ V1KeyToPath, V1ObjectFieldSelector, V1ObjectMeta, - V1PodSecurityContext, V1Secret, V1SecretVolumeSource, - V1SELinuxOptions, V1Volume, V1VolumeMount, ) @@ -46,7 +42,6 @@ async def get_swan_user_pod(self): async def _init_eos_secret(self): username = self.spawner.user.name - user_uid = self.spawner.user_uid eos_secret_name ='eos-tokens-%s' % username try: diff --git a/swan-cern/files/swan_spark_config.py b/swan-cern/files/swan_spark_config.py index 4d29a52b..f52a8c9a 100644 --- a/swan-cern/files/swan_spark_config.py +++ b/swan-cern/files/swan_spark_config.py @@ -1,4 +1,4 @@ -import os, subprocess, time, pwd, jwt +import subprocess from kubernetes_asyncio.client.models import ( V1EnvVar, @@ -17,8 +17,6 @@ from kubernetes_asyncio.client.rest import ApiException -import swanspawner - """ Class handling KubeSpawner.modify_pod_hook(spawner,pod) call """ @@ -116,10 +114,6 @@ def _init_spark_containers(self, hadoop_secret_name): """ notebook_container = self._get_pod_container('notebook') side_container = self._get_pod_container('side-container') - username = self.spawner.user.name - - pod_spec_containers = [] - side_container_volume_mounts = [] if hadoop_secret_name: # pod volume to mount generated hadoop tokens and diff --git a/swan/files/swan_config.py b/swan/files/swan_config.py index 5a449673..9460bacf 100644 --- a/swan/files/swan_config.py +++ b/swan/files/swan_config.py @@ -1,8 +1,7 @@ -import logging, os, subprocess +import logging from kubernetes_asyncio.client.models import ( V1EmptyDirVolumeSource, - V1EnvVar, V1HostPathVolumeSource, V1PersistentVolumeClaimVolumeSource, V1Volume, From 15a11d013808c427489167805584495d5385b0c3 Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Thu, 31 Aug 2023 20:02:06 +0200 Subject: [PATCH 11/16] Safe label username --- swan-cern/files/swan_config_cern.py | 8 ++++++-- 1 file 
changed, 6 insertions(+), 2 deletions(-) diff --git a/swan-cern/files/swan_config_cern.py b/swan-cern/files/swan_config_cern.py index ca78b3f8..b61c058d 100644 --- a/swan-cern/files/swan_config_cern.py +++ b/swan-cern/files/swan_config_cern.py @@ -1,4 +1,5 @@ import os, subprocess +import escapism from kubernetes_asyncio.client.models import ( V1EmptyDirVolumeSource, @@ -41,8 +42,11 @@ async def get_swan_user_pod(self): return self.pod async def _init_eos_secret(self): - username = self.spawner.user.name - eos_secret_name ='eos-tokens-%s' % username + + username = escapism.escape( + self.spawner.user.name, safe = self.spawner.safe_chars, escape_char = '-' + ).lower() + eos_secret_name = 'eos-tokens-%s' % username try: # Retrieve eos token for user From b5df2460a07c6d11755f777f255e1b9a0b91d1e7 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Tue, 17 Oct 2023 13:13:22 +0200 Subject: [PATCH 12/16] Update to z2jh chart v3.1.0 --- swan/Chart.lock | 6 +++--- swan/Chart.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/swan/Chart.lock b/swan/Chart.lock index fbdc46e0..325b5962 100644 --- a/swan/Chart.lock +++ b/swan/Chart.lock @@ -1,7 +1,7 @@ dependencies: - name: jupyterhub repository: https://jupyterhub.github.io/helm-chart/ - version: 3.0.1 + version: 3.1.0 - name: fusex repository: https://registry.cern.ch/chartrepo/eos version: 0.1.3 @@ -14,5 +14,5 @@ dependencies: - name: cvmfs-csi repository: http://registry.cern.ch/chartrepo/cern version: 0.1.0 -digest: sha256:3a6eacfc264ce15e6e1d72032a6787dbb9733d8a16b3733aa829571820033f98 -generated: "2023-10-16T11:23:57.747939863+02:00" +digest: sha256:3eac4448e52537f505f81f044fdf3e3f0b7b2967fa357a6567c5ae450ec7957e +generated: "2023-10-17T13:12:20.679327104+02:00" diff --git a/swan/Chart.yaml b/swan/Chart.yaml index c51d2f4a..9351d98a 100644 --- a/swan/Chart.yaml +++ b/swan/Chart.yaml @@ -13,7 +13,7 @@ icon: https://swan.docs.cern.ch/images/logos/logo_swan_letters.png # dependencies: - name: 
jupyterhub - version: 3.0.1 + version: 3.1.0 repository: https://jupyterhub.github.io/helm-chart/ - name: fusex From 99d57480078d2c604059359d6196a7dc117c7fa6 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Tue, 17 Oct 2023 13:14:32 +0200 Subject: [PATCH 13/16] Disable debug messages from the user's Jupyter server To reduce the verbosity of the user logs, and also to prevent the value of the user OAuth tokens from being printed in the logs (such message will change to "debug" level instead of "info" in a contribution we will make upstream). --- swan/values.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/swan/values.yaml b/swan/values.yaml index e6024ae8..31f02c3b 100644 --- a/swan/values.yaml +++ b/swan/values.yaml @@ -217,8 +217,6 @@ jupyterhub: enabled: false containerSecurityContext: allowPrivilegeEscalation: true - debug: - enabled: true # disable upstream cull, but enable custom one cull: enabled: false From 9755e9e42ad20fbf84c6b69769113af48f5275e2 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Tue, 17 Oct 2023 13:19:50 +0200 Subject: [PATCH 14/16] Add a role with specific permissions for the swan culler service This has been copied from what is done upstream for the standard culler service. Once we take out the management of the EOS tickets from the culler, we should be able to just rely on the upstream culler service.
--- swan/files/swan_config.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/swan/files/swan_config.py b/swan/files/swan_config.py index 9460bacf..767661cb 100644 --- a/swan/files/swan_config.py +++ b/swan/files/swan_config.py @@ -98,6 +98,19 @@ def swan_pod_hook(spawner, pod): # Culling of users and ticket refresh if get_config("custom.cull.enabled", False): + swan_idle_culler_role = { + "name": "swan-idle-culler", + "scopes": [ + "list:users", + "read:users:activity", + "read:servers", + "delete:servers", + # "admin:users", # dynamically added if --cull-users is passed + ], + # assign the role to a jupyterhub service, so it gains these permissions + "services": ["swan-idle-culler"], + } + cull_cmd = ["swanculler"] base_url = c.JupyterHub.get("base_url", "/") cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api")) @@ -112,6 +125,7 @@ def swan_pod_hook(spawner, pod): if get_config("custom.cull.users"): cull_cmd.append("--cull-users=True") + swan_idle_culler_role["scopes"].append("admin:users") if get_config("custom.cull.removeNamedServers"): cull_cmd.append("--remove-named-servers") @@ -130,12 +144,13 @@ def swan_pod_hook(spawner, pod): c.JupyterHub.services.append( { - "name": "cull-idle", + "name": "swan-idle-culler", "admin": True, "command": cull_cmd, "environment": {'SWAN_DEV': os.environ.get('SWAN_DEV', 'false')} } ) + c.JupyterHub.load_roles.append(swan_idle_culler_role) # Init lists for volumes and volume_mounts From 26eac6d766e1bf775d9648a85a583b3279e85a80 Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Thu, 19 Oct 2023 17:14:08 +0200 Subject: [PATCH 15/16] Do not attempt to delete non-existing pvc When a user is being deleted (e.g. by the culler), a PVC with name "claim-username" is also attempted to be deleted by default as part of the delete_forever function. This implies an extra call to the API server to delete a PVC that we know does not exist. 
With the KubeSpawner's delete_pvc flag set to false, we prevent that extra call. --- swan/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/swan/values.yaml b/swan/values.yaml index 31f02c3b..e02dacb7 100644 --- a/swan/values.yaml +++ b/swan/values.yaml @@ -178,6 +178,8 @@ jupyterhub: http_timeout: 45 start_timeout: 60 consecutive_failure_limit: 0 + KubeSpawner: + delete_pvc: False SwanKubeSpawner: # set home directory to EOS local_home: False From 366b588389f96651b6d772c6682488d65aa6e0e0 Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Mon, 23 Oct 2023 09:36:12 +0200 Subject: [PATCH 16/16] Don't refresh the token on spawn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This call is expensive and users most probably have a new token already. It has been enabled since OAuthenticator v16.0.0 --- swan-cern/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index e5293731..c0d98d43 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -148,10 +148,10 @@ swan: client_id: # placeholder, check secrets client_secret: # placeholder, check secrets oauth_callback_url: # placeholder, check secrets - # skip refreshing tokens if already refreshed in last 110 minutes # this assumes tokens provided by keycloak are valid for 120 minutes auth_refresh_age: 6600 + refresh_pre_spawn: False JupyterHub: allow_named_servers: False SwanKubeSpawner: