diff --git a/Dockerfile b/Dockerfile
index 7ca6ab0..1e59e5f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM mltooling/ssh-proxy:0.1.10
+FROM mltooling/ssh-proxy:0.1.11
 
 WORKDIR /
 
@@ -159,7 +159,8 @@ ENV \
     START_CHP=false \
     EXECUTION_MODE="local" \
     HUB_NAME="mlhub" \
-    CLEANUP_INTERVAL_SECONDS=3600
+    CLEANUP_INTERVAL_SECONDS=3600 \
+    DYNAMIC_WHITELIST_ENABLED="false"
 
 ### END CONFIGURATION ###
diff --git a/README.md b/README.md
index 46f4764..2437a83 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,13 @@ Here are the additional environment variables for the hub:
            3600
        </td>
    </tr>
+   <tr>
+       <td>DYNAMIC_WHITELIST_ENABLED</td>
+       <td>
+           Enables the Authenticator to use a file as a whitelist of usernames. The file must contain one whitelisted username per line and must be mounted to /resources/dynamic_whitelist.txt. The file can be dynamically modified. Keep in mind that already logged-in users stay authenticated even if removed from the list - they just cannot log in again.
+       </td>
+       <td>false</td>
+   </tr>
    <tr>
        <td>SSL_ENABLED</td>
        <td>Enable SSL. If you don't provide an ssl certificate as described in Section "Enable SSL/HTTPS", certificates will be generated automatically. As this auto-generated certificate is not signed, you have to trust it in the browser. Without ssl enabled, ssh access won't work as the container uses a single port and has to tell https and ssh traffic apart.</td>
@@ -132,7 +139,11 @@ Here are the additional environment variables for the hub:
 
 ##### Docker-local
 
-Jupyterhub itself is configured via a `config.py` file. In case of MLHub, a default config file is stored under `/resources/jupyterhub_config.py`. If you want to override settings or set extra ones, you can put another config file under `/resources/jupyterhub_user_config.py`. Following settings should probably not be overriden:
+Jupyterhub itself is configured via a `config.py` file. In the case of MLHub, a default config file is stored under `/resources/jupyterhub_config.py`. If you want to override settings or set extra ones, you can put another config file under `/resources/jupyterhub_user_config.py`.
+The following settings are additions to standard JupyterHub:
+- `c.Spawner.workspace_images` - set the images that appear in the dropdown menu when a new named server is created, e.g. `c.Spawner.workspace_images = [c.Spawner.image, "mltooling/ml-workspace-gpu:0.8.7", "mltooling/ml-workspace-r:0.8.7"]`
+
+The following settings should probably not be overridden:
 - `c.Spawner.environment` - we set default variables there. Instead of overriding it, you can add extra variables to the existing dict, e.g. via `c.Spawner.environment["myvar"] = "myvalue"`.
 - `c.DockerSpawner.prefix` and `c.DockerSpawner.name_template` - if you change those, check whether your SSH environment variables permit those names a target. Also, think about setting `c.Authenticator.username_pattern` to prevent a user having a username that is also a valid container name.
 - If you override ip and port connection settings, make sure to use Docker images that can handle those.
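Note for reviewers: to make the README's override mechanism concrete, here is a hypothetical `/resources/jupyterhub_user_config.py` following the rules above. The environment variable and username pattern are made-up examples; the image list is taken verbatim from the README.

```python
# Hypothetical /resources/jupyterhub_user_config.py - a sketch, not shipped code.
# `c` is the config object that JupyterHub injects into config files.

# Safe: extend the default environment dict instead of replacing it.
c.Spawner.environment["MY_EXTRA_VAR"] = "myvalue"  # made-up variable name

# Images offered in the named-server creation dropdown (example from the README).
c.Spawner.workspace_images = [
    c.Spawner.image,
    "mltooling/ml-workspace-gpu:0.8.7",
    "mltooling/ml-workspace-r:0.8.7",
]

# Illustrative pattern to keep usernames from colliding with container names.
c.Authenticator.username_pattern = r"^[a-z][a-z0-9]{3,19}$"
```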
diff --git a/resources/docker-entrypoint.sh b/resources/docker-entrypoint.sh
index d7a525b..46dea3e 100755
--- a/resources/docker-entrypoint.sh
+++ b/resources/docker-entrypoint.sh
@@ -25,7 +25,7 @@ fi
 $_RESOURCES_PATH/scripts/setup_certs.sh
 
 function start_ssh {
-    """See the documentation of the ssh-proxy image - the image this hub image is based on - for more information"""
+    # See the documentation of the ssh-proxy image - the image this hub image is based on - for more information
    echo "Start SSH Daemon service"
 
    export SSH_TARGET_LABELS="mlhub.origin=$HUB_NAME"
diff --git a/resources/jupyterhub_config.py b/resources/jupyterhub_config.py
index f4257c6..a58262a 100644
--- a/resources/jupyterhub_config.py
+++ b/resources/jupyterhub_config.py
@@ -11,6 +11,9 @@
 import json
 
+from traitlets.log import get_logger
+logger = get_logger()
+
 from mlhubspawner import utils
 from subprocess import call
 
@@ -40,6 +43,21 @@ def custom_normalize_username(self, username):
     return username
 Authenticator.normalize_username = custom_normalize_username
 
+original_check_whitelist = Authenticator.check_whitelist
+def dynamic_check_whitelist(self, username, authentication=None):
+    dynamic_whitelist_file = "/resources/dynamic_whitelist.txt"
+
+    if os.getenv("DYNAMIC_WHITELIST_ENABLED", "false") == "true":
+        if not os.path.exists(dynamic_whitelist_file):
+            logger.error("The dynamic whitelist has to be mounted to '{}'. Falling back to standard JupyterHub whitelist behavior.".format(dynamic_whitelist_file))
+        else:
+            with open(dynamic_whitelist_file, "r") as f:
+                whitelisted_users = [line.strip().lower() for line in f]  # strip newlines; raw readlines() entries would never match
+            return username.lower() in whitelisted_users
+
+    return original_check_whitelist(self, username, authentication)
+Authenticator.check_whitelist = dynamic_check_whitelist
+
 ### Helper Functions ###
 def get_or_init(config: object, config_type: type) -> object:
@@ -169,11 +187,10 @@ def combine_config_dicts(*configs) -> dict:
 
 docker_client = utils.init_docker_client(client_kwargs, tls_config)
 try:
     container = docker_client.containers.list(filters={"id": socket.gethostname()})[0]
-
     if container.name.lower() != ENV_HUB_NAME.lower():
         container.rename(ENV_HUB_NAME.lower())
 except docker.errors.APIError as e:
-    print("Could not correctly start MLHub container. " + str(e))
+    logger.error("Could not correctly start MLHub container. " + str(e))
     os.kill(os.getpid(), signal.SIGTERM) # For cleanup-service
diff --git a/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py b/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
index 809e7f5..5cd01ef 100644
--- a/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
+++ b/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
@@ -60,10 +60,10 @@ def get_env(self):
         #if self.user_options.get('gpus'):
         #    env['NVIDIA_VISIBLE_DEVICES'] = self.user_options.get('gpus')
 
-        if self.user_options.get('cpu_limit'):
-            env["MAX_NUM_THREADS"] = self.user_options.get('cpu_limit')
+        if self.user_options.get(utils.OPTION_CPU_LIMIT):
+            env[utils.OPTION_MAX_NUM_THREADS] = self.user_options.get(utils.OPTION_CPU_LIMIT)
 
-        env['SSH_JUMPHOST_TARGET'] = self.pod_name
+        env[utils.OPTION_SSH_JUMPHOST_TARGET] = self.pod_name
 
         return env
@@ -73,19 +73,19 @@ def start(self):
 
         self.saved_user_options = self.user_options
 
-        if self.user_options.get('image'):
-            self.image = self.user_options.get('image')
+        if self.user_options.get(utils.OPTION_IMAGE):
+            self.image = self.user_options.get(utils.OPTION_IMAGE)
 
         # Set request explicitly to 0, otherwise Kubernetes will set it to the same amount as limit
         # self.cpu_guarantee / self.mem_guarantee cannot be directly used, as they are of type ByteSpecification and, for example, 0G will be transformed to 0 which will not pass
         # the 'if cpu_guarantee' check (see https://github.com/jupyterhub/kubespawner/blob/8a6d66e04768565c0fc56c790a5fc42bfee634ec/kubespawner/objects.py#L279).
         # Hence, set it via extra_resource_guarantees.
         self.extra_resource_guarantees = {"cpu": 0, "memory": "0G"}
 
-        if self.user_options.get('cpu_limit'):
-            self.cpu_limit = float(self.user_options.get('cpu_limit'))
+        if self.user_options.get(utils.OPTION_CPU_LIMIT):
+            self.cpu_limit = float(self.user_options.get(utils.OPTION_CPU_LIMIT))
 
-        if self.user_options.get('mem_limit'):
-            memory = str(self.user_options.get('mem_limit')) + "G"
+        if self.user_options.get(utils.OPTION_MEM_LIMIT):
+            memory = str(self.user_options.get(utils.OPTION_MEM_LIMIT)) + "G"
             self.mem_limit = memory.upper().replace("GB", "G").replace("KB", "K").replace("MB", "M").replace("TB", "T")
 
         #if self.user_options.get('is_mount_volume') == 'on':
@@ -94,8 +94,8 @@ def start(self):
         # set default label 'origin' to know for sure which containers where started via the hub
         #self.extra_labels['pod_name'] = self.pod_name
-        if self.user_options.get('days_to_live'):
-            days_to_live_in_seconds = int(self.user_options.get('days_to_live')) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
+        if self.user_options.get(utils.OPTION_DAYS_TO_LIVE):
+            days_to_live_in_seconds = int(self.user_options.get(utils.OPTION_DAYS_TO_LIVE)) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
             expiration_timestamp = time.time() + days_to_live_in_seconds
             self.extra_labels[utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
         else:
@@ -151,12 +151,6 @@ def stop(self, now=False):
         except:
             self.log.warn("Could not delete service with name {}".format(self.pod_name))
-
-    def get_container_metadata(self) -> str:
-        if self.pod_name is None or self.pod_name == '':
-            return ""
-
-        return utils.get_container_metadata(self)
 
     def get_workspace_config(self) -> str:
         return utils.get_workspace_config(self)
diff --git a/resources/mlhubspawner/mlhubspawner/mlhubspawner.py b/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
index ae40d60..e58bf59 100644
--- a/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
+++ b/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
@@ -30,6 +30,8 @@
 INITIAL_CIDR_SECOND_OCTET = 33
 INITIAL_CIDR = "{}.33.0.0/24".format(INITIAL_CIDR_FIRST_OCTET)
 
+OPTION_SHM_SIZE = "shm_size"
+
 def has_complete_network_information(network):
     """Convenient function to check whether the docker.Network object has all required properties.
@@ -134,10 +136,10 @@ def get_env(self):
         if self.user_options.get('gpus'):
             env['NVIDIA_VISIBLE_DEVICES'] = self.user_options.get('gpus')
 
-        if self.user_options.get('cpu_limit'):
-            env["MAX_NUM_THREADS"] = self.user_options.get('cpu_limit')
+        if self.user_options.get(utils.OPTION_CPU_LIMIT):
+            env[utils.OPTION_MAX_NUM_THREADS] = self.user_options.get(utils.OPTION_CPU_LIMIT)
 
-        env['SSH_JUMPHOST_TARGET'] = self.object_name
+        env[utils.OPTION_SSH_JUMPHOST_TARGET] = self.object_name
 
         return env
@@ -151,22 +153,22 @@ def start(self) -> (str, int):
 
         self.saved_user_options = self.user_options
 
-        if self.user_options.get('image'):
-            self.image = self.user_options.get('image')
+        if self.user_options.get(utils.OPTION_IMAGE):
+            self.image = self.user_options.get(utils.OPTION_IMAGE)
 
         extra_host_config = {}
-        if self.user_options.get('cpu_limit'):
+        if self.user_options.get(utils.OPTION_CPU_LIMIT):
             # nano_cpus cannot be bigger than the number of CPUs of the machine (this method would currently not work in a cluster, as machines could be different than the machine where the runtime-manager and this code run.
             max_available_cpus = self.resource_information["cpu_count"]
             limited_cpus = min(
-                int(self.user_options.get('cpu_limit')), max_available_cpus)
+                int(self.user_options.get(utils.OPTION_CPU_LIMIT)), max_available_cpus)
 
             # the nano_cpu parameter of the Docker client expects an integer, not a float
             nano_cpus = int(limited_cpus * 1e9)
             extra_host_config['nano_cpus'] = nano_cpus
-        if self.user_options.get('mem_limit'):
-            extra_host_config['mem_limit'] = str(self.user_options.get(
-                'mem_limit')) + "gb"
+        if self.user_options.get(utils.OPTION_MEM_LIMIT):
+            extra_host_config[utils.OPTION_MEM_LIMIT] = str(self.user_options.get(
+                utils.OPTION_MEM_LIMIT)) + "gb"
 
         if self.user_options.get('is_mount_volume') == 'on':
             # {username} and {servername} will be automatically replaced by DockerSpawner with the right values as in template_namespace
@@ -176,20 +178,20 @@ def start(self) -> (str, int):
 
         extra_create_kwargs = {}
         # set default label 'origin' to know for sure which containers where started via the hub
-        extra_create_kwargs['labels'] = self.default_labels
-        if self.user_options.get('days_to_live'):
-            days_to_live_in_seconds = int(self.user_options.get('days_to_live')) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
+        extra_create_kwargs[utils.OPTION_LABELS] = self.default_labels
+        if self.user_options.get(utils.OPTION_DAYS_TO_LIVE):
+            days_to_live_in_seconds = int(self.user_options.get(utils.OPTION_DAYS_TO_LIVE)) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
             expiration_timestamp = time.time() + days_to_live_in_seconds
-            extra_create_kwargs['labels'][utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
+            extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
         else:
-            extra_create_kwargs['labels'][utils.LABEL_EXPIRATION_TIMESTAMP] = str(0)
+            extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_EXPIRATION_TIMESTAMP] = str(0)
 
-        if self.user_options.get('shm_size'):
-            extra_host_config['shm_size'] = self.user_options.get('shm_size')
+        if self.user_options.get(OPTION_SHM_SIZE):
+            extra_host_config[OPTION_SHM_SIZE] = self.user_options.get(OPTION_SHM_SIZE)
 
         if self.user_options.get('gpus'):
             extra_host_config['runtime'] = "nvidia"
-            extra_create_kwargs['labels'][utils.LABEL_NVIDIA_VISIBLE_DEVICES] = self.user_options.get('gpus')
+            extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_NVIDIA_VISIBLE_DEVICES] = self.user_options.get('gpus')
 
         self.extra_host_config.update(extra_host_config)
         self.extra_create_kwargs.update(extra_create_kwargs)
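The `start()` hunks above translate raw form inputs into Docker limits. A minimal sketch of that arithmetic with example values (not defaults):

```python
# Sketch of the resource-limit translation in MLHubDockerSpawner.start().
cpu_count = 16   # stand-in for self.resource_information["cpu_count"]
cpu_limit = "8"  # user_options values arrive as strings

# nano_cpus must not exceed the machine's CPU count; Docker expects an
# integer, where one full CPU equals 10**9 nano CPUs.
limited_cpus = min(int(cpu_limit), cpu_count)
nano_cpus = int(limited_cpus * 1e9)
print(nano_cpus)  # 8000000000

mem_limit = "8"  # gigabytes, as entered in the form
print(str(mem_limit) + "gb")  # '8gb' - the unit string Docker's mem_limit accepts
```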
diff --git a/resources/mlhubspawner/mlhubspawner/utils.py b/resources/mlhubspawner/mlhubspawner/utils.py
index 47a74d3..54cfd3b 100644
--- a/resources/mlhubspawner/mlhubspawner/utils.py
+++ b/resources/mlhubspawner/mlhubspawner/utils.py
@@ -26,6 +26,17 @@
 
 ENV_HUB_NAME = os.getenv("HUB_NAME", "mlhub")
 
+OPTION_LABELS = "labels"
+OPTION_DAYS_TO_LIVE = "days_to_live"
+OPTION_NANO_CPUS = "nano_cpus"
+OPTION_CPU_LIMIT = "cpu_limit"
+OPTION_MEM_LIMIT = "mem_limit"
+OPTION_IMAGE = "image"
+OPTION_SHM_SIZE = "shm_size"
+OPTION_SSH_JUMPHOST_TARGET = "SSH_JUMPHOST_TARGET"
+OPTION_MAX_NUM_THREADS = "MAX_NUM_THREADS"
+
+
 def get_lifetime_timestamp(labels: dict) -> float:
     return float(labels.get(LABEL_EXPIRATION_TIMESTAMP, '0'))
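The new `OPTION_*` constants centralize the user-option keys both spawners now share, and `get_lifetime_timestamp` is the read side of the expiration label the spawners write. A sketch of how the two fit together; the label key value and the cleanup condition are assumptions for illustration, not taken from this diff:

```python
# Sketch: writing and reading the expiration label. The key name below is
# assumed; the real value lives in utils.LABEL_EXPIRATION_TIMESTAMP.
import time

LABEL_EXPIRATION_TIMESTAMP = "mlhub.expiration_timestamp"  # assumed value

def get_lifetime_timestamp(labels: dict) -> float:
    return float(labels.get(LABEL_EXPIRATION_TIMESTAMP, '0'))

# Spawner side: days_to_live=3 becomes an absolute Unix timestamp in a label.
labels = {LABEL_EXPIRATION_TIMESTAMP: str(time.time() + 3 * 24 * 60 * 60)}

# Cleanup side (hypothetical check): 0 means "never expires".
lifetime = get_lifetime_timestamp(labels)
is_expired = lifetime != 0 and time.time() > lifetime
print(is_expired)  # False until three days have passed
```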