diff --git a/Dockerfile b/Dockerfile
index 7ca6ab0..1e59e5f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM mltooling/ssh-proxy:0.1.10
+FROM mltooling/ssh-proxy:0.1.11
WORKDIR /
@@ -159,7 +159,8 @@ ENV \
START_CHP=false \
EXECUTION_MODE="local" \
HUB_NAME="mlhub" \
- CLEANUP_INTERVAL_SECONDS=3600
+ CLEANUP_INTERVAL_SECONDS=3600 \
+ DYNAMIC_WHITELIST_ENABLED="false"
### END CONFIGURATION ###
diff --git a/README.md b/README.md
index 46f4764..2437a83 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,13 @@ Here are the additional environment variables for the hub:
3600 |
+
+ DYNAMIC_WHITELIST_ENABLED |
+
+ Enables the Authenticator to use a file as a whitelist of usernames. The file must contain one whitelisted username per line and must be mounted at /resources/dynamic_whitelist.txt; it can be modified dynamically. Keep in mind that already logged-in users stay authenticated even after being removed from the list - they just cannot log in again.
+ |
+ false |
+
SSL_ENABLED |
Enable SSL. If you don't provide an SSL certificate as described in the section "Enable SSL/HTTPS", certificates will be generated automatically. As this auto-generated certificate is not signed, you have to trust it in the browser. Without SSL enabled, SSH access won't work, as the container uses a single port and has to tell HTTPS and SSH traffic apart. |
@@ -132,7 +139,11 @@ Here are the additional environment variables for the hub:
##### Docker-local
-Jupyterhub itself is configured via a `config.py` file. In case of MLHub, a default config file is stored under `/resources/jupyterhub_config.py`. If you want to override settings or set extra ones, you can put another config file under `/resources/jupyterhub_user_config.py`. Following settings should probably not be overriden:
+JupyterHub itself is configured via a `config.py` file. In the case of MLHub, a default config file is stored under `/resources/jupyterhub_config.py`. If you want to override settings or set extra ones, you can put another config file under `/resources/jupyterhub_user_config.py`.
+The following settings are added on top of standard JupyterHub:
+- `c.Spawner.workspace_images` - sets the images that appear in the dropdown menu when a new named server is created, e.g. `c.Spawner.workspace_images = [c.Spawner.image, "mltooling/ml-workspace-gpu:0.8.7", "mltooling/ml-workspace-r:0.8.7"]` (see the sketch below)
+
+The following settings should probably not be overridden:
- `c.Spawner.environment` - we set default variables there. Instead of overriding it, you can add extra variables to the existing dict, e.g. via `c.Spawner.environment["myvar"] = "myvalue"`.
- `c.DockerSpawner.prefix` and `c.DockerSpawner.name_template` - if you change those, check whether your SSH environment variables permit those names as a target. Also, consider setting `c.Authenticator.username_pattern` to prevent a user from having a username that is also a valid container name.
- If you override IP and port connection settings, make sure to use Docker images that can handle them.
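+
+For example, a minimal `/resources/jupyterhub_user_config.py` could look like the following sketch (the extra image tag is purely illustrative):
+
+```python
+# /resources/jupyterhub_user_config.py - a minimal sketch, not a shipped default
+# Offer additional images in the named-server dropdown
+c.Spawner.workspace_images = [c.Spawner.image, "mltooling/ml-workspace-gpu:0.8.7"]
+# Add an extra variable to the existing environment dict instead of overriding it
+c.Spawner.environment["MY_EXTRA_VAR"] = "my-value"
+```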
diff --git a/resources/docker-entrypoint.sh b/resources/docker-entrypoint.sh
index d7a525b..46dea3e 100755
--- a/resources/docker-entrypoint.sh
+++ b/resources/docker-entrypoint.sh
@@ -25,7 +25,7 @@ fi
$_RESOURCES_PATH/scripts/setup_certs.sh
function start_ssh {
- """See the documentation of the ssh-proxy image - the image this hub image is based on - for more information"""
+ # See the documentation of the ssh-proxy image - the image this hub image is based on - for more information
echo "Start SSH Daemon service"
export SSH_TARGET_LABELS="mlhub.origin=$HUB_NAME"
diff --git a/resources/jupyterhub_config.py b/resources/jupyterhub_config.py
index f4257c6..a58262a 100644
--- a/resources/jupyterhub_config.py
+++ b/resources/jupyterhub_config.py
@@ -11,6 +11,9 @@
import json
+from traitlets.log import get_logger
+logger = get_logger()
+
from mlhubspawner import utils
from subprocess import call
@@ -40,6 +43,21 @@ def custom_normalize_username(self, username):
return username
Authenticator.normalize_username = custom_normalize_username
+original_check_whitelist = Authenticator.check_whitelist
+def dynamic_check_whitelist(self, username, authentication=None):
+ dynamic_whitelist_file = "/resources/dynamic_whitelist.txt"
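+    # Expected file format (see README): one whitelisted username per line, e.g.
+    #   alice
+    #   bob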
+
+ if os.getenv("DYNAMIC_WHITELIST_ENABLED", "false") == "true":
+ if not os.path.exists(dynamic_whitelist_file):
+            logger.error("The dynamic whitelist has to be mounted at '{}'. Falling back to standard JupyterHub whitelist behavior.".format(dynamic_whitelist_file))
+ else:
+ with open(dynamic_whitelist_file, "r") as f:
+                # readlines() keeps the trailing newline on every entry, so strip
+                # (and lowercase) each line before matching
+                whitelisted_users = [line.strip().lower() for line in f if line.strip()]
+            return username.lower() in whitelisted_users
+
+ return original_check_whitelist(self, username, authentication)
+Authenticator.check_whitelist = dynamic_check_whitelist
+
### Helper Functions ###
def get_or_init(config: object, config_type: type) -> object:
@@ -169,11 +187,10 @@ def combine_config_dicts(*configs) -> dict:
docker_client = utils.init_docker_client(client_kwargs, tls_config)
try:
container = docker_client.containers.list(filters={"id": socket.gethostname()})[0]
-
if container.name.lower() != ENV_HUB_NAME.lower():
container.rename(ENV_HUB_NAME.lower())
except docker.errors.APIError as e:
- print("Could not correctly start MLHub container. " + str(e))
+ logger.error("Could not correctly start MLHub container. " + str(e))
os.kill(os.getpid(), signal.SIGTERM)
# For cleanup-service
diff --git a/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py b/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
index 809e7f5..5cd01ef 100644
--- a/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
+++ b/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
@@ -60,10 +60,10 @@ def get_env(self):
#if self.user_options.get('gpus'):
# env['NVIDIA_VISIBLE_DEVICES'] = self.user_options.get('gpus')
- if self.user_options.get('cpu_limit'):
- env["MAX_NUM_THREADS"] = self.user_options.get('cpu_limit')
+ if self.user_options.get(utils.OPTION_CPU_LIMIT):
+ env[utils.OPTION_MAX_NUM_THREADS] = self.user_options.get(utils.OPTION_CPU_LIMIT)
- env['SSH_JUMPHOST_TARGET'] = self.pod_name
+ env[utils.OPTION_SSH_JUMPHOST_TARGET] = self.pod_name
return env
@@ -73,19 +73,19 @@ def start(self):
self.saved_user_options = self.user_options
- if self.user_options.get('image'):
- self.image = self.user_options.get('image')
+ if self.user_options.get(utils.OPTION_IMAGE):
+ self.image = self.user_options.get(utils.OPTION_IMAGE)
# Set request explicitly to 0, otherwise Kubernetes will set it to the same amount as limit
# self.cpu_guarantee / self.mem_guarantee cannot be directly used, as they are of type ByteSpecification and, for example, 0G will be transformed to 0 which will not pass
# the 'if cpu_guarantee' check (see https://github.com/jupyterhub/kubespawner/blob/8a6d66e04768565c0fc56c790a5fc42bfee634ec/kubespawner/objects.py#L279).
# Hence, set it via extra_resource_guarantees.
self.extra_resource_guarantees = {"cpu": 0, "memory": "0G"}
- if self.user_options.get('cpu_limit'):
- self.cpu_limit = float(self.user_options.get('cpu_limit'))
+ if self.user_options.get(utils.OPTION_CPU_LIMIT):
+ self.cpu_limit = float(self.user_options.get(utils.OPTION_CPU_LIMIT))
- if self.user_options.get('mem_limit'):
- memory = str(self.user_options.get('mem_limit')) + "G"
+ if self.user_options.get(utils.OPTION_MEM_LIMIT):
+ memory = str(self.user_options.get(utils.OPTION_MEM_LIMIT)) + "G"
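+            # Normalize unit suffixes to the single-letter form accepted by ByteSpecification, e.g. "4GB" -> "4G"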
self.mem_limit = memory.upper().replace("GB", "G").replace("KB", "K").replace("MB", "M").replace("TB", "T")
#if self.user_options.get('is_mount_volume') == 'on':
@@ -94,8 +94,8 @@ def start(self):
# set default label 'origin' to know for sure which containers were started via the hub
#self.extra_labels['pod_name'] = self.pod_name
- if self.user_options.get('days_to_live'):
- days_to_live_in_seconds = int(self.user_options.get('days_to_live')) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
+ if self.user_options.get(utils.OPTION_DAYS_TO_LIVE):
+ days_to_live_in_seconds = int(self.user_options.get(utils.OPTION_DAYS_TO_LIVE)) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
expiration_timestamp = time.time() + days_to_live_in_seconds
self.extra_labels[utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
else:
@@ -151,12 +151,6 @@ def stop(self, now=False):
except:
self.log.warn("Could not delete service with name {}".format(self.pod_name))
-
- def get_container_metadata(self) -> str:
- if self.pod_name is None or self.pod_name == '':
- return ""
-
- return utils.get_container_metadata(self)
def get_workspace_config(self) -> str:
return utils.get_workspace_config(self)
diff --git a/resources/mlhubspawner/mlhubspawner/mlhubspawner.py b/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
index ae40d60..e58bf59 100644
--- a/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
+++ b/resources/mlhubspawner/mlhubspawner/mlhubspawner.py
@@ -30,6 +30,8 @@
INITIAL_CIDR_SECOND_OCTET = 33
INITIAL_CIDR = "{}.33.0.0/24".format(INITIAL_CIDR_FIRST_OCTET)
+OPTION_SHM_SIZE = "shm_size"
+
def has_complete_network_information(network):
"""Convenient function to check whether the docker.Network object has all required properties.
@@ -134,10 +136,10 @@ def get_env(self):
if self.user_options.get('gpus'):
env['NVIDIA_VISIBLE_DEVICES'] = self.user_options.get('gpus')
- if self.user_options.get('cpu_limit'):
- env["MAX_NUM_THREADS"] = self.user_options.get('cpu_limit')
+ if self.user_options.get(utils.OPTION_CPU_LIMIT):
+ env[utils.OPTION_MAX_NUM_THREADS] = self.user_options.get(utils.OPTION_CPU_LIMIT)
- env['SSH_JUMPHOST_TARGET'] = self.object_name
+ env[utils.OPTION_SSH_JUMPHOST_TARGET] = self.object_name
return env
@@ -151,22 +153,22 @@ def start(self) -> (str, int):
self.saved_user_options = self.user_options
- if self.user_options.get('image'):
- self.image = self.user_options.get('image')
+ if self.user_options.get(utils.OPTION_IMAGE):
+ self.image = self.user_options.get(utils.OPTION_IMAGE)
extra_host_config = {}
- if self.user_options.get('cpu_limit'):
+ if self.user_options.get(utils.OPTION_CPU_LIMIT):
# nano_cpus cannot be bigger than the number of CPUs of the machine (this method would currently not work in a cluster, as machines could differ from the machine where the runtime manager and this code run).
max_available_cpus = self.resource_information["cpu_count"]
limited_cpus = min(
- int(self.user_options.get('cpu_limit')), max_available_cpus)
+ int(self.user_options.get(utils.OPTION_CPU_LIMIT)), max_available_cpus)
# the nano_cpu parameter of the Docker client expects an integer, not a float
nano_cpus = int(limited_cpus * 1e9)
extra_host_config['nano_cpus'] = nano_cpus
- if self.user_options.get('mem_limit'):
- extra_host_config['mem_limit'] = str(self.user_options.get(
- 'mem_limit')) + "gb"
+ if self.user_options.get(utils.OPTION_MEM_LIMIT):
+ extra_host_config[utils.OPTION_MEM_LIMIT] = str(self.user_options.get(
+ utils.OPTION_MEM_LIMIT)) + "gb"
if self.user_options.get('is_mount_volume') == 'on':
# {username} and {servername} will be automatically replaced by DockerSpawner with the right values as in template_namespace
@@ -176,20 +178,20 @@ def start(self) -> (str, int):
extra_create_kwargs = {}
# set default label 'origin' to know for sure which containers were started via the hub
- extra_create_kwargs['labels'] = self.default_labels
- if self.user_options.get('days_to_live'):
- days_to_live_in_seconds = int(self.user_options.get('days_to_live')) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
+ extra_create_kwargs[utils.OPTION_LABELS] = self.default_labels
+ if self.user_options.get(utils.OPTION_DAYS_TO_LIVE):
+ days_to_live_in_seconds = int(self.user_options.get(utils.OPTION_DAYS_TO_LIVE)) * 24 * 60 * 60 # days * hours_per_day * minutes_per_hour * seconds_per_minute
expiration_timestamp = time.time() + days_to_live_in_seconds
- extra_create_kwargs['labels'][utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
+ extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_EXPIRATION_TIMESTAMP] = str(expiration_timestamp)
else:
- extra_create_kwargs['labels'][utils.LABEL_EXPIRATION_TIMESTAMP] = str(0)
+ extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_EXPIRATION_TIMESTAMP] = str(0)
- if self.user_options.get('shm_size'):
- extra_host_config['shm_size'] = self.user_options.get('shm_size')
+ if self.user_options.get(OPTION_SHM_SIZE):
+            extra_host_config[OPTION_SHM_SIZE] = self.user_options.get(OPTION_SHM_SIZE)
if self.user_options.get('gpus'):
extra_host_config['runtime'] = "nvidia"
- extra_create_kwargs['labels'][utils.LABEL_NVIDIA_VISIBLE_DEVICES] = self.user_options.get('gpus')
+ extra_create_kwargs[utils.OPTION_LABELS][utils.LABEL_NVIDIA_VISIBLE_DEVICES] = self.user_options.get('gpus')
self.extra_host_config.update(extra_host_config)
self.extra_create_kwargs.update(extra_create_kwargs)
diff --git a/resources/mlhubspawner/mlhubspawner/utils.py b/resources/mlhubspawner/mlhubspawner/utils.py
index 47a74d3..54cfd3b 100644
--- a/resources/mlhubspawner/mlhubspawner/utils.py
+++ b/resources/mlhubspawner/mlhubspawner/utils.py
@@ -26,6 +26,17 @@
ENV_HUB_NAME = os.getenv("HUB_NAME", "mlhub")
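+# Keys used in Spawner.user_options and in the Docker create/host-config kwargs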
+OPTION_LABELS = "labels"
+OPTION_DAYS_TO_LIVE = "days_to_live"
+OPTION_NANO_CPUS = "nano_cpus"
+OPTION_CPU_LIMIT = "cpu_limit"
+OPTION_MEM_LIMIT = "mem_limit"
+OPTION_IMAGE = "image"
+OPTION_SHM_SIZE = "shm_size"
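+# Environment variable names injected into spawned workspace containers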
+OPTION_SSH_JUMPHOST_TARGET = "SSH_JUMPHOST_TARGET"
+OPTION_MAX_NUM_THREADS = "MAX_NUM_THREADS"
+
+
def get_lifetime_timestamp(labels: dict) -> float:
return float(labels.get(LABEL_EXPIRATION_TIMESTAMP, '0'))