From 3b77461f79712dd88ceeaf1edc04235caf986f8b Mon Sep 17 00:00:00 2001 From: Enric Tejedor Saavedra Date: Fri, 19 Apr 2024 13:21:06 +0200 Subject: [PATCH] Move GPU resource specification to pod hook This is in the context of providing a dynamic mechanism to check whether a GPU user is a participant in a SWAN event and, if so, allocate their session on a certain subset of resources with a configured GPU flavor. With this move, the configuration of the GPU resource requests and limits is done in the modify pod hook. This brings some benefits: we no longer need to erase the value of the last resource specification that was made; we can have GPU treatment together with the rest of the computing resources; all the GPU logic can be found in one place. --- SwanSpawner/swanspawner/swankubespawner.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/SwanSpawner/swanspawner/swankubespawner.py b/SwanSpawner/swanspawner/swankubespawner.py index 0c5fe33..f73acbb 100644 --- a/SwanSpawner/swanspawner/swankubespawner.py +++ b/SwanSpawner/swanspawner/swankubespawner.py @@ -25,16 +25,6 @@ async def start(self): """Perform extra configurations required for SWAN session spawning in kubernetes. 
""" - - if self._gpu_requested(): - self.extra_resource_guarantees["nvidia.com/gpu"] = "1" - self.extra_resource_limits["nvidia.com/gpu"] = "1" - elif "nvidia.com/gpu" in self.extra_resource_guarantees: - del self.extra_resource_guarantees["nvidia.com/gpu"] - del self.extra_resource_limits["nvidia.com/gpu"] - - # Resource requests and limits for user pods - # CPU limit is set to what the user selects in the form # The request (guarantee) is statically set in the chart; # the resulting overcommit is acceptable since users stay idle @@ -104,7 +94,7 @@ def get_env(self): """ Set base environmental variables for swan jupyter docker image """ env = super().get_env() - if self._gpu_requested(): + if self.gpu_requested(): env.update(dict( # Configure OpenCL to use NVIDIA backend OCL_ICD_FILENAMES = 'libnvidia-opencl.so.1', @@ -112,6 +102,6 @@ def get_env(self): return env - def _gpu_requested(self): + def gpu_requested(self): """Returns true if the user requested a GPU""" return "cu" in self.user_options[self.lcg_rel_field] \ No newline at end of file