Add a way to forward accelerators to Docker containers (#4492)
* Add accelerator forwarding to Docker API

* Satisfy MyPy

* Apply suggestions from code review

* Add missing comma
adamnovak committed Jul 6, 2023
1 parent d686dac commit b301e91
Showing 2 changed files with 66 additions and 2 deletions.
36 changes: 34 additions & 2 deletions src/toil/lib/accelerators.py
@@ -14,8 +14,9 @@

"""Accelerator (i.e. GPU) utilities for Toil"""

import os
import subprocess
from typing import Dict, List, Optional, Set
from typing import Dict, List, Optional, Set, Union
from xml.dom import minidom

from toil.job import AcceleratorRequirement
@@ -37,6 +38,37 @@ def have_working_nvidia_smi() -> bool:
return False
return True

@memoize
def get_host_accelerator_numbers() -> List[int]:
"""
Work out which accelerator is which.
For each accelerator visible to us, returns the host-side (for example,
outside-of-Slurm-job) number for that accelerator. It is often the same as
the apparent number.
Can be used with Docker's --gpus='"device=#,#,#"' option to forward the
right GPUs as seen from a Docker daemon.
"""

for number_list_var in ['SLURM_STEP_GPUS', 'SLURM_JOB_GPUS', 'CUDA_VISIBLE_DEVICES', 'NVIDIA_VISIBLE_DEVICES']:
# Any of these can have a list of GPU numbers, but the CUDA/NVIDIA ones
# also support a system of GPU GUIDs that we don't support.
# TODO: If Slurm confinement is set we ignore any attempt to further
# limit us with the app-level variables. Does that make sense? Writing
# code to translate through would be hard and probably not actually
# useful.
if number_list_var in os.environ:
device_string = os.environ[number_list_var]
# Parse all the numbers we have
device_numbers = [int(part) for part in device_string.split(',') if part.isnumeric()]
if len(device_numbers) > 0:
# We found some numbers, so use those
return device_numbers

# If we don't see a set of limits we understand, say we have all nvidia GPUs
return list(range(count_nvidia_gpus()))
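As a point of reference, here is a minimal standalone sketch (not Toil's own code) of the lookup the new function performs, showing how a Slurm GPU assignment maps apparent accelerator numbers back to host numbers; the helper name and the example environment value are hypothetical:

import os
from typing import List

def host_gpu_numbers(total_gpus: int) -> List[int]:
    # Mirror the lookup above: the first recognized variable holding plain
    # numbers wins; otherwise assume every GPU on the host is visible.
    for var in ('SLURM_STEP_GPUS', 'SLURM_JOB_GPUS',
                'CUDA_VISIBLE_DEVICES', 'NVIDIA_VISIBLE_DEVICES'):
        if var in os.environ:
            numbers = [int(p) for p in os.environ[var].split(',') if p.isnumeric()]
            if numbers:
                return numbers
    return list(range(total_gpus))

os.environ['SLURM_JOB_GPUS'] = '2,3'   # hypothetical Slurm assignment
print(host_gpu_numbers(total_gpus=4))  # [2, 3]: apparent GPU 0 is host GPU 2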

@memoize
def have_working_nvidia_docker_runtime() -> bool:
"""
@@ -83,7 +115,7 @@ def get_individual_local_accelerators() -> List[AcceleratorRequirement]:
# For now we only know about nvidia GPUs
return [{'kind': 'gpu', 'brand': 'nvidia', 'api': 'cuda', 'count': 1} for _ in range(count_nvidia_gpus())]

def get_restrictive_environment_for_local_accelerators(accelerator_numbers : Set[int]) -> Dict[str, str]:
def get_restrictive_environment_for_local_accelerators(accelerator_numbers : Union[Set[int], List[int]]) -> Dict[str, str]:
"""
Get environment variables which can be applied to a process to restrict it
to using only the given accelerator numbers.
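Only the signature changes here; the function body lies outside the hunk. For orientation, a restriction of the kind this function returns is conventionally expressed through CUDA/NVIDIA environment variables, roughly as in the sketch below (an illustration of the idea, not necessarily Toil's exact output):

import os
import subprocess

restricted_env = {
    'CUDA_VISIBLE_DEVICES': '2,3',    # hypothetical accelerator numbers
    'NVIDIA_VISIBLE_DEVICES': '2,3',
}
# Applied to a child process, this hides all other GPUs from CUDA code.
subprocess.run(['nvidia-smi'], env={**os.environ, **restricted_env})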
32 changes: 32 additions & 0 deletions src/toil/lib/docker.py
@@ -17,6 +17,7 @@
import re
import struct
from shlex import quote
from typing import Optional, List

import requests

@@ -27,6 +28,8 @@
create_api_error_from_http_exception)
from docker.utils.socket import consume_socket_output, demux_adaptor

from toil.lib.accelerators import get_host_accelerator_numbers

logger = logging.getLogger(__name__)

FORGO = 0
@@ -68,6 +71,7 @@ def apiDockerCall(job,
stream=False,
demux=False,
streamfile=None,
accelerators: Optional[List[int]] = None,
timeout=365 * 24 * 60 * 60,
**kwargs):
"""
@@ -151,6 +155,11 @@ def toil_job(job):
not always able to abort ongoing reads and writes in order
to respect the timeout. Defaults to 1 year (i.e. wait
essentially indefinitely).
:param accelerators: Toil accelerator numbers (usually GPUs) to forward to
the container. These are interpreted in the current
Python process's environment. See
toil.lib.accelerators.get_individual_local_accelerators()
for the menu of available accelerators.
:param kwargs: Additional keyword arguments supplied to the docker API's
run command. The list is 75 keywords total, for examples
and full documentation see:
@@ -238,6 +247,27 @@ def toil_job(job):
if auto_remove is None:
auto_remove = remove

device_requests = []
if accelerators:
# Map accelerator numbers to host numbers
host_accelerators = []
accelerator_mapping = get_host_accelerator_numbers()
for our_number in accelerators:
if our_number >= len(accelerator_mapping):
raise RuntimeError(
f"Cannot forward accelerator {our_number} because only "
f"{len(accelerator_mapping)} accelerators are available "
f"to this job."
)
host_accelerators.append(accelerator_mapping[our_number])
# TODO: Here we assume that the host accelerators are all GPUs
device_requests.append(
docker.types.DeviceRequest(
device_ids=[','.join(str(n) for n in host_accelerators)],
capabilities=[['gpu']]
)
)

try:
if detach is False:
# When detach is False, this returns stdout normally:
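The DeviceRequest built above is the API-level counterpart of the --gpus flag mentioned in accelerators.py. A hedged, standalone sketch of the same request for host GPUs 2 and 3 follows (the image name and command are placeholders, and a working NVIDIA container runtime is assumed):

import docker

client = docker.from_env()
gpu_request = docker.types.DeviceRequest(device_ids=['2,3'],
                                         capabilities=[['gpu']])
# Roughly equivalent to: docker run --gpus '"device=2,3"' <image> nvidia-smi
client.containers.run('nvidia/cuda:12.2.0-base-ubuntu22.04', 'nvidia-smi',
                      device_requests=[gpu_request])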
@@ -261,6 +291,7 @@ def toil_job(job):
log_config=log_config,
user=user,
environment=environment,
device_requests=device_requests,
**kwargs)

if demux is False:
@@ -303,6 +334,7 @@ def toil_job(job):
log_config=log_config,
user=user,
environment=environment,
device_requests=device_requests,
**kwargs)
if stdout or stderr:
if streamfile is None:
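Putting the new parameter to use: a hedged usage sketch of forwarding a job's first assigned GPU into a container via apiDockerCall (the image and command are placeholders, and the job is assumed to have requested at least one GPU accelerator):

from toil.lib.docker import apiDockerCall

def run_nvidia_smi(job):
    # accelerators are Toil-apparent numbers; apiDockerCall translates them
    # to host GPU numbers before building the Docker device request.
    return apiDockerCall(job,
                         image='nvidia/cuda:12.2.0-base-ubuntu22.04',
                         parameters=['nvidia-smi'],
                         accelerators=[0])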
