Skip to content

Commit

Permalink
Change how CUDA runtime and capabilities are defined in Condor
Browse files Browse the repository at this point in the history
update docstring
  • Loading branch information
amaltaro committed Aug 15, 2023
1 parent 5c7d13b commit 0235cd0
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
20 changes: 20 additions & 0 deletions src/python/Utils/Utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import sys
from types import ModuleType, FunctionType
from gc import get_referents
from distutils.version import StrictVersion


def lowerCmsHeaders(headers):
"""
Expand Down Expand Up @@ -295,3 +297,21 @@ def encodeUnicodeToBytesConditional(value, errors="ignore", condition=True):
if condition:
return encodeUnicodeToBytes(value, errors)
return value


def orderVersionList(versionList):
    """
    Order, in place, a list of dot-separated numeric version strings.
    The order of precedence digits is from left to right. E.g.:
      from: ["2.3.1", "1.2.3", "3.2.1", "1.3.2"]
      to:   ["1.2.3", "1.3.2", "2.3.1", "3.2.1"]
    Versions may have any number of components ("1", "1.2", "1.2.3", ...);
    trailing zeros are not significant, so "1.2" and "1.2.0" compare equal
    and keep their relative input order (the sort is stable).
    :param versionList: list of strings
    :return: the very same list object, now ordered; or the initial data,
        untouched, if it is not a list.
    :raises ValueError: if a version has a non-numeric or empty component
    NOTE: implementation suggested in:
      https://stackoverflow.com/questions/2574080/sorting-a-list-of-dot-separated-numbers-like-software-versions
    """
    if not isinstance(versionList, list):
        return versionList

    def _versionKey(version):
        """Convert "1.2.0" into (1, 2) so versions compare numerically."""
        parts = [int(item) for item in version.split(".")]
        # drop trailing zeros so that "1.2" and "1.2.0" yield the same key
        while parts and parts[-1] == 0:
            parts.pop()
        return tuple(parts)

    # numeric tuples are used instead of distutils' StrictVersion, which is
    # deprecated and also rejects single-component versions such as "1"
    versionList.sort(key=_versionKey)
    return versionList
30 changes: 29 additions & 1 deletion src/python/WMCore/BossAir/Plugins/BasePlugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from builtins import object, str, bytes
from future.utils import viewvalues

from Utils.Utilities import decodeBytesToUnicode
from Utils.Utilities import decodeBytesToUnicode, orderVersionList
from WMCore.WMException import WMException
from WMCore.WMRuntime.Tools.Scram import ARCH_TO_OS, SCRAM_TO_ARCH

Expand Down Expand Up @@ -181,3 +181,31 @@ def scramArchtoRequiredArch(scramArch=None):
archs = defaultArch

return archs

@staticmethod
def cudaCapabilityToSingleVersion(capabilities=None):
    """
    Given a list of CUDA capabilities (with strings in a version style),
    finds the smallest version required and convert it to a single integer
    for comparison/job matchmaking purposes.
    Version conversion formula is: (1000 * major + 10 * medium + minor)
    Versions with fewer than three components ("1", "1.2") are padded with
    zeros; components beyond the third one are ignored.
    The provided object is never modified.
    :param capabilities: a list of string versions (a tuple, a set or a
        single version string are accepted as well)
    :return: an integer with the version value; 0 in case of failure
        (no data provided, unsupported data type or malformed version)
    For further details:
    https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html
    """
    defaultRes = 0
    if isinstance(capabilities, str):
        # a bare string would otherwise be indexed character by character
        capabilities = [capabilities]
    elif isinstance(capabilities, (list, tuple, set)):
        # work on a copy, since orderVersionList sorts its argument in place
        capabilities = list(capabilities)
    else:
        return defaultRes
    if not capabilities:
        return defaultRes

    try:
        # get an ordered list of the versions and use the very first element
        smallestVersion = orderVersionList(capabilities)[0]
        parts = [int(item) for item in smallestVersion.split(".")]
    except (AttributeError, TypeError, ValueError):
        # malformed version: honour the documented "0 in case of failure"
        return defaultRes

    # deal with versions like: "1", "1.2" and "1.2.3"
    parts += [0] * (3 - len(parts))
    major, medium, minor = parts[:3]
    return major * 1000 + medium * 10 + minor
7 changes: 5 additions & 2 deletions src/python/WMCore/BossAir/Plugins/SimpleCondorPlugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,8 +566,11 @@ def getJobParameters(self, jobList):
if job.get('gpuRequirements', None):
ad['My.GPUMemoryMB'] = str(job['gpuRequirements']['GPUMemoryMB'])
cudaCapabilities = ','.join(sorted(job['gpuRequirements']['CUDACapabilities']))
ad['My.CUDACapability'] = classad.quote(str(cudaCapabilities))
ad['My.CUDARuntime'] = classad.quote(job['gpuRequirements']['CUDARuntime'])
minimalCapability = self.cudaCapabilityToSingleVersion(job['gpuRequirements']['CUDACapabilities'])
ad['My.CUDACapability'] = classad.quote(str(minimalCapability))
ad['My.OriginalCUDACapability'] = classad.quote(str(cudaCapabilities))
cudaRuntime = ','.join(sorted(job['gpuRequirements']['CUDARuntime']))
ad['My.CUDARuntime'] = classad.quote(str(cudaRuntime))
else:
ad['My.GPUMemoryMB'] = undefined
ad['My.CUDACapability'] = undefined
Expand Down

0 comments on commit 0235cd0

Please sign in to comment.