From e8c70426dd3dada317413c825470e28147b56809 Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis Date: Tue, 2 Jul 2024 17:25:52 -0700 Subject: [PATCH 1/2] [mig-manager] Add --nvidia-cdi-hook-path flag Signed-off-by: Christopher Desiniotis --- cmd/nvidia-mig-manager/main.go | 11 ++++++++++- deployments/container/reconfigure-mig.sh | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-mig-manager/main.go b/cmd/nvidia-mig-manager/main.go index fa6d36e5..6d1d4a22 100644 --- a/cmd/nvidia-mig-manager/main.go +++ b/cmd/nvidia-mig-manager/main.go @@ -50,6 +50,7 @@ const ( DefaultGPUClientsNamespace = "default" DefaultNvidiaDriverRoot = "/run/nvidia/driver" DefaultDriverRootCtrPath = "/run/nvidia/driver" + DefaultNvidiaCDIHookPath = "/usr/local/nvidia/toolkit/nvidia-cdi-hook" ) var ( @@ -71,6 +72,7 @@ var ( driverRootCtrPath string devRoot string devRootCtrPath string + nvidiaCDIHookPath string ) type GPUClients struct { @@ -250,6 +252,13 @@ func main() { Destination: &devRootCtrPath, EnvVars: []string{"DEV_ROOT_CTR_PATH"}, }, + &cli.StringFlag{ + Name: "nvidia-cdi-hook-path", + Value: DefaultNvidiaCDIHookPath, + Usage: "Path to nvidia-cdi-hook binary on the host.", + Destination: &nvidiaCDIHookPath, + EnvVars: []string{"NVIDIA_CDI_HOOK_PATH"}, + }, } err := c.Run(os.Args) @@ -380,7 +389,7 @@ func runScript(migConfigValue string, driverLibraryPath string, nvidiaSMIPath st "-p", defaultGPUClientsNamespaceFlag, } if cdiEnabledFlag { - args = append(args, "-e", "-t", driverRoot, "-a", driverRootCtrPath, "-b", devRoot, "-j", devRootCtrPath, "-l", driverLibraryPath, "-q", nvidiaSMIPath) + args = append(args, "-e", "-t", driverRoot, "-a", driverRootCtrPath, "-b", devRoot, "-j", devRootCtrPath, "-l", driverLibraryPath, "-q", nvidiaSMIPath, "-s", nvidiaCDIHookPath) } if withRebootFlag { args = append(args, "-r") diff --git a/deployments/container/reconfigure-mig.sh b/deployments/container/reconfigure-mig.sh index 85cbcc1f..30c88171 100755 --- a/deployments/container/reconfigure-mig.sh +++ b/deployments/container/reconfigure-mig.sh @@ -32,13 +32,14 @@ DEV_ROOT="" DEV_ROOT_CTR_PATH="" DRIVER_LIBRARY_PATH="" NVIDIA_SMI_PATH="" +NVIDIA_CDI_HOOK_PATH="" export SYSTEMD_LOG_LEVEL="info" function usage() { echo "USAGE:" echo " ${0} -h " - echo " ${0} -n -f -c -p [-e -t -a -b -j -l -q ] [ -m -i -o -g -k -r -s ]" + echo " ${0} -n -f -c -p [-e -t -a -b -j -l -q -s ] [ -m -i -o -g -k -r -s ]" echo "" echo "OPTIONS:" echo " -h Display this help message" @@ -60,9 +61,10 @@ function usage() { echo " -j Root path to the NVIDIA device nodes mounted in the container" echo " -l Path to libnvidia-ml.so.1 in the container" echo " -q Path to nvidia-smi in the container" + echo " -s Path to nvidia-cdi-hook on the host" } -while getopts "hrden:f:c:m:i:o:g:k:p:t:a:b:j:l:q:" opt; do +while getopts "hrden:f:c:m:i:o:g:k:p:t:a:b:j:l:q:s:" opt; do case ${opt} in h ) # process option h usage; exit 0 @@ -121,7 +123,10 @@ while getopts "hrden:f:c:m:i:o:g:k:p:t:a:b:j:l:q:" opt; do q ) # process option q NVIDIA_SMI_PATH=${OPTARG} ;; - \? ) echo "Usage: ${0} -n -f -c -p [-e -t -a -b -j -l -q ] [ -m -i -o -g -k -r -s ]" + s ) # process option s + NVIDIA_CDI_HOOK_PATH=${OPTARG} + ;; + \? ) echo "Usage: ${0} -n -f -c -p [-e -t -a -b -j -l -q -s ] [ -m -i -o -g -k -r -s ]" ;; esac done @@ -167,6 +172,10 @@ if [ "${CDI_ENABLED}" = "true" ]; then usage; exit 1 fi fi + if [ "${NVIDIA_CDI_HOOK_PATH}" = "" ]; then + echo "Error: missing -s flag" + usage; exit 1 + fi fi HOST_GPU_CLIENT_SERVICES=(${HOST_GPU_CLIENT_SERVICES//,/ }) @@ -599,7 +608,7 @@ if [ "${CDI_ENABLED}" = "true" ]; then --dev-root=${DEV_ROOT_CTR_PATH} \ --vendor="management.nvidia.com" \ --class="gpu" \ - --nvidia-ctk-path="/usr/local/nvidia/toolkit/nvidia-ctk" | \ + --nvidia-cdi-hook-path=${NVIDIA_CDI_HOOK_PATH} | \ nvidia-ctk cdi transform root \ --from=$DRIVER_ROOT_CTR_PATH \ --to=$DRIVER_ROOT \ From 52509d0dc9ed45e198f0dd551d01e69c62704fe0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 9 Jul 2024 20:45:20 +0200 Subject: [PATCH 2/2] Use archived centos:7 repos Signed-off-by: Evan Lezar --- deployments/systemd/packages/Dockerfile.rpm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deployments/systemd/packages/Dockerfile.rpm b/deployments/systemd/packages/Dockerfile.rpm index 20d4729e..78379116 100644 --- a/deployments/systemd/packages/Dockerfile.rpm +++ b/deployments/systemd/packages/Dockerfile.rpm @@ -17,6 +17,10 @@ ARG BASE_IMAGE=undefined ARG GOLANG_VERSION=undefined FROM ${BASE_IMAGE} as go-build +RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" \ + -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" \ + /etc/yum.repos.d/CentOS-* + RUN yum install -y \ ca-certificates \ gcc \ @@ -52,6 +56,11 @@ RUN make PREFIX=/artifacts cmds # build package FROM ${BASE_IMAGE} + +RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" \ + -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" \ + /etc/yum.repos.d/CentOS-* + RUN yum install -y rpm-build # envs for packaging