diff --git a/sle15/Dockerfile b/sle15/Dockerfile index 501b91c8..824454a3 100644 --- a/sle15/Dockerfile +++ b/sle15/Dockerfile @@ -1,7 +1,7 @@ ARG SLES_VERSION -FROM nvcr.io/nvidia/cuda:12.5.1-base-ubi8 as license +FROM nvcr.io/nvidia/cuda:12.6.0-base-ubi9 as license -FROM registry.suse.com/bci/golang:1.17 as build +FROM registry.suse.com/bci/golang:1.23 as build RUN zypper --non-interactive install -y git wget tar gzip @@ -12,7 +12,7 @@ RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ go build -o vgpu-util && \ mv vgpu-util /work -FROM registry.suse.com/suse/sle15:$SLES_VERSION +FROM registry.suse.com/bci/bci-base:$SLES_VERSION #ARG BASE_URL=http://us.download.nvidia.com/XFree86/Linux-x86_64 ARG BASE_URL=https://us.download.nvidia.com/tesla diff --git a/sle15/nvidia-driver b/sle15/nvidia-driver index 57c96e20..b7fc9449 100755 --- a/sle15/nvidia-driver +++ b/sle15/nvidia-driver @@ -12,11 +12,15 @@ NVIDIA_MODULE_PARAMS=() NVIDIA_UVM_MODULE_PARAMS=() NVIDIA_MODESET_MODULE_PARAMS=() +OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-true} +[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel + _update_package_cache() { if [ "${PACKAGE_TAG:-}" != "builtin" ]; then echo "Updating the package cache..." FLAVOR="$(echo ${KERNEL_VERSION} | cut -d- -f3)" if [ "$FLAVOR" == "azure" ]; then + # consumed by container-suseconnect when calling `zypper refresh` export ADDITIONAL_MODULES="sle-module-public-cloud" fi if ! zypper refresh; then @@ -66,10 +70,13 @@ _install_prerequisites() ( echo "Installing Linux kernel source..." local version_without_flavor=$(echo ${KERNEL_VERSION} | cut -d- -f-2) + export ZYPP_MODALIAS_SYSFS=$(mktemp /tmp/modalias-XXXX) if ! zypper --non-interactive in -y --no-recommends --capability kernel-${FLAVOR} = ${version_without_flavor} kernel-${FLAVOR}-devel = ${version_without_flavor} ; then echo "FATAL: failed to install kernel packages. Ensure SLES subscription is available." 
+ rm -f ${ZYPP_MODALIAS_SYSFS} exit 1 fi + rm -f ${ZYPP_MODALIAS_SYSFS}; unset ZYPP_MODALIAS_SYSFS echo "Generating Linux kernel version string..." extract-vmlinux /boot/vmlinuz-${KERNEL_VERSION} | strings | grep -E '^Linux version' | sed 's/^\(.*\)\s\+(.*)$/\1/' > version @@ -205,6 +212,25 @@ _get_module_params() { # Load the kernel modules and start persistenced. _load_driver() { + local nv_fw_search_path="$RUN_DIR/driver/lib/firmware" + local set_fw_path="true" + local fw_path_config_file="/sys/module/firmware_class/parameters/path" + for param in "${NVIDIA_MODULE_PARAMS[@]}"; do + if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then + set_fw_path="false" + fi + done + + if [[ "$set_fw_path" == "true" ]]; then + echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path" + if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then + echo "WARNING: A search path is already configured in $fw_path_config_file" + echo " Retaining the current configuration" + else + echo -n "$nv_fw_search_path" > $fw_path_config_file || echo "WARNING: Failed to configure the firmware search path" + fi + fi + echo "Parsing kernel module parameters..." _get_module_params @@ -245,9 +271,11 @@ _load_driver() { _unload_driver() { local rmmod_args=() local nvidia_deps=0 + local nvidia_modeset_deps=0 local nvidia_refs=0 local nvidia_uvm_refs=0 local nvidia_modeset_refs=0 + local nvidia_drm_refs=0 echo "Stopping NVIDIA persistence daemon..." if [ -f /var/run/nvidia-persistenced/nvidia-persistenced.pid ]; then @@ -295,6 +323,11 @@ _unload_driver() { fi echo "Unloading NVIDIA driver kernel modules..." 
+ if [ -f /sys/module/nvidia_drm/refcnt ]; then + nvidia_drm_refs=$(< /sys/module/nvidia_drm/refcnt) + rmmod_args+=("nvidia-drm") + ((++nvidia_modeset_deps)) + fi if [ -f /sys/module/nvidia_modeset/refcnt ]; then nvidia_modeset_refs=$(< /sys/module/nvidia_modeset/refcnt) rmmod_args+=("nvidia-modeset") @@ -309,7 +342,7 @@ _unload_driver() { nvidia_refs=$(< /sys/module/nvidia/refcnt) rmmod_args+=("nvidia") fi - if [ ${nvidia_refs} -gt ${nvidia_deps} ] || [ ${nvidia_uvm_refs} -gt 0 ] || [ ${nvidia_modeset_refs} -gt 0 ]; then + if [ ${nvidia_refs} -gt ${nvidia_deps} ] || [ ${nvidia_uvm_refs} -gt 0 ] || [ ${nvidia_modeset_refs} -gt ${nvidia_modeset_deps} ] || [ ${nvidia_drm_refs} -gt 0 ]; then echo "Could not unload NVIDIA driver kernel modules, driver is in use" >&2 return 1 fi @@ -331,7 +364,8 @@ _install_driver() { if [ "${ACCEPT_LICENSE}" = "yes" ]; then install_args+=("--accept-license") fi - nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check ${install_args[@]+"${install_args[@]}"} + cd /drivers/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION + IGNORE_CC_MISMATCH=1 ./nvidia-installer --kernel-module-only --ui=none --no-nouveau-check -m=${KERNEL_TYPE} --no-rebuild-initramfs ${install_args[@]+"${install_args[@]}"} --skip-module-load # --no-drm } # Mount the driver rootfs into the run directory with the exception of sysfs. @@ -341,6 +375,16 @@ _mount_rootfs() { mount --make-private /sys mkdir -p ${RUN_DIR}/driver mount --rbind / ${RUN_DIR}/driver + + echo "Check SELinux status" + if [ -e /sys/fs/selinux ]; then + echo "SELinux is enabled" + echo "Change device files security context for selinux compatibility" + chcon -R -t container_file_t ${RUN_DIR}/driver/dev + else + echo "SELinux is disabled, skipping..." + fi + } # Unmount the driver rootfs from the run directory. 
@@ -419,7 +463,7 @@ _start_vgpu_topology_daemon() { nvidia-topologyd } -init() { +_prepare() { if [ "${DRIVER_TYPE}" = "vgpu" ]; then _find_vgpu_driver_version || exit 1 fi @@ -434,6 +478,10 @@ init() { echo -e "\n========== NVIDIA Software Installer ==========\n" echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n" +} + +_prepare_exclusive() { + _prepare exec 3> ${PID_FILE} if ! flock -n 3; then @@ -447,7 +495,10 @@ init() { _unload_driver || exit 1 _unmount_rootfs +} +_build() { + local cleanup=false # Install dependencies if _kernel_requires_package; then @@ -455,14 +506,20 @@ init() { _resolve_kernel_version || exit 1 _install_prerequisites _create_driver_package - #_remove_prerequisites - _cleanup_package_cache + cleanup=true fi # Build the driver _install_driver - _load_driver || exit 1 + if $cleanup; then + # Do not call _remove_prerequisites as this will delete depmod information + _cleanup_package_cache + fi +} + +_load() { _mount_rootfs + _load_driver || exit 1 _write_kernel_update_hook echo "Done, now waiting for signal" @@ -473,6 +530,26 @@ init() { exit 0 } +init() { + _prepare_exclusive + + _build + + _load +} + +build() { + _prepare + + _build +} + +load() { + _prepare_exclusive + + _load +} + update() { exec 3>&2 if exec 2> /dev/null 4< ${PID_FILE}; then @@ -511,7 +588,7 @@ update() { if _kernel_requires_package; then _create_driver_package fi - #_remove_prerequisites + # Do not call _remove_prerequisites as this will delete depmod information _cleanup_package_cache echo "Done" @@ -524,6 +601,8 @@ Usage: $0 COMMAND [ARG...] 
Commands: init [-a | --accept-license] [-m | --max-threads MAX_THREADS] + build [-a | --accept-license] [-m | --max-threads MAX_THREADS] + load update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG] [-m | --max-threads MAX_THREADS] EOF exit 1 @@ -535,6 +614,8 @@ fi command=$1; shift case "${command}" in init) options=$(getopt -l accept-license,max-threads: -o am: -- "$@") ;; + build) options=$(getopt -l accept-license,tag:,max-threads: -o at:m: -- "$@") ;; + load) options="" ;; update) options=$(getopt -l kernel:,sign:,tag:,max-threads: -o k:s:t:m: -- "$@") ;; *) usage ;; esac