From 1c4c81fff590f584aa683558bcf5ba0840e46372 Mon Sep 17 00:00:00 2001 From: Matthew Yeazel Date: Fri, 4 Oct 2024 02:33:16 +0000 Subject: [PATCH] kmod-5.10-nvidia: move to R535 branch from R470 The R470 branch is end of life. In order to keep variants using the 5.10 kernel on a supported NVIDIA driver, this commit moves the kmod package for 5.10 to build the R535 branch and brings the driver in line with the other two kernel kmod packages in packaging style. Signed-off-by: Matthew Yeazel --- packages/kmod-5.10-nvidia/.gitignore | 2 + packages/kmod-5.10-nvidia/Cargo.toml | 23 +- .../copy-open-gpu-kernel-modules.service.in | 20 + .../kmod-5.10-nvidia/kmod-5.10-nvidia.spec | 506 ++++++++++++------ .../link-tesla-kernel-modules.service.in | 5 +- .../load-open-gpu-kernel-modules.service.in | 19 + .../load-tesla-kernel-modules.service.in | 5 +- .../kmod-5.10-nvidia/nvidia-fabricmanager.cfg | 34 ++ .../nvidia-fabricmanager.service | 16 + .../kmod-5.10-nvidia/nvidia-ld.so.conf.in | 2 +- .../nvidia-open-gpu-config.toml.in | 11 + .../nvidia-open-gpu-copy-only-config.toml.in | 8 + .../nvidia-tesla-tmpfiles.conf | 5 + .../kmod-5.10-nvidia/nvidia-tmpfiles.conf.in | 4 + 14 files changed, 492 insertions(+), 168 deletions(-) create mode 100644 packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in create mode 100644 packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in create mode 100644 packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg create mode 100644 packages/kmod-5.10-nvidia/nvidia-fabricmanager.service create mode 100644 packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in create mode 100644 packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in create mode 100644 packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf diff --git a/packages/kmod-5.10-nvidia/.gitignore b/packages/kmod-5.10-nvidia/.gitignore index 0bcfb52fd..db8b415b2 100644 --- a/packages/kmod-5.10-nvidia/.gitignore +++ b/packages/kmod-5.10-nvidia/.gitignore @@ -1 +1,3 @@ 
NVidiaEULAforAWS.pdf +COPYING +*.rpm diff --git a/packages/kmod-5.10-nvidia/Cargo.toml b/packages/kmod-5.10-nvidia/Cargo.toml index 07cc9b724..785a9812b 100644 --- a/packages/kmod-5.10-nvidia/Cargo.toml +++ b/packages/kmod-5.10-nvidia/Cargo.toml @@ -17,13 +17,28 @@ url = "https://s3.amazonaws.com/EULA/NVidiaEULAforAWS.pdf" sha512 = "e1926fe99afc3ab5b2f2744fcd53b4046465aefb2793e2e06c4a19455a3fde895e00af1415ff1a5804c32e6a2ed0657e475de63da6c23a0e9c59feeef52f3f58" [[package.metadata.build-package.external-files]] -url = "https://us.download.nvidia.com/tesla/470.256.02/NVIDIA-Linux-x86_64-470.256.02.run" -sha512 = "a837946dd24d7945c1962a695f1f31965f3ceb6927f52cd08fd51b8db138b7a888bbeab69243f5c8468a7bd7ccd47f5dbdb48a1ca81264866c1ebb7d88628f88" +url = "https://us.download.nvidia.com/tesla/535.183.06/NVIDIA-Linux-x86_64-535.183.06.run" +sha512 = "424950ef303ea39499e96f8c90c1e0c83aee12309779d4f335769ef554ad4f7c38e98f69c64b408adc85a7cf51ea600d85222792402b9c6b7941f1af066d2a33" force-upstream = true [[package.metadata.build-package.external-files]] -url = "https://us.download.nvidia.com/tesla/470.256.02/NVIDIA-Linux-aarch64-470.256.02.run" -sha512 = "38eee5933355c34ca816a2ac0fbc4f55c19c20e1322891bfc98cb6b37d99a31218eea9314877ab0e3cf3ac6eb61f9d9d4d09d0af304b689f18b4efa721b65d5c" +url = "https://us.download.nvidia.com/tesla/535.183.06/NVIDIA-Linux-aarch64-535.183.06.run" +sha512 = "bb305f1703557461b0a0a29066c304658d9684841104c6f4d9ff44f9db90fee14ae619cd2fe3242823a5fe3a69b168b8174b163740014b15cdef36db88ba2d96" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/nvidia-fabric-manager-535.183.06-1.x86_64.rpm" +sha512 = "c3d98878363f857b2963665a0e485cb7b1afeaabd0040a970478d00ffb870ab4130ab9dfe1b7a40d1b38734636ebccec39fd1b3fc8c06abc5c07470f749b6025" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = 
"https://developer.download.nvidia.com/compute/cuda/repos/rhel9/sbsa/nvidia-fabric-manager-535.183.06-1.aarch64.rpm" +sha512 = "6a646cd7ea11e668f7dbe6f6bb22516107a856e3c3755f8693c91d4bed706b8b3667b853f07e84c2d0da4de7ab1107337b6a1493879d75d8c201bfe9da071b32" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = "https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/535/COPYING" +sha512 = "f9cee68cbb12095af4b4e92d01c210461789ef41c70b64efefd6719d0b88468b7a67a3629c432d4d9304c730b5d1a942228a5bcc74a03ab1c411c77c758cd938" force-upstream = true [build-dependencies] diff --git a/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in b/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in new file mode 100644 index 000000000..2c3420b61 --- /dev/null +++ b/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in @@ -0,0 +1,20 @@ +[Unit] +Description=Copy open GPU kernel modules +RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels +# Rerunning this service after the system is fully loaded will override +# the already linked kernel modules. This doesn't affect the running system, +# since kernel modules are linked early in the boot sequence, but we still +# disable manual restarts to prevent unnecessary kernel modules rewrites. 
+RefuseManualStart=true +RefuseManualStop=true + +[Service] +Type=oneshot +ExecCondition=/usr/bin/ghostdog match-nvidia-driver open-gpu +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu link-modules +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu-copy-only link-modules +RemainAfterExit=true +StandardError=journal+console + +[Install] +RequiredBy=preconfigured.target diff --git a/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec b/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec index 0733338a9..955063ef7 100644 --- a/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec +++ b/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec @@ -1,6 +1,19 @@ -%global tesla_470 470.256.02 -%global tesla_470_libdir %{_cross_libdir}/nvidia/tesla/%{tesla_470} -%global tesla_470_bindir %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +%global tesla_major 535 +%global tesla_minor 183 +%global tesla_patch 06 +%global tesla_ver %{tesla_major}.%{tesla_minor}.%{tesla_patch} +%if "%{?_cross_arch}" == "aarch64" +%global fm_arch sbsa +%else +%global fm_arch %{_cross_arch} +%endif + +# With the split of the firmware binary from firmware/gsp.bin to firmware/gsp_ga10x.bin +# and firmware/gsp_tu10x.bin the file format changed from executable to relocatable. +# The __spec_install_post macro will by default try to strip all binary files. +# Unfortunately the strip used is not compatible with the new file format. +# Redefine strip, so that these firmware binaries do not derail the build. 
+%global __strip /usr/bin/true Name: %{_cross_os}kmod-5.10-nvidia Version: 1.0.0 @@ -11,22 +24,34 @@ Summary: NVIDIA drivers for the 5.10 kernel License: Apache-2.0 OR MIT URL: http://www.nvidia.com/ -# NVIDIA .run scripts from 0 to 199 -Source0: https://us.download.nvidia.com/tesla/%{tesla_470}/NVIDIA-Linux-x86_64-%{tesla_470}.run -Source1: https://us.download.nvidia.com/tesla/%{tesla_470}/NVIDIA-Linux-aarch64-%{tesla_470}.run +# NVIDIA archives from 0 to 199 +# NVIDIA .run scripts for kernel and userspace drivers +Source0: https://us.download.nvidia.com/tesla/%{tesla_ver}/NVIDIA-Linux-x86_64-%{tesla_ver}.run +Source1: https://us.download.nvidia.com/tesla/%{tesla_ver}/NVIDIA-Linux-aarch64-%{tesla_ver}.run Source2: NVidiaEULAforAWS.pdf +Source3: COPYING + +# fabricmanager for NVSwitch +Source10: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/nvidia-fabric-manager-%{tesla_ver}-1.x86_64.rpm +Source11: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/sbsa/nvidia-fabric-manager-%{tesla_ver}-1.aarch64.rpm # Common NVIDIA conf files from 200 to 299 Source200: nvidia-tmpfiles.conf.in Source202: nvidia-dependencies-modules-load.conf +Source203: nvidia-fabricmanager.service +Source204: nvidia-fabricmanager.cfg # NVIDIA tesla conf files from 300 to 399 -Source300: nvidia-tesla-tmpfiles.conf.in +Source300: nvidia-tesla-tmpfiles.conf Source301: nvidia-tesla-build-config.toml.in -Source302: nvidia-tesla-path.env.in -Source303: nvidia-ld.so.conf.in -Source304: link-tesla-kernel-modules.service.in -Source305: load-tesla-kernel-modules.service.in +Source302: nvidia-open-gpu-config.toml.in +Source303: nvidia-open-gpu-copy-only-config.toml.in +Source304: nvidia-tesla-path.env.in +Source305: nvidia-ld.so.conf.in +Source306: link-tesla-kernel-modules.service.in +Source307: load-tesla-kernel-modules.service.in +Source308: copy-open-gpu-kernel-modules.service.in +Source309: load-open-gpu-kernel-modules.service.in BuildRequires: 
%{_cross_os}glibc-devel BuildRequires: %{_cross_os}kernel-5.10-archive @@ -34,33 +59,74 @@ BuildRequires: %{_cross_os}kernel-5.10-archive %description %{summary}. -%package tesla-470 -Summary: NVIDIA 470 Tesla driver -Version: %{tesla_470} +%package fabricmanager +Summary: NVIDIA fabricmanager config and service files +Requires: %{name}-tesla(fabricmanager) + +%description fabricmanager +%{summary}. + +%package open-gpu-%{tesla_major} +Summary: NVIDIA %{tesla_major} Open GPU driver +Version: %{tesla_ver} +License: MIT OR GPL-2.0-only +Requires: %{_cross_os}variant-platform(aws) + + +%description open-gpu-%{tesla_major} +%{summary}. + +%package tesla-%{tesla_major} +Summary: NVIDIA %{tesla_major} Tesla driver +Version: %{tesla_ver} License: LicenseRef-NVIDIA-AWS-EULA Requires: %{_cross_os}variant-platform(aws) Requires: %{name} +Requires: %{name}-fabricmanager +Provides: %{name}-tesla(fabricmanager) +# Compat Provides for older variant definitions +Provides: %{name}-tesla-470 +Requires: %{name}-open-gpu-%{tesla_major} -%description tesla-470 +%description tesla-%{tesla_major} %{summary} %prep # Extract nvidia sources with `-x`, otherwise the script will try to install # the driver in the current run -sh %{_sourcedir}/NVIDIA-Linux-%{_cross_arch}-%{tesla_470}.run -x +sh %{_sourcedir}/NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}.run -x + +# Extract fabricmanager from the rpm via cpio rather than `%%setup` since the +# correct source is architecture-dependent. +mkdir fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive +rpm2cpio %{_sourcedir}/nvidia-fabric-manager-%{tesla_ver}-1.%{_cross_arch}.rpm | cpio -idmV -D fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive # Add the license. install -p -m 0644 %{S:2} . +install -p -m 0644 %{S:3} . 
%global kernel_sources %{_builddir}/kernel-devel tar -xf %{_cross_datadir}/bottlerocket/kernel-devel.tar.xz -%build -pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_470}/kernel +%define _kernel_version %(ls %{kernel_sources}/include/config/kernel.release) +%global _cross_kmoddir %{_cross_libdir}/modules/%{_kernel_version} # This recipe was based in the NVIDIA yum/dnf specs: # https://github.com/NVIDIA/yum-packaging-precompiled-kmod +# Begin open driver build +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/kernel-open + +# We set IGNORE_CC_MISMATCH even though we are using the same compiler used to compile the kernel, if +# we don't set this flag the compilation fails +make %{?_smp_mflags} ARCH=%{_cross_karch} IGNORE_CC_MISMATCH=1 SYSSRC=%{kernel_sources} CC=%{_cross_target}-gcc LD=%{_cross_target}-ld + +# end open driver build +popd + +# Begin proprietary driver build +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/kernel + # We set IGNORE_CC_MISMATCH even though we are using the same compiler used to compile the kernel, if # we don't set this flag the compilation fails make %{?_smp_mflags} ARCH=%{_cross_karch} IGNORE_CC_MISMATCH=1 SYSSRC=%{kernel_sources} CC=%{_cross_target}-gcc LD=%{_cross_target}-ld @@ -79,6 +145,29 @@ rm nvidia{,-modeset,-peermem}.o # don't include any linked module in the base image rm nvidia{,-modeset,-peermem,-drm}.ko +# End proprietary driver build +popd + +# Grab the list of supported devices +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/supported-gpus +# We want to grab all the `kernelopen` enabled chips except for this list that is best held back to the proprietary driver +# 10de:1db1 is V100-16G (P3dn) +# 10de:1db5 is V100-32G (P3dn) +# 10de:1eb8 is T4 (G4dn) +# 10de:1eb4 is T4G (G5g) +# 10de:2237 is A10G (G5) +# 10de:27b8 is L4 (G6) +# 10de:26b9 is L40S (G6e) +jq -r '.chips[] | select(.features[] | contains("kernelopen")) | +select(.devid != "0x1DB1" +and .devid != "0x1DB5" +and .devid != "0x1EB8" +and .devid != "0x1EB4" +and .devid != 
"0x2237" +and .devid != "0x27B8" +and .devid != "0x26B9")' supported-gpus.json | jq -s '{"open-gpu": .}' > open-gpu-supported-devices.json +# confirm "NVIDIA H100" is in the resulting file to catch shape changes +jq -e '."open-gpu"[] | select(."devid" == "0x2330") | ."features"| index("kernelopen")' open-gpu-supported-devices.json popd %install @@ -98,84 +187,139 @@ install -p -m 0644 nvidia.conf %{buildroot}%{_cross_tmpfilesdir} install -d %{buildroot}%{_cross_libdir}/modules-load.d install -p -m 0644 %{S:202} %{buildroot}%{_cross_libdir}/modules-load.d/nvidia-dependencies.conf -# Begin NVIDIA tesla 470 -pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_470} -# We install bins and libs in a versioned directory to prevent collisions with future drivers versions -install -d %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -d %{buildroot}%{tesla_470_libdir} -install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install -d %{buildroot}%{_cross_factorydir}/nvidia/tesla/%{tesla_470} - -sed -e 's|__NVIDIA_VERSION__|%{tesla_470}|' %{S:300} > nvidia-tesla-%{tesla_470}.conf -install -m 0644 nvidia-tesla-%{tesla_470}.conf %{buildroot}%{_cross_tmpfilesdir}/ -sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/|' %{S:301} > \ - nvidia-tesla-%{tesla_470}.toml -install -m 0644 nvidia-tesla-%{tesla_470}.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +# NVIDIA fabric manager service unit and config +install -p -m 0644 %{S:203} %{buildroot}%{_cross_unitdir} +install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/nvidia +install -p -m 0644 %{S:204} %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/fabricmanager.cfg + +# Begin NVIDIA tesla driver +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver} +# Proprietary driver +install -d %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -d %{buildroot}%{_cross_libdir}/nvidia/tesla +install -d 
%{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install -d %{buildroot}%{_cross_factorydir}/nvidia/tesla +install -d %{buildroot}%{_cross_factorydir}/nvidia/open-gpu +install -d %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers + +install -m 0644 %{S:300} %{buildroot}%{_cross_tmpfilesdir}/nvidia-tesla.conf +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/tesla/module-objects.d/|' %{S:301} > \ + nvidia-tesla.toml +install -m 0644 nvidia-tesla.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/open-gpu/drivers/|' %{S:302} > \ + nvidia-open-gpu.toml +install -m 0644 nvidia-open-gpu.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/open-gpu/drivers/|' %{S:303} > \ + nvidia-open-gpu-copy-only.toml +install -m 0644 nvidia-open-gpu-copy-only.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers # Install nvidia-path environment file, will be used as a drop-in for containerd.service since # libnvidia-container locates and mounts helper binaries into the containers from either # `PATH` or `NVIDIA_PATH` -sed -e 's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}|' %{S:302} > nvidia-path.env -install -m 0644 nvidia-path.env %{buildroot}%{_cross_factorydir}/nvidia/tesla/%{tesla_470} -# We need to add `_cross_libdir/tesla_470` to the paths loaded by the ldconfig service +sed -e 's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin|' %{S:304} > nvidia-path.env +install -m 0644 nvidia-path.env %{buildroot}%{_cross_factorydir}/nvidia/tesla +# We need to add `_cross_libdir` to the paths loaded by the ldconfig service # because libnvidia-container uses the `ldcache` file created by the service, to locate and mount the # libraries into the containers -sed -e 's|__LIBDIR__|%{_cross_libdir}|' %{S:303} | sed -e 's|__NVIDIA_VERSION__|%{tesla_470}|' \ - > nvidia-tesla-%{tesla_470}.conf 
-install -m 0644 nvidia-tesla-%{tesla_470}.conf %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/ +sed -e 's|__LIBDIR__|%{_cross_libdir}|' %{S:305} > nvidia-tesla.conf +install -m 0644 nvidia-tesla.conf %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/ # Services to link/copy/load modules -sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:304} > link-tesla-kernel-modules.service -sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:305} > load-tesla-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:306} > link-tesla-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:307} > load-tesla-kernel-modules.service install -p -m 0644 \ link-tesla-kernel-modules.service \ load-tesla-kernel-modules.service \ %{buildroot}%{_cross_unitdir} -# driver -install kernel/nvidia.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia/nv-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia/nv-kernel.o_binary %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-kernel.o +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:308} > copy-open-gpu-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:309} > load-open-gpu-kernel-modules.service +install -p -m 0644 \ + copy-open-gpu-kernel-modules.service \ + load-open-gpu-kernel-modules.service \ + %{buildroot}%{_cross_unitdir} + +# proprietary driver +install kernel/nvidia.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia/nv-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia/nv-kernel.o_binary %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-kernel.o + +# uvm +install kernel/nvidia-uvm.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-uvm.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d + +# modeset +install 
kernel/nvidia-modeset.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-modeset/nv-modeset-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-modeset/nv-modeset-kernel.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d + +# peermem +install kernel/nvidia-peermem.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-peermem/nvidia-peermem.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d + +# drm +install kernel/nvidia-drm.mod.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-drm.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/module-objects.d + +# open driver +install -d %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ +install kernel-open/nvidia.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ # uvm -install kernel/nvidia-uvm.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-uvm.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel-open/nvidia-uvm.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ # modeset -install kernel/nvidia-modeset.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-modeset/nv-modeset-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-modeset/nv-modeset-kernel.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel-open/nvidia-modeset.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ # peermem -install kernel/nvidia-peermem.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-peermem/nvidia-peermem.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel-open/nvidia-peermem.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ # 
drm -install kernel/nvidia-drm.mod.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-drm.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel-open/nvidia-drm.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ +# end open driver # Binaries -install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin %if "%{_cross_arch}" == "x86_64" -install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin %endif # We install all the libraries, and filter them out in the 'files' section, so we can catch # when new libraries are added -install -m 755 *.so* %{buildroot}/%{tesla_470_libdir}/ +install -m 755 *.so* %{buildroot}/%{_cross_libdir}/nvidia/tesla/ # This library has the same SONAME as libEGL.so.1.1.0, this will cause collisions while # the symlinks are created. For now, we only symlink libEGL.so.1.1.0. -EXCLUDED_LIBS="libEGL.so.%{tesla_470}" +EXCLUDED_LIBS="libEGL.so.%{tesla_ver}" for lib in $(find . 
-maxdepth 1 -type f -name 'lib*.so.*' -printf '%%P\n'); do [[ "${EXCLUDED_LIBS}" =~ "${lib}" ]] && continue soname="$(%{_cross_target}-readelf -d "${lib}" | awk '/SONAME/{print $5}' | tr -d '[]')" [ -n "${soname}" ] || continue [ "${lib}" == "${soname}" ] && continue - ln -s "${lib}" %{buildroot}/%{tesla_470_libdir}/"${soname}" + ln -s "${lib}" %{buildroot}/%{_cross_libdir}/nvidia/tesla/"${soname}" +done + +# Include the firmware file for GSP support +install -d %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} +install -p -m 0644 firmware/gsp_ga10x.bin %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} +install -p -m 0644 firmware/gsp_tu10x.bin %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} + +# Include the open driver supported devices file for runtime matching of the driver. This is consumed by ghostdog to match the driver to this list +install -p -m 0644 supported-gpus/open-gpu-supported-devices.json %{buildroot}%{_cross_datadir}/nvidia/open-gpu-supported-devices.json + +popd + +# Begin NVIDIA fabric manager binaries and topologies +pushd fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive +install -p -m 0755 usr/bin/nv-fabricmanager %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -p -m 0755 usr/bin/nvswitch-audit %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin + +install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/nvswitch +for t in usr/share/nvidia/nvswitch/*_topology ; do + install -p -m 0644 "${t}" %{buildroot}%{_cross_datadir}/nvidia/tesla/nvswitch done popd @@ -187,149 +331,193 @@ popd %dir %{_cross_datadir}/nvidia %dir %{_cross_libdir}/modules-load.d %dir %{_cross_factorydir}%{_cross_sysconfdir}/drivers +%dir %{_cross_factorydir}%{_cross_sysconfdir}/nvidia %{_cross_tmpfilesdir}/nvidia.conf -%{_cross_libdir}/systemd/system/ %{_cross_libdir}/modules-load.d/nvidia-dependencies.conf -%files tesla-470 +%files tesla-%{tesla_major} %license NVidiaEULAforAWS.pdf -%dir %{_cross_datadir}/nvidia/tesla/%{tesla_470} -%dir 
%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -%dir %{tesla_470_libdir} -%dir %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -%dir %{_cross_factorydir}/nvidia/tesla/%{tesla_470} +%license fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive/usr/share/doc/nvidia-fabricmanager/third-party-notices.txt +%dir %{_cross_datadir}/nvidia/tesla +%dir %{_cross_libexecdir}/nvidia/tesla/bin +%dir %{_cross_libdir}/nvidia/tesla +%dir %{_cross_libdir}/firmware/nvidia/%{tesla_ver} +%dir %{_cross_datadir}/nvidia/tesla/module-objects.d +%dir %{_cross_factorydir}/nvidia/tesla # Service files for link/copy/loading drivers %{_cross_unitdir}/link-tesla-kernel-modules.service %{_cross_unitdir}/load-tesla-kernel-modules.service +%{_cross_unitdir}/copy-open-gpu-kernel-modules.service +%{_cross_unitdir}/load-open-gpu-kernel-modules.service # Binaries -%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-debugdump -%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-smi +%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-debugdump +%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-smi +%{_cross_libexecdir}/nvidia/tesla/bin/nv-fabricmanager +%{_cross_libexecdir}/nvidia/tesla/bin/nvswitch-audit + +# nvswitch topologies +%dir %{_cross_datadir}/nvidia/tesla/nvswitch +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxa100_hgxa100_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgx2_hgx2_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxh100_hgxh100_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxh800_hgxh800_topology # Configuration files -%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-tesla-%{tesla_470}.toml -%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/nvidia-tesla-%{tesla_470}.conf -%{_cross_factorydir}/nvidia/tesla/%{tesla_470}/nvidia-path.env +%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-tesla.toml +%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-open-gpu.toml 
+%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-open-gpu-copy-only.toml +%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/nvidia-tesla.conf +%{_cross_factorydir}/nvidia/tesla/nvidia-path.env +%{_cross_datadir}/nvidia/open-gpu-supported-devices.json # driver -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia.mod.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-interface.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-kernel.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-interface.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-kernel.o # uvm -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-uvm.mod.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-uvm.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-uvm.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-uvm.o # modeset -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-modeset-interface.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-modeset-kernel.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-modeset.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-modeset-interface.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-modeset-kernel.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-modeset.mod.o # tmpfiles -%{_cross_tmpfilesdir}/nvidia-tesla-%{tesla_470}.conf +%{_cross_tmpfilesdir}/nvidia-tesla.conf # We only install the libraries required by all the DRIVER_CAPABILITIES, described here: # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities # Utility libs -%{tesla_470_libdir}/libnvidia-ml.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ml.so.1 -%{tesla_470_libdir}/libnvidia-cfg.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-cfg.so.1 -%{tesla_470_libdir}/libnvidia-nvvm.so.4.0.0 
-%{tesla_470_libdir}/libnvidia-nvvm.so.4 +%{_cross_libdir}/nvidia/tesla/libnvidia-api.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-ml.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ml.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-cfg.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-cfg.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-nvvm.so.4 +%{_cross_libdir}/nvidia/tesla/libnvidia-nvvm.so.%{tesla_ver} # Compute libs -%{tesla_470_libdir}/libcuda.so.%{tesla_470} -%{tesla_470_libdir}/libcuda.so.1 -%{tesla_470_libdir}/libnvidia-opencl.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-opencl.so.1 -%{tesla_470_libdir}/libnvidia-ptxjitcompiler.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ptxjitcompiler.so.1 -%{tesla_470_libdir}/libnvidia-allocator.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-allocator.so.1 -%{tesla_470_libdir}/libOpenCL.so.1.0.0 -%{tesla_470_libdir}/libOpenCL.so.1 +%{_cross_libdir}/nvidia/tesla/libcuda.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libcuda.so.1 +%{_cross_libdir}/nvidia/tesla/libcudadebugger.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libcudadebugger.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-opencl.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-opencl.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-ptxjitcompiler.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ptxjitcompiler.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-allocator.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-allocator.so.1 +%{_cross_libdir}/nvidia/tesla/libOpenCL.so.1.0.0 +%{_cross_libdir}/nvidia/tesla/libOpenCL.so.1 %if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-compiler.so.%{tesla_470} +%{_cross_libdir}/nvidia/tesla/libnvidia-pkcs11.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-pkcs11-openssl3.so.%{tesla_ver} %endif # Video libs -%{tesla_470_libdir}/libvdpau_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libvdpau_nvidia.so.1 -%{tesla_470_libdir}/libnvidia-encode.so.%{tesla_470} 
-%{tesla_470_libdir}/libnvidia-encode.so.1 -%{tesla_470_libdir}/libnvidia-opticalflow.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-opticalflow.so.1 -%{tesla_470_libdir}/libnvcuvid.so.%{tesla_470} -%{tesla_470_libdir}/libnvcuvid.so.1 +%{_cross_libdir}/nvidia/tesla/libvdpau_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libvdpau_nvidia.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-encode.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-encode.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-opticalflow.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-opticalflow.so.1 +%{_cross_libdir}/nvidia/tesla/libnvcuvid.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvcuvid.so.1 # Graphics libs -%{tesla_470_libdir}/libnvidia-eglcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-glcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-tls.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-glsi.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-rtcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-fbc.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-fbc.so.1 -%{tesla_470_libdir}/libnvoptix.so.%{tesla_470} -%{tesla_470_libdir}/libnvoptix.so.1 -%{tesla_470_libdir}/libnvidia-vulkan-producer.so.%{tesla_470} -%if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-ifr.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ifr.so.1 -%endif +%{_cross_libdir}/nvidia/tesla/libnvidia-eglcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-glcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-tls.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-glsi.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-rtcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-fbc.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-fbc.so.1 +%{_cross_libdir}/nvidia/tesla/libnvoptix.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvoptix.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-vulkan-producer.so.%{tesla_ver} # Graphics GLVND libs 
-%{tesla_470_libdir}/libnvidia-glvkspirv.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-cbl.so.%{tesla_470} -%{tesla_470_libdir}/libGLX_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLX_nvidia.so.0 -%{tesla_470_libdir}/libEGL_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libEGL_nvidia.so.0 -%{tesla_470_libdir}/libGLESv2_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLESv2_nvidia.so.2 -%{tesla_470_libdir}/libGLESv1_CM_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLESv1_CM_nvidia.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-glvkspirv.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLX_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLX_nvidia.so.0 +%{_cross_libdir}/nvidia/tesla/libEGL_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libEGL_nvidia.so.0 +%{_cross_libdir}/nvidia/tesla/libGLESv2_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLESv2_nvidia.so.2 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM_nvidia.so.1 # Graphics compat -%{tesla_470_libdir}/libEGL.so.1.1.0 -%{tesla_470_libdir}/libEGL.so.1 -%{tesla_470_libdir}/libEGL.so.%{tesla_470} -%{tesla_470_libdir}/libGL.so.1.7.0 -%{tesla_470_libdir}/libGL.so.1 -%{tesla_470_libdir}/libGLESv1_CM.so.1.2.0 -%{tesla_470_libdir}/libGLESv1_CM.so.1 -%{tesla_470_libdir}/libGLESv2.so.2.1.0 -%{tesla_470_libdir}/libGLESv2.so.2 +%{_cross_libdir}/nvidia/tesla/libEGL.so.1.1.0 +%{_cross_libdir}/nvidia/tesla/libEGL.so.1 +%{_cross_libdir}/nvidia/tesla/libEGL.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGL.so.1.7.0 +%{_cross_libdir}/nvidia/tesla/libGL.so.1 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM.so.1.2.0 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM.so.1 +%{_cross_libdir}/nvidia/tesla/libGLESv2.so.2.1.0 +%{_cross_libdir}/nvidia/tesla/libGLESv2.so.2 # NGX -%if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-ngx.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ngx.so.1 -%endif 
+%{_cross_libdir}/nvidia/tesla/libnvidia-ngx.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ngx.so.1 + +# Firmware +%{_cross_libdir}/firmware/nvidia/%{tesla_ver}/gsp_ga10x.bin +%{_cross_libdir}/firmware/nvidia/%{tesla_ver}/gsp_tu10x.bin # Neither nvidia-peermem nor nvidia-drm are included in driver container images, we exclude them # for now, and we will add them if requested -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-peermem.mod.o -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-peermem.o -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-drm.mod.o -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-drm.o -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-cuda-mps-control -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-cuda-mps-server +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-peermem.mod.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-peermem.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-drm.mod.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-drm.o +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-cuda-mps-control +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-cuda-mps-server %if "%{_cross_arch}" == "x86_64" -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-ngx-updater +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-ngx-updater %endif # None of these libraries are required by libnvidia-container, so they # won't be used by a containerized workload -%exclude %{tesla_470_libdir}/libGLX.so.0 -%exclude %{tesla_470_libdir}/libGLdispatch.so.0 -%exclude %{tesla_470_libdir}/libOpenGL.so.0 -%exclude %{tesla_470_libdir}/libglxserver_nvidia.so.%{tesla_470} -%exclude %{tesla_470_libdir}/libnvidia-egl-wayland.so.1.1.7 -%exclude %{tesla_470_libdir}/libnvidia-gtk2.so.%{tesla_470} -%exclude 
%{tesla_470_libdir}/libnvidia-gtk3.so.%{tesla_470} -%exclude %{tesla_470_libdir}/nvidia_drv.so -%exclude %{tesla_470_libdir}/libnvidia-egl-wayland.so.1 +%exclude %{_cross_libdir}/nvidia/tesla/libGLX.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libGLdispatch.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libOpenGL.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libglxserver_nvidia.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-gtk2.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-gtk3.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/nvidia_drv.so +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-wayland.so.1 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-gbm.so.1 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-gbm.so.1.1.0 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-wayland.so.1.1.11 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-wayland-client.so.%{tesla_ver} + +%files open-gpu-%{tesla_major} +%license COPYING +%dir %{_cross_datadir}/nvidia/open-gpu/drivers +%dir %{_cross_factorydir}/nvidia/open-gpu + +# driver +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia.ko + +# uvm +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-uvm.ko + +# modeset +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-modeset.ko + +# drm +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-drm.ko + +# peermem +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-peermem.ko + +%files fabricmanager +%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/fabricmanager.cfg +%{_cross_unitdir}/nvidia-fabricmanager.service diff --git a/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in b/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in index 79e5956b4..8fc779212 100644 --- a/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in +++ b/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in @@ -1,5 +1,5 @@ [Unit] -Description=Link additional kernel modules +Description=Link Tesla kernel modules 
RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels # Rerunning this service after the system is fully loaded will override # the already linked kernel modules. This doesn't affect the running system, @@ -10,7 +10,8 @@ RefuseManualStop=true [Service] Type=oneshot -ExecStart=/usr/bin/driverdog link-modules +ExecCondition=/usr/bin/ghostdog match-nvidia-driver tesla +ExecStart=/usr/bin/driverdog --modules-set nvidia-tesla link-modules RemainAfterExit=true StandardError=journal+console diff --git a/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in b/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in new file mode 100644 index 000000000..3862b3e75 --- /dev/null +++ b/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in @@ -0,0 +1,19 @@ +[Unit] +Description=Load open GPU kernel modules +RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels +After=copy-open-gpu-kernel-modules.service +Requires=copy-open-gpu-kernel-modules.service +# Disable manual restarts to prevent loading kernel modules +# that weren't copied by the running system +RefuseManualStart=true +RefuseManualStop=true + +[Service] +Type=oneshot +ExecCondition=/usr/bin/ghostdog match-nvidia-driver open-gpu +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu load-modules +RemainAfterExit=true +StandardError=journal+console + +[Install] +RequiredBy=preconfigured.target diff --git a/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in b/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in index 3e4128603..60024004c 100644 --- a/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in +++ b/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in @@ -1,5 +1,5 @@ [Unit] -Description=Load additional kernel modules +Description=Load Tesla kernel modules RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels After=link-tesla-kernel-modules.service Requires=link-tesla-kernel-modules.service @@ -10,7 +10,8 @@ RefuseManualStop=true 
[Service] Type=oneshot -ExecStart=/usr/bin/driverdog load-modules +ExecCondition=/usr/bin/ghostdog match-nvidia-driver tesla +ExecStart=/usr/bin/driverdog --modules-set nvidia-tesla load-modules RemainAfterExit=true StandardError=journal+console diff --git a/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg new file mode 100644 index 000000000..f8dc08ea4 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg @@ -0,0 +1,34 @@ +# Modern, systemd-aware settings: +# - Log to journal via stderr +# - Keep running in the foreground +LOG_LEVEL=4 +LOG_FILE_NAME= +DAEMONIZE=0 + +# Use Unix domain sockets instead of localhost ports. +UNIX_SOCKET_PATH=/run/nvidia/fabricmanager.sock +FM_CMD_UNIX_SOCKET_PATH=/run/nvidia/fabricmanager-cmd.sock + +# Start Fabric Manager in bare metal or full pass through virtualization mode. +FABRIC_MODE=0 +FABRIC_MODE_RESTART=0 + +# Terminate on NVSwitch and GPU config failure. +FM_STAY_RESIDENT_ON_FAILURES=0 + +# When there is a GPU to NVSwitch NVLink failure, remove the GPU with the failure +# from NVLink P2P capability. +ACCESS_LINK_FAILURE_MODE=0 + +# When there is an NVSwitch to NVSwitch NVLink failure, exit Fabric Manager. +TRUNK_LINK_FAILURE_MODE=0 + +# When there is an NVSwitch failure or an NVSwitch is excluded, abort Fabric Manager. +NVSWITCH_FAILURE_MODE=0 + +# When Fabric Manager service is stopped or terminated, abort all running CUDA jobs. +ABORT_CUDA_JOBS_ON_FM_EXIT=1 + +# Path to topology and database files. 
+TOPOLOGY_FILE_PATH=/usr/share/nvidia/tesla/nvswitch +DATABASE_PATH=/usr/share/nvidia/tesla/nvswitch diff --git a/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service new file mode 100644 index 000000000..62ae1368d --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service @@ -0,0 +1,16 @@ +[Unit] +Description=NVIDIA fabric manager service + +[Service] +ExecStart=/usr/libexec/nvidia/tesla/bin/nv-fabricmanager -c /etc/nvidia/fabricmanager.cfg +Type=simple +TimeoutSec=0 +RestartSec=5 +Restart=always +RemainAfterExit=true +StandardError=journal+console +SuccessExitStatus=255 +LimitCORE=infinity + +[Install] +WantedBy=multi-user.target diff --git a/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in b/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in index a07b0ccbb..f992bf226 100644 --- a/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in +++ b/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in @@ -1 +1 @@ -__LIBDIR__/nvidia/tesla/__NVIDIA_VERSION__/ +__LIBDIR__/nvidia/tesla/ diff --git a/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in b/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in new file mode 100644 index 000000000..5ae81b716 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in @@ -0,0 +1,11 @@ +[nvidia-open-gpu] +lib-modules-path = "kernel/drivers/extra/video/nvidia/open-gpu" + +[nvidia-open-gpu.kernel-modules."nvidia.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu.kernel-modules."nvidia-modeset.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu.kernel-modules."nvidia-uvm.ko"] +copy-source = "__NVIDIA_MODULES__" diff --git a/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in b/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in new file mode 100644 index 000000000..774867d42 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in @@ -0,0 +1,8 @@ +[nvidia-open-gpu-copy-only] 
+lib-modules-path = "kernel/drivers/extra/video/nvidia/open-gpu" + +[nvidia-open-gpu-copy-only.kernel-modules."nvidia-drm.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu-copy-only.kernel-modules."nvidia-peermem.ko"] +copy-source = "__NVIDIA_MODULES__" diff --git a/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf b/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf new file mode 100644 index 000000000..fd0f44863 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf @@ -0,0 +1,5 @@ +C /etc/drivers/nvidia-tesla.toml +C /etc/drivers/nvidia-open-gpu.toml +C /etc/drivers/nvidia-open-gpu-copy-only.toml +C /etc/containerd/nvidia.env - - - - /usr/share/factory/nvidia/tesla/nvidia-path.env +C /etc/ld.so.conf.d/nvidia-tesla.conf diff --git a/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in b/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in index d4763f280..3d3bbc489 100644 --- a/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in +++ b/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in @@ -1,2 +1,6 @@ R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla - - - - - d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla 0755 root root - - +R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/open-gpu - - - - - +d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/open-gpu 0755 root root - - +C /etc/nvidia/fabricmanager.cfg - - - - +d /run/nvidia 0700 root root -