diff --git a/packages/kmod-5.10-nvidia/.gitignore b/packages/kmod-5.10-nvidia/.gitignore index 0bcfb52fd..db8b415b2 100644 --- a/packages/kmod-5.10-nvidia/.gitignore +++ b/packages/kmod-5.10-nvidia/.gitignore @@ -1 +1,3 @@ NVidiaEULAforAWS.pdf +COPYING +*.rpm diff --git a/packages/kmod-5.10-nvidia/Cargo.toml b/packages/kmod-5.10-nvidia/Cargo.toml index 07cc9b724..785a9812b 100644 --- a/packages/kmod-5.10-nvidia/Cargo.toml +++ b/packages/kmod-5.10-nvidia/Cargo.toml @@ -17,13 +17,28 @@ url = "https://s3.amazonaws.com/EULA/NVidiaEULAforAWS.pdf" sha512 = "e1926fe99afc3ab5b2f2744fcd53b4046465aefb2793e2e06c4a19455a3fde895e00af1415ff1a5804c32e6a2ed0657e475de63da6c23a0e9c59feeef52f3f58" [[package.metadata.build-package.external-files]] -url = "https://us.download.nvidia.com/tesla/470.256.02/NVIDIA-Linux-x86_64-470.256.02.run" -sha512 = "a837946dd24d7945c1962a695f1f31965f3ceb6927f52cd08fd51b8db138b7a888bbeab69243f5c8468a7bd7ccd47f5dbdb48a1ca81264866c1ebb7d88628f88" +url = "https://us.download.nvidia.com/tesla/535.183.06/NVIDIA-Linux-x86_64-535.183.06.run" +sha512 = "424950ef303ea39499e96f8c90c1e0c83aee12309779d4f335769ef554ad4f7c38e98f69c64b408adc85a7cf51ea600d85222792402b9c6b7941f1af066d2a33" force-upstream = true [[package.metadata.build-package.external-files]] -url = "https://us.download.nvidia.com/tesla/470.256.02/NVIDIA-Linux-aarch64-470.256.02.run" -sha512 = "38eee5933355c34ca816a2ac0fbc4f55c19c20e1322891bfc98cb6b37d99a31218eea9314877ab0e3cf3ac6eb61f9d9d4d09d0af304b689f18b4efa721b65d5c" +url = "https://us.download.nvidia.com/tesla/535.183.06/NVIDIA-Linux-aarch64-535.183.06.run" +sha512 = "bb305f1703557461b0a0a29066c304658d9684841104c6f4d9ff44f9db90fee14ae619cd2fe3242823a5fe3a69b168b8174b163740014b15cdef36db88ba2d96" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/nvidia-fabric-manager-535.183.06-1.x86_64.rpm" +sha512 = 
"c3d98878363f857b2963665a0e485cb7b1afeaabd0040a970478d00ffb870ab4130ab9dfe1b7a40d1b38734636ebccec39fd1b3fc8c06abc5c07470f749b6025" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/sbsa/nvidia-fabric-manager-535.183.06-1.aarch64.rpm" +sha512 = "6a646cd7ea11e668f7dbe6f6bb22516107a856e3c3755f8693c91d4bed706b8b3667b853f07e84c2d0da4de7ab1107337b6a1493879d75d8c201bfe9da071b32" +force-upstream = true + +[[package.metadata.build-package.external-files]] +url = "https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/535/COPYING" +sha512 = "f9cee68cbb12095af4b4e92d01c210461789ef41c70b64efefd6719d0b88468b7a67a3629c432d4d9304c730b5d1a942228a5bcc74a03ab1c411c77c758cd938" force-upstream = true [build-dependencies] diff --git a/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in b/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in new file mode 100644 index 000000000..2c3420b61 --- /dev/null +++ b/packages/kmod-5.10-nvidia/copy-open-gpu-kernel-modules.service.in @@ -0,0 +1,20 @@ +[Unit] +Description=Copy open GPU kernel modules +RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels +# Rerunning this service after the system is fully loaded will override +# the already linked kernel modules. This doesn't affect the running system, +# since kernel modules are linked early in the boot sequence, but we still +# disable manual restarts to prevent unnecessary kernel modules rewrites. 
+RefuseManualStart=true +RefuseManualStop=true + +[Service] +Type=oneshot +ExecCondition=/usr/bin/ghostdog match-nvidia-driver open-gpu +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu link-modules +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu-copy-only link-modules +RemainAfterExit=true +StandardError=journal+console + +[Install] +RequiredBy=preconfigured.target diff --git a/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec b/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec index a12a815a3..5af1c33f6 100644 --- a/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec +++ b/packages/kmod-5.10-nvidia/kmod-5.10-nvidia.spec @@ -1,6 +1,19 @@ -%global tesla_470 470.256.02 -%global tesla_470_libdir %{_cross_libdir}/nvidia/tesla/%{tesla_470} -%global tesla_470_bindir %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +%global tesla_major 535 +%global tesla_minor 183 +%global tesla_patch 06 +%global tesla_ver %{tesla_major}.%{tesla_minor}.%{tesla_patch} +%if "%{?_cross_arch}" == "aarch64" +%global fm_arch sbsa +%else +%global fm_arch %{_cross_arch} +%endif + +# With the split of the firmware binary from firmware/gsp.bin to firmware/gsp_ga10x.bin +# and firmware/gsp_tu10x.bin the file format changed from executable to relocatable. +# The __spec_install_post macro will by default try to strip all binary files. +# Unfortunately the strip used is not compatible with the new file format. +# Redefine strip, so that these firmware binaries do not derail the build. 
+%global __strip /usr/bin/true Name: %{_cross_os}kmod-5.10-nvidia Version: 1.0.0 @@ -11,24 +24,36 @@ Summary: NVIDIA drivers for the 5.10 kernel License: Apache-2.0 OR MIT URL: http://www.nvidia.com/ -# NVIDIA .run scripts from 0 to 199 -Source0: https://us.download.nvidia.com/tesla/%{tesla_470}/NVIDIA-Linux-x86_64-%{tesla_470}.run -Source1: https://us.download.nvidia.com/tesla/%{tesla_470}/NVIDIA-Linux-aarch64-%{tesla_470}.run +# NVIDIA archives from 0 to 199 +# NVIDIA .run scripts for kernel and userspace drivers +Source0: https://us.download.nvidia.com/tesla/%{tesla_ver}/NVIDIA-Linux-x86_64-%{tesla_ver}.run +Source1: https://us.download.nvidia.com/tesla/%{tesla_ver}/NVIDIA-Linux-aarch64-%{tesla_ver}.run Source2: NVidiaEULAforAWS.pdf +Source3: COPYING + +# fabricmanager for NVSwitch +Source10: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/nvidia-fabric-manager-%{tesla_ver}-1.x86_64.rpm +Source11: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/sbsa/nvidia-fabric-manager-%{tesla_ver}-1.aarch64.rpm # Common NVIDIA conf files from 200 to 299 Source200: nvidia-tmpfiles.conf.in Source202: nvidia-dependencies-modules-load.conf -Source203: nvidia-sysusers.conf -Source204: nvidia-persistenced.service.in +Source203: nvidia-fabricmanager.service +Source204: nvidia-fabricmanager.cfg +Source205: nvidia-sysusers.conf +Source206: nvidia-persistenced.service # NVIDIA tesla conf files from 300 to 399 -Source300: nvidia-tesla-tmpfiles.conf.in +Source300: nvidia-tesla-tmpfiles.conf Source301: nvidia-tesla-build-config.toml.in -Source302: nvidia-tesla-path.env.in -Source303: nvidia-ld.so.conf.in -Source304: link-tesla-kernel-modules.service.in -Source305: load-tesla-kernel-modules.service.in +Source302: nvidia-open-gpu-config.toml.in +Source303: nvidia-open-gpu-copy-only-config.toml.in +Source304: nvidia-tesla-path.env.in +Source305: nvidia-ld.so.conf.in +Source306: link-tesla-kernel-modules.service.in +Source307: 
load-tesla-kernel-modules.service.in +Source308: copy-open-gpu-kernel-modules.service.in +Source309: load-open-gpu-kernel-modules.service.in BuildRequires: %{_cross_os}glibc-devel BuildRequires: %{_cross_os}kernel-5.10-archive @@ -36,33 +61,71 @@ BuildRequires: %{_cross_os}kernel-5.10-archive %description %{summary}. -%package tesla-470 -Summary: NVIDIA 470 Tesla driver -Version: %{tesla_470} +%package fabricmanager +Summary: NVIDIA fabricmanager config and service files +Requires: %{name}-tesla(fabricmanager) + +%description fabricmanager +%{summary}. + +%package open-gpu-%{tesla_major} +Summary: NVIDIA %{tesla_major} Open GPU driver +Version: %{tesla_ver} +License: MIT OR GPL-2.0-only +Requires: %{_cross_os}variant-platform(aws) + +%description open-gpu-%{tesla_major} +%{summary}. + +%package tesla-%{tesla_major} +Summary: NVIDIA %{tesla_major} Tesla driver +Version: %{tesla_ver} License: LicenseRef-NVIDIA-AWS-EULA Requires: %{_cross_os}variant-platform(aws) Requires: %{name} +Requires: %{name}-fabricmanager +Provides: %{name}-tesla(fabricmanager) +Requires: %{name}-open-gpu-%{tesla_major} -%description tesla-470 +%description tesla-%{tesla_major} %{summary} %prep # Extract nvidia sources with `-x`, otherwise the script will try to install # the driver in the current run -sh %{_sourcedir}/NVIDIA-Linux-%{_cross_arch}-%{tesla_470}.run -x +sh %{_sourcedir}/NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}.run -x + +# Extract fabricmanager from the rpm via cpio rather than `%%setup` since the +# correct source is architecture-dependent. +mkdir fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive +rpm2cpio %{_sourcedir}/nvidia-fabric-manager-%{tesla_ver}-1.%{_cross_arch}.rpm | cpio -idmV -D fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive # Add the license. install -p -m 0644 %{S:2} . +install -p -m 0644 %{S:3} . 
%global kernel_sources %{_builddir}/kernel-devel tar -xf %{_cross_datadir}/bottlerocket/kernel-devel.tar.xz -%build -pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_470}/kernel +%define _kernel_version %(ls %{kernel_sources}/include/config/kernel.release) +%global _cross_kmoddir %{_cross_libdir}/modules/%{_kernel_version} # This recipe was based in the NVIDIA yum/dnf specs: # https://github.com/NVIDIA/yum-packaging-precompiled-kmod +# Begin open driver build +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/kernel-open + +# We set IGNORE_CC_MISMATCH even though we are using the same compiler used to compile the kernel, if +# we don't set this flag the compilation fails +make %{?_smp_mflags} ARCH=%{_cross_karch} IGNORE_CC_MISMATCH=1 SYSSRC=%{kernel_sources} CC=%{_cross_target}-gcc LD=%{_cross_target}-ld + +# end open driver build +popd + +# Begin proprietary driver build +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/kernel + # We set IGNORE_CC_MISMATCH even though we are using the same compiler used to compile the kernel, if # we don't set this flag the compilation fails make %{?_smp_mflags} ARCH=%{_cross_karch} IGNORE_CC_MISMATCH=1 SYSSRC=%{kernel_sources} CC=%{_cross_target}-gcc LD=%{_cross_target}-ld @@ -81,6 +144,29 @@ rm nvidia{,-modeset,-peermem}.o # don't include any linked module in the base image rm nvidia{,-modeset,-peermem,-drm}.ko +# End proprietary driver build +popd + +# Grab the list of supported devices +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver}/supported-gpus +# We want to grab all the `kernelopen` enabled chips except for this list that is best held back to the proprietary driver +# 10de:1db1 is V100-16G (P3dn) +# 10de:1db5 is V100-32G (P3dn) +# 10de:1eb8 is T4 (G4dn) +# 10de:1eb4 is T4G (G5g) +# 10de:2237 is A10G (G5) +# 10de:27b8 is L4 (G6) +# 10de:26b9 is L40S (G6e) +jq -r '.chips[] | select(.features[] | contains("kernelopen")) | +select(.devid != "0x1DB1" +and .devid != "0x1DB5" +and .devid != "0x1EB8" +and .devid != "0x1EB4" +and .devid != 
"0x2237" +and .devid != "0x27B8" +and .devid != "0x26B9")' supported-gpus.json | jq -s '{"open-gpu": .}' > open-gpu-supported-devices.json +# confirm "NVIDIA H100" is in the resulting file to catch shape changes +jq -e '."open-gpu"[] | select(."devid" == "0x2330") | ."features"| index("kernelopen")' open-gpu-supported-devices.json popd %install @@ -88,8 +174,9 @@ install -d %{buildroot}%{_cross_libexecdir} install -d %{buildroot}%{_cross_libdir} install -d %{buildroot}%{_cross_tmpfilesdir} install -d %{buildroot}%{_cross_unitdir} -install -d %{buildroot}%{_cross_bindir} install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/{drivers,ld.so.conf.d} +install -d %{buildroot}%{_cross_sysusersdir} +install -d %{buildroot}%{_cross_bindir} KERNEL_VERSION=$(cat %{kernel_sources}/include/config/kernel.release) sed \ @@ -101,94 +188,147 @@ install -p -m 0644 nvidia.conf %{buildroot}%{_cross_tmpfilesdir} install -d %{buildroot}%{_cross_libdir}/modules-load.d install -p -m 0644 %{S:202} %{buildroot}%{_cross_libdir}/modules-load.d/nvidia-dependencies.conf -# Begin NVIDIA tesla 470 -pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_470} -# We install bins and libs in a versioned directory to prevent collisions with future drivers versions -install -d %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -d %{buildroot}%{tesla_470_libdir} -install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install -d %{buildroot}%{_cross_factorydir}/nvidia/tesla/%{tesla_470} -install -d %{buildroot}%{_cross_sysusersdir} - -sed -e 's|__NVIDIA_VERSION__|%{tesla_470}|' %{S:300} > nvidia-tesla-%{tesla_470}.conf -install -m 0644 nvidia-tesla-%{tesla_470}.conf %{buildroot}%{_cross_tmpfilesdir}/ -sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/|' %{S:301} > \ - nvidia-tesla-%{tesla_470}.toml -install -m 0644 nvidia-tesla-%{tesla_470}.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +# NVIDIA 
fabric manager service unit and config +install -p -m 0644 %{S:203} %{buildroot}%{_cross_unitdir} +install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/nvidia +install -p -m 0644 %{S:204} %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/fabricmanager.cfg + +# Begin NVIDIA tesla driver +pushd NVIDIA-Linux-%{_cross_arch}-%{tesla_ver} +# Proprietary driver +install -d %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -d %{buildroot}%{_cross_libdir}/nvidia/tesla +install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install -d %{buildroot}%{_cross_factorydir}/nvidia/tesla +install -d %{buildroot}%{_cross_factorydir}/nvidia/open-gpu +install -d %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers + +install -m 0644 %{S:300} %{buildroot}%{_cross_tmpfilesdir}/nvidia-tesla.conf +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/tesla/module-objects.d/|' %{S:301} > \ + nvidia-tesla.toml +install -m 0644 nvidia-tesla.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/open-gpu/drivers/|' %{S:302} > \ + nvidia-open-gpu.toml +install -m 0644 nvidia-open-gpu.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers +sed -e 's|__NVIDIA_MODULES__|%{_cross_datadir}/nvidia/open-gpu/drivers/|' %{S:303} > \ + nvidia-open-gpu-copy-only.toml +install -m 0644 nvidia-open-gpu-copy-only.toml %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/drivers # Install nvidia-path environment file, will be used as a drop-in for containerd.service since # libnvidia-container locates and mounts helper binaries into the containers from either # `PATH` or `NVIDIA_PATH` -sed -e 's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}|' %{S:302} > nvidia-path.env -install -m 0644 nvidia-path.env %{buildroot}%{_cross_factorydir}/nvidia/tesla/%{tesla_470} -# We need to add `_cross_libdir/tesla_470` to the paths loaded by the ldconfig service +sed -e 
's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin|' %{S:304} > nvidia-path.env +install -m 0644 nvidia-path.env %{buildroot}%{_cross_factorydir}/nvidia/tesla +# We need to add `_cross_libdir` to the paths loaded by the ldconfig service # because libnvidia-container uses the `ldcache` file created by the service, to locate and mount the # libraries into the containers -sed -e 's|__LIBDIR__|%{_cross_libdir}|' %{S:303} | sed -e 's|__NVIDIA_VERSION__|%{tesla_470}|' \ - > nvidia-tesla-%{tesla_470}.conf -install -m 0644 nvidia-tesla-%{tesla_470}.conf %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/ +sed -e 's|__LIBDIR__|%{_cross_libdir}|' %{S:305} > nvidia-tesla.conf +install -m 0644 nvidia-tesla.conf %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/ # Services to link/copy/load modules -sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:304} > link-tesla-kernel-modules.service -sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:305} > load-tesla-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:306} > link-tesla-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:307} > load-tesla-kernel-modules.service install -p -m 0644 \ link-tesla-kernel-modules.service \ load-tesla-kernel-modules.service \ %{buildroot}%{_cross_unitdir} -# driver -install kernel/nvidia.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia/nv-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia/nv-kernel.o_binary %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-kernel.o +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:308} > copy-open-gpu-kernel-modules.service +sed -e 's|PREFIX|%{_cross_prefix}|g' %{S:309} > load-open-gpu-kernel-modules.service +install -p -m 0644 \ + copy-open-gpu-kernel-modules.service \ + load-open-gpu-kernel-modules.service \ + %{buildroot}%{_cross_unitdir} + +# proprietary driver +install 
kernel/nvidia.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia/nv-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia/nv-kernel.o_binary %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-kernel.o # uvm -install kernel/nvidia-uvm.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-uvm.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel/nvidia-uvm.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-uvm.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d # modeset -install kernel/nvidia-modeset.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-modeset/nv-modeset-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-modeset/nv-modeset-kernel.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel/nvidia-modeset.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-modeset/nv-modeset-interface.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-modeset/nv-modeset-kernel.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d # peermem -install kernel/nvidia-peermem.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-peermem/nvidia-peermem.o %{buildroot}%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel/nvidia-peermem.mod.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-peermem/nvidia-peermem.o %{buildroot}%{_cross_datadir}/nvidia/tesla/module-objects.d # drm -install kernel/nvidia-drm.mod.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -install kernel/nvidia-drm.o 
%{buildroot}/%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d +install kernel/nvidia-drm.mod.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/module-objects.d +install kernel/nvidia-drm.o %{buildroot}/%{_cross_datadir}/nvidia/tesla/module-objects.d + +# open driver +install -d %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ +install kernel-open/nvidia.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ + +# uvm +install kernel-open/nvidia-uvm.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ + +# modeset +install kernel-open/nvidia-modeset.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ + +# peermem +install kernel-open/nvidia-peermem.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ + +# drm +install kernel-open/nvidia-drm.ko %{buildroot}%{_cross_datadir}/nvidia/open-gpu/drivers/ +# end open driver # Binaries -install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -install -m 755 nvidia-persistenced %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +install -m 755 nvidia-smi %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-debugdump %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-cuda-mps-control %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-cuda-mps-server %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -m 755 nvidia-persistenced %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/ install -m 4755 nvidia-modprobe %{buildroot}%{_cross_bindir} %if "%{_cross_arch}" == "x86_64" -install -m 755 nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} +install -m 755 
nvidia-ngx-updater %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin %endif # Users -install -m 0644 %{S:203} %{buildroot}%{_cross_sysusersdir}/nvidia.conf +install -m 0644 %{S:205} %{buildroot}%{_cross_sysusersdir}/nvidia.conf # Systemd units -sed -e 's|__NVIDIA_BINDIR__|%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}|' %{S:204} > nvidia-persistenced.service -install -m 0644 nvidia-persistenced.service %{buildroot}%{_cross_unitdir} +install -m 0644 %{S:206} %{buildroot}%{_cross_unitdir} # We install all the libraries, and filter them out in the 'files' section, so we can catch # when new libraries are added -install -m 755 *.so* %{buildroot}/%{tesla_470_libdir}/ +install -m 755 *.so* %{buildroot}/%{_cross_libdir}/nvidia/tesla/ # This library has the same SONAME as libEGL.so.1.1.0, this will cause collisions while # the symlinks are created. For now, we only symlink libEGL.so.1.1.0. -EXCLUDED_LIBS="libEGL.so.%{tesla_470}" +EXCLUDED_LIBS="libEGL.so.%{tesla_ver}" for lib in $(find . -maxdepth 1 -type f -name 'lib*.so.*' -printf '%%P\n'); do [[ "${EXCLUDED_LIBS}" =~ "${lib}" ]] && continue soname="$(%{_cross_target}-readelf -d "${lib}" | awk '/SONAME/{print $5}' | tr -d '[]')" [ -n "${soname}" ] || continue [ "${lib}" == "${soname}" ] && continue - ln -s "${lib}" %{buildroot}/%{tesla_470_libdir}/"${soname}" + ln -s "${lib}" %{buildroot}/%{_cross_libdir}/nvidia/tesla/"${soname}" +done + +# Include the firmware file for GSP support +install -d %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} +install -p -m 0644 firmware/gsp_ga10x.bin %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} +install -p -m 0644 firmware/gsp_tu10x.bin %{buildroot}%{_cross_libdir}/firmware/nvidia/%{tesla_ver} + +# Include the open driver supported devices file for runtime matching of the driver. 
This is consumed by ghostdog to match the driver to this list +install -p -m 0644 supported-gpus/open-gpu-supported-devices.json %{buildroot}%{_cross_datadir}/nvidia/open-gpu-supported-devices.json + +popd + +# Begin NVIDIA fabric manager binaries and topologies +pushd fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive +install -p -m 0755 usr/bin/nv-fabricmanager %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin +install -p -m 0755 usr/bin/nvswitch-audit %{buildroot}%{_cross_libexecdir}/nvidia/tesla/bin + +install -d %{buildroot}%{_cross_datadir}/nvidia/tesla/nvswitch +for t in usr/share/nvidia/nvswitch/*_topology ; do + install -p -m 0644 "${t}" %{buildroot}%{_cross_datadir}/nvidia/tesla/nvswitch done popd @@ -200,49 +340,65 @@ popd %dir %{_cross_datadir}/nvidia %dir %{_cross_libdir}/modules-load.d %dir %{_cross_factorydir}%{_cross_sysconfdir}/drivers +%dir %{_cross_factorydir}%{_cross_sysconfdir}/nvidia %{_cross_tmpfilesdir}/nvidia.conf -%{_cross_libdir}/systemd/system/ %{_cross_libdir}/modules-load.d/nvidia-dependencies.conf -%files tesla-470 +%files tesla-%{tesla_major} %license NVidiaEULAforAWS.pdf -%dir %{_cross_datadir}/nvidia/tesla/%{tesla_470} -%dir %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470} -%dir %{tesla_470_libdir} -%dir %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d -%dir %{_cross_factorydir}/nvidia/tesla/%{tesla_470} +%license fabricmanager-linux-%{fm_arch}-%{tesla_ver}-archive/usr/share/doc/nvidia-fabricmanager/third-party-notices.txt +%dir %{_cross_datadir}/nvidia/tesla +%dir %{_cross_libexecdir}/nvidia/tesla/bin +%dir %{_cross_libdir}/nvidia/tesla +%dir %{_cross_libdir}/firmware/nvidia/%{tesla_ver} +%dir %{_cross_datadir}/nvidia/tesla/module-objects.d +%dir %{_cross_factorydir}/nvidia/tesla # Service files for link/copy/loading drivers %{_cross_unitdir}/link-tesla-kernel-modules.service %{_cross_unitdir}/load-tesla-kernel-modules.service +%{_cross_unitdir}/copy-open-gpu-kernel-modules.service 
+%{_cross_unitdir}/load-open-gpu-kernel-modules.service # Binaries -%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-debugdump -%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-smi -%{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-persistenced +%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-debugdump +%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-smi +%{_cross_libexecdir}/nvidia/tesla/bin/nv-fabricmanager +%{_cross_libexecdir}/nvidia/tesla/bin/nvswitch-audit +%{_cross_libexecdir}/nvidia/tesla/bin/nvidia-persistenced %{_cross_bindir}/nvidia-modprobe +# nvswitch topologies +%dir %{_cross_datadir}/nvidia/tesla/nvswitch +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxa100_hgxa100_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgx2_hgx2_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxh100_hgxh100_topology +%{_cross_datadir}/nvidia/tesla/nvswitch/dgxh800_hgxh800_topology + # Configuration files -%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-tesla-%{tesla_470}.toml -%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/nvidia-tesla-%{tesla_470}.conf -%{_cross_factorydir}/nvidia/tesla/%{tesla_470}/nvidia-path.env +%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-tesla.toml +%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-open-gpu.toml +%{_cross_factorydir}%{_cross_sysconfdir}/drivers/nvidia-open-gpu-copy-only.toml +%{_cross_factorydir}%{_cross_sysconfdir}/ld.so.conf.d/nvidia-tesla.conf +%{_cross_factorydir}/nvidia/tesla/nvidia-path.env +%{_cross_datadir}/nvidia/open-gpu-supported-devices.json # driver -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia.mod.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-interface.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-kernel.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-interface.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-kernel.o # uvm 
-%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-uvm.mod.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-uvm.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-uvm.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-uvm.o # modeset -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-modeset-interface.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nv-modeset-kernel.o -%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-modeset.mod.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-modeset-interface.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nv-modeset-kernel.o +%{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-modeset.mod.o # tmpfiles -%{_cross_tmpfilesdir}/nvidia-tesla-%{tesla_470}.conf +%{_cross_tmpfilesdir}/nvidia-tesla.conf # sysuser files %{_cross_sysusersdir}/nvidia.conf @@ -254,103 +410,131 @@ popd # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities # Utility libs -%{tesla_470_libdir}/libnvidia-ml.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ml.so.1 -%{tesla_470_libdir}/libnvidia-cfg.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-cfg.so.1 -%{tesla_470_libdir}/libnvidia-nvvm.so.4.0.0 -%{tesla_470_libdir}/libnvidia-nvvm.so.4 +%{_cross_libdir}/nvidia/tesla/libnvidia-api.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-ml.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ml.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-cfg.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-cfg.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-nvvm.so.4 +%{_cross_libdir}/nvidia/tesla/libnvidia-nvvm.so.%{tesla_ver} # Compute libs -%{tesla_470_libdir}/libcuda.so.%{tesla_470} -%{tesla_470_libdir}/libcuda.so.1 -%{tesla_470_libdir}/libnvidia-opencl.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-opencl.so.1 -%{tesla_470_libdir}/libnvidia-ptxjitcompiler.so.%{tesla_470} 
-%{tesla_470_libdir}/libnvidia-ptxjitcompiler.so.1 -%{tesla_470_libdir}/libnvidia-allocator.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-allocator.so.1 -%{tesla_470_libdir}/libOpenCL.so.1.0.0 -%{tesla_470_libdir}/libOpenCL.so.1 +%{_cross_libdir}/nvidia/tesla/libcuda.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libcuda.so.1 +%{_cross_libdir}/nvidia/tesla/libcudadebugger.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libcudadebugger.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-opencl.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-opencl.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-ptxjitcompiler.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ptxjitcompiler.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-allocator.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-allocator.so.1 +%{_cross_libdir}/nvidia/tesla/libOpenCL.so.1.0.0 +%{_cross_libdir}/nvidia/tesla/libOpenCL.so.1 %if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-compiler.so.%{tesla_470} +%{_cross_libdir}/nvidia/tesla/libnvidia-pkcs11.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-pkcs11-openssl3.so.%{tesla_ver} %endif # Video libs -%{tesla_470_libdir}/libvdpau_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libvdpau_nvidia.so.1 -%{tesla_470_libdir}/libnvidia-encode.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-encode.so.1 -%{tesla_470_libdir}/libnvidia-opticalflow.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-opticalflow.so.1 -%{tesla_470_libdir}/libnvcuvid.so.%{tesla_470} -%{tesla_470_libdir}/libnvcuvid.so.1 +%{_cross_libdir}/nvidia/tesla/libvdpau_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libvdpau_nvidia.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-encode.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-encode.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-opticalflow.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-opticalflow.so.1 +%{_cross_libdir}/nvidia/tesla/libnvcuvid.so.%{tesla_ver} 
+%{_cross_libdir}/nvidia/tesla/libnvcuvid.so.1 # Graphics libs -%{tesla_470_libdir}/libnvidia-eglcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-glcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-tls.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-glsi.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-rtcore.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-fbc.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-fbc.so.1 -%{tesla_470_libdir}/libnvoptix.so.%{tesla_470} -%{tesla_470_libdir}/libnvoptix.so.1 -%{tesla_470_libdir}/libnvidia-vulkan-producer.so.%{tesla_470} -%if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-ifr.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ifr.so.1 -%endif +%{_cross_libdir}/nvidia/tesla/libnvidia-eglcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-glcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-tls.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-glsi.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-rtcore.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-fbc.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-fbc.so.1 +%{_cross_libdir}/nvidia/tesla/libnvoptix.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvoptix.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-vulkan-producer.so.%{tesla_ver} # Graphics GLVND libs -%{tesla_470_libdir}/libnvidia-glvkspirv.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-cbl.so.%{tesla_470} -%{tesla_470_libdir}/libGLX_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLX_nvidia.so.0 -%{tesla_470_libdir}/libEGL_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libEGL_nvidia.so.0 -%{tesla_470_libdir}/libGLESv2_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLESv2_nvidia.so.2 -%{tesla_470_libdir}/libGLESv1_CM_nvidia.so.%{tesla_470} -%{tesla_470_libdir}/libGLESv1_CM_nvidia.so.1 +%{_cross_libdir}/nvidia/tesla/libnvidia-glvkspirv.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLX_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLX_nvidia.so.0 
+%{_cross_libdir}/nvidia/tesla/libEGL_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libEGL_nvidia.so.0 +%{_cross_libdir}/nvidia/tesla/libGLESv2_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLESv2_nvidia.so.2 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM_nvidia.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM_nvidia.so.1 # Graphics compat -%{tesla_470_libdir}/libEGL.so.1.1.0 -%{tesla_470_libdir}/libEGL.so.1 -%{tesla_470_libdir}/libEGL.so.%{tesla_470} -%{tesla_470_libdir}/libGL.so.1.7.0 -%{tesla_470_libdir}/libGL.so.1 -%{tesla_470_libdir}/libGLESv1_CM.so.1.2.0 -%{tesla_470_libdir}/libGLESv1_CM.so.1 -%{tesla_470_libdir}/libGLESv2.so.2.1.0 -%{tesla_470_libdir}/libGLESv2.so.2 +%{_cross_libdir}/nvidia/tesla/libEGL.so.1.1.0 +%{_cross_libdir}/nvidia/tesla/libEGL.so.1 +%{_cross_libdir}/nvidia/tesla/libEGL.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libGL.so.1.7.0 +%{_cross_libdir}/nvidia/tesla/libGL.so.1 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM.so.1.2.0 +%{_cross_libdir}/nvidia/tesla/libGLESv1_CM.so.1 +%{_cross_libdir}/nvidia/tesla/libGLESv2.so.2.1.0 +%{_cross_libdir}/nvidia/tesla/libGLESv2.so.2 # NGX -%if "%{_cross_arch}" == "x86_64" -%{tesla_470_libdir}/libnvidia-ngx.so.%{tesla_470} -%{tesla_470_libdir}/libnvidia-ngx.so.1 -%endif +%{_cross_libdir}/nvidia/tesla/libnvidia-ngx.so.%{tesla_ver} +%{_cross_libdir}/nvidia/tesla/libnvidia-ngx.so.1 + +# Firmware +%{_cross_libdir}/firmware/nvidia/%{tesla_ver}/gsp_ga10x.bin +%{_cross_libdir}/firmware/nvidia/%{tesla_ver}/gsp_tu10x.bin # Neither nvidia-peermem nor nvidia-drm are included in driver container images, we exclude them # for now, and we will add them if requested -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-peermem.mod.o -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-peermem.o -%exclude %{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-drm.mod.o -%exclude 
%{_cross_datadir}/nvidia/tesla/%{tesla_470}/module-objects.d/nvidia-drm.o -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-cuda-mps-control -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-cuda-mps-server +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-peermem.mod.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-peermem.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-drm.mod.o +%exclude %{_cross_datadir}/nvidia/tesla/module-objects.d/nvidia-drm.o +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-cuda-mps-control +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-cuda-mps-server %if "%{_cross_arch}" == "x86_64" -%exclude %{_cross_libexecdir}/nvidia/tesla/bin/%{tesla_470}/nvidia-ngx-updater +%exclude %{_cross_libexecdir}/nvidia/tesla/bin/nvidia-ngx-updater %endif # None of these libraries are required by libnvidia-container, so they # won't be used by a containerized workload -%exclude %{tesla_470_libdir}/libGLX.so.0 -%exclude %{tesla_470_libdir}/libGLdispatch.so.0 -%exclude %{tesla_470_libdir}/libOpenGL.so.0 -%exclude %{tesla_470_libdir}/libglxserver_nvidia.so.%{tesla_470} -%exclude %{tesla_470_libdir}/libnvidia-egl-wayland.so.1.1.7 -%exclude %{tesla_470_libdir}/libnvidia-gtk2.so.%{tesla_470} -%exclude %{tesla_470_libdir}/libnvidia-gtk3.so.%{tesla_470} -%exclude %{tesla_470_libdir}/nvidia_drv.so -%exclude %{tesla_470_libdir}/libnvidia-egl-wayland.so.1 +%exclude %{_cross_libdir}/nvidia/tesla/libGLX.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libGLdispatch.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libOpenGL.so.0 +%exclude %{_cross_libdir}/nvidia/tesla/libglxserver_nvidia.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-gtk2.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-gtk3.so.%{tesla_ver} +%exclude %{_cross_libdir}/nvidia/tesla/nvidia_drv.so +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-wayland.so.1 +%exclude 
%{_cross_libdir}/nvidia/tesla/libnvidia-egl-gbm.so.1 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-gbm.so.1.1.0 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-egl-wayland.so.1.1.11 +%exclude %{_cross_libdir}/nvidia/tesla/libnvidia-wayland-client.so.%{tesla_ver} + +%files open-gpu-%{tesla_major} +%license COPYING +%dir %{_cross_datadir}/nvidia/open-gpu/drivers +%dir %{_cross_factorydir}/nvidia/open-gpu + +# driver +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia.ko + +# uvm +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-uvm.ko + +# modeset +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-modeset.ko + +# drm +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-drm.ko + +# peermem +%{_cross_datadir}/nvidia/open-gpu/drivers/nvidia-peermem.ko + +%files fabricmanager +%{_cross_factorydir}%{_cross_sysconfdir}/nvidia/fabricmanager.cfg +%{_cross_unitdir}/nvidia-fabricmanager.service diff --git a/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in b/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in index 79e5956b4..8fc779212 100644 --- a/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in +++ b/packages/kmod-5.10-nvidia/link-tesla-kernel-modules.service.in @@ -1,5 +1,5 @@ [Unit] -Description=Link additional kernel modules +Description=Link Tesla kernel modules RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels # Rerunning this service after the system is fully loaded will override # the already linked kernel modules. 
This doesn't affect the running system, @@ -10,7 +10,8 @@ RefuseManualStop=true [Service] Type=oneshot -ExecStart=/usr/bin/driverdog link-modules +ExecCondition=/usr/bin/ghostdog match-nvidia-driver tesla +ExecStart=/usr/bin/driverdog --modules-set nvidia-tesla link-modules RemainAfterExit=true StandardError=journal+console diff --git a/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in b/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in new file mode 100644 index 000000000..3862b3e75 --- /dev/null +++ b/packages/kmod-5.10-nvidia/load-open-gpu-kernel-modules.service.in @@ -0,0 +1,19 @@ +[Unit] +Description=Load open GPU kernel modules +RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels +After=copy-open-gpu-kernel-modules.service +Requires=copy-open-gpu-kernel-modules.service +# Disable manual restarts to prevent loading kernel modules +# that weren't linked by the running system +RefuseManualStart=true +RefuseManualStop=true + +[Service] +Type=oneshot +ExecCondition=/usr/bin/ghostdog match-nvidia-driver open-gpu +ExecStart=/usr/bin/driverdog --modules-set nvidia-open-gpu load-modules +RemainAfterExit=true +StandardError=journal+console + +[Install] +RequiredBy=preconfigured.target diff --git a/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in b/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in index 3e4128603..60024004c 100644 --- a/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in +++ b/packages/kmod-5.10-nvidia/load-tesla-kernel-modules.service.in @@ -1,5 +1,5 @@ [Unit] -Description=Load additional kernel modules +Description=Load Tesla kernel modules RequiresMountsFor=PREFIX/lib/modules PREFIX/src/kernels After=link-tesla-kernel-modules.service Requires=link-tesla-kernel-modules.service @@ -10,7 +10,8 @@ RefuseManualStop=true [Service] Type=oneshot -ExecStart=/usr/bin/driverdog load-modules +ExecCondition=/usr/bin/ghostdog match-nvidia-driver tesla +ExecStart=/usr/bin/driverdog 
--modules-set nvidia-tesla load-modules RemainAfterExit=true StandardError=journal+console diff --git a/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg new file mode 100644 index 000000000..f8dc08ea4 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.cfg @@ -0,0 +1,34 @@ +# Modern, systemd-aware settings: +# - Log to journal via stderr +# - Keep running in the foreground +LOG_LEVEL=4 +LOG_FILE_NAME= +DAEMONIZE=0 + +# Use Unix domain sockets instead of localhost ports. +UNIX_SOCKET_PATH=/run/nvidia/fabricmanager.sock +FM_CMD_UNIX_SOCKET_PATH=/run/nvidia/fabricmanager-cmd.sock + +# Start Fabric Manager in bare metal or full pass through virtualization mode. +FABRIC_MODE=0 +FABRIC_MODE_RESTART=0 + +# Terminate on NVSwitch and GPU config failure. +FM_STAY_RESIDENT_ON_FAILURES=0 + +# When there is a GPU to NVSwitch NVLink failure, remove the GPU with the failure +# from NVLink P2P capability. +ACCESS_LINK_FAILURE_MODE=0 + +# When there is an NVSwitch to NVSwitch NVLink failure, exit Fabric Manager. +TRUNK_LINK_FAILURE_MODE=0 + +# When there is an NVSwitch failure or an NVSwitch is excluded, abort Fabric Manager. +NVSWITCH_FAILURE_MODE=0 + +# When Fabric Manager service is stopped or terminated, abort all running CUDA jobs. +ABORT_CUDA_JOBS_ON_FM_EXIT=1 + +# Path to topology and database files. 
+TOPOLOGY_FILE_PATH=/usr/share/nvidia/tesla/nvswitch +DATABASE_PATH=/usr/share/nvidia/tesla/nvswitch diff --git a/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service new file mode 100644 index 000000000..62ae1368d --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-fabricmanager.service @@ -0,0 +1,16 @@ +[Unit] +Description=NVIDIA fabric manager service + +[Service] +ExecStart=/usr/libexec/nvidia/tesla/bin/nv-fabricmanager -c /etc/nvidia/fabricmanager.cfg +Type=simple +TimeoutSec=0 +RestartSec=5 +Restart=always +RemainAfterExit=true +StandardError=journal+console +SuccessExitStatus=255 +LimitCORE=infinity + +[Install] +WantedBy=multi-user.target diff --git a/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in b/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in index a07b0ccbb..f992bf226 100644 --- a/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in +++ b/packages/kmod-5.10-nvidia/nvidia-ld.so.conf.in @@ -1 +1 @@ -__LIBDIR__/nvidia/tesla/__NVIDIA_VERSION__/ +__LIBDIR__/nvidia/tesla/ diff --git a/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in b/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in new file mode 100644 index 000000000..5ae81b716 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-open-gpu-config.toml.in @@ -0,0 +1,11 @@ +[nvidia-open-gpu] +lib-modules-path = "kernel/drivers/extra/video/nvidia/open-gpu" + +[nvidia-open-gpu.kernel-modules."nvidia.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu.kernel-modules."nvidia-modeset.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu.kernel-modules."nvidia-uvm.ko"] +copy-source = "__NVIDIA_MODULES__" diff --git a/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in b/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in new file mode 100644 index 000000000..774867d42 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-open-gpu-copy-only-config.toml.in @@ -0,0 +1,8 @@ +[nvidia-open-gpu-copy-only] 
+lib-modules-path = "kernel/drivers/extra/video/nvidia/open-gpu" + +[nvidia-open-gpu-copy-only.kernel-modules."nvidia-drm.ko"] +copy-source = "__NVIDIA_MODULES__" + +[nvidia-open-gpu-copy-only.kernel-modules."nvidia-peermem.ko"] +copy-source = "__NVIDIA_MODULES__" diff --git a/packages/kmod-5.10-nvidia/nvidia-persistenced.service.in b/packages/kmod-5.10-nvidia/nvidia-persistenced.service similarity index 69% rename from packages/kmod-5.10-nvidia/nvidia-persistenced.service.in rename to packages/kmod-5.10-nvidia/nvidia-persistenced.service index 626630258..f245599c0 100644 --- a/packages/kmod-5.10-nvidia/nvidia-persistenced.service.in +++ b/packages/kmod-5.10-nvidia/nvidia-persistenced.service @@ -4,7 +4,7 @@ After=load-tesla-kernel-modules.service load-open-gpu-kernel-modules.service [Service] Type=forking -ExecStart=__NVIDIA_BINDIR__/nvidia-persistenced --user nvidia --verbose +ExecStart=/usr/libexec/nvidia/tesla/bin/nvidia-persistenced --user nvidia --verbose [Install] RequiredBy=preconfigured.target diff --git a/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf b/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf new file mode 100644 index 000000000..fd0f44863 --- /dev/null +++ b/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf @@ -0,0 +1,5 @@ +C /etc/drivers/nvidia-tesla.toml +C /etc/drivers/nvidia-open-gpu.toml +C /etc/drivers/nvidia-open-gpu-copy-only.toml +C /etc/containerd/nvidia.env - - - - /usr/share/factory/nvidia/tesla/nvidia-path.env +C /etc/ld.so.conf.d/nvidia-tesla.conf diff --git a/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf.in b/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf.in deleted file mode 100644 index f208e1d26..000000000 --- a/packages/kmod-5.10-nvidia/nvidia-tesla-tmpfiles.conf.in +++ /dev/null @@ -1,3 +0,0 @@ -C /etc/drivers/nvidia-tesla-__NVIDIA_VERSION__.toml -C /etc/containerd/nvidia.env - - - - /usr/share/factory/nvidia/tesla/__NVIDIA_VERSION__/nvidia-path.env -C 
/etc/ld.so.conf.d/nvidia-tesla-__NVIDIA_VERSION__.conf diff --git a/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in b/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in index f44152b33..e58fe143e 100644 --- a/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in +++ b/packages/kmod-5.10-nvidia/nvidia-tmpfiles.conf.in @@ -1,3 +1,7 @@ R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla - - - - - d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/tesla 0755 root root - - -D /var/run/nvidia-persistenced 0755 nvidia nvidia - - \ No newline at end of file +R __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/open-gpu - - - - - +d __PREFIX__/lib/modules/__KERNEL_VERSION__/kernel/drivers/extra/video/nvidia/open-gpu 0755 root root - - +C /etc/nvidia/fabricmanager.cfg - - - - +d /run/nvidia 0700 root root - +D /var/run/nvidia-persistenced 0755 nvidia nvidia - -