Skip to content

Commit

Permalink
Merge pull request #21031 from sara-nl/20240722171932_new_pr_CUDA-Sam…
Browse files Browse the repository at this point in the history
…ples118

{lang}[GCC/11.3.0] CUDA-Samples v11.8, CUDA-Samples v12.2 w/ CUDA 11.7.0, CUDA 12.2.0
  • Loading branch information
verdurin authored Aug 15, 2024
2 parents 35e3a19 + e99bfd0 commit 31e2f45
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '10.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['2bee5f7c89347259aaab75aa6df6e10375059bdbbaf04cc7936f5db7d54fa3ac']
patches = ['cuda-samples-11.3_multiple-sms.patch']
checksums = [
{'v11.3.tar.gz': '2bee5f7c89347259aaab75aa6df6e10375059bdbbaf04cc7936f5db7d54fa3ac'},
{'cuda-samples-11.3_multiple-sms.patch': 'b31613f4160456f0d0abf82999c7fb7eee781f0efadc8b9bbb5a02ef0f37e21d'},
]

dependencies = [
('CUDA', '11.3.1', '', SYSTEM),
Expand All @@ -32,7 +36,7 @@ local_filters += "Samples/simpleVulkan/Makefile "
local_filters += "Samples/simpleVulkanMMAP/Makefile "
local_filters += "Samples/streamOrderedAllocationIPC/Makefile "
local_filters += "Samples/vulkanImageCUDA/Makefile"
buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

files_to_copy = [
(['bin/%s/linux/release/*' % ARCH], 'bin'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '11.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['75b858bcf9e534eaa0f129c418e661b83872d743de218df8a5278cc429f9ea98']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
{'v11.6.tar.gz': '75b858bcf9e534eaa0f129c418e661b83872d743de218df8a5278cc429f9ea98'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
Expand All @@ -33,7 +37,7 @@ local_filters += "Samples/simpleVulkanMMAP/Makefile "
local_filters += "Samples/streamOrderedAllocationIPC/Makefile "
local_filters += "Samples/vulkanImageCUDA/Makefile"

buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

files_to_copy = [
(['bin/%s/linux/release/*' % ARCH], 'bin'),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
easyblock = 'MakeCp'

name = 'CUDA-Samples'
version = '11.8'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/NVIDIA/cuda-samples'
description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"

toolchain = {'name': 'GCC', 'version': '11.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
    {'v11.8.tar.gz': '1bc02c0ca42a323f3c7a05b5682eae703681a91e95b135bfe81f848b2d6a2c51'},
    {'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

dependencies = [
    ('CUDA', '11.7.0', '', SYSTEM),
]

# Delete the bin/win64 directory (pre-built Windows DLLs) from the unpacked sources
# before make is started. All samples that are not filtered out below get built.
prebuildopts = "rm -r bin/win64 && "

# Makefiles of the samples that are left out of the build because they require extensive dependencies.
local_filtered_makefiles = [
    "Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/Makefile",
    "Samples/4_CUDA_Libraries/boxFilterNPP/Makefile",
    "Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSci/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile",
    "Samples/5_Domain_Specific/simpleGL/Makefile",
    "Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile",
    "Samples/5_Domain_Specific/simpleVulkan/Makefile",
    "Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile",
    "Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile",
    "Samples/5_Domain_Specific/vulkanImageCUDA/Makefile",
    "Samples/6_Performance/LargeKernelParameter/Makefile",
]
# Value handed to FILTER_OUT: every path, including the last one, is followed by a single space.
local_filters = ''.join('%s ' % x for x in local_filtered_makefiles)

# SMS is passed as the literal template %(cuda_cc_space_sep_no_period)s, which is not expanded here
# (presumably EasyBuild fills it in from the configured CUDA compute capabilities - TODO confirm).
# NOTE(review): the multiple-sms patch selects the PTX target with make's $(firstword $(sort $(SMS)));
# $(sort) orders words lexically, so that is the numerically lowest SM only as long as all entries
# have the same number of digits.
buildopts = ' '.join([
    "HOST_COMPILER=g++",
    "SMS='%(cuda_cc_space_sep_no_period)s'",
    "FILTER_OUT='%s'" % local_filters,
])

# Everything found in the Linux release directory for this architecture is copied to bin/.
local_release_glob = 'bin/%s/linux/release/*' % ARCH
files_to_copy = [
    ([local_release_glob], 'bin'),
    'LICENSE',
]

local_binaries = ['deviceQuery', 'matrixMul', 'bandwidthTest', 'cudaOpenMP']

# The sanity check only verifies that files exist; no commands are executed.
# Running the binaries may fail when compatibility libraries are missing that
# could be required to use this specific CUDA version together with the
# NVIDIA drivers that are available.
sanity_check_paths = {
    'files': ['bin/' + x for x in local_binaries],
    'dirs': [],
}

moduleclass = 'lang'
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '12.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['f758160645b366d79c2638d8dfd389f01029b8d179ab0c11726b9ef58aecebd9']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
{'v12.1.tar.gz': 'f758160645b366d79c2638d8dfd389f01029b8d179ab0c11726b9ef58aecebd9'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
Expand Down Expand Up @@ -58,7 +62,7 @@ if ARCH == 'aarch64':
local_filters += "Samples/3_CUDA_Features/cdpQuadtree/Makefile "
local_filters += "Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile "

buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

# Remove libraries in the bin dir after a successful 'make'
buildopts += " && rm bin/*/linux/release/lib*.so.*"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
easyblock = 'MakeCp'

name = 'CUDA-Samples'
version = '12.2'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/NVIDIA/cuda-samples'
description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"

toolchain = {'name': 'GCC', 'version': '11.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
    {'v12.2.tar.gz': '1823cfe28e97a9230107aa72b231f78952c0f178b71a920f036d360518480bdc'},
    {'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

builddependencies = [
    ('CMake', '3.24.3'),
]

dependencies = [
    ('CUDA', '12.2.0', '', SYSTEM),
]

# Delete the bin/win64 directory (pre-built Windows DLLs) from the unpacked sources
# before make is started. All samples that are not filtered out below get built.
prebuildopts = "rm -r bin/win64 && "

# Makefiles of the samples that are left out of the build because they require extensive dependencies.
local_filtered_makefiles = [
    "Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/Makefile",
    "Samples/4_CUDA_Libraries/boxFilterNPP/Makefile",
    "Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSci/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile",
    "Samples/5_Domain_Specific/simpleGL/Makefile",
    "Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile",
    "Samples/5_Domain_Specific/simpleVulkan/Makefile",
    "Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile",
    "Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile",
    "Samples/5_Domain_Specific/vulkanImageCUDA/Makefile",
    "Samples/6_Performance/LargeKernelParameter/Makefile",
]
# Value handed to FILTER_OUT: every path, including the last one, is followed by a single space.
local_filters = ''.join('%s ' % x for x in local_filtered_makefiles)

# SMS is passed as the literal template %(cuda_cc_space_sep_no_period)s, which is not expanded here
# (presumably EasyBuild fills it in from the configured CUDA compute capabilities - TODO confirm).
# NOTE(review): the multiple-sms patch selects the PTX target with make's $(firstword $(sort $(SMS)));
# $(sort) orders words lexically, so that is the numerically lowest SM only as long as all entries
# have the same number of digits.
buildopts = ' '.join([
    "HOST_COMPILER=g++",
    "SMS='%(cuda_cc_space_sep_no_period)s'",
    "FILTER_OUT='%s'" % local_filters,
])

# Everything found in the Linux release directory for this architecture is copied to bin/.
local_release_glob = 'bin/%s/linux/release/*' % ARCH
files_to_copy = [
    ([local_release_glob], 'bin'),
    'LICENSE',
]

local_binaries = ['deviceQuery', 'matrixMul', 'bandwidthTest', 'cudaOpenMP']

# The sanity check only verifies that files exist; no commands are executed.
# Running the binaries may fail when compatibility libraries are missing that
# could be required to use this specific CUDA version together with the
# NVIDIA drivers that are available.
sanity_check_paths = {
    'files': ['bin/' + x for x in local_binaries],
    'dirs': [],
}

moduleclass = 'lang'
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Fixes "nvcc fatal: Option '--ptx (-ptx)' is not allowed when compiling for multiple GPU architectures"
# fatal compilation issue when building for multiple SM architectures
# More info, see https://github.com/NVIDIA/cuda-samples/issues/289

# Author: Caspar van Leeuwen

diff -Nru cuda-samples-11.3.orig/Samples/memMapIPCDrv/Makefile cuda-samples-11.3/Samples/memMapIPCDrv/Makefile
--- cuda-samples-11.3.orig/Samples/memMapIPCDrv/Makefile 2024-07-29 13:17:10.330743000 +0200
+++ cuda-samples-11.3/Samples/memMapIPCDrv/Makefile 2024-07-29 13:19:13.158507504 +0200
@@ -321,6 +321,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -401,7 +407,7 @@
endif

$(PTX_FILE): memMapIpc_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Fixes "nvcc fatal: Option '--ptx (-ptx)' is not allowed when compiling for multiple GPU architectures"
# fatal compilation issue when building for multiple SM architectures
# More info, see https://github.com/NVIDIA/cuda-samples/issues/289

# Author: Caspar van Leeuwen

diff -Nru cuda-samples-12.2.orig/Samples/3_CUDA_Features/memMapIPCDrv/Makefile cuda-samples-12.2/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
--- cuda-samples-12.2.orig/Samples/3_CUDA_Features/memMapIPCDrv/Makefile 2024-07-29 12:14:28.538848000 +0200
+++ cuda-samples-12.2/Samples/3_CUDA_Features/memMapIPCDrv/Makefile 2024-07-29 13:02:45.134261829 +0200
@@ -313,6 +313,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -394,7 +400,7 @@
endif

$(PTX_FILE): memMapIpc_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
diff -Nru cuda-samples-12.2.orig/Samples/3_CUDA_Features/ptxjit/Makefile cuda-samples-12.2/Samples/3_CUDA_Features/ptxjit/Makefile
--- cuda-samples-12.2.orig/Samples/3_CUDA_Features/ptxjit/Makefile 2024-07-29 12:14:28.546771000 +0200
+++ cuda-samples-12.2/Samples/3_CUDA_Features/ptxjit/Makefile 2024-07-29 13:02:38.741961008 +0200
@@ -307,6 +307,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -390,7 +396,7 @@
endif

$(PTX_FILE): ptxjit_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

0 comments on commit 31e2f45

Please sign in to comment.