diff --git a/cub/test/CMakeLists.txt b/cub/test/CMakeLists.txt
index aa0482a3b14..7049d682920 100644
--- a/cub/test/CMakeLists.txt
+++ b/cub/test/CMakeLists.txt
@@ -26,6 +26,15 @@ option(METAL_BUILD_EXAMPLES OFF)
 option(METAL_BUILD_TESTS OFF)
 CPMAddPackage("gh:brunocodutra/metal@2.1.4")
 
+CPMAddPackage(
+  NAME NVTX
+  GITHUB_REPOSITORY NVIDIA/NVTX
+  GIT_TAG release-v3
+  DOWNLOAD_ONLY YES # single-value CPM option: fetch the sources only, do not configure them
+  SYSTEM YES # single-value CPM option: needs an explicit value as well
+)
+include("${NVTX_SOURCE_DIR}/c/nvtxImportedTargets.cmake") # presumably provides the nvtx3-cpp target linked below
+
 find_package(CUDAToolkit)
 
 set(curand_default OFF)
@@ -280,6 +289,10 @@ function(cub_add_test target_name_var test_name test_src cub_target launcher_id)
     target_include_directories(${test_target} PRIVATE "${CUB_SOURCE_DIR}/test")
     target_compile_definitions(${test_target} PRIVATE CUB_DETAIL_DEBUG_ENABLE_SYNC)
 
+    if ("${test_target}" MATCHES "nvtx_in_usercode") # only the NVTX user-code test links the user-side NVTX
+      target_link_libraries(${test_target} nvtx3-cpp)
+    endif()
+
     if (CUB_IN_THRUST)
       thrust_fix_clang_nvcc_build_for(${test_target})
     endif()
diff --git a/cub/test/test_nvtx_in_usercode.cu b/cub/test/test_nvtx_in_usercode.cu
new file mode 100644
index 00000000000..3271b80e4a1
--- /dev/null
+++ b/cub/test/test_nvtx_in_usercode.cu
@@ -0,0 +1,22 @@
+#include <cub/device/device_for.cuh> // internal include of NVTX
+
+#include <thrust/iterator/counting_iterator.h>
+
+#include <nvtx3/nvtx3.hpp> // user-side include of NVTX, retrieved elsewhere
+
+struct Op // functor applied to every element; prints the value it receives
+{
+  _CCCL_HOST_DEVICE void operator()(int i) const
+  {
+    printf("%d\n", i);
+  }
+};
+
+int main() // uses NVTX directly and through CUB within one translation unit
+{
+  nvtx3::scoped_range range("user-range"); // user-side use of NVTX
+
+  thrust::counting_iterator<int> it{0};
+  cub::DeviceFor::ForEach(it, it + 16, Op{}); // internal use of NVTX
+  cudaDeviceSynchronize(); // wait for the device work launched above to finish
+}