From ec5ea792fc10d4c03aa80e0f88522c81569c5464 Mon Sep 17 00:00:00 2001 From: Alejandro Acosta Date: Fri, 31 May 2024 16:03:36 +0200 Subject: [PATCH] Run sycl ampere benchmarks on cuda (#68) * Add generic example runner * Init d and ref_d with different values * Move runner to benchmark folder * Add generic example runner * Add Ampere half-float example * Add generic example runner * Add Ampere half-float example * Add Ampere half-float example * Add Ampere bfloat-float example * Add generic example runner * Add Ampere half-float example * Add Ampere bfloat-float example * Run sycl ampere example on cuda --- CMakeLists.txt | 5 +---- benchmarks/CMakeLists.txt | 22 ++++++++++--------- benchmarks/ampere/CMakeLists.txt | 4 ++-- ...ere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu} | 0 ...ere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu} | 0 5 files changed, 15 insertions(+), 16 deletions(-) rename benchmarks/ampere/{bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp => bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu} (100%) rename benchmarks/ampere/{bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cpp => bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 91408ddaa9..46b95fbaa6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -159,10 +159,7 @@ set(CUTLASS_ENABLE_LIBRARY ${CUTLASS_ENABLE_LIBRARY_INIT} CACHE BOOL "Enable CUT set(CUTLASS_ENABLE_PROFILER ${CUTLASS_ENABLE_LIBRARY} CACHE BOOL "Enable CUTLASS Profiler") set(CUTLASS_ENABLE_PERFORMANCE ${CUTLASS_ENABLE_PROFILER} CACHE BOOL "Enable CUTLASS Performance") option(CUTLASS_ENABLE_DEBUG_PRINTS "Whether or not to enable debug prints in CUTLASS kernels" OFF) - -if (CUTLASS_ENABLE_SYCL) - set(CUTLASS_ENABLE_BENCHMARKS ON CACHE BOOL "Enable CUTLASS Benchmarks") -endif() +set(CUTLASS_ENABLE_BENCHMARKS ON CACHE BOOL "Enable CUTLASS Benchmarks") set(CUTLASS_ENABLE_TESTS ${CUTLASS_ENABLE_TESTS_INIT} CACHE BOOL "Enable CUTLASS Tests") set(CUTLASS_ENABLE_GTEST_UNIT_TESTS 
${CUTLASS_ENABLE_TESTS} CACHE BOOL "Enable CUTLASS GTest-based Unit Tests") diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index d499f839a9..e4ec52b63f 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -43,30 +43,32 @@ function(cutlass_benchmark_add_executable NAME) add_dependencies(cutlass_benchmarks ${NAME}) + if (NOT CUTLASS_ENABLE_SYCL) + SET(ADD_CUDA ON) + endif() + target_link_libraries( ${NAME} PRIVATE CUTLASS cutlass_tools_util_includes - ) - - target_include_directories( - ${NAME} - PRIVATE - ${CUTLASS_BENCHMARKS_COMMON_SOURCE_DIR} - ) + $<$<BOOL:${ADD_CUDA}>:nvidia::cublas> + $<$<BOOL:${ADD_CUDA}>:cuda> + ) - add_sycl_to_target(TARGET ${NAME}) + if (CUTLASS_ENABLE_SYCL) + add_sycl_to_target(TARGET ${NAME}) + endif() install( TARGETS ${NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - ) + ) endfunction() if(SYCL_INTEL_TARGET) add_subdirectory(pvc) endif() -if (SYCL_NVIDIA_TARGET) +if(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL) add_subdirectory(ampere) endif() diff --git a/benchmarks/ampere/CMakeLists.txt b/benchmarks/ampere/CMakeLists.txt index 666d9cac60..a77901594b 100644 --- a/benchmarks/ampere/CMakeLists.txt +++ b/benchmarks/ampere/CMakeLists.txt @@ -29,10 +29,10 @@ cutlass_benchmark_add_executable( bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32 - bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cpp + bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu ) cutlass_benchmark_add_executable( bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32 - bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp + bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu ) diff --git a/benchmarks/ampere/bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp b/benchmarks/ampere/bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu similarity index 100% rename from benchmarks/ampere/bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp rename to benchmarks/ampere/bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu diff --git 
a/benchmarks/ampere/bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cpp b/benchmarks/ampere/bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu similarity index 100% rename from benchmarks/ampere/bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cpp rename to benchmarks/ampere/bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu