Skip to content

Commit

Permalink
Run SYCL Ampere benchmarks on CUDA (#68)
Browse files Browse the repository at this point in the history
* Add generic example runner

* Init d and ref_d with different values

* Move runner to benchmark folder

* Add generic example runner

* Add Ampere half-float example

* Add generic example runner

* Add Ampere half-float example

* Add Ampere half-float example

* Add Ampere bfloat-float example

* Add generic example runner

* Add Ampere half-float example

* Add Ampere bfloat-float example

* Run SYCL Ampere example on CUDA
  • Loading branch information
aacostadiaz authored May 31, 2024
1 parent f3144b2 commit ec5ea79
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 16 deletions.
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,7 @@ set(CUTLASS_ENABLE_LIBRARY ${CUTLASS_ENABLE_LIBRARY_INIT} CACHE BOOL "Enable CUT
set(CUTLASS_ENABLE_PROFILER ${CUTLASS_ENABLE_LIBRARY} CACHE BOOL "Enable CUTLASS Profiler")
set(CUTLASS_ENABLE_PERFORMANCE ${CUTLASS_ENABLE_PROFILER} CACHE BOOL "Enable CUTLASS Performance")
option(CUTLASS_ENABLE_DEBUG_PRINTS "Whether or not to enable debug prints in CUTLASS kernels" OFF)

if (CUTLASS_ENABLE_SYCL)
set(CUTLASS_ENABLE_BENCHMARKS ON CACHE BOOL "Enable CUTLASS Benchmarks")
endif()
set(CUTLASS_ENABLE_BENCHMARKS ON CACHE BOOL "Enable CUTLASS Benchmarks")

set(CUTLASS_ENABLE_TESTS ${CUTLASS_ENABLE_TESTS_INIT} CACHE BOOL "Enable CUTLASS Tests")
set(CUTLASS_ENABLE_GTEST_UNIT_TESTS ${CUTLASS_ENABLE_TESTS} CACHE BOOL "Enable CUTLASS GTest-based Unit Tests")
Expand Down
22 changes: 12 additions & 10 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,30 +43,32 @@ function(cutlass_benchmark_add_executable NAME)

add_dependencies(cutlass_benchmarks ${NAME})

if (NOT CUTLASS_ENABLE_SYCL)
SET(ADD_CUDA ON)
endif()

target_link_libraries(
${NAME}
PRIVATE
CUTLASS
cutlass_tools_util_includes
)

target_include_directories(
${NAME}
PRIVATE
${CUTLASS_BENCHMARKS_COMMON_SOURCE_DIR}
)
$<$<BOOL:${CUTLASS_ENABLE_CUBLAS}>:nvidia::cublas>
$<$<BOOL:${ADD_CUDA}>:cuda>
)

add_sycl_to_target(TARGET ${NAME})
if (CUTLASS_ENABLE_SYCL)
add_sycl_to_target(TARGET ${NAME})
endif()

install(
TARGETS ${NAME}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
)
endfunction()

if(SYCL_INTEL_TARGET)
add_subdirectory(pvc)
endif()
if (SYCL_NVIDIA_TARGET)
if(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL)
add_subdirectory(ampere)
endif()
4 changes: 2 additions & 2 deletions benchmarks/ampere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@

cutlass_benchmark_add_executable(
bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32
bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cpp
bench_ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu
)

cutlass_benchmark_add_executable(
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu
)

0 comments on commit ec5ea79

Please sign in to comment.