Skip to content

Commit

Permalink
Add bfloat-bfloat example
Browse files Browse the repository at this point in the history
  • Loading branch information
aacostadiaz committed May 31, 2024
1 parent b6ae4eb commit 4e2b2e8
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 3 deletions.
3 changes: 0 additions & 3 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,3 @@ if(SYCL_INTEL_TARGET)
else(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL)
add_subdirectory(ampere)
endif()
# NOTE(review): this block repeats the condition and the add_subdirectory(ampere)
# call of the else() branch directly above. The change summary for this file lists
# 3 deletions, which presumably are these three lines -- confirm against the diff.
if(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL)
add_subdirectory(ampere)
endif()
5 changes: 5 additions & 0 deletions benchmarks/ampere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,8 @@ cutlass_benchmark_add_executable(
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu
)

# Registers the benchmark executable built from
# bench_ampere_gemm_bf16_bf16_bf16_tensor_op_fp32.cu.
# NOTE(review): by its name this is presumably the bfloat16-output counterpart of the
# bf16_bf16_fp32 benchmark registered just above -- confirm against the .cu source.
cutlass_benchmark_add_executable(
bench_ampere_gemm_bf16_bf16_bf16_tensor_op_fp32
bench_ampere_gemm_bf16_bf16_bf16_tensor_op_fp32.cu
)
9 changes: 9 additions & 0 deletions benchmarks/common/benchmark_runner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ struct BenchmarkRunner {
// Device allocations holding ElementOutput values.
// NOTE(review): by their names, block_D presumably receives the result under test and
// block_ref_D the reference result it is compared against -- confirm against the methods.
cutlass::DeviceAllocation<ElementOutput> block_D;
cutlass::DeviceAllocation<ElementOutput> block_ref_D;

ElementOutput epsilon;
ElementOutput nonzero_floor;

ExampleRunner() : epsilon(static_cast<ElementOutput>(0.1f)),
nonzero_floor(static_cast<ElementOutput>(0.1f)) {};

ExampleRunner(ElementOutput epsilon, ElementOutput nonzeroFloor) :
epsilon(epsilon), nonzero_floor(nonzeroFloor) {}

//
// Methods
//
Expand Down

0 comments on commit 4e2b2e8

Please sign in to comment.