Skip to content

Commit

Permalink
Fix issue when profiler is not enable
Browse files Browse the repository at this point in the history
  • Loading branch information
aacostadiaz committed Jul 16, 2024
1 parent e365408 commit 55c873c
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 13 deletions.
6 changes: 5 additions & 1 deletion benchmarks/common/benchmark_runner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
*
**************************************************************************************************/

#if defined(CUTLASS_ENABLE_SYCL)
#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED
#endif

#include "cutlass/gemm/device/gemm.h"
#include "cutlass/epilogue/collective/default_epilogue.hpp"
#include "cutlass/gemm/device/gemm_universal.h"
Expand Down Expand Up @@ -208,7 +212,7 @@ struct BenchmarkRunner {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
auto [M, N, K, L] = problem_shape_MNKL;

#if defined(CUTLASS_ENABLE_SYCL)
#if defined(CUTLASS_SYCLCOMPAT_PROFILING_ENABLED)
sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include <cstdlib>
#include <cstdio>

Expand Down
8 changes: 6 additions & 2 deletions examples/cute/tutorial/sgemm_1_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include "cutlass/util/GPU_Clock.hpp"
#include "cutlass/util/print_error.hpp"

Expand Down Expand Up @@ -295,12 +297,13 @@ void gemm_nt(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B
auto dimBlock = syclcompat::dim3(size(tC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));

syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(tA), TB, decltype(dB), decltype(sB), decltype(tB), TC, decltype(dC),
decltype(sC), decltype(tC), Alpha, Beta>>(dimGrid, dimBlock, prob_shape,
cta_tiler, A, dA, sA, tA, B, dB, sB, tB,
C, dC, sC, tC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

// Setup params for a TN GEMM
Expand Down Expand Up @@ -341,12 +344,13 @@ void gemm_tn(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B
auto dimBlock = syclcompat::dim3(size(tC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));

syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(tA), TB, decltype(dB), decltype(sB), decltype(tB), TC, decltype(dC),
decltype(sC), decltype(tC), Alpha, Beta>>(dimGrid, dimBlock, prob_shape,
cta_tiler, A, dA, sA, tA, B, dB, sB, tB,
C, dC, sC, tC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

template <class TA, class TB, class TC, class Alpha, class Beta>
Expand Down
8 changes: 6 additions & 2 deletions examples/cute/tutorial/sgemm_2_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include "cutlass/util/GPU_Clock.hpp"
#include "cutlass/util/print_error.hpp"

Expand Down Expand Up @@ -287,12 +289,13 @@ void gemm_nt(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC,
sC, mmaC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

// Setup params for a TN GEMM
Expand Down Expand Up @@ -361,12 +364,13 @@ void gemm_tn(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC,
sC, mmaC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

template <class TA, class TB, class TC, class Alpha, class Beta>
Expand Down
8 changes: 6 additions & 2 deletions examples/cute/tutorial/sgemm_sm70_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include "cutlass/util/GPU_Clock.hpp"
#include "cutlass/util/print_error.hpp"

Expand Down Expand Up @@ -283,12 +285,13 @@ void gemm_nt(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC,
sC, mmaC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

// Setup params for a TN GEMM
Expand Down Expand Up @@ -349,12 +352,13 @@ void gemm_tn(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC,
sC, mmaC, alpha, beta);
EventManager::getInstance().addEvent(event);
}

template <class TA, class TB, class TC, class Alpha, class Beta>
Expand Down
8 changes: 6 additions & 2 deletions examples/cute/tutorial/sgemm_sm80_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include "cutlass/util/GPU_Clock.hpp"
#include "cutlass/util/print_error.hpp"

Expand Down Expand Up @@ -362,12 +364,13 @@ void gemm_nt(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC, sC, mmaC,
alpha, beta);
EventManager::getInstance().addEvent(event);
}

// Setup params for a NT GEMM
Expand Down Expand Up @@ -433,12 +436,13 @@ void gemm_tn(int m, int n, int k, Alpha alpha, TA const* A, int ldA, TB const* B

auto dimBlock = syclcompat::dim3(size(mmaC));
auto dimGrid = syclcompat::dim3(size(ceil_div(M, bM)), size(ceil_div(N, bN)));
syclcompat::launch<
auto event = syclcompat::launch<
gemm_device<decltype(prob_shape), decltype(cta_tiler), TA, decltype(dA), decltype(sA),
decltype(copyA), TB, decltype(dB), decltype(sB), decltype(copyB), TC,
decltype(dC), decltype(sC), decltype(mmaC), Alpha, Beta>>(
dimGrid, dimBlock, prob_shape, cta_tiler, A, dA, sA, copyA, B, dB, sB, copyB, C, dC, sC, mmaC,
alpha, beta);
EventManager::getInstance().addEvent(event);
}

template <class TA, class TB, class TC, class Alpha, class Beta>
Expand Down
10 changes: 10 additions & 0 deletions examples/sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*
**************************************************************************************************/

#define CUTLASS_SYCLCOMPAT_PROFILING_ENABLED

#include "cutlass/gemm/device/gemm.h"
#include "cutlass/epilogue/collective/default_epilogue.hpp"
#include "cutlass/epilogue/collective/intel_pvc_epilogue.hpp"
Expand Down Expand Up @@ -260,6 +262,14 @@ struct ExampleRunner {

initialize(problem_size);

sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

typename Gemm::GemmKernel::Arguments arguments{
cutlass::gemm::GemmUniversalMode::kGemm,
problem_size,
Expand Down
10 changes: 6 additions & 4 deletions tools/util/include/cutlass/util/sycl_event_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,20 @@ class EventManager {

};

void syclEventDestroy(SyclEvent const& event) {
inline void syclEventDestroy(SyclEvent const& event) {
EventManager::getInstance().eventDestroy();
}

void syclEventRecord(SyclEvent &event) {
inline void syclEventRecord(SyclEvent &event) {
EventManager::getInstance().startRecording(event);
}

void syclEventSynchronize(SyclEvent const& begin, SyclEvent const& end) {
inline void syclEventSynchronize(SyclEvent const& begin, SyclEvent const& end) {
EventManager::getInstance().wait(begin, end);
}

void syclEventElapsedTime(float* time, SyclEvent const& begin, SyclEvent const& end) {
inline void syclEventElapsedTime(float* time, SyclEvent const& begin, SyclEvent const& end) {
#if defined(CUTLASS_SYCLCOMPAT_PROFILING_ENABLED)
*time = EventManager::getInstance().getEventElapsedTimeMs(begin, end);
#endif
}

0 comments on commit 55c873c

Please sign in to comment.