Commit 044bcf7

fix build

aacostadiaz committed Dec 10, 2024
1 parent a47e6de
Showing 6 changed files with 53 additions and 31 deletions.
examples/sycl/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -33,4 +33,4 @@ endif()

if (CUTLASS_ENABLE_SYCL)
add_subdirectory(device_agnostic)
-endif()
+endif()
@@ -48,8 +48,6 @@
#include "cutlass/tensor_view.h"
#include "cutlass/coord.h"

#include "common.hpp"

using namespace cute;

///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -193,6 +191,18 @@ struct ExampleRunner {
return passed;
}

template <typename T>
void initialize_block(cutlass::DeviceAllocation<T>& block_device, uint64_t seed) {
  // Seed the engine directly; re-seeding a random_device-initialized engine was redundant.
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);

  // Fill a host staging buffer of the element type T, then copy it to the device
  // so the GEMM actually sees the initialized data.
  auto block_host = std::vector<T>(block_device.size());
  for (auto& element : block_host) {
    element = static_cast<T>(dist(rng));
  }
  block_device.copy_from_host(block_host.data());
}

/// Initialize operands to be used in the GEMM and reference GEMM
void initialize(const ProblemShapeType& problem_size) {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
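Because initialize_block above seeds std::mt19937 with a caller-supplied value, the same seed always reproduces the same operand data, which is what makes fixed offsets such as seed + 2023 usable for repeatable verification. A minimal standalone sketch of that property, with a hypothetical make_block helper and a plain std::vector standing in for cutlass::DeviceAllocation:

#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

// Host-side analogue of initialize_block: one seed, one deterministic block.
std::vector<float> make_block(std::size_t n, std::uint64_t seed) {
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  std::vector<float> block(n);
  for (auto& element : block) {
    element = dist(rng);
  }
  return block;
}

int main() {
  auto a = make_block(8, 2023);
  auto b = make_block(8, 2023);
  std::cout << (a == b ? "deterministic" : "mismatch") << "\n";  // prints "deterministic"
  return 0;
}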
examples/sycl/device_agnostic/device_agnostic_gemm.cpp (34 changes: 26 additions & 8 deletions)
@@ -36,14 +36,14 @@
#include "cutlass/util/GPU_Clock.hpp"

#include <cute/tensor.hpp>
#include <vector>
#include <random>

#include "cutlass/util/command_line.h"
#include "cutlass/util/device_memory.h"
#include "cutlass/util/packed_stride.hpp"
#include "cutlass/util/reference/device/gemm_complex.h"
#include "cutlass/util/reference/device/tensor_compare.h"
#include "common.hpp"

#include "cutlass/util/device_memory.h"
#include "cutlass/util/reference/device/sycl_tensor_fill.h"
@@ -106,9 +106,7 @@ struct Options {

///////////////////////////////////////////////////////////////////////////////////////////////////

-template <
-  class Gemm
->
+template <class Gemm>
struct ExampleRunner {

using StrideA = typename Gemm::GemmKernel::StrideA;
@@ -189,6 +187,18 @@ struct ExampleRunner {
return passed;
}

template <typename T>
void initialize_block(cutlass::DeviceAllocation<T>& block_device, uint64_t seed) {
  // Seed the engine directly; re-seeding a random_device-initialized engine was redundant.
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);

  // Fill a host staging buffer of the element type T, then copy it to the device
  // so the GEMM actually sees the initialized data.
  auto block_host = std::vector<T>(block_device.size());
  for (auto& element : block_host) {
    element = static_cast<T>(dist(rng));
  }
  block_device.copy_from_host(block_host.data());
}

/// Initialize operands to be used in the GEMM and reference GEMM
void initialize(const ProblemShapeType& problem_size) {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
@@ -205,6 +215,14 @@ struct ExampleRunner {
block_D.reset(M * N * L);
block_ref_D.reset(M * N * L);

initialize_block(block_A, seed + 2023);
initialize_block(block_B, seed + 2022);
initialize_block(block_C, seed + 2021);
@@ -356,15 +374,15 @@ int main(int argc, const char** argv)
>;

using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-    Shape<int, int, int, int>,
-    CollectiveMainloop,
-    CollectiveEpilogue
->;
+    Shape<int, int, int, int>,
+    CollectiveMainloop,
+    CollectiveEpilogue>;

using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;

ExampleRunner<Gemm> runner;

runner.run(options, hw_info);

return 0;
}
File renamed without changes.
include/cutlass/gemm/device/gemm_universal_adapter.h (18 changes: 13 additions & 5 deletions)
@@ -456,16 +456,24 @@ class GemmUniversalAdapter<

using namespace syclcompat::experimental;
#if defined (SYCL_INTEL_TARGET)
-      auto event = launch<device_kernel<GemmKernel>>(launch_policy{
-        sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)},
-        kernel_properties{sycl_exp::sub_group_size<DispatchPolicy::SubgroupSize>}
-      }, params);
+      if constexpr (cute::is_same_v<DispatchPolicy, MainloopDeviceAgnostic>) {
+        // Device-agnostic kernels do not fix a sub-group size, so launch
+        // without the sub_group_size kernel property.
+        auto event = launch<device_kernel<GemmKernel>>(launch_policy{
+          sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)}
+        }, params);
+        EventManager::getInstance().addEvent(event);
+      } else {
+        auto event = launch<device_kernel<GemmKernel>>(launch_policy{
+          sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)},
+          kernel_properties{sycl_exp::sub_group_size<DispatchPolicy::SubgroupSize>}
+        }, params);
+        EventManager::getInstance().addEvent(event);
+      }
#else
auto event = launch<device_kernel<GemmKernel>>(launch_policy{
sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)}},
params);
#endif
-  EventManager::getInstance().addEvent(event);
#endif
#else
#if (CUTLASS_DEBUG_TRACE_LEVEL > 1)
CUTLASS_TRACE_HOST("GemmUniversal::run: Launching kernel with cutlass::kernel_launch");
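The key detail in the hunk above is the if constexpr: MainloopDeviceAgnostic has no SubgroupSize member, so the tuned branch that names DispatchPolicy::SubgroupSize must be discarded at compile time rather than merely skipped at run time. A minimal self-contained sketch of the pattern follows; the policy types here are stand-ins, not the real CUTLASS ones:

#include <iostream>
#include <type_traits>

struct MainloopDeviceAgnostic {};  // stand-in: deliberately has no SubgroupSize member
struct MainloopIntelXeXMX16 { static constexpr int SubgroupSize = 16; };  // stand-in

template <class DispatchPolicy>
void launch_kernel() {
  if constexpr (std::is_same_v<DispatchPolicy, MainloopDeviceAgnostic>) {
    // Portable path: no sub-group size is requested.
    std::cout << "launch without sub_group_size\n";
  } else {
    // This branch may name DispatchPolicy::SubgroupSize because, for the
    // device-agnostic policy, it is discarded before instantiation.
    std::cout << "launch with sub_group_size = " << DispatchPolicy::SubgroupSize << "\n";
  }
}

int main() {
  launch_kernel<MainloopDeviceAgnostic>();  // portable launch
  launch_kernel<MainloopIntelXeXMX16>();    // tuned launch, sub-group size 16
  return 0;
}

A plain run-time if would not compile here: both branches would be instantiated, and the device-agnostic policy has no SubgroupSize to instantiate against.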
test/unit/cute/CMakeLists.txt (16 changes: 1 addition & 15 deletions)
@@ -102,7 +102,7 @@ else()
#test_unit_cute_hopper
test_unit_cute_msvc_compilation
)
-elseif(SYCL_NVIDIA_TARGET)
+else()
add_custom_target(
cutlass_test_unit_cute
DEPENDS
@@ -122,19 +122,5 @@
test_unit_cute_intel_xe
)

-else()
-  add_custom_target(
-    cutlass_test_unit_cute
-    DEPENDS
-    cutlass_test_unit_cute_layout
-    cutlass_test_unit_cute_core
-  )
-
-  add_custom_target(
-    test_unit_cute
-    DEPENDS
-    test_unit_cute_layout
-    test_unit_cute_core
-  )
endif()
endif()
