Commit 044bcf7

fix build

aacostadiaz committed Dec 10, 2024
1 parent a47e6de
Showing 6 changed files with 53 additions and 31 deletions.
examples/sycl/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -33,4 +33,4 @@ endif()

if (CUTLASS_ENABLE_SYCL)
add_subdirectory(device_agnostic)
-endif()
+endif()
@@ -48,8 +48,6 @@
#include "cutlass/tensor_view.h"
#include "cutlass/coord.h"

#include "common.hpp"

using namespace cute;

///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -193,6 +191,18 @@ struct ExampleRunner {
return passed;
}

template <typename T>
void initialize_block(cutlass::DeviceAllocation<T>& block_device, uint64_t seed) {
  // Seed the engine directly; re-seeding a random_device-initialized engine was redundant.
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);

  // Fill a host staging buffer of the element type T, then copy it to the device
  // so the GEMM actually sees the initialized data.
  auto block_host = std::vector<T>(block_device.size());
  for (auto& element : block_host) {
    element = static_cast<T>(dist(rng));
  }
  block_device.copy_from_host(block_host.data());
}

/// Initialize operands to be used in the GEMM and reference GEMM
void initialize(const ProblemShapeType& problem_size) {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
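Because initialize_block above seeds std::mt19937 with a caller-supplied value, the same seed always reproduces the same operand data, which is what makes fixed offsets such as seed + 2023 usable for repeatable verification. A minimal standalone sketch of that property, with a hypothetical make_block helper and a plain std::vector standing in for cutlass::DeviceAllocation:

#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

// Host-side analogue of initialize_block: one seed, one deterministic block.
std::vector<float> make_block(std::size_t n, std::uint64_t seed) {
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  std::vector<float> block(n);
  for (auto& element : block) {
    element = dist(rng);
  }
  return block;
}

int main() {
  auto a = make_block(8, 2023);
  auto b = make_block(8, 2023);
  std::cout << (a == b ? "deterministic" : "mismatch") << "\n";  // prints "deterministic"
  return 0;
}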
examples/sycl/device_agnostic/device_agnostic_gemm.cpp (34 changes: 26 additions & 8 deletions)
@@ -36,14 +36,14 @@
#include "cutlass/util/GPU_Clock.hpp"

#include <cute/tensor.hpp>
#include <vector>
#include <random>

#include "cutlass/util/command_line.h"
#include "cutlass/util/device_memory.h"
#include "cutlass/util/packed_stride.hpp"
#include "cutlass/util/reference/device/gemm_complex.h"
#include "cutlass/util/reference/device/tensor_compare.h"
#include "common.hpp"

#include "cutlass/util/device_memory.h"
#include "cutlass/util/reference/device/sycl_tensor_fill.h"
@@ -106,9 +106,7 @@ struct Options {

///////////////////////////////////////////////////////////////////////////////////////////////////

-template <
-  class Gemm
->
+template <class Gemm>
struct ExampleRunner {

using StrideA = typename Gemm::GemmKernel::StrideA;
@@ -189,6 +187,18 @@ struct ExampleRunner {
return passed;
}

template <typename T>
void initialize_block(cutlass::DeviceAllocation<T>& block_device, uint64_t seed) {
  // Seed the engine directly; re-seeding a random_device-initialized engine was redundant.
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);

  // Fill a host staging buffer of the element type T, then copy it to the device
  // so the GEMM actually sees the initialized data.
  auto block_host = std::vector<T>(block_device.size());
  for (auto& element : block_host) {
    element = static_cast<T>(dist(rng));
  }
  block_device.copy_from_host(block_host.data());
}

/// Initialize operands to be used in the GEMM and reference GEMM
void initialize(const ProblemShapeType& problem_size) {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
@@ -205,6 +215,14 @@ struct ExampleRunner {
block_D.reset(M * N * L);
block_ref_D.reset(M * N * L);

initialize_block(block_A, seed + 2023);
initialize_block(block_B, seed + 2022);
initialize_block(block_C, seed + 2021);
@@ -356,15 +374,15 @@ int main(int argc, const char** argv)
>;

using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-    Shape<int, int, int, int>,
-    CollectiveMainloop,
-    CollectiveEpilogue
->;
+    Shape<int, int, int, int>,
+    CollectiveMainloop,
+    CollectiveEpilogue>;

using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;

ExampleRunner<Gemm> runner;

runner.run(options, hw_info);

return 0;
}
File renamed without changes.
include/cutlass/gemm/device/gemm_universal_adapter.h (18 changes: 13 additions & 5 deletions)
@@ -456,16 +456,24 @@ class GemmUniversalAdapter<

using namespace syclcompat::experimental;
#if defined (SYCL_INTEL_TARGET)
-      auto event = launch<device_kernel<GemmKernel>>(launch_policy{
-        sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)},
-        kernel_properties{sycl_exp::sub_group_size<DispatchPolicy::SubgroupSize>}
-      }, params);
+      if constexpr (cute::is_same_v<DispatchPolicy, MainloopDeviceAgnostic>) {
+        // Device-agnostic kernels do not fix a sub-group size, so launch
+        // without the sub_group_size kernel property.
+        auto event = launch<device_kernel<GemmKernel>>(launch_policy{
+          sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)}
+        }, params);
+        EventManager::getInstance().addEvent(event);
+      } else {
+        auto event = launch<device_kernel<GemmKernel>>(launch_policy{
+          sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)},
+          kernel_properties{sycl_exp::sub_group_size<DispatchPolicy::SubgroupSize>}
+        }, params);
+        EventManager::getInstance().addEvent(event);
+      }
#else
auto event = launch<device_kernel<GemmKernel>>(launch_policy{
sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)}},
params);
#endif
-  EventManager::getInstance().addEvent(event);
#endif
#else
#if (CUTLASS_DEBUG_TRACE_LEVEL > 1)
CUTLASS_TRACE_HOST("GemmUniversal::run: Launching kernel with cutlass::kernel_launch");
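The key detail in the hunk above is the if constexpr: MainloopDeviceAgnostic has no SubgroupSize member, so the tuned branch that names DispatchPolicy::SubgroupSize must be discarded at compile time rather than merely skipped at run time. A minimal self-contained sketch of the pattern follows; the policy types here are stand-ins, not the real CUTLASS ones:

#include <iostream>
#include <type_traits>

struct MainloopDeviceAgnostic {};  // stand-in: deliberately has no SubgroupSize member
struct MainloopIntelXeXMX16 { static constexpr int SubgroupSize = 16; };  // stand-in

template <class DispatchPolicy>
void launch_kernel() {
  if constexpr (std::is_same_v<DispatchPolicy, MainloopDeviceAgnostic>) {
    // Portable path: no sub-group size is requested.
    std::cout << "launch without sub_group_size\n";
  } else {
    // This branch may name DispatchPolicy::SubgroupSize because, for the
    // device-agnostic policy, it is discarded before instantiation.
    std::cout << "launch with sub_group_size = " << DispatchPolicy::SubgroupSize << "\n";
  }
}

int main() {
  launch_kernel<MainloopDeviceAgnostic>();  // portable launch
  launch_kernel<MainloopIntelXeXMX16>();    // tuned launch, sub-group size 16
  return 0;
}

A plain run-time if would not compile here: both branches would be instantiated, and the device-agnostic policy has no SubgroupSize to instantiate against.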
test/unit/cute/CMakeLists.txt (16 changes: 1 addition & 15 deletions)
@@ -102,7 +102,7 @@ else()
#test_unit_cute_hopper
test_unit_cute_msvc_compilation
)
-elseif(SYCL_NVIDIA_TARGET)
+else()
add_custom_target(
cutlass_test_unit_cute
DEPENDS
@@ -122,19 +122,5 @@
test_unit_cute_intel_xe
)

-else()
-  add_custom_target(
-    cutlass_test_unit_cute
-    DEPENDS
-    cutlass_test_unit_cute_layout
-    cutlass_test_unit_cute_core
-  )
-
-  add_custom_target(
-    test_unit_cute
-    DEPENDS
-    test_unit_cute_layout
-    test_unit_cute_core
-  )
endif()
endif()
