Skip to content

Commit

Permalink
Enable CUTE APIs (Copy, MMA etc.) for Intel GPU (PVC) (#131)
Browse files Browse the repository at this point in the history
Add Cute components for Intel XE 

---------

Co-authored-by: jiyang1011 <[email protected]>
  • Loading branch information
taozha2 and jiyang1011 authored Oct 22, 2024
1 parent 321c531 commit e0c3ceb
Show file tree
Hide file tree
Showing 30 changed files with 8,433 additions and 894 deletions.
14 changes: 7 additions & 7 deletions benchmarks/pvc/gemm_configuration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ struct Gemm_OperandB;

template<>
struct Gemm_OperandA<bfloat16_t, layout::RowMajor> {
using GmemTiledCopy = XE_2D_U16x8x16x4x2_LD_N;
using GmemTiledCopy = XE_2D_U16x8x16_LD_N;
};

template<>
struct Gemm_OperandB<bfloat16_t, layout::RowMajor> {
using GmemTiledCopy = XE_2D_U16x16x16x2x2_V;
using GmemTiledCopy = XE_2D_U16x16x16_LD_V;
};

} // namespace details
Expand All @@ -93,12 +93,12 @@ struct GemmConfiguration<
bfloat16_t, LayoutB,
float, LayoutC,
float> {
using TileShape = Shape<_256, _256, _32>;
using TileShape = Shape<_256, _256, _16>;
using DispatchPolicy = MainloopIntelPVC<3>;;
using TiledMma = TiledMMA<
MMA_Atom<XE_8x16x16_F32BF16BF16F32_TT>,
Layout<Shape<_1,_1,_1>>,
Tile<_32,_64,_32>>;
Layout<Shape<_1,_8,_1>>,
Tile<_64,_128,_16>>;

// A
using OperandA = detail::Gemm_OperandA<bfloat16_t, LayoutA>;
Expand Down Expand Up @@ -132,9 +132,9 @@ struct GemmConfiguration<
float,
TagToStrideC_t<LayoutC>,
FusionCallBacks,
XE_2D_U32x8x16x1x1_LD_N,
XE_2D_U32x8x16_LD_N,
void, void,
XE_2D_U32x8x16x1x1_ST_N,
XE_2D_U32x8x16_ST_N,
void, void>;

using GemmKernel = kernel::GemmUniversal<
Expand Down
14 changes: 7 additions & 7 deletions examples/sycl/pvc/pvc_gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,15 +306,15 @@ int main(int argc, const char** argv)
using LayoutC = cutlass::layout::RowMajor;
using LayoutD = cutlass::layout::RowMajor;

using GmemTiledCopyA = XE_2D_U16x8x16x4x2_LD_N;
using GmemTiledCopyB = XE_2D_U16x16x16x2x2_V;
using GmemTiledCopyA = XE_2D_U16x8x16_LD_N;
using GmemTiledCopyB = XE_2D_U16x16x16_LD_V;

// Workgroup-level tile
using TileShape = Shape<_256, _256, _32>;
using TileShape = Shape<_256, _128, _16>;

using TiledMma = TiledMMA<MMA_Atom<XE_8x16x16_F32BF16BF16F32_TT>,
Layout<Shape<_1,_1,_1>>,
Tile<_32,_64,_32>>; // Subgroup level-tile
Layout<Shape<_8,_2,_1>>,
Tile<_64,_32,_16>>; // Subgroup level-tile

constexpr int PipelineStages = 3;
using GEMMDispatchPolicy = cutlass::gemm::MainloopIntelPVC<PipelineStages>;
Expand All @@ -333,9 +333,9 @@ int main(int argc, const char** argv)
ElementOutput,
cutlass::gemm::TagToStrideC_t<LayoutD>,
FusionCallBacks,
XE_2D_U32x8x16x1x1_LD_N,
XE_2D_U32x8x16_LD_N,
void, void,
XE_2D_U32x8x16x1x1_ST_N,
XE_2D_U32x8x16_ST_N,
void, void>;

// Mainloop
Expand Down
Loading

0 comments on commit e0c3ceb

Please sign in to comment.