Skip to content

Commit

Permalink
xe_load_aux experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
FMarno committed Dec 5, 2024
1 parent 45e7111 commit c3bf6d6
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions include/cutlass/epilogue/fusion/xe_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ struct XeAuxLoad {
// Optionally append 1s until problem shape is rank-4 in case it is only rank-3 (MNK)
auto problem_shape_mnkl = append<4>(problem_shape, 1);
auto [M, N, K, L] = problem_shape_mnkl;
// args.dAux: 4096, 1
// args.dAux is the stride of aux
// TODO(finlay): figure out what `with` does and why the arguments are so
// TODO(codeplay): This assumes a packed aux matrix
// pad the front of the stride with zeros to simulate a broadcast
auto dAux = prepend<3>(args.dAux, 0);
auto M_AUX = get<1>(dAux);
auto N_AUX = get<1>(dAux);
XE_Copy_Aux xe_load_aux = make_tiled_copy(Copy_Atom<Trait_Aux, Element>{}.with(
args.ptr_aux, N, M, N), // TODO what is N, M, N, where do the strides go?
args.ptr_aux, N_AUX, M_AUX, N_AUX),
Layout<Shape<_1, SubgroupSize>>{},
make_layout(make_shape(get<0>(typename Trait_Aux::Shape_MN{}),
get<1>(typename Trait_Aux::Shape_MN{}) / SubgroupSize{})));
Expand Down

0 comments on commit c3bf6d6

Please sign in to comment.