Skip to content

Commit 0a2ac40

Browse files
committed
Update example/ck_tile/40_streamk_gemm/README.md
1 parent 4a1b242 commit 0a2ac40

File tree

3 files changed: +22 -6 lines changed

example/ck_tile/40_streamk_gemm/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ args:
2323
-b_layout tensor B data layout (default: C)
2424
-c_layout tensor C data layout (default: R)
2525
-reduction_strategy strategy for storing results in C tensor. atomic/reduction (default:atomic)
26-
-persistent_dp persistent strategy for data-parallel section. 0. Non-persistent, 1 persistent.")
26+
-persistent_dp persistent strategy for data-parallel section. Set to 0 for non-persistent or to 1 for persistent. (default:0)
2727
-stride_a tensor A stride (default:0)
2828
-stride_b tensor B stride (default:0)
2929
-stride_c tensor C stride (default:0)

example/ck_tile/40_streamk_gemm/run_gemm_example.inc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ int run_gemm_example_with_layouts(int argc,
278278
<< " persistent_dp=" << arg_parser.get_str("persistent_dp") << " " << ave_time
279279
<< " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, " << std::endl;
280280

281-
bool pass = true;
281+
bool pass = false;
282282

283283
// Memory on host to store gpu reference result
284284
ck_tile::HostTensor<CDataType> c_m_n_ref(

example/ck_tile/40_streamk_gemm/streamk_gemm_basic.cpp

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -212,14 +212,30 @@ int run_gemm_example(int argc, char* argv[])
212212
else if(data_type == "fp8")
213213
{
214214
using TypeConfig = StreamKGemmTypeConfig<ck_tile::fp8_t, ck_tile::fp8_t, ck_tile::half_t>;
215-
return run_gemm_example_prec_type<GemmConfig<ck_tile::fp8_t>, TypeConfig>(
216-
a_layout, b_layout, argc, argv);
215+
if(persistent_dp)
216+
{
217+
return run_gemm_example_prec_type<GemmConfig<ck_tile::fp8_t, true>, TypeConfig>(
218+
a_layout, b_layout, argc, argv);
219+
}
220+
else
221+
{
222+
return run_gemm_example_prec_type<GemmConfig<ck_tile::fp8_t, false>, TypeConfig>(
223+
a_layout, b_layout, argc, argv);
224+
}
217225
}
218226
else if(data_type == "bf8")
219227
{
220228
using TypeConfig = StreamKGemmTypeConfig<ck_tile::bf8_t, ck_tile::bf8_t, ck_tile::half_t>;
221-
return run_gemm_example_prec_type<GemmConfig<ck_tile::bf8_t>, TypeConfig>(
222-
a_layout, b_layout, argc, argv);
229+
if(persistent_dp)
230+
{
231+
return run_gemm_example_prec_type<GemmConfig<ck_tile::bf8_t, true>, TypeConfig>(
232+
a_layout, b_layout, argc, argv);
233+
}
234+
else
235+
{
236+
return run_gemm_example_prec_type<GemmConfig<ck_tile::bf8_t, false>, TypeConfig>(
237+
a_layout, b_layout, argc, argv);
238+
}
223239
}
224240
else
225241
{

0 commit comments

Comments
 (0)