Skip to content

Commit

Permalink
[tuner]: fixed the format of lowering_config
Browse files Browse the repository at this point in the history
Signed-off-by: Bangtian Liu <[email protected]>
  • Loading branch information
bangtianliu committed Nov 30, 2024
1 parent b802b6f commit 86c187a
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 91 deletions.
32 changes: 9 additions & 23 deletions tuner/tuner/candidate_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ def apply_configuration(
expr1 = re.compile(
r"LLVMGPUVectorDistribute workgroup_size = \[.+\] subgroup_size = ([0-9]+),"
)
expr2 = re.compile(r"workgroup = \[\[([0-9]+)(, ([0-9]+))+\]\]")
expr3 = re.compile(r"reduction = \[\[([0-9]+)(, ([0-9]+))+\]\]")
expr2 = re.compile(r"workgroup = \[([0-9]+)(, ([0-9]+))+\]")
expr3 = re.compile(r"reduction = \[([0-9]+)(, ([0-9]+))+\]")
expr4 = re.compile(r"gpu_pipeline_options = #iree_gpu\.pipeline_options<([^>]*)>")
expr5 = re.compile(r"\"amdgpu-waves-per-eu\" = \"([0-9])\"")
repl0 = f"<intrinsic = {intrinsic}, subgroup_m_count = {subgroup_m_count}, subgroup_n_count = {subgroup_n_count}>"
repl1 = f'LLVMGPUVectorDistribute workgroup_size = [{", ".join(map(str, configuration.workgroup_size))}] subgroup_size = {configuration.subgroup_size},'
repl2 = f'workgroup = [[{", ".join(map(str, workgroup_sizes))}]]'
repl3 = f'reduction = [[{", ".join(map(str, reduction_sizes))}]]'
repl2 = f'workgroup = [{", ".join(map(str, workgroup_sizes))}]'
repl3 = f'reduction = [{", ".join(map(str, reduction_sizes))}]'
repl4 = f"gpu_pipeline_options = {configuration.gpu_pipeline_options}"
repl5 = f'"amdgpu-waves-per-eu" = "{configuration.waves_per_eu}"'

Expand Down Expand Up @@ -125,8 +125,6 @@ class MmtTuner(DispatchTuner, MmtParser):
def get_transform_function_mmt(
self, problem_size: ProblemSize, functionName: str, configuration: Configuration
) -> str:
workgroup_sizes = ", ".join(map(str, get_mmt_workgroup_sizes(configuration)))
reduction_sizes = ", ".join(map(str, get_mmt_reduction_sizes(configuration)))
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()
Expand All @@ -141,7 +139,7 @@ def get_transform_function_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
lowering_config = {configuration.lowering_config}>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -218,7 +216,7 @@ def get_transform_function_conv(
outs(%out : {output}) -> {output}
}} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
lowering_config = {configuration.lowering_config}>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -290,7 +288,7 @@ def get_transform_function_broadcast_rhs_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{lhs_dynamic_batch}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
lowering_config = {configuration.lowering_config}>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -361,12 +359,6 @@ def get_transform_function_batch_mmt(
functionName: str,
configuration: Configuration,
) -> str:
workgroup_sizes = ", ".join(
map(str, get_batch_mmt_workgroup_sizes(configuration))
)
reduction_sizes = ", ".join(
map(str, get_batch_mmt_reduction_sizes(configuration))
)
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()
Expand All @@ -382,7 +374,7 @@ def get_transform_function_batch_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
lowering_config = {configuration.lowering_config}>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -436,12 +428,6 @@ def get_transform_function_batch_matmul(
input1 = f"tensor<{problem_size.rhs_type}>"
output = f"tensor<{problem_size.res_type}>"

workgroup_sizes = ", ".join(
map(str, get_contract_workgroup_sizes(configuration, tile_dims))
)
reduction_sizes = ", ".join(
map(str, get_contract_reduction_sizes(configuration, tile_dims))
)
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()
Expand All @@ -459,7 +445,7 @@ def get_transform_function_batch_matmul(
outs(%out : {output}) -> {output}
}} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
lowering_config = {configuration.lowering_config}>,
translation_info = #iree_codegen.translation_info<LLVMGPUPadAndVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down
64 changes: 32 additions & 32 deletions tuner/tuner/candidate_gen_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_apply_params_mmt(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_F32_32x32x8_F16>, subgroup_m_count = 16, subgroup_n_count = 16>",
"<LLVMGPUVectorDistribute workgroup_size = [16, 16] subgroup_size = 16,",
"<workgroup = [[8, 8, 8]], reduction = [[8, 8, 8]]>",
"<workgroup = [8, 8, 8], reduction = [8, 8, 8]>",
"gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = None>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}',
]
Expand All @@ -50,7 +50,7 @@ def test_apply_params_mmt(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[8, 8, 0],
reduction=[0, 0, 8],
subgroup_m_count=16,
Expand Down Expand Up @@ -89,8 +89,8 @@ def test_apply_params_mmt(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [16, 16, 1] subgroup_size = 16"
in modified
)
assert "workgroup = [[8, 8, 0]]" in modified
assert "reduction = [[0, 0, 8]]" in modified
assert "workgroup = [8, 8, 0]" in modified
assert "reduction = [0, 0, 8]" in modified
assert (
"gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true>"
in modified
Expand All @@ -102,7 +102,7 @@ def test_apply_params_conv(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 16, subgroup_n_count = 16>",
"<LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64,",
"<workgroup = [[1, 1, 64, 128, 1, 1, 32]], reduction = [[1, 1, 64, 128, 1, 1, 32]]>",
"<workgroup = [1, 1, 64, 128, 1, 1, 32], reduction = [1, 1, 64, 128, 1, 1, 32]>",
'gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true>, {llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}',
]

Expand All @@ -112,7 +112,7 @@ def test_apply_params_conv(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[464, 320, 0],
reduction=[0, 0, 16],
subgroup_m_count=1,
Expand Down Expand Up @@ -155,8 +155,8 @@ def test_apply_params_conv(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64"
in modified
)
assert "workgroup = [[1, 1, 464, 320, 1, 1, 0]]" in modified
assert "reduction = [[0, 0, 0, 0, 0, 0, 16]]" in modified
assert "workgroup = [1, 1, 464, 320, 1, 1, 0]" in modified
assert "reduction = [0, 0, 0, 0, 0, 0, 16]" in modified
assert (
"gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <Transpose>>"
in modified
Expand All @@ -168,7 +168,7 @@ def test_apply_params_contract(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 2, subgroup_n_count = 2>}>",
"<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64,",
"<workgroup = [[1, 1, 1, 64, 64, 128]], reduction = [[1, 1, 1, 64, 64, 128]]>",
"<workgroup = [1, 1, 1, 64, 64, 128], reduction = [1, 1, 1, 64, 64, 128]>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}',
]

Expand All @@ -185,7 +185,7 @@ def test_apply_params_contract(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[480, 384, 0],
reduction=[0, 0, 32],
subgroup_m_count=1,
Expand Down Expand Up @@ -214,16 +214,16 @@ def test_apply_params_contract(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64"
in new_mlir
)
assert "workgroup = [[1, 480, 384, 0]]" in new_mlir
assert "reduction = [[0, 0, 0, 32]]" in new_mlir
assert "workgroup = [1, 480, 384, 0]" in new_mlir
assert "reduction = [0, 0, 0, 32]" in new_mlir
assert '{llvm_func_attrs = {"amdgpu-waves-per-eu" = "2"}' in new_mlir


def test_apply_params_batch_matmul(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 4, subgroup_n_count = 1>}>",
"<LLVMGPUVectorDistribute workgroup_size = [64, 4, 1] subgroup_size = 64,",
"<workgroup = [[1, 128, 64, 64]], reduction = [[1, 128, 64, 64]]>",
"<workgroup = [1, 128, 64, 64], reduction = [1, 128, 64, 64]>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}',
]

Expand All @@ -240,7 +240,7 @@ def test_apply_params_batch_matmul(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[416, 320, 0],
reduction=[0, 0, 128],
subgroup_m_count=2,
Expand Down Expand Up @@ -273,16 +273,16 @@ def test_apply_params_batch_matmul(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64"
in modified
)
assert "workgroup = [[1, 416, 320, 0]]" in modified
assert "reduction = [[0, 0, 0, 128]]" in modified
assert "workgroup = [1, 416, 320, 0]" in modified
assert "reduction = [0, 0, 0, 128]" in modified
assert '{llvm_func_attrs = {"amdgpu-waves-per-eu" = "2"}' in modified


def test_apply_params_batch_mmt_float(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 4, subgroup_n_count = 1>}>",
"<LLVMGPUVectorDistribute workgroup_size = [64, 4, 1] subgroup_size = 64,",
"<workgroup = [[1, 128, 128, 64]], reduction = [[1, 128, 128, 64]]>",
"<workgroup = [1, 128, 128, 64], reduction = [1, 128, 128, 64]>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}',
]

Expand All @@ -298,7 +298,7 @@ def test_apply_params_batch_mmt_float(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[128, 64, 0],
reduction=[0, 0, 128],
subgroup_m_count=2,
Expand Down Expand Up @@ -329,16 +329,16 @@ def test_apply_params_batch_mmt_float(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64"
in modified
)
assert "workgroup = [[1, 128, 64, 0]]" in modified
assert "reduction = [[0, 0, 0, 128]]" in modified
assert "workgroup = [1, 128, 64, 0]" in modified
assert "reduction = [0, 0, 0, 128]" in modified
assert '{llvm_func_attrs = {"amdgpu-waves-per-eu" = "2"}' in modified


def test_apply_params_batch_mmt_int(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_I32_16x16x32_I8>, subgroup_m_count = 4, subgroup_n_count = 1>}>",
"<LLVMGPUVectorDistribute workgroup_size = [64, 4, 1] subgroup_size = 64,",
"<workgroup = [[1, 128, 128, 64]], reduction = [[1, 128, 128, 64]]>",
"<workgroup = [1, 128, 128, 64], reduction = [1, 128, 128, 64]>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}',
]

Expand All @@ -354,7 +354,7 @@ def test_apply_params_batch_mmt_int(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[128, 64, 0],
reduction=[0, 0, 128],
subgroup_m_count=2,
Expand Down Expand Up @@ -387,8 +387,8 @@ def test_apply_params_batch_mmt_int(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64"
in modified
)
assert "workgroup = [[1, 128, 64, 0]]" in modified
assert "reduction = [[0, 0, 0, 128]]" in modified
assert "workgroup = [1, 128, 64, 0]" in modified
assert "reduction = [0, 0, 0, 128]" in modified
assert '{llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}' in modified

assert embeddable
Expand All @@ -408,8 +408,8 @@ def test_apply_params_batch_mmt_int(tuner_ctx: common.TunerContext) -> None:
"%config = transform.param.constant #iree_codegen.compilation_info<"
in embeddable
)
assert "workgroup = [[1, 128, 64, 0]]" in embeddable
assert "reduction = [[0, 0, 0, 128]]" in embeddable
assert "workgroup = [128, 64, 0]" in embeddable
assert "reduction = [0, 0, 128]" in embeddable
assert 'llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}' in embeddable
assert "workgroup_size = [128, 2, 1] subgroup_size = 64" in embeddable

Expand All @@ -418,7 +418,7 @@ def test_apply_params_broadcast_rhs_mmt(tuner_ctx: common.TunerContext) -> None:
mlir_template = [
"<intrinsic = #iree_gpu.mma_layout<MFMA_I32_16x16x32_I8>, subgroup_m_count = 4, subgroup_n_count = 1>}>",
"<LLVMGPUVectorDistribute workgroup_size = [64, 4, 1] subgroup_size = 64,",
"<workgroup = [[1, 128, 128, 64]], reduction = [[1, 128, 128, 64]]>",
"<workgroup = [1, 128, 128, 64], reduction = [1, 128, 128, 64]>",
'{llvm_func_attrs = {"amdgpu-waves-per-eu" = "1"}',
]

Expand All @@ -434,7 +434,7 @@ def test_apply_params_broadcast_rhs_mmt(tuner_ctx: common.TunerContext) -> None:
mma_attr = iree_gpu.MMAAttr.get(mma_intrinsic)
lowering_config = common.get_lowering_config(
tuner_ctx=tuner_ctx,
mma_attr=mma_attr,
mma_kind=mma_attr,
workgroup=[128, 64, 0],
reduction=[0, 0, 128],
subgroup_m_count=2,
Expand Down Expand Up @@ -470,8 +470,8 @@ def test_apply_params_broadcast_rhs_mmt(tuner_ctx: common.TunerContext) -> None:
"LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64"
in modified
)
assert "workgroup = [[1, 128, 64, 0]]" in modified
assert "reduction = [[0, 0, 0, 128]]" in modified
assert "workgroup = [1, 128, 64, 0]" in modified
assert "reduction = [0, 0, 0, 128]" in modified
assert '{llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}' in modified

assert embeddable
Expand All @@ -492,8 +492,8 @@ def test_apply_params_broadcast_rhs_mmt(tuner_ctx: common.TunerContext) -> None:
"%config = transform.param.constant #iree_codegen.compilation_info<"
in embeddable
)
assert "workgroup = [[1, 128, 64, 0]]" in embeddable
assert "reduction = [[0, 0, 0, 128]]" in embeddable
assert "workgroup = [128, 64, 0]" in embeddable
assert "reduction = [0, 0, 128]" in embeddable
assert 'llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}' in embeddable
assert "workgroup_size = [128, 2, 1] subgroup_size = 64" in embeddable

Expand Down
Loading

0 comments on commit 86c187a

Please sign in to comment.