Skip to content

Commit

Permalink
[tuner]: address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Bangtian Liu <[email protected]>
  • Loading branch information
bangtianliu committed Nov 29, 2024
1 parent 589900d commit b802b6f
Show file tree
Hide file tree
Showing 6 changed files with 364 additions and 286 deletions.
137 changes: 85 additions & 52 deletions tuner/tuner/candidate_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,39 +40,46 @@


def apply_configuration(
    template: list[str],
    configuration: Configuration,
    workgroup_sizes: list[int],
    reduction_sizes: list[int],
) -> str:
    """Rewrite an MLIR template's tuning attributes to match *configuration*.

    Scans each line of *template* and substitutes, in place, the mma intrinsic /
    subgroup counts, the translation-info workgroup/subgroup sizes, the
    `workgroup = [[...]]` and `reduction = [[...]]` lowering-config tile lists,
    the gpu_pipeline_options attribute, and the "amdgpu-waves-per-eu" hint.
    Lines matching none of the markers pass through unchanged.

    Args:
        template: MLIR source split into lines (each line retains its newline,
            since the result is built by plain concatenation — TODO confirm
            callers pass `splitlines(keepends=True)`-style input).
        configuration: Tuning configuration providing intrinsic(),
            subgroup_m_count(), subgroup_n_count(), workgroup_size,
            subgroup_size, gpu_pipeline_options, and waves_per_eu.
        workgroup_sizes: Per-dimension workgroup tile sizes.
        reduction_sizes: Per-dimension reduction tile sizes.

    Returns:
        The rewritten MLIR source as a single string.
    """
    intrinsic = configuration.intrinsic()
    subgroup_m_count = configuration.subgroup_m_count()
    subgroup_n_count = configuration.subgroup_n_count()
    tune_logger.info(f"Applying: {configuration}")
    # Patterns for the attributes we rewrite; compiled once, applied per line.
    expr0 = re.compile(
        r"<intrinsic = #iree_gpu\.mma_layout<(.+)>, subgroup_m_count = ([0-9]+), subgroup_n_count = ([0-9]+)>"
    )
    expr1 = re.compile(
        r"LLVMGPUVectorDistribute workgroup_size = \[.+\] subgroup_size = ([0-9]+),"
    )
    expr2 = re.compile(r"workgroup = \[\[([0-9]+)(, ([0-9]+))+\]\]")
    expr3 = re.compile(r"reduction = \[\[([0-9]+)(, ([0-9]+))+\]\]")
    expr4 = re.compile(r"gpu_pipeline_options = #iree_gpu\.pipeline_options<([^>]*)>")
    expr5 = re.compile(r"\"amdgpu-waves-per-eu\" = \"([0-9])\"")
    repl0 = f"<intrinsic = {intrinsic}, subgroup_m_count = {subgroup_m_count}, subgroup_n_count = {subgroup_n_count}>"
    repl1 = f'LLVMGPUVectorDistribute workgroup_size = [{", ".join(map(str, configuration.workgroup_size))}] subgroup_size = {configuration.subgroup_size},'
    repl2 = f'workgroup = [[{", ".join(map(str, workgroup_sizes))}]]'
    repl3 = f'reduction = [[{", ".join(map(str, reduction_sizes))}]]'
    repl4 = f"gpu_pipeline_options = {configuration.gpu_pipeline_options}"
    repl5 = f'"amdgpu-waves-per-eu" = "{configuration.waves_per_eu}"'

    new_mlir = ""
    for line in template:
        # Cheap substring guards avoid running every regex on every line.
        if "intrinsic =" in line:
            line = re.sub(expr0, repl0, line)
        if "LLVMGPUVectorDistribute " in line:
            line = re.sub(expr1, repl1, line)
        if "workgroup" in line:
            line = re.sub(expr2, repl2, line)
        if "reduction" in line:
            line = re.sub(expr3, repl3, line)
        if "gpu_pipeline_options =" in line:
            line = re.sub(expr4, repl4, line)
        if "amdgpu-waves-per-eu" in line:
            line = re.sub(expr5, repl5, line)
        new_mlir += line

    return new_mlir
Expand Down Expand Up @@ -118,10 +125,11 @@ class MmtTuner(DispatchTuner, MmtParser):
def get_transform_function_mmt(
self, problem_size: ProblemSize, functionName: str, configuration: Configuration
) -> str:
tile_sizes = ", ".join(map(str, get_mmt_tile_sizes(configuration)))
intrinsic = configuration.intrinsic
subgroup_m_count = configuration.subgroup_m_count
subgroup_n_count = configuration.subgroup_n_count
workgroup_sizes = ", ".join(map(str, get_mmt_workgroup_sizes(configuration)))
reduction_sizes = ", ".join(map(str, get_mmt_reduction_sizes(configuration)))
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()

wg_x, wg_y, wg_z = configuration.workgroup_size
extra_config = get_pipeline_config(configuration)
Expand All @@ -133,7 +141,7 @@ def get_transform_function_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[{tile_sizes}]]>,
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand All @@ -159,7 +167,10 @@ def apply_params(
"// ",
)
modified += apply_configuration(
template, configuration, get_mmt_tile_sizes(configuration)
template,
configuration,
get_mmt_workgroup_sizes(configuration),
get_mmt_reduction_sizes(configuration),
)
embeddable = indent(
self.get_transform_function_mmt(problem_size, f"match_op", configuration),
Expand All @@ -169,13 +180,6 @@ def apply_params(


class ConvTuner(DispatchTuner, ConvParser):
# int64_t n = outputShape[0];
# int64_t oh = outputShape[1];
# int64_t ow = outputShape[2];
# int64_t oc = outputShape[3];
# int64_t fh = filterShape[0];
# int64_t fw = filterShape[1];
# int64_t ic = filterShape[2];
def get_transform_function_conv(
self, problem_size: ProblemSize, functionName: str, configuration: Configuration
) -> str:
Expand All @@ -191,10 +195,15 @@ def get_transform_function_conv(
filter = f"tensor<{problem_size.rhs_type}>"
output = f"tensor<{dynamic_batch_output_ty}>"

tile_sizes = ", ".join(map(str, self.get_conv_tile_sizes(configuration)))
intrinsic = configuration.intrinsic
subgroup_m_count = configuration.subgroup_m_count
subgroup_n_count = configuration.subgroup_n_count
workgroup_sizes = ", ".join(
map(str, self.get_conv_workgroup_sizes(configuration))
)
reduction_sizes = ", ".join(
map(str, self.get_conv_reduction_sizes(configuration))
)
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()

wg_x, wg_y, wg_z = configuration.workgroup_size
extra_config = get_pipeline_config(configuration)
Expand All @@ -209,7 +218,7 @@ def get_transform_function_conv(
outs(%out : {output}) -> {output}
}} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[{tile_sizes}]]>,
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -237,7 +246,10 @@ def apply_params(
"// ",
)
modified += apply_configuration(
template, configuration, self.get_conv_tile_sizes(configuration)
template,
configuration,
self.get_conv_workgroup_sizes(configuration),
self.get_conv_reduction_sizes(configuration),
)
embeddable = indent(
self.get_transform_function_conv(problem_size, f"match_op", configuration),
Expand All @@ -253,10 +265,15 @@ def get_transform_function_broadcast_rhs_mmt(
functionName: str,
configuration: Configuration,
) -> str:
tile_sizes = ", ".join(map(str, get_batch_mmt_tile_sizes(configuration)))
intrinsic = configuration.intrinsic
subgroup_m_count = configuration.subgroup_m_count
subgroup_n_count = configuration.subgroup_n_count
workgroup_sizes = ", ".join(
map(str, get_batch_mmt_workgroup_sizes(configuration))
)
reduction_sizes = ", ".join(
map(str, get_batch_mmt_reduction_sizes(configuration))
)
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()

wg_x, wg_y, wg_z = configuration.workgroup_size
extra_config = get_pipeline_config(configuration)
Expand All @@ -273,7 +290,7 @@ def get_transform_function_broadcast_rhs_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{lhs_dynamic_batch}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[{tile_sizes}]]>,
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand All @@ -299,7 +316,10 @@ def apply_params_broadcast_rhs_mmt(
"// ",
)
modified += apply_configuration(
template, configuration, get_batch_mmt_tile_sizes(configuration)
template,
configuration,
get_batch_mmt_workgroup_sizes(configuration),
get_batch_mmt_reduction_sizes(configuration),
)

embeddable = indent(
Expand Down Expand Up @@ -327,7 +347,8 @@ def apply_params(
apply_configuration(
template,
configuration,
get_contract_tile_sizes(configuration, self.tile_dims),
get_contract_workgroup_sizes(configuration, self.tile_dims),
get_contract_reduction_sizes(configuration, self.tile_dims),
),
"",
)
Expand All @@ -340,10 +361,15 @@ def get_transform_function_batch_mmt(
functionName: str,
configuration: Configuration,
) -> str:
tile_sizes = ", ".join(map(str, get_batch_mmt_tile_sizes(configuration)))
intrinsic = configuration.intrinsic
subgroup_m_count = configuration.subgroup_m_count
subgroup_n_count = configuration.subgroup_n_count
workgroup_sizes = ", ".join(
map(str, get_batch_mmt_workgroup_sizes(configuration))
)
reduction_sizes = ", ".join(
map(str, get_batch_mmt_reduction_sizes(configuration))
)
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()

wg_x, wg_y, wg_z = configuration.workgroup_size
extra_config = get_pipeline_config(configuration)
Expand All @@ -356,7 +382,7 @@ def get_transform_function_batch_mmt(
transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[{tile_sizes}]]>,
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand All @@ -383,7 +409,10 @@ def apply_params(
"// ",
)
modified += apply_configuration(
template, configuration, get_batch_mmt_tile_sizes(configuration)
template,
configuration,
get_batch_mmt_workgroup_sizes(configuration),
get_batch_mmt_reduction_sizes(configuration),
)

embeddable = indent(
Expand All @@ -407,12 +436,15 @@ def get_transform_function_batch_matmul(
input1 = f"tensor<{problem_size.rhs_type}>"
output = f"tensor<{problem_size.res_type}>"

tile_sizes = ", ".join(
map(str, get_contract_tile_sizes(configuration, tile_dims))
workgroup_sizes = ", ".join(
map(str, get_contract_workgroup_sizes(configuration, tile_dims))
)
reduction_sizes = ", ".join(
map(str, get_contract_reduction_sizes(configuration, tile_dims))
)
intrinsic = configuration.intrinsic
subgroup_m_count = configuration.subgroup_m_count
subgroup_n_count = configuration.subgroup_n_count
intrinsic = configuration.intrinsic()
subgroup_m_count = configuration.subgroup_m_count()
subgroup_n_count = configuration.subgroup_n_count()

wg_x, wg_y, wg_z = configuration.workgroup_size
extra_config = get_pipeline_config(configuration)
Expand All @@ -427,7 +459,7 @@ def get_transform_function_batch_matmul(
outs(%out : {output}) -> {output}
}} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
%config = transform.param.constant #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[{tile_sizes}]]>,
lowering_config = #iree_codegen.lowering_config<workgroup = [[{workgroup_sizes}]], reduction = [[{reduction_sizes}]]>,
translation_info = #iree_codegen.translation_info<LLVMGPUPadAndVectorDistribute
workgroup_size = [{wg_x}, {wg_y}, {wg_z}] subgroup_size = {configuration.subgroup_size},
{{mma_schedule = #iree_gpu.mma_schedule<
Expand Down Expand Up @@ -458,7 +490,8 @@ def apply_params(
modified += apply_configuration(
template,
configuration,
get_contract_tile_sizes(configuration, self.tile_dims),
get_contract_workgroup_sizes(configuration, self.tile_dims),
get_contract_reduction_sizes(configuration, self.tile_dims),
)

embeddable = indent(
Expand Down
Loading

0 comments on commit b802b6f

Please sign in to comment.