diff --git a/tuner/tuner/candidate_gen.py b/tuner/tuner/candidate_gen.py
index e81dcdbd9..41a26a927 100644
--- a/tuner/tuner/candidate_gen.py
+++ b/tuner/tuner/candidate_gen.py
@@ -52,7 +52,10 @@ def apply_configuration(
     workgroup_sizes = lowering_config.workgroup_tile_sizes
     reduction_sizes = lowering_config.reduction_tile_sizes
     gpu_pipeline_options = configuration.translation_info.configuration[
-        GPU_PIPELINE_OPTIONS
+        GPU_PIPELINE_OPTIONS_KEY
+    ]
+    waves_per_eu = configuration.translation_info.configuration[LLVM_FUNC_ATTRS_KEY][
+        WAVES_PER_EU_KEY
     ]
     tune_logger.info(f"Applying: {configuration}")
     expr0 = re.compile(
@@ -70,7 +73,7 @@ def apply_configuration(
     repl2 = f"workgroup = {workgroup_sizes}"
     repl3 = f"reduction = {reduction_sizes}"
     repl4 = f"gpu_pipeline_options = {gpu_pipeline_options}"
-    repl5 = f'"amdgpu-waves-per-eu" = "{configuration.waves_per_eu}"'
+    repl5 = f'"amdgpu-waves-per-eu" = {waves_per_eu}'

     new_mlir = ""
     for line in template:
@@ -131,15 +134,6 @@ class MmtTuner(DispatchTuner, MmtParser):
     def get_transform_function_mmt(
         self, problem_size: ProblemSize, functionName: str, configuration: Configuration
     ) -> str:
-        lowering_config = configuration.lowering_config
-        intrinsic = lowering_config.mma_kind
-        (
-            subgroup_m_count,
-            subgroup_n_count,
-        ) = lowering_config.subgroup_count_mn
-
-        wg_x, wg_y, wg_z = configuration.translation_info.workgroup_size
-        extra_config = get_pipeline_config(configuration)
         return f"""
     transform.named_sequence @{functionName}(%matmul: !transform.any_op {{transform.readonly}}) -> (!transform.any_op, !transform.any_param) {{
         %mmt = transform.include @match_mmt_f16_f16_f32 failures(propagate) (%matmul) : (!transform.any_op) -> !transform.any_op
@@ -148,13 +142,8 @@ def get_transform_function_mmt(
         transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
         transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
         %config = transform.param.constant #iree_codegen.compilation_info<
-            lowering_config = {configuration.lowering_config}>,
-            translation_info = #iree_codegen.translation_info
-            {extra_config}}}>
+            lowering_config = {configuration.lowering_config},
+            translation_info = {configuration.translation_info}
             > -> !transform.any_param
         transform.yield %matmul, %config : !transform.any_op, !transform.any_param
     }}
@@ -200,16 +189,6 @@ def get_transform_function_conv(
         filter = f"tensor<{problem_size.rhs_type}>"
         output = f"tensor<{dynamic_batch_output_ty}>"

-        lowering_config = configuration.lowering_config
-        intrinsic = lowering_config.mma_kind
-        (
-            subgroup_m_count,
-            subgroup_n_count,
-        ) = lowering_config.subgroup_count_mn
-
-        wg_x, wg_y, wg_z = configuration.translation_info.workgroup_size
-        extra_config = get_pipeline_config(configuration)
-
         return f"""
     transform.named_sequence @{functionName}(%conv: !transform.any_op {{transform.readonly}})
       -> (!transform.any_op, !transform.any_param) {{
@@ -220,13 +199,8 @@ def get_transform_function_conv(
         outs(%out : {output}) -> {output}
       }} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
         %config = transform.param.constant #iree_codegen.compilation_info<
-            lowering_config = {configuration.lowering_config}>,
-            translation_info = #iree_codegen.translation_info
-            {extra_config}}}>
+            lowering_config = {configuration.lowering_config},
+            translation_info = {configuration.translation_info}
             > -> !transform.any_param
         transform.yield %conv, %config : !transform.any_op, !transform.any_param
     }}
@@ -265,16 +239,6 @@ def get_transform_function_broadcast_rhs_mmt(
         functionName: str,
         configuration: Configuration,
     ) -> str:
-        lowering_config = configuration.lowering_config
-        intrinsic = lowering_config.mma_kind
-        (
-            subgroup_m_count,
-            subgroup_n_count,
-        ) = lowering_config.subgroup_count_mn
-
-        wg_x, wg_y, wg_z = configuration.translation_info.workgroup_size
-        extra_config = get_pipeline_config(configuration)
-
         lhs_dynamic_batch = problem_size.lhs_type
         lhs_dynamic_batch.shape = lhs_dynamic_batch.shape.copy()
         lhs_dynamic_batch.shape[0] = -1
@@ -287,13 +251,8 @@ def get_transform_function_broadcast_rhs_mmt(
         transform.iree.match.cast_compatible_type %lhs = tensor<{lhs_dynamic_batch}> : !transform.any_value
         transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
         %config = transform.param.constant #iree_codegen.compilation_info<
-            lowering_config = {configuration.lowering_config}>,
-            translation_info = #iree_codegen.translation_info
-            {extra_config}}}>
+            lowering_config = {configuration.lowering_config},
+            translation_info = {configuration.translation_info}
             > -> !transform.any_param
         transform.yield %generic, %config : !transform.any_op, !transform.any_param
     }}
@@ -354,16 +313,6 @@ def get_transform_function_batch_mmt(
         functionName: str,
         configuration: Configuration,
     ) -> str:
-        lowering_config = configuration.lowering_config
-        intrinsic = lowering_config.mma_kind
-        (
-            subgroup_m_count,
-            subgroup_n_count,
-        ) = lowering_config.subgroup_count_mn
-
-        wg_x, wg_y, wg_z = configuration.translation_info.workgroup_size
-        extra_config = get_pipeline_config(configuration)
-
         return f"""
     transform.named_sequence @{functionName}(%generic: !transform.any_op {{transform.readonly}}) -> (!transform.any_op, !transform.any_param) {{
         %mmt = transform.include @match_batch_mmt_i8_i8_i32 failures(propagate) (%generic) : (!transform.any_op) -> !transform.any_op
@@ -372,13 +321,8 @@ def get_transform_function_batch_mmt(
         transform.iree.match.cast_compatible_type %lhs = tensor<{problem_size.lhs_type}> : !transform.any_value
         transform.iree.match.cast_compatible_type %rhs = tensor<{problem_size.rhs_type}> : !transform.any_value
         %config = transform.param.constant #iree_codegen.compilation_info<
-            lowering_config = {configuration.lowering_config}>,
-            translation_info = #iree_codegen.translation_info
-            {extra_config}}}>
+            lowering_config = {configuration.lowering_config},
+            translation_info = {configuration.translation_info}
             > -> !transform.any_param
         transform.yield %generic, %config : !transform.any_op, !transform.any_param
     }}
@@ -424,16 +368,6 @@ def get_transform_function_batch_matmul(
         input1 = f"tensor<{problem_size.rhs_type}>"
         output = f"tensor<{problem_size.res_type}>"

-        lowering_config = configuration.lowering_config
-        intrinsic = lowering_config.mma_kind
-        (
-            subgroup_m_count,
-            subgroup_n_count,
-        ) = lowering_config.subgroup_count_mn
-
-        wg_x, wg_y, wg_z = configuration.translation_info.workgroup_size
-        extra_config = get_pipeline_config(configuration)
-
         return f"""
     transform.named_sequence @{functionName}(%batch_matmul: !transform.any_op {{transform.readonly}})
      -> (!transform.any_op, !transform.any_param) {{
@@ -444,13 +378,8 @@ def get_transform_function_batch_matmul(
         outs(%out : {output}) -> {output}
       }} : (!transform.any_op) -> (!transform.any_value, !transform.any_value)
         %config = transform.param.constant #iree_codegen.compilation_info<
-            lowering_config = {configuration.lowering_config}>,
-            translation_info = #iree_codegen.translation_info
-            {extra_config}}}>
+            lowering_config = {configuration.lowering_config},
+            translation_info = {configuration.translation_info}
             > -> !transform.any_param
         transform.yield %batch_matmul, %config : !transform.any_op, !transform.any_param
     }}
diff --git a/tuner/tuner/candidate_gen_test.py b/tuner/tuner/candidate_gen_test.py
index 75fc46714..63819e599 100644
--- a/tuner/tuner/candidate_gen_test.py
+++ b/tuner/tuner/candidate_gen_test.py
@@ -58,19 +58,22 @@ def test_apply_params_mmt(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=16,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get(prefetch_shared_memory=True)
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get(prefetch_shared_memory=True)
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("8")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [16, 16, 1], 16, pipeline_option_dict
+        pipeline_attr, None, [16, 16, 1], 16, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=8,
     )

     problem_size = common.ProblemSize(
@@ -126,23 +129,26 @@ def test_apply_params_conv(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=4,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get(
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get(
         reorder_workgroups_strategy=iree_gpu.ReorderWorkgroupsStrategyAttr.get(
             iree_gpu.ReorderWorkgroupsStrategy.Transpose
         )
     )
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [256, 1, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [256, 1, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )

     problem_size = common.ProblemSize(
@@ -207,19 +213,22 @@ def test_apply_params_contract(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=4,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [256, 1, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [256, 1, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )

     tf_mlir = candidate_gen.ContractionTuner("mk", "nk", tile_dims).apply_params(
@@ -270,19 +279,22 @@ def test_apply_params_batch_matmul(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=2,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [128, 2, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [128, 2, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )

     tf_mlir = candidate_gen.BatchMatmulTuner("mk", "nk", tile_dims).apply_params(
@@ -336,19 +348,22 @@ def test_apply_params_batch_mmt_float(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=2,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [128, 2, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [128, 2, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )

     tf_mlir = candidate_gen.BatchMmtTuner().apply_params(
@@ -400,19 +415,22 @@ def test_apply_params_batch_mmt_int(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=2,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("4")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [128, 2, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [128, 2, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=4,
     )

     tf_mlir = candidate_gen.BatchMmtTuner().apply_params(
@@ -488,19 +506,22 @@ def test_apply_params_broadcast_rhs_mmt(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=2,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("4")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [128, 2, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [128, 2, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=4,
     )

     tf_mlir = candidate_gen.ContractionTuner(
diff --git a/tuner/tuner/common.py b/tuner/tuner/common.py
index 78bfa9eec..2ccb254b4 100644
--- a/tuner/tuner/common.py
+++ b/tuner/tuner/common.py
@@ -115,11 +115,14 @@ def is_comptible(mma_intrinsic: iree_gpu.MMAIntrinsic) -> bool:
 class Configuration:
     translation_info: iree_codegen.TranslationInfoAttr
     lowering_config: iree_gpu.LoweringConfigAttr
-    waves_per_eu: int


 # The key name for GPUPipelineOptionsAttr in the translation info config dictionary.
-GPU_PIPELINE_OPTIONS = "gpu_pipeline_options"
+GPU_PIPELINE_OPTIONS_KEY = "gpu_pipeline_options"
+# The key name for the llvm_func_attrs attribute in the translation info config dictionary.
+LLVM_FUNC_ATTRS_KEY = "llvm_func_attrs"
+# The key name for the 'amdgpu-waves-per-eu' attribute within llvm_func_attrs.
+WAVES_PER_EU_KEY = "amdgpu-waves-per-eu"


 def get_lowering_config(
@@ -160,19 +163,6 @@ def get_lowering_config(
     return iree_gpu.LoweringConfigAttr.get(lowering_config_attrs)


-def get_pipeline_config(configuration: Configuration) -> str:
-    extra_config = ""
-    pipeline_options = configuration.translation_info.configuration[
-        GPU_PIPELINE_OPTIONS
-    ]
-    if pipeline_options != iree_gpu.PipelineOptionsAttr.get():
-        extra_config += f", gpu_pipeline_options = {pipeline_options}"
-
-    if configuration.waves_per_eu != 2:
-        extra_config += f', llvm_func_attrs = {{"amdgpu-waves-per-eu" = "{configuration.waves_per_eu}"}}'
-    return extra_config
-
-
 def read_input_mlir(filename: str) -> list[str]:
     with open(filename, "r") as f:
         return f.readlines()
diff --git a/tuner/tuner/common_test.py b/tuner/tuner/common_test.py
index 3bdc93c4e..26e9e57c9 100644
--- a/tuner/tuner/common_test.py
+++ b/tuner/tuner/common_test.py
@@ -86,43 +86,45 @@ def test_get_pipeline_config(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=1,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [16, 16, 1], 32, pipeline_option_dict
+        pipeline_attr, None, [16, 16, 1], 32, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )
-    config1_str: str = common.get_pipeline_config(config)
-    assert config1_str == ""
+    config1_str: str = str(config.translation_info.configuration["llvm_func_attrs"])
+    assert config1_str == '{"amdgpu-waves-per-eu" = "2"}'

-    config.waves_per_eu = 4
-    config2_str: str = common.get_pipeline_config(config)
-    assert config2_str == ', llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}'
-
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get(prefetch_shared_memory=True)
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get(prefetch_shared_memory=True)
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("4")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            "llvm_func_attrs": waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [16, 16, 1], 32, pipeline_option_dict
+        pipeline_attr, None, [16, 16, 1], 32, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=4,
     )
-    config3_str = common.get_pipeline_config(config)
+    config2_str: str = str(config.translation_info.configuration)
     assert (
-        config3_str
-        == ', gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true>, llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}'
+        config2_str
+        == '{gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = true>, llvm_func_attrs = {"amdgpu-waves-per-eu" = "4"}}'
     )
@@ -226,19 +228,22 @@ def test_get_lowering_config(tuner_ctx: common.TunerContext) -> None:
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            "llvm_func_attrs": waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [16, 16, 1], 32, pipeline_option_dict
+        pipeline_attr, None, [16, 16, 1], 32, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )

     assert config.lowering_config.mma_kind is None
diff --git a/tuner/tuner/dispatch_constraints.py b/tuner/tuner/dispatch_constraints.py
index 33df4a4c9..8086c75e3 100644
--- a/tuner/tuner/dispatch_constraints.py
+++ b/tuner/tuner/dispatch_constraints.py
@@ -247,17 +247,25 @@ def generate_solutions(
         pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
             iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
         )
-        pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-        pipeline_option_dict = ir.DictAttr.get({GPU_PIPELINE_OPTIONS: pipeline_option})
+        pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+        waves_per_eu_dict = ir.DictAttr.get(
+            {WAVES_PER_EU_KEY: ir.StringAttr.get(str(lookup(waves_per_eu)))}
+        )
+        config_dict = ir.DictAttr.get(
+            {
+                GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+                LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+            }
+        )
         translation_info = iree_codegen.TranslationInfoAttr.get(
             pipeline_attr,
             None,
             [lookup(wg_x), lookup(wg_y), lookup(wg_z)],
             lookup(subgroup_size),
-            pipeline_option_dict,
+            config_dict,
         )
-        config = Configuration(translation_info, lowering_config, lookup(waves_per_eu))
+        config = Configuration(translation_info, lowering_config)
         solver.add(z3.simplify(z3.Not(z3.And(list(x == model[x] for x in all_vars)))))
         i += 1
         yield config
diff --git a/tuner/tuner/dispatch_parser_test.py b/tuner/tuner/dispatch_parser_test.py
index ccd49b351..3ed0faf22 100644
--- a/tuner/tuner/dispatch_parser_test.py
+++ b/tuner/tuner/dispatch_parser_test.py
@@ -52,19 +52,22 @@ def test_get_mmt_tile_sizes(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=4,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("0")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [], 0, pipeline_option_dict
+        pipeline_attr, None, [], 0, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=0,
     )
     lowering_config = config.lowering_config
     assert lowering_config.workgroup_tile_sizes == [128, 320, 0]
@@ -83,19 +86,22 @@ def test_get_conv_tile_sizes(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=4,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("1")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [256, 1, 1], 64, pipeline_option_dict
+        pipeline_attr, None, [256, 1, 1], 64, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=1,
     )
     assert config.lowering_config.workgroup_tile_sizes == [1, 1, 464, 320, 1, 1, 0]
     assert config.lowering_config.reduction_tile_sizes == [0, 0, 0, 0, 0, 0, 16]
@@ -113,19 +119,22 @@ def test_get_contract_tile_sizes(tuner_ctx: common.TunerContext) -> None:
         subgroup_n_count=1,
     )
     pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
-        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorize
+        iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
     )
-    pipeline_option = iree_gpu.PipelineOptionsAttr.get()
-    pipeline_option_dict = ir.DictAttr.get(
-        {common.GPU_PIPELINE_OPTIONS: pipeline_option}
+    pipeline_options = iree_gpu.PipelineOptionsAttr.get()
+    waves_per_eu_dict = ir.DictAttr.get({"amdgpu-waves-per-eu": ir.StringAttr.get("2")})
+    config_dict = ir.DictAttr.get(
+        {
+            common.GPU_PIPELINE_OPTIONS_KEY: pipeline_options,
+            common.LLVM_FUNC_ATTRS_KEY: waves_per_eu_dict,
+        }
     )
     translation_info = iree_codegen.TranslationInfoAttr.get(
-        pipeline_attr, None, [16, 16, 1], 32, pipeline_option_dict
+        pipeline_attr, None, [16, 16, 1], 32, config_dict
     )
     config = common.Configuration(
         translation_info=translation_info,
         lowering_config=lowering_config,
-        waves_per_eu=2,
     )
     assert dispatch_parser.get_contract_workgroup_sizes(config, "mnk") == [4, 8, 0]
     assert dispatch_parser.get_contract_reduction_sizes(config, "mnk") == [0, 0, 16]
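For reference, a minimal standalone sketch (not part of the patch) of the scheme this change introduces: waves-per-eu now travels inside the translation_info configuration dictionary under llvm_func_attrs instead of as a separate Configuration.waves_per_eu field. It assumes the same iree.compiler Python bindings, attribute classes, and key strings already used in the diff above; the helper name build_example_config is illustrative only.

from iree.compiler import ir
from iree.compiler.dialects import iree_codegen, iree_gpu


def build_example_config() -> None:
    # Hypothetical helper, mirroring the pattern used by the updated tests.
    with ir.Context(), ir.Location.unknown():
        pipeline_attr = iree_codegen.DispatchLoweringPassPipelineAttr.get(
            iree_codegen.DispatchLoweringPassPipeline.LLVMGPUVectorDistribute
        )
        pipeline_options = iree_gpu.PipelineOptionsAttr.get()
        # waves-per-eu is stored as a string attribute under llvm_func_attrs.
        llvm_func_attrs = ir.DictAttr.get(
            {"amdgpu-waves-per-eu": ir.StringAttr.get("2")}
        )
        config_dict = ir.DictAttr.get(
            {
                "gpu_pipeline_options": pipeline_options,
                "llvm_func_attrs": llvm_func_attrs,
            }
        )
        translation_info = iree_codegen.TranslationInfoAttr.get(
            pipeline_attr, None, [128, 2, 1], 64, config_dict
        )
        # Downstream code (e.g. apply_configuration) reads the value back out:
        waves_per_eu = translation_info.configuration["llvm_func_attrs"][
            "amdgpu-waves-per-eu"
        ]
        print(waves_per_eu)  # prints the "2" string attribute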