From 746f164766a733b8d1da4ae125122c8c7c2639dd Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Mon, 6 Nov 2023 13:54:02 +0100 Subject: [PATCH] better formatting for the card from the start and adding the possibility to choose avx --- MG5aMC/mg5amcnlo | 2 +- .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 71 ++++++++++++++----- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 11 +-- 3 files changed, 61 insertions(+), 23 deletions(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 49c93e01b8..8a18cc2423 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 49c93e01b8596cbdb4e65f628601de1e6f08c744 +Subproject commit 8a18cc2423616ee91c4f9d74eec0cb2901e0fd2a diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index f3e3ddb38f..f32dd25aad 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -29,19 +29,16 @@ def compile(self, *args, **opts): if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): - import pathlib - import os - pjoin = os.path.join - if 'cwd' in opts: - path = pjoin(opts['cwd'], os.pardir, os.pardir, 'Source', 'make_opts') - common_run_interface.CommonRunCmd.update_make_opts_full( - path, - {'FPTYPE': self.run_card['floating_type']}) + if 'cwd' in opts and os.path.basename(opts['cwd']) == 'Source': + path = pjoin(opts['cwd'], 'make_opts') + avx_type = self.run_card['avx_type'] if self.run_card['avx_type'] != 'auto' else '' + common_run_interface.CommonRunCmd.update_make_opts_full(path, + {'FPTYPE': self.run_card['floating_type'], + 'AVX': avx_type }) + misc.sprint('FPTYPE checked') - - + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = 
self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -56,8 +53,26 @@ def compile(self, *args, **opts): else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +# Phase-Space Optimization ------------------------------------------------------------------------------------ +template_on = \ +"""#********************************************************************* +# SIMD/GPU Parametrization +#********************************************************************* + %(floating_type)s = floating_type ! single precision(f), double precision (d), mixed (m) [double for amplitude, single for color] + %(avx_type)s = avx_type ! for SIMD, technology to use for the vectorization + %(cudacpp_backend)s = cudacpp_backend ! Fortran/CPP/CUDA switch mode to use +""" + +template_off = '' + +plugin_block = banner_mod.RunBlock('simd', template_on=template_on, template_off=template_off) + + class CPPRunCard(banner_mod.RunCardLO): + blocks = banner_mod.RunCardLO.blocks + [plugin_block] + def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): raise Exception @@ -75,8 +90,14 @@ def reset_makeopts(self, old_value, new_value, name): if not hasattr(self, 'path'): raise Exception + avx_value = self['avx_type'] if self['avx_type'] != 'auto' else '' + if name == 'floating_type': - common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': new_value}) + common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': new_value, 'AVX': avx_value}) + elif name == 'avx_type': + if new_value == 'Auto': + new_value = '' + common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': self['floating_type'], 'AVX': new_value}) else: raise Exception @@ -88,10 +109,21 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - 
self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) self.add_param('floating_type', 'd', include=False, hidden=False, - fct_mod=(self.reset_makeopts,(),{})) - + fct_mod=(self.reset_makeopts,(),{}), + allowed=['m','d','f']) + self.add_param('avx_type', 'auto', include=False, hidden=False, + fct_mod=(self.reset_makeopts,(),{}), + allowed=['auto', 'none', 'sse4', 'avx2','512y','512z']) + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False, + allowed=['Fortan', 'CPP', 'CUDA']) + self['vector_size'] = 16 # already setup in default class (just change value) + self['aloha_flag'] = '--fast-math' + self['matrix_flag'] = '-O3' + self.display_block.append('simd') + self.display_block.append('psoptim') + + def write_one_include_file(self, output_dir, incname, output_file=None): """write one include file at the time""" @@ -115,10 +147,13 @@ def check_validity(self): class GPURunCard(CPPRunCard): + def default_setup(self): - super(CPPRunCard, self).default_setup() - self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) - self.add_param('floating_type', 'd', include=False, hidden=False) + + super().default_setup() + # change default value: + self['cudacpp_backend'] = 'CUDA' + self['vector_size'] = 16384 # already setup in default class (just change value) MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 53cbf7f621..b85eef9ac6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -149,6 +149,11 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): ###helas_exporter = None helas_exporter = model_handling.PLUGIN_GPUFOHelasCallWriter # this is one of the main fixes for issue #341! + # Default class for the run_card to use + from . 
import launch_plugin + run_card_class = launch_plugin.CPPRunCard + + # AV (default from OM's tutorial) - add a debug printout def __init__(self, *args, **kwargs): self.in_madevent_mode = False # see MR #747 @@ -203,7 +208,6 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): cmdhistory is the list of command used so far. MG5options are all the options of the main interface outputflags is a list of options provided when doing the output command""" - misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self)) if self.in_madevent_mode: self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] @@ -221,7 +225,8 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): raise Exception('ERROR! the O/S call to patchMad.sh failed') self.add_madevent_plugin_fct() - return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) + # do not call standard finalize since this is already done... + #return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) # AV (default from OM's tutorial) - overload settings and add a debug printout def modify_grouping(self, matrix_element): @@ -254,11 +259,9 @@ def add_madevent_plugin_fct(self): """ plugin_path = os.path.dirname(os.path.realpath( __file__ )) - files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) files.ln( pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) - #------------------------------------------------------------------------------------ class SIMD_ProcessExporter(PLUGIN_ProcessExporter):