From 746f164766a733b8d1da4ae125122c8c7c2639dd Mon Sep 17 00:00:00 2001
From: Olivier Mattelaer <olivier.mattelaer@uclouvain.be>
Date: Mon, 6 Nov 2023 13:54:02 +0100
Subject: [PATCH] better formatting for the card from the start and adding the
 possibility to choose avx

---
 MG5aMC/mg5amcnlo                              |  2 +-
 .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 71 ++++++++++++++-----
 .../PLUGIN/CUDACPP_SA_OUTPUT/output.py        | 11 +--
 3 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo
index 49c93e01b8..8a18cc2423 160000
--- a/MG5aMC/mg5amcnlo
+++ b/MG5aMC/mg5amcnlo
@@ -1 +1 @@
-Subproject commit 49c93e01b8596cbdb4e65f628601de1e6f08c744
+Subproject commit 8a18cc2423616ee91c4f9d74eec0cb2901e0fd2a
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
index f3e3ddb38f..f32dd25aad 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
@@ -29,19 +29,16 @@ def compile(self, *args, **opts):
         if not self.options['nb_core'] or self.options['nb_core'] == 'None':
             self.options['nb_core'] = multiprocessing.cpu_count()
     
-        if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
-            import pathlib
-            import os
-            pjoin = os.path.join
-            if 'cwd' in opts:
-                path = pjoin(opts['cwd'], os.pardir, os.pardir, 'Source', 'make_opts')
-                common_run_interface.CommonRunCmd.update_make_opts_full(
-                path,
-                {'FPTYPE': self.run_card['floating_type']})
+        if 'cwd' in opts and os.path.basename(opts['cwd']) == 'Source':
+            path = pjoin(opts['cwd'], 'make_opts')
+            avx_type = self.run_card['avx_type'] if self.run_card['avx_type'] != 'auto' else ''
+            common_run_interface.CommonRunCmd.update_make_opts_full(path,
+                {'FPTYPE': self.run_card['floating_type'],
+                 'AVX':  avx_type })
+            misc.sprint('FPTYPE checked')
 
 
-
-            
+        if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):            
             cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py
             logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend)
             if cudacpp_backend == 'FORTRAN':
@@ -56,8 +53,26 @@ def compile(self, *args, **opts):
         else:
             return misc.compile(nb_core=self.options['nb_core'], *args, **opts)
 
+
+# SIMD/GPU Parametrization ------------------------------------------------------------------------------------
+template_on = \
+"""#*********************************************************************
+# SIMD/GPU Parametrization
+#*********************************************************************
+   %(floating_type)s = floating_type ! single precision(f), double precision (d), mixed (m) [double for amplitude, single for color]
+   %(avx_type)s =  avx_type  ! for SIMD, technology to use for the vectorization
+   %(cudacpp_backend)s = cudacpp_backend ! Fortran/CPP/CUDA switch mode to use
+"""
+
+template_off = ''
+
+plugin_block = banner_mod.RunBlock('simd', template_on=template_on, template_off=template_off)
+
+
 class CPPRunCard(banner_mod.RunCardLO):
 
+    blocks = banner_mod.RunCardLO.blocks + [plugin_block]
+
     def reset_simd(self, old_value, new_value, name):
         if not hasattr(self, 'path'):
             raise Exception
@@ -75,8 +90,14 @@ def reset_makeopts(self, old_value, new_value, name):
         if not hasattr(self, 'path'):
             raise Exception
 
+        avx_value = self['avx_type'] if self['avx_type'] != 'auto' else ''
+
         if name == 'floating_type':
-            common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': new_value})
+            common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': new_value, 'AVX': avx_value})
+        elif name == 'avx_type':
+            if new_value == 'Auto':
+                new_value = ''
+            common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': self['floating_type'], 'AVX': new_value})
         else:
             raise Exception
 
@@ -88,10 +109,21 @@ def plugin_input(self, finput):
 
     def default_setup(self):
+        """Declare the SIMD/GPU run-card parameters and override inherited defaults."""
         super().default_setup()
-        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)
         self.add_param('floating_type', 'd', include=False, hidden=False,
-                       fct_mod=(self.reset_makeopts,(),{}))
-        
+                       fct_mod=(self.reset_makeopts,(),{}),
+                       allowed=['m','d','f'])
+        self.add_param('avx_type', 'auto', include=False, hidden=False,
+                       fct_mod=(self.reset_makeopts,(),{}),
+                       allowed=['auto', 'none', 'sse4', 'avx2','512y','512z'])
+        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False,
+                       allowed=['Fortran', 'CPP', 'CUDA'])
+        self['vector_size'] = 16 # already setup in default class (just change value)
+        self['aloha_flag'] = '--fast-math'
+        self['matrix_flag'] = '-O3'
+        self.display_block.append('simd')
+        self.display_block.append('psoptim')
+
 
     def write_one_include_file(self, output_dir, incname, output_file=None):
         """write one include file at the time"""
@@ -115,10 +147,13 @@ def check_validity(self):
 
 class GPURunCard(CPPRunCard):
 
+    """CPPRunCard variant whose defaults select the CUDA backend."""
     def default_setup(self):
-        super(CPPRunCard, self).default_setup()
-        self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False)
-        self.add_param('floating_type', 'd', include=False, hidden=False)
+        """Apply the CPPRunCard defaults, then switch backend and vector size for CUDA."""
+        super().default_setup()
+        # change default value:
+        self['cudacpp_backend'] = 'CUDA'
+        self['vector_size'] = 16384 # already setup in default class (just change value)
 
 
 MEINTERFACE = CPPMEInterface
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
index 53cbf7f621..b85eef9ac6 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
@@ -149,6 +149,11 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU):
     ###helas_exporter = None
     helas_exporter = model_handling.PLUGIN_GPUFOHelasCallWriter # this is one of the main fixes for issue #341!
 
+    # Default class for the run_card to use
+    from . import launch_plugin
+    run_card_class = launch_plugin.CPPRunCard
+
+
     # AV (default from OM's tutorial) - add a debug printout
     def __init__(self, *args, **kwargs):
         self.in_madevent_mode = False # see MR #747
@@ -203,7 +208,6 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
 	    cmdhistory is the list of command used so far.
 	    MG5options are all the options of the main interface
 	    outputflags is a list of options provided when doing the output command"""
-        misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self))
         if self.in_madevent_mode:
             self.add_input_for_banner()
             if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL']
@@ -221,7 +225,8 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
                 raise Exception('ERROR! the O/S call to patchMad.sh failed')
             
             self.add_madevent_plugin_fct()
-        return super().finalize(matrix_element, cmdhistory, MG5options, outputflag)
+        # do not call the standard finalize since this is already done...
+        #return super().finalize(matrix_element, cmdhistory, MG5options, outputflag)
 
     # AV (default from OM's tutorial) - overload settings and add a debug printout
     def modify_grouping(self, matrix_element):
@@ -254,11 +259,9 @@ def add_madevent_plugin_fct(self):
         """
         
         plugin_path = os.path.dirname(os.path.realpath( __file__ ))
-        files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal'))
         files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal'))
         files.ln( pjoin(self.dir_path, 'lib'),  pjoin(self.dir_path, 'SubProcesses'))
 
-
 #------------------------------------------------------------------------------------
 class SIMD_ProcessExporter(PLUGIN_ProcessExporter):