Merge pull request #788 from madgraph5/floating_type_interface

allow to choose fptype via the run_card
madgraph5 · Nov 15, 2023 · b9f16e9 · b9f16e9
2 parents a059006 + 4c91250
commit b9f16e9
Show file tree

Hide file tree

Showing 85 changed files with 1,487 additions and 704 deletions.
diff --git a/.github/workflows/testsuite_allprocesses.yml b/.github/workflows/testsuite_allprocesses.yml
@@ -19,8 +19,9 @@ on:
     branches: [ master ]
 
   # Trigger the all-processes workflow when new changes to the workflow are pushed
-  push:
-    paths: [ .github/workflows/testsuite* ]
+  # (NB: this is now disabled to avoid triggering two jobs when pushing to a branch for which a PR is opened)
+  ###push:
+  ###  paths: [ .github/workflows/testsuite* ]
 
 #----------------------------------------------------------------------------------------------------------------------------------
 

diff --git a/.github/workflows/testsuite_oneprocess.sh b/.github/workflows/testsuite_oneprocess.sh
@@ -35,19 +35,23 @@ function codegen() {
     ./CODEGEN/generateAndCompare.sh -q ${proc%.sa}
   fi
   # Check if there are any differences to the current repo
-  ###compare=true # enable comparison to current git repo
-  compare=false # disable comparison to current git repo
-  if [ ${compare} ] && [ "$(git ls-tree --name-only HEAD ${proc})" != "" ]; then
+  ###compare=1 # enable comparison to current git repo
+  compare=0 # disable comparison to current git repo
+  if [ "${compare}" != "0" ] && [ "$(git ls-tree --name-only HEAD ${proc})" != "" ]; then
+    echo
+    echo "Compare newly generated code for ${proc} to that in the madgraph4gpu github repository"
     git checkout HEAD ${proc}/CODEGEN*.txt
     if [ "${proc%.mad}" != "${proc}" ]; then
       git checkout HEAD ${proc}/Cards/me5_configuration.txt
       ###sed -i 's/DEFAULT_F2PY_COMPILER=f2py.*/DEFAULT_F2PY_COMPILER=f2py3/' ${proc}/Source/make_opts
       git checkout HEAD ${proc}/Source/make_opts
     fi
-    echo
     echo "git diff (start)"
     git diff --exit-code
     echo "git diff (end)"
+  else
+    echo
+    echo "(SKIP comparison of newly generated code for ${proc} to that in the madgraph4gpu github repository)"
   fi
 }
 

diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
@@ -16,22 +16,28 @@
     import internal.misc as misc
     import internal.extended_cmd as extended_cmd
     import internal.banner as banner_mod
+    import internal.common_run_interface as common_run_interface
 else:
     import madgraph.interface.madevent_interface as madevent_interface
     import madgraph.various.misc as misc
     import madgraph.interface.extended_cmd as extended_cmd
     import madgraph.various.banner as banner_mod
+    import madgraph.interface.common_run_interface as common_run_interface
 
 class CPPMEInterface(madevent_interface.MadEventCmdShell):
     def compile(self, *args, **opts):
         """ """
         import multiprocessing
         if not self.options['nb_core'] or self.options['nb_core'] == 'None':
-            self.options['nb_core'] = multiprocessing.cpu_count()
-        if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
-            import pathlib
-            import os
-            pjoin = os.path.join
+            self.options['nb_core'] = multiprocessing.cpu_count()    
+        if 'cwd' in opts and os.path.basename(opts['cwd']) == 'Source':
+            path = pjoin(opts['cwd'], 'make_opts')
+            avx_level = self.run_card['avx_level'] if self.run_card['avx_level'] != 'auto' else ''
+            common_run_interface.CommonRunCmd.update_make_opts_full(path,
+                {'FPTYPE': self.run_card['floating_type'],
+                 'AVX': avx_level })
+            misc.sprint('FPTYPE checked')
+        if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):            
             cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py
             logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend)
             if cudacpp_backend == 'FORTRAN':
@@ -46,30 +52,83 @@ def compile(self, *args, **opts):
         else:
             return misc.compile(nb_core=self.options['nb_core'], *args, **opts)
 
+# SIMD/GPU configuration block of the run_card ----------------------------------------------------------------
+template_on = \
+"""#***********************************************************************
+# SIMD/GPU configuration for the CUDACPP plugin
+#************************************************************************
+ %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)
+ %(avx_level)s = avx_level ! SIMD vectorization level: none, sse4, avx2, 512y, 512z, auto
+ %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: FORTRAN, CPP, CUDA
+"""
+
+template_off = ''
+plugin_block = banner_mod.RunBlock('simd', template_on=template_on, template_off=template_off)
+
 class CPPRunCard(banner_mod.RunCardLO):
+    blocks = banner_mod.RunCardLO.blocks + [plugin_block]
+
     def reset_simd(self, old_value, new_value, name):
         if not hasattr(self, 'path'):
-            logger.warning('WARNING! CPPRunCard instance has no attribute path')
-            return
-            ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path')
+            raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') # now ok after fixing #790
         if name == "vector_size" and new_value <= int(old_value):
             # code can handle the new size -> do not recompile
             return
         Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
         subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
+    def reset_makeopts(self, old_value, new_value, name):
+        """Write a changed 'floating_type' or 'avx_level' to Source/make_opts and clean the build.
+
+        Takes the same (old_value, new_value, name) arguments as reset_simd.
+        Only 'floating_type' and 'avx_level' are handled; any other name raises.
+        An avx_level of 'auto' is written to make_opts as an empty AVX value.
+        """
+        if not hasattr(self, 'path'):
+            raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path')
+        if name == 'floating_type':
+            fptype = new_value
+            avx_value = self['avx_level'] if self['avx_level'] != 'auto' else ''
+        elif name == 'avx_level':
+            fptype = self['floating_type']
+            avx_value = '' if new_value == 'auto' else new_value
+        else:
+            raise Exception("INTERNAL ERROR! reset_makeopts called for unexpected parameter '%s'" % name)
+        Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
+        # update_make_opts_full needs the make_opts path as first argument (as in CPPMEInterface.compile)
+        common_run_interface.CommonRunCmd.update_make_opts_full(pjoin(Sourcedir, 'make_opts'),
+            {'FPTYPE': fptype, 'AVX': avx_value})
+        # force a rebuild with the new settings
+        subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
     def plugin_input(self, finput):
         return
 
     def default_setup(self):
+        """Add the CUDACPP plugin parameters to the default LO run_card and override some defaults."""
         super().default_setup()
-        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)
+        # a change of precision or SIMD level is passed on to make_opts by reset_makeopts
+        self.add_param('floating_type', 'd', include=False, hidden=False,
+                       fct_mod=(self.reset_makeopts,(),{}),
+                       allowed=['m','d','f'])
+        self.add_param('avx_level', 'auto', include=False, hidden=False,
+                       fct_mod=(self.reset_makeopts,(),{}),
+                       allowed=['auto', 'none', 'sse4', 'avx2','512y','512z'])
+        # 'FORTRAN' matches the upper-cased value tested in CPPMEInterface.compile
+        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False,
+                       allowed=['FORTRAN', 'CPP', 'CUDA'])
+        self['vector_size'] = 16 # already set up in default class (just change value)
+        self['aloha_flag'] = '--fast-math'
+        self['matrix_flag'] = '-O3'
+        self.display_block.append('simd')
+        self.display_block.append('psoptim')
 
+    # OM/AV - overload the default version in banner.py
     def write_one_include_file(self, output_dir, incname, output_file=None):
         """write one include file at the time"""
-        if incname == "vector.inc" and 'vector_size' not in self.user_set:
-            return
-        super().write_one_include_file(output_dir, incname, output_file)
+        if incname == "vector.inc":
+            if 'vector_size' not in self.user_set: return
+            if output_file is None: vectorinc=pjoin(output_dir,incname)
+            else: vectorinc=output_file
+            with open(vectorinc+'.new','w') as fileout:
+                with open(vectorinc) as filein:
+                    for line in filein:
+                        if line.startswith('C'): fileout.write(line)
+            super().write_one_include_file(output_dir, incname, output_file)
+            with open(vectorinc+'.new','a') as fileout:
+                with open(vectorinc) as filein:
+                    for line in filein:
+                        if not line.startswith('\n'): fileout.write(line)
+            os.replace(vectorinc+'.new',vectorinc)
+        else:
+            super().write_one_include_file(output_dir, incname, output_file)
 
     def check_validity(self):
         """ensure that PLUGIN information are consistent"""
@@ -82,13 +141,10 @@ def check_validity(self):
 
 class GPURunCard(CPPRunCard):
     def default_setup(self):
-        super(CPPRunCard, self).default_setup()
-        self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False)
-
-#class CUDACPPRunCard(CPPRunCard):
-#    def default_setup(self):
-#        super(CPPRunCard, self).default_setup()
-#        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)
+        super().default_setup()
+        # change default value:
+        self['cudacpp_backend'] = 'CUDA'
+        self['vector_size'] = 16384 # already setup in default class (just change value)
 
 MEINTERFACE = CPPMEInterface
 RunCard = CPPRunCard
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
@@ -149,6 +149,11 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU):
     ###helas_exporter = None
     helas_exporter = model_handling.PLUGIN_GPUFOHelasCallWriter # this is one of the main fixes for issue #341!
 
+    # Default class for the run_card to use
+    from . import launch_plugin
+    run_card_class = launch_plugin.CPPRunCard
+
+
     # AV (default from OM's tutorial) - add a debug printout
     def __init__(self, *args, **kwargs):
         self.in_madevent_mode = False # see MR #747
@@ -203,7 +208,7 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
            cmdhistory is the list of command used so far.
            MG5options are all the options of the main interface
            outputflags is a list of options provided when doing the output command"""
-        misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self))
+        ###misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self))
         if self.in_madevent_mode:
             self.add_input_for_banner()
             if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL']
@@ -237,7 +242,8 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
                 raise Exception('ERROR! the O/S call to patchMad.sh failed')
             # Additional patching (OM)
             self.add_madevent_plugin_fct() # Added by OM
-        return super().finalize(matrix_element, cmdhistory, MG5options, outputflag)
+        # do not call the standard finalize since this is already done...
+        #return super().finalize(matrix_element, cmdhistory, MG5options, outputflag)
 
     # AV (default from OM's tutorial) - overload settings and add a debug printout
     def modify_grouping(self, matrix_element):

diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh
@@ -152,7 +152,7 @@ function codeGenAndDiff()
   echo -e "\n+++ Generate code for '$proc'\n"
   ###exit 0 # FOR DEBUGGING
   # Vector size for mad/madonly meexporter (VECSIZE_MEMMAX)
-  vecsize=16384 # NB THIS IS NO LONGER IGNORED (but will eventually be tunable via runcards)
+  vecsize=32 # NB THIS IS NO LONGER IGNORED (but will eventually be tunable via runcards)
   # Generate code for the specific process
   pushd $MG5AMC_HOME >& /dev/null
   mkdir -p ../TMPOUT
@@ -201,13 +201,28 @@ function codeGenAndDiff()
     cat ${outproc}_log.txt | egrep -v '(Crash Annotation)' > ${outproc}_log.txt.new # remove firefox 'glxtest: libEGL initialize failed' errors
     \mv ${outproc}_log.txt.new ${outproc}_log.txt
   fi
+  # Check the code generation log for errors 
+  if [ -d ${outproc} ] && ! grep -q "Please report this bug" ${outproc}_log.txt; then
+    ###cat ${outproc}_log.txt; exit 0 # FOR DEBUGGING
+    cat ${MG5AMC_HOME}/${outproc}_log.txt | { egrep 'INFO: (Try|Creat|Organiz|Process)' || true; }
+  else
+    echo "*** ERROR! Code generation failed"
+    cat ${MG5AMC_HOME}/${outproc}_log.txt
+    echo "*** ERROR! Code generation failed"
+    exit 1
+  fi
   # Patches moved here from patchMad.sh after Olivier's PR #764 (THIS IS ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO)  
   if [ "${OUTBCK}" == "mad" ]; then
     # Force the use of strategy SDE=1 in multichannel mode (see #419)
     sed -i 's/2  = sde_strategy/1  = sde_strategy/' ${outproc}/Cards/run_card.dat
+    # Force the use of VECSIZE_MEMMAX=16384
+    sed -i 's/16 = vector_size/16384 = vector_size/' ${outproc}/Cards/run_card.dat
+    # Force the use of fast-math in Fortran builds
+    sed -i 's/-O = global_flag.*/-O3 -ffast-math -fbounds-check = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O)/' ${outproc}/Cards/run_card.dat
     # Generate run_card.inc and param_card.inc (include stdout and stderr in the code generation log which is later checked for errors)
     # These two steps are part of "cd Source; make" but they actually are code-generating steps
-    ${outproc}/bin/madevent treatcards run  >> ${outproc}_log.txt 2>&1 # AV BUG! THIS MAY SILENTLY FAIL (check if output contains "Please report this bug")
+    # Note: treatcards run also regenerates vector.inc if vector_size has changed in the runcard
+    ${outproc}/bin/madevent treatcards run >> ${outproc}_log.txt 2>&1 # AV BUG! THIS MAY SILENTLY FAIL (check if output contains "Please report this bug")
     ${outproc}/bin/madevent treatcards param >> ${outproc}_log.txt 2>&1 # AV BUG! THIS MAY SILENTLY FAIL (check if output contains "Please report this bug")
     # Cleanup
     \rm -f ${outproc}/crossx.html
@@ -223,37 +238,18 @@ function codeGenAndDiff()
     touch ${outproc}/HTML/.keep # new file
     if [ "${patchlevel}" != "0" ]; then
       # Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks
+      # (FIXME? these flags are already set in the runcards, why are they not propagated to make_opts?)
       echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${outproc}/Source/make_opts.new
       cat ${outproc}/Source/make_opts >> ${outproc}/Source/make_opts.new
       \mv ${outproc}/Source/make_opts.new ${outproc}/Source/make_opts
     fi
     if [ "${patchlevel}" == "2" ]; then
       sed -i 's/DEFAULT_F2PY_COMPILER=f2py.*/DEFAULT_F2PY_COMPILER=f2py3/' ${outproc}/Source/make_opts
-      cat ${outproc}/Source/make_opts | sed '/#end/q' | sort > ${outproc}/Source/make_opts.new
+      cat ${outproc}/Source/make_opts | sed '/#end/q' | head --lines=-1 | sort > ${outproc}/Source/make_opts.new
       cat ${outproc}/Source/make_opts | sed -n -e '/#end/,$p' >> ${outproc}/Source/make_opts.new
       \mv ${outproc}/Source/make_opts.new ${outproc}/Source/make_opts
-      echo "
-#*********************************************************************
-# Options for the cudacpp plugin
-#*********************************************************************
-
-# Set cudacpp-specific values of non-cudacpp-specific options
--O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp)
-
-# New cudacpp-specific options (default values are defined in banner.py)
-CPP = cudacpp_backend ! valid backends are FORTRAN, CPP, CUDA" >> ${outproc}/Cards/run_card.dat
     fi
   fi
-  # Check the code generation log for errors 
-  if [ -d ${outproc} ] && ! grep -q "Please report this bug" ${outproc}_log.txt; then
-    ###cat ${outproc}_log.txt; exit 0 # FOR DEBUGGING
-    cat ${MG5AMC_HOME}/${outproc}_log.txt | { egrep 'INFO: (Try|Creat|Organiz|Process)' || true; }
-  else
-    echo "*** ERROR! Code generation failed"
-    cat ${MG5AMC_HOME}/${outproc}_log.txt
-    echo "*** ERROR! Code generation failed"
-    exit 1
-  fi
   popd >& /dev/null
   # Choose which directory must be copied (for gridpack generation: untar and modify the gridpack)
   if [ "${SCRBCK}" == "gridpack" ]; then
+1 −1		madgraph/interface/madevent_interface.py
+4 −2		madgraph/various/banner.py