Merge pull request #764 from madgraph5/new_interface

New interface
madgraph5 · Sep 15, 2023 · fbdacbd · fbdacbd
2 parents f75e994 + 87afdc2
commit fbdacbd
Show file tree

Hide file tree

Showing 15 changed files with 306 additions and 186 deletions.
diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
@@ -25,16 +25,19 @@ index 617f10b93..dbe08b846 100644
  	$(RM) *.o $(LIBRARIES) $(BINARIES)
  	cd PDF; make clean; cd ..
  	cd PDF/gammaUPC; make clean; cd ../../
-@@ -132,4 +132,9 @@ clean:
+@@ -132,4 +132,11 @@ clean:
  	cd BIAS/ptj_bias; make clean; cd ../..
  	if [ -d $(CUTTOOLSDIR) ]; then cd $(CUTTOOLSDIR); make clean; cd ..; fi
  	if [ -d $(IREGIDIR) ]; then cd $(IREGIDIR); make clean; cd ..; fi
 +
 +clean: cleanSource
  	for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done;
 +
-+cleanall: cleanSource
++cleanavx:
 +	for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done;
++cleanall: cleanSource # THIS IS THE ONE
++	for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done;
++
 diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
 index 348c283be..74db44d84 100644
 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
@@ -78,12 +81,12 @@ index 348c283be..74db44d84 100644
 +CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)")
 +###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV))
 +###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))"))
-+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
-+ifeq ($(CUDACPP_BUILDDIR),)
-+$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
-+else
++CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
++#ifeq ($(CUDACPP_BUILDDIR),)
++#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
++#else
 +$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)')
-+endif
++#endif
 +CUDACPP_COMMONLIB=mg5amc_common
 +CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp
 +CUDACPP_CULIB=mg5amc_$(processid_short)_cuda
@@ -110,7 +113,7 @@ index 348c283be..74db44d84 100644
 
 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX)
 -	$(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
-+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458
++#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?)
 
 -$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL)
 -	$(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
@@ -283,44 +286,6 @@ index 348c283be..74db44d84 100644
 +
 +distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation
 +	$(MAKE) -f $(CUDACPP_MAKEFILE) distclean
-diff --git b/epochX/cudacpp/gg_tt.mad/Source/make_opts a/epochX/cudacpp/gg_tt.mad/Source/make_opts
-index 57f5f7bb9..bd3c24228 100644
---- b/epochX/cudacpp/gg_tt.mad/Source/make_opts
-+++ a/epochX/cudacpp/gg_tt.mad/Source/make_opts
-@@ -1,12 +1,17 @@
-+pdlabel1=
-+pdlabel2=
-+lhapdf=
-+PYTHIA8_PATH=NotInstalled
-+MG5AMC_VERSION=3.5.0_lo_vect
- GLOBAL_FLAG=-O3 -ffast-math -fbounds-check
-+ALOHA_FLAG=
-+MATRIX_FLAG=
- DEFAULT_CPP_COMPILER=g++
- MACFLAG=
- STDLIB=-lstdc++
- STDLIB_FLAG=
- DEFAULT_F_COMPILER=gfortran
- DEFAULT_F2PY_COMPILER=f2py3
--MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime
--PYTHIA8_PATH=NotInstalled
- #end_of_make_opts_variables
-
- BIASLIBDIR=../../../lib/
-diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
-index 8f8df219d..7624b9f55 100755
---- b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
-+++ a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
-@@ -4187,7 +4187,8 @@ class RunCardLO(RunCard):
-         self.add_param('mxxmin4pdg',[-1.], system=True)
-         self.add_param('mxxpart_antipart', [False], system=True)
-
--        
-+        # CUDACPP parameters
-+        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)
-
-     def check_validity(self):
-         """ """
 diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py
 index 4dd71db86..3b8ec3121 100755
 --- b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py
@@ -32,7 +32,12 @@
     #    allows the command "output myformat PATH" in madgraph.
     #    MYCLASS should inherit from class madgraph.iolibs.export_v4.VirtualExporter
     import PLUGIN.CUDACPP_OUTPUT.output as output
-    new_output = { 'standalone_cudacpp' : output.PLUGIN_ProcessExporter }
+    new_output = { 'madevent_simd' : output.SIMD_ProcessExporter,
+                   'madevent_gpu' : output.GPU_ProcessExporter,
+                   'standalone_cudacpp' : output.PLUGIN_ProcessExporter,
+                   'standalone_simd' :  output.SIMD_ProcessExporter,
+                   'standalone_cuda' :  output.GPU_ProcessExporter,
+                  }
 
     # 2. Define new way to handle the cluster.
     #    Example: new_cluster = {'mycluster': MYCLUSTERCLASS}

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
@@ -0,0 +1,100 @@
+
+import logging
+import os
+import subprocess
+pjoin = os.path.join
+logger = logging.getLogger('cmdprint') # for stdout
+
+try:
+    import madgraph
+except ImportError:
+    import internal.madevent_interface as madevent_interface
+    import internal.misc as misc
+    import internal.extended_cmd as extended_cmd
+    import internal.banner as banner_mod
+else:
+    import madgraph.interface.madevent_interface as madevent_interface
+    import madgraph.various.misc as misc
+    import madgraph.interface.extended_cmd as extended_cmd
+    import madgraph.various.banner as banner_mod
+
+class CPPMEInterface(madevent_interface.MadEventCmdShell):
+    """madevent shell whose compile step picks the matrix-element backend link target."""
+
+    def compile(self, *args, **opts):
+        """Forward to misc.compile, redirecting the 'madevent' target per run_card['cudacpp_backend'].
+
+        If args[0][0] is 'madevent' and self has a run_card, that entry is replaced in place by
+        'madevent_fortran_link', 'madevent_cpp_link' or 'madevent_cuda_link'; any other backend
+        value raises Exception. Returns the result of misc.compile.
+        """
+        import multiprocessing
+        # use every available core when nb_core is unset (falsy or the string 'None')
+        if not self.options['nb_core'] or self.options['nb_core'] == 'None':
+            self.options['nb_core'] = multiprocessing.cpu_count()
+
+        if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
+            # the default value is defined by default_setup of the plugin run cards in this file
+            cudacpp_backend = self.run_card['cudacpp_backend'].upper()
+            logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend)
+            if cudacpp_backend == 'FORTRAN':
+                args[0][0] = 'madevent_fortran_link'
+            elif cudacpp_backend == 'CPP':
+                args[0][0] = 'madevent_cpp_link'
+            elif cudacpp_backend == 'CUDA':
+                args[0][0] = 'madevent_cuda_link'
+            else:
+                # report the offending value in the error message
+                raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported" % cudacpp_backend)
+        # the redirected and the untouched case end in the same call
+        return misc.compile(nb_core=self.options['nb_core'], *args, **opts)
+
+class CPPRunCard(banner_mod.RunCardLO):
+    """LO run card for the cudacpp plugin; adds the 'cudacpp_backend' parameter (default 'CPP')."""
+
+    def reset_simd(self, old_value, new_value, name):
+        """Run 'make cleanavx' in Source, except when name is 'vector_size' and it did not grow."""
+        if not hasattr(self, 'path'):
+            raise Exception("cannot locate the Source directory: the run card has no 'path' attribute")
+        if name == "vector_size" and new_value <= int(old_value):
+            # code can handle the new size -> do not recompile
+            return
+        # the Source directory sits two directory levels above self.path
+        Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
+        # best effort: make output is discarded and its exit status is not checked
+        subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+    def plugin_input(self, finput):
+        """No-op: finput is ignored and None is returned."""
+        return
+
+    def default_setup(self):
+        """Run the parent default_setup, then register 'cudacpp_backend' with default 'CPP'."""
+        super().default_setup()
+        self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)
+
+    def write_one_include_file(self, output_dir, incname, output_file=None):
+        """write one include file at the time"""
+        # vector.inc is skipped unless 'vector_size' is listed in self.user_set
+        if incname == "vector.inc" and 'vector_size' not in self.user_set:
+            return
+        super().write_one_include_file(output_dir, incname, output_file)
+
+    def check_validity(self):
+        """ensure that PLUGIN information are consistent"""
+        super().check_validity()
+        if self['SDE_strategy'] != 1:
+            logger.warning('SDE_strategy different from 1 is not supported with SIMD/GPU mode')
+            self['sde_strategy'] = 1
+        # hel_recycling is forced to False whenever it is set (no warning is emitted)
+        if self['hel_recycling']:
+            self['hel_recycling'] = False
+
+class GPURunCard(CPPRunCard):
+    """Run card variant whose 'cudacpp_backend' parameter defaults to 'CUDA'."""
+    def default_setup(self):
+        super(CPPRunCard, self).default_setup()  # lookup starts after CPPRunCard, so its default_setup (which adds 'cudacpp_backend' as 'CPP') is not run
+        self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False)
+
+MEINTERFACE = CPPMEInterface
+RunCard = CPPRunCard
diff --git a/...DEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/...DEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc
@@ -112,10 +112,17 @@ namespace mg5amcCpu
     // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
     bool ok = true; // this is just an assumption!
     const std::string tag = "arm neon (128bit as in SSE4.2)";
-#else
+#elif  defined(__x86_64__) || defined(__i386__)
     bool known = true;
     bool ok = __builtin_cpu_supports( "sse4.2" );
     const std::string tag = "nehalem (SSE4.2)";
+#else
+     bool known = false; // __builtin_cpu_supports is not supported
+    // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
+    // See https://stackoverflow.com/q/62783908
+    // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
+    bool ok = true; // this is just an assumption!
+    const std::string tag = "arm neon (128bit as in SSE4.2)";
 #endif
 #else
     bool known = true;

diff --git a/...hX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/...hX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk
@@ -27,6 +27,7 @@ UNAME_S := $(shell uname -s)
 UNAME_P := $(shell uname -p)
 ###$(info UNAME_P='$(UNAME_P)')
 
+include ../../Source/make_opts
 #-------------------------------------------------------------------------------
 
 #=== Configure common compiler flags for C++ and CUDA
@@ -220,7 +221,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),)
 override OMPFLAGS = -fopenmp
 ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578)
 else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),)
-override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578)
+override OMPFLAGS = -fopenmp # OpenMP MT is now enabled on Apple clang (it used to be disabled because builds failed in the CI #578)
 else
 override OMPFLAGS = -fopenmp
 ###override OMPFLAGS = # disable OpenMP MT (default before #575)
@@ -554,7 +555,7 @@ endif
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o
 $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib)
-	$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+	$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp
 
 ifneq ($(NVCC),)
 $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o

diff --git a/...GIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/...GIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc
@@ -405,8 +405,7 @@ namespace mg5amcCpu
     {
       // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396)
       constexpr int nprocesses = %(nproc)i;
-      static_assert( nprocesses == 1, "Assume nprocesses == 1" );
-      // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2)
+      static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" );
       constexpr int process_id = %(proc_id)i; // code generation source: %(proc_id_source)s
       static_assert( process_id == 1, "Assume process_id == 1" );
     }
+6 −4		Template/LO/SubProcesses/dummy_fct.f
+18 −4		Template/LO/bin/generate_events
+22 −4		Template/LO/bin/madevent
+5 −5		Template/NLO/SubProcesses/fks_singular.f
+10 −5		Template/NLO/SubProcesses/genps_fks.f
+2 −0		Template/NLO/SubProcesses/makefile_fks_dir
+34 −16		Template/NLO/SubProcesses/test_soft_col_limits.f
+7 −4		madgraph/interface/common_run_interface.py
+7 −1		madgraph/interface/extended_cmd.py
+11 −1		madgraph/interface/launch_ext_program.py
+4 −2		madgraph/interface/madevent_interface.py
+12 −2		madgraph/interface/madgraph_interface.py
+67 −21		madgraph/iolibs/export_v4.py
+1 −1		madgraph/iolibs/group_subprocs.py
+1 −0		madgraph/iolibs/template_files/auto_dsig_v4.inc
+7 −2		madgraph/madevent/gen_ximprove.py
+211 −70		madgraph/various/banner.py
+1 −1		models/check_param_card.py