From dbb99400d3e18ef89b79db534657a6618b0e3b45 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 16:16:25 +0300 Subject: [PATCH] [amd] in gg_tt.mad and CODEGEN, workaround for FPE #1011 in vxxxxx on HIP: replace "pvec0 / ( vmass * pp )" by "pvec0 / vmass / pp" --- .../CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h | 5 ++++- epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h index 95ffb65cd0..fcfc4b3153 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h @@ -451,7 +451,10 @@ } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 561a125384..febf1dcf42 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) 
+ const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. )