Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
a
Browse files Browse the repository at this point in the history
valassi committed Jul 20, 2023
1 parent beb8b5f commit a6eb55f
Showing 1 changed file with 48 additions and 48 deletions.
96 changes: 48 additions & 48 deletions epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt
Original file line number Diff line number Diff line change
@@ -35,22 +35,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0'
make[1]: Nothing to be done for 'all'.
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx'

DATE: 2023-06-16_22:51:08
DATE: 2023-07-20_17:36:31

On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP=
Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
EvtsPerSec[Rmb+ME] (23) = ( 4.924818e+07 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 1.131524e+08 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 1.265700e+08 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 4.003367e+07 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 1.129514e+08 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 1.264428e+08 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 0.548095 sec
2,264,164,972 cycles # 2.867 GHz
2,877,964,725 instructions # 1.27 insn per cycle
0.846931183 seconds time elapsed
TOTAL : 0.559201 sec
2,344,901,473 cycles # 2.919 GHz
2,930,110,698 instructions # 1.25 insn per cycle
0.861924488 seconds time elapsed
runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 214
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
@@ -68,15 +68,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 1.970787e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 2.034500e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 2.034500e+05 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 1.900323e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 1.964314e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 1.964314e+05 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 5.428827 sec
16,813,146,395 cycles # 3.096 GHz
45,522,826,465 instructions # 2.71 insn per cycle
5.435350903 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0)
TOTAL : 5.666953 sec
17,034,536,839 cycles # 3.000 GHz
45,603,039,359 instructions # 2.68 insn per cycle
5.680979221 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
@@ -94,15 +94,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 3.561847e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 3.792373e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 3.792373e+05 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 3.403774e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 3.621403e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 3.621403e+05 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 3.052689 sec
9,449,457,732 cycles # 3.092 GHz
26,574,621,752 instructions # 2.81 insn per cycle
3.065025864 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 2475) (avx2: 0) (512y: 0) (512z: 0)
TOTAL : 3.226902 sec
9,890,519,824 cycles # 3.053 GHz
27,411,762,268 instructions # 2.77 insn per cycle
3.245927523 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 2528) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
@@ -120,15 +120,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 6.062194e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 6.740435e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 6.740435e+05 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 5.940305e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 6.630785e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 6.630785e+05 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 1.837966 sec
5,299,704,595 cycles # 2.875 GHz
11,318,276,197 instructions # 2.14 insn per cycle
1.850204362 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2317) (512y: 0) (512z: 0)
TOTAL : 1.911307 sec
5,554,271,159 cycles # 2.887 GHz
11,718,779,585 instructions # 2.11 insn per cycle
1.930632243 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
@@ -146,15 +146,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 6.705723e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 7.548710e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 7.548710e+05 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 6.524135e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 7.359151e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 7.359151e+05 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 1.671188 sec
4,885,067,241 cycles # 2.915 GHz
10,738,150,017 instructions # 2.20 insn per cycle
1.677721158 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 84) (512z: 0)
TOTAL : 1.751816 sec
5,153,612,038 cycles # 2.921 GHz
11,141,453,691 instructions # 2.16 insn per cycle
1.765393243 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 100) (512z: 0)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.
@@ -172,15 +172,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
OMP threads / `nproc --all` = 1 / 4
EvtsPerSec[Rmb+ME] (23) = ( 4.237343e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 4.564057e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 4.564057e+05 ) sec^-1
EvtsPerSec[Rmb+ME] (23) = ( 4.152598e+05 ) sec^-1
EvtsPerSec[MatrixElems] (3) = ( 4.487658e+05 ) sec^-1
EvtsPerSec[MECalcOnly] (3a) = ( 4.487658e+05 ) sec^-1
MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0
TOTAL : 2.582849 sec
5,275,527,208 cycles # 2.039 GHz
7,074,506,056 instructions # 1.34 insn per cycle
2.594504687 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1084) (512y: 95) (512z: 1629)
TOTAL : 2.676718 sec
5,425,832,216 cycles # 2.018 GHz
7,519,714,033 instructions # 1.39 insn per cycle
2.690815651 seconds time elapsed
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1133) (512y: 122) (512z: 1711)
-------------------------------------------------------------------------
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe
[ PASSED ] 6 tests.

0 comments on commit a6eb55f

Please sign in to comment.