Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bench: avoid peakflops register dependencies #442

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions bench/x86-64/peakflops.ptt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ INSTR_CONST 32
INSTR_LOOP 20
UOPS 19
movaps FPR1, [rip+SCALAR]
movaps FPR2, [rip+SCALAR]
movaps FPR3, [rip+SCALAR]
movaps FPR4, [rip+SCALAR]
movaps FPR5, [rip+SCALAR]
Expand All @@ -27,19 +26,19 @@ movaps FPR16, [rip+SCALAR]
.align 32
LOOP 1
movsd FPR2, [STR0 + GPR1 * 8 ]
addsd FPR1, FPR2
addsd FPR3, FPR4
mulsd FPR5, FPR6
mulsd FPR7, FPR8
mulsd FPR9, FPR10
addsd FPR11, FPR12
addsd FPR13, FPR14
mulsd FPR15, FPR16
addsd FPR1, FPR2
addsd FPR3, FPR4
mulsd FPR5, FPR6
mulsd FPR7, FPR8
mulsd FPR9, FPR10
addsd FPR11, FPR12
addsd FPR13, FPR14
mulsd FPR15, FPR16
mulsd FPR1, FPR2
addsd FPR3, FPR2
mulsd FPR4, FPR2
addsd FPR5, FPR2
mulsd FPR6, FPR2
addsd FPR7, FPR2
mulsd FPR8, FPR2
addsd FPR9, FPR2
mulsd FPR10, FPR2
addsd FPR11, FPR2
mulsd FPR12, FPR2
addsd FPR13, FPR2
mulsd FPR14, FPR2
addsd FPR15, FPR2
mulsd FPR16, FPR2
addsd FPR9, FPR2
33 changes: 16 additions & 17 deletions bench/x86-64/peakflops_sp.ptt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ INSTR_CONST 32
INSTR_LOOP 20
UOPS 19
movaps FPR1, [rip+SCALAR]
movaps FPR2, [rip+SCALAR]
movaps FPR3, [rip+SCALAR]
movaps FPR4, [rip+SCALAR]
movaps FPR5, [rip+SCALAR]
Expand All @@ -27,19 +26,19 @@ movaps FPR16, [rip+SCALAR]
.align 32
LOOP 1
movss FPR2, [STR0 + GPR1 * 4 ]
addss FPR1, FPR2
addss FPR3, FPR4
mulss FPR5, FPR6
mulss FPR7, FPR8
mulss FPR9, FPR10
addss FPR11, FPR12
addss FPR13, FPR14
mulss FPR15, FPR16
addss FPR1, FPR2
addss FPR3, FPR4
mulss FPR5, FPR6
mulss FPR7, FPR8
mulss FPR9, FPR10
addss FPR11, FPR12
addss FPR13, FPR14
mulss FPR15, FPR16
mulss FPR1, FPR2
addss FPR3, FPR2
mulss FPR4, FPR2
addss FPR5, FPR2
mulss FPR6, FPR2
addss FPR7, FPR2
mulss FPR8, FPR2
addss FPR9, FPR2
mulss FPR10, FPR2
addss FPR11, FPR2
mulss FPR12, FPR2
addss FPR13, FPR2
mulss FPR14, FPR2
addss FPR15, FPR2
mulss FPR16, FPR2
addss FPR9, FPR2
35 changes: 17 additions & 18 deletions bench/x86-64/peakflops_sp_sse.ptt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ INSTR_CONST 32
INSTR_LOOP 20
UOPS 19
movaps xmm0, [rip+SCALAR]
movaps xmm1, [rip+SCALAR]
movaps xmm2, [rip+SCALAR]
movaps xmm3, [rip+SCALAR]
movaps xmm4, [rip+SCALAR]
Expand All @@ -26,20 +25,20 @@ movaps xmm14, [rip+SCALAR]
movaps xmm15, [rip+SCALAR]
.align 32
LOOP 4
movaps xmm2, [STR0 + GPR1 * 4 ]
addps xmm1, xmm2
mulps xmm3, xmm4
addps xmm5, xmm6
mulps xmm7, xmm8
addps xmm9, xmm10
mulps xmm11, xmm12
addps xmm13, xmm14
mulps xmm15, xmm0
addps xmm1, xmm2
mulps xmm3, xmm4
addps xmm5, xmm6
mulps xmm7, xmm8
addps xmm9, xmm10
mulps xmm11, xmm12
addps xmm13, xmm14
mulps xmm15, xmm0
movaps xmm1, [STR0 + GPR1 * 4 ]
mulps xmm0, xmm1
addps xmm2, xmm1
mulps xmm3, xmm1
addps xmm4, xmm1
mulps xmm5, xmm1
addps xmm6, xmm1
mulps xmm7, xmm1
addps xmm8, xmm1
mulps xmm9, xmm1
addps xmm10, xmm1
mulps xmm11, xmm1
addps xmm12, xmm1
mulps xmm13, xmm1
addps xmm14, xmm1
mulps xmm15, xmm1
addps xmm8, xmm1
32 changes: 16 additions & 16 deletions bench/x86-64/peakflops_sse.ptt
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,19 @@ movapd xmm15, [rip+SCALAR]
.align 32
LOOP 2
movapd xmm1, [STR0 + GPR1 * 8 ]
addpd xmm1, xmm2
mulpd xmm3, xmm4
addpd xmm5, xmm6
mulpd xmm7, xmm8
addpd xmm9, xmm10
mulpd xmm11, xmm12
addpd xmm13, xmm14
mulpd xmm15, xmm0
addpd xmm1, xmm2
mulpd xmm3, xmm4
addpd xmm5, xmm6
mulpd xmm7, xmm8
addpd xmm9, xmm10
mulpd xmm11, xmm12
addpd xmm13, xmm14
mulpd xmm15, xmm0
mulpd xmm0, xmm1
addpd xmm2, xmm1
mulpd xmm3, xmm1
addpd xmm4, xmm1
mulpd xmm5, xmm1
addpd xmm6, xmm1
mulpd xmm7, xmm1
addpd xmm8, xmm1
mulpd xmm9, xmm1
addpd xmm10, xmm1
mulpd xmm11, xmm1
addpd xmm12, xmm1
mulpd xmm13, xmm1
addpd xmm14, xmm1
mulpd xmm15, xmm1
addpd xmm8, xmm1