Skip to content

Commit

Permalink
Merge pull request #1894 from albinahlback/mulhigh_valgrind
Browse files Browse the repository at this point in the history
Fix memory usage in assembly mulhigh_basecase for x86_64
  • Loading branch information
albinahlback authored Mar 25, 2024
2 parents 1d3734a + dbd5fa2 commit 5279684
Showing 1 changed file with 22 additions and 25 deletions.
47 changes: 22 additions & 25 deletions src/mpn_extras/x86_64/broadwell/mulhigh_basecase.asm
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
dnl X86-64 mpn_mullo_basecase optimised for Intel Broadwell.

dnl Contributed to the GNU project by Torbjorn Granlund.

dnl Copyright 2017 Free Software Foundation, Inc.
dnl
dnl Copyright 2017 Free Software Foundation, Inc.
dnl Contributed to the GNU project by Torbjorn Granlund.
dnl Copyright (C) 2024 Albin Ahlbäck
dnl
dnl This file is part of FLINT.
dnl
dnl FLINT is free software: you can redistribute it and/or modify it under
dnl the terms of the GNU Lesser General Public License (LGPL) as published
dnl by the Free Software Foundation; either version 3 of the License, or
dnl (at your option) any later version. See <https://www.gnu.org/licenses/>.
dnl

dnl This file is part of the GNU MP Library.
dnl
Expand All @@ -29,16 +36,6 @@ dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
dnl see https://www.gnu.org/licenses/.
dnl
dnl Copyright (C) 2024 Albin Ahlbäck
dnl
dnl This file is part of FLINT.
dnl
dnl FLINT is free software: you can redistribute it and/or modify it under
dnl the terms of the GNU Lesser General Public License (LGPL) as published
dnl by the Free Software Foundation; either version 3 of the License, or
dnl (at your option) any later version. See <https://www.gnu.org/licenses/>.
dnl

include(`config.m4')

Expand All @@ -49,18 +46,17 @@ define(`n', `%rcx')

define(`bp', `%r8')
define(`jmpreg', `%r9')
define(`nn', `%r10')
define(`m', `%r13')
define(`mm', `%r14')

define(`rx', `%rax')
define(`nn', `%rbp')
define(`mm', `%rbx')
define(`m', `%r14')

define(`r0', `%r11')
define(`r1', `%rbx')
define(`r2', `%rbp')
define(`r3', `%r12')
define(`rx', `%rax')

dnl Idea: Do similar to mpn_mullo_basecase for Skylake.
define(`r0', `%r10')
define(`r1', `%r11')
define(`r2', `%r12')
define(`r3', `%r13')

TEXT
ALIGN(32)
Expand Down Expand Up @@ -212,17 +208,18 @@ L(end): adox 0*8(rp), r2
lea -1*8(m), m
lea 1*8(bp), bp C Increase bp
lea 2*8(rp,m), rp C Reset rp
mov 0*8(bp), %rdx C Load bp
cmp R32(m), R32(mm)
jge L(jmp)
C If |m| < |mm|: goto jmpreg, but first do high part
mov 0*8(bp), %rdx C Load bp
or R32(nn), R32(n) C Reset n, CF and OF
mulx -2*8(ap), r1, r1
adcx r1, rx
jmp *jmpreg
C If |m| > |mm|: goto fin
L(jmp): jg L(fin)
C If |m| = |mm|: goto jmpreg
mov 0*8(bp), %rdx C Load bp
or R32(nn), R32(n) C Reset n, clear CF and OF
jmp *jmpreg

Expand Down

0 comments on commit 5279684

Please sign in to comment.