From 7ee142e3f6143d7c613ce75ca3f797b76032c39e Mon Sep 17 00:00:00 2001 From: Marina Yatsina Date: Thu, 3 Dec 2015 08:55:33 +0000 Subject: [PATCH 001/364] [X86] Add support for fcomip, fucomip for Intel syntax According to x86 spec, fcomip and fucomip should be supported for Intel syntax. Differential Revision: http://reviews.llvm.org/D15104 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254595 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 4 ++-- test/MC/X86/intel-syntax.s | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0571b07d2f8b..1e66739026e2 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2651,14 +2651,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; -def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll", "att">; def : MnemonicAlias<"fistpq", "fistpll", "att">; def : MnemonicAlias<"fisttpq", "fisttpll", "att">; def : MnemonicAlias<"fldcww", "fldcw", "att">; def : MnemonicAlias<"fnstcww", "fnstcw", "att">; def : MnemonicAlias<"fnstsww", "fnstsw", "att">; -def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fucomip", "fucompi">; def : MnemonicAlias<"fwait", "wait">; def : MnemonicAlias<"fxsaveq", "fxsave64", "att">; diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index b79b21dc9691..71bf6cc0ffdb 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -736,3 +736,8 @@ fbld tbyte ptr [eax] fbstp tbyte ptr [eax] // CHECK: fbld (%eax) // CHECK: fbstp (%eax) + +fcomip st, st(2) +fucomip st, st(2) +// CHECK: fcompi %st(2) +// CHECK: fucompi %st(2) From 1051eae13a0f6397361617b1efab909cc7ba7c19 Mon Sep 17 00:00:00 2001 From: Zlatko Buljan Date: Thu, 3 Dec 2015 09:56:39 +0000 Subject: [PATCH 002/364] [mips][DSP] Add DSPr1 and DSPr2 tests for the standard encodings Differential Revision: http://reviews.llvm.org/D15141 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254598 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/Disassembler/Mips/dsp/valid.txt | 125 +++++++++++++ test/MC/Disassembler/Mips/dspr2/valid.txt | 173 +++++++++++++++++ test/MC/Mips/dsp/valid.s | 127 +++++++++++++ test/MC/Mips/dspr2/valid.s | 217 +++++++++++++++++----- 4 files changed, 597 insertions(+), 45 deletions(-) create mode 100644 test/MC/Disassembler/Mips/dsp/valid.txt create mode 100644 test/MC/Disassembler/Mips/dspr2/valid.txt create mode 100644 test/MC/Mips/dsp/valid.s diff --git a/test/MC/Disassembler/Mips/dsp/valid.txt b/test/MC/Disassembler/Mips/dsp/valid.txt new file mode 100644 index 000000000000..e6ca900dde55 --- /dev/null +++ b/test/MC/Disassembler/Mips/dsp/valid.txt @@ -0,0 +1,125 @@ +# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=dsp | FileCheck %s + + 0x7c 0x02 0x0a 0x52 # CHECK: absq_s.ph $1, $2 + 0x7c 0x06 0x2c 0x52 # CHECK: absq_s.w $5, $6 + 0x7d 0x09 0x3a 0x90 # CHECK: addq.ph $7, $8, $9 + 0x7d 0x6c 0x53 0x90 # CHECK: addq_s.ph $10, $11, $12 + 0x7d 0xcf 0x6d 0x90 # CHECK: addq_s.w $13, $14, $15 + 0x7f 0xbe 0xe4 0x10 # CHECK: addsc $gp, $sp, $fp + 0x7c 0xe8 0x30 0x10 # CHECK: addu.qb $6, $7, $8 + 0x7d 0x4b 0x49 0x10 # CHECK: addu_s.qb $9, $10, $11 + 0x7d 0xae 0x64 0x50 # CHECK: addwc $12, $13, $14 + 0x7c 0x1a 0xce 0xd2 # CHECK: 
bitrev $25, $26 + 0x04 0x1c 0x14 0x9b # CHECK: bposge32 21104 + 0x7f 0x7c 0x02 0x11 # CHECK: cmp.eq.ph $27, $gp + 0x7f 0xbe 0x02 0x51 # CHECK: cmp.lt.ph $sp, $fp + 0x7f 0xe1 0x02 0x91 # CHECK: cmp.le.ph $ra, $1 + 0x7d 0x8d 0x59 0x11 # CHECK: cmpgu.eq.qb $11, $12, $13 + 0x7d 0xf0 0x71 0x51 # CHECK: cmpgu.lt.qb $14, $15, $16 + 0x7e 0x53 0x89 0x91 # CHECK: cmpgu.le.qb $17, $18, $19 + 0x7e 0x95 0x00 0x11 # CHECK: cmpu.eq.qb $20, $21 + 0x7e 0xd7 0x00 0x51 # CHECK: cmpu.lt.qb $22, $23 + 0x7f 0x19 0x00 0x91 # CHECK: cmpu.le.qb $24, $25 + 0x7c 0x22 0x09 0x30 # CHECK: dpaq_s.w.ph $ac1, $1, $2 + 0x7c 0x64 0x13 0x30 # CHECK: dpaq_sa.l.w $ac2, $3, $4 + 0x7d 0x2a 0x08 0xf0 # CHECK: dpau.h.qbl $ac1, $9, $10 + 0x7d 0x6c 0x09 0xf0 # CHECK: dpau.h.qbr $ac1, $11, $12 + 0x7e 0x32 0x01 0x70 # CHECK: dpsq_s.w.ph $ac0, $17, $18 + 0x7e 0x74 0x0b 0x70 # CHECK: dpsq_sa.l.w $ac1, $19, $20 + 0x7c 0xa6 0x02 0xf0 # CHECK: dpsu.h.qbl $ac0, $5, $6 + 0x7c 0xe8 0x0b 0xf0 # CHECK: dpsu.h.qbr $ac1, $7, $8 + 0x7f 0xe1 0x00 0xb8 # CHECK: extp $1, $ac0, 31 + 0x7c 0x02 0x0a 0xb8 # CHECK: extpdp $2, $ac1, 0 + 0x7c 0x83 0x12 0xf8 # CHECK: extpdpv $3, $ac2, $4 + 0x7c 0xc5 0x18 0xf8 # CHECK: extpv $5, $ac3, $6 + 0x7f 0xe7 0x00 0x38 # CHECK: extr.w $7, $ac0, 31 + 0x7d 0xe8 0x09 0x38 # CHECK: extr_r.w $8, $ac1, 15 + 0x7c 0xe9 0x11 0xb8 # CHECK: extr_rs.w $9, $ac2, 7 + 0x7c 0x6a 0x1b 0xb8 # CHECK: extr_s.h $10, $ac3, 3 + 0x7d 0x8b 0x00 0x78 # CHECK: extrv.w $11, $ac0, $12 + 0x7d 0xcd 0x09 0x78 # CHECK: extrv_r.w $13, $ac1, $14 + 0x7e 0x0f 0x11 0xf8 # CHECK: extrv_rs.w $15, $ac2, $16 + 0x7e 0x51 0x1b 0xf8 # CHECK: extrv_s.h $17, $ac3, $18 + 0x7e 0x93 0x00 0x0c # CHECK: insv $19, $20 + 0x7f 0x54 0x51 0x8a # CHECK: lbux $10, $20($26) + 0x7f 0x75 0x59 0x0a # CHECK: lhx $11, $21($27) + 0x7f 0x96 0x60 0x0a # CHECK: lwx $12, $22($gp) + 0x70 0xc7 0x08 0x00 # CHECK: madd $ac1, $6, $7 + 0x71 0x09 0x08 0x01 # CHECK: maddu $ac1, $8, $9 + 0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7 + 0x71 0x09 0x00 0x01 # CHECK: maddu $8, $9 + 0x7c 0x64 0x15 0x30 # CHECK: maq_s.w.phl $ac2, $3, $4 + 0x7c 0xa6 0x1c 0x30 # CHECK: maq_sa.w.phl $ac3, $5, $6 + 0x7c 0xe8 0x05 0xb0 # CHECK: maq_s.w.phr $ac0, $7, $8 + 0x7d 0x2a 0x0c 0xb0 # CHECK: maq_sa.w.phr $ac1, $9, $10 + 0x00 0x20 0x70 0x10 # CHECK: mfhi $14, $ac1 + 0x00 0x20 0x78 0x12 # CHECK: mflo $15, $ac1 + 0x00 0x00 0x70 0x10 # CHECK: mfhi $14 + 0x00 0x00 0x78 0x12 # CHECK: mflo $15 + 0x7d 0x8d 0x5c 0x90 # CHECK: modsub $11, $12, $13 + 0x71 0x4b 0x18 0x04 # CHECK: msub $ac3, $10, $11 + 0x71 0x8d 0x10 0x05 # CHECK: msubu $ac2, $12, $13 + 0x71 0x4b 0x00 0x04 # CHECK: msub $10, $11 + 0x71 0x8d 0x00 0x05 # CHECK: msubu $12, $13 + 0x02 0x00 0x18 0x11 # CHECK: mthi $16, $ac3 + 0x02 0x00 0x00 0x11 # CHECK: mthi $16 + 0x7d 0xc0 0x17 0xf8 # CHECK: mthlip $14, $ac2 + 0x02 0x20 0x10 0x13 # CHECK: mtlo $17, $ac2 + 0x02 0x20 0x00 0x13 # CHECK: mtlo $17 + 0x7e 0xd7 0xaf 0x10 # CHECK: muleq_s.w.phl $21, $22, $23 + 0x7f 0x3a 0xc7 0x50 # CHECK: muleq_s.w.phr $24, $25, $26 + 0x7f 0x9d 0xd9 0x90 # CHECK: muleu_s.ph.qbl $27, $gp, $sp + 0x7f 0xe1 0xf1 0xd0 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 + 0x7c 0x64 0x17 0xd0 # CHECK: mulq_rs.ph $2, $3, $4 + 0x7e 0x11 0x01 0xb0 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 + 0x00 0x43 0x18 0x18 # CHECK: mult $ac3, $2, $3 + 0x00 0x85 0x10 0x19 # CHECK: multu $ac2, $4, $5 + 0x00 0x43 0x00 0x18 # CHECK: mult $2, $3 + 0x00 0x85 0x00 0x19 # CHECK: multu $4, $5 + 0x7e 0x74 0x93 0x91 # CHECK: packrl.ph $18, $19, $20 + 0x7d 0xe3 0x3a 0xd1 # CHECK: pick.ph $7, $15, $3 + 0x7c 0x88 0x10 0xd1 # CHECK: pick.qb $2, $4, 
$8 + 0x7c 0x15 0xa3 0x12 # CHECK: preceq.w.phl $20, $21 + 0x7c 0x16 0xab 0x52 # CHECK: preceq.w.phr $21, $22 + 0x7c 0x17 0xb1 0x12 # CHECK: precequ.ph.qbl $22, $23 + 0x7c 0x19 0xc1 0x92 # CHECK: precequ.ph.qbla $24, $25 + 0x7c 0x18 0xb9 0x52 # CHECK: precequ.ph.qbr $23, $24 + 0x7c 0x1a 0xc9 0xd2 # CHECK: precequ.ph.qbra $25, $26 + 0x7c 0x1b 0xd7 0x12 # CHECK: preceu.ph.qbl $26, $27 + 0x7c 0x1d 0xe7 0x92 # CHECK: preceu.ph.qbla $gp, $sp + 0x7c 0x1c 0xdf 0x52 # CHECK: preceu.ph.qbr $27, $gp + 0x7c 0x1e 0xef 0xd2 # CHECK: preceu.ph.qbra $sp, $fp + 0x7e 0x53 0x8d 0x11 # CHECK: precrq.ph.w $17, $18, $19 + 0x7e 0x32 0x83 0x11 # CHECK: precrq.qb.ph $16, $17, $18 + 0x7e 0x95 0x9b 0xd1 # CHECK: precrqu_s.qb.ph $19, $20, $21 + 0x7e 0x74 0x95 0x51 # CHECK: precrq_rs.ph.w $18, $19, $20 + 0x7c 0x40 0x0d 0x10 # CHECK: raddu.w.qb $1, $2 + 0x7d 0x00 0x2c 0xb8 # CHECK: rddsp $5, 256 + 0x7c 0x0c 0x12 0x92 # CHECK: repl.ph $2, 12 + 0x7c 0x55 0x08 0x92 # CHECK: repl.qb $1, 85 + 0x7c 0x02 0x0a 0xd2 # CHECK: replv.ph $1, $2 + 0x7c 0x02 0x08 0xd2 # CHECK: replv.qb $1, $2 + 0x7d 0x00 0x0e 0xb8 # CHECK: shilo $ac1, 16 + 0x7c 0x40 0x0e 0xf8 # CHECK: shilov $ac1, $2 + 0x7c 0x62 0x0a 0x13 # CHECK: shll.ph $1, $2, 3 + 0x7c 0x62 0x0b 0x13 # CHECK: shll_s.ph $1, $2, 3 + 0x7c 0x62 0x08 0x13 # CHECK: shll.qb $1, $2, 3 + 0x7c 0x62 0x0a 0x93 # CHECK: shllv.ph $1, $2, $3 + 0x7c 0x62 0x0b 0x93 # CHECK: shllv_s.ph $1, $2, $3 + 0x7c 0x62 0x08 0x93 # CHECK: shllv.qb $1, $2, $3 + 0x7c 0x62 0x0d 0x93 # CHECK: shllv_s.w $1, $2, $3 + 0x7c 0x62 0x0d 0x13 # CHECK: shll_s.w $1, $2, 3 + 0x7c 0x22 0x2a 0x53 # CHECK: shra.ph $5, $2, 1 + 0x7c 0x22 0x2b 0x53 # CHECK: shra_r.ph $5, $2, 1 + 0x7c 0x62 0x0a 0xd3 # CHECK: shrav.ph $1, $2, $3 + 0x7c 0x62 0x0b 0xd3 # CHECK: shrav_r.ph $1, $2, $3 + 0x7c 0x62 0x0d 0xd3 # CHECK: shrav_r.w $1, $2, $3 + 0x7c 0x22 0x0d 0x53 # CHECK: shra_r.w $1, $2, 1 + 0x7c 0x42 0x08 0x53 # CHECK: shrl.qb $1, $2, 2 + 0x7c 0x62 0x08 0xd3 # CHECK: shrlv.qb $1, $2, $3 + 0x7c 0x43 0x0a 0xd0 # CHECK: subq.ph $1, $2, $3 + 0x7c 0x43 0x0b 0xd0 # CHECK: subq_s.ph $1, $2, $3 + 0x7c 0x43 0x0d 0xd0 # CHECK: subq_s.w $1, $2, $3 + 0x7c 0x43 0x08 0x50 # CHECK: subu.qb $1, $2, $3 + 0x7c 0x43 0x09 0x50 # CHECK: subu_s.qb $1, $2, $3 + 0x7c 0x20 0x04 0xf8 # CHECK: wrdsp $1, 0 diff --git a/test/MC/Disassembler/Mips/dspr2/valid.txt b/test/MC/Disassembler/Mips/dspr2/valid.txt new file mode 100644 index 000000000000..b1b5a332dc56 --- /dev/null +++ b/test/MC/Disassembler/Mips/dspr2/valid.txt @@ -0,0 +1,173 @@ +# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=dspr2 | FileCheck %s + + 0x7c 0x02 0x0a 0x52 # CHECK: absq_s.ph $1, $2 + 0x7c 0x04 0x18 0x52 # CHECK: absq_s.qb $3, $4 + 0x7c 0x06 0x2c 0x52 # CHECK: absq_s.w $5, $6 + 0x7d 0x09 0x3a 0x90 # CHECK: addq.ph $7, $8, $9 + 0x7d 0x6c 0x53 0x90 # CHECK: addq_s.ph $10, $11, $12 + 0x7d 0xcf 0x6d 0x90 # CHECK: addq_s.w $13, $14, $15 + 0x7e 0x32 0x82 0x18 # CHECK: addqh.ph $16, $17, $18 + 0x7e 0x95 0x9a 0x98 # CHECK: addqh_r.ph $19, $20, $21 + 0x7e 0xf8 0xb4 0x18 # CHECK: addqh.w $22, $23, $24 + 0x7f 0x5b 0xcc 0x98 # CHECK: addqh_r.w $25, $26, $27 + 0x7f 0xbe 0xe4 0x10 # CHECK: addsc $gp, $sp, $fp + 0x7c 0x22 0xfa 0x10 # CHECK: addu.ph $ra, $1, $2 + 0x7c 0x85 0x1b 0x10 # CHECK: addu_s.ph $3, $4, $5 + 0x7c 0xe8 0x30 0x10 # CHECK: addu.qb $6, $7, $8 + 0x7d 0x4b 0x49 0x10 # CHECK: addu_s.qb $9, $10, $11 + 0x7d 0xae 0x64 0x50 # CHECK: addwc $12, $13, $14 + 0x7e 0x11 0x78 0x18 # CHECK: adduh.qb $15, $16, $17 + 0x7e 0x74 0x90 0x98 # CHECK: adduh_r.qb $18, $19, $20 + 0x7e 0xd5 0x00 0x31 # 
CHECK: append $21, $22, 0 + 0x7f 0x17 0x1c 0x31 # CHECK: balign $23, $24, 3 + 0x7c 0x1a 0xce 0xd2 # CHECK: bitrev $25, $26 + 0x04 0x1c 0x14 0x9b # CHECK: bposge32 21104 + 0x7f 0x7c 0x02 0x11 # CHECK: cmp.eq.ph $27, $gp + 0x7f 0xbe 0x02 0x51 # CHECK: cmp.lt.ph $sp, $fp + 0x7f 0xe1 0x02 0x91 # CHECK: cmp.le.ph $ra, $1 + 0x7c 0x64 0x16 0x11 # CHECK: cmpgdu.eq.qb $2, $3, $4 + 0x7c 0xc7 0x2e 0x51 # CHECK: cmpgdu.lt.qb $5, $6, $7 + 0x7d 0x2a 0x46 0x91 # CHECK: cmpgdu.le.qb $8, $9, $10 + 0x7d 0x8d 0x59 0x11 # CHECK: cmpgu.eq.qb $11, $12, $13 + 0x7d 0xf0 0x71 0x51 # CHECK: cmpgu.lt.qb $14, $15, $16 + 0x7e 0x53 0x89 0x91 # CHECK: cmpgu.le.qb $17, $18, $19 + 0x7e 0x95 0x00 0x11 # CHECK: cmpu.eq.qb $20, $21 + 0x7e 0xd7 0x00 0x51 # CHECK: cmpu.lt.qb $22, $23 + 0x7f 0x19 0x00 0x91 # CHECK: cmpu.le.qb $24, $25 + 0x7f 0x5b 0x00 0x30 # CHECK: dpa.w.ph $ac0, $26, $27 + 0x7c 0x22 0x09 0x30 # CHECK: dpaq_s.w.ph $ac1, $1, $2 + 0x7c 0x64 0x13 0x30 # CHECK: dpaq_sa.l.w $ac2, $3, $4 + 0x7c 0xa6 0x1e 0x30 # CHECK: dpaqx_s.w.ph $ac3, $5, $6 + 0x7c 0xe8 0x06 0xb0 # CHECK: dpaqx_sa.w.ph $ac0, $7, $8 + 0x7d 0x2a 0x08 0xf0 # CHECK: dpau.h.qbl $ac1, $9, $10 + 0x7d 0x6c 0x09 0xf0 # CHECK: dpau.h.qbr $ac1, $11, $12 + 0x7d 0xae 0x12 0x30 # CHECK: dpax.w.ph $ac2, $13, $14 + 0x7d 0xf0 0x18 0x70 # CHECK: dps.w.ph $ac3, $15, $16 + 0x7e 0x32 0x01 0x70 # CHECK: dpsq_s.w.ph $ac0, $17, $18 + 0x7e 0x74 0x0b 0x70 # CHECK: dpsq_sa.l.w $ac1, $19, $20 + 0x7c 0x22 0x16 0x70 # CHECK: dpsqx_s.w.ph $ac2, $1, $2 + 0x7c 0x64 0x1e 0xf0 # CHECK: dpsqx_sa.w.ph $ac3, $3, $4 + 0x7c 0xa6 0x02 0xf0 # CHECK: dpsu.h.qbl $ac0, $5, $6 + 0x7c 0xe8 0x0b 0xf0 # CHECK: dpsu.h.qbr $ac1, $7, $8 + 0x7d 0x2a 0x12 0x70 # CHECK: dpsx.w.ph $ac2, $9, $10 + 0x7f 0xe1 0x00 0xb8 # CHECK: extp $1, $ac0, 31 + 0x7c 0x02 0x0a 0xb8 # CHECK: extpdp $2, $ac1, 0 + 0x7c 0x83 0x12 0xf8 # CHECK: extpdpv $3, $ac2, $4 + 0x7c 0xc5 0x18 0xf8 # CHECK: extpv $5, $ac3, $6 + 0x7f 0xe7 0x00 0x38 # CHECK: extr.w $7, $ac0, 31 + 0x7d 0xe8 0x09 0x38 # CHECK: extr_r.w $8, $ac1, 15 + 0x7c 0xe9 0x11 0xb8 # CHECK: extr_rs.w $9, $ac2, 7 + 0x7c 0x6a 0x1b 0xb8 # CHECK: extr_s.h $10, $ac3, 3 + 0x7d 0x8b 0x00 0x78 # CHECK: extrv.w $11, $ac0, $12 + 0x7d 0xcd 0x09 0x78 # CHECK: extrv_r.w $13, $ac1, $14 + 0x7e 0x0f 0x11 0xf8 # CHECK: extrv_rs.w $15, $ac2, $16 + 0x7e 0x51 0x1b 0xf8 # CHECK: extrv_s.h $17, $ac3, $18 + 0x7e 0x93 0x00 0x0c # CHECK: insv $19, $20 + 0x7f 0x54 0x51 0x8a # CHECK: lbux $10, $20($26) + 0x7f 0x75 0x59 0x0a # CHECK: lhx $11, $21($27) + 0x7f 0x96 0x60 0x0a # CHECK: lwx $12, $22($gp) + 0x70 0xc7 0x08 0x00 # CHECK: madd $ac1, $6, $7 + 0x71 0x09 0x08 0x01 # CHECK: maddu $ac1, $8, $9 + 0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7 + 0x71 0x09 0x00 0x01 # CHECK: maddu $8, $9 + 0x7c 0x64 0x15 0x30 # CHECK: maq_s.w.phl $ac2, $3, $4 + 0x7c 0xa6 0x1c 0x30 # CHECK: maq_sa.w.phl $ac3, $5, $6 + 0x7c 0xe8 0x05 0xb0 # CHECK: maq_s.w.phr $ac0, $7, $8 + 0x7d 0x2a 0x0c 0xb0 # CHECK: maq_sa.w.phr $ac1, $9, $10 + 0x00 0x20 0x70 0x10 # CHECK: mfhi $14, $ac1 + 0x00 0x20 0x78 0x12 # CHECK: mflo $15, $ac1 + 0x00 0x00 0x70 0x10 # CHECK: mfhi $14 + 0x00 0x00 0x78 0x12 # CHECK: mflo $15 + 0x7d 0x8d 0x5c 0x90 # CHECK: modsub $11, $12, $13 + 0x71 0x4b 0x18 0x04 # CHECK: msub $ac3, $10, $11 + 0x71 0x8d 0x10 0x05 # CHECK: msubu $ac2, $12, $13 + 0x71 0x4b 0x00 0x04 # CHECK: msub $10, $11 + 0x71 0x8d 0x00 0x05 # CHECK: msubu $12, $13 + 0x02 0x00 0x18 0x11 # CHECK: mthi $16, $ac3 + 0x02 0x00 0x00 0x11 # CHECK: mthi $16 + 0x7d 0xc0 0x17 0xf8 # CHECK: mthlip $14, $ac2 + 0x02 0x20 0x10 0x13 # CHECK: mtlo $17, $ac2 + 0x02 
0x20 0x00 0x13 # CHECK: mtlo $17 + 0x7e 0x11 0x7b 0x18 # CHECK: mul.ph $15, $16, $17 + 0x7e 0x74 0x93 0x98 # CHECK: mul_s.ph $18, $19, $20 + 0x7e 0xd7 0xaf 0x10 # CHECK: muleq_s.w.phl $21, $22, $23 + 0x7f 0x3a 0xc7 0x50 # CHECK: muleq_s.w.phr $24, $25, $26 + 0x7f 0x9d 0xd9 0x90 # CHECK: muleu_s.ph.qbl $27, $gp, $sp + 0x7f 0xe1 0xf1 0xd0 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 + 0x7c 0x64 0x17 0xd0 # CHECK: mulq_rs.ph $2, $3, $4 + 0x7c 0xc7 0x2d 0xd8 # CHECK: mulq_rs.w $5, $6, $7 + 0x7d 0x2a 0x47 0x90 # CHECK: mulq_s.ph $8, $9, $10 + 0x7d 0x8d 0x5d 0x98 # CHECK: mulq_s.w $11, $12, $13 + 0x7d 0xcf 0x18 0xb0 # CHECK: mulsa.w.ph $ac3, $14, $15 + 0x7e 0x11 0x01 0xb0 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 + 0x00 0x43 0x18 0x18 # CHECK: mult $ac3, $2, $3 + 0x00 0x85 0x10 0x19 # CHECK: multu $ac2, $4, $5 + 0x00 0x43 0x00 0x18 # CHECK: mult $2, $3 + 0x00 0x85 0x00 0x19 # CHECK: multu $4, $5 + 0x7e 0x74 0x93 0x91 # CHECK: packrl.ph $18, $19, $20 + 0x7d 0xe3 0x3a 0xd1 # CHECK: pick.ph $7, $15, $3 + 0x7c 0x88 0x10 0xd1 # CHECK: pick.qb $2, $4, $8 + 0x7c 0x15 0xa3 0x12 # CHECK: preceq.w.phl $20, $21 + 0x7c 0x16 0xab 0x52 # CHECK: preceq.w.phr $21, $22 + 0x7c 0x17 0xb1 0x12 # CHECK: precequ.ph.qbl $22, $23 + 0x7c 0x19 0xc1 0x92 # CHECK: precequ.ph.qbla $24, $25 + 0x7c 0x18 0xb9 0x52 # CHECK: precequ.ph.qbr $23, $24 + 0x7c 0x1a 0xc9 0xd2 # CHECK: precequ.ph.qbra $25, $26 + 0x7c 0x1b 0xd7 0x12 # CHECK: preceu.ph.qbl $26, $27 + 0x7c 0x1d 0xe7 0x92 # CHECK: preceu.ph.qbla $gp, $sp + 0x7c 0x1c 0xdf 0x52 # CHECK: preceu.ph.qbr $27, $gp + 0x7c 0x1e 0xef 0xd2 # CHECK: preceu.ph.qbra $sp, $fp + 0x7f 0x19 0xbb 0x51 # CHECK: precr.qb.ph $23, $24, $25 + 0x7f 0x38 0x07 0x91 # CHECK: precr_sra.ph.w $24, $25, 0 + 0x7f 0x38 0xff 0x91 # CHECK: precr_sra.ph.w $24, $25, 31 + 0x7f 0x59 0x07 0xd1 # CHECK: precr_sra_r.ph.w $25, $26, 0 + 0x7f 0x59 0xff 0xd1 # CHECK: precr_sra_r.ph.w $25, $26, 31 + 0x7e 0x53 0x8d 0x11 # CHECK: precrq.ph.w $17, $18, $19 + 0x7e 0x32 0x83 0x11 # CHECK: precrq.qb.ph $16, $17, $18 + 0x7e 0x95 0x9b 0xd1 # CHECK: precrqu_s.qb.ph $19, $20, $21 + 0x7e 0x74 0x95 0x51 # CHECK: precrq_rs.ph.w $18, $19, $20 + 0x7c 0x41 0x18 0x71 # CHECK: prepend $1, $2, 3 + 0x7c 0x40 0x0d 0x10 # CHECK: raddu.w.qb $1, $2 + 0x7d 0x00 0x2c 0xb8 # CHECK: rddsp $5, 256 + 0x7c 0x0c 0x12 0x92 # CHECK: repl.ph $2, 12 + 0x7c 0x55 0x08 0x92 # CHECK: repl.qb $1, 85 + 0x7c 0x02 0x0a 0xd2 # CHECK: replv.ph $1, $2 + 0x7c 0x02 0x08 0xd2 # CHECK: replv.qb $1, $2 + 0x7d 0x00 0x0e 0xb8 # CHECK: shilo $ac1, 16 + 0x7c 0x40 0x0e 0xf8 # CHECK: shilov $ac1, $2 + 0x7c 0x62 0x0a 0x13 # CHECK: shll.ph $1, $2, 3 + 0x7c 0x62 0x0b 0x13 # CHECK: shll_s.ph $1, $2, 3 + 0x7c 0x62 0x08 0x13 # CHECK: shll.qb $1, $2, 3 + 0x7c 0x62 0x0a 0x93 # CHECK: shllv.ph $1, $2, $3 + 0x7c 0x62 0x0b 0x93 # CHECK: shllv_s.ph $1, $2, $3 + 0x7c 0x62 0x08 0x93 # CHECK: shllv.qb $1, $2, $3 + 0x7c 0x62 0x0d 0x93 # CHECK: shllv_s.w $1, $2, $3 + 0x7c 0x62 0x0d 0x13 # CHECK: shll_s.w $1, $2, 3 + 0x7c 0x50 0x11 0x13 # CHECK: shra.qb $2, $16, 2 + 0x7c 0x50 0x11 0x53 # CHECK: shra_r.qb $2, $16, 2 + 0x7c 0x22 0x2a 0x53 # CHECK: shra.ph $5, $2, 1 + 0x7c 0x22 0x2b 0x53 # CHECK: shra_r.ph $5, $2, 1 + 0x7c 0x62 0x0a 0xd3 # CHECK: shrav.ph $1, $2, $3 + 0x7c 0x62 0x0b 0xd3 # CHECK: shrav_r.ph $1, $2, $3 + 0x7c 0x62 0x09 0x93 # CHECK: shrav.qb $1, $2, $3 + 0x7c 0x62 0x09 0xd3 # CHECK: shrav_r.qb $1, $2, $3 + 0x7c 0x62 0x0d 0xd3 # CHECK: shrav_r.w $1, $2, $3 + 0x7c 0x22 0x0d 0x53 # CHECK: shra_r.w $1, $2, 1 + 0x7c 0x42 0x0e 0x53 # CHECK: shrl.ph $1, $2, 2 + 0x7c 0x42 0x08 0x53 # CHECK: shrl.qb 
$1, $2, 2 + 0x7c 0x62 0x0e 0xd3 # CHECK: shrlv.ph $1, $2, $3 + 0x7c 0x62 0x08 0xd3 # CHECK: shrlv.qb $1, $2, $3 + 0x7c 0x43 0x0a 0xd0 # CHECK: subq.ph $1, $2, $3 + 0x7c 0x43 0x0b 0xd0 # CHECK: subq_s.ph $1, $2, $3 + 0x7c 0x43 0x0d 0xd0 # CHECK: subq_s.w $1, $2, $3 + 0x7c 0x43 0x0a 0x58 # CHECK: subqh.ph $1, $2, $3 + 0x7c 0x43 0x0a 0xd8 # CHECK: subqh_r.ph $1, $2, $3 + 0x7c 0x43 0x0c 0x58 # CHECK: subqh.w $1, $2, $3 + 0x7c 0x43 0x0c 0xd8 # CHECK: subqh_r.w $1, $2, $3 + 0x7c 0x49 0x32 0x50 # CHECK: subu.ph $6, $2, $9 + 0x7c 0x64 0x13 0x50 # CHECK: subu_s.ph $2, $3, $4 + 0x7c 0x43 0x08 0x50 # CHECK: subu.qb $1, $2, $3 + 0x7c 0x43 0x09 0x50 # CHECK: subu_s.qb $1, $2, $3 + 0x7c 0x43 0x08 0x58 # CHECK: subuh.qb $1, $2, $3 + 0x7c 0x43 0x08 0xd8 # CHECK: subuh_r.qb $1, $2, $3 + 0x7c 0x20 0x04 0xf8 # CHECK: wrdsp $1, 0 diff --git a/test/MC/Mips/dsp/valid.s b/test/MC/Mips/dsp/valid.s new file mode 100644 index 000000000000..804669c5e464 --- /dev/null +++ b/test/MC/Mips/dsp/valid.s @@ -0,0 +1,127 @@ +# RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dsp %s | FileCheck %s +# +# CHECK: .text + .set noat + absq_s.ph $1, $2 # CHECK: absq_s.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0x52] + absq_s.w $5, $6 # CHECK: absq_s.w $5, $6 # encoding: [0x7c,0x06,0x2c,0x52] + addq.ph $7, $8, $9 # CHECK: addq.ph $7, $8, $9 # encoding: [0x7d,0x09,0x3a,0x90] + addq_s.ph $10, $11, $12 # CHECK: addq_s.ph $10, $11, $12 # encoding: [0x7d,0x6c,0x53,0x90] + addq_s.w $13, $14, $15 # CHECK: addq_s.w $13, $14, $15 # encoding: [0x7d,0xcf,0x6d,0x90] + addsc $gp, $sp, $fp # CHECK: addsc $gp, $sp, $fp # encoding: [0x7f,0xbe,0xe4,0x10] + addu.qb $6, $7, $8 # CHECK: addu.qb $6, $7, $8 # encoding: [0x7c,0xe8,0x30,0x10] + addu_s.qb $9, $10, $11 # CHECK: addu_s.qb $9, $10, $11 # encoding: [0x7d,0x4b,0x49,0x10] + addwc $12, $13, $14 # CHECK: addwc $12, $13, $14 # encoding: [0x7d,0xae,0x64,0x50] + bitrev $25, $26 # CHECK: bitrev $25, $26 # encoding: [0x7c,0x1a,0xce,0xd2] + bposge32 21100 # CHECK: bposge32 21100 # encoding: [0x04,0x1c,0x14,0x9b] + cmp.eq.ph $27, $gp # CHECK: cmp.eq.ph $27, $gp # encoding: [0x7f,0x7c,0x02,0x11] + cmp.lt.ph $sp, $fp # CHECK: cmp.lt.ph $sp, $fp # encoding: [0x7f,0xbe,0x02,0x51] + cmp.le.ph $ra, $1 # CHECK: cmp.le.ph $ra, $1 # encoding: [0x7f,0xe1,0x02,0x91] + cmpgu.eq.qb $11, $12, $13 # CHECK: cmpgu.eq.qb $11, $12, $13 # encoding: [0x7d,0x8d,0x59,0x11] + cmpgu.lt.qb $14, $15, $16 # CHECK: cmpgu.lt.qb $14, $15, $16 # encoding: [0x7d,0xf0,0x71,0x51] + cmpgu.le.qb $17, $18, $19 # CHECK: cmpgu.le.qb $17, $18, $19 # encoding: [0x7e,0x53,0x89,0x91] + cmpu.eq.qb $20, $21 # CHECK: cmpu.eq.qb $20, $21 # encoding: [0x7e,0x95,0x00,0x11] + cmpu.lt.qb $22, $23 # CHECK: cmpu.lt.qb $22, $23 # encoding: [0x7e,0xd7,0x00,0x51] + cmpu.le.qb $24, $25 # CHECK: cmpu.le.qb $24, $25 # encoding: [0x7f,0x19,0x00,0x91] + dpaq_s.w.ph $ac1, $1, $2 # CHECK: dpaq_s.w.ph $ac1, $1, $2 # encoding: [0x7c,0x22,0x09,0x30] + dpaq_sa.l.w $ac2, $3, $4 # CHECK: dpaq_sa.l.w $ac2, $3, $4 # encoding: [0x7c,0x64,0x13,0x30] + dpau.h.qbl $ac1, $9, $10 # CHECK: dpau.h.qbl $ac1, $9, $10 # encoding: [0x7d,0x2a,0x08,0xf0] + dpau.h.qbr $ac1, $11, $12 # CHECK: dpau.h.qbr $ac1, $11, $12 # encoding: [0x7d,0x6c,0x09,0xf0] + dpsq_s.w.ph $ac0, $17, $18 # CHECK: dpsq_s.w.ph $ac0, $17, $18 # encoding: [0x7e,0x32,0x01,0x70] + dpsq_sa.l.w $ac1, $19, $20 # CHECK: dpsq_sa.l.w $ac1, $19, $20 # encoding: [0x7e,0x74,0x0b,0x70] + dpsu.h.qbl $ac0, $5, $6 # CHECK: dpsu.h.qbl $ac0, $5, $6 # encoding: [0x7c,0xa6,0x02,0xf0] + dpsu.h.qbr $ac1, $7, $8 # CHECK: dpsu.h.qbr 
$ac1, $7, $8 # encoding: [0x7c,0xe8,0x0b,0xf0] + extp $1, $ac0, 31 # CHECK: extp $1, $ac0, 31 # encoding: [0x7f,0xe1,0x00,0xb8] + extpdp $2, $ac1, 0 # CHECK: extpdp $2, $ac1, 0 # encoding: [0x7c,0x02,0x0a,0xb8] + extpdpv $3, $ac2, $4 # CHECK: extpdpv $3, $ac2, $4 # encoding: [0x7c,0x83,0x12,0xf8] + extpv $5, $ac3, $6 # CHECK: extpv $5, $ac3, $6 # encoding: [0x7c,0xc5,0x18,0xf8] + extr.w $7, $ac0, 31 # CHECK: extr.w $7, $ac0, 31 # encoding: [0x7f,0xe7,0x00,0x38] + extr_r.w $8, $ac1, 15 # CHECK: extr_r.w $8, $ac1, 15 # encoding: [0x7d,0xe8,0x09,0x38] + extr_rs.w $9, $ac2, 7 # CHECK: extr_rs.w $9, $ac2, 7 # encoding: [0x7c,0xe9,0x11,0xb8] + extr_s.h $10, $ac3, 3 # CHECK: extr_s.h $10, $ac3, 3 # encoding: [0x7c,0x6a,0x1b,0xb8] + extrv.w $11, $ac0, $12 # CHECK: extrv.w $11, $ac0, $12 # encoding: [0x7d,0x8b,0x00,0x78] + extrv_r.w $13, $ac1, $14 # CHECK: extrv_r.w $13, $ac1, $14 # encoding: [0x7d,0xcd,0x09,0x78] + extrv_rs.w $15, $ac2, $16 # CHECK: extrv_rs.w $15, $ac2, $16 # encoding: [0x7e,0x0f,0x11,0xf8] + extrv_s.h $17, $ac3, $18 # CHECK: extrv_s.h $17, $ac3, $18 # encoding: [0x7e,0x51,0x1b,0xf8] + insv $19, $20 # CHECK: insv $19, $20 # encoding: [0x7e,0x93,0x00,0x0c] + lbux $10, $20($26) # CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a] + lhx $11, $21($27) # CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a] + lwx $12, $22($gp) # CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a] + madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00] + maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01] + madd $6, $7 # CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00] + maddu $8, $9 # CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01] + maq_s.w.phl $ac2, $3, $4 # CHECK: maq_s.w.phl $ac2, $3, $4 # encoding: [0x7c,0x64,0x15,0x30] + maq_sa.w.phl $ac3, $5, $6 # CHECK: maq_sa.w.phl $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1c,0x30] + maq_s.w.phr $ac0, $7, $8 # CHECK: maq_s.w.phr $ac0, $7, $8 # encoding: [0x7c,0xe8,0x05,0xb0] + maq_sa.w.phr $ac1, $9, $10 # CHECK: maq_sa.w.phr $ac1, $9, $10 # encoding: [0x7d,0x2a,0x0c,0xb0] + mfhi $14, $ac1 # CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10] + mflo $15, $ac0 # CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12] + mfhi $14 # CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10] + mflo $15 # CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12] + modsub $11, $12, $13 # CHECK: modsub $11, $12, $13 # encoding: [0x7d,0x8d,0x5c,0x90] + msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04] + msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05] + msub $10, $11 # CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04] + msubu $12, $13 # CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05] + mthi $16, $ac3 # CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11] + mthi $16 # CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11] + mthlip $14, $ac2 # CHECK: mthlip $14, $ac2 # encoding: [0x7d,0xc0,0x17,0xf8] + mtlo $17, $ac2 # CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13] + mtlo $17 # CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13] + muleq_s.w.phl $21, $22, $23 # CHECK: muleq_s.w.phl $21, $22, $23 # encoding: [0x7e,0xd7,0xaf,0x10] + muleq_s.w.phr $24, $25, $26 # CHECK: muleq_s.w.phr $24, $25, $26 # encoding: [0x7f,0x3a,0xc7,0x50] + muleu_s.ph.qbl $27, $gp, $sp # CHECK: muleu_s.ph.qbl $27, $gp, $sp # encoding: [0x7f,0x9d,0xd9,0x90] + muleu_s.ph.qbr $fp, $ra, $1 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 # encoding: 
[0x7f,0xe1,0xf1,0xd0] + mulq_rs.ph $2, $3, $4 # CHECK: mulq_rs.ph $2, $3, $4 # encoding: [0x7c,0x64,0x17,0xd0] + mulsaq_s.w.ph $ac0, $16, $17 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 # encoding: [0x7e,0x11,0x01,0xb0] + mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18] + multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19] + mult $2, $3 # CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18] + multu $4, $5 # CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19] + packrl.ph $18, $19, $20 # CHECK: packrl.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x91] + pick.ph $7, $15, $3 # CHECK: pick.ph $7, $15, $3 # encoding: [0x7d,0xe3,0x3a,0xd1] + pick.qb $2, $4, $8 # CHECK: pick.qb $2, $4, $8 # encoding: [0x7c,0x88,0x10,0xd1] + preceq.w.phl $20, $21 # CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12] + preceq.w.phr $21, $22 # CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52] + precequ.ph.qbl $22, $23 # CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12] + precequ.ph.qbla $24, $25 # CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92] + precequ.ph.qbr $23, $24 # CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52] + precequ.ph.qbra $25, $26 # CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2] + preceu.ph.qbl $26, $27 # CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12] + preceu.ph.qbla $gp, $sp # CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92] + preceu.ph.qbr $27, $gp # CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52] + preceu.ph.qbra $sp, $fp # CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2] + precrq.ph.w $17, $18, $19 # CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11] + precrq.qb.ph $16, $17, $18 # CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11] + precrqu_s.qb.ph $19, $20, $21 # CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1] + precrq_rs.ph.w $18, $19, $20 # CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51] + raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x7c,0x40,0x0d,0x10] + rddsp $5, 256 # CHECK: rddsp $5, 256 # encoding: [0x7d,0x00,0x2c,0xb8] + repl.ph $2, 12 # CHECK: repl.ph $2, 12 # encoding: [0x7c,0x0c,0x12,0x92] + repl.qb $1, 85 # CHECK: repl.qb $1, 85 # encoding: [0x7c,0x55,0x08,0x92] + replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0xd2] + replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x7c,0x02,0x08,0xd2] + shilo $ac1, 16 # CHECK: shilo $ac1, 16 # encoding: [0x7d,0x00,0x0e,0xb8] + shilov $ac1, $2 # CHECK: shilov $ac1, $2 # encoding: [0x7c,0x40,0x0e,0xf8] + shll.ph $1, $2, 3 # CHECK: shll.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0a,0x13] + shll_s.ph $1, $2, 3 # CHECK: shll_s.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0b,0x13] + shll.qb $1, $2, 3 # CHECK: shll.qb $1, $2, 3 # encoding: [0x7c,0x62,0x08,0x13] + shllv.ph $1, $2, $3 # CHECK: shllv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0x93] + shllv_s.ph $1, $2, $3 # CHECK: shllv_s.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0x93] + shllv.qb $1, $2, $3 # CHECK: shllv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0x93] + shllv_s.w $1, $2, $3 # CHECK: shllv_s.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0x93] + shll_s.w $1, $2, 3 # CHECK: shll_s.w $1, $2, 3 # encoding: [0x7c,0x62,0x0d,0x13] + shra.ph $5, $2, 1 # CHECK: shra.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2a,0x53] + shra_r.ph $5, $2, 1 # CHECK: shra_r.ph $5, $2, 1 # encoding: 
[0x7c,0x22,0x2b,0x53] + shrav.ph $1, $2, $3 # CHECK: shrav.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0xd3] + shrav_r.ph $1, $2, $3 # CHECK: shrav_r.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0xd3] + shrav_r.w $1, $2, $3 # CHECK: shrav_r.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0xd3] + shra_r.w $1, $2, 1 # CHECK: shra_r.w $1, $2, 1 # encoding: [0x7c,0x22,0x0d,0x53] + shrl.qb $1, $2, 2 # CHECK: shrl.qb $1, $2, 2 # encoding: [0x7c,0x42,0x08,0x53] + shrlv.qb $1, $2, $3 # CHECK: shrlv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0xd3] + subq.ph $1, $2, $3 # CHECK: subq.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd0] + subq_s.ph $1, $2, $3 # CHECK: subq_s.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0b,0xd0] + subq_s.w $1, $2, $3 # CHECK: subq_s.w $1, $2, $3 # encoding: [0x7c,0x43,0x0d,0xd0] + subu.qb $1, $2, $3 # CHECK: subu.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x50] + subu_s.qb $1, $2, $3 # CHECK: subu_s.qb $1, $2, $3 # encoding: [0x7c,0x43,0x09,0x50] + wrdsp $1, 0 # CHECK: wrdsp $1, 0 # encoding: [0x7c,0x20,0x04,0xf8] diff --git a/test/MC/Mips/dspr2/valid.s b/test/MC/Mips/dspr2/valid.s index d86081ec464c..ce9bd7309d7f 100644 --- a/test/MC/Mips/dspr2/valid.s +++ b/test/MC/Mips/dspr2/valid.s @@ -1,48 +1,175 @@ # RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dspr2 %s | FileCheck %s # # CHECK: .text - precrq.qb.ph $16,$17,$18 # CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11] - precrq.ph.w $17,$18,$19 # CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11] - precrq_rs.ph.w $18,$19,$20 # CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51] - precrqu_s.qb.ph $19,$20,$21 # CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1] - preceq.w.phl $20,$21 # CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12] - preceq.w.phr $21,$22 # CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52] - precequ.ph.qbl $22,$23 # CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12] - precequ.ph.qbr $23,$24 # CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52] - precequ.ph.qbla $24,$25 # CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92] - precequ.ph.qbra $25,$26 # CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2] - preceu.ph.qbl $26,$27 # CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12] - preceu.ph.qbr $27,$28 # CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52] - preceu.ph.qbla $28,$29 # CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92] - preceu.ph.qbra $29,$30 # CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2] - precr.qb.ph $23,$24,$25 # CHECK: precr.qb.ph $23, $24, $25 # encoding: [0x7f,0x19,0xbb,0x51] - precr_sra.ph.w $24,$25,0 # CHECK: precr_sra.ph.w $24, $25, 0 # encoding: [0x7f,0x38,0x07,0x91] - precr_sra.ph.w $24,$25,31 # CHECK: precr_sra.ph.w $24, $25, 31 # encoding: [0x7f,0x38,0xff,0x91] - precr_sra_r.ph.w $25,$26,0 # CHECK: precr_sra_r.ph.w $25, $26, 0 # encoding: [0x7f,0x59,0x07,0xd1] - precr_sra_r.ph.w $25,$26,31 # CHECK: precr_sra_r.ph.w $25, $26, 31 # encoding: [0x7f,0x59,0xff,0xd1] - lbux $10, $s4($26) # CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a] - lhx $11, $s5($27) # CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a] - lwx $12, $s6($28) # CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a] - mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18] - multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19] - madd $ac1, 
$6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00] - maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01] - msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04] - msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05] - mfhi $14, $ac1 # CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10] - mflo $15, $ac0 # CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12] - mthi $16, $ac3 # CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11] - mtlo $17, $ac2 # CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13] - mult $2, $3 # CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18] - multu $4, $5 # CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19] - madd $6, $7 # CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00] - maddu $8, $9 # CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01] - msub $10, $11 # CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04] - msubu $12, $13 # CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05] - mfhi $14 # CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10] - mflo $15 # CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12] - mthi $16 # CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11] - mtlo $17 # CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13] - append $2, $3, 3 # CHECK: append $2, $3, 3 # encoding: [0x7c,0x62,0x18,0x31] - balign $4, $5, 1 # CHECK: balign $4, $5, 1 # encoding: [0x7c,0xa4,0x0c,0x31] - prepend $6, $7, 4 # CHECK: prepend $6, $7, 4 # encoding: [0x7c,0xe6,0x20,0x71] + .set noat + absq_s.ph $1, $2 # CHECK: absq_s.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0x52] + absq_s.qb $3, $4 # CHECK: absq_s.qb $3, $4 # encoding: [0x7c,0x04,0x18,0x52] + absq_s.w $5, $6 # CHECK: absq_s.w $5, $6 # encoding: [0x7c,0x06,0x2c,0x52] + addq.ph $7, $8, $9 # CHECK: addq.ph $7, $8, $9 # encoding: [0x7d,0x09,0x3a,0x90] + addq_s.ph $10, $11, $12 # CHECK: addq_s.ph $10, $11, $12 # encoding: [0x7d,0x6c,0x53,0x90] + addq_s.w $13, $14, $15 # CHECK: addq_s.w $13, $14, $15 # encoding: [0x7d,0xcf,0x6d,0x90] + addqh.ph $16, $17, $18 # CHECK: addqh.ph $16, $17, $18 # encoding: [0x7e,0x32,0x82,0x18] + addqh_r.ph $19, $20, $21 # CHECK: addqh_r.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9a,0x98] + addqh.w $22, $23, $24 # CHECK: addqh.w $22, $23, $24 # encoding: [0x7e,0xf8,0xb4,0x18] + addqh_r.w $25, $26, $27 # CHECK: addqh_r.w $25, $26, $27 # encoding: [0x7f,0x5b,0xcc,0x98] + addsc $gp, $sp, $fp # CHECK: addsc $gp, $sp, $fp # encoding: [0x7f,0xbe,0xe4,0x10] + addu.ph $ra, $1, $2 # CHECK: addu.ph $ra, $1, $2 # encoding: [0x7c,0x22,0xfa,0x10] + addu_s.ph $3, $4, $5 # CHECK: addu_s.ph $3, $4, $5 # encoding: [0x7c,0x85,0x1b,0x10] + addu.qb $6, $7, $8 # CHECK: addu.qb $6, $7, $8 # encoding: [0x7c,0xe8,0x30,0x10] + addu_s.qb $9, $10, $11 # CHECK: addu_s.qb $9, $10, $11 # encoding: [0x7d,0x4b,0x49,0x10] + addwc $12, $13, $14 # CHECK: addwc $12, $13, $14 # encoding: [0x7d,0xae,0x64,0x50] + adduh.qb $15, $16, $17 # CHECK: adduh.qb $15, $16, $17 # encoding: [0x7e,0x11,0x78,0x18] + adduh_r.qb $18, $19, $20 # CHECK: adduh_r.qb $18, $19, $20 # encoding: [0x7e,0x74,0x90,0x98] + append $21, $22, 0 # CHECK: append $21, $22, 0 # encoding: [0x7e,0xd5,0x00,0x31] + balign $23, $24, 3 # CHECK: balign $23, $24, 3 # encoding: [0x7f,0x17,0x1c,0x31] + bitrev $25, $26 # CHECK: bitrev $25, $26 # encoding: [0x7c,0x1a,0xce,0xd2] + bposge32 21100 # CHECK: bposge32 21100 # encoding: [0x04,0x1c,0x14,0x9b] + cmp.eq.ph $27, $gp # CHECK: cmp.eq.ph $27, $gp # encoding: [0x7f,0x7c,0x02,0x11] + cmp.lt.ph $sp, $fp # 
CHECK: cmp.lt.ph $sp, $fp # encoding: [0x7f,0xbe,0x02,0x51] + cmp.le.ph $ra, $1 # CHECK: cmp.le.ph $ra, $1 # encoding: [0x7f,0xe1,0x02,0x91] + cmpgdu.eq.qb $2, $3, $4 # CHECK: cmpgdu.eq.qb $2, $3, $4 # encoding: [0x7c,0x64,0x16,0x11] + cmpgdu.lt.qb $5, $6, $7 # CHECK: cmpgdu.lt.qb $5, $6, $7 # encoding: [0x7c,0xc7,0x2e,0x51] + cmpgdu.le.qb $8, $9, $10 # CHECK: cmpgdu.le.qb $8, $9, $10 # encoding: [0x7d,0x2a,0x46,0x91] + cmpgu.eq.qb $11, $12, $13 # CHECK: cmpgu.eq.qb $11, $12, $13 # encoding: [0x7d,0x8d,0x59,0x11] + cmpgu.lt.qb $14, $15, $16 # CHECK: cmpgu.lt.qb $14, $15, $16 # encoding: [0x7d,0xf0,0x71,0x51] + cmpgu.le.qb $17, $18, $19 # CHECK: cmpgu.le.qb $17, $18, $19 # encoding: [0x7e,0x53,0x89,0x91] + cmpu.eq.qb $20, $21 # CHECK: cmpu.eq.qb $20, $21 # encoding: [0x7e,0x95,0x00,0x11] + cmpu.lt.qb $22, $23 # CHECK: cmpu.lt.qb $22, $23 # encoding: [0x7e,0xd7,0x00,0x51] + cmpu.le.qb $24, $25 # CHECK: cmpu.le.qb $24, $25 # encoding: [0x7f,0x19,0x00,0x91] + dpa.w.ph $ac0, $26, $27 # CHECK: dpa.w.ph $ac0, $26, $27 # encoding: [0x7f,0x5b,0x00,0x30] + dpaq_s.w.ph $ac1, $1, $2 # CHECK: dpaq_s.w.ph $ac1, $1, $2 # encoding: [0x7c,0x22,0x09,0x30] + dpaq_sa.l.w $ac2, $3, $4 # CHECK: dpaq_sa.l.w $ac2, $3, $4 # encoding: [0x7c,0x64,0x13,0x30] + dpaqx_s.w.ph $ac3, $5, $6 # CHECK: dpaqx_s.w.ph $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1e,0x30] + dpaqx_sa.w.ph $ac0, $7, $8 # CHECK: dpaqx_sa.w.ph $ac0, $7, $8 # encoding: [0x7c,0xe8,0x06,0xb0] + dpau.h.qbl $ac1, $9, $10 # CHECK: dpau.h.qbl $ac1, $9, $10 # encoding: [0x7d,0x2a,0x08,0xf0] + dpau.h.qbr $ac1, $11, $12 # CHECK: dpau.h.qbr $ac1, $11, $12 # encoding: [0x7d,0x6c,0x09,0xf0] + dpax.w.ph $ac2, $13, $14 # CHECK: dpax.w.ph $ac2, $13, $14 # encoding: [0x7d,0xae,0x12,0x30] + dps.w.ph $ac3, $15, $16 # CHECK: dps.w.ph $ac3, $15, $16 # encoding: [0x7d,0xf0,0x18,0x70] + dpsq_s.w.ph $ac0, $17, $18 # CHECK: dpsq_s.w.ph $ac0, $17, $18 # encoding: [0x7e,0x32,0x01,0x70] + dpsq_sa.l.w $ac1, $19, $20 # CHECK: dpsq_sa.l.w $ac1, $19, $20 # encoding: [0x7e,0x74,0x0b,0x70] + dpsqx_s.w.ph $ac2, $1, $2 # CHECK: dpsqx_s.w.ph $ac2, $1, $2 # encoding: [0x7c,0x22,0x16,0x70] + dpsqx_sa.w.ph $ac3, $3, $4 # CHECK: dpsqx_sa.w.ph $ac3, $3, $4 # encoding: [0x7c,0x64,0x1e,0xf0] + dpsu.h.qbl $ac0, $5, $6 # CHECK: dpsu.h.qbl $ac0, $5, $6 # encoding: [0x7c,0xa6,0x02,0xf0] + dpsu.h.qbr $ac1, $7, $8 # CHECK: dpsu.h.qbr $ac1, $7, $8 # encoding: [0x7c,0xe8,0x0b,0xf0] + dpsx.w.ph $ac2, $9, $10 # CHECK: dpsx.w.ph $ac2, $9, $10 # encoding: [0x7d,0x2a,0x12,0x70] + extp $1, $ac0, 31 # CHECK: extp $1, $ac0, 31 # encoding: [0x7f,0xe1,0x00,0xb8] + extpdp $2, $ac1, 0 # CHECK: extpdp $2, $ac1, 0 # encoding: [0x7c,0x02,0x0a,0xb8] + extpdpv $3, $ac2, $4 # CHECK: extpdpv $3, $ac2, $4 # encoding: [0x7c,0x83,0x12,0xf8] + extpv $5, $ac3, $6 # CHECK: extpv $5, $ac3, $6 # encoding: [0x7c,0xc5,0x18,0xf8] + extr.w $7, $ac0, 31 # CHECK: extr.w $7, $ac0, 31 # encoding: [0x7f,0xe7,0x00,0x38] + extr_r.w $8, $ac1, 15 # CHECK: extr_r.w $8, $ac1, 15 # encoding: [0x7d,0xe8,0x09,0x38] + extr_rs.w $9, $ac2, 7 # CHECK: extr_rs.w $9, $ac2, 7 # encoding: [0x7c,0xe9,0x11,0xb8] + extr_s.h $10, $ac3, 3 # CHECK: extr_s.h $10, $ac3, 3 # encoding: [0x7c,0x6a,0x1b,0xb8] + extrv.w $11, $ac0, $12 # CHECK: extrv.w $11, $ac0, $12 # encoding: [0x7d,0x8b,0x00,0x78] + extrv_r.w $13, $ac1, $14 # CHECK: extrv_r.w $13, $ac1, $14 # encoding: [0x7d,0xcd,0x09,0x78] + extrv_rs.w $15, $ac2, $16 # CHECK: extrv_rs.w $15, $ac2, $16 # encoding: [0x7e,0x0f,0x11,0xf8] + extrv_s.h $17, $ac3, $18 # CHECK: extrv_s.h $17, $ac3, $18 # encoding: 
[0x7e,0x51,0x1b,0xf8] + insv $19, $20 # CHECK: insv $19, $20 # encoding: [0x7e,0x93,0x00,0x0c] + lbux $10, $20($26) # CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a] + lhx $11, $21($27) # CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a] + lwx $12, $22($gp) # CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a] + madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00] + maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01] + madd $6, $7 # CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00] + maddu $8, $9 # CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01] + maq_s.w.phl $ac2, $3, $4 # CHECK: maq_s.w.phl $ac2, $3, $4 # encoding: [0x7c,0x64,0x15,0x30] + maq_sa.w.phl $ac3, $5, $6 # CHECK: maq_sa.w.phl $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1c,0x30] + maq_s.w.phr $ac0, $7, $8 # CHECK: maq_s.w.phr $ac0, $7, $8 # encoding: [0x7c,0xe8,0x05,0xb0] + maq_sa.w.phr $ac1, $9, $10 # CHECK: maq_sa.w.phr $ac1, $9, $10 # encoding: [0x7d,0x2a,0x0c,0xb0] + mfhi $14, $ac1 # CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10] + mflo $15, $ac0 # CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12] + mfhi $14 # CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10] + mflo $15 # CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12] + modsub $11, $12, $13 # CHECK: modsub $11, $12, $13 # encoding: [0x7d,0x8d,0x5c,0x90] + msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04] + msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05] + msub $10, $11 # CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04] + msubu $12, $13 # CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05] + mthi $16, $ac3 # CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11] + mthi $16 # CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11] + mthlip $14, $ac2 # CHECK: mthlip $14, $ac2 # encoding: [0x7d,0xc0,0x17,0xf8] + mtlo $17, $ac2 # CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13] + mtlo $17 # CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13] + mul.ph $15, $16, $17 # CHECK: mul.ph $15, $16, $17 # encoding: [0x7e,0x11,0x7b,0x18] + mul_s.ph $18, $19, $20 # CHECK: mul_s.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x98] + muleq_s.w.phl $21, $22, $23 # CHECK: muleq_s.w.phl $21, $22, $23 # encoding: [0x7e,0xd7,0xaf,0x10] + muleq_s.w.phr $24, $25, $26 # CHECK: muleq_s.w.phr $24, $25, $26 # encoding: [0x7f,0x3a,0xc7,0x50] + muleu_s.ph.qbl $27, $gp, $sp # CHECK: muleu_s.ph.qbl $27, $gp, $sp # encoding: [0x7f,0x9d,0xd9,0x90] + muleu_s.ph.qbr $fp, $ra, $1 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 # encoding: [0x7f,0xe1,0xf1,0xd0] + mulq_rs.ph $2, $3, $4 # CHECK: mulq_rs.ph $2, $3, $4 # encoding: [0x7c,0x64,0x17,0xd0] + mulq_rs.w $5, $6, $7 # CHECK: mulq_rs.w $5, $6, $7 # encoding: [0x7c,0xc7,0x2d,0xd8] + mulq_s.ph $8, $9, $10 # CHECK: mulq_s.ph $8, $9, $10 # encoding: [0x7d,0x2a,0x47,0x90] + mulq_s.w $11, $12, $13 # CHECK: mulq_s.w $11, $12, $13 # encoding: [0x7d,0x8d,0x5d,0x98] + mulsa.w.ph $ac3, $14, $15 # CHECK: mulsa.w.ph $ac3, $14, $15 # encoding: [0x7d,0xcf,0x18,0xb0] + mulsaq_s.w.ph $ac0, $16, $17 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 # encoding: [0x7e,0x11,0x01,0xb0] + mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18] + multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19] + mult $2, $3 # CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18] + multu $4, $5 # CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19] + packrl.ph $18, $19, $20 # CHECK: 
packrl.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x91] + pick.ph $7, $15, $3 # CHECK: pick.ph $7, $15, $3 # encoding: [0x7d,0xe3,0x3a,0xd1] + pick.qb $2, $4, $8 # CHECK: pick.qb $2, $4, $8 # encoding: [0x7c,0x88,0x10,0xd1] + preceq.w.phl $20,$21 # CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12] + preceq.w.phr $21,$22 # CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52] + precequ.ph.qbl $22,$23 # CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12] + precequ.ph.qbla $24,$25 # CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92] + precequ.ph.qbr $23,$24 # CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52] + precequ.ph.qbra $25,$26 # CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2] + preceu.ph.qbl $26,$27 # CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12] + preceu.ph.qbla $28,$29 # CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92] + preceu.ph.qbr $27,$28 # CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52] + preceu.ph.qbra $29,$30 # CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2] + precr.qb.ph $23,$24,$25 # CHECK: precr.qb.ph $23, $24, $25 # encoding: [0x7f,0x19,0xbb,0x51] + precr_sra.ph.w $24,$25,0 # CHECK: precr_sra.ph.w $24, $25, 0 # encoding: [0x7f,0x38,0x07,0x91] + precr_sra.ph.w $24,$25,31 # CHECK: precr_sra.ph.w $24, $25, 31 # encoding: [0x7f,0x38,0xff,0x91] + precr_sra_r.ph.w $25,$26,0 # CHECK: precr_sra_r.ph.w $25, $26, 0 # encoding: [0x7f,0x59,0x07,0xd1] + precr_sra_r.ph.w $25,$26,31 # CHECK: precr_sra_r.ph.w $25, $26, 31 # encoding: [0x7f,0x59,0xff,0xd1] + precrq.ph.w $17,$18,$19 # CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11] + precrq.qb.ph $16,$17,$18 # CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11] + precrqu_s.qb.ph $19,$20,$21 # CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1] + precrq_rs.ph.w $18,$19,$20 # CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51] + prepend $1, $2, 3 # CHECK: prepend $1, $2, 3 # encoding: [0x7c,0x41,0x18,0x71] + raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x7c,0x40,0x0d,0x10] + rddsp $5, 256 # CHECK: rddsp $5, 256 # encoding: [0x7d,0x00,0x2c,0xb8] + repl.ph $2, 12 # CHECK: repl.ph $2, 12 # encoding: [0x7c,0x0c,0x12,0x92] + repl.qb $1, 85 # CHECK: repl.qb $1, 85 # encoding: [0x7c,0x55,0x08,0x92] + replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0xd2] + replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x7c,0x02,0x08,0xd2] + shilo $ac1, 16 # CHECK: shilo $ac1, 16 # encoding: [0x7d,0x00,0x0e,0xb8] + shilov $ac1, $2 # CHECK: shilov $ac1, $2 # encoding: [0x7c,0x40,0x0e,0xf8] + shll.ph $1, $2, 3 # CHECK: shll.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0a,0x13] + shll_s.ph $1, $2, 3 # CHECK: shll_s.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0b,0x13] + shll.qb $1, $2, 3 # CHECK: shll.qb $1, $2, 3 # encoding: [0x7c,0x62,0x08,0x13] + shllv.ph $1, $2, $3 # CHECK: shllv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0x93] + shllv_s.ph $1, $2, $3 # CHECK: shllv_s.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0x93] + shllv.qb $1, $2, $3 # CHECK: shllv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0x93] + shllv_s.w $1, $2, $3 # CHECK: shllv_s.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0x93] + shll_s.w $1, $2, 3 # CHECK: shll_s.w $1, $2, 3 # encoding: [0x7c,0x62,0x0d,0x13] + shra.qb $2, $16, 2 # CHECK: shra.qb $2, $16, 2 # encoding: [0x7c,0x50,0x11,0x13] + shra_r.qb $2, $16, 2 # CHECK: shra_r.qb $2, $16, 2 # encoding: 
[0x7c,0x50,0x11,0x53]
+  shra.ph $5, $2, 1 # CHECK: shra.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2a,0x53]
+  shra_r.ph $5, $2, 1 # CHECK: shra_r.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2b,0x53]
+  shrav.ph $1, $2, $3 # CHECK: shrav.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0xd3]
+  shrav_r.ph $1, $2, $3 # CHECK: shrav_r.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0xd3]
+  shrav.qb $1, $2, $3 # CHECK: shrav.qb $1, $2, $3 # encoding: [0x7c,0x62,0x09,0x93]
+  shrav_r.qb $1, $2, $3 # CHECK: shrav_r.qb $1, $2, $3 # encoding: [0x7c,0x62,0x09,0xd3]
+  shrav_r.w $1, $2, $3 # CHECK: shrav_r.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0xd3]
+  shra_r.w $1, $2, 1 # CHECK: shra_r.w $1, $2, 1 # encoding: [0x7c,0x22,0x0d,0x53]
+  shrl.ph $1, $2, 2 # CHECK: shrl.ph $1, $2, 2 # encoding: [0x7c,0x42,0x0e,0x53]
+  shrl.qb $1, $2, 2 # CHECK: shrl.qb $1, $2, 2 # encoding: [0x7c,0x42,0x08,0x53]
+  shrlv.ph $1, $2, $3 # CHECK: shrlv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0e,0xd3]
+  shrlv.qb $1, $2, $3 # CHECK: shrlv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0xd3]
+  subq.ph $1, $2, $3 # CHECK: subq.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd0]
+  subq_s.ph $1, $2, $3 # CHECK: subq_s.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0b,0xd0]
+  subq_s.w $1, $2, $3 # CHECK: subq_s.w $1, $2, $3 # encoding: [0x7c,0x43,0x0d,0xd0]
+  subqh.ph $1, $2, $3 # CHECK: subqh.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0x58]
+  subqh_r.ph $1, $2, $3 # CHECK: subqh_r.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd8]
+  subqh.w $1, $2, $3 # CHECK: subqh.w $1, $2, $3 # encoding: [0x7c,0x43,0x0c,0x58]
+  subqh_r.w $1, $2, $3 # CHECK: subqh_r.w $1, $2, $3 # encoding: [0x7c,0x43,0x0c,0xd8]
+  subu.ph $6, $2, $9 # CHECK: subu.ph $6, $2, $9 # encoding: [0x7c,0x49,0x32,0x50]
+  subu_s.ph $2, $3, $4 # CHECK: subu_s.ph $2, $3, $4 # encoding: [0x7c,0x64,0x13,0x50]
+  subu.qb $1, $2, $3 # CHECK: subu.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x50]
+  subu_s.qb $1, $2, $3 # CHECK: subu_s.qb $1, $2, $3 # encoding: [0x7c,0x43,0x09,0x50]
+  subuh.qb $1, $2, $3 # CHECK: subuh.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x58]
+  subuh_r.qb $1, $2, $3 # CHECK: subuh_r.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0xd8]
+  wrdsp $1, 0 # CHECK: wrdsp $1, 0 # encoding: [0x7c,0x20,0x04,0xf8]

From 3a64f1988f06b903d52d74eebceb77ff31581956 Mon Sep 17 00:00:00 2001
From: Marina Yatsina
Date: Thu, 3 Dec 2015 12:17:03 +0000
Subject: [PATCH 003/364] [X86] MS inline asm: produce error when encountering
 "<size> ptr <reg>"

Currently "<size> ptr <reg>" is treated as "<reg>" in MS inline asm, ignoring
the "<size> ptr" completely and possibly ignoring the intention of the user.
Fixed llvm to produce an error when encountering "<size> ptr <reg>" operands.
For example: andpd xmm1,xmmword ptr xmm1 --> andpd xmm1, xmm1
though andpd has 2 possible matching formats - andpd xmm, xmm/m128

Patch by: ziv.izhar@intel.com
Differential Revision: http://reviews.llvm.org/D14607

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254607 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 13 +++++++++++--
 test/MC/X86/intel-syntax-ambiguous.s | 12 ++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 7089c1f7592b..d53ab71f3d5a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1693,12 +1693,14 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
     return ParseIntelOperator(IOK_TYPE);
   }

+  bool PtrInOperand = false;
   unsigned Size = getIntelMemOperandSize(Tok.getString());
   if (Size) {
     Parser.Lex(); // Eat operand size (e.g., byte, word).
     if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
       return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
     Parser.Lex(); // Eat ptr.
+    PtrInOperand = true;
   }
   Start = Tok.getLoc();

@@ -1754,9 +1756,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
   if (!ParseRegister(RegNo, Start, End)) {
     // If this is a segment register followed by a ':', then this is the start
     // of a segment override, otherwise this is a normal register reference.
-    if (getLexer().isNot(AsmToken::Colon))
+    // In case it is a normal register and there is ptr in the operand this
+    // is an error
+    if (getLexer().isNot(AsmToken::Colon)){
+      if (PtrInOperand){
+        return ErrorOperand(Start, "expected memory operand after "
+                            "'ptr', found register operand instead");
+      }
       return X86Operand::CreateReg(RegNo, Start, End);
-
+    }
+
     return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
   }

diff --git a/test/MC/X86/intel-syntax-ambiguous.s b/test/MC/X86/intel-syntax-ambiguous.s
index fe1fe5023902..e90cca820043 100644
--- a/test/MC/X86/intel-syntax-ambiguous.s
+++ b/test/MC/X86/intel-syntax-ambiguous.s
@@ -45,3 +45,15 @@ add rax, 3
 fadd "?half@?0??bar@@YAXXZ@4NA"
 // CHECK: error: ambiguous operand size for instruction 'fadd'
+
+// Instruction line with PTR inside check that they don't accept register as memory.
+
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: andps xmm1, xmmword ptr xmm1
+andps xmm1, xmmword ptr xmm1
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: andps xmmword ptr xmm1, xmm1
+andps xmmword ptr xmm1, xmm1
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: mov dword ptr eax, ebx
+mov dword ptr eax, ebx

From 927e4ae257f323569b8f8ecbc06e7d8cc198ea51 Mon Sep 17 00:00:00 2001
From: Rafael Espindola
Date: Thu, 3 Dec 2015 14:35:15 +0000
Subject: [PATCH 004/364] Delete dead code.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254609 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Linker/Linker.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index 0c7dc910a65c..f9890935126e 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -71,8 +71,6 @@ class Linker {
   Linker(Module &M, DiagnosticHandlerFunction DiagnosticHandler);
   Linker(Module &M);

-  Module &getModule() const { return Composite; }
-
   /// \brief Link \p Src into the composite. The source is destroyed.
/// /// Passing OverrideSymbols as true will have symbols from Src From 90ebc9e32048eec171225cd26e72001a2b973035 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 3 Dec 2015 14:48:20 +0000 Subject: [PATCH 005/364] Don't pass member variables to member functions. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254610 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 67613967f490..a6a26be6a44f 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -451,7 +451,7 @@ class ModuleLinker { /// Handles cloning of a global values from the source module into /// the destination module, including setting the attributes and visibility. - GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, const GlobalValue *SGV, + GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, const GlobalValue *DGV, bool ForDefinition); /// Check if we should promote the given local value to global scope. @@ -526,10 +526,9 @@ class ModuleLinker { /// Functions that take care of cloning a specific global value type /// into the destination module. - GlobalVariable *copyGlobalVariableProto(TypeMapTy &TypeMap, - const GlobalVariable *SGVar); - Function *copyFunctionProto(TypeMapTy &TypeMap, const Function *SF); - GlobalValue *copyGlobalAliasProto(TypeMapTy &TypeMap, const GlobalAlias *SGA); + GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); + Function *copyFunctionProto(const Function *SF); + GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); /// Helper methods to check if we are importing from or potentially /// exporting from the current source module. @@ -762,8 +761,7 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { /// Loop through the global variables in the src module and merge them into the /// dest module. GlobalVariable * -ModuleLinker::copyGlobalVariableProto(TypeMapTy &TypeMap, - const GlobalVariable *SGVar) { +ModuleLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { // No linking to be performed or linking from the source: simply create an // identical version of the symbol over in the dest module... the // initializer will be filled in later by LinkGlobalInits. @@ -779,8 +777,7 @@ ModuleLinker::copyGlobalVariableProto(TypeMapTy &TypeMap, /// Link the function in the source module into the destination module if /// needed, setting up mapping information. -Function *ModuleLinker::copyFunctionProto(TypeMapTy &TypeMap, - const Function *SF) { +Function *ModuleLinker::copyFunctionProto(const Function *SF) { // If there is no linkage to be performed or we are linking from the source, // bring SF over. return Function::Create(TypeMap.get(SF->getFunctionType()), @@ -788,8 +785,7 @@ Function *ModuleLinker::copyFunctionProto(TypeMapTy &TypeMap, } /// Set up prototypes for any aliases that come over from the source module. -GlobalValue *ModuleLinker::copyGlobalAliasProto(TypeMapTy &TypeMap, - const GlobalAlias *SGA) { +GlobalValue *ModuleLinker::copyGlobalAliasProto(const GlobalAlias *SGA) { // If there is no linkage to be performed or we're linking from the source, // bring over SGA. 
  auto *Ty = TypeMap.get(SGA->getValueType());
@@ -820,18 +816,17 @@ void ModuleLinker::setVisibility(GlobalValue *NewGV, const GlobalValue *SGV,
   NewGV->setVisibility(Visibility);
 }

-GlobalValue *ModuleLinker::copyGlobalValueProto(TypeMapTy &TypeMap,
-                                                const GlobalValue *SGV,
+GlobalValue *ModuleLinker::copyGlobalValueProto(const GlobalValue *SGV,
                                                 const GlobalValue *DGV,
                                                 bool ForDefinition) {
   GlobalValue *NewGV;
   if (auto *SGVar = dyn_cast<GlobalVariable>(SGV)) {
-    NewGV = copyGlobalVariableProto(TypeMap, SGVar);
+    NewGV = copyGlobalVariableProto(SGVar);
   } else if (auto *SF = dyn_cast<Function>(SGV)) {
-    NewGV = copyFunctionProto(TypeMap, SF);
+    NewGV = copyFunctionProto(SF);
   } else {
     if (ForDefinition)
-      NewGV = copyGlobalAliasProto(TypeMap, cast<GlobalAlias>(SGV));
+      NewGV = copyGlobalAliasProto(cast<GlobalAlias>(SGV));
     else
       NewGV = new GlobalVariable(
           DstM, TypeMap.get(SGV->getType()->getElementType()),
@@ -1418,7 +1413,7 @@ bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) {
     if (DoneLinkingBodies)
       return false;

-    NewGV = copyGlobalValueProto(TypeMap, SGV, DGV, LinkFromSrc);
+    NewGV = copyGlobalValueProto(SGV, DGV, LinkFromSrc);
   }

   NewGV->setUnnamedAddr(HasUnnamedAddr);

From 695e5cca23d23b10b128ecccfa4399a7f5b306d7 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Thu, 3 Dec 2015 15:41:33 +0000
Subject: [PATCH 006/364] [Hexagon] Fix instruction descriptor flags for
 memory access size

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254613 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index af67481e4f47..47a6f8636276 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -90,12 +90,16 @@ namespace HexagonII {
     PostInc = 6   // Post increment addressing mode
   };

+  // MemAccessSize is represented as 1+log2(N) where N is size in bits.
   enum class MemAccessSize {
     NoMemAccess = 0,       // Not a memory acces instruction.
     ByteAccess = 1,        // Byte access instruction (memb).
     HalfWordAccess = 2,    // Half word access instruction (memh).
     WordAccess = 3,        // Word access instruction (memw).
-    DoubleWordAccess = 4   // Double word access instruction (memd)
+    DoubleWordAccess = 4,  // Double word access instruction (memd)
+                  // 5,    // We do not have a 16 byte vector access.
+    Vector64Access = 7,    // 64 Byte vector access instruction (vmem).
+    Vector128Access = 8    // 128 Byte vector access instruction (vmem).
   };

   // MCInstrDesc TSFlags
@@ -175,7 +179,7 @@ namespace HexagonII {
     AddrModeMask = 0x7,
     // Access size for load/store instructions.
     MemAccessSizePos = 43,
-    MemAccesSizeMask = 0x7,
+    MemAccesSizeMask = 0xf,

     // Branch predicted taken.
     TakenPos = 47,

From 7fc4331080c9daadec246a43dad5d7095d81ecd9 Mon Sep 17 00:00:00 2001
From: Rafael Espindola
Date: Thu, 3 Dec 2015 16:36:16 +0000
Subject: [PATCH 007/364] Simplify ValueMap handling.

We now just return values and let ValueMap handle the map.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 91 ++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a6a26be6a44f..4bc0ad039ac7 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -513,10 +513,10 @@ class ModuleLinker { void upgradeMismatchedGlobals(); bool linkIfNeeded(GlobalValue &GV); - bool linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV); + Constant *linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV); - bool linkGlobalValueProto(GlobalValue *GV); + Constant *linkGlobalValueProto(GlobalValue *GV); bool linkModuleFlagsMetadata(); void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src); @@ -856,8 +856,7 @@ Value *ModuleLinker::materializeDeclFor(Value *V) { if (!SGV) return nullptr; - linkGlobalValueProto(SGV); - return ValueMap[SGV]; + return linkGlobalValueProto(SGV); } void ValueMaterializerTy::materializeInitFor(GlobalValue *New, @@ -1277,8 +1276,8 @@ static void getArrayElements(const Constant *C, /// If there were any appending global variables, link them together now. /// Return true on error. -bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV) { +Constant *ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV) { ArrayType *SrcTy = cast(TypeMap.get(SrcGV->getType()->getElementType())); Type *EltTy = SrcTy->getElementType(); @@ -1286,32 +1285,46 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, if (DstGV) { ArrayType *DstTy = cast(DstGV->getType()->getElementType()); - if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) - return emitError( + if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) { + emitError( "Linking globals named '" + SrcGV->getName() + "': can only link appending global with another appending global!"); + return nullptr; + } // Check to see that they two arrays agree on type. 
- if (EltTy != DstTy->getElementType()) - return emitError("Appending variables with different element types!"); - if (DstGV->isConstant() != SrcGV->isConstant()) - return emitError("Appending variables linked with different const'ness!"); + if (EltTy != DstTy->getElementType()) { + emitError("Appending variables with different element types!"); + return nullptr; + } + if (DstGV->isConstant() != SrcGV->isConstant()) { + emitError("Appending variables linked with different const'ness!"); + return nullptr; + } - if (DstGV->getAlignment() != SrcGV->getAlignment()) - return emitError( + if (DstGV->getAlignment() != SrcGV->getAlignment()) { + emitError( "Appending variables with different alignment need to be linked!"); + return nullptr; + } - if (DstGV->getVisibility() != SrcGV->getVisibility()) - return emitError( + if (DstGV->getVisibility() != SrcGV->getVisibility()) { + emitError( "Appending variables with different visibility need to be linked!"); + return nullptr; + } - if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) - return emitError( + if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) { + emitError( "Appending variables with different unnamed_addr need to be linked!"); + return nullptr; + } - if (StringRef(DstGV->getSection()) != SrcGV->getSection()) - return emitError( + if (StringRef(DstGV->getSection()) != SrcGV->getSection()) { + emitError( "Appending variables with different section name need to be linked!"); + return nullptr; + } } SmallVector DstElements; @@ -1347,9 +1360,10 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, // Propagate alignment, visibility and section info. copyGVAttributes(NG, SrcGV); - // Replace any uses of the two global variables with uses of the new - // global. - ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); + Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); + + // Stop recursion. + ValueMap[SrcGV] = Ret; for (auto *V : SrcElements) { DstElements.push_back( @@ -1358,15 +1372,17 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, NG->setInitializer(ConstantArray::get(NewType, DstElements)); + // Replace any uses of the two global variables with uses of the new + // global. if (DstGV) { DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); DstGV->eraseFromParent(); } - return false; + return Ret; } -bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { +Constant *ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { GlobalValue *DGV = getLinkedToGlobal(SGV); // Handle the ultra special appending linkage case first. @@ -1390,12 +1406,7 @@ bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { LinkFromSrc = true; } else if (DGV) { if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV)) - return true; - } - - if (!LinkFromSrc && DGV) { - // Make sure to remember this mapping. - ValueMap[SGV] = ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType())); + return nullptr; } if (DGV) @@ -1411,7 +1422,7 @@ bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { // metadata linking), don't link in the global value due to this // reference, simply map it to null. if (DoneLinkingBodies) - return false; + return nullptr; NewGV = copyGlobalValueProto(SGV, DGV, LinkFromSrc); } @@ -1434,16 +1445,12 @@ bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { NewGVar->setConstant(false); } - // Make sure to remember this mapping. 
- if (NewGV != DGV) { - if (DGV) { - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); - DGV->eraseFromParent(); - } - ValueMap[SGV] = NewGV; + if (NewGV != DGV && DGV) { + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); + DGV->eraseFromParent(); } - return false; + return ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType())); } /// Update the initializers in the Dest module now that all globals that may be @@ -1534,7 +1541,7 @@ bool ModuleLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { // are linked in. Otherwise, linkonce and other lazy linked GVs will // not be materialized if they aren't referenced. for (auto *SGV : ComdatMembers[SC]) { - auto *DGV = cast_or_null(ValueMap[SGV]); + auto *DGV = cast_or_null(ValueMap.lookup(SGV)); if (DGV && !DGV->isDeclaration()) continue; MapValue(SGV, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer); From 44386caaacbb8f931bd2048da6a00db6beb17169 Mon Sep 17 00:00:00 2001 From: Colin LeMahieu Date: Thu, 3 Dec 2015 16:37:21 +0000 Subject: [PATCH 008/364] [Hexagon] NFC Using canonicalizePacket to compound/duplex/pad packets rather than doing it separately. This also ensures the integrated assembler path matches the assembly parser path. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254616 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 28 ++++++++++-------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 19769258ee89..8a6d6555d901 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -191,29 +191,23 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); unsigned IgnoreCount = 0; - for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) { + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) if (MII->getOpcode() == TargetOpcode::DBG_VALUE || MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) ++IgnoreCount; - else { + else HexagonLowerToMC(MCII, &*MII, MCB, *this); - } - } } - else { + else HexagonLowerToMC(MCII, MI, MCB, *this); - HexagonMCInstrInfo::padEndloop(OutStreamer->getContext(), MCB); - } - // Examine the packet and try to find instructions that can be converted - // to compounds. - HexagonMCInstrInfo::tryCompound(MCII, OutStreamer->getContext(), MCB); - // Examine the packet and convert pairs of instructions to duplex - // instructions when possible. 
- SmallVector possibleDuplexes; - possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB); - HexagonMCShuffle(MCII, *Subtarget, OutStreamer->getContext(), MCB, - possibleDuplexes); - EmitToStreamer(*OutStreamer, MCB); + + bool Ok = HexagonMCInstrInfo::canonicalizePacket( + MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); + assert(Ok); + (void)Ok; + if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + return; + OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); } extern "C" void LLVMInitializeHexagonAsmPrinter() { From 55c790e29c334af76790f6ee01df6b0575f668ec Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 3 Dec 2015 16:47:20 +0000 Subject: [PATCH 009/364] [Hexagon] Implement CONCAT_VECTORS for HVX using V6_vcombine git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254617 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 10 ++++++++++ lib/Target/Hexagon/HexagonISelLowering.h | 1 + lib/Target/Hexagon/HexagonInstrInfoV60.td | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 04f5b6649293..a75f391a4eea 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2042,6 +2042,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; @@ -2346,6 +2347,7 @@ SDValue HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); EVT VT = Op.getValueType(); unsigned NElts = Op.getNumOperands(); SDValue Vec = Op.getOperand(0); @@ -2376,6 +2378,14 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, } } + if (UseHVX) { + SDValue Vec0 = Op.getOperand(1); + uint64_t VS = VecVT.getSizeInBits(); + assert((VS == 64*8 && Subtarget.useHVXSglOps()) || + (VS == 128*8 && Subtarget.useHVXDblOps())); + SDValue Combined = DAG.getNode(HexagonISD::VCOMBINE, dl, VT, Vec0, Vec); + return Combined; + } for (unsigned i = 0, e = NElts; i != e; ++i) { unsigned OpIdx = NElts - i - 1; SDValue Operand = Op.getOperand(OpIdx); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 64033d95ee3c..b6d39fe91728 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -80,6 +80,7 @@ bool isPositiveHalfWord(SDNode *N); INSERTRP, EXTRACTU, EXTRACTURP, + VCOMBINE, TC_RETURN, EH_RETURN, DCFETCH, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td index 394df0fdd6e3..897ada081534 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -1535,6 +1535,20 @@ let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in defm V6_vcombine : T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 
VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { defm V6_vsathub : T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; @@ -1872,7 +1886,7 @@ defm V6_vasrhbrndsat : V6_vasrhbrndsat_enc; } -// Assemlber mapped -- alias? +// Assembler mapped -- alias? //defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { defm V6_vshuffvdd : From 5d94a27cae83d67fa8890f74a03ff22979adaec3 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 3 Dec 2015 17:07:12 +0000 Subject: [PATCH 010/364] Friendly takeover of the Hexagon backend git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254620 91177308-0d34-0410-b5e6-96231b3b80d8 --- CODE_OWNERS.TXT | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index 0a6f057341e8..cdd0f8dc933c 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -53,10 +53,6 @@ N: Quentin Colombet E: qcolombet@apple.com D: Register allocators -N: Anshuman Dasgupta -E: adasgupt@codeaurora.org -D: Hexagon Backend - N: Duncan P. N. Exon Smith E: dexonsmith@apple.com D: Branch weights and BlockFrequencyInfo @@ -138,6 +134,10 @@ N: Richard Osborne E: richard@xmos.com D: XCore Backend +N: Krzysztof Parzyszek +E: kparzysz@codeaurora.org +D: Hexagon Backend + N: Chad Rosier E: mcrosier@codeaurora.org D: Fast-Isel From 23a903a5175908226b6aecc1832d32e820b3f091 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 3 Dec 2015 17:19:58 +0000 Subject: [PATCH 011/364] AArch64FastISel: Use cbz/cbnz to branch on i1 In the case of a conditional branch without a preceding cmp we used to emit a "and; cmp; b.eq/b.ne" sequence, use tbz/tbnz instead. Differential Revision: http://reviews.llvm.org/D15122 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254621 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 86 ++++++------------- test/CodeGen/AArch64/arm64-fast-isel-br.ll | 15 +--- .../AArch64/fast-isel-branch-cond-mask.ll | 3 +- .../AArch64/fast-isel-branch-cond-split.ll | 8 +- 4 files changed, 32 insertions(+), 80 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index b7849d5bbc26..cae2d5276296 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2275,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2307,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. 
- CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2335,37 +2334,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2383,20 +2351,23 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } else FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2404,26 +2375,19 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - // - // FIXME: Optimize this with TBZW/TBZNW. - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
+ unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); finishCondBranch(BI->getParent(), TBB, FBB); diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 0ef7b143df80..55c9c6036ed5 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,9 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: b.eq LBB4_2 +; CHECK: tbz w0, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -106,9 +104,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry %1 = load i32, i32* %c.addr, align 4 -; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1 -; CHECK: cmp w[[REG]], #0 -; CHECK: b.eq LBB4_4 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4 %conv1 = trunc i32 %1 to i1 br i1 %conv1, label %if.then3, label %if.end4 @@ -118,8 +114,7 @@ if.then3: ; preds = %if.end if.end4: ; preds = %if.then3, %if.end %2 = load i64, i64* %d.addr, align 8 -; CHECK: cmp w{{[0-9]+}}, #0 -; CHECK: b.eq LBB4_6 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6 %conv5 = trunc i64 %2 to i1 br i1 %conv5, label %if.then7, label %if.end8 @@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1 -; CHECK: cmp [[REG3]], #0 -; CHECK: b.eq LBB5_2 +; CHECK: tbz w[[REG2]], #0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll index c018b2778b04..55fbf63319ee 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll @@ -4,8 +4,7 @@ define void @test(i64 %a, i64 %b, i2* %c) { ; CHECK-LABEL: test ; CHECK: and [[REG1:w[0-9]+]], w8, #0x3 ; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}} -; CHECK: and [[REG2:w[0-9]+]], w8, #0x1 -; CHECK-NEXT: cmp [[REG2]], #0 +; CHECK-NEXT: tbz w9, #0, %1 = trunc i64 %a to i2 %2 = trunc i64 %b to i1 ; Force fast-isel to fall back to SDAG. 
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index 5248b9253e7a..e04a62b85c8e 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -44,9 +44,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, eq ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.ne +; CHECK-NEXT: tbnz w8, #0, define i64 @test_or_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp eq i32 %a, 0 @@ -68,9 +66,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, ne ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.eq +; CHECK-NEXT: tbz w8, #0, define i64 @test_and_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp ne i32 %a, 0 From 83c34652ffe9b368401aea98d9a4cc1ef6fa32d3 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 3 Dec 2015 17:53:34 +0000 Subject: [PATCH 012/364] [Hexagon] Remove variable unused in NDEBUG build git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index a75f391a4eea..b59fe6b67044 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2380,9 +2380,8 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, if (UseHVX) { SDValue Vec0 = Op.getOperand(1); - uint64_t VS = VecVT.getSizeInBits(); - assert((VS == 64*8 && Subtarget.useHVXSglOps()) || - (VS == 128*8 && Subtarget.useHVXDblOps())); + assert((VecVT.getSizeInBits() == 64*8 && Subtarget.useHVXSglOps()) || + (VecVT.getSizeInBits() == 128*8 && Subtarget.useHVXDblOps())); SDValue Combined = DAG.getNode(HexagonISD::VCOMBINE, dl, VT, Vec0, Vec); return Combined; } From 1c14f2864d7e7b87609c9d4c9b8295d0140ec978 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 3 Dec 2015 18:20:05 +0000 Subject: [PATCH 013/364] [ThinLTO] Appending linkage fixes Summary: Fix import from module with appending var, which cannot be imported. The first fix is to remove an overly-aggressive error check. The second fix is to deal with restructuring introduced to the module linker yesterday in r254418 (actually, this fix was included already in r254559, just added some additional cleanup). Test by Mehdi Amini. Reviewers: joker.eph, rafael Subscribers: joker.eph, llvm-commits Differential Revision: http://reviews.llvm.org/D15156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254624 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 14 ++++++++----- .../Inputs/funcimport_appending_global.ll | 6 ++++++ test/Linker/funcimport_appending_global.ll | 20 +++++++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 test/Linker/Inputs/funcimport_appending_global.ll create mode 100644 test/Linker/funcimport_appending_global.ll diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 4bc0ad039ac7..55ab1824740b 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -726,8 +726,10 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { // It would be incorrect to import an appending linkage variable, // since it would cause global constructors/destructors to be // executed multiple times. 
This should have already been handled - // by linkGlobalValueProto. - llvm_unreachable("Cannot import appending linkage variable"); + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage here + // as this helper is called more widely in getLinkedToGlobal. + return GlobalValue::AppendingLinkage; case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: @@ -1015,8 +1017,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, // We always have to add Src if it has appending linkage. if (Src.hasAppendingLinkage()) { - // Caller should have already determined that we can't link from source - // when importing (see comments in linkGlobalValueProto). + // Should have prevented importing for appending linkage in linkIfNeeded. assert(!isPerformingImport()); LinkFromSrc = true; return false; @@ -1387,9 +1388,12 @@ Constant *ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { // Handle the ultra special appending linkage case first. assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage()); - if (SGV->hasAppendingLinkage()) + if (SGV->hasAppendingLinkage()) { + // Should have prevented importing for appending linkage in linkIfNeeded. + assert(!isPerformingImport()); return linkAppendingVarProto(cast_or_null(DGV), cast(SGV)); + } bool LinkFromSrc = true; Comdat *C = nullptr; diff --git a/test/Linker/Inputs/funcimport_appending_global.ll b/test/Linker/Inputs/funcimport_appending_global.ll new file mode 100644 index 000000000000..413b890b02ad --- /dev/null +++ b/test/Linker/Inputs/funcimport_appending_global.ll @@ -0,0 +1,6 @@ +@v = weak global i8 1 +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* @v}] + +define void @foo() { + ret void +} diff --git a/test/Linker/funcimport_appending_global.ll b/test/Linker/funcimport_appending_global.ll new file mode 100644 index 000000000000..190d31ee8c7f --- /dev/null +++ b/test/Linker/funcimport_appending_global.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as -function-summary %s -o %t.bc +; RUN: llvm-as -function-summary %p/Inputs/funcimport_appending_global.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc + +; Do the import now +; RUN: llvm-link %t.bc -functionindex=%t3.thinlto.bc -import=foo:%t2.bc -S | FileCheck %s + +; Ensure that global constructor (appending linkage) is not imported +; CHECK-NOT: @llvm.global_ctors = {{.*}}@foo + +declare void @f() +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null}] + +define i32 @main() { +entry: + call void @foo() + ret i32 0 +} + +declare void @foo() From d61481245dc307bd0c15b7a681fc0b196884589c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 3 Dec 2015 18:41:59 +0000 Subject: [PATCH 014/364] dwarfdump: Correctly identify the indices for DWP records The indices are one-based, not zero-based, per the spec.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254626 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 2 +- test/DebugInfo/dwarfdump-dwp.test | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 1f1921649b57..96b316957dfd 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -133,7 +133,7 @@ void DWARFUnitIndex::dump(raw_ostream &OS) const { for (unsigned i = 0; i != Header.NumBuckets; ++i) { auto &Row = Rows[i]; if (auto *Contribs = Row.Contributions.get()) { - OS << format("%5u 0x%016" PRIx64 " ", i, Row.Signature); + OS << format("%5u 0x%016" PRIx64 " ", i + 1, Row.Signature); for (unsigned i = 0; i != Header.NumColumns; ++i) { auto &Contrib = Contribs[i]; OS << format("[0x%08x, 0x%08x) ", Contrib.Offset, diff --git a/test/DebugInfo/dwarfdump-dwp.test b/test/DebugInfo/dwarfdump-dwp.test index af5de5067e69..8aef636d4d9a 100644 --- a/test/DebugInfo/dwarfdump-dwp.test +++ b/test/DebugInfo/dwarfdump-dwp.test @@ -39,15 +39,15 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-dwp.x86_64.o | FileCheck %s ; CHECK-NEXT: version = 2 slots = 16 ; CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS ; CHECK-NEXT: ----- ------------------ ------------------------ ------------------------ ------------------------ ------------------------ -; CHECK-NEXT: 2 0xfef104c25502f092 [0x0000002d, 0x0000005f) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) -; CHECK-NEXT: 8 0x03c30756e2d45008 [0x00000000, 0x0000002d) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) +; CHECK-NEXT: 3 0xfef104c25502f092 [0x0000002d, 0x0000005f) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) +; CHECK-NEXT: 9 0x03c30756e2d45008 [0x00000000, 0x0000002d) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) ; CHECK: .debug_tu_index contents: ; CHECK-NEXT: version = 2 slots = 16 ; CHECK: Index Signature TYPES ABBREV LINE STR_OFFSETS ; CHECK-NEXT: ----- ------------------ ------------------------ ------------------------ ------------------------ ------------------------ -; CHECK-NEXT: 8 0x1d02f3be30cc5688 [0x00000024, 0x00000048) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) -; CHECK-NEXT: 12 0x3875c0e21cda63fc [0x00000000, 0x00000024) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) +; CHECK-NEXT: 9 0x1d02f3be30cc5688 [0x00000024, 0x00000048) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) +; CHECK-NEXT: 13 0x3875c0e21cda63fc [0x00000000, 0x00000024) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) ; TODO: use the index section offset info to correctly dump strings in debug info ; TODO: use the index section offset info to correctly dump file names in debug info From 0123bc6beac1ca9a06450e98d8e829d32ce52498 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Thu, 3 Dec 2015 18:45:39 +0000 Subject: [PATCH 015/364] [CMake] Add option LLVM_EXTERNALIZE_DEBUGINFO Summary: This adds support for generating dSYM files and stripping debug info from executables and dylibs. It also supports passing -object_path_lto to the linker to generate dSYMs for LTO builds. 
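As a rough usage sketch (illustrative only, not part of the patch itself): on an Apple host the option is meant to be switched on at configure time, either with -DLLVM_EXTERNALIZE_DEBUGINFO=ON on the cmake command line or from a cache script:

  # Opt in to dSYM generation and post-build stripping (Darwin-only here).
  set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "")

Each executable and shared library built afterwards gets a dsymutil-produced .dSYM companion and is then stripped.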
Reviewers: bogner, friss Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15133 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254627 91177308-0d34-0410-b5e6-96231b3b80d8 --- CMakeLists.txt | 3 +++ cmake/modules/AddLLVM.cmake | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc46413640c5..c4ff8f3cd28b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -301,6 +301,9 @@ endif( LLVM_USE_INTEL_JITEVENTS ) option(LLVM_USE_OPROFILE "Use opagent JIT interface to inform OProfile about JIT code" OFF) +option(LLVM_EXTERNALIZE_DEBUGINFO + "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) + # If enabled, verify we are on a platform that supports oprofile. if( LLVM_USE_OPROFILE ) if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index b5517d0b893d..97ac96ed4281 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -512,6 +512,10 @@ function(llvm_add_library name) add_dependencies(${objlib} ${LLVM_COMMON_DEPENDS}) endforeach() endif() + + if(ARG_SHARED OR ARG_MODULE) + llvm_externalize_debuginfo(${name}) + endif() endfunction() macro(add_llvm_library name) @@ -655,6 +659,8 @@ macro(add_llvm_executable name) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) + + llvm_externalize_debuginfo(${name}) endmacro(add_llvm_executable name) function(export_executable_symbols target) @@ -1168,3 +1174,24 @@ function(add_llvm_tool_symlink name dest) endif() endif() endfunction() + +function(llvm_externalize_debuginfo name) + if(NOT LLVM_EXTERNALIZE_DEBUGINFO) + return() + endif() + + if(APPLE) + if(CMAKE_CXX_FLAGS MATCHES "-flto" + OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") + + set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) + set_target_properties(${name} PROPERTIES + LINK_FLAGS "-Wl,-object_path_lto -Wl,${lto_object}") + endif() + add_custom_command(TARGET ${name} POST_BUILD + COMMAND xcrun dsymutil $ + COMMAND xcrun strip -Sl $) + else() + message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") + endif() +endfunction() From c4b843ccb787bfd31dc0ce3d01c4c61b5c86ca58 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Thu, 3 Dec 2015 18:55:28 +0000 Subject: [PATCH 016/364] [WinEH] Avoid infinite loop in BranchFolding for multiple single block funclets Differential Revision: http://reviews.llvm.org/D14996 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254629 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 8 ++ .../BranchFolding/single-block-funclets.ll | 110 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 test/Transforms/BranchFolding/single-block-funclets.ll diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 54d92ad67a97..c6a6476747e6 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1564,6 +1564,14 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // removed, move this block to the end of the function. 
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(&*FallThrough)) { diff --git a/test/Transforms/BranchFolding/single-block-funclets.ll b/test/Transforms/BranchFolding/single-block-funclets.ll new file mode 100644 index 000000000000..21c7818e5195 --- /dev/null +++ b/test/Transforms/BranchFolding/single-block-funclets.ll @@ -0,0 +1,110 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s + +declare i32 @__CxxFrameHandler3(...) + +declare void @throw() +declare i16 @f() + +define i16 @test1(i16 %a, i8* %b) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %cmp = icmp eq i16 %a, 10 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %call1 = invoke i16 @f() + to label %cleanup unwind label %catch.dispatch + +if.else: + %call2 = invoke i16 @f() + to label %cleanup unwind label %catch.dispatch + +catch.dispatch: + catchpad [i8* null, i32 8, i8* null] + to label %catch unwind label %catch.dispatch.2 + +catch: + invoke void @throw() noreturn + to label %unreachable unwind label %catchendblock + +catch.dispatch.2: + catchpad [i8* null, i32 64, i8* null] + to label %catch.2 unwind label %catchendblock + +catch.2: + store i8 1, i8* %b + invoke void @throw() noreturn + to label %unreachable unwind label %catchendblock + +catchendblock: + catchendpad unwind to caller + +cleanup: + %retval = phi i16 [ %call1, %if.then ], [ %call2, %if.else ] + ret i16 %retval + +unreachable: + unreachable +} + +; This test verifies the case where two funclet blocks meet the old criteria +; to be placed at the end. The order of the blocks is not important for the +; purposes of this test. The failure mode is an infinite loop during +; compilation. 
+; +; CHECK-LABEL: .def test1; + +define i16 @test2(i16 %a, i8* %b) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %cmp = icmp eq i16 %a, 10 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %call1 = invoke i16 @f() + to label %cleanup unwind label %catch.dispatch + +if.else: + %call2 = invoke i16 @f() + to label %cleanup unwind label %catch.dispatch + +catch.dispatch: + catchpad [i8* null, i32 8, i8* null] + to label %catch unwind label %catch.dispatch.2 + +catch: + invoke void @throw() noreturn + to label %unreachable unwind label %catchendblock + +catch.dispatch.2: + %c2 = catchpad [i8* null, i32 32, i8* null] + to label %catch.2 unwind label %catch.dispatch.3 + +catch.2: + store i8 1, i8* %b + catchret %c2 to label %cleanup + +catch.dispatch.3: + %c3 = catchpad [i8* null, i32 64, i8* null] + to label %catch.3 unwind label %catchendblock + +catch.3: + store i8 2, i8* %b + catchret %c3 to label %cleanup + +catchendblock: + catchendpad unwind to caller + +cleanup: + %retval = phi i16 [ %call1, %if.then ], [ %call2, %if.else ], [ -1, %catch.2 ], [ -1, %catch.3 ] + ret i16 %retval + +unreachable: + unreachable +} + +; This test verifies the case where three funclet blocks all meet the old +; criteria to be placed at the end. The order of the blocks is not important +; for the purposes of this test. The failure mode is an infinite loop during +; compilation. +; +; CHECK-LABEL: .def test2; + From fd1c9c504318bbe41d43169d3be415bfa978caad Mon Sep 17 00:00:00 2001 From: Easwaran Raman Date: Thu, 3 Dec 2015 19:03:20 +0000 Subject: [PATCH 017/364] Test commit. Remove blank spaces at the end of comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254630 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InlineCost.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 26f2e7ff504a..6d7d74999061 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor { /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee - /// itself. + /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. + /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); // Custom analysis routines. From 2272eac9491a7be71409e3e76ec09eccb678aee5 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 3 Dec 2015 19:10:55 +0000 Subject: [PATCH 018/364] Simplify test. NFC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254631 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Linker/weakextern.ll | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll index 8d479a0d39b3..e1754e60547e 100644 --- a/test/Linker/weakextern.ll +++ b/test/Linker/weakextern.ll @@ -1,12 +1,8 @@ -; RUN: llvm-as < %s > %t.bc -; RUN: llvm-as < %p/testlink.ll > %t2.bc -; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc -; RUN: llvm-dis < %t1.bc | FileCheck %s +; RUN: llvm-link %s %s %p/testlink.ll -S | FileCheck %s ; CHECK: kallsyms_names = extern_weak ; CHECK: Inte = global i32 ; CHECK: MyVar = external global i32 -@kallsyms_names = extern_weak global [0 x i8] ; <[0 x i8]*> [#uses=0] -@MyVar = extern_weak global i32 ; [#uses=0] -@Inte = extern_weak global i32 ; [#uses=0] - +@kallsyms_names = extern_weak global [0 x i8] +@MyVar = extern_weak global i32 +@Inte = extern_weak global i32 From 44a7fca4326b5bd76d584b488cacd74f37965f13 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Thu, 3 Dec 2015 19:30:38 +0000 Subject: [PATCH 019/364] Fix buildbot failures git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254636 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/BranchFolding/single-block-funclets.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Transforms/BranchFolding/single-block-funclets.ll b/test/Transforms/BranchFolding/single-block-funclets.ll index 21c7818e5195..b2286ac33597 100644 --- a/test/Transforms/BranchFolding/single-block-funclets.ll +++ b/test/Transforms/BranchFolding/single-block-funclets.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s +; REQUIRES: X86 declare i32 @__CxxFrameHandler3(...) From 5779341a6d68c4f4971c4f0b0033b41f6cb10662 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Thu, 3 Dec 2015 19:41:25 +0000 Subject: [PATCH 020/364] Move branch folding test to a better location. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254640 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/branchfolding-catchpads.ll} | 1 - 1 file changed, 1 deletion(-) rename test/{Transforms/BranchFolding/single-block-funclets.ll => CodeGen/X86/branchfolding-catchpads.ll} (99%) diff --git a/test/Transforms/BranchFolding/single-block-funclets.ll b/test/CodeGen/X86/branchfolding-catchpads.ll similarity index 99% rename from test/Transforms/BranchFolding/single-block-funclets.ll rename to test/CodeGen/X86/branchfolding-catchpads.ll index b2286ac33597..21c7818e5195 100644 --- a/test/Transforms/BranchFolding/single-block-funclets.ll +++ b/test/CodeGen/X86/branchfolding-catchpads.ll @@ -1,5 +1,4 @@ ; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s -; REQUIRES: X86 declare i32 @__CxxFrameHandler3(...) From 208ed9b5fb3eb00c3cc52e647df6cfce49168449 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Thu, 3 Dec 2015 19:47:04 +0000 Subject: [PATCH 021/364] [CMake] Removing an unnecessary layer of variable indirection This prevents passthrough variables from having values. 
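With the extra ${${value}} expansion, CMake re-expanded the already-expanded value as if it were another variable name, so the forwarded -D definitions usually came through empty. A minimal sketch of the difference (hypothetical variable names, not taken from the real build):

  set(CLANG_FOO "bar")                            # value the sub-build should receive
  string(REPLACE ";" "\;" value "${CLANG_FOO}")   # value == "bar"
  # before: -DCLANG_FOO=${${value}}  expands a variable literally named "bar" -> usually empty
  # after:  -DCLANG_FOO=${value}     forwards "bar" as intended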
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254641 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/LLVMExternalProjectUtils.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake index 5d8fb71e08b3..c2d9f530c200 100644 --- a/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -95,7 +95,7 @@ function(llvm_ExternalProject_Add name source_dir) if(variableName MATCHES "^${nameCanon}") string(REPLACE ";" "\;" value "${${variableName}}") list(APPEND PASSTHROUGH_VARIABLES - -D${variableName}=${${value}}) + -D${variableName}=${value}) endif() endforeach() From 3a8af93eb7ae93e59adec0ae6e30352816ea200d Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 3 Dec 2015 20:46:59 +0000 Subject: [PATCH 022/364] [X86] Put no-op ADJCALLSTACK markers around all dynamic lowerings Summary: These ADJCALLSTACK markers don't generate code, but they keep dynamic alloca code that calls chkstk out of the prologue. This slightly pessimizes inalloca calls by preventing some register copy coalescing, but I can live with that. Reviewers: qcolombet Subscribers: hans, llvm-commits Differential Revision: http://reviews.llvm.org/D15200 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254645 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 69 +++++++++++--------------- test/CodeGen/X86/inalloca-stdcall.ll | 5 +- test/CodeGen/X86/inalloca.ll | 15 +++--- test/CodeGen/X86/shrink-wrap-chkstk.ll | 37 ++++++++++++++ 4 files changed, 78 insertions(+), 48 deletions(-) create mode 100644 test/CodeGen/X86/shrink-wrap-chkstk.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 241bbfd331c1..2cf1d4ba30ee 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15622,54 +15622,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SplitStack; SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + bool Is64Bit = Subtarget->is64Bit(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); + + SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDNode* Node = Op.getNode(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" - " not tell us which reg is the stack pointer!"); + " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); - SDValue Tmp1 = SDValue(Node, 0); - SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); - SDValue Chain = Tmp1.getOperand(0); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. 
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), - SDLoc(Node)); - SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), - SDLoc(Node)); - - SDValue Ops[2] = { Tmp1, Tmp2 }; - return DAG.getMergeValues(Ops, dl); - } - - // Get the inputs. - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); - EVT VT = Op.getNode()->getValueType(0); - - bool Is64Bit = Subtarget->is64Bit(); - MVT SPTy = getPointerTy(DAG.getDataLayout()); - - if (SplitStack) { + Result = DAG.getNode(ISD::AND, dl, VT, Result, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain + } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -15687,10 +15673,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); - SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); - SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? 
X86::RAX : X86::EAX); @@ -15712,9 +15696,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } - SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, dl); + Result = SP; } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll index e5f6ea70e9cb..4f7e4092a99c 100644 --- a/test/CodeGen/X86/inalloca-stdcall.ll +++ b/test/CodeGen/X86/inalloca-stdcall.ll @@ -14,8 +14,9 @@ define void @g() { %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call x86_stdcallcc void @f(%Foo* inalloca %b) ; CHECK: calll _f@8 ; CHECK-NOT: %esp diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll index 904366219ab7..e523c945a69f 100644 --- a/test/CodeGen/X86/inalloca.ll +++ b/test/CodeGen/X86/inalloca.ll @@ -14,8 +14,9 @@ entry: %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @f(%Foo* inalloca %b) ; CHECK: calll _f ret void @@ -33,8 +34,9 @@ entry: %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%esp) -; CHECK: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK: movl $13, (%eax) +; CHECK: movl $42, 4(%eax) call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) ; CHECK: movl $1, %eax ; CHECK: calll _inreg_with_inalloca @@ -53,8 +55,9 @@ entry: %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK-DAG: movl $13, (%esp) -; CHECK-DAG: movl $42, 4(%esp) +; CHECK: movl %esp, %eax +; CHECK-DAG: movl $13, (%eax) +; CHECK-DAG: movl $42, 4(%eax) call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) ; CHECK-DAG: xorl %ecx, %ecx ; CHECK: calll _thiscall_with_inalloca diff --git a/test/CodeGen/X86/shrink-wrap-chkstk.ll b/test/CodeGen/X86/shrink-wrap-chkstk.ll new file mode 100644 index 000000000000..c0b2b45e676f --- /dev/null +++ b/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s + +; chkstk cannot come before the usual prologue, since it adjusts ESP. 
+ +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc18.0.0" + +%struct.S = type { [12 x i8] } + +define x86_thiscallcc void @call_inalloca(i1 %x) { +entry: + %argmem = alloca inalloca <{ %struct.S }>, align 4 + %argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0 + %argidx2 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 1 + store i8 42, i8* %argidx2, align 4 + br i1 %x, label %bb1, label %bb2 + +bb1: + store i8 42, i8* %argidx1, align 4 + br label %bb2 + +bb2: + call void @inalloca_params(<{ %struct.S }>* inalloca nonnull %argmem) + ret void +} + +; CHECK-LABEL: _call_inalloca: # @call_inalloca +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK: movl $12, %eax +; CHECK: calll __chkstk +; CHECK: calll _inalloca_params +; CHECK: movl %ebp, %esp +; CHECK: popl %ebp +; CHECK: retl + +declare void @inalloca_params(<{ %struct.S }>* inalloca) From 52c4f7de0934eef2019b77a14dc94415459d3fc1 Mon Sep 17 00:00:00 2001 From: Easwaran Raman Date: Thu, 3 Dec 2015 20:57:37 +0000 Subject: [PATCH 023/364] Interface to attach maximum function count from PGO to module as module flags. This provides interface to get and set maximum function counts to Module. This would allow things like determination of function hotness. The actual setting of this max function count will have to be done in the frontend. Differential Revision: http://reviews.llvm.org/D15003 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254647 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Module.h | 11 +++++++++++ lib/IR/Module.cpp | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 4e99c4256730..6cf75e747e06 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -639,6 +640,16 @@ class Module { /// \brief Set the PIC level (small or large model) void setPICLevel(PICLevel::Level PL); /// @} + + /// @name Utility functions for querying and setting PGO counts + /// @{ + + /// \brief Set maximum function count in PGO mode + void setMaximumFunctionCount(uint64_t); + + /// \brief Returns maximum function count in PGO mode + Optional getMaximumFunctionCount(); + /// @} }; /// An raw_ostream inserter for modules. diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 2b9adad44ba7..2acd9db210db 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -491,3 +491,15 @@ PICLevel::Level Module::getPICLevel() const { void Module::setPICLevel(PICLevel::Level PL) { addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL); } + +void Module::setMaximumFunctionCount(uint64_t Count) { + addModuleFlag(ModFlagBehavior::Error, "MaxFunctionCount", Count); +} + +Optional Module::getMaximumFunctionCount() { + auto *Val = + cast_or_null(getModuleFlag("MaxFunctionCount")); + if (!Val) + return None; + return cast(Val->getValue())->getZExtValue(); +} From 03212a0ad9e68062010028bb737c69312a6ad80f Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 3 Dec 2015 21:27:59 +0000 Subject: [PATCH 024/364] [RuntimeDyld] DenseMap -> std::unordered_map DenseMap is most applicable when both keys and values are small. In this case, the value violates that assumption, causing quite significant memory overhead. 
A std::unordered_map is more appropriate in this case (or at least fixed the memory problems I was seeing). Differential Revision: http://reviews.llvm.org/D14910 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 4 ++-- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index dd02ece3a9f1..a95f3bbe4179 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -97,11 +97,11 @@ void RuntimeDyldImpl::resolveRelocations() { // The Section here (Sections[i]) refers to the section in which the // symbol for the relocation is located. The SectionID in the relocation // entry provides the section to which the relocation will be applied. - int Idx = it->getFirst(); + int Idx = it->first; uint64_t Addr = Sections[Idx].getLoadAddress(); DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t" << format("%p", (uintptr_t)Addr) << "\n"); - resolveRelocationList(it->getSecond(), Addr); + resolveRelocationList(it->second, Addr); } Relocations.clear(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 914efd24660a..dafd3c8793c3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -30,6 +30,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include +#include #include using namespace llvm; @@ -264,7 +265,7 @@ class RuntimeDyldImpl { // Relocations to sections already loaded. Indexed by SectionID which is the // source of the address. The target where the address will be written is // SectionID/Offset in the relocation itself. - DenseMap Relocations; + std::unordered_map Relocations; // Relocations to external symbols that are not yet resolved. Symbols are // external when they aren't found in the global symbol table of all loaded From 70e1c7be4416f3db4a0ca9688879203428e67ac1 Mon Sep 17 00:00:00 2001 From: Colin LeMahieu Date: Thu, 3 Dec 2015 21:44:28 +0000 Subject: [PATCH 025/364] [Hexagon] Adding shuffling resources for HVX instructions and tests for instruction encodings. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254652 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 88 +++- .../Hexagon/MCTargetDesc/HexagonShuffler.h | 50 ++- test/MC/Hexagon/test.s | 4 + test/MC/Hexagon/v60-alu.s | 312 +++++++++++++ test/MC/Hexagon/v60-permute.s | 51 +++ test/MC/Hexagon/v60-shift.s | 39 ++ test/MC/Hexagon/v60-vcmp.s | 84 ++++ test/MC/Hexagon/v60-vmem.s | 424 ++++++++++++++++++ test/MC/Hexagon/v60-vmpy-acc.s | 123 +++++ test/MC/Hexagon/v60-vmpy1.s | 138 ++++++ test/MC/Hexagon/v60lookup.s | 14 + 11 files changed, 1320 insertions(+), 7 deletions(-) create mode 100644 test/MC/Hexagon/test.s create mode 100644 test/MC/Hexagon/v60-alu.s create mode 100644 test/MC/Hexagon/v60-permute.s create mode 100644 test/MC/Hexagon/v60-shift.s create mode 100644 test/MC/Hexagon/v60-vcmp.s create mode 100644 test/MC/Hexagon/v60-vmem.s create mode 100644 test/MC/Hexagon/v60-vmpy-acc.s create mode 100644 test/MC/Hexagon/v60-vmpy1.s create mode 100644 test/MC/Hexagon/v60lookup.s diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 45e1909ede5a..6ceb848ba20c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -95,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. + Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. 
+ Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { @@ -109,7 +163,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(ID, Extender, S, X); + HexagonInstr PI(MCII, ID, Extender, S, X); Packet.push_back(PI); } @@ -128,6 +182,8 @@ bool HexagonShuffler::check() { // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; // Number of duplex insns, solo insns. unsigned duplex = 0, solo = 0; // Number of insns restricting other insns in the packet to A and X types, @@ -170,6 +226,12 @@ bool HexagonShuffler::check() { case HexagonII::TypeJ: ++jumps; break; + case HexagonII::TypeCVI_VM_VP_LDU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_LD: + case HexagonII::TypeCVI_VM_TMP_LD: + case HexagonII::TypeCVI_VM_CUR_LD: + ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; @@ -178,6 +240,11 @@ bool HexagonShuffler::check() { if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. break; + case HexagonII::TypeCVI_VM_STU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_ST: + case HexagonII::TypeCVI_VM_NEW_ST: + ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; @@ -205,9 +272,9 @@ bool HexagonShuffler::check() { } // Check if the packet is legal. - if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) || - (solo && size() > 1) || (onlyAX && neitherAnorX > 1) || - (onlyAX && xtypeFloat)) { + if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || + (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || + (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { Error = SHUFFLE_ERROR_INVALID; return false; } @@ -338,6 +405,19 @@ bool HexagonShuffler::check() { return false; } } + // Verify the CVI slot subscriptions. + { + HexagonUnitAuction AuctionCVI; + + std::sort(begin(), end(), HexagonInstr::lessCVI); + + for (iterator I = begin(); I != end(); ++I) + for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid? + if (!AuctionCVI.bid(I->CVI.getUnits() << i)) { + Error = SHUFFLE_ERROR_SLOTS; + return false; + } + } Error = SHUFFLE_SUCCESS; return true; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 6355c3275a38..174f10fb2580 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -51,6 +51,44 @@ class HexagonResource { }; }; +// HVX insn resources. +class HexagonCVIResource : public HexagonResource { + typedef std::pair UnitsAndLanes; + typedef llvm::DenseMap TypeUnitsAndLanes; + + // Available HVX slots. + enum { + CVI_NONE = 0, + CVI_XLANE = 1 << 0, + CVI_SHIFT = 1 << 1, + CVI_MPY0 = 1 << 2, + CVI_MPY1 = 1 << 3 + }; + + static bool SetUp; + static bool setup(); + static TypeUnitsAndLanes *TUL; + + // Count of adjacent slots that the insn requires to be executed. + unsigned Lanes; + // Flag whether the insn is a load or a store. + bool Load, Store; + // Flag whether the HVX resources are valid. 
+ bool Valid; + + void setLanes(unsigned l) { Lanes = l; }; + void setLoad(bool f = true) { Load = f; }; + void setStore(bool f = true) { Store = f; }; + +public: + HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + + bool isValid() const { return (Valid); }; + unsigned getLanes() const { return (Lanes); }; + bool mayLoad() const { return (Load); }; + bool mayStore() const { return (Store); }; +}; + // Handle to an insn used by the shuffling algorithm. class HexagonInstr { friend class HexagonShuffler; @@ -58,12 +96,14 @@ class HexagonInstr { MCInst const *ID; MCInst const *Extender; HexagonResource Core; + HexagonCVIResource CVI; bool SoloException; public: - HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s, - bool x = false) - : ID(id), Extender(Extender), Core(s), SoloException(x){}; + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; MCInst const *getDesc() const { return (ID); }; @@ -79,6 +119,10 @@ class HexagonInstr { static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { return (HexagonResource::lessUnits(A.Core, B.Core)); }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; }; // Bundle shuffler. diff --git a/test/MC/Hexagon/test.s b/test/MC/Hexagon/test.s new file mode 100644 index 000000000000..e60578e65930 --- /dev/null +++ b/test/MC/Hexagon/test.s @@ -0,0 +1,4 @@ +#RUN: llvm-mc -filetype=obj -triple=hexagon -mcpu=hexagonv60 %s + +{ vmem (r0 + #0) = v0 + r0 = memw(r0) } \ No newline at end of file diff --git a/test/MC/Hexagon/v60-alu.s b/test/MC/Hexagon/v60-alu.s new file mode 100644 index 000000000000..1583c3da2cb7 --- /dev/null +++ b/test/MC/Hexagon/v60-alu.s @@ -0,0 +1,312 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 1ce2cbd7 { v23.w = vavg(v11.w,{{ *}}v2.w):rnd } +v23.w=vavg(v11.w,v2.w):rnd + +#CHECK: 1cf4d323 { v3.h = vnavg(v19.h,{{ *}}v20.h) } +v3.h=vnavg(v19.h,v20.h) + +#CHECK: 1cffce9a { v26.uh = vavg(v14.uh,{{ *}}v31.uh):rnd } +v26.uh=vavg(v14.uh,v31.uh):rnd + +#CHECK: 1ce5cba1 { v1.h = vavg(v11.h,{{ *}}v5.h):rnd } +v1.h=vavg(v11.h,v5.h):rnd + +#CHECK: 1cc0d012 { v18.ub = vabsdiff(v16.ub,{{ *}}v0.ub) } +v18.ub=vabsdiff(v16.ub,v0.ub) + +#CHECK: 1cc2de29 { v9.uh = vabsdiff(v30.h,{{ *}}v2.h) } +v9.uh=vabsdiff(v30.h,v2.h) + +#CHECK: 1ce9ca06 { v6.b = vnavg(v10.ub,{{ *}}v9.ub) } +v6.b=vnavg(v10.ub,v9.ub) + +#CHECK: 1caacf90 { v17:16.w = vadd(v15.h,{{ *}}v10.h) } +v17:16.w=vadd(v15.h,v10.h) + +#CHECK: 1cb4cabe { v31:30.h = vsub(v10.ub,{{ *}}v20.ub) } +v31:30.h=vsub(v10.ub,v20.ub) + +#CHECK: 1cb8cada { v27:26.w = vsub(v10.uh,{{ *}}v24.uh) } +v27:26.w=vsub(v10.uh,v24.uh) + +#CHECK: 1cbcdbe8 { v9:8.w = vsub(v27.h,{{ *}}v28.h) } +v9:8.w=vsub(v27.h,v28.h) + +#CHECK: 1caeca00 { v1:0.h = vsub(v11:10.h,{{ *}}v15:14.h):sat } +v1:0.h=vsub(v11:10.h,v15:14.h):sat + +#CHECK: 1ca8c43e { v31:30.w = vsub(v5:4.w,{{ *}}v9:8.w):sat } +v31:30.w=vsub(v5:4.w,v9:8.w):sat + +#CHECK: 1cbad95c { v29:28.h = vadd(v25.ub,{{ *}}v26.ub) } +v29:28.h=vadd(v25.ub,v26.ub) + +#CHECK: 1ca1dc64 { v5:4.w = vadd(v28.uh,{{ *}}v1.uh) } +v5:4.w=vadd(v28.uh,v1.uh) + +#CHECK: 1c79c350 { v16.h = vsub(v3.h,{{ *}}v25.h):sat } +v16.h=vsub(v3.h,v25.h):sat + +#CHECK: 
1c7fd364 { v4.w = vsub(v19.w,{{ *}}v31.w):sat } +v4.w=vsub(v19.w,v31.w):sat + +#CHECK: 1c67d816 { v22.ub = vsub(v24.ub,{{ *}}v7.ub):sat } +v22.ub=vsub(v24.ub,v7.ub):sat + +#CHECK: 1c7ddc2f { v15.uh = vsub(v28.uh,{{ *}}v29.uh):sat } +v15.uh=vsub(v28.uh,v29.uh):sat + +#CHECK: 1c5cc6d7 { v23.h = vsub(v6.h,{{ *}}v28.h) } +v23.h=vsub(v6.h,v28.h) + +#CHECK: 1c54cae4 { v4.w = vsub(v10.w,{{ *}}v20.w) } +v4.w=vsub(v10.w,v20.w) + +#CHECK: 1c4dc78b { v11.w = vadd(v7.w,{{ *}}v13.w):sat } +v11.w=vadd(v7.w,v13.w):sat + +#CHECK: 1c48c7a4 { v4.b = vsub(v7.b,{{ *}}v8.b) } +v4.b=vsub(v7.b,v8.b) + +#CHECK: 1cdec3b0 { v16.uh = vavg(v3.uh,{{ *}}v30.uh) } +v16.uh=vavg(v3.uh,v30.uh) + +#CHECK: 1c76dc98 { v25:24.b = vadd(v29:28.b,{{ *}}v23:22.b) } +v25:24.b=vadd(v29:28.b,v23:22.b) + +#CHECK: 1c7ad4a6 { v7:6.h = vadd(v21:20.h,{{ *}}v27:26.h) } +v7:6.h=vadd(v21:20.h,v27:26.h) + +#CHECK: 1cc7c564 { v4.uw = vabsdiff(v5.w,{{ *}}v7.w) } +v4.uw=vabsdiff(v5.w,v7.w) + +#CHECK: 1cd2cdc1 { v1.h = vavg(v13.h,{{ *}}v18.h) } +v1.h=vavg(v13.h,v18.h) + +#CHECK: 1cd5d246 { v6.uh = vabsdiff(v18.uh,{{ *}}v21.uh) } +v6.uh=vabsdiff(v18.uh,v21.uh) + +#CHECK: 1cdcd987 { v7.ub = vavg(v25.ub,{{ *}}v28.ub) } +v7.ub=vavg(v25.ub,v28.ub) + +#CHECK: 1c92c6e4 { v5:4.uh = vsub(v7:6.uh,{{ *}}v19:18.uh):sat } +v5:4.uh=vsub(v7:6.uh,v19:18.uh):sat + +#CHECK: 1c86dace { v15:14.ub = vsub(v27:26.ub,{{ *}}v7:6.ub):sat } +v15:14.ub=vsub(v27:26.ub,v7:6.ub):sat + +#CHECK: 1cffc07c { v28.ub = vavg(v0.ub,{{ *}}v31.ub):rnd } +v28.ub=vavg(v0.ub,v31.ub):rnd + +#CHECK: 1cf8d851 { v17.w = vnavg(v24.w,{{ *}}v24.w) } +v17.w=vnavg(v24.w,v24.w) + +#CHECK: 1c70d2e6 { v7:6.ub = vadd(v19:18.ub,{{ *}}v17:16.ub):sat } +v7:6.ub=vadd(v19:18.ub,v17:16.ub):sat + +#CHECK: 1c72dec6 { v7:6.w = vadd(v31:30.w,{{ *}}v19:18.w) } +v7:6.w=vadd(v31:30.w,v19:18.w) + +#CHECK: 1c92d23e { v31:30.h = vadd(v19:18.h,{{ *}}v19:18.h):sat } +v31:30.h=vadd(v19:18.h,v19:18.h):sat + +#CHECK: 1c94de1e { v31:30.uh = vadd(v31:30.uh,{{ *}}v21:20.uh):sat } +v31:30.uh=vadd(v31:30.uh,v21:20.uh):sat + +#CHECK: 1c9ec07c { v29:28.b = vsub(v1:0.b,{{ *}}v31:30.b) } +v29:28.b=vsub(v1:0.b,v31:30.b) + +#CHECK: 1c88da56 { v23:22.w = vadd(v27:26.w,{{ *}}v9:8.w):sat } +v23:22.w=vadd(v27:26.w,v9:8.w):sat + +#CHECK: 1c9acab8 { v25:24.w = vsub(v11:10.w,{{ *}}v27:26.w) } +v25:24.w=vsub(v11:10.w,v27:26.w) + +#CHECK: 1c82d282 { v3:2.h = vsub(v19:18.h,{{ *}}v3:2.h) } +v3:2.h=vsub(v19:18.h,v3:2.h) + +#CHECK: 1c2bd9a6 { v6 = vand(v25,{{ *}}v11) } +v6=vand(v25,v11) + +#CHECK: 1c43c22d { v13.ub = vadd(v2.ub,{{ *}}v3.ub):sat } +v13.ub=vadd(v2.ub,v3.ub):sat + +#CHECK: 1c59d707 { v7.w = vadd(v23.w,{{ *}}v25.w) } +v7.w=vadd(v23.w,v25.w) + +#CHECK: 1c3fc9e1 { v1 = vxor(v9,{{ *}}v31) } +v1=vxor(v9,v31) + +#CHECK: 1c2acbdf { v31 = vor(v11,{{ *}}v10) } +v31=vor(v11,v10) + +#CHECK: 1cdaccf6 { v22.w = vavg(v12.w,{{ *}}v26.w) } +v22.w=vavg(v12.w,v26.w) + +#CHECK: 1c5ac767 { v7.h = vadd(v7.h,{{ *}}v26.h):sat } +v7.h=vadd(v7.h,v26.h):sat + +#CHECK: 1c40d956 { v22.uh = vadd(v25.uh,{{ *}}v0.uh):sat } +v22.uh=vadd(v25.uh,v0.uh):sat + +#CHECK: 1fbbd611 { v17.w = vasr(v22.w{{ *}},{{ *}}v27.w) } +v17.w=vasr(v22.w,v27.w) + +#CHECK: 1fbad835 { v21.w = vlsr(v24.w{{ *}},{{ *}}v26.w) } +v21.w=vlsr(v24.w,v26.w) + +#CHECK: 1f79cedc { v28.b = vround(v14.h{{ *}},{{ *}}v25.h):sat } +v28.b=vround(v14.h,v25.h):sat + +#CHECK: 1f69c4e0 { v0.ub = vround(v4.h{{ *}},{{ *}}v9.h):sat } +v0.ub=vround(v4.h,v9.h):sat + +#CHECK: 1f72c485 { v5.h = vround(v4.w{{ *}},{{ *}}v18.w):sat } +v5.h=vround(v4.w,v18.w):sat + +#CHECK: 1f6bc8b1 { v17.uh = vround(v8.w{{ *}},{{ 
*}}v11.w):sat } +v17.uh=vround(v8.w,v11.w):sat + +#CHECK: 1f71c25b { v27.ub = vsat(v2.h{{ *}},{{ *}}v17.h) } +v27.ub=vsat(v2.h,v17.h) + +#CHECK: 1f66c560 { v0.h = vsat(v5.w{{ *}},{{ *}}v6.w) } +v0.h=vsat(v5.w,v6.w) + +#CHECK: 1fb3d148 { v8.h = vlsr(v17.h{{ *}},{{ *}}v19.h) } +v8.h=vlsr(v17.h,v19.h) + +#CHECK: 1fbec56e { v14.h = vasr(v5.h{{ *}},{{ *}}v30.h) } +v14.h=vasr(v5.h,v30.h) + +#CHECK: 1fb2d2a2 { v2.h = vasl(v18.h{{ *}},{{ *}}v18.h) } +v2.h=vasl(v18.h,v18.h) + +#CHECK: 1faccc95 { v21.w = vasl(v12.w{{ *}},{{ *}}v12.w) } +v21.w=vasl(v12.w,v12.w) + +#CHECK: 1fb9c1e2 { v2.h = vadd(v1.h{{ *}},{{ *}}v25.h) } +v2.h=vadd(v1.h,v25.h) + +#CHECK: 1fbbd5df { v31.b = vadd(v21.b{{ *}},{{ *}}v27.b) } +v31.b=vadd(v21.b,v27.b) + +#CHECK: 1f25c578 { v24 = vrdelta(v5{{ *}},{{ *}}v5) } +v24=vrdelta(v5,v5) + +#CHECK: 1f22c62a { v10 = vdelta(v6{{ *}},{{ *}}v2) } +v10=vdelta(v6,v2) + +#CHECK: 1f20d102 { v2.w = vmax(v17.w{{ *}},{{ *}}v0.w) } +v2.w=vmax(v17.w,v0.w) + +#CHECK: 1f1ed6fc { v28.h = vmax(v22.h{{ *}},{{ *}}v30.h) } +v28.h=vmax(v22.h,v30.h) + +#CHECK: 1f0cc8d8 { v24.uh = vmax(v8.uh{{ *}},{{ *}}v12.uh) } +v24.uh=vmax(v8.uh,v12.uh) + +#CHECK: 1f00c1b0 { v16.ub = vmax(v1.ub{{ *}},{{ *}}v0.ub) } +v16.ub=vmax(v1.ub,v0.ub) + +#CHECK: 1f12d08e { v14.w = vmin(v16.w{{ *}},{{ *}}v18.w) } +v14.w=vmin(v16.w,v18.w) + +#CHECK: 1f1ad466 { v6.h = vmin(v20.h{{ *}},{{ *}}v26.h) } +v6.h=vmin(v20.h,v26.h) + +#CHECK: 1f13df5d { v29.uh = vmin(v31.uh{{ *}},{{ *}}v19.uh) } +v29.uh=vmin(v31.uh,v19.uh) + +#CHECK: 1f09c226 { v6.ub = vmin(v2.ub{{ *}},{{ *}}v9.ub) } +v6.ub=vmin(v2.ub,v9.ub) + +#CHECK: 1f41d34f { v15.b = vshuffo(v19.b{{ *}},{{ *}}v1.b) } +v15.b=vshuffo(v19.b,v1.b) + +#CHECK: 1f5fc72e { v14.b = vshuffe(v7.b{{ *}},{{ *}}v31.b) } +v14.b=vshuffe(v7.b,v31.b) + +#CHECK: 1f34d0f7 { v23.b = vdeale(v16.b{{ *}},{{ *}}v20.b) } +v23.b=vdeale(v16.b,v20.b) + +#CHECK: 1f4bd6c4 { v5:4.b = vshuffoe(v22.b{{ *}},{{ *}}v11.b) } +v5:4.b=vshuffoe(v22.b,v11.b) + +#CHECK: 1f5dcea2 { v3:2.h = vshuffoe(v14.h{{ *}},{{ *}}v29.h) } +v3:2.h=vshuffoe(v14.h,v29.h) + +#CHECK: 1f4fd186 { v6.h = vshuffo(v17.h{{ *}},{{ *}}v15.h) } +v6.h=vshuffo(v17.h,v15.h) + +#CHECK: 1f5bda79 { v25.h = vshuffe(v26.h{{ *}},{{ *}}v27.h) } +v25.h=vshuffe(v26.h,v27.h) + +#CHECK: 1f41d1f2 { v19:18 = vcombine(v17{{ *}},{{ *}}v1) } +v19:18=vcombine(v17,v1) + +#CHECK: 1e82f432 { if (!q2) v18.b -= v20.b } +if (!q2) v18.b-=v20.b + +#CHECK: 1ec2fd13 { if (q3) v19.w -= v29.w } +if (q3) v19.w-=v29.w + +#CHECK: 1e81fef9 { if (q2) v25.h -= v30.h } +if (q2) v25.h-=v30.h + +#CHECK: 1e81e2d3 { if (q2) v19.b -= v2.b } +if (q2) v19.b-=v2.b + +#CHECK: 1e41ecad { if (!q1) v13.w += v12.w } +if (!q1) v13.w+=v12.w + +#CHECK: 1e41e789 { if (!q1) v9.h += v7.h } +if (!q1) v9.h+=v7.h + +#CHECK: 1e81e967 { if (!q2) v7.b += v9.b } +if (!q2) v7.b+=v9.b + +#CHECK: 1e41f04f { if (q1) v15.w += v16.w } +if (q1) v15.w+=v16.w + +#CHECK: 1e01e838 { if (q0) v24.h += v8.h } +if (q0) v24.h+=v8.h + +#CHECK: 1ec1f112 { if (q3) v18.b += v17.b } +if (q3) v18.b+=v17.b + +#CHECK: 1e42f67b { if (!q1) v27.w -= v22.w } +if (!q1) v27.w-=v22.w + +#CHECK: 1e82ea5b { if (!q2) v27.h -= v10.h } +if (!q2) v27.h-=v10.h + +#CHECK: 1e00c586 { v6 = vnot(v5) } +v6=vnot(v5) + +#CHECK: 1e00df70 { v16.w = vabs(v31.w):sat } +v16.w=vabs(v31.w):sat + +#CHECK: 1e00d45f { v31.w = vabs(v20.w) } +v31.w=vabs(v20.w) + +#CHECK: 1e00db2f { v15.h = vabs(v27.h):sat } +v15.h=vabs(v27.h):sat + +#CHECK: 1e00d001 { v1.h = vabs(v16.h) } +v1.h=vabs(v16.h) + +#CHECK: 1e02c832 { v19:18.uh = vzxt(v8.ub) } +v19:18.uh=vzxt(v8.ub) + +#CHECK: 1e02c98a 
{ v11:10.w = vsxt(v9.h) } +v11:10.w=vsxt(v9.h) + +#CHECK: 1e02cf76 { v23:22.h = vsxt(v15.b) } +v23:22.h=vsxt(v15.b) + +#CHECK: 1e02c258 { v25:24.uw = vzxt(v2.uh) } +v25:24.uw=vzxt(v2.uh) diff --git a/test/MC/Hexagon/v60-permute.s b/test/MC/Hexagon/v60-permute.s new file mode 100644 index 000000000000..b3544bd0a57b --- /dev/null +++ b/test/MC/Hexagon/v60-permute.s @@ -0,0 +1,51 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 1fd2d5cf { v15.b = vpack(v21.h{{ *}},{{ *}}v18.h):sat } +v15.b=vpack(v21.h,v18.h):sat + +#CHECK: 1fd7d7a2 { v2.ub = vpack(v23.h{{ *}},{{ *}}v23.h):sat } +v2.ub=vpack(v23.h,v23.h):sat + +#CHECK: 1fc7d464 { v4.h = vpacke(v20.w{{ *}},{{ *}}v7.w) } +v4.h=vpacke(v20.w,v7.w) + +#CHECK: 1fc2c75b { v27.b = vpacke(v7.h{{ *}},{{ *}}v2.h) } +v27.b=vpacke(v7.h,v2.h) + +#CHECK: 1fc9c5ed { v13.uh = vpack(v5.w{{ *}},{{ *}}v9.w):sat } +v13.uh=vpack(v5.w,v9.w):sat + +#CHECK: 1ff1d81f { v31.h = vpack(v24.w{{ *}},{{ *}}v17.w):sat } +v31.h=vpack(v24.w,v17.w):sat + +#CHECK: 1fe6c435 { v21.b = vpacko(v4.h{{ *}},{{ *}}v6.h) } +v21.b=vpacko(v4.h,v6.h) + +#CHECK: 1febc140 { v0.h = vpacko(v1.w{{ *}},{{ *}}v11.w) } +v0.h=vpacko(v1.w,v11.w) + +#CHECK: 1e01d256 { v23:22.h = vunpack(v18.b) } +v23:22.h=vunpack(v18.b) + +#CHECK: 1e01cc38 { v25:24.uw = vunpack(v12.uh) } +v25:24.uw=vunpack(v12.uh) + +#CHECK: 1e01c61e { v31:30.uh = vunpack(v6.ub) } +v31:30.uh=vunpack(v6.ub) + +#CHECK: 1e01d778 { v25:24.w = vunpack(v23.h) } +v25:24.w=vunpack(v23.h) + +#CHECK: 1e00c0e0 { v0.b = vdeal(v0.b) } +v0.b=vdeal(v0.b) + +#CHECK: 1e00d5c9 { v9.h = vdeal(v21.h) } +v9.h=vdeal(v21.h) + +#CHECK: 1e02cb1c { v28.b = vshuff(v11.b) } +v28.b=vshuff(v11.b) + +#CHECK: 1e01d8fe { v30.h = vshuff(v24.h) } +v30.h=vshuff(v24.h) diff --git a/test/MC/Hexagon/v60-shift.s b/test/MC/Hexagon/v60-shift.s new file mode 100644 index 000000000000..3d0c334debb9 --- /dev/null +++ b/test/MC/Hexagon/v60-shift.s @@ -0,0 +1,39 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 198fd829 { v9.uw = vlsr(v24.uw,{{ *}}r15) } +v9.uw=vlsr(v24.uw,r15) + +#CHECK: 1999d645 { v5.uh = vlsr(v22.uh,{{ *}}r25) } +v5.uh=vlsr(v22.uh,r25) + +#CHECK: 198cc303 { v3.h = vasl(v3.h,{{ *}}r12) } +v3.h=vasl(v3.h,r12) + +#CHECK: 1965d7ac { v12.w = vasr(v23.w,{{ *}}r5) } +v12.w=vasr(v23.w,r5) + +#CHECK: 197dddc3 { v3.h = vasr(v29.h,{{ *}}r29) } +v3.h=vasr(v29.h,r29) + +#CHECK: 197adde8 { v8.w = vasl(v29.w,{{ *}}r26) } +v8.w=vasl(v29.w,r26) + +#CHECK: 1977cc26 { v6 = vror(v12,{{ *}}r23) } +v6=vror(v12,r23) + +#CHECK: 1e02cfad { v13.uw = vcl0(v15.uw) } +v13.uw=vcl0(v15.uw) + +#CHECK: 1e02defb { v27.uh = vcl0(v30.uh) } +v27.uh=vcl0(v30.uh) + +#CHECK: 1e03de90 { v16.w = vnormamt(v30.w) } +v16.w=vnormamt(v30.w) + +#CHECK: 1e03d4a3 { v3.h = vnormamt(v20.h) } +v3.h=vnormamt(v20.h) + +#CHECK: 1e02c2d8 { v24.h = vpopcount(v2.h) } +v24.h=vpopcount(v2.h) diff --git a/test/MC/Hexagon/v60-vcmp.s b/test/MC/Hexagon/v60-vcmp.s new file mode 100644 index 000000000000..c7f4e128be63 --- /dev/null +++ b/test/MC/Hexagon/v60-vcmp.s @@ -0,0 +1,84 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 1c81f142 { q2 |= vcmp.eq(v17.b{{ *}},{{ *}}v1.b) } +q2|=vcmp.eq(v17.b,v1.b) + +#CHECK: 1c84fb2a { q2 &= vcmp.gt(v27.uw{{ *}},{{ *}}v4.uw) } 
+q2&=vcmp.gt(v27.uw,v4.uw) + +#CHECK: 1c8cf826 { q2 &= vcmp.gt(v24.uh{{ *}},{{ *}}v12.uh) } +q2&=vcmp.gt(v24.uh,v12.uh) + +#CHECK: 1c80e720 { q0 &= vcmp.gt(v7.ub{{ *}},{{ *}}v0.ub) } +q0&=vcmp.gt(v7.ub,v0.ub) + +#CHECK: 1c9aed1a { q2 &= vcmp.gt(v13.w{{ *}},{{ *}}v26.w) } +q2&=vcmp.gt(v13.w,v26.w) + +#CHECK: 1c8de516 { q2 &= vcmp.gt(v5.h{{ *}},{{ *}}v13.h) } +q2&=vcmp.gt(v5.h,v13.h) + +#CHECK: 1c8dfc11 { q1 &= vcmp.gt(v28.b{{ *}},{{ *}}v13.b) } +q1&=vcmp.gt(v28.b,v13.b) + +#CHECK: 1c94fa0b { q3 &= vcmp.eq(v26.w{{ *}},{{ *}}v20.w) } +q3&=vcmp.eq(v26.w,v20.w) + +#CHECK: 1c83e206 { q2 &= vcmp.eq(v2.h{{ *}},{{ *}}v3.h) } +q2&=vcmp.eq(v2.h,v3.h) + +#CHECK: 1c85e900 { q0 &= vcmp.eq(v9.b{{ *}},{{ *}}v5.b) } +q0&=vcmp.eq(v9.b,v5.b) + +#CHECK: 1c9cfca8 { q0 ^= vcmp.gt(v28.uw{{ *}},{{ *}}v28.uw) } +q0^=vcmp.gt(v28.uw,v28.uw) + +#CHECK: 1c81faa0 { q0 ^= vcmp.gt(v26.ub{{ *}},{{ *}}v1.ub) } +q0^=vcmp.gt(v26.ub,v1.ub) + +#CHECK: 1c96f0a4 { q0 ^= vcmp.gt(v16.uh{{ *}},{{ *}}v22.uh) } +q0^=vcmp.gt(v16.uh,v22.uh) + +#CHECK: 1c9bf795 { q1 ^= vcmp.gt(v23.h{{ *}},{{ *}}v27.h) } +q1^=vcmp.gt(v23.h,v27.h) + +#CHECK: 1c9de698 { q0 ^= vcmp.gt(v6.w{{ *}},{{ *}}v29.w) } +q0^=vcmp.gt(v6.w,v29.w) + +#CHECK: 1c82ef8a { q2 ^= vcmp.eq(v15.w{{ *}},{{ *}}v2.w) } +q2^=vcmp.eq(v15.w,v2.w) + +#CHECK: 1c99e891 { q1 ^= vcmp.gt(v8.b{{ *}},{{ *}}v25.b) } +q1^=vcmp.gt(v8.b,v25.b) + +#CHECK: 1c8afe55 { q1 |= vcmp.gt(v30.h{{ *}},{{ *}}v10.h) } +q1|=vcmp.gt(v30.h,v10.h) + +#CHECK: 1c92ef50 { q0 |= vcmp.gt(v15.b{{ *}},{{ *}}v18.b) } +q0|=vcmp.gt(v15.b,v18.b) + +#CHECK: 1c9ffb4b { q3 |= vcmp.eq(v27.w{{ *}},{{ *}}v31.w) } +q3|=vcmp.eq(v27.w,v31.w) + +#CHECK: 1c87e944 { q0 |= vcmp.eq(v9.h{{ *}},{{ *}}v7.h) } +q0|=vcmp.eq(v9.h,v7.h) + +#CHECK: 1c8ee768 { q0 |= vcmp.gt(v7.uw{{ *}},{{ *}}v14.uw) } +q0|=vcmp.gt(v7.uw,v14.uw) + +#CHECK: 1c92e265 { q1 |= vcmp.gt(v2.uh{{ *}},{{ *}}v18.uh) } +q1|=vcmp.gt(v2.uh,v18.uh) + +#CHECK: 1c80f062 { q2 |= vcmp.gt(v16.ub{{ *}},{{ *}}v0.ub) } +q2|=vcmp.gt(v16.ub,v0.ub) + +#CHECK: 1c91f75a { q2 |= vcmp.gt(v23.w{{ *}},{{ *}}v17.w) } +q2|=vcmp.gt(v23.w,v17.w) + +#CHECK: 1c86fe84 { q0 ^= vcmp.eq(v30.h{{ *}},{{ *}}v6.h) } +q0^=vcmp.eq(v30.h,v6.h) + +#CHECK: 1c86ec82 { q2 ^= vcmp.eq(v12.b{{ *}},{{ *}}v6.b) } +q2^=vcmp.eq(v12.b,v6.b) diff --git a/test/MC/Hexagon/v60-vmem.s b/test/MC/Hexagon/v60-vmem.s new file mode 100644 index 000000000000..fe202251ec4b --- /dev/null +++ b/test/MC/Hexagon/v60-vmem.s @@ -0,0 +1,424 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 292cc11b { vmem(r12++#1) = v27 } +{ + vmem(r12++#1)=v27 +} + +#CHECK: 294dc319 { v25 = vmem(r13++#3):nt } +{ + v25=vmem(r13++#3):nt +} + +#CHECK: 2904c1fb { v27 = vmemu(r4++#1) } +{ + v27=vmemu(r4++#1) +} + +#CHECK: 291dc01f { v31 = vmem(r29++#0) } +{ + v31=vmem(r29++#0) +} + +#CHECK: 293ec0ff { vmemu(r30++#0) = v31 } +{ + vmemu(r30++#0)=v31 +} + +#CHECK: 296ec411 { vmem(r14++#-4):nt = v17 } +{ + vmem(r14++#-4):nt=v17 +} + +#CHECK: 29fec62f { if (!p0) vmem(r30++#-2):nt = v15 } +{ + if (!p0) vmem(r30++#-2):nt=v15 +} + +#CHECK: 29f9c914 { if (p1) vmem(r25++#1):nt = v20 } +{ + if (p1) vmem(r25++#1):nt=v20 +} + +#CHECK: 2984de30 { if (!q3) vmem(r4++#-2) = v16 } +{ + if (!q3) vmem(r4++#-2)=v16 +} + +#CHECK: 2992dd1f { if (q3) vmem(r18++#-3) = v31 } +{ + if (q3) vmem(r18++#-3)=v31 +} + +#CHECK: 29c9c425 { if (!q0) vmem(r9++#-4):nt = v5 } +{ + if (!q0) vmem(r9++#-4):nt=v5 +} + +#CHECK: 29d1cf11 { if (q1) vmem(r17++#-1):nt = v17 } +{ + if (q1) 
vmem(r17++#-1):nt=v17 +} + +#CHECK: 29a7c328 { if (!p0) vmem(r7++#3) = v8 } +{ + if (!p0) vmem(r7++#3)=v8 +} + +#CHECK: 29b6cc1d { if (p1) vmem(r22++#-4) = v29 } +{ + if (p1) vmem(r22++#-4)=v29 +} + +#CHECK: 29abc5fe { if (!p0) vmemu(r11++#-3) = v30 } +{ + if (!p0) vmemu(r11++#-3)=v30 +} + +#CHECK: 29b8d5c4 { if (p2) vmemu(r24++#-3) = v4 } +{ + if (p2) vmemu(r24++#-3)=v4 +} + +#CHECK: 2860e407 { vmem(r0+#-4):nt = v7 } +{ + vmem(r0+#-4):nt=v7 +} + +#CHECK: 2830e2e7 { vmemu(r16+#-6) = v7 } +{ + vmemu(r16+#-6)=v7 +} + +#CHECK: 2839c316 { vmem(r25+#3) = v22 } +{ + vmem(r25+#3)=v22 +} +#CHECK: 284be316 { v22 = vmem(r11+#-5):nt } +{ + v22=vmem(r11+#-5):nt +} + +#CHECK: 280ec1e6 { v6 = vmemu(r14+#1) } +{ + v6=vmemu(r14+#1) +} + +#CHECK: 280ae50c { v12 = vmem(r10+#-3) } +{ + v12=vmem(r10+#-3) +} + +#CHECK: 2b62e005 { vmem(r2++m1):nt = v5 } +{ + vmem(r2++m1):nt=v5 +} + +#CHECK: 2b28e0f2 { vmemu(r8++m1) = v18 } +{ + vmemu(r8++m1)=v18 +} + +#CHECK: 2b42e019 { v25 = vmem(r2++m1):nt } +{ + v25=vmem(r2++m1):nt +} + +#CHECK: 2b2ce009 { vmem(r12++m1) = v9 } +{ + vmem(r12++m1)=v9 +} + +#CHECK: 2b03c005 { v5 = vmem(r3++m0) } +{ + v5=vmem(r3++m0) +} + + +#CHECK: 2b0ec0f5 { v21 = vmemu(r14++m0) } +{ + v21=vmemu(r14++m0) +} + +#CHECK: 2be8c022 { if (!p0) vmem(r8++m0):nt = v2 } +{ + if (!p0) vmem(r8++m0):nt=v2 +} + +#CHECK: 2bebd813 { if (p3) vmem(r11++m0):nt = v19 } +{ + if (p3) vmem(r11++m0):nt=v19 +} + +#CHECK: 2ba5e0e7 { if (!p0) vmemu(r5++m1) = v7 } +{ + if (!p0) vmemu(r5++m1)=v7 +} + +#CHECK: 2ba4f0dd { if (p2) vmemu(r4++m1) = v29 } +{ + if (p2) vmemu(r4++m1)=v29 +} + +#CHECK: 2ba4e828 { if (!p1) vmem(r4++m1) = v8 } +{ + if (!p1) vmem(r4++m1)=v8 +} + +#CHECK: 2bbae803 { if (p1) vmem(r26++m1) = v3 } +{ + if (p1) vmem(r26++m1)=v3 +} + +#CHECK: 2bc9c027 { if (!q0) vmem(r9++m0):nt = v7 } +{ + if (!q0) vmem(r9++m0):nt=v7 +} + +#CHECK: 2bcfc001 { if (q0) vmem(r15++m0):nt = v1 } +{ + if (q0) vmem(r15++m0):nt=v1 +} + +#CHECK: 2b97f031 { if (!q2) vmem(r23++m1) = v17 } +{ + if (!q2) vmem(r23++m1)=v17 +} + +#CHECK: 2b8ad809 { if (q3) vmem(r10++m0) = v9 } +{ + if (q3) vmem(r10++m0)=v9 +} + +#CHECK: 28c7f438 { if (!q2) vmem(r7+#-4):nt = v24 } +{ + if (!q2) vmem(r7+#-4):nt=v24 +} + +#CHECK: 28d1eb15 { if (q1) vmem(r17+#-5):nt = v21 } +{ + if (q1) vmem(r17+#-5):nt=v21 +} + +#CHECK: 289cfe2b { if (!q3) vmem(r28+#-2) = v11 } +{ + if (!q3) vmem(r28+#-2)=v11 +} + +#CHECK: 288eef0f { if (q1) vmem(r14+#-1) = v15 } +{ + if (q1) vmem(r14+#-1)=v15 +} + +#CHECK: 28a2d1e1 { if (!p2) vmemu(r2+#1) = v1 } +{ + if (!p2) vmemu(r2+#1)=v1 +} + +#CHECK: 28bcf4db { if (p2) vmemu(r28+#-4) = v27 } +{ + if (p2) vmemu(r28+#-4)=v27 +} + +#CHECK: 28b2c925 { if (!p1) vmem(r18+#1) = v5 } +{ + if (!p1) vmem(r18+#1)=v5 +} + +#CHECK: 28afe41a { if (p0) vmem(r15+#-4) = v26 } +{ + if (p0) vmem(r15+#-4)=v26 +} + +#CHECK: 28f7fd3a { if (!p3) vmem(r23+#-3):nt = v26 } +{ + if (!p3) vmem(r23+#-3):nt=v26 +} + +#CHECK: 28f5fd10 { if (p3) vmem(r21+#-3):nt = v16 } +{ + if (p3) vmem(r21+#-3):nt=v16 +} + +#CHECK: 2945c440 v0.tmp = vmem(r5++#-4):nt } +{ + v0.tmp=vmem(r5++#-4):nt + v26=v0 +} + +#CHECK: 2942c338 v24.cur = vmem(r2++#3):nt } +{ + v24.cur=vmem(r2++#3):nt + v6=v24 +} + +#CHECK: 2908c157 v23.tmp = vmem(r8++#1) } +{ + v25=v23 + v23.tmp=vmem(r8++#1) +} + +#CHECK: 2903c72d v13.cur = vmem(r3++#-1) } +{ + v13.cur=vmem(r3++#-1) + v21=v13 +} + +#CHECK: 2855c743 v3.tmp = vmem(r21+#7):nt } +{ + v3.tmp=vmem(r21+#7):nt + v21=v3 +} + +#CHECK: 2856e025 v5.cur = vmem(r22+#-8):nt } +{ + v5.cur=vmem(r22+#-8):nt + v29=v5 +} + +#CHECK: 2802c555 v21.tmp = vmem(r2+#5) } +{ 
+ v31=v21 + v21.tmp=vmem(r2+#5) +} + +#CHECK: 2814e12a v10.cur = vmem(r20+#-7) } +{ + v9=v10 + v10.cur=vmem(r20+#-7) +} + + +#CHECK: 2b52c02c v12.cur = vmem(r18++m0):nt } +{ + v12.cur=vmem(r18++m0):nt + v25=v12 +} + +#CHECK: 2b4ae043 v3.tmp = vmem(r10++m1):nt } +{ + v25=v3 + v3.tmp=vmem(r10++m1):nt +} + +#CHECK: 2b06c025 v5.cur = vmem(r6++m0) } +{ + v5.cur=vmem(r6++m0) + v10=v5 +} + +#CHECK: 2b17e048 v8.tmp = vmem(r23++m1) } +{ + v8.tmp=vmem(r23++m1) + v28=v8 +} + +#CHECK: 282ee422 vmem(r14+#-4) = v14.new } +{ + v14 = v14 + vmem(r14+#-4)=v14.new +} + +#CHECK: 2866e222 vmem(r6+#-6):nt = v16.new } +{ + v16 = v8 + vmem(r6+#-6):nt=v16.new +} + +#CHECK: 28b1cd42 if(p1) vmem(r17+#5) = v17.new } +{ + v17 = v25 + if(p1)vmem(r17+#5)=v17.new +} + +#CHECK: 28bbeb6a if(!p1) vmem(r27+#-5) = v17.new } +{ + v17 = v15 + if(!p1)vmem(r27+#-5)=v17.new +} + +#CHECK: 28e4d252 if(p2) vmem(r4+#2):nt = v24.new } +{ + v24 = v10 + if(p2)vmem(r4+#2):nt=v24.new +} + +#CHECK: 28f8d17a if(!p2) vmem(r24+#1):nt = v4.new } +{ + v4 = v8 + if(!p2)vmem(r24+#1):nt=v4.new +} + +#CHECK: 2924c322 vmem(r4++#3) = v4.new } +{ + v4 = v3 + vmem(r4++#3)=v4.new +} + +#CHECK: 2961c122 vmem(r1++#1):nt = v7.new } +{ + v7 = v8 + vmem(r1++#1):nt=v7.new +} + +#CHECK: 29a6d042 if(p2) vmem(r6++#0) = v11.new } +{ + v11 = v13 + if(p2)vmem(r6++#0)=v11.new +} + +#CHECK: 29a2cb6a if(!p1) vmem(r2++#3) = v25.new } +{ + v25 = v17 + if(!p1)vmem(r2++#3)=v25.new +} + +#CHECK: 29f5c952 if(p1) vmem(r21++#1):nt = v14.new } +{ + v14 = v13 + if(p1)vmem(r21++#1):nt=v14.new +} + +#CHECK: 29f7cd7a if(!p1) vmem(r23++#-3):nt = v1.new } +{ + v1 = v0 + if(!p1)vmem(r23++#-3):nt=v1.new +} + +#CHECK: 2b3ec022 vmem(r30++m0) = v10.new } +{ + v10 = v23 + vmem(r30++m0)=v10.new +} + +#CHECK: 2b6fc022 vmem(r15++m0):nt = v19.new } +{ + v19 = v20 + vmem(r15++m0):nt=v19.new +} + +#CHECK: 2bb7f042 if(p2) vmem(r23++m1) = v6.new } +{ + v6 = v30 + if(p2)vmem(r23++m1)=v6.new +} + +#CHECK: 2ba2f06a if(!p2) vmem(r2++m1) = v12.new } +{ + v12 = v9 + if(!p2)vmem(r2++m1)=v12.new +} + +#CHECK: 2be7e852 if(p1) vmem(r7++m1):nt = v3.new } +{ + v3 = v13 + if(p1)vmem(r7++m1):nt=v3.new +} + +#CHECK: 2bfdd07a if(!p2) vmem(r29++m0):nt = v29.new } +{ + v29 = v9 + if(!p2)vmem(r29++m0):nt=v29.new +} diff --git a/test/MC/Hexagon/v60-vmpy-acc.s b/test/MC/Hexagon/v60-vmpy-acc.s new file mode 100644 index 000000000000..c39a9252b563 --- /dev/null +++ b/test/MC/Hexagon/v60-vmpy-acc.s @@ -0,0 +1,123 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 1936ee37 { v23.w += vdmpy(v15:14.h,r22.uh,#1):sat } +v23.w += vdmpy(v15:14.h,r22.uh,#1):sat + +#CHECK: 193bf90f { v15.w += vdmpy(v25.h,r27.uh):sat } +v15.w += vdmpy(v25.h,r27.uh):sat + +#CHECK: 1902fcf0 { v17:16.h += vdmpy(v29:28.ub,r2.b) } +v17:16.h += vdmpy(v29:28.ub,r2.b) + +#CHECK: 190cffd1 { v17.h += vdmpy(v31.ub,r12.b) } +v17.h += vdmpy(v31.ub,r12.b) + +#CHECK: 1900f5ac { v12.w += vrmpy(v21.ub,r0.b) } +v12.w += vrmpy(v21.ub,r0.b) + +#CHECK: 1905fb86 { v6.uw += vrmpy(v27.ub,r5.ub) } +v6.uw += vrmpy(v27.ub,r5.ub) + +#CHECK: 191de570 { v16.w += vdmpy(v5.h,r29.b) } +v16.w += vdmpy(v5.h,r29.b) + +#CHECK: 191de846 { v7:6.w += vtmpy(v9:8.h,r29.b) } +v7:6.w += vtmpy(v9:8.h,r29.b) + +#CHECK: 190bfa22 { v3:2.h += vtmpy(v27:26.ub,r11.b) } +v3:2.h += vtmpy(v27:26.ub,r11.b) + +#CHECK: 1915e408 { v9:8.h += vtmpy(v5:4.b,r21.b) } +v9:8.h += vtmpy(v5:4.b,r21.b) + +#CHECK: 1987f71e { v31:30.uh += vmpy(v23.ub,r7.ub) } +v31:30.uh += vmpy(v23.ub,r7.ub) + +#CHECK: 
1969ff47 { v7.w += vasl(v31.w,r9) } +v7.w += vasl(v31.w,r9) + +#CHECK: 196de3b0 { v16.w += vasr(v3.w,r13) } +v16.w += vasr(v3.w,r13) + +#CHECK: 1977fe0a { v11:10.uw += vdsad(v31:30.uh,r23.uh) } +v11:10.uw += vdsad(v31:30.uh,r23.uh) + +#CHECK: 196eee36 { v22.h += vmpyi(v14.h,r14.b) } +v22.h += vmpyi(v14.h,r14.b) + +#CHECK: 1931faac { v13:12.h += vmpy(v26.ub,r17.b) } +v13:12.h += vmpy(v26.ub,r17.b) + +#CHECK: 193cfc94 { v21:20.w += vdmpy(v29:28.h,r28.b) } +v21:20.w += vdmpy(v29:28.h,r28.b) + +#CHECK: 1934fc62 { v2.w += vdmpy(v28.h,r20.h):sat } +v2.w += vdmpy(v28.h,r20.h):sat + +#CHECK: 1925fe5f { v31.w += vdmpy(v31:30.h,r5.h):sat } +v31.w += vdmpy(v31:30.h,r5.h):sat + +#CHECK: 194efe36 { v23:22.uw += vmpy(v30.uh,r14.uh) } +v23:22.uw += vmpy(v30.uh,r14.uh) + +#CHECK: 1948e306 { v7:6.w += vmpy(v3.h,r8.h):sat } +v7:6.w += vmpy(v3.h,r8.h):sat + +#CHECK: 192af2f8 { v25:24.w += vmpa(v19:18.h,r10.b) } +v25:24.w += vmpa(v19:18.h,r10.b) + +#CHECK: 1926e4da { v27:26.h += vmpa(v5:4.ub,r6.b) } +v27:26.h += vmpa(v5:4.ub,r6.b) + +#CHECK: 194ff078 { v24.w += vmpyi(v16.w,r15.h) } +v24.w += vmpyi(v16.w,r15.h) + +#CHECK: 1946e247 { v7.w += vmpyi(v2.w,r6.b) } +v7.w += vmpyi(v2.w,r6.b) + +#CHECK: 1c3fead5 { v21.w += vmpyo(v10.w,v31.h):<<1:sat:shift } +v21.w += vmpyo(v10.w,v31.h):<<1:sat:shift + +#CHECK: 1c30e1fa { v26.w += vmpyo(v1.w,v16.h):<<1:rnd:sat:shift } +v26.w += vmpyo(v1.w,v16.h):<<1:rnd:sat:shift + +#CHECK: 1c34f690 { v16.h += vmpyi(v22.h,v20.h) } +v16.h += vmpyi(v22.h,v20.h) + +#CHECK: 1c34f4b5 { v21.w += vmpyie(v20.w,v20.uh) } +v21.w += vmpyie(v20.w,v20.uh) + +#CHECK: 1c54f804 { v4.w += vmpyie(v24.w,v20.h) } +v4.w += vmpyie(v24.w,v20.h) + +#CHECK: 1c1ff6f4 { v21:20.w += vmpy(v22.h,v31.h) } +v21:20.w += vmpy(v22.h,v31.h) + +#CHECK: 1c31f026 { v7:6.w += vmpy(v16.h,v17.uh) } +v7:6.w += vmpy(v16.h,v17.uh) + +#CHECK: 1c12fb98 { v25:24.h += vmpy(v27.b,v18.b) } +v25:24.h += vmpy(v27.b,v18.b) + +#CHECK: 1c17fcc0 { v1:0.h += vmpy(v28.ub,v23.b) } +v1:0.h += vmpy(v28.ub,v23.b) + +#CHECK: 1c16f26f { v15.w += vdmpy(v18.h,v22.h):sat } +v15.w += vdmpy(v18.h,v22.h):sat + +#CHECK: 1c0bea3a { v26.w += vrmpy(v10.b,v11.b) } +v26.w += vrmpy(v10.b,v11.b) + +#CHECK: 1c15eb47 { v7.w += vrmpy(v11.ub,v21.b) } +v7.w += vrmpy(v11.ub,v21.b) + +#CHECK: 1c26e40e { v15:14.uw += vmpy(v4.uh,v6.uh) } +v15:14.uw += vmpy(v4.uh,v6.uh) + +#CHECK: 1c0df9a8 { v9:8.uh += vmpy(v25.ub,v13.ub) } +v9:8.uh += vmpy(v25.ub,v13.ub) + +#CHECK: 1c0afc15 { v21.uw += vrmpy(v28.ub,v10.ub) } +v21.uw += vrmpy(v28.ub,v10.ub) diff --git a/test/MC/Hexagon/v60-vmpy1.s b/test/MC/Hexagon/v60-vmpy1.s new file mode 100644 index 000000000000..1f36a5e95ddb --- /dev/null +++ b/test/MC/Hexagon/v60-vmpy1.s @@ -0,0 +1,138 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + +#CHECK: 1939c223 { v3.w = vdmpy(v3:2.h,{{ *}}r25.uh,{{ *}}#1):sat } +v3.w=vdmpy(v3:2.h,r25.uh,#1):sat + +#CHECK: 1936de0d { v13.w = vdmpy(v30.h,{{ *}}r22.uh):sat } +v13.w=vdmpy(v30.h,r22.uh):sat + +#CHECK: 1919ccea { v11:10.h = vdmpy(v13:12.ub,{{ *}}r25.b) } +v11:10.h=vdmpy(v13:12.ub,r25.b) + +#CHECK: 1918ced6 { v22.h = vdmpy(v14.ub,{{ *}}r24.b) } +v22.h=vdmpy(v14.ub,r24.b) + +#CHECK: 1911deba { v27:26.uw = vdsad(v31:30.uh,{{ *}}r17.uh) } +v27:26.uw=vdsad(v31:30.uh,r17.uh) + +#CHECK: 1908da97 { v23.w = vrmpy(v26.ub,{{ *}}r8.b) } +v23.w=vrmpy(v26.ub,r8.b) + +#CHECK: 1915c974 { v20.uw = vrmpy(v9.ub,{{ *}}r21.ub) } +v20.uw=vrmpy(v9.ub,r21.ub) + +#CHECK: 190dd446 { v6.w = vdmpy(v20.h,{{ *}}r13.b) } 
+v6.w=vdmpy(v20.h,r13.b) + +#CHECK: 190ec030 { v17:16.h = vtmpy(v1:0.ub,{{ *}}r14.b) } +v17:16.h=vtmpy(v1:0.ub,r14.b) + +#CHECK: 1918de1c { v29:28.h = vtmpy(v31:30.b,{{ *}}r24.b) } +v29:28.h=vtmpy(v31:30.b,r24.b) + +#CHECK: 198dddf1 { v17.w = vmpyi(v29.w,{{ *}}r13.h) } +v17.w=vmpyi(v29.w,r13.h) + +#CHECK: 19bccb13 { v19.w = vmpyi(v11.w,{{ *}}r28.b) } +v19.w=vmpyi(v11.w,r28.b) + +#CHECK: 19c8cb0a { v11:10.uh = vmpy(v11.ub,{{ *}}r8.ub) } +v11:10.uh=vmpy(v11.ub,r8.ub) + +#CHECK: 1973d012 { v18.h = vmpyi(v16.h,{{ *}}r19.b) } +v18.h=vmpyi(v16.h,r19.b) + +#CHECK: 1922d1aa { v11:10.h = vmpy(v17.ub,{{ *}}r2.b) } +v11:10.h=vmpy(v17.ub,r2.b) + +#CHECK: 1936ce9c { v29:28.w = vdmpy(v15:14.h,{{ *}}r22.b) } +v29:28.w=vdmpy(v15:14.h,r22.b) + +#CHECK: 1925d86b { v11.w = vdmpy(v25:24.h,{{ *}}r5.h):sat } +v11.w=vdmpy(v25:24.h,r5.h):sat + +#CHECK: 1925c255 { v21.w = vdmpy(v2.h,{{ *}}r5.h):sat } +v21.w=vdmpy(v2.h,r5.h):sat + +#CHECK: 1941d424 { v4.h = vmpy(v20.h,{{ *}}r1.h):<<1:sat } +v4.h=vmpy(v20.h,r1.h):<<1:sat + +#CHECK: 1943cf0a { v11:10.w = vmpy(v15.h,{{ *}}r3.h) } +v11:10.w=vmpy(v15.h,r3.h) + +#CHECK: 193ec2f0 { v17:16.w = vmpa(v3:2.h,{{ *}}r30.b) } +v17:16.w=vmpa(v3:2.h,r30.b) + +#CHECK: 193ddcde { v31:30.h = vmpa(v29:28.ub,{{ *}}r29.b) } +v31:30.h=vmpa(v29:28.ub,r29.b) + +#CHECK: 1946de76 { v23:22.uw = vmpy(v30.uh,{{ *}}r6.uh) } +v23:22.uw=vmpy(v30.uh,r6.uh) + +#CHECK: 1945c945 { v5.h = vmpy(v9.h,{{ *}}r5.h):<<1:rnd:sat } +v5.h=vmpy(v9.h,r5.h):<<1:rnd:sat + +#CHECK: 19b0c280 { v1:0.w = vtmpy(v3:2.h,{{ *}}r16.b) } +v1:0.w=vtmpy(v3:2.h,r16.b) + +#CHECK: 1c34d937 { v23.h = vmpy(v25.h,{{ *}}v20.h):<<1:rnd:sat } +v23.h=vmpy(v25.h,v20.h):<<1:rnd:sat + +#CHECK: 1c36c90a { v11:10.uw = vmpy(v9.uh,{{ *}}v22.uh) } +v11:10.uw=vmpy(v9.uh,v22.uh) + +#CHECK: 1c09c3ec { v13:12.w = vmpy(v3.h,{{ *}}v9.h) } +v13:12.w=vmpy(v3.h,v9.h) + +#CHECK: 1c0dd1d8 { v25:24.h = vmpy(v17.ub,{{ *}}v13.b) } +v25:24.h=vmpy(v17.ub,v13.b) + +#CHECK: 1c0dc0a4 { v5:4.uh = vmpy(v0.ub,{{ *}}v13.ub) } +v5:4.uh=vmpy(v0.ub,v13.ub) + +#CHECK: 1c14df84 { v5:4.h = vmpy(v31.b,{{ *}}v20.b) } +v5:4.h=vmpy(v31.b,v20.b) + +#CHECK: 1c16d77c { v28.w = vdmpy(v23.h,{{ *}}v22.h):sat } +v28.w=vdmpy(v23.h,v22.h):sat + +#CHECK: 1c08d84f { v15.w = vrmpy(v24.ub,{{ *}}v8.b) } +v15.w=vrmpy(v24.ub,v8.b) + +#CHECK: 1c06da29 { v9.w = vrmpy(v26.b,{{ *}}v6.b) } +v9.w=vrmpy(v26.b,v6.b) + +#CHECK: 1c1ac805 { v5.uw = vrmpy(v8.ub,{{ *}}v26.ub) } +v5.uw=vrmpy(v8.ub,v26.ub) + +#CHECK: 1c39d089 { v9.h = vmpyi(v16.h,{{ *}}v25.h) } +v9.h=vmpyi(v16.h,v25.h) + +#CHECK: 1c3ecc64 { v5:4.h = vmpa(v13:12.ub,{{ *}}v31:30.b) } +v5:4.h=vmpa(v13:12.ub,v31:30.b) + +#CHECK: 1c21ce54 { v21:20.w = vmpy(v14.h,{{ *}}v1.uh) } +v21:20.w=vmpy(v14.h,v1.uh) + +#CHECK: 1cf2c6f0 { v17:16.h = vmpa(v7:6.ub,{{ *}}v19:18.ub) } +v17:16.h=vmpa(v7:6.ub,v19:18.ub) + +#CHECK: 1fcdc82b { v11.w = vmpyio(v8.w{{ *}},{{ *}}v13.h) } +v11.w=vmpyio(v8.w,v13.h) + +#CHECK: 1fdeda10 { v16.w = vmpyie(v26.w{{ *}},{{ *}}v30.uh) } +v16.w=vmpyie(v26.w,v30.uh) + +#CHECK: 1ff2c2a6 { v6.w = vmpye(v2.w{{ *}},{{ *}}v18.uh) } +v6.w=vmpye(v2.w,v18.uh) + +#CHECK: 1ff7cbfa { v26.w = vmpyo(v11.w{{ *}},{{ *}}v23.h):<<1:sat } +v26.w=vmpyo(v11.w,v23.h):<<1:sat + +#CHECK: 1f5cd411 { v17.w = vmpyo(v20.w{{ *}},{{ *}}v28.h):<<1:rnd:sat } +v17.w=vmpyo(v20.w,v28.h):<<1:rnd:sat + +#CHECK: 1f71cf1d { v29.w = vmpyieo(v15.h{{ *}},{{ *}}v17.h) } +v29.w=vmpyieo(v15.h,v17.h) diff --git a/test/MC/Hexagon/v60lookup.s b/test/MC/Hexagon/v60lookup.s new file mode 100644 index 000000000000..b92a2d3c6eb1 --- /dev/null +++ b/test/MC/Hexagon/v60lookup.s @@ 
-0,0 +1,14 @@ +#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \ +#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \ +#RUN: FileCheck %s + + V31.b = vlut32(V29.b, V15.b, R1) +# CHECK: 1b79fd3f { v31.b = vlut32(v29.b,v15.b,r1) } + V31.b |= vlut32(V29.b, V15.b, R2) +# CHECK: 1b7afdbf { v31.b |= vlut32(v29.b,v15.b,r2) } + V31:30.h = vlut16(V29.b, V15.h, R3) +# CHECK: 1b7bfdde { v31:30.h = vlut16(v29.b,v15.h,r3) } + v31:30.h |= vlut16(v2.b, v9.h, r4) +# CHECK: 1b4ce2fe { v31:30.h |= vlut16(v2.b,v9.h,r4) } + v31.w = vinsert(r4) +# CHECK: 19a4e03f { v31.w = vinsert(r4) } From 9f51f8f7e70fb7fbaff887224ac4ebec3125ae45 Mon Sep 17 00:00:00 2001 From: Chih-Hung Hsieh Date: Thu, 3 Dec 2015 22:02:40 +0000 Subject: [PATCH 026/364] [X86] Part 1 to fix x86-64 fp128 calling convention. Almost all these changes are conditioned and only apply to the new x86-64 f128 type configuration, which will be enabled in a follow up patch. They are required together to make new f128 work. If there is any error, we should fix or revert them as a whole. These changes should have no impact to current configurations. * Relax type legalization checks to accept new f128 type configuration, whose TypeAction is TypeSoftenFloat, not TypeLegal, but also has TLI.isTypeLegal true. * Relax GetSoftenedFloat to return in some cases f128 type SDValue, which is TLI.isTypeLegal but not "softened" to i128 node. * Allow customized FABS, FNEG, FCOPYSIGN on new f128 type configuration, to generate optimized bitwise operators for libm functions. * Enhance related Lower* functions to handle f128 type. * Enhance DAGTypeLegalizer::run, SoftenFloatResult, and related functions to keep new f128 type in register, and convert f128 operators to library calls. * Fix Combiner, Emitter, Legalizer routines that did not handle f128 type. * Add ExpandConstant to handle i128 constants, ExpandNode to handle ISD::Constant node. * Add one more parameter to getCommonSubClass and firstCommonClass, to guarantee that returned common sub class will contain the specified simple value type. This extra parameter is used by EmitCopyFromReg in InstrEmitter.cpp. * Fix infinite loop in getTypeLegalizationCost when f128 is the value type. * Fix printOperand to handle null operand. * Enhance ISD::BITCAST node to handle f128 constant. * Expand new f128 type for BR_CC, SELECT_CC, SELECT, SETCC nodes. * Enhance X86AsmPrinter to emit f128 values in comments. 
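As a rough illustration of why FABS/FNEG on f128 can be turned into bitwise operators as the list above mentions (this is not the patch's SelectionDAG lowering, only a sketch in plain C; GCC/Clang's __float128 on little-endian x86-64 is assumed, and the helper names f128_fabs/f128_fneg are invented for the example): IEEE binary128 keeps its sign in bit 127, so clearing or flipping the top bit of the value's bit pattern implements fabs and negation without any floating-point arithmetic.

#include <string.h>

/* Clear the sign bit (bit 127) of an IEEE binary128 value. */
static __float128 f128_fabs(__float128 x) {
  unsigned char b[sizeof x];
  memcpy(b, &x, sizeof x);
  b[15] &= 0x7f;              /* top byte holds the sign on little-endian x86-64 */
  memcpy(&x, b, sizeof x);
  return x;
}

/* Flip the sign bit to negate. */
static __float128 f128_fneg(__float128 x) {
  unsigned char b[sizeof x];
  memcpy(b, &x, sizeof x);
  b[15] ^= 0x80;              /* toggle the sign bit */
  memcpy(&x, b, sizeof x);
  return x;
}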
Differential Revision: http://reviews.llvm.org/D15134 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254653 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 4 +- include/llvm/Target/TargetRegisterInfo.h | 6 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 34 ++++- .../SelectionDAG/LegalizeFloatTypes.cpp | 141 ++++++++++++++---- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 43 ++++-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 67 +++++++-- .../SelectionDAG/LegalizeTypesGeneric.cpp | 11 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- .../SelectionDAG/SelectionDAGDumper.cpp | 5 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 +- lib/CodeGen/TargetLoweringBase.cpp | 4 + lib/CodeGen/TargetRegisterInfo.cpp | 18 ++- lib/Target/X86/X86MCInstLower.cpp | 14 +- 15 files changed, 298 insertions(+), 77 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 819458dbb0f0..e247abcb2f75 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -97,7 +97,8 @@ class TargetLoweringBase { TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -1913,6 +1914,7 @@ class TargetLoweringBase { /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; +protected: ValueTypeActionImpl ValueTypeActions; private: diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index e8926f788156..7d293fe82a6b 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -614,9 +614,13 @@ class TargetRegisterInfo : public MCRegisterInfo { /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const; + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2941a7eb3aed..10fb334c4c60 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8722,6 +8722,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. 
+ // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. + SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8765,7 +8781,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8238cdeb59ca..3393e17b8e09 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -154,6 +154,7 @@ class SelectionDAGLegalize { SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall bool ExpandNode(SDNode *Node); @@ -294,6 +295,20 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { return Result; } +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + /// Expands an unaligned store to 2 half-size stores. 
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, @@ -1192,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -3390,6 +3407,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 97e88bf84a70..bb150f726c23 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. 
+ R = SDValue(N, ResNo); + break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast(N)); - break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result. - if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. 
+ return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -206,7 +227,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -390,7 +414,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -580,7 +607,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -595,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -609,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -645,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -679,7 +716,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -689,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -704,14 +743,23 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. + // core about this to re-analyze. 
if (Res.getNode() == N) return true; @@ -722,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 337cbe7fc598..d6b4f7921f2b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -238,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -411,18 +415,27 @@ bool DAGTypeLegalizer::run() { bool Failed = false; // Check that all result types are legal. + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. 
for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } @@ -748,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4e4740f1f9cb..84ea374345e9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -72,6 +72,20 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128, should to be legally in registers + /// and only some operations converted to library calls or integer + /// bitwise operations. + bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -372,32 +386,48 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. 
+ /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. + void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort in + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. + bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); - SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -409,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -422,14 +452,19 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); - SDValue SoftenFloatRes_SELECT(SDNode *N); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Operand Float to Integer Conversion. + // Return true if we can skip softening the given operand or SDNode because + // it was soften before by SoftenFloatResult and references to the operand + // were replaced by ReplaceValueWith. 
+ bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index eb545982ed02..593c346df770 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4b7887b26afe..771bb00d86ac 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2893,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a6f9699bb29c..d362f98d6464 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -626,7 +626,10 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value) { - if (shouldPrintInline(*Value.getNode())) { + if (!Value.getNode()) { + OS << ""; + return false; + } else if (shouldPrintInline(*Value.getNode())) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 21935cdd4699..bb31231f4e1a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1072,7 +1072,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && 
Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 69c130809bb8..68bca2e70369 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1654,6 +1654,10 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index fe91c86b71f8..0c4a3dcb226e 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -171,16 +171,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -189,7 +197,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 6887f2e4c04a..8878c9f169b5 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1373,7 +1373,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); From 0956a120f5a443670cf86e59a6ad9dcff7135a9e Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Thu, 3 Dec 2015 22:13:40 +0000 Subject: [PATCH 027/364] [llvm-objdump] Use report_fatal_error() if we can't find a target. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254654 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/llvm-objdump.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 54f24d7a00ff..069425429d16 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -282,10 +282,8 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) { std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, Error); - if (!TheTarget) { - errs() << ToolName << ": " << Error; - return nullptr; - } + if (!TheTarget) + report_fatal_error("can't find target: " + Error); // Update the triple name and return the found target. TripleName = TheTriple.getTriple(); @@ -805,10 +803,6 @@ static bool getHidden(RelocationRef RelRef) { static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { const Target *TheTarget = getTarget(Obj); - // getTarget() will have already issued a diagnostic if necessary, so - // just bail here if it failed. - if (!TheTarget) - return; // Package up features to be passed to target/subtarget std::string FeaturesStr; From bb300c512008269c1caabff9613a6dbc49f83fa4 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 3 Dec 2015 22:17:26 +0000 Subject: [PATCH 028/364] raw_ostream: << operator for callables with raw_stream argument This allows easier construction of print helpers. Example: Printable PrintLaneMask(unsigned LaneMask) { return Printable([LaneMask](raw_ostream &OS) { OS << format("%08X", LaneMask); }); } // Usage: OS << PrintLaneMask(Mask); Differential Revision: http://reviews.llvm.org/D14348 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254655 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/raw_ostream.h | 14 +++ include/llvm/Target/TargetRegisterInfo.h | 73 ++------------ lib/CodeGen/RegAllocPBQP.cpp | 26 +---- .../SelectionDAG/SelectionDAGDumper.cpp | 17 +--- lib/CodeGen/TargetRegisterInfo.cpp | 95 +++++++++++-------- lib/Support/raw_ostream.cpp | 4 + 6 files changed, 89 insertions(+), 140 deletions(-) diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index e5cc40e7d6b2..38a96fa6ab74 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -17,12 +17,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include #include namespace llvm { class format_object_base; class FormattedString; class FormattedNumber; +class raw_ostream; template class SmallVectorImpl; namespace sys { @@ -31,6 +33,15 @@ enum OpenFlags : unsigned; } } +/// Type of function that prints to raw_ostream. +/// +/// Typical usage: +/// Printable PrintFoo(Foo x) { +/// return [] (raw_ostream &os) { os << /* ... */; }; +/// } +/// os << "Foo: " << PrintFoo(foo) << '\n'; +typedef std::function Printable; + /// This class implements an extremely fast bulk output stream that can *only* /// output to a stream. It does not support seeking, reopening, rewinding, line /// buffered disciplines etc. It is a simple buffer that outputs @@ -203,6 +214,9 @@ class raw_ostream { raw_ostream &operator<<(double N); + /// IO manipulator, \see Printable. + raw_ostream &operator<<(Printable P); + /// Output \p N in hexadecimal, without any prefix or padding. 
raw_ostream &write_hex(unsigned long long N); diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 7d293fe82a6b..de6f46eba013 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -932,7 +932,6 @@ struct VirtReg2IndexFunctor : public std::unary_function { } }; -/// Helper class for printing registers on a raw_ostream. /// Prints virtual and physical registers with or without a TRI instance. /// /// The format is: @@ -943,24 +942,10 @@ struct VirtReg2IndexFunctor : public std::unary_function { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << PrintReg(Reg, TRI) << '\n'; -/// -class PrintReg { - const TargetRegisterInfo *TRI; - unsigned Reg; - unsigned SubIdx; -public: - explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr, - unsigned subidx = 0) - : TRI(tri), Reg(reg), SubIdx(subidx) {} - void print(raw_ostream&) const; -}; +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, + unsigned SubRegIdx = 0); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing register units on a raw_ostream. +/// Create Printable object to print register units on a \ref raw_ostream. /// /// Register units are named after their root registers: /// @@ -968,54 +953,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { /// FP0~ST7 - Dual roots. /// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -/// -class PrintRegUnit { -protected: - const TargetRegisterInfo *TRI; - unsigned Unit; -public: - PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) - : TRI(tri), Unit(unit) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { - PR.print(OS); - return OS; -} +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -/// It is often convenient to track virtual registers and -/// physical register units in the same list. -class PrintVRegOrUnit : protected PrintRegUnit { -public: - PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) - : PrintRegUnit(VRegOrUnit, tri) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const PrintVRegOrUnit &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing lane masks. -/// -/// They are currently printed out as hexadecimal numbers. -/// Usage: OS << PrintLaneMask(Mask); -class PrintLaneMask { -protected: - LaneBitmask LaneMask; -public: - PrintLaneMask(LaneBitmask LaneMask) - : LaneMask(LaneMask) {} - void print(raw_ostream&) const; -}; +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMask &P) { - P.print(OS); - return OS; -} +/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. 
+Printable PrintLaneMask(LaneBitmask LaneMask); } // End llvm namespace diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index afa98b26d727..f08d616e6812 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -805,33 +805,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. +static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return [NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }; } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d362f98d6464..8c3a0f2d81ec 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -369,25 +369,14 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -namespace { -class PrintNodeId { - const SDNode &Node; -public: - explicit PrintNodeId(const SDNode &Node) - : Node(Node) {} - void print(raw_ostream &OS) const { +static Printable PrintNodeId(const SDNode &Node) { + return [&Node](raw_ostream &OS) { #ifndef NDEBUG OS << 't' << Node.PersistentId; #else OS << (const void*)&Node; #endif - } -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeId &P) { - P.print(OS); - return OS; -} + }; } void SDNode::dump() const { dump(nullptr); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 0c4a3dcb226e..839d9ef31ad0 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -40,58 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return [Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << 
TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }; } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return [Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }; } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return [Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }; } -void PrintLaneMask::print(raw_ostream &OS) const { - OS << format("%08X", LaneMask); +Printable PrintLaneMask(LaneBitmask LaneMask) { + return [LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }; } +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 49ef400c5f2d..5b1dceca0bfc 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -264,6 +264,10 @@ raw_ostream &raw_ostream::operator<<(double N) { return this->operator<<(format("%e", N)); } +raw_ostream &raw_ostream::operator<<(Printable P) { + P(*this); + return *this; +} void raw_ostream::flush_nonempty() { From 68c9f5ec88b3d7679f09d6634c64e6f1d209f097 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 3 Dec 2015 22:45:19 +0000 Subject: [PATCH 029/364] [Analysis] Become aware of MSVC's new/delete functions The compiler can take advantage of the allocation/deallocation function's properties. We knew how to do this for Itanium but had no support for MSVC-style functions. 
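As a rough source-level illustration (hypothetical code, not drawn from the patch): on a 64-bit MSVC target a dead allocation pair like the one below typically mangles to ??2@YAPEAX_K@Z (operator new(unsigned long long)) and ??3@YAXPEAX@Z (operator delete(void*)), both of which are now recognized, so the pair can be removed, mirroring the new test9 in malloc-free-delete.ll.

  // Hypothetical example; the mangled names are the ones added to
  // TargetLibraryInfo.def, and the calls are dead once paired up.
  void no_op() {
    int *p = new int; // lowers to a call to ??2@YAPEAX_K@Z
    delete p;         // lowers to a call to ??3@YAXPEAX@Z
  }
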
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254656 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetLibraryInfo.def | 81 +++++++++++++++++++ lib/Analysis/MemoryBuiltins.cpp | 24 +++++- .../InstCombine/malloc-free-delete.ll | 11 +++ 3 files changed, 114 insertions(+), 2 deletions(-) diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 393e9d6695f8..7798e3c88248 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -27,6 +27,86 @@ #define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, #endif +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z") + +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new(unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z") + +/// void *new(unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z") + +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z") + +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new[](unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z") + +/// void *new[](unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned long long); 
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z") + /// int _IO_getc(_IO_FILE * __fp); TLI_DEFINE_ENUM_INTERNAL(under_IO_getc) TLI_DEFINE_STRING_INTERNAL("_IO_getc") @@ -673,6 +753,7 @@ TLI_DEFINE_STRING_INTERNAL("modff") /// long double modfl(long double value, long double *iptr); TLI_DEFINE_ENUM_INTERNAL(modfl) TLI_DEFINE_STRING_INTERNAL("modfl") + /// double nearbyint(double x); TLI_DEFINE_ENUM_INTERNAL(nearbyint) TLI_DEFINE_STRING_INTERNAL("nearbyint") diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index b4dce4941538..c64be771f1f0 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = { {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long) + {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow) + {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long) + {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow) {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, @@ -308,14 +316,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + TLIFn == LibFunc::ZdaPv || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return 
nullptr; diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll index 138001ace951..8fcb8214360d 100644 --- a/test/Transforms/InstCombine/malloc-free-delete.ll +++ b/test/Transforms/InstCombine/malloc-free-delete.ll @@ -186,3 +186,14 @@ define void @test8() { call void @_ZdaPvj(i8* %naj, i32 32) builtin ret void } + +declare noalias i8* @"\01??2@YAPEAX_K@Z"(i64) nobuiltin +declare void @"\01??3@YAXPEAX@Z"(i8*) nobuiltin + +; CHECK-LABEL: @test9( +define void @test9() { + ; CHECK-NOT: call + %new_long_long = call noalias i8* @"\01??2@YAPEAX_K@Z"(i64 32) builtin + call void @"\01??3@YAXPEAX@Z"(i8* %new_long_long) builtin + ret void +} From 2ffa666beca8a55cafb5f0bac88671fec7645626 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Thu, 3 Dec 2015 22:51:08 +0000 Subject: [PATCH 030/364] [CMake] set_target_properties doesn't append link flags This fixes a bug introduced in r254627, and another occurance of the same bug in this file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254657 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/AddLLVM.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 97ac96ed4281..e121da6786d4 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -667,7 +667,7 @@ function(export_executable_symbols target) if (NOT MSVC) # MSVC's linker doesn't support exporting all symbols. set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) if (APPLE) - set_target_properties(${target} PROPERTIES + set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-rdynamic") endif() endif() @@ -1185,8 +1185,8 @@ function(llvm_externalize_debuginfo name) OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) - set_target_properties(${name} PROPERTIES - LINK_FLAGS "-Wl,-object_path_lto -Wl,${lto_object}") + set_property(TARGET ${name} APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,-object_path_lto,${lto_object}") endif() add_custom_command(TARGET ${name} POST_BUILD COMMAND xcrun dsymutil $ From d8b19eb290bffe27ffcb92950162a15024ed6572 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Thu, 3 Dec 2015 22:55:36 +0000 Subject: [PATCH 031/364] [CMake] Fixing bots CMake calls to set_property with APPEND string need to have a leading space. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254659 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/AddLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index e121da6786d4..8cc8abbbc7de 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -668,7 +668,7 @@ function(export_executable_symbols target) set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) if (APPLE) set_property(TARGET ${target} APPEND_STRING PROPERTY - LINK_FLAGS "-rdynamic") + LINK_FLAGS " -rdynamic") endif() endif() endfunction() From add5c9809ba804ed03c7f289a29fccb158d27ae9 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 3 Dec 2015 23:00:28 +0000 Subject: [PATCH 032/364] Revert "raw_ostream: << operator for callables with raw_stream argument" This commit provoked "error C2593: 'operator <<' is ambiguous" on MSVC. This reverts commit r254655. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254661 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/raw_ostream.h | 14 --- include/llvm/Target/TargetRegisterInfo.h | 73 ++++++++++++-- lib/CodeGen/RegAllocPBQP.cpp | 26 ++++- .../SelectionDAG/SelectionDAGDumper.cpp | 17 +++- lib/CodeGen/TargetRegisterInfo.cpp | 95 ++++++++----------- lib/Support/raw_ostream.cpp | 4 - 6 files changed, 140 insertions(+), 89 deletions(-) diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 38a96fa6ab74..e5cc40e7d6b2 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -17,14 +17,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" -#include #include namespace llvm { class format_object_base; class FormattedString; class FormattedNumber; -class raw_ostream; template class SmallVectorImpl; namespace sys { @@ -33,15 +31,6 @@ enum OpenFlags : unsigned; } } -/// Type of function that prints to raw_ostream. -/// -/// Typical usage: -/// Printable PrintFoo(Foo x) { -/// return [] (raw_ostream &os) { os << /* ... */; }; -/// } -/// os << "Foo: " << PrintFoo(foo) << '\n'; -typedef std::function Printable; - /// This class implements an extremely fast bulk output stream that can *only* /// output to a stream. It does not support seeking, reopening, rewinding, line /// buffered disciplines etc. It is a simple buffer that outputs @@ -214,9 +203,6 @@ class raw_ostream { raw_ostream &operator<<(double N); - /// IO manipulator, \see Printable. - raw_ostream &operator<<(Printable P); - /// Output \p N in hexadecimal, without any prefix or padding. raw_ostream &write_hex(unsigned long long N); diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index de6f46eba013..7d293fe82a6b 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -932,6 +932,7 @@ struct VirtReg2IndexFunctor : public std::unary_function { } }; +/// Helper class for printing registers on a raw_ostream. /// Prints virtual and physical registers with or without a TRI instance. /// /// The format is: @@ -942,10 +943,24 @@ struct VirtReg2IndexFunctor : public std::unary_function { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << PrintReg(Reg, TRI) << '\n'; -Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, - unsigned SubRegIdx = 0); +/// +class PrintReg { + const TargetRegisterInfo *TRI; + unsigned Reg; + unsigned SubIdx; +public: + explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr, + unsigned subidx = 0) + : TRI(tri), Reg(reg), SubIdx(subidx) {} + void print(raw_ostream&) const; +}; -/// Create Printable object to print register units on a \ref raw_ostream. +static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { + PR.print(OS); + return OS; +} + +/// Helper class for printing register units on a raw_ostream. /// /// Register units are named after their root registers: /// @@ -953,14 +968,54 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, /// FP0~ST7 - Dual roots. 
/// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); +/// +class PrintRegUnit { +protected: + const TargetRegisterInfo *TRI; + unsigned Unit; +public: + PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) + : TRI(tri), Unit(unit) {} + void print(raw_ostream&) const; +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { + PR.print(OS); + return OS; +} -/// \brief Create Printable object to print virtual registers and physical -/// registers on a \ref raw_ostream. -Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); +/// It is often convenient to track virtual registers and +/// physical register units in the same list. +class PrintVRegOrUnit : protected PrintRegUnit { +public: + PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) + : PrintRegUnit(VRegOrUnit, tri) {} + void print(raw_ostream&) const; +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, + const PrintVRegOrUnit &PR) { + PR.print(OS); + return OS; +} + +/// Helper class for printing lane masks. +/// +/// They are currently printed out as hexadecimal numbers. +/// Usage: OS << PrintLaneMask(Mask); +class PrintLaneMask { +protected: + LaneBitmask LaneMask; +public: + PrintLaneMask(LaneBitmask LaneMask) + : LaneMask(LaneMask) {} + void print(raw_ostream&) const; +}; -/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. -Printable PrintLaneMask(LaneBitmask LaneMask); +static inline raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMask &P) { + P.print(OS); + return OS; +} } // End llvm namespace diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index f08d616e6812..afa98b26d727 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -805,17 +805,33 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -/// Create Printable object for node and register info. 
-static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, - const PBQP::RegAlloc::PBQPRAGraph &G) { - return [NId, &G](raw_ostream &OS) { +namespace { +// A helper class for printing node and register info in a consistent way +class PrintNodeInfo { +public: + typedef PBQP::RegAlloc::PBQPRAGraph Graph; + typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; + + PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} + + void print(raw_ostream &OS) const { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - }; + } + +private: + const Graph &G; + NodeId NId; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { + PR.print(OS); + return OS; } +} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 8c3a0f2d81ec..d362f98d6464 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -369,14 +369,25 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -static Printable PrintNodeId(const SDNode &Node) { - return [&Node](raw_ostream &OS) { +namespace { +class PrintNodeId { + const SDNode &Node; +public: + explicit PrintNodeId(const SDNode &Node) + : Node(Node) {} + void print(raw_ostream &OS) const { #ifndef NDEBUG OS << 't' << Node.PersistentId; #else OS << (const void*)&Node; #endif - }; + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeId &P) { + P.print(OS); + return OS; +} } void SDNode::dump() const { dump(nullptr); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 839d9ef31ad0..0c4a3dcb226e 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -40,71 +40,58 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} -namespace llvm { - -Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, - unsigned SubIdx) { - return [Reg, TRI, SubIdx](raw_ostream &OS) { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); +void PrintReg::print(raw_ostream &OS) const { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); + else + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); - else - OS << ":sub(" << SubIdx << ')'; - } - }; + OS << ":sub(" << SubIdx << ')'; + } } -Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { - return [Unit, 
TRI](raw_ostream &OS) { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +void PrintRegUnit::print(raw_ostream &OS) const { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); - }; + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); } -Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { - return [Unit, TRI](raw_ostream &OS) { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - } else { - OS << PrintRegUnit(Unit, TRI); - } - }; +void PrintVRegOrUnit::print(raw_ostream &OS) const { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + return; + } + PrintRegUnit::print(OS); } -Printable PrintLaneMask(LaneBitmask LaneMask) { - return [LaneMask](raw_ostream &OS) { - OS << format("%08X", LaneMask); - }; +void PrintLaneMask::print(raw_ostream &OS) const { + OS << format("%08X", LaneMask); } -} // End of llvm namespace - /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 5b1dceca0bfc..49ef400c5f2d 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -264,10 +264,6 @@ raw_ostream &raw_ostream::operator<<(double N) { return this->operator<<(format("%e", N)); } -raw_ostream &raw_ostream::operator<<(Printable P) { - P(*this); - return *this; -} void raw_ostream::flush_nonempty() { From e94b2105e93b551b1b1bc9a009eb7c3c622ad5b7 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 3 Dec 2015 23:07:03 +0000 Subject: [PATCH 033/364] [WebAssembly] Fix dominance check for PHIs in the StoreResult pass When a block has no terminator instructions, getFirstTerminator() returns end(), which can't be used in dominance checks. Check dominance for phi operands separately. Also, remove some bits from WebAssemblyRegStackify.cpp that were causing trouble on the same testcase; they were left behind from an earlier experiment. 
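The shape of the fix, condensed from the WebAssemblyStoreResults.cpp hunk below: a PHI uses its operand on the incoming edge, so dominance is tested against the paired predecessor block rather than against an instruction fished out of that block.

  // Old: map the PHI use to a terminator of the incoming block. If that
  // block has no terminator, getFirstTerminator() is end() and the
  // MDT.dominates(&MI, Where) query is invalid.
  Where = Where->getOperand(&O - &Where->getOperand(0) + 1)
              .getMBB()->getFirstTerminator();

  // New: test block-level dominance for the PHI's incoming edge.
  MachineBasicBlock *Pred =
      Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
  if (!MDT.dominates(&MBB, Pred))
    continue;
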
Differential Revision: http://reviews.llvm.org/D15210 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254662 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyRegStackify.cpp | 9 ++-- .../WebAssembly/WebAssemblyStoreResults.cpp | 33 +++++++++----- test/CodeGen/WebAssembly/store-results.ll | 43 +++++++++++++++++++ 3 files changed, 69 insertions(+), 16 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index bdccc8577c5e..ecbbc5c72243 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -81,6 +81,7 @@ static void ImposeStackOrdering(MachineInstr *MI) { // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, AliasAnalysis &AA) { + assert(Def->getParent() == Insert->getParent()); bool SawStore = false, SawSideEffects = false; MachineBasicBlock::const_iterator D(Def), I(Insert); for (--I; I != D; --I) @@ -155,17 +156,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; - // Single-use expression trees require defs that have one use, or that - // they be trivially clonable. + // Single-use expression trees require defs that have one use. // TODO: Eventually we'll relax this, to take advantage of set_local // returning its result. if (!MRI.hasOneUse(Reg)) continue; - // For now, be conservative and don't look across block boundaries, - // unless we have something trivially clonable. + // For now, be conservative and don't look across block boundaries. // TODO: Be more aggressive. - if (Def->getParent() != &MBB && !Def->isMoveImmediate()) + if (Def->getParent() != &MBB) continue; // Don't move instructions that have side effects or memory dependencies diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 3a7f50e3b142..4a8fc09878c4 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -10,9 +10,12 @@ /// \file /// \brief This file implements an optimization pass using store result values. /// -/// WebAssembly's store instructions return the stored value, specifically to -/// enable the optimization of reducing get_local/set_local traffic, which is -/// what we're doing here. +/// WebAssembly's store instructions return the stored value. This is to enable +/// an optimization wherein uses of the stored value can be replaced by uses of +/// the store's result value, making the stored value register more likely to +/// be single-use, thus more likely to be useful to register stackifying, and +/// potentially also exposing the store to register stackifying. These both can +/// reduce get_local/set_local traffic. 
/// //===----------------------------------------------------------------------===// @@ -89,14 +92,22 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) - Where = Where->getOperand(&O - &Where->getOperand(0) + 1) - .getMBB() - ->getFirstTerminator(); - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << - " from " << MI <<"\n"); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where + << " from " << MI << "\n"); O.setReg(ToReg); } } diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll index 1bcee5d31fb7..c05ed3a04be3 100644 --- a/test/CodeGen/WebAssembly/store-results.ll +++ b/test/CodeGen/WebAssembly/store-results.ll @@ -16,3 +16,46 @@ entry: store i32 0, i32* %p ret i32 0 } + +; Test interesting corner cases for wasm-store-results, in which the operand of +; a store ends up getting used by a phi, which needs special handling in the +; dominance test, since phis use their operands on their incoming edges. + +%class.Vec3 = type { float, float, float } + +@pos = global %class.Vec3 zeroinitializer, align 4 + +; CHECK-LABEL: foo: +; CHECK: i32.store $discard=, $pop0, $0 +define void @foo() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body5.i ] + %conv6.i = sitofp i32 %i.0168.i to float + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = add nuw nsw i32 %i.0168.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 256 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +} + +; CHECK-LABEL: bar: +; CHECK: i32.store $discard=, $0, $pop0 +define void @bar() { +for.body.i: + br label %for.body5.i + +for.body5.i: + %i.0168.i = phi float [ 0.0, %for.body.i ], [ %inc.i, %for.body5.i ] + store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0) + %inc.i = fadd float %i.0168.i, 1.0 + %exitcond.i = fcmp oeq float %inc.i, 256.0 + br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i + +for.cond.cleanup4.i: + ret void +} From c5aa4e71ff52b61762ef2dfdab5ccba6a49fffe5 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 3 Dec 2015 23:28:35 +0000 Subject: [PATCH 034/364] AsmPrinter: Simplify emitting FP elements in sequential data. NFC Use APFloat APIs here Rather than manually type-punning through unions. 
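Condensed, the element loop goes from per-width union punning to a single width-agnostic APFloat path:

  // Before: read the element through a union, duplicated for f32/f64.
  union { float F; uint32_t I; };
  F = CDS->getElementAsFloat(i);
  AP.OutStreamer->EmitIntValue(I, 4);

  // After: one path for either element width.
  APFloat Num = CDS->getElementAsAPFloat(I);
  AP.OutStreamer->EmitIntValue(Num.bitcastToAPInt().getLimitedValue(),
                               ElementByteSize);
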
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254664 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 41 ++++++++++----------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9ffd830a9f58..2cfea650872a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1945,33 +1945,22 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } - } else if (ElementByteSize == 4) { - // FP Constants are printed as integer constants to avoid losing - // precision. - assert(CDS->getElementType()->isFloatTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - float F; - uint32_t I; - }; - - F = CDS->getElementAsFloat(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 4); - } } else { - assert(CDS->getElementType()->isDoubleTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - double F; - uint64_t I; - }; - - F = CDS->getElementAsDouble(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 8); + // FP Constants are printed as integer constants to avoid losing precision. + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + APFloat Num = CDS->getElementAsAPFloat(I); + if (AP.isVerbose()) { + if (ElementByteSize == 4) + AP.OutStreamer->GetCommentOS() << "float " << Num.convertToFloat() + << '\n'; + else if (ElementByteSize == 8) + AP.OutStreamer->GetCommentOS() << "double " << Num.convertToDouble() + << '\n'; + else + llvm_unreachable("Unexpected float width"); + } + AP.OutStreamer->EmitIntValue(Num.bitcastToAPInt().getLimitedValue(), + ElementByteSize); } } From 922e9787e69e0af586b7f5b370969f99d2f8ea99 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Thu, 3 Dec 2015 23:43:56 +0000 Subject: [PATCH 035/364] CodeGen peephole: fold redundant phys reg copies Code generation often exposes redundant physical register copies through virtual registers such as: %vreg = COPY %PHYSREG ... %PHYSREG = COPY %vreg There are cases where no intervening clobber of %PHYSREG occurs, and the later copy could therefore be removed. In some cases this further allows us to remove the initial copy. This patch contains a motivating example which comes from the x86 build of Chrome, specifically cc::ResourceProvider::UnlockForRead uses libstdc++'s implementation of hash_map. That example has two tests live at the same time, and after machine sinking LLVM has confused itself enough and things spilling EFLAGS is a great idea even though it's never restored and the comparison results are both live. Before this patch we have: DEC32m %RIP, 1, %noreg, , %noreg, %EFLAGS %vreg1 = COPY %EFLAGS; GR64:%vreg1 %EFLAGS = COPY %vreg1; GR64:%vreg1 JNE_1 , %EFLAGS Both copies are useless. This patch tries to eliminate the later copy in a generic manner. dec is especially confusing to LLVM when compared with sub. I wrote this patch to treat all physical registers generically, but only remove redundant copies of non-allocatable physical registers because the allocatable ones caused issues (e.g. when calling conventions weren't properly modeled) and should be handled later by the register allocator anyways. 
The following tests used to failed when the patch also replaced allocatable registers: CodeGen/X86/StackColoring.ll CodeGen/X86/avx512-calling-conv.ll CodeGen/X86/copy-propagation.ll CodeGen/X86/inline-asm-fpstack.ll CodeGen/X86/musttail-varargs.ll CodeGen/X86/pop-stack-cleanup.ll CodeGen/X86/preserve_mostcc64.ll CodeGen/X86/tailcallstack64.ll CodeGen/X86/this-return-64.ll This happens because COPY has other special meaning for e.g. dependency breakage and x87 FP stack. Note that all other backends' tests pass. Reviewers: qcolombet Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15157 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254665 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 144 +++++++++++-- .../X86/peephole-na-phys-copy-folding.ll | 190 ++++++++++++++++++ 2 files changed, 322 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/X86/peephole-na-phys-copy-folding.ll diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 4fd1c4bda433..0fabc40b64e0 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -98,6 +98,10 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); +static cl::opt DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + // Limit the number of PHI instructions to process // in PeepholeOptimizer::getNextSource. static cl::opt RewritePHILimit( @@ -111,6 +115,7 @@ STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { class ValueTrackerResult; @@ -162,12 +167,24 @@ namespace { DenseMap &ImmDefMIs); /// \brief If copy instruction \p MI is a virtual register copy, track it in - /// the set \p CopiedFromRegs and \p CopyMIs. If this virtual register was + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was /// previously seen as a copy, replace the uses of this copy with the /// previously seen copy's destination register. bool foldRedundantCopy(MachineInstr *MI, - SmallSet &CopiedFromRegs, - DenseMap &CopyMIs); + SmallSet &CopySrcRegs, + DenseMap &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. 
+ bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap &NAPhysToVirtMIs); bool isLoadFoldable(MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates); @@ -1332,7 +1349,7 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, if (ImmDefRegs.count(Reg) == 0) continue; DenseMap::iterator II = ImmDefMIs.find(Reg); - assert(II != ImmDefMIs.end()); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { ++NumImmFold; return true; @@ -1356,10 +1373,10 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, // // Should replace %vreg2 uses with %vreg1:sub1 bool PeepholeOptimizer::foldRedundantCopy( - MachineInstr *MI, - SmallSet &CopySrcRegs, - DenseMap &CopyMIs) { - assert(MI->isCopy()); + MachineInstr *MI, + SmallSet &CopySrcRegs, + DenseMap &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); unsigned SrcReg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) @@ -1400,6 +1417,59 @@ bool PeepholeOptimizer::foldRedundantCopy( return true; } +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; @@ -1433,6 +1503,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap ImmDefMIs; SmallSet FoldAsLoadDefCandidates; + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. 
+ // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. + DenseMap NAPhysToVirtMIs; + // Set of virtual registers that are copied from. SmallSet CopySrcRegs; DenseMap CopySrcMIs; @@ -1453,10 +1530,51 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isLoadFoldBarrier()) FoldAsLoadDefCandidates.clear(); - if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) + if (MI->isPosition() || MI->isPHI()) + continue; + + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. + DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); continue; + } if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || @@ -1479,7 +1597,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) { + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); MI->eraseFromParent(); Changed = true; diff --git a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll new file mode 100644 index 000000000000..438bf8ddf4c7 --- /dev/null +++ b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -0,0 +1,190 @@ +; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s + +; The peephole optimizer can elide some physical register copies such as +; EFLAGS. Make sure the flags are used directly, instead of needlessly using +; lahf, when possible. + +@L = external global i32 +@M = external global i8 +declare i32 @bar(i64) + +; CHECK-LABEL: plus_one +; CHECK-NOT: seto +; CHECK-NOT: lahf +; CHECK-NOT: sahf +; CHECK-NOT: pushf +; CHECK-NOT: popf +; CHECK: incl L +define i1 @plus_one() { +entry: + %loaded_L = load i32, i32* @L + %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc. 
+ store i32 %val, i32* @L + %loaded_M = load i8, i8* @M + %masked = and i8 %loaded_M, 8 + %M_is_true = icmp ne i8 %masked, 0 + %L_is_false = icmp eq i32 %val, 0 + %cond = and i1 %L_is_false, %M_is_true + br i1 %cond, label %exit2, label %exit + +exit: + ret i1 true + +exit2: + ret i1 false +} + +; CHECK-LABEL: plus_forty_two +; CHECK-NOT: seto +; CHECK-NOT: lahf +; CHECK-NOT: sahf +; CHECK-NOT: pushf +; CHECK-NOT: popf +; CHECK: addl $42, +define i1 @plus_forty_two() { +entry: + %loaded_L = load i32, i32* @L + %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc. + store i32 %val, i32* @L + %loaded_M = load i8, i8* @M + %masked = and i8 %loaded_M, 8 + %M_is_true = icmp ne i8 %masked, 0 + %L_is_false = icmp eq i32 %val, 0 + %cond = and i1 %L_is_false, %M_is_true + br i1 %cond, label %exit2, label %exit + +exit: + ret i1 true + +exit2: + ret i1 false +} + +; CHECK-LABEL: minus_one +; CHECK-NOT: seto +; CHECK-NOT: lahf +; CHECK-NOT: sahf +; CHECK-NOT: pushf +; CHECK-NOT: popf +; CHECK: decl L +define i1 @minus_one() { +entry: + %loaded_L = load i32, i32* @L + %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec. + store i32 %val, i32* @L + %loaded_M = load i8, i8* @M + %masked = and i8 %loaded_M, 8 + %M_is_true = icmp ne i8 %masked, 0 + %L_is_false = icmp eq i32 %val, 0 + %cond = and i1 %L_is_false, %M_is_true + br i1 %cond, label %exit2, label %exit + +exit: + ret i1 true + +exit2: + ret i1 false +} + +; CHECK-LABEL: minus_forty_two +; CHECK-NOT: seto +; CHECK-NOT: lahf +; CHECK-NOT: sahf +; CHECK-NOT: pushf +; CHECK-NOT: popf +; CHECK: addl $-42, +define i1 @minus_forty_two() { +entry: + %loaded_L = load i32, i32* @L + %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec. + store i32 %val, i32* @L + %loaded_M = load i8, i8* @M + %masked = and i8 %loaded_M, 8 + %M_is_true = icmp ne i8 %masked, 0 + %L_is_false = icmp eq i32 %val, 0 + %cond = and i1 %L_is_false, %M_is_true + br i1 %cond, label %exit2, label %exit + +exit: + ret i1 true + +exit2: + ret i1 false +} + +; CHECK-LABEL: test_intervening_call: +; CHECK: cmpxchg +; CHECK: seto %al +; CHECK-NEXT: lahf +; CHECK: call{{[lq]}} bar +; CHECK: addb $127, %al +; CHECK-NEXT: sahf +define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { + ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS. + %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst + %v = extractvalue { i64, i1 } %cx, 0 + %p = extractvalue { i64, i1 } %cx, 1 + call i32 @bar(i64 %v) + br i1 %p, label %t, label %f + +t: + ret i64 42 + +f: + ret i64 0 +} + +; CHECK-LABEL: test_two_live_flags: +; CHECK: cmpxchg +; CHECK-NEXT: seto %al +; CHECK-NEXT: lahf +; Save result of the first cmpxchg into D. +; CHECK-NEXT: mov{{[lq]}} %[[AX:[er]ax]], %[[D:[re]d[xi]]] +; CHECK: cmpxchg +; CHECK-NEXT: sete %al +; Save result of the second cmpxchg onto the stack. +; CHECK-NEXT: push{{[lq]}} %[[AX]] +; Restore result of the first cmpxchg from D, put it back in EFLAGS. +; CHECK-NEXT: mov{{[lq]}} %[[D]], %[[AX]] +; CHECK-NEXT: addb $127, %al +; CHECK-NEXT: sahf +; Restore result of the second cmpxchg from the stack. +; CHECK-NEXT: pop{{[lq]}} %[[AX]] +; Test from EFLAGS restored from first cmpxchg, jump if that fails. +; CHECK-NEXT: jne +; Fallthrough to test the second cmpxchg's result. 
+; CHECK: testb %al, %al +; CHECK-NEXT: je +define i64 @test_two_live_flags( + i64* %foo0, i64 %bar0, i64 %baz0, + i64* %foo1, i64 %bar1, i64 %baz1) { + %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst + %p0 = extractvalue { i64, i1 } %cx0, 1 + %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst + %p1 = extractvalue { i64, i1 } %cx1, 1 + %flag = and i1 %p0, %p1 + br i1 %flag, label %t, label %f + +t: + ret i64 42 + +f: + ret i64 0 +} + +; CHECK-LABEL: asm_clobbering_flags: +; CHECK: test +; CHECK-NEXT: setg +; CHECK-NEXT: #APP +; CHECK-NEXT: bsfl +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movl +; CHECK-NEXT: ret +define i1 @asm_clobbering_flags(i32* %mem) { + %val = load i32, i32* %mem, align 4 + %cmp = icmp sgt i32 %val, 0 + %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val) + store i32 %res, i32* %mem, align 4 + ret i1 %cmp +} From 34254063b545bbbd722e3a7fba029b3e3653762a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 3 Dec 2015 23:56:42 +0000 Subject: [PATCH 036/364] Simplify since this function never fails. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254667 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LTO/LTOModule.h | 2 +- lib/LTO/LTOModule.cpp | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h index c3e86afe1d82..83a523613a76 100644 --- a/include/llvm/LTO/LTOModule.h +++ b/include/llvm/LTO/LTOModule.h @@ -170,7 +170,7 @@ struct LTOModule { /// Parse the symbols from the module and model-level ASM and add them to /// either the defined or undefined lists. - bool parseSymbols(std::string &errMsg); + void parseSymbols(); /// Add a symbol which isn't defined just yet to a list to be resolved later. void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index d28563c23b80..42a568b54c7b 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -252,11 +252,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, else Ret = new LTOModule(std::move(IRObj), target); - if (Ret->parseSymbols(errMsg)) { - delete Ret; - return nullptr; - } - + Ret->parseSymbols(); Ret->parseMetadata(); return Ret; @@ -592,9 +588,7 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, info.symbol = decl; } -/// parseSymbols - Parse the symbols from the module and model-level ASM and add -/// them to either the defined or undefined lists. -bool LTOModule::parseSymbols(std::string &errMsg) { +void LTOModule::parseSymbols() { for (auto &Sym : IRFile->symbols()) { const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); uint32_t Flags = Sym.getFlags(); @@ -649,8 +643,6 @@ bool LTOModule::parseSymbols(std::string &errMsg) { NameAndAttributes info = u->getValue(); _symbols.push_back(info); } - - return false; } /// parseMetadata - Parse metadata from the module From 7008dd751d0660fee014bb2fcc31c92eb540faa4 Mon Sep 17 00:00:00 2001 From: "Kevin B. Smith" Date: Fri, 4 Dec 2015 00:00:10 +0000 Subject: [PATCH 037/364] [CodeGen] Minor correction to comment on PhysRegInfo. 
Differential revision: http://reviews.llvm.org/D15216 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254668 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineInstrBundle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 0ce42dda55bd..4ec3c189ae03 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -174,7 +174,7 @@ class MachineOperandIteratorBase { /// Defines - Reg or a super-register is defined. bool Defines; - /// Reads - Read or a super-register is read. + /// Reads - Reg or a super-register is read. bool Reads; /// ReadsOverlap - Reg or an overlapping register is read. From b7250858d96b8ce567681214273ac0e62713c661 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Fri, 4 Dec 2015 00:00:20 +0000 Subject: [PATCH 038/364] [llvm-profdata] Add support for weighted merge of profile data This change adds support for an optional weight when merging profile data with the llvm-profdata tool. Weights are specified by adding an option ':' suffix to the input file names. Adding support for arbitrary weighting of input profile data allows for relative importance to be placed on the input data from multiple training runs. Both sampled and instrumented profiles are supported. Reviewers: dnovillo, bogner, davidxl Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14547 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254669 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-profdata.rst | 6 +- include/llvm/ProfileData/InstrProf.h | 31 ++++++-- include/llvm/ProfileData/InstrProfWriter.h | 4 +- include/llvm/ProfileData/SampleProf.h | 54 +++++++++----- lib/ProfileData/InstrProfWriter.cpp | 14 +++- .../Inputs/weight-instr-bar.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-instr-foo.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-sample-bar.proftext | 8 ++ .../Inputs/weight-sample-foo.proftext | 8 ++ test/tools/llvm-profdata/weight-instr.test | 55 ++++++++++++++ test/tools/llvm-profdata/weight-sample.test | 43 +++++++++++ tools/llvm-profdata/llvm-profdata.cpp | 70 ++++++++++++++---- unittests/ProfileData/InstrProfTest.cpp | 20 +++++ 13 files changed, 266 insertions(+), 47 deletions(-) create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext create mode 100644 test/tools/llvm-profdata/weight-instr.test create mode 100644 test/tools/llvm-profdata/weight-sample.test diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index 210826a7babc..a4b18f301e42 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -28,7 +28,7 @@ MERGE SYNOPSIS ^^^^^^^^ -:program:`llvm-profdata merge` [*options*] [*filenames...*] +:program:`llvm-profdata merge` [*options*] [*filename[:weight]...*] DESCRIPTION ^^^^^^^^^^^ @@ -37,6 +37,10 @@ DESCRIPTION generated by PGO instrumentation and merges them together into a single indexed profile data file. +The profile counts in each input file can be scaled (multiplied) by specifying +``:``, where `` is a decimal integer >= 1. +A default weight of 1 is assumed if only `` is given. 
+ OPTIONS ^^^^^^^ diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 956485119102..e1ed2e9ce48c 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -218,7 +218,8 @@ struct InstrProfValueSiteRecord { } /// Merge data from another InstrProfValueSiteRecord - void mergeValueData(InstrProfValueSiteRecord &Input) { + /// Optionally scale merged counts by \p Weight. + void mergeValueData(InstrProfValueSiteRecord &Input, uint64_t Weight = 1) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -228,7 +229,11 @@ struct InstrProfValueSiteRecord { while (I != IE && I->Value < J->Value) ++I; if (I != IE && I->Value == J->Value) { - I->Count = SaturatingAdd(I->Count, J->Count); + // TODO: Check for counter overflow and return error if it occurs. + uint64_t JCount = J->Count; + if (Weight > 1) + JCount = SaturatingMultiply(JCount, Weight); + I->Count = SaturatingAdd(I->Count, JCount); ++I; continue; } @@ -274,7 +279,8 @@ struct InstrProfRecord { ValueMapType *HashKeys); /// Merge the counts in \p Other into this one. - inline instrprof_error merge(InstrProfRecord &Other); + /// Optionally scale merged counts by \p Weight. + inline instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); /// Used by InstrProfWriter: update the value strings to commoned strings in /// the writer instance. @@ -326,7 +332,9 @@ struct InstrProfRecord { } // Merge Value Profile data from Src record to this record for ValueKind. - instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src) { + // Scale merged value counts by \p Weight. + instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, + uint64_t Weight) { uint32_t ThisNumValueSites = getNumValueSites(ValueKind); uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); if (ThisNumValueSites != OtherNumValueSites) @@ -336,7 +344,7 @@ struct InstrProfRecord { std::vector &OtherSiteRecords = Src.getValueSitesForKind(ValueKind); for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I]); + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight); return instrprof_error::success; } }; @@ -422,7 +430,8 @@ void InstrProfRecord::updateStrings(InstrProfStringTable *StrTab) { VData.Value = (uint64_t)StrTab->insertString((const char *)VData.Value); } -instrprof_error InstrProfRecord::merge(InstrProfRecord &Other) { +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, + uint64_t Weight) { // If the number of counters doesn't match we either have bad data // or a hash collision. 
if (Counts.size() != Other.Counts.size()) @@ -432,13 +441,19 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other) { for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool ResultOverflowed; - Counts[I] = SaturatingAdd(Counts[I], Other.Counts[I], ResultOverflowed); + uint64_t OtherCount = Other.Counts[I]; + if (Weight > 1) { + OtherCount = SaturatingMultiply(OtherCount, Weight, ResultOverflowed); + if (ResultOverflowed) + Result = instrprof_error::counter_overflow; + } + Counts[I] = SaturatingAdd(Counts[I], OtherCount, ResultOverflowed); if (ResultOverflowed) Result = instrprof_error::counter_overflow; } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { - instrprof_error MergeValueResult = mergeValueProfData(Kind, Other); + instrprof_error MergeValueResult = mergeValueProfData(Kind, Other, Weight); if (MergeValueResult != instrprof_error::success) Result = MergeValueResult; } diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index d026e08ec861..1958d5f232e7 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -39,8 +39,8 @@ class InstrProfWriter { void updateStringTableReferences(InstrProfRecord &I); /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is - /// summed. - std::error_code addRecord(InstrProfRecord &&I); + /// summed. Optionally scale counts by \p Weight. + std::error_code addRecord(InstrProfRecord &&I, uint64_t Weight = 1); /// Write the profile to \c OS void write(raw_fd_ostream &OS); /// Write the profile in text format to \c OS diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index a7b22c735480..3337f4d7df5c 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -173,19 +173,25 @@ class SampleRecord { SampleRecord() : NumSamples(0), CallTargets() {} /// Increment the number of samples for this record by \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addSamples(uint64_t S) { + void addSamples(uint64_t S, uint64_t Weight = 1) { + if (Weight > 1) + S = SaturatingMultiply(S, Weight); NumSamples = SaturatingAdd(NumSamples, S); } /// Add called function \p F with samples \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addCalledTarget(StringRef F, uint64_t S) { + void addCalledTarget(StringRef F, uint64_t S, uint64_t Weight = 1) { uint64_t &TargetSamples = CallTargets[F]; + if (Weight > 1) + S = SaturatingMultiply(S, Weight); TargetSamples = SaturatingAdd(TargetSamples, S); } @@ -196,10 +202,11 @@ class SampleRecord { const CallTargetMap &getCallTargets() const { return CallTargets; } /// Merge the samples in \p Other into this record. - void merge(const SampleRecord &Other) { - addSamples(Other.getSamples()); + /// Optionally scale sample counts by \p Weight. 
+ void merge(const SampleRecord &Other, uint64_t Weight = 1) { + addSamples(Other.getSamples(), Weight); for (const auto &I : Other.getCallTargets()) - addCalledTarget(I.first(), I.second); + addCalledTarget(I.first(), I.second, Weight); } void print(raw_ostream &OS, unsigned Indent) const; @@ -226,16 +233,26 @@ class FunctionSamples { FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {} void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; void dump() const; - void addTotalSamples(uint64_t Num) { TotalSamples += Num; } - void addHeadSamples(uint64_t Num) { TotalHeadSamples += Num; } - void addBodySamples(uint32_t LineOffset, uint32_t Discriminator, - uint64_t Num) { - BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num); + void addTotalSamples(uint64_t Num, uint64_t Weight = 1) { + if (Weight > 1) + Num = SaturatingMultiply(Num, Weight); + TotalSamples += Num; + } + void addHeadSamples(uint64_t Num, uint64_t Weight = 1) { + if (Weight > 1) + Num = SaturatingMultiply(Num, Weight); + TotalHeadSamples += Num; + } + void addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, + uint64_t Weight = 1) { + BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num, + Weight); } void addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, - std::string FName, uint64_t Num) { - BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(FName, - Num); + std::string FName, uint64_t Num, + uint64_t Weight = 1) { + BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( + FName, Num, Weight); } /// Return the number of samples collected at the given location. @@ -284,18 +301,19 @@ class FunctionSamples { } /// Merge the samples in \p Other into this one. - void merge(const FunctionSamples &Other) { - addTotalSamples(Other.getTotalSamples()); - addHeadSamples(Other.getHeadSamples()); + /// Optionally scale samples by \p Weight. + void merge(const FunctionSamples &Other, uint64_t Weight = 1) { + addTotalSamples(Other.getTotalSamples(), Weight); + addHeadSamples(Other.getHeadSamples(), Weight); for (const auto &I : Other.getBodySamples()) { const LineLocation &Loc = I.first; const SampleRecord &Rec = I.second; - BodySamples[Loc].merge(Rec); + BodySamples[Loc].merge(Rec, Weight); } for (const auto &I : Other.getCallsiteSamples()) { const CallsiteLocation &Loc = I.first; const FunctionSamples &Rec = I.second; - functionSamplesAt(Loc).merge(Rec); + functionSamplesAt(Loc).merge(Rec, Weight); } } diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 78bec012eeb2..2261c92f03a9 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -98,7 +98,8 @@ void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) { I.updateStrings(&StringTable); } -std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I) { +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, + uint64_t Weight) { updateStringTableReferences(I); auto &ProfileDataMap = FunctionData[I.Name]; @@ -113,9 +114,18 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I) { // We've never seen a function with this name and hash, add it. 
Dest = std::move(I); Result = instrprof_error::success; + if (Weight > 1) { + for (auto &Count : Dest.Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + } } else { // We're updating a function we've seen before. - Result = Dest.merge(I); + Result = Dest.merge(I, Weight); } // We keep track of the max function count as we go for simplicity. diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata new file mode 100644 index 0000000000000000000000000000000000000000..4ed07660f654090e750b19be4e0af609bc1c61db GIT binary patch literal 1320 zcmeyLQ&5zjmf6V600ExHYmK2yFeL$%U}Tt_rlBWzFff!ADy;yeON$fJQ=x1IMi>K1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpK1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpjI*ICuRlWs>LPl7M$03g{ F3;+VlD0u(? literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext new file mode 100644 index 000000000000..a910f745e6c7 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext @@ -0,0 +1,8 @@ +bar:1772037:35370 + 17: 35370 + 18: 35370 + 19: 7005 + 20: 29407 + 21: 12170 + 23: 18150 bar:19829 + 25: 36666 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext new file mode 100644 index 000000000000..155ec5d00315 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext @@ -0,0 +1,8 @@ +foo:1763288:35327 + 7: 35327 + 8: 35327 + 9: 6930 + 10: 29341 + 11: 11906 + 13: 18185 foo:19531 + 15: 36458 diff --git a/test/tools/llvm-profdata/weight-instr.test b/test/tools/llvm-profdata/weight-instr.test new file mode 100644 index 000000000000..bc0b5061647f --- /dev/null +++ b/test/tools/llvm-profdata/weight-instr.test @@ -0,0 +1,55 @@ +Tests for weighted merge of instrumented profiles. 
+ +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:1 %p/Inputs/weight-instr-foo.profdata:1 -o %t +RUN: llvm-profdata show --instr -all-functions %t | FileCheck %s --check-prefix=WEIGHT1 +WEIGHT1: Counters: +WEIGHT1: usage: +WEIGHT1: Hash: 0x0000000000000000 +WEIGHT1: Counters: 1 +WEIGHT1: Function count: 0 +WEIGHT1: foo: +WEIGHT1: Hash: 0x000000000000028a +WEIGHT1: Counters: 3 +WEIGHT1: Function count: 866988873 +WEIGHT1: bar: +WEIGHT1: Hash: 0x000000000000028a +WEIGHT1: Counters: 3 +WEIGHT1: Function count: 866988873 +WEIGHT1: main: +WEIGHT1: Hash: 0x7d31c47ea98f8248 +WEIGHT1: Counters: 60 +WEIGHT1: Function count: 2 +WEIGHT1: Functions shown: 4 +WEIGHT1: Total functions: 4 +WEIGHT1: Maximum function count: 866988873 +WEIGHT1: Maximum internal block count: 267914296 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:3 %p/Inputs/weight-instr-foo.profdata:5 -o %t +RUN: llvm-profdata show --instr -all-functions %t | FileCheck %s --check-prefix=WEIGHT2 +WEIGHT2: Counters: +WEIGHT2: usage: +WEIGHT2: Hash: 0x0000000000000000 +WEIGHT2: Counters: 1 +WEIGHT2: Function count: 0 +WEIGHT2: foo: +WEIGHT2: Hash: 0x000000000000028a +WEIGHT2: Counters: 3 +WEIGHT2: Function count: 4334944365 +WEIGHT2: bar: +WEIGHT2: Hash: 0x000000000000028a +WEIGHT2: Counters: 3 +WEIGHT2: Function count: 2600966619 +WEIGHT2: main: +WEIGHT2: Hash: 0x7d31c47ea98f8248 +WEIGHT2: Counters: 60 +WEIGHT2: Function count: 8 +WEIGHT2: Functions shown: 4 +WEIGHT2: Total functions: 4 +WEIGHT2: Maximum function count: 4334944365 +WEIGHT2: Maximum internal block count: 1339571480 + +3- Bad merge: foo and bar profiles with invalid weights +RUN: not llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:3 %p/Inputs/weight-instr-foo.profdata:-5 -o %t.out 2>&1 | FileCheck %s --check-prefix=ERROR3 +ERROR3: error: Input weight must be a positive integer. diff --git a/test/tools/llvm-profdata/weight-sample.test b/test/tools/llvm-profdata/weight-sample.test new file mode 100644 index 000000000000..a1fe1df1b6de --- /dev/null +++ b/test/tools/llvm-profdata/weight-sample.test @@ -0,0 +1,43 @@ +Tests for weighted merge of sample profiles. 
+ +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:1 %p/Inputs/weight-sample-foo.proftext:1 -o - | FileCheck %s --check-prefix=WEIGHT1 +WEIGHT1: foo:1763288:35327 +WEIGHT1: 7: 35327 +WEIGHT1: 8: 35327 +WEIGHT1: 9: 6930 +WEIGHT1: 10: 29341 +WEIGHT1: 11: 11906 +WEIGHT1: 13: 18185 foo:19531 +WEIGHT1: 15: 36458 +WEIGHT1: bar:1772037:35370 +WEIGHT1: 17: 35370 +WEIGHT1: 18: 35370 +WEIGHT1: 19: 7005 +WEIGHT1: 20: 29407 +WEIGHT1: 21: 12170 +WEIGHT1: 23: 18150 bar:19829 +WEIGHT1: 25: 36666 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:3 %p/Inputs/weight-sample-foo.proftext:5 -o - | FileCheck %s --check-prefix=WEIGHT2 +WEIGHT2: foo:8816440:176635 +WEIGHT2: 7: 176635 +WEIGHT2: 8: 176635 +WEIGHT2: 9: 34650 +WEIGHT2: 10: 146705 +WEIGHT2: 11: 59530 +WEIGHT2: 13: 90925 foo:97655 +WEIGHT2: 15: 182290 +WEIGHT2: bar:5316111:106110 +WEIGHT2: 17: 106110 +WEIGHT2: 18: 106110 +WEIGHT2: 19: 21015 +WEIGHT2: 20: 88221 +WEIGHT2: 21: 36510 +WEIGHT2: 23: 54450 bar:59487 +WEIGHT2: 25: 109998 + +3- Bad merge: foo and bar profiles with invalid weights +RUN: not llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:3 %p/Inputs/weight-sample-foo.proftext:-5 -o %t.out 2>&1 | FileCheck %s --check-prefix=ERROR3 +ERROR3: error: Input weight must be a positive integer. diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 10b6855233d5..56c80f518ec4 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/LLVMContext.h" #include "llvm/ProfileData/InstrProfReader.h" @@ -27,6 +28,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -93,7 +95,17 @@ static void handleMergeWriterError(std::error_code &Error, } } -static void mergeInstrProfile(const cl::list &Inputs, +struct WeightedFile { + StringRef Filename; + uint64_t Weight; + + WeightedFile() {} + + WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {} +}; +typedef SmallVector WeightedFileVector; + +static void mergeInstrProfile(const WeightedFileVector &Inputs, StringRef OutputFilename, ProfileFormat OutputFormat) { if (OutputFilename.compare("-") == 0) @@ -109,21 +121,21 @@ static void mergeInstrProfile(const cl::list &Inputs, InstrProfWriter Writer; SmallSet WriterErrorCodes; - for (const auto &Filename : Inputs) { - auto ReaderOrErr = InstrProfReader::create(Filename); + for (const auto &Input : Inputs) { + auto ReaderOrErr = InstrProfReader::create(Input.Filename); if (std::error_code ec = ReaderOrErr.getError()) - exitWithErrorCode(ec, Filename); + exitWithErrorCode(ec, Input.Filename); auto Reader = std::move(ReaderOrErr.get()); for (auto &I : *Reader) { - if (std::error_code EC = Writer.addRecord(std::move(I))) { + if (std::error_code EC = Writer.addRecord(std::move(I), Input.Weight)) { // Only show hint the first time an error occurs. 
bool firstTime = WriterErrorCodes.insert(EC).second; - handleMergeWriterError(EC, Filename, I.Name, firstTime); + handleMergeWriterError(EC, Input.Filename, I.Name, firstTime); } } if (Reader->hasError()) - exitWithErrorCode(Reader->getError(), Filename); + exitWithErrorCode(Reader->getError(), Input.Filename); } if (OutputFormat == PF_Text) Writer.writeText(Output); @@ -135,7 +147,7 @@ static sampleprof::SampleProfileFormat FormatMap[] = { sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Binary, sampleprof::SPF_GCC}; -static void mergeSampleProfile(const cl::list &Inputs, +static void mergeSampleProfile(const WeightedFileVector &Inputs, StringRef OutputFilename, ProfileFormat OutputFormat) { using namespace sampleprof; @@ -147,11 +159,11 @@ static void mergeSampleProfile(const cl::list &Inputs, auto Writer = std::move(WriterOrErr.get()); StringMap ProfileMap; SmallVector, 5> Readers; - for (const auto &Filename : Inputs) { + for (const auto &Input : Inputs) { auto ReaderOrErr = - SampleProfileReader::create(Filename, getGlobalContext()); + SampleProfileReader::create(Input.Filename, getGlobalContext()); if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, Filename); + exitWithErrorCode(EC, Input.Filename); // We need to keep the readers around until after all the files are // read so that we do not lose the function names stored in each @@ -160,7 +172,7 @@ static void mergeSampleProfile(const cl::list &Inputs, Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, Filename); + exitWithErrorCode(EC, Input.Filename); StringMap &Profiles = Reader->getProfiles(); for (StringMap::iterator I = Profiles.begin(), @@ -168,15 +180,38 @@ static void mergeSampleProfile(const cl::list &Inputs, I != E; ++I) { StringRef FName = I->first(); FunctionSamples &Samples = I->second; - ProfileMap[FName].merge(Samples); + ProfileMap[FName].merge(Samples, Input.Weight); } } Writer->write(ProfileMap); } +static void parseInputFiles(const cl::list &Inputs, + WeightedFileVector &WeightedInputs) { + WeightedInputs.reserve(Inputs.size()); + + for (StringRef Input : Inputs) { + StringRef FileName; + StringRef WeightStr; + std::tie(FileName, WeightStr) = Input.rsplit(':'); + if (WeightStr.empty() || sys::fs::exists(Input)) { + // No weight specified or valid path containing delimiter. + WeightedInputs.push_back(WeightedFile(Input, 1)); + } else { + // Input weight specified. + uint64_t Weight; + if (WeightStr.getAsInteger(10, Weight) || Weight < 1) { + // Invalid input weight. 
+ exitWithError("Input weight must be a positive integer."); + } + WeightedInputs.push_back(WeightedFile(FileName, Weight)); + } + } +} + static int merge_main(int argc, const char *argv[]) { cl::list Inputs(cl::Positional, cl::Required, cl::OneOrMore, - cl::desc("")); + cl::desc("")); cl::opt OutputFilename("output", cl::value_desc("output"), cl::init("-"), cl::Required, @@ -198,10 +233,13 @@ static int merge_main(int argc, const char *argv[]) { cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); + WeightedFileVector WeightedInputs; + parseInputFiles(Inputs, WeightedInputs); + if (ProfileKind == instr) - mergeInstrProfile(Inputs, OutputFilename, OutputFormat); + mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat); else - mergeSampleProfile(Inputs, OutputFilename, OutputFormat); + mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat); return 0; } diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp index 635a5431a513..946afdadba93 100644 --- a/unittests/ProfileData/InstrProfTest.cpp +++ b/unittests/ProfileData/InstrProfTest.cpp @@ -490,4 +490,24 @@ TEST_F(InstrProfTest, get_max_function_count) { ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); } +TEST_F(InstrProfTest, get_weighted_function_counts) { + InstrProfRecord Record1("foo", 0x1234, {1, 2}); + InstrProfRecord Record2("foo", 0x1235, {3, 4}); + Writer.addRecord(std::move(Record1), 3); + Writer.addRecord(std::move(Record2), 5); + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + std::vector Counts; + ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1234, Counts))); + ASSERT_EQ(2U, Counts.size()); + ASSERT_EQ(3U, Counts[0]); + ASSERT_EQ(6U, Counts[1]); + + ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235, Counts))); + ASSERT_EQ(2U, Counts.size()); + ASSERT_EQ(15U, Counts[0]); + ASSERT_EQ(20U, Counts[1]); +} + } // end anonymous namespace From 21aabdad38b4f6284e44df5456b8f8f4a844c5c8 Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Fri, 4 Dec 2015 00:36:58 +0000 Subject: [PATCH 039/364] Don't punish vectorized arithmetic instruction whose type will be split to multiple registers Currently in LLVM's cost model, a vectorized arithmetic instruction will have high cost if its type is split into multiple registers. However, this punishment is too heavy and unnecessary. The overhead of the split should not be on arithmetic instructions but instructions that implement the split. Note that during vectorization we have calculated the register pressure, and we only choose proper interleaving factor (and also vectorization factor) so that we don't use more registers than the maximum number. Here is a very simple example: if a vadd has the cost 1, and if we double VF so that we need two registers to perform it, then its cost will become 4 with the current implementation, which will prevent us to use larger VF. 
Differential revision: http://reviews.llvm.org/D15159 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254671 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/BasicTTIImpl.h | 6 +----- test/Analysis/CostModel/X86/reduction.ll | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index e2245e9984b8..ec311a093869 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -302,12 +302,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; + return LT.first * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll index 78e65aee1460..aaafe07c1eb8 100644 --- a/test/Analysis/CostModel/X86/reduction.ll +++ b/test/Analysis/CostModel/X86/reduction.ll @@ -33,7 +33,7 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; CHECK-LABEL: reduction_cost_int -; CHECK: cost of 23 {{.*}} extractelement +; CHECK: cost of 17 {{.*}} extractelement %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ret i32 %r From cb1cb45c602578eeb8c6894e86206bade019d044 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 4 Dec 2015 00:45:43 +0000 Subject: [PATCH 040/364] Emit function alias to data as a function symbol. CFI emits jump slots for indirect functions as a byte array constant, and declares function-typed aliases to these constants. This change fixes AsmPrinter to emit these aliases as function symbols and not data symbols. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 5 +++++ test/CodeGen/Generic/function-alias.ll | 12 ++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 test/CodeGen/Generic/function-alias.ll diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2cfea650872a..b8604240b5d9 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1187,6 +1187,11 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. 
+ if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: diff --git a/test/CodeGen/Generic/function-alias.ll b/test/CodeGen/Generic/function-alias.ll new file mode 100644 index 000000000000..d68d75d5578a --- /dev/null +++ b/test/CodeGen/Generic/function-alias.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; "data" constant +@0 = private constant <{ i8, i8 }> <{i8 15, i8 11}>, section ".text" + +; function-typed alias +@ud2 = alias void (), bitcast (<{ i8, i8 }>* @0 to void ()*) + +; Check that "ud2" is emitted as a function symbol. +; CHECK: .type{{.*}}ud2,@function From f015928ee8d3b4b918ed5c1d3ea3327417a82be0 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 00:45:57 +0000 Subject: [PATCH 041/364] Simplify the error handling in llvm-lto a bit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254675 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-lto/llvm-lto.cpp | 73 +++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 64e0ae31d06a..86b95577b307 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -150,18 +150,34 @@ static void diagnosticHandler(const DiagnosticInfo &DI) { exit(1); } +static void error(const Twine &Msg) { + errs() << "llvm-lto: " << Msg << '\n'; + exit(1); +} + +static void error(std::error_code EC, const Twine &Prefix) { + if (EC) + error(Prefix + ": " + EC.message()); +} + +template +static void error(const ErrorOr &V, const Twine &Prefix) { + error(V.getError(), Prefix); +} + static std::unique_ptr getLocalLTOModule(StringRef Path, std::unique_ptr &Buffer, - const TargetOptions &Options, std::string &Error) { + const TargetOptions &Options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(Path); - if (std::error_code EC = BufferOrErr.getError()) { - Error = EC.message(); - return nullptr; - } + error(BufferOrErr, "error loading file '" + Path + "'"); Buffer = std::move(BufferOrErr.get()); - return std::unique_ptr(LTOModule::createInLocalContext( + std::string Error; + std::unique_ptr Ret(LTOModule::createInLocalContext( Buffer->getBufferStart(), Buffer->getBufferSize(), Options, Error, Path)); + if (!Error.empty()) + error("error loading file '" + Path + "' " + Error); + return Ret; } /// \brief List symbols in each IR file. @@ -170,42 +186,30 @@ getLocalLTOModule(StringRef Path, std::unique_ptr &Buffer, /// functionality that's exposed by the C API to list symbols. Moreover, this /// provides testing coverage for modules that have been created in their own /// contexts. -static int listSymbols(StringRef Command, const TargetOptions &Options) { +static void listSymbols(const TargetOptions &Options) { for (auto &Filename : InputFilenames) { - std::string Error; std::unique_ptr Buffer; std::unique_ptr Module = - getLocalLTOModule(Filename, Buffer, Options, Error); - if (!Module) { - errs() << Command << ": error loading file '" << Filename - << "': " << Error << "\n"; - return 1; - } + getLocalLTOModule(Filename, Buffer, Options); // List the symbols. 
outs() << Filename << ":\n"; for (int I = 0, E = Module->getSymbolCount(); I != E; ++I) outs() << Module->getSymbolName(I) << "\n"; } - return 0; } /// Create a combined index file from the input IR files and write it. /// /// This is meant to enable testing of ThinLTO combined index generation, /// currently available via the gold plugin via -thinlto. -static int createCombinedFunctionIndex(StringRef Command) { +static void createCombinedFunctionIndex() { FunctionInfoIndex CombinedIndex; uint64_t NextModuleId = 0; for (auto &Filename : InputFilenames) { ErrorOr> IndexOrErr = llvm::getFunctionIndexForFile(Filename, diagnosticHandler); - if (std::error_code EC = IndexOrErr.getError()) { - std::string Error = EC.message(); - errs() << Command << ": error loading file '" << Filename - << "': " << Error << "\n"; - return 1; - } + error(IndexOrErr, "error loading file '" + Filename + "'"); std::unique_ptr Index = std::move(IndexOrErr.get()); // Skip files without a function summary. if (!Index) @@ -216,14 +220,9 @@ static int createCombinedFunctionIndex(StringRef Command) { assert(!OutputFilename.empty()); raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC, sys::fs::OpenFlags::F_None); - if (EC) { - errs() << Command << ": error opening the file '" << OutputFilename - << ".thinlto.bc': " << EC.message() << "\n"; - return 1; - } + error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'"); WriteFunctionSummaryToFile(CombinedIndex, OS); OS.close(); - return 0; } int main(int argc, char **argv) { @@ -234,10 +233,8 @@ int main(int argc, char **argv) { llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. cl::ParseCommandLineOptions(argc, argv, "llvm LTO linker\n"); - if (OptLevel < '0' || OptLevel > '3') { - errs() << argv[0] << ": optimization level must be between 0 and 3\n"; - return 1; - } + if (OptLevel < '0' || OptLevel > '3') + error("optimization level must be between 0 and 3"); // Initialize the configured targets. InitializeAllTargets(); @@ -248,11 +245,15 @@ int main(int argc, char **argv) { // set up the TargetOptions for the machine TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - if (ListSymbolsOnly) - return listSymbols(argv[0], Options); + if (ListSymbolsOnly) { + listSymbols(Options); + return 0; + } - if (ThinLTO) - return createCombinedFunctionIndex(argv[0]); + if (ThinLTO) { + createCombinedFunctionIndex(); + return 0; + } unsigned BaseArg = 0; From cd7c95d7e887ecda0b795690623ea9567f0659e7 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 4 Dec 2015 00:57:25 +0000 Subject: [PATCH 042/364] Fix function-alias.ll test on non-X86 targets. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254676 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/Generic/function-alias.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/CodeGen/Generic/function-alias.ll b/test/CodeGen/Generic/function-alias.ll index d68d75d5578a..7eec5be198b0 100644 --- a/test/CodeGen/Generic/function-alias.ll +++ b/test/CodeGen/Generic/function-alias.ll @@ -1,6 +1,4 @@ ; RUN: llc < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" ; "data" constant @0 = private constant <{ i8, i8 }> <{i8 15, i8 11}>, section ".text" From 7579d3aaed060dd8002e11f9c64f27508ec32f3c Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Fri, 4 Dec 2015 01:02:10 +0000 Subject: [PATCH 043/364] [PGO] Unify VP data format between raw and indexed profile (Reader) With the latest refactoring and code sharing patches landed, it is possible to unify the value profile implementation between raw and indexed profile. This is the patch in raw profile reader that uses the common interface. Differential Revision: http://reviews.llvm.org/D15056 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254677 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ProfileData/InstrProfReader.h | 13 +++++- lib/ProfileData/InstrProfReader.cpp | 49 +++++----------------- 2 files changed, 22 insertions(+), 40 deletions(-) diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 49233366e164..318981f75e18 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -162,10 +162,19 @@ class RawInstrProfReader : public InstrProfReader { private: std::error_code readNextHeader(const char *CurrentPos); std::error_code readHeader(const RawInstrProf::Header &Header); - template - IntT swap(IntT Int) const { + template IntT swap(IntT Int) const { return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; } + support::endianness getDataEndianness() const { + support::endianness HostEndian = getHostEndianness(); + if (!ShouldSwapBytes) + return HostEndian; + if (HostEndian == support::little) + return support::big; + else + return support::little; + } + inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); } diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index cfc968739806..7683cad6ede4 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -296,55 +296,28 @@ std::error_code RawInstrProfReader::readRawCounts( } template -std::error_code RawInstrProfReader::readValueProfilingData( - InstrProfRecord &Record) { +std::error_code +RawInstrProfReader::readValueProfilingData(InstrProfRecord &Record) { Record.clearValueData(); if (!Data->Values || (ValueDataDelta == 0)) return success(); - // Read value data. - uint64_t NumVSites = 0; - for (uint32_t Kind = IPVK_First; Kind <= ValueKindLast; ++Kind) - NumVSites += swap(Data->NumValueSites[Kind]); - NumVSites += getNumPaddingBytes(NumVSites); + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(getValueDataCounts(Data->Values), + (const unsigned char *)ProfileEnd, + getDataEndianness()); - auto VDataCounts = makeArrayRef(getValueDataCounts(Data->Values), NumVSites); - // Check bounds. 
- if (VDataCounts.data() < ValueDataStart || - VDataCounts.data() + VDataCounts.size() > - reinterpret_cast(ProfileEnd)) - return error(instrprof_error::malformed); + if (VDataPtrOrErr.getError()) + return VDataPtrOrErr.getError(); - const InstrProfValueData *VDataPtr = - getValueData(swap(Data->Values) + NumVSites); - for (uint32_t Kind = IPVK_First; Kind <= ValueKindLast; ++Kind) { - NumVSites = swap(Data->NumValueSites[Kind]); - Record.reserveSites(Kind, NumVSites); - for (uint32_t VSite = 0; VSite < NumVSites; ++VSite) { - - uint32_t VDataCount = VDataCounts[VSite]; - if ((const char *)(VDataPtr + VDataCount) > ProfileEnd) - return error(instrprof_error::malformed); - - std::vector CurrentValues; - CurrentValues.reserve(VDataCount); - for (uint32_t VIndex = 0; VIndex < VDataCount; ++VIndex) { - uint64_t TargetValue = swap(VDataPtr->Value); - uint64_t Count = swap(VDataPtr->Count); - CurrentValues.push_back({TargetValue, Count}); - ++VDataPtr; - } - Record.addValueData(Kind, VSite, CurrentValues.data(), - VDataCount, &FunctionPtrToNameMap); - } - } + VDataPtrOrErr.get()->deserializeTo(Record, &FunctionPtrToNameMap); return success(); } template -std::error_code RawInstrProfReader::readNextRecord( - InstrProfRecord &Record) { +std::error_code +RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { if (atEnd()) if (std::error_code EC = readNextHeader(ProfileEnd)) return EC; From 2cb46213b7277fc92db5d443c8e217140e5ac2c8 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 4 Dec 2015 01:14:24 +0000 Subject: [PATCH 044/364] IR: Update a comment and a bool that've been out of date since 2012 It became impossible to get here with a half in r157393, over 3 years ago. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254679 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 759c5a8001c0..cb9a792c598b 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1108,11 +1108,10 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // the value back and get the same value. // bool ignored; - bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf; bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; bool isInf = CFP->getValueAPF().isInfinity(); bool isNaN = CFP->getValueAPF().isNaN(); - if (!isHalf && !isInf && !isNaN) { + if (!isInf && !isNaN) { double Val = isDouble ? CFP->getValueAPF().convertToDouble() : CFP->getValueAPF().convertToFloat(); SmallString<128> StrVal; @@ -1140,7 +1139,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, "assuming that double is 64 bits!"); char Buffer[40]; APFloat apf = CFP->getValueAPF(); - // Halves and floats are represented in ASCII IR as double, convert. + // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); From 2012083871b75ca032fefaec739ce1f280a8f3de Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Fri, 4 Dec 2015 01:18:17 +0000 Subject: [PATCH 045/364] X86InstrInfo::copyPhysReg: workaround reg liveness Summary: computeRegisterLiveness and analyzePhysReg are currently getting confused about liveness in some cases, breaking copyPhysReg's calculation of whether AX is dead in some cases. Work around this issue temporarily by assuming that AX is always live. 
See detail in: https://llvm.org/bugs/show_bug.cgi?id=25033#c7 And associated bugs PR24535 PR25033 PR24991 PR24992 PR25201. This workaround makes the code correct but slightly inefficient, but it seems to confuse the machine instr verifier which now things EAX was undefined in some cases where it's being conservatively saved / restored. Reviewers: majnemer, sanjoy Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15198 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254680 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 16 +++++-- test/CodeGen/X86/cmpxchg-clobber-flags.ll | 43 +++++++++++++++++-- .../X86/peephole-na-phys-copy-folding.ll | 8 ++-- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 12da3a9319e6..e9d36f8ce2f1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4412,9 +4412,19 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Pop = is64 ? X86::POP64r : X86::POP32r; int AX = is64 ? X86::RAX : X86::EAX; - bool AXDead = (Reg == AX) || - (MachineBasicBlock::LQR_Dead == - MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + bool AXDead = (Reg == AX); + // FIXME: The above could figure out that AX is dead in more cases with: + // || (MachineBasicBlock::LQR_Dead == + // MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + // + // Unfortunately this is slightly broken, see PR24535 and the likely + // related PR25033 PR24991 PR24992 PR25201. These issues seem to + // showcase sub-register / super-register confusion: a previous kill + // of AH but no kill of AL leads computeRegisterLiveness to + // erroneously conclude that AX is dead. + // + // Once fixed, also update cmpxchg-clobber-flags.ll and + // peephole-na-phys-copy-folding.ll. if (!AXDead) BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index c129128b5fa7..791edba89c44 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,7 +1,14 @@ -; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 -; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f -; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 +; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 +; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 +; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 + +; FIXME: X86InstrInfo::copyPhysReg had code which figured out whether AX was +; live or not to avoid save / restore when it's not needed. See FIXME in +; that function for more details on which the code is currently +; disabled. The extra push/pop are marked below and can be removed once +; the issue is fixed. +; -verify-machineinstrs should also be added back in the RUN lines above. 
declare i32 @foo() declare i32 @bar(i64) @@ -17,22 +24,34 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { ; i386-NEXT: movl %edx, 4(%esp) ; i386-NEXT: movl %eax, (%esp) ; i386-NEXT: calll bar +; ** FIXME Next line isn't actually necessary. ** +; i386-NEXT: pushl %eax ; i386-NEXT: movl [[FLAGS]], %eax ; i386-NEXT: addb $127, %al ; i386-NEXT: sahf +; ** FIXME Next line isn't actually necessary. ** +; i386-NEXT: popl %eax ; i386-NEXT: jne ; i386f-LABEL: test_intervening_call: ; i386f: cmpxchg8b ; i386f-NEXT: movl %eax, (%esp) ; i386f-NEXT: movl %edx, 4(%esp) +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: pushl %eax ; i386f-NEXT: seto %al ; i386f-NEXT: lahf ; i386f-NEXT: movl %eax, [[FLAGS:%.*]] +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: popl %eax ; i386f-NEXT: calll bar +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: pushl %eax ; i386f-NEXT: movl [[FLAGS]], %eax ; i386f-NEXT: addb $127, %al ; i386f-NEXT: sahf +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: popl %eax ; i386f-NEXT: jne ; x8664-LABEL: test_intervening_call: @@ -44,9 +63,13 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { ; x8664-NEXT: popq %rax ; x8664-NEXT: movq %rax, %rdi ; x8664-NEXT: callq bar +; ** FIXME Next line isn't actually necessary. ** +; x8664-NEXT: pushq %rax ; x8664-NEXT: movq [[FLAGS]], %rax ; x8664-NEXT: addb $127, %al ; x8664-NEXT: sahf +; ** FIXME Next line isn't actually necessary. ** +; x8664-NEXT: popq %rax ; x8664-NEXT: jne %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst @@ -111,9 +134,13 @@ cond.end: define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; i386-LABEL: test_feed_cmov: ; i386: cmpxchgl +; ** FIXME Next line isn't actually necessary. ** +; i386-NEXT: pushl %eax ; i386-NEXT: seto %al ; i386-NEXT: lahf ; i386-NEXT: movl %eax, [[FLAGS:%.*]] +; ** FIXME Next line isn't actually necessary. ** +; i386-NEXT: popl %eax ; i386-NEXT: calll foo ; i386-NEXT: pushl %eax ; i386-NEXT: movl [[FLAGS]], %eax @@ -123,9 +150,13 @@ define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; i386f-LABEL: test_feed_cmov: ; i386f: cmpxchgl +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: pushl %eax ; i386f-NEXT: seto %al ; i386f-NEXT: lahf ; i386f-NEXT: movl %eax, [[FLAGS:%.*]] +; ** FIXME Next line isn't actually necessary. ** +; i386f-NEXT: popl %eax ; i386f-NEXT: calll foo ; i386f-NEXT: pushl %eax ; i386f-NEXT: movl [[FLAGS]], %eax @@ -135,9 +166,13 @@ define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; x8664-LABEL: test_feed_cmov: ; x8664: cmpxchgl +; ** FIXME Next line isn't actually necessary. ** +; x8664: pushq %rax ; x8664: seto %al ; x8664-NEXT: lahf ; x8664-NEXT: movq %rax, [[FLAGS:%.*]] +; ** FIXME Next line isn't actually necessary. 
** +; x8664-NEXT: popq %rax ; x8664-NEXT: callq foo ; x8664-NEXT: pushq %rax ; x8664-NEXT: movq [[FLAGS]], %rax diff --git a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index 438bf8ddf4c7..891a925611cf 100644 --- a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s + +; FIXME Add -verify-machineinstrs back when PR24535 is fixed. ; The peephole optimizer can elide some physical register copies such as ; EFLAGS. Make sure the flags are used directly, instead of needlessly using @@ -137,7 +139,7 @@ f: ; CHECK-LABEL: test_two_live_flags: ; CHECK: cmpxchg -; CHECK-NEXT: seto %al +; CHECK: seto %al ; CHECK-NEXT: lahf ; Save result of the first cmpxchg into D. ; CHECK-NEXT: mov{{[lq]}} %[[AX:[er]ax]], %[[D:[re]d[xi]]] From ae4aa8b8d233366d231a10dafae68d7287512927 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 4 Dec 2015 01:31:59 +0000 Subject: [PATCH 046/364] raw_ostream: << operator for callables with raw_ostream argument This is a revised version of r254655 which uses a Printable wrapper class to avoid ambiguous overload problems. Differential Revision: http://reviews.llvm.org/D14348 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254681 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Printable.h | 52 ++++++++++ include/llvm/Target/TargetRegisterInfo.h | 74 ++------------- lib/CodeGen/RegAllocPBQP.cpp | 27 ++---- .../SelectionDAG/SelectionDAGDumper.cpp | 18 +--- lib/CodeGen/TargetRegisterInfo.cpp | 95 +++++++++++-------- 5 files changed, 126 insertions(+), 140 deletions(-) create mode 100644 include/llvm/Support/Printable.h diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h new file mode 100644 index 000000000000..5c1b8d5070d4 --- /dev/null +++ b/include/llvm/Support/Printable.h @@ -0,0 +1,52 @@ +//===--- Printable.h - Print function helpers -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Printable struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PRINTABLE_H +#define LLVM_SUPPORT_PRINTABLE_H + +#include + +namespace llvm { + +class raw_ostream; + +/// Simple wrapper around std::function. +/// This class is usefull to construct print helpers for raw_ostream. +/// +/// Example: +/// Printable PrintRegister(unsigned Register) { +/// return Printable([Register](raw_ostream &OS) { +/// OS << getRegisterName(Register); +/// } +/// } +/// ... OS << PrintRegister(Register); ... +/// +/// Implementation note: Ideally this would just be a typedef, but doing so +/// leads to operator << being ambiguous as function has matching constructors +/// in some STL versions. I have seen the problem on gcc 4.6 libstdc++ and +/// microsoft STL. 
+class Printable { +public: + std::function Print; + Printable(const std::function Print) + : Print(Print) {} +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) { + P.Print(OS); + return OS; +} + +} + +#endif diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 7d293fe82a6b..414255edb23e 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -22,6 +22,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Printable.h" #include #include @@ -932,7 +933,6 @@ struct VirtReg2IndexFunctor : public std::unary_function { } }; -/// Helper class for printing registers on a raw_ostream. /// Prints virtual and physical registers with or without a TRI instance. /// /// The format is: @@ -943,24 +943,10 @@ struct VirtReg2IndexFunctor : public std::unary_function { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << PrintReg(Reg, TRI) << '\n'; -/// -class PrintReg { - const TargetRegisterInfo *TRI; - unsigned Reg; - unsigned SubIdx; -public: - explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr, - unsigned subidx = 0) - : TRI(tri), Reg(reg), SubIdx(subidx) {} - void print(raw_ostream&) const; -}; +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, + unsigned SubRegIdx = 0); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing register units on a raw_ostream. +/// Create Printable object to print register units on a \ref raw_ostream. /// /// Register units are named after their root registers: /// @@ -968,54 +954,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { /// FP0~ST7 - Dual roots. /// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -/// -class PrintRegUnit { -protected: - const TargetRegisterInfo *TRI; - unsigned Unit; -public: - PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) - : TRI(tri), Unit(unit) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { - PR.print(OS); - return OS; -} +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -/// It is often convenient to track virtual registers and -/// physical register units in the same list. -class PrintVRegOrUnit : protected PrintRegUnit { -public: - PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) - : PrintRegUnit(VRegOrUnit, tri) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const PrintVRegOrUnit &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing lane masks. -/// -/// They are currently printed out as hexadecimal numbers. -/// Usage: OS << PrintLaneMask(Mask); -class PrintLaneMask { -protected: - LaneBitmask LaneMask; -public: - PrintLaneMask(LaneBitmask LaneMask) - : LaneMask(LaneMask) {} - void print(raw_ostream&) const; -}; +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMask &P) { - P.print(OS); - return OS; -} +/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. 
+Printable PrintLaneMask(LaneBitmask LaneMask); } // End llvm namespace diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index afa98b26d727..fd28b05ed80a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. +static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }); } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d362f98d6464..7c5492b554c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -369,25 +370,14 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -namespace { -class PrintNodeId { - const SDNode &Node; -public: - explicit PrintNodeId(const SDNode &Node) - : Node(Node) {} - void print(raw_ostream &OS) const { +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { #ifndef NDEBUG OS << 't' << Node.PersistentId; #else OS << (const void*)&Node; #endif - } -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeId &P) { - P.print(OS); - return OS; -} + }); } void SDNode::dump() const { dump(nullptr); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 0c4a3dcb226e..0a7042ac3db5 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -40,58 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << 
TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); } -void PrintLaneMask::print(raw_ostream &OS) const { - OS << format("%08X", LaneMask); +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); } +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * From 7adbf112c7e68bbd8cda3315eb3d831426401987 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 4 Dec 2015 01:51:19 +0000 Subject: [PATCH 047/364] ScheduleDAGInstrs: Rework schedule graph builder. Re-comitting with a change that avoids undefined uses getting put into the VRegUses list. The new algorithm remembers the uses encountered while walking backwards until a matching def is found. 
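As a minimal, self-contained sketch with hypothetical names and containers (the actual ScheduleDAGInstrs implementation follows in the diff below), the idea is: walk the region bottom-up, record every register use encountered, and when a def of that register appears, wire the recorded uses to it and clear them.

#include <cstdio>
#include <map>
#include <vector>

// Illustrative only: each instruction defines at most one register
// (-1 means no def) and may use several.
struct Insn {
  int Def;
  std::vector<int> Uses;
};

int main() {
  // A small region in program order; the graph builder scans it backwards.
  std::vector<Insn> Region = {{/*Def=*/1, {}}, {/*Def=*/2, {1}}, {/*Def=*/-1, {1, 2}}};

  // Uses already seen while walking backwards, still waiting for their def.
  std::map<int, std::vector<size_t>> PendingUses;

  for (size_t I = Region.size(); I-- > 0;) {
    // A def satisfies every use of the same register recorded from later
    // instructions: add a data dependence and drop those pending uses.
    if (Region[I].Def != -1) {
      for (size_t UseIdx : PendingUses[Region[I].Def])
        std::printf("data dep: insn %zu -> insn %zu (reg %d)\n", I, UseIdx,
                    Region[I].Def);
      PendingUses.erase(Region[I].Def);
    }
    // This instruction's own uses still await an earlier (not yet seen) def.
    for (int R : Region[I].Uses)
      PendingUses[R].push_back(I);
  }
  return 0;
}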
Contrary to the previous version this: - Works without LiveIntervals being available - Allows to increase the precision to subregisters/lanemasks (not used for now) The changes in the AMDGPU tests are necessary because the R600 scheduler is not stable with respect to the order of nodes in the ready queues. Differential Revision: http://reviews.llvm.org/D9068 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254683 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAGInstrs.h | 40 ++- lib/CodeGen/ScheduleDAGInstrs.cpp | 227 +++++++++++++----- test/CodeGen/AMDGPU/image-attributes.ll | 20 +- test/CodeGen/AMDGPU/literals.ll | 8 +- .../AMDGPU/llvm.AMDGPU.read.workdim.ll | 2 +- test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll | 2 +- .../AMDGPU/llvm.r600.read.local.size.ll | 6 +- test/CodeGen/AMDGPU/or.ll | 2 +- test/CodeGen/AMDGPU/set-dx10.ll | 48 ++-- test/CodeGen/AMDGPU/sext-in-reg.ll | 4 +- test/CodeGen/AMDGPU/shl.ll | 12 +- test/CodeGen/AMDGPU/sra.ll | 8 +- test/CodeGen/AMDGPU/srl.ll | 10 +- test/CodeGen/AMDGPU/unsupported-cc.ll | 32 +-- test/CodeGen/AMDGPU/work-item-intrinsics.ll | 12 +- test/CodeGen/AMDGPU/xor.ll | 2 +- 16 files changed, 279 insertions(+), 156 deletions(-) diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 1446f2ac082b..c715e0f79205 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -33,15 +33,26 @@ namespace llvm { /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { unsigned VirtReg; + LaneBitmask LaneMask; SUnit *SU; - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} + VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) + : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; + /// Mapping from virtual register to SUnit including an operand index. + struct VReg2SUnitOperIdx : public VReg2SUnit { + unsigned OperandIndex; + + VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, + unsigned OperandIndex, SUnit *SU) + : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} + }; + /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -69,7 +80,10 @@ namespace llvm { /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2UseMap; + typedef SparseMultiSet VReg2SUnitMultiMap; + + typedef SparseMultiSet + VReg2SUnitOperIdxMultiMap; /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. @@ -95,6 +109,9 @@ namespace llvm { /// it has taken responsibility for scheduling the terminator correctly. bool CanHandleTerminators; + /// Whether lane masks should get tracked. + bool TrackLaneMasks; + /// State specific to the current scheduling region. /// ------------------------------------------------ @@ -117,7 +134,7 @@ namespace llvm { /// After calling BuildSchedGraph, each vreg used in the scheduling region /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. - VReg2UseMap VRegUses; + VReg2SUnitMultiMap VRegUses; /// State internal to DAG building. 
/// ------------------------------- @@ -129,8 +146,12 @@ namespace llvm { Reg2SUnitsMap Defs; Reg2SUnitsMap Uses; - /// Track the last instruction in this region defining each virtual register. - VReg2SUnitMap VRegDefs; + /// Tracks the last instruction(s) in this region defining each virtual + /// register. There may be multiple current definitions for a register with + /// disjunct lanemasks. + VReg2SUnitMultiMap CurrentVRegDefs; + /// Tracks the last instructions in this region using each virtual register. + VReg2SUnitOperIdxMultiMap CurrentVRegUses; /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here @@ -200,7 +221,8 @@ namespace llvm { /// input. void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, - PressureDiffs *PDiffs = nullptr); + PressureDiffs *PDiffs = nullptr, + bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier. We want to @@ -247,6 +269,12 @@ namespace llvm { /// Other adjustments may be made to the instruction if necessary. Return /// true if the operand has been deleted, false if not. bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + /// Returns a mask for which lanes get read/written by the given (register) + /// machine operand. + LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + void collectVRegUses(SUnit *SU); }; /// newSUnit - Creates a new SUnit and return a ptr to it. diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 12b2beb357b4..9d588ff24f61 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -55,7 +55,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - FirstDbgValue(nullptr) { + TrackLaneMasks(false), FirstDbgValue(nullptr) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -363,6 +363,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } +LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const +{ + unsigned Reg = MO.getReg(); + // No point in tracking lanemasks if we don't have interesting subregisters. + const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. 
@@ -370,35 +384,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } - // Singly defined vregs do not have output/anti dependencies. - // The current operand is a def, so we have at least one. - // Check here if there are any others... + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } + + // Shortcut: Singly defined vregs do not have output/anti dependencies. if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest def of this vreg. + // Add output dependence to the next nearest defs of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI == VRegDefs.end()) - VRegDefs.insert(VReg2SUnit(Reg, SU)); - else { - SUnit *DefSU = DefI->SU; - if (DefSU != SU && DefSU != &ExitSU) { - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } - DefI->SU = SU; + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. 
This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. + if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -408,49 +493,26 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; - // Record this local VReg use. - VReg2UseMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; - } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, SU)); - - // Lookup this operand's reaching definition. - assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveQueryResult LRQ - = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); - VNInfo *VNI = LRQ.valueIn(); - - // VNI will be valid because MachineOperand::readsReg() is checked by caller. - assert(VNI && "No value to read by operand"); - MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); - // Phis and other noninstructions (after coalescing) have a NULL Def. - if (Def) { - SUnit *DefSU = getSUnit(Def); - if (DefSU) { - // The reaching Def lives within this scheduling region. - // Create a data dependence. - SDep dep(DefSU, SDep::Data, Reg); - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - - const TargetSubtargetInfo &ST = MF.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); - SU->addPred(dep); - } + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); } - - // Add antidependence to the following def of the vreg it uses. 
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about @@ -733,17 +795,44 @@ void ScheduleDAGInstrs::initSUnits() { } } +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. + VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs) { + PressureDiffs *PDiffs, + bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -777,10 +866,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + VRegUses.clear(); - VRegDefs.setUniverse(MRI.getNumVirtRegs()); - VRegUses.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(NumVirtRegs); // Model data dependencies between instructions being scheduled and the // ExitSU. 
@@ -808,6 +901,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); + collectVRegUses(SU); } assert( @@ -1057,7 +1151,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - VRegDefs.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); PendingLoads.clear(); } diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll index 7a5a7346865f..5906b2f15709 100644 --- a/test/CodeGen/AMDGPU/image-attributes.ll +++ b/test/CodeGen/AMDGPU/image-attributes.ll @@ -6,7 +6,7 @@ ; FUNC-LABEL: {{^}}width_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z define void @width_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -19,7 +19,7 @@ entry: ; FUNC-LABEL: {{^}}width_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z define void @width_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -36,7 +36,7 @@ entry: ; FUNC-LABEL: {{^}}height_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].W +; EG: MOV * [[VAL]], KC0[2].W define void @height_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -49,7 +49,7 @@ entry: ; FUNC-LABEL: {{^}}height_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].W +; EG: MOV * [[VAL]], KC0[2].W define void @height_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -66,7 +66,7 @@ entry: ; FUNC-LABEL: {{^}}depth_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].X +; EG: MOV * [[VAL]], KC0[3].X define void @depth_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -83,7 +83,7 @@ entry: ; FUNC-LABEL: {{^}}data_type_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Y +; EG: MOV * [[VAL]], KC0[3].Y define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -96,7 +96,7 @@ entry: ; FUNC-LABEL: {{^}}data_type_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Y +; EG: MOV * [[VAL]], KC0[3].Y define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -113,7 +113,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Z +; EG: MOV * [[VAL]], KC0[3].Z define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -126,7 +126,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Z +; EG: MOV * [[VAL]], KC0[3].Z define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -145,7 +145,7 @@ entry: ; ; FUNC-LABEL: {{^}}image_arg_2nd: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[4].Z +; EG: MOV * [[VAL]], KC0[4].Z define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1, i32 %x, %opencl.image2d_t addrspace(1)* %in2, diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll index cff1c24f89d6..9d2320cb2d19 100644 --- a/test/CodeGen/AMDGPU/literals.ll +++ b/test/CodeGen/AMDGPU/literals.ll @@ -7,8 +7,8 
@@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: {{^}}i32_literal: -; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,8 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: {{^}}float_literal: -; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll index 6dc9d050eee6..2e299e30b8c7 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL: {{^}}read_workdim: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index 74792e50017f..a30a8e083eb6 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; R600: {{^}}amdgpu_trunc: -; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: {{^}}amdgpu_trunc: ; SI: v_trunc_f32 diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll index f2a7256e812d..13ebee41e844 100644 --- a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll +++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -5,7 +5,7 @@ ; FUNC-LABEL: {{^}}local_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Z +; EG: MOV * [[VAL]], KC0[1].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 @@ -23,7 +23,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].W +; EG: MOV * [[VAL]], KC0[1].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].X +; EG: MOV * [[VAL]], KC0[2].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll index 1c04090b407f..e40f18f040b7 100644 --- a/test/CodeGen/AMDGPU/or.ll +++ b/test/CodeGen/AMDGPU/or.ll @@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { } ; FUNC-LABEL: {{^}}or_i1: -; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 53694dcffa66..57365a6e1fc3 100644 --- a/test/CodeGen/AMDGPU/set-dx10.ll +++ 
b/test/CodeGen/AMDGPU/set-dx10.ll @@ -5,8 +5,8 @@ ; SET*DX10 instructions. ; CHECK: {{^}}fcmp_une_select_fptosi: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -19,8 +19,8 @@ entry: } ; CHECK: {{^}}fcmp_une_select_i32: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -31,8 +31,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_fptosi: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -45,8 +45,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_i32: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -57,8 +57,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -71,8 +71,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -83,8 +83,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -97,8 +97,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -109,8 +109,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -123,8 +123,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * 
{{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -135,8 +135,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -149,8 +149,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 95fcfdbdecae..23ae3b967971 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI: buffer_store_dword [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] -; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 -; EG-NEXT: LSHR * [[ADDR]] +; EG: LSHR * [[ADDR]] +; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index bf08e66f3304..55db80731c90 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG: {{^}}shl_i64: +;EG-LABEL: {{^}}shl_i64: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}} ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 @@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG: {{^}}shl_v2i64: +;EG-LABEL: {{^}}shl_v2i64: ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ;EG-DAG: LSHR {{\*? 
*}}[[COMPSHA]] diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll index bcbc32f4c053..3b59bbfb18c0 100644 --- a/test/CodeGen/AMDGPU/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll @@ -70,11 +70,11 @@ entry: ;EG-LABEL: {{^}}ashr_i64_2: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}} ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll index ebb2f2db252e..bbd954356322 100644 --- a/test/CodeGen/AMDGPU/srl.ll +++ b/test/CodeGen/AMDGPU/srl.ll @@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} -; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}} ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}} +; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]] ; EG-DAG: CNDE_INT {{\*? 
*}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll index 8ab4faf2f145..d120111a71fb 100644 --- a/test/CodeGen/AMDGPU/unsupported-cc.ll +++ b/test/CodeGen/AMDGPU/unsupported-cc.ll @@ -3,8 +3,8 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK-LABEL: {{^}}slt: -; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -15,8 +15,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_i32: -; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -40,8 +40,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_float_native: -; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float_native(float addrspace(1)* %out, float %in) { entry: @@ -52,8 +52,8 @@ entry: } ; CHECK-LABEL: {{^}}olt: -; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: @@ -64,8 +64,8 @@ entry: } ; CHECK-LABEL: {{^}}sle: -; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: @@ -76,8 +76,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_i32: -; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -101,8 +101,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_float_native: -; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ule_float_native(float addrspace(1)* %out, float %in) { entry: @@ -113,8 +113,8 @@ entry: } ; CHECK-LABEL: {{^}}ole: -; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll index a704a23b0f92..f420ec9c7d23 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -7,7 +7,7 @@ ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].X +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].X ; HSA: .amd_kernel_code_t @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 @@ -53,7 +53,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Z +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 @@ -68,7 +68,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].W +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc @@ -83,7 +83,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].X +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 @@ -98,7 +98,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index ddb920af29d8..655655d92f08 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ; FUNC-LABEL: {{^}}xor_i1: -; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}} ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} From c445f0fb72e9028e9ec92924025317c70b667359 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Fri, 4 Dec 2015 01:53:14 +0000 Subject: [PATCH 048/364] [ARM] When a bitcast is about to be turned into a VMOVDRR, try to combine it with its source instead of forcing the values on GPRs. This improves the lowering of vector code when such bitcasts happen in the middle of vector computations. rdar://problem/23691584 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 55 ++++++++++++++++++++++ test/CodeGen/ARM/combine-vmovdrr.ll | 72 +++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 test/CodeGen/ARM/combine-vmovdrr.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 33f74a3ba9fd..23f7bd0f4c8b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4139,6 +4139,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, Results.push_back(Read.getOperand(0)); } +/// \p BC is a bitcast that is about to be turned into a VMOVDRR. +/// When \p DstVT, the destination type of \p BC, is on the vector +/// register bank and the source of bitcast, \p Op, operates on the same bank, +/// it might be possible to combine them, such that everything stays on the +/// vector register bank. +/// \p return The node that would replace \p BT, if the combine +/// is possible. 
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, + SelectionDAG &DAG) { + SDValue Op = BC->getOperand(0); + EVT DstVT = BC->getValueType(0); + + // The only vector instruction that can produce a scalar (remember, + // since the bitcast was about to be turned into VMOVDRR, the source + // type is i64) from a vector is EXTRACT_VECTOR_ELT. + // Moreover, we can do this combine only if there is one use. + // Finally, if the destination type is not a vector, there is not + // much point on forcing everything on the vector bank. + if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !Op.hasOneUse()) + return SDValue(); + + // If the index is not constant, we will introduce an additional + // multiply that will stick. + // Give up in that case. + ConstantSDNode *Index = dyn_cast(Op.getOperand(1)); + if (!Index) + return SDValue(); + unsigned DstNumElt = DstVT.getVectorNumElements(); + + // Compute the new index. + const APInt &APIntIndex = Index->getAPIntValue(); + APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); + NewIndex *= APIntIndex; + // Check if the new constant index fits into i32. + if (NewIndex.getBitWidth() > 32) + return SDValue(); + + // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> + // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) + SDLoc dl(Op); + SDValue ExtractSrc = Op.getOperand(0); + EVT VecVT = EVT::getVectorVT( + *DAG.getContext(), DstVT.getScalarType(), + ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); + SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, + DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -4158,6 +4208,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { + // Do not force values to GPRs (this is what VMOVDRR does for the inputs) + // if we can combine the bitcast with its source. + if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) + return Val; + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, diff --git a/test/CodeGen/ARM/combine-vmovdrr.ll b/test/CodeGen/ARM/combine-vmovdrr.ll new file mode 100644 index 000000000000..358f7e3a983e --- /dev/null +++ b/test/CodeGen/ARM/combine-vmovdrr.ll @@ -0,0 +1,72 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "thumbv7s-apple-ios" + +declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i) + +; Check that we get the motivating example: +; The bitcasts force the values to go through the GPRs, whereas +; they are defined on VPRs and used on VPRs. 
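+; A rough sketch of the combine (names here are illustrative, not part of
+; the test): for
+;   %hi = extractelement <2 x i64> %v, i32 1
+;   %lanes = bitcast i64 %hi to <8 x i8>
+; the DAG combine bitcasts the whole <2 x i64> source to <16 x i8> and
+; extracts the <8 x i8> subvector at index 1 * 8 = 8, so the value never
+; has to leave the vector register bank.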
+; +; CHECK-LABEL: motivatingExample: +; CHECK: vldr [[ARG2_VAL:d[0-9]+]], [r1] +; CHECK-NEXT: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0] +; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALlo]], [[ARG1_VALhi]]}, [[ARG2_VAL]] +; CHECK-NEXT: vstr [[RES]], [r1] +; CHECK-NEXT: bx lr +define void @motivatingExample(<2 x i64>* %addr, <8 x i8>* %addr2) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret void +} + +; Check that we do not perform the transformation for dynamic index. +; CHECK-LABEL: dynamicIndex: +; CHECK-NOT: mul +; CHECK: pop +define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret void +} + +; Check that we do not perform the transformation when there are several uses +; of the result of the bitcast. +; CHECK-LABEL: severalUses: +; ARG1_VALlo is hard coded because we need to access the high part of d0, +; i.e., s1, and we can't express that with filecheck. +; CHECK: vld1.32 {[[ARG1_VALlo:d0]], [[ARG1_VALhi:d[0-9]+]]}, [r0] +; CHECK-NEXT: vldr [[ARG2_VAL:d[0-9]+]], [r1] +; s1 is actually 2 * ARG1_VALlo + 1, but we cannot express that with filecheck. +; CHECK-NEXT: vmov [[REThi:r[0-9]+]], s1 +; We build the return value here. s0 is 2 * ARG1_VALlo. +; CHECK-NEXT: vmov r0, s0 +; This copy is correct but actually useless. We should be able to clean it up. +; CHECK-NEXT: vmov [[ARG1_VALloCPY:d[0-9]+]], r0, [[REThi]] +; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALloCPY]], [[ARG1_VALhi]]}, [[ARG2_VAL]] +; CHECK-NEXT: vstr [[RES]], [r1] +; CHECK-NEXT: mov r1, [[REThi]] +; CHECK-NEXT: bx lr +define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret i64 %shuffle.i.extract.i310 +} From a9a96c1f37545082343ccb7cfee7f97f8f7a76be Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 4 Dec 2015 02:00:12 +0000 Subject: [PATCH 049/364] Move llvm/test/CodeGen/Generic/function-alias.ll to X86. It is incompatible to PECOFF. 
FIXME: It may be ELF-generic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254685 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/{Generic => X86}/function-alias.ll | 2 ++ 1 file changed, 2 insertions(+) rename test/CodeGen/{Generic => X86}/function-alias.ll (73%) diff --git a/test/CodeGen/Generic/function-alias.ll b/test/CodeGen/X86/function-alias.ll similarity index 73% rename from test/CodeGen/Generic/function-alias.ll rename to test/CodeGen/X86/function-alias.ll index 7eec5be198b0..d68d75d5578a 100644 --- a/test/CodeGen/Generic/function-alias.ll +++ b/test/CodeGen/X86/function-alias.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" ; "data" constant @0 = private constant <{ i8, i8 }> <{i8 15, i8 11}>, section ".text" From d8ba62ab992b9e142e002aee220354c9f09d92b7 Mon Sep 17 00:00:00 2001 From: Junmo Park Date: Fri, 4 Dec 2015 02:06:59 +0000 Subject: [PATCH 050/364] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254686 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index c6a6476747e6..e41926a819c2 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -371,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } // Back past possible debugging pseudos at beginning of block. This matters // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for + // are present at the beginning. (This way, the various checks later for // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; From 8061fe5c678f809e69acc199fe68dc5794601503 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Fri, 4 Dec 2015 02:13:58 +0000 Subject: [PATCH 051/364] Revert "[llvm-profdata] Add support for weighted merge of profile data" This reverts commit b7250858d96b8ce567681214273ac0e62713c661. Reverting in order to investigate Windows test failure. 
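For reference, the reverted change let each input profile be scaled by a
positive integer weight appended to its file name (the file names below are
illustrative; the syntax is the one removed from the docs and tests in this
revert):

  llvm-profdata merge --instr foo.profdata:3 bar.profdata:5 -o merged.profdata
  llvm-profdata merge --sample --text foo.proftext:1 bar.proftext:1 -o -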
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254687 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-profdata.rst | 6 +- include/llvm/ProfileData/InstrProf.h | 31 ++------ include/llvm/ProfileData/InstrProfWriter.h | 4 +- include/llvm/ProfileData/SampleProf.h | 54 +++++--------- lib/ProfileData/InstrProfWriter.cpp | 14 +--- .../Inputs/weight-instr-bar.profdata | Bin 1320 -> 0 bytes .../Inputs/weight-instr-foo.profdata | Bin 1320 -> 0 bytes .../Inputs/weight-sample-bar.proftext | 8 -- .../Inputs/weight-sample-foo.proftext | 8 -- test/tools/llvm-profdata/weight-instr.test | 55 -------------- test/tools/llvm-profdata/weight-sample.test | 43 ----------- tools/llvm-profdata/llvm-profdata.cpp | 70 ++++-------------- unittests/ProfileData/InstrProfTest.cpp | 20 ----- 13 files changed, 47 insertions(+), 266 deletions(-) delete mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata delete mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata delete mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext delete mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext delete mode 100644 test/tools/llvm-profdata/weight-instr.test delete mode 100644 test/tools/llvm-profdata/weight-sample.test diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index a4b18f301e42..210826a7babc 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -28,7 +28,7 @@ MERGE SYNOPSIS ^^^^^^^^ -:program:`llvm-profdata merge` [*options*] [*filename[:weight]...*] +:program:`llvm-profdata merge` [*options*] [*filenames...*] DESCRIPTION ^^^^^^^^^^^ @@ -37,10 +37,6 @@ DESCRIPTION generated by PGO instrumentation and merges them together into a single indexed profile data file. -The profile counts in each input file can be scaled (multiplied) by specifying -``:``, where `` is a decimal integer >= 1. -A default weight of 1 is assumed if only `` is given. - OPTIONS ^^^^^^^ diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index e1ed2e9ce48c..956485119102 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -218,8 +218,7 @@ struct InstrProfValueSiteRecord { } /// Merge data from another InstrProfValueSiteRecord - /// Optionally scale merged counts by \p Weight. - void mergeValueData(InstrProfValueSiteRecord &Input, uint64_t Weight = 1) { + void mergeValueData(InstrProfValueSiteRecord &Input) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -229,11 +228,7 @@ struct InstrProfValueSiteRecord { while (I != IE && I->Value < J->Value) ++I; if (I != IE && I->Value == J->Value) { - // TODO: Check for counter overflow and return error if it occurs. - uint64_t JCount = J->Count; - if (Weight > 1) - JCount = SaturatingMultiply(JCount, Weight); - I->Count = SaturatingAdd(I->Count, JCount); + I->Count = SaturatingAdd(I->Count, J->Count); ++I; continue; } @@ -279,8 +274,7 @@ struct InstrProfRecord { ValueMapType *HashKeys); /// Merge the counts in \p Other into this one. - /// Optionally scale merged counts by \p Weight. - inline instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); + inline instrprof_error merge(InstrProfRecord &Other); /// Used by InstrProfWriter: update the value strings to commoned strings in /// the writer instance. @@ -332,9 +326,7 @@ struct InstrProfRecord { } // Merge Value Profile data from Src record to this record for ValueKind. 
- // Scale merged value counts by \p Weight. - instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, - uint64_t Weight) { + instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src) { uint32_t ThisNumValueSites = getNumValueSites(ValueKind); uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); if (ThisNumValueSites != OtherNumValueSites) @@ -344,7 +336,7 @@ struct InstrProfRecord { std::vector &OtherSiteRecords = Src.getValueSitesForKind(ValueKind); for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight); + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I]); return instrprof_error::success; } }; @@ -430,8 +422,7 @@ void InstrProfRecord::updateStrings(InstrProfStringTable *StrTab) { VData.Value = (uint64_t)StrTab->insertString((const char *)VData.Value); } -instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, - uint64_t Weight) { +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other) { // If the number of counters doesn't match we either have bad data // or a hash collision. if (Counts.size() != Other.Counts.size()) @@ -441,19 +432,13 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool ResultOverflowed; - uint64_t OtherCount = Other.Counts[I]; - if (Weight > 1) { - OtherCount = SaturatingMultiply(OtherCount, Weight, ResultOverflowed); - if (ResultOverflowed) - Result = instrprof_error::counter_overflow; - } - Counts[I] = SaturatingAdd(Counts[I], OtherCount, ResultOverflowed); + Counts[I] = SaturatingAdd(Counts[I], Other.Counts[I], ResultOverflowed); if (ResultOverflowed) Result = instrprof_error::counter_overflow; } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { - instrprof_error MergeValueResult = mergeValueProfData(Kind, Other, Weight); + instrprof_error MergeValueResult = mergeValueProfData(Kind, Other); if (MergeValueResult != instrprof_error::success) Result = MergeValueResult; } diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index 1958d5f232e7..d026e08ec861 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -39,8 +39,8 @@ class InstrProfWriter { void updateStringTableReferences(InstrProfRecord &I); /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is - /// summed. Optionally scale counts by \p Weight. - std::error_code addRecord(InstrProfRecord &&I, uint64_t Weight = 1); + /// summed. + std::error_code addRecord(InstrProfRecord &&I); /// Write the profile to \c OS void write(raw_fd_ostream &OS); /// Write the profile in text format to \c OS diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 3337f4d7df5c..a7b22c735480 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -173,25 +173,19 @@ class SampleRecord { SampleRecord() : NumSamples(0), CallTargets() {} /// Increment the number of samples for this record by \p S. - /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. 
- void addSamples(uint64_t S, uint64_t Weight = 1) { - if (Weight > 1) - S = SaturatingMultiply(S, Weight); + void addSamples(uint64_t S) { NumSamples = SaturatingAdd(NumSamples, S); } /// Add called function \p F with samples \p S. - /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addCalledTarget(StringRef F, uint64_t S, uint64_t Weight = 1) { + void addCalledTarget(StringRef F, uint64_t S) { uint64_t &TargetSamples = CallTargets[F]; - if (Weight > 1) - S = SaturatingMultiply(S, Weight); TargetSamples = SaturatingAdd(TargetSamples, S); } @@ -202,11 +196,10 @@ class SampleRecord { const CallTargetMap &getCallTargets() const { return CallTargets; } /// Merge the samples in \p Other into this record. - /// Optionally scale sample counts by \p Weight. - void merge(const SampleRecord &Other, uint64_t Weight = 1) { - addSamples(Other.getSamples(), Weight); + void merge(const SampleRecord &Other) { + addSamples(Other.getSamples()); for (const auto &I : Other.getCallTargets()) - addCalledTarget(I.first(), I.second, Weight); + addCalledTarget(I.first(), I.second); } void print(raw_ostream &OS, unsigned Indent) const; @@ -233,26 +226,16 @@ class FunctionSamples { FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {} void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; void dump() const; - void addTotalSamples(uint64_t Num, uint64_t Weight = 1) { - if (Weight > 1) - Num = SaturatingMultiply(Num, Weight); - TotalSamples += Num; - } - void addHeadSamples(uint64_t Num, uint64_t Weight = 1) { - if (Weight > 1) - Num = SaturatingMultiply(Num, Weight); - TotalHeadSamples += Num; - } - void addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, - uint64_t Weight = 1) { - BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num, - Weight); + void addTotalSamples(uint64_t Num) { TotalSamples += Num; } + void addHeadSamples(uint64_t Num) { TotalHeadSamples += Num; } + void addBodySamples(uint32_t LineOffset, uint32_t Discriminator, + uint64_t Num) { + BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num); } void addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, - std::string FName, uint64_t Num, - uint64_t Weight = 1) { - BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( - FName, Num, Weight); + std::string FName, uint64_t Num) { + BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(FName, + Num); } /// Return the number of samples collected at the given location. @@ -301,19 +284,18 @@ class FunctionSamples { } /// Merge the samples in \p Other into this one. - /// Optionally scale samples by \p Weight. 
- void merge(const FunctionSamples &Other, uint64_t Weight = 1) { - addTotalSamples(Other.getTotalSamples(), Weight); - addHeadSamples(Other.getHeadSamples(), Weight); + void merge(const FunctionSamples &Other) { + addTotalSamples(Other.getTotalSamples()); + addHeadSamples(Other.getHeadSamples()); for (const auto &I : Other.getBodySamples()) { const LineLocation &Loc = I.first; const SampleRecord &Rec = I.second; - BodySamples[Loc].merge(Rec, Weight); + BodySamples[Loc].merge(Rec); } for (const auto &I : Other.getCallsiteSamples()) { const CallsiteLocation &Loc = I.first; const FunctionSamples &Rec = I.second; - functionSamplesAt(Loc).merge(Rec, Weight); + functionSamplesAt(Loc).merge(Rec); } } diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 2261c92f03a9..78bec012eeb2 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -98,8 +98,7 @@ void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) { I.updateStrings(&StringTable); } -std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, - uint64_t Weight) { +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I) { updateStringTableReferences(I); auto &ProfileDataMap = FunctionData[I.Name]; @@ -114,18 +113,9 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, // We've never seen a function with this name and hash, add it. Dest = std::move(I); Result = instrprof_error::success; - if (Weight > 1) { - for (auto &Count : Dest.Counts) { - bool Overflowed; - Count = SaturatingMultiply(Count, Weight, Overflowed); - if (Overflowed && Result == instrprof_error::success) { - Result = instrprof_error::counter_overflow; - } - } - } } else { // We're updating a function we've seen before. - Result = Dest.merge(I, Weight); + Result = Dest.merge(I); } // We keep track of the max function count as we go for simplicity. diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata deleted file mode 100644 index 4ed07660f654090e750b19be4e0af609bc1c61db..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1320 zcmeyLQ&5zjmf6V600ExHYmK2yFeL$%U}Tt_rlBWzFff!ADy;yeON$fJQ=x1IMi>K1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpK1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpjI*ICuRlWs>LPl7M$03g{ F3;+VlD0u(? diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext deleted file mode 100644 index a910f745e6c7..000000000000 --- a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext +++ /dev/null @@ -1,8 +0,0 @@ -bar:1772037:35370 - 17: 35370 - 18: 35370 - 19: 7005 - 20: 29407 - 21: 12170 - 23: 18150 bar:19829 - 25: 36666 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext deleted file mode 100644 index 155ec5d00315..000000000000 --- a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext +++ /dev/null @@ -1,8 +0,0 @@ -foo:1763288:35327 - 7: 35327 - 8: 35327 - 9: 6930 - 10: 29341 - 11: 11906 - 13: 18185 foo:19531 - 15: 36458 diff --git a/test/tools/llvm-profdata/weight-instr.test b/test/tools/llvm-profdata/weight-instr.test deleted file mode 100644 index bc0b5061647f..000000000000 --- a/test/tools/llvm-profdata/weight-instr.test +++ /dev/null @@ -1,55 +0,0 @@ -Tests for weighted merge of instrumented profiles. 
- -1- Merge the foo and bar profiles with unity weight and verify the combined output -RUN: llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:1 %p/Inputs/weight-instr-foo.profdata:1 -o %t -RUN: llvm-profdata show --instr -all-functions %t | FileCheck %s --check-prefix=WEIGHT1 -WEIGHT1: Counters: -WEIGHT1: usage: -WEIGHT1: Hash: 0x0000000000000000 -WEIGHT1: Counters: 1 -WEIGHT1: Function count: 0 -WEIGHT1: foo: -WEIGHT1: Hash: 0x000000000000028a -WEIGHT1: Counters: 3 -WEIGHT1: Function count: 866988873 -WEIGHT1: bar: -WEIGHT1: Hash: 0x000000000000028a -WEIGHT1: Counters: 3 -WEIGHT1: Function count: 866988873 -WEIGHT1: main: -WEIGHT1: Hash: 0x7d31c47ea98f8248 -WEIGHT1: Counters: 60 -WEIGHT1: Function count: 2 -WEIGHT1: Functions shown: 4 -WEIGHT1: Total functions: 4 -WEIGHT1: Maximum function count: 866988873 -WEIGHT1: Maximum internal block count: 267914296 - -2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output -RUN: llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:3 %p/Inputs/weight-instr-foo.profdata:5 -o %t -RUN: llvm-profdata show --instr -all-functions %t | FileCheck %s --check-prefix=WEIGHT2 -WEIGHT2: Counters: -WEIGHT2: usage: -WEIGHT2: Hash: 0x0000000000000000 -WEIGHT2: Counters: 1 -WEIGHT2: Function count: 0 -WEIGHT2: foo: -WEIGHT2: Hash: 0x000000000000028a -WEIGHT2: Counters: 3 -WEIGHT2: Function count: 4334944365 -WEIGHT2: bar: -WEIGHT2: Hash: 0x000000000000028a -WEIGHT2: Counters: 3 -WEIGHT2: Function count: 2600966619 -WEIGHT2: main: -WEIGHT2: Hash: 0x7d31c47ea98f8248 -WEIGHT2: Counters: 60 -WEIGHT2: Function count: 8 -WEIGHT2: Functions shown: 4 -WEIGHT2: Total functions: 4 -WEIGHT2: Maximum function count: 4334944365 -WEIGHT2: Maximum internal block count: 1339571480 - -3- Bad merge: foo and bar profiles with invalid weights -RUN: not llvm-profdata merge --instr %p/Inputs/weight-instr-bar.profdata:3 %p/Inputs/weight-instr-foo.profdata:-5 -o %t.out 2>&1 | FileCheck %s --check-prefix=ERROR3 -ERROR3: error: Input weight must be a positive integer. diff --git a/test/tools/llvm-profdata/weight-sample.test b/test/tools/llvm-profdata/weight-sample.test deleted file mode 100644 index a1fe1df1b6de..000000000000 --- a/test/tools/llvm-profdata/weight-sample.test +++ /dev/null @@ -1,43 +0,0 @@ -Tests for weighted merge of sample profiles. 
- -1- Merge the foo and bar profiles with unity weight and verify the combined output -RUN: llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:1 %p/Inputs/weight-sample-foo.proftext:1 -o - | FileCheck %s --check-prefix=WEIGHT1 -WEIGHT1: foo:1763288:35327 -WEIGHT1: 7: 35327 -WEIGHT1: 8: 35327 -WEIGHT1: 9: 6930 -WEIGHT1: 10: 29341 -WEIGHT1: 11: 11906 -WEIGHT1: 13: 18185 foo:19531 -WEIGHT1: 15: 36458 -WEIGHT1: bar:1772037:35370 -WEIGHT1: 17: 35370 -WEIGHT1: 18: 35370 -WEIGHT1: 19: 7005 -WEIGHT1: 20: 29407 -WEIGHT1: 21: 12170 -WEIGHT1: 23: 18150 bar:19829 -WEIGHT1: 25: 36666 - -2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output -RUN: llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:3 %p/Inputs/weight-sample-foo.proftext:5 -o - | FileCheck %s --check-prefix=WEIGHT2 -WEIGHT2: foo:8816440:176635 -WEIGHT2: 7: 176635 -WEIGHT2: 8: 176635 -WEIGHT2: 9: 34650 -WEIGHT2: 10: 146705 -WEIGHT2: 11: 59530 -WEIGHT2: 13: 90925 foo:97655 -WEIGHT2: 15: 182290 -WEIGHT2: bar:5316111:106110 -WEIGHT2: 17: 106110 -WEIGHT2: 18: 106110 -WEIGHT2: 19: 21015 -WEIGHT2: 20: 88221 -WEIGHT2: 21: 36510 -WEIGHT2: 23: 54450 bar:59487 -WEIGHT2: 25: 109998 - -3- Bad merge: foo and bar profiles with invalid weights -RUN: not llvm-profdata merge --sample --text %p/Inputs/weight-sample-bar.proftext:3 %p/Inputs/weight-sample-foo.proftext:-5 -o %t.out 2>&1 | FileCheck %s --check-prefix=ERROR3 -ERROR3: error: Input weight must be a positive integer. diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 56c80f518ec4..10b6855233d5 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/LLVMContext.h" #include "llvm/ProfileData/InstrProfReader.h" @@ -28,7 +27,6 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -95,17 +93,7 @@ static void handleMergeWriterError(std::error_code &Error, } } -struct WeightedFile { - StringRef Filename; - uint64_t Weight; - - WeightedFile() {} - - WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {} -}; -typedef SmallVector WeightedFileVector; - -static void mergeInstrProfile(const WeightedFileVector &Inputs, +static void mergeInstrProfile(const cl::list &Inputs, StringRef OutputFilename, ProfileFormat OutputFormat) { if (OutputFilename.compare("-") == 0) @@ -121,21 +109,21 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, InstrProfWriter Writer; SmallSet WriterErrorCodes; - for (const auto &Input : Inputs) { - auto ReaderOrErr = InstrProfReader::create(Input.Filename); + for (const auto &Filename : Inputs) { + auto ReaderOrErr = InstrProfReader::create(Filename); if (std::error_code ec = ReaderOrErr.getError()) - exitWithErrorCode(ec, Input.Filename); + exitWithErrorCode(ec, Filename); auto Reader = std::move(ReaderOrErr.get()); for (auto &I : *Reader) { - if (std::error_code EC = Writer.addRecord(std::move(I), Input.Weight)) { + if (std::error_code EC = Writer.addRecord(std::move(I))) { // Only show hint the first time an error occurs. 
bool firstTime = WriterErrorCodes.insert(EC).second; - handleMergeWriterError(EC, Input.Filename, I.Name, firstTime); + handleMergeWriterError(EC, Filename, I.Name, firstTime); } } if (Reader->hasError()) - exitWithErrorCode(Reader->getError(), Input.Filename); + exitWithErrorCode(Reader->getError(), Filename); } if (OutputFormat == PF_Text) Writer.writeText(Output); @@ -147,7 +135,7 @@ static sampleprof::SampleProfileFormat FormatMap[] = { sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Binary, sampleprof::SPF_GCC}; -static void mergeSampleProfile(const WeightedFileVector &Inputs, +static void mergeSampleProfile(const cl::list &Inputs, StringRef OutputFilename, ProfileFormat OutputFormat) { using namespace sampleprof; @@ -159,11 +147,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, auto Writer = std::move(WriterOrErr.get()); StringMap ProfileMap; SmallVector, 5> Readers; - for (const auto &Input : Inputs) { + for (const auto &Filename : Inputs) { auto ReaderOrErr = - SampleProfileReader::create(Input.Filename, getGlobalContext()); + SampleProfileReader::create(Filename, getGlobalContext()); if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, Input.Filename); + exitWithErrorCode(EC, Filename); // We need to keep the readers around until after all the files are // read so that we do not lose the function names stored in each @@ -172,7 +160,7 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, Input.Filename); + exitWithErrorCode(EC, Filename); StringMap &Profiles = Reader->getProfiles(); for (StringMap::iterator I = Profiles.begin(), @@ -180,38 +168,15 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, I != E; ++I) { StringRef FName = I->first(); FunctionSamples &Samples = I->second; - ProfileMap[FName].merge(Samples, Input.Weight); + ProfileMap[FName].merge(Samples); } } Writer->write(ProfileMap); } -static void parseInputFiles(const cl::list &Inputs, - WeightedFileVector &WeightedInputs) { - WeightedInputs.reserve(Inputs.size()); - - for (StringRef Input : Inputs) { - StringRef FileName; - StringRef WeightStr; - std::tie(FileName, WeightStr) = Input.rsplit(':'); - if (WeightStr.empty() || sys::fs::exists(Input)) { - // No weight specified or valid path containing delimiter. - WeightedInputs.push_back(WeightedFile(Input, 1)); - } else { - // Input weight specified. - uint64_t Weight; - if (WeightStr.getAsInteger(10, Weight) || Weight < 1) { - // Invalid input weight. 
- exitWithError("Input weight must be a positive integer."); - } - WeightedInputs.push_back(WeightedFile(FileName, Weight)); - } - } -} - static int merge_main(int argc, const char *argv[]) { cl::list Inputs(cl::Positional, cl::Required, cl::OneOrMore, - cl::desc("")); + cl::desc("")); cl::opt OutputFilename("output", cl::value_desc("output"), cl::init("-"), cl::Required, @@ -233,13 +198,10 @@ static int merge_main(int argc, const char *argv[]) { cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); - WeightedFileVector WeightedInputs; - parseInputFiles(Inputs, WeightedInputs); - if (ProfileKind == instr) - mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat); + mergeInstrProfile(Inputs, OutputFilename, OutputFormat); else - mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat); + mergeSampleProfile(Inputs, OutputFilename, OutputFormat); return 0; } diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp index 946afdadba93..635a5431a513 100644 --- a/unittests/ProfileData/InstrProfTest.cpp +++ b/unittests/ProfileData/InstrProfTest.cpp @@ -490,24 +490,4 @@ TEST_F(InstrProfTest, get_max_function_count) { ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); } -TEST_F(InstrProfTest, get_weighted_function_counts) { - InstrProfRecord Record1("foo", 0x1234, {1, 2}); - InstrProfRecord Record2("foo", 0x1235, {3, 4}); - Writer.addRecord(std::move(Record1), 3); - Writer.addRecord(std::move(Record2), 5); - auto Profile = Writer.writeBuffer(); - readProfile(std::move(Profile)); - - std::vector Counts; - ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1234, Counts))); - ASSERT_EQ(2U, Counts.size()); - ASSERT_EQ(3U, Counts[0]); - ASSERT_EQ(6U, Counts[1]); - - ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235, Counts))); - ASSERT_EQ(2U, Counts.size()); - ASSERT_EQ(15U, Counts[0]); - ASSERT_EQ(20U, Counts[1]); -} - } // end anonymous namespace From d8def4abd185c430d7649be9347778612c296871 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 4 Dec 2015 02:14:34 +0000 Subject: [PATCH 052/364] IR: Use format_hex instead of handrolling the conversion. NFC Cleans up some very old code in AsmWriter's WriteConstantInternal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 64 ++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 47 deletions(-) diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index cb9a792c598b..fae1ebee5f2a 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -1137,15 +1138,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - char Buffer[40]; APFloat apf = CFP->getValueAPF(); // Floats are represented in ASCII IR as double, convert. 
if (!isDouble) apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "0x" << - utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), - Buffer+40); + Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true); return; } @@ -1153,60 +1151,32 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - // Bit position, in the current word, of the next nibble to print. - int shiftcount; - + APInt API = CFP->getValueAPF().bitcastToAPInt(); if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) { Out << 'K'; - // api needed to prevent premature destruction - APInt api = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t* p = api.getRawData(); - uint64_t word = p[1]; - shiftcount = 12; - int width = api.getBitWidth(); - for (int j=0; j>shiftcount) & 15; - if (nibble < 10) - Out << (unsigned char)(nibble + '0'); - else - Out << (unsigned char)(nibble - 10 + 'A'); - if (shiftcount == 0 && j+4 < width) { - word = *p; - shiftcount = 64; - if (width-j-4 < 64) - shiftcount = width-j-4; - } - } + Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); return; } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) { - shiftcount = 60; Out << 'L'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) { - shiftcount = 60; Out << 'M'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) { - shiftcount = 12; Out << 'H'; + Out << format_hex_no_prefix(API.getZExtValue(), 4, + /*Upper=*/true); } else llvm_unreachable("Unsupported floating point type"); - // api needed to prevent premature destruction - APInt api = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t* p = api.getRawData(); - uint64_t word = *p; - int width = api.getBitWidth(); - for (int j=0; j>shiftcount) & 15; - if (nibble < 10) - Out << (unsigned char)(nibble + '0'); - else - Out << (unsigned char)(nibble - 10 + 'A'); - if (shiftcount == 0 && j+4 < width) { - word = *(++p); - shiftcount = 64; - if (width-j-4 < 64) - shiftcount = width-j-4; - } - } return; } From 51540fbf420fb65bfdf0173a08ea951e10cf431f Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 4 Dec 2015 02:15:39 +0000 Subject: [PATCH 053/364] [Orc] Rename JITCompileCallbackManagerBase to JITCompileCallbackManager. This class is turning into a useful interface, rather than an implementation detail, so I'm dropping the 'Base' suffix. No functional change. 
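For clients the change is a pure spelling update; a sketch (TargetT is a
placeholder for whichever target support class the client already passes,
and the 0 is the error-handler address, as elsewhere in this patch):

  // Before:
  std::unique_ptr<orc::JITCompileCallbackManagerBase> CCMgr =
      llvm::make_unique<orc::JITCompileCallbackManager<TargetT>>(0);
  // After:
  std::unique_ptr<orc::JITCompileCallbackManager> CCMgr =
      llvm::make_unique<orc::LocalJITCompileCallbackManager<TargetT>>(0);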
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254693 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Orc/CompileOnDemandLayer.h | 2 +- .../ExecutionEngine/Orc/IndirectionUtils.h | 20 +++++++++---------- lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 2 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 2 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 2 +- tools/lli/OrcLazyJIT.cpp | 2 +- tools/lli/OrcLazyJIT.h | 2 +- .../Orc/CompileOnDemandLayerTest.cpp | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 242d2420162f..b7ee9b5937f7 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -38,7 +38,7 @@ namespace orc { /// of the function body from the original module. The extracted body is then /// compiled and executed. template class CompileOnDemandLayer { private: diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index cabc95543d81..b5b258e7a05c 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -27,8 +27,8 @@ namespace llvm { namespace orc { -/// @brief Target-independent base class JITCompileCallbackManager. -class JITCompileCallbackManagerBase { +/// @brief Target-independent base class for compile callback management. +class JITCompileCallbackManager { public: typedef std::function CompileFtor; @@ -50,13 +50,13 @@ class JITCompileCallbackManagerBase { CompileFtor &Compile; }; - /// @brief Construct a JITCompileCallbackManagerBase. + /// @brief Construct a JITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - JITCompileCallbackManagerBase(TargetAddress ErrorHandlerAddress) + JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) : ErrorHandlerAddress(ErrorHandlerAddress) {} - virtual ~JITCompileCallbackManagerBase() {} + virtual ~JITCompileCallbackManager() {} /// @brief Execute the callback for the given trampoline id. Called by the JIT /// to compile functions on demand. @@ -116,16 +116,16 @@ class JITCompileCallbackManagerBase { virtual void anchor(); }; -/// @brief Manage compile callbacks. +/// @brief Manage compile callbacks for in-process JITs. template -class JITCompileCallbackManager : public JITCompileCallbackManagerBase { +class LocalJITCompileCallbackManager : public JITCompileCallbackManager { public: - /// @brief Construct a JITCompileCallbackManager. + /// @brief Construct a InProcessJITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) - : JITCompileCallbackManagerBase(ErrorHandlerAddress) { + LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : JITCompileCallbackManager(ErrorHandlerAddress) { /// Set up the resolver block. 
std::error_code EC; diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 30dbe0383771..dd6e3a3b29ae 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -19,7 +19,7 @@ namespace llvm { namespace orc { -void JITCompileCallbackManagerBase::anchor() {} +void JITCompileCallbackManager::anchor() {} void IndirectStubsManagerBase::anchor() {} Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index 7326fa7e2f8f..d1af56d84867 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -23,7 +23,7 @@ OrcCBindingsStack::createCompileCallbackMgr(Triple T) { default: return nullptr; case Triple::x86_64: { - typedef orc::JITCompileCallbackManager CCMgrT; + typedef orc::LocalJITCompileCallbackManager CCMgrT; return llvm::make_unique(0); } } diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index c62210112c35..d2f7fe4ac0ef 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -29,7 +29,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) class OrcCBindingsStack { public: - typedef orc::JITCompileCallbackManagerBase CompileCallbackMgr; + typedef orc::JITCompileCallbackManager CompileCallbackMgr; typedef orc::ObjectLinkingLayer<> ObjLayerT; typedef orc::IRCompileLayer CompileLayerT; typedef orc::CompileOnDemandLayer CODLayerT; diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp index 7d79c48559ef..edac10b86556 100644 --- a/tools/lli/OrcLazyJIT.cpp +++ b/tools/lli/OrcLazyJIT.cpp @@ -52,7 +52,7 @@ OrcLazyJIT::createCompileCallbackMgr(Triple T) { default: return nullptr; case Triple::x86_64: { - typedef orc::JITCompileCallbackManager CCMgrT; + typedef orc::LocalJITCompileCallbackManager CCMgrT; return llvm::make_unique(0); } } diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h index ec86a72efaa0..bb4da33ea9b6 100644 --- a/tools/lli/OrcLazyJIT.h +++ b/tools/lli/OrcLazyJIT.h @@ -29,7 +29,7 @@ namespace llvm { class OrcLazyJIT { public: - typedef orc::JITCompileCallbackManagerBase CompileCallbackMgr; + typedef orc::JITCompileCallbackManager CompileCallbackMgr; typedef orc::ObjectLinkingLayer<> ObjLayerT; typedef orc::IRCompileLayer CompileLayerT; typedef std::function(std::unique_ptr)> diff --git a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp index 49f4cc124f9e..4a30cfc42971 100644 --- a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp @@ -16,10 +16,10 @@ using namespace llvm::orc; namespace { -class DummyCallbackManager : public orc::JITCompileCallbackManagerBase { +class DummyCallbackManager : public orc::JITCompileCallbackManager { public: DummyCallbackManager() - : JITCompileCallbackManagerBase(0), NextStubAddress(0), + : JITCompileCallbackManager(0), NextStubAddress(0), UniversalCompile([]() { return 0; }) { } From 64649a7dc718206641b8b6bb436d4cdd3c836d04 Mon Sep 17 00:00:00 2001 From: Junmo Park Date: Fri, 4 Dec 2015 02:29:25 +0000 Subject: [PATCH 054/364] [BranchFolding] Merge MMOs during tail merge Summary: If we remove the MMOs from Load/Store instructions, they are treated as volatile. 
This makes other optimization passes unhappy. eg. Load/Store Optimization So, it looks better to merge, not remove. Reviewers: gberry, mcrosier Subscribers: gberry, llvm-commits Differential Revision: http://reviews.llvm.org/D14797 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254694 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 42 ++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index e41926a819c2..3878281a4fe9 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -744,24 +744,35 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } -static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { +// Add MI1's MMOs to MI2's MMOs while excluding any duplicates. The MI scheduler +// currently doesn't handle multiple MMOs, so duplicates would likely pessimize +// the scheduler. +static void mergeMMOs(MachineInstr *MI1, MachineInstr *MI2) { auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); - if ((E1 - I1) != (E2 - I2)) - return false; - for (; I1 != E1; ++I1, ++I2) { - if (**I1 != **I2) - return false; + MachineFunction *MF = MI1->getParent()->getParent(); + + // Mostly, MI1's MMO count is 1 or zero. So we don't have to use + // SmallSet. + for (; I1 != E1; ++I1) { + bool IsDupMMO = false; + for (I2 = MI2->memoperands_begin(); I2 != E2; ++I2) { + if (**I1 == **I2) { + IsDupMMO = true; + break; + } + } + if (IsDupMMO == false) { + MI2->addMemOperand(*MF, *I1); + E2 = MI2->memoperands_end(); + } } - return true; } static void -removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, - MachineBasicBlock &MBBCommon) { - // Remove MMOs from memory operations in the common block - // when they do not match the ones from the block being tail-merged. - // This ensures later passes conservatively compute dependencies. +mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Merge MMOs from memory operations in the common block MachineBasicBlock *MBB = MBBIStartPos->getParent(); // Note CommonTailLen does not necessarily matches the size of // the common BB nor all its instructions because of debug @@ -792,8 +803,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); if (MBBICommon->mayLoad() || MBBICommon->mayStore()) - if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->clearMemRefs(); + mergeMMOs(&*MBBI, &*MBBICommon); ++MBBI; ++MBBICommon; @@ -913,8 +923,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); - // Remove MMOs from memory operations as needed. - removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); + // Merge MMOs from memory operations as needed. + mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. 
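The "treated as volatile" effect described in the summary above comes from
the conservative answers machine passes must give when a mayLoad/mayStore
instruction carries no memory operands at all; roughly (a paraphrase for
illustration, not the exact in-tree query):

  bool isOrderedMemAccess(const MachineInstr &MI) {
    if (!MI.mayLoad() && !MI.mayStore())
      return false;                 // no memory reference at all
    if (MI.memoperands_empty())
      return true;                  // MMOs dropped: assume ordered/volatile
    for (auto I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I)
      if ((*I)->isVolatile())
        return true;                // an explicitly volatile access
    return false;
  }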
From 6d3f26eb9adb329347b7af61958b32cf30761768 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 4 Dec 2015 02:32:32 +0000 Subject: [PATCH 055/364] [Orc] Fix Kaleidoscope example for change in r254693. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254695 91177308-0d34-0410-b5e6-96231b3b80d8 --- examples/Kaleidoscope/Orc/fully_lazy/toy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp index 8ba76e86ee07..78184f5d32cd 100644 --- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp +++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp @@ -1308,7 +1308,7 @@ class KaleidoscopeJIT { std::map> FunctionDefs; - JITCompileCallbackManager CompileCallbacks; + LocalJITCompileCallbackManager CompileCallbacks; }; static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) { From c6a202e8593da2db6c7330085536436ddc3fa263 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 02:42:28 +0000 Subject: [PATCH 056/364] Move a call to getGlobalContext out of lib/LTO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254696 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LTO/LTOCodeGenerator.h | 4 +--- lib/LTO/LTOCodeGenerator.cpp | 11 ++--------- tools/llvm-lto/llvm-lto.cpp | 2 +- tools/lto/lto.cpp | 5 +++-- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h index c322288a1ae9..876defbdcd3f 100644 --- a/include/llvm/LTO/LTOCodeGenerator.h +++ b/include/llvm/LTO/LTOCodeGenerator.h @@ -62,8 +62,7 @@ namespace llvm { struct LTOCodeGenerator { static const char *getVersionString(); - LTOCodeGenerator(); - LTOCodeGenerator(std::unique_ptr Context); + LTOCodeGenerator(LLVMContext &Context); ~LTOCodeGenerator(); /// Merge given module. Return true on success. @@ -168,7 +167,6 @@ struct LTOCodeGenerator { typedef StringMap StringSet; - std::unique_ptr OwnedContext; LLVMContext &Context; std::unique_ptr MergedModule; std::unique_ptr IRLinker; diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 468ec24e3a06..b0dae74c13d4 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -64,20 +64,13 @@ const char* LTOCodeGenerator::getVersionString() { #endif } -LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), +LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), IRLinker(new Linker(*MergedModule)) { initializeLTOPasses(); } -LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr Context) - : OwnedContext(std::move(Context)), Context(*OwnedContext), - MergedModule(new Module("ld-temp.o", *OwnedContext)), - IRLinker(new Linker(*MergedModule)) { - initializeLTOPasses(); -} - LTOCodeGenerator::~LTOCodeGenerator() {} // Initialize LTO passes. 
Please keep this function in sync with diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 86b95577b307..07cd9bb1eaf8 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -257,7 +257,7 @@ int main(int argc, char **argv) { unsigned BaseArg = 0; - LTOCodeGenerator CodeGen; + LTOCodeGenerator CodeGen(getGlobalContext()); if (UseDiagnosticHandler) CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr); diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index ee389da2499e..6058fb1197eb 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -95,13 +95,14 @@ static void handleLibLTODiagnostic(lto_codegen_diagnostic_severity_t Severity, // libLTO API semantics, which require that the code generator owns the object // file. struct LibLTOCodeGenerator : LTOCodeGenerator { - LibLTOCodeGenerator() { + LibLTOCodeGenerator() : LTOCodeGenerator(getGlobalContext()) { setDiagnosticHandler(handleLibLTODiagnostic, nullptr); } LibLTOCodeGenerator(std::unique_ptr Context) - : LTOCodeGenerator(std::move(Context)) { + : LTOCodeGenerator(*Context), OwnedContext(std::move(Context)) { setDiagnosticHandler(handleLibLTODiagnostic, nullptr); } std::unique_ptr NativeObjectFile; + std::unique_ptr OwnedContext; }; } From 9ce890e1fe27d88dea0f652d11a09673eab67162 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 04:15:05 +0000 Subject: [PATCH 057/364] Revert "[BranchFolding] Merge MMOs during tail merge" This reverts commit r254694. It broke bootstrap. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254700 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 42 +++++++++++++---------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 3878281a4fe9..e41926a819c2 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -744,35 +744,24 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } -// Add MI1's MMOs to MI2's MMOs while excluding any duplicates. The MI scheduler -// currently doesn't handle multiple MMOs, so duplicates would likely pessimize -// the scheduler. -static void mergeMMOs(MachineInstr *MI1, MachineInstr *MI2) { +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); - MachineFunction *MF = MI1->getParent()->getParent(); - - // Mostly, MI1's MMO count is 1 or zero. So we don't have to use - // SmallSet. - for (; I1 != E1; ++I1) { - bool IsDupMMO = false; - for (I2 = MI2->memoperands_begin(); I2 != E2; ++I2) { - if (**I1 == **I2) { - IsDupMMO = true; - break; - } - } - if (IsDupMMO == false) { - MI2->addMemOperand(*MF, *I1); - E2 = MI2->memoperands_end(); - } + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; } + return true; } static void -mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, - MachineBasicBlock &MBBCommon) { - // Merge MMOs from memory operations in the common block +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. 
MachineBasicBlock *MBB = MBBIStartPos->getParent(); // Note CommonTailLen does not necessarily matches the size of // the common BB nor all its instructions because of debug @@ -803,7 +792,8 @@ mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); if (MBBICommon->mayLoad() || MBBICommon->mayStore()) - mergeMMOs(&*MBBI, &*MBBICommon); + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); ++MBBI; ++MBBICommon; @@ -923,8 +913,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); - // Merge MMOs from memory operations as needed. - mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. From 008021cd33a7820d7edee10536d7c4693f29129e Mon Sep 17 00:00:00 2001 From: Yury Gribov Date: Fri, 4 Dec 2015 09:19:14 +0000 Subject: [PATCH 058/364] [asan] Fix dynamic allocas unpoisoning on PowerPC64. For PowerPC64 we cannot just pass SP extracted from @llvm.stackrestore to _asan_allocas_unpoison due to specific ABI requirements (http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#DYNAM-STACK). This patch adds the value returned by @llvm.get.dynamic.area.offset to extracted from @llvm.stackrestore stack pointer, so dynamic allocas unpoisoning stuff would work correctly on PowerPC64. Patch by Max Ostapenko. Differential Revision: http://reviews.llvm.org/D15108 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254707 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8eb82e39b8a6..dea94a514fe8 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -624,9 +624,24 @@ struct FunctionStackPoisoner : public InstVisitor { void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); + Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); + // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we + // need to adjust extracted SP to compute the address of the most recent + // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for + // this purpose. + if (!isa(InstBefore)) { + Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( + InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, + {IntptrTy}); + + Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); + + DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), + DynamicAreaOffset); + } + IRB.CreateCall(AsanAllocasUnpoisonFunc, - {IRB.CreateLoad(DynamicAllocaLayout), - IRB.CreatePtrToInt(SavedStack, IntptrTy)}); + {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. 
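Concretely, for a dynamic alloca whose enclosing scope is unwound with
@llvm.stackrestore, the instrumentation above now emits roughly the
following IR on PowerPC64 (value names are illustrative, and the runtime
entry point is assumed to be __asan_allocas_unpoison):

  %sp = call i8* @llvm.stacksave()
  ...
  %bottom = load i64, i64* %asan.dyn.alloca.layout   ; pass bookkeeping slot
  %off = call i64 @llvm.get.dynamic.area.offset.i64()
  %base = ptrtoint i8* %sp to i64
  %top = add i64 %base, %off                         ; adjust for the PPC64 ABI
  call void @__asan_allocas_unpoison(i64 %bottom, i64 %top)
  call void @llvm.stackrestore(i8* %sp)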
From e471c45c92d58dc28cb3cab00d24b6bba5f1b3fe Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 4 Dec 2015 09:45:18 +0000 Subject: [PATCH 059/364] [AArch64] Clean up statistical profiling test This check has nothing to do with the statistical profiling extension, so shouldn't be in this test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254709 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Disassembler/AArch64/armv8.2a-statistical-profiling.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt b/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt index e83d750e715e..217424cc46e0 100644 --- a/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt +++ b/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt @@ -1,10 +1,6 @@ # RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+spe --disassemble < %s | FileCheck %s # RUN: llvm-mc -triple aarch64-none-linux-gnu --disassemble < %s | FileCheck --check-prefix=NO_SPE %s -[0x1f,0x22,0x03,0xd5] -# CHECK: hint #0x10 -# NO_SPE: hint #0x10 - [0x3f,0x22,0x03,0xd5] # CHECK: psb csync # NO_SPE: hint #0x11 From a18156c3b8d92d5eb1af043947d1b1461c84e40f Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 4 Dec 2015 10:53:15 +0000 Subject: [PATCH 060/364] LEA code size optimization pass (Part 1): Remove redundant address recalculations, by Andrey Turetsky Add new x86 pass which replaces address calculations in load or store instructions with def register of existing LEA (must be in the same basic block), if the LEA calculates address that differs only by a displacement. Works only with -Os or -Oz. Differential Revision: http://reviews.llvm.org/D13294 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254712 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/X86.h | 4 + lib/Target/X86/X86OptimizeLEAs.cpp | 324 ++++++++++++++++++++++++++++ lib/Target/X86/X86TargetMachine.cpp | 3 + test/CodeGen/X86/lea-opt.ll | 131 +++++++++++ 5 files changed, 463 insertions(+) create mode 100644 lib/Target/X86/X86OptimizeLEAs.cpp create mode 100644 test/CodeGen/X86/lea-opt.ll diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index db486f9243af..b23f5c353013 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -34,6 +34,7 @@ set(sources X86VZeroUpper.cpp X86FixupLEAs.cpp X86WinEHState.cpp + X86OptimizeLEAs.cpp ) add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index cd914ee7f9c8..6bdb07d1df04 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -58,6 +58,10 @@ FunctionPass *createX86PadShortFunctions(); /// to eliminate execution delays in some Atom processors. FunctionPass *createX86FixupLEAs(); +/// createX86OptimizeLEAs() - Return a pass that removes redundant +/// address recalculations. +FunctionPass *createX86OptimizeLEAs(); + /// createX86CallFrameOptimization - Return a pass that optimizes /// the code-size of x86 call sequences. This is done by replacing /// esp-relative movs with pushes. 
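A sketch of the rewrite the new pass performs (registers and displacements
are illustrative, not taken from the test below): when a load or store in
the same basic block recomputes an address an earlier LEA already formed,
the memory operand is rewritten against the LEA's result, keeping only the
displacement difference:

  # before
  leaq  16(%rdi,%rsi,4), %rax
  movl  20(%rdi,%rsi,4), %ecx
  # after (20 - 16 = 4 becomes the new displacement)
  leaq  16(%rdi,%rsi,4), %rax
  movl  4(%rax), %ecx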
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp new file mode 100644 index 000000000000..9171786707d8 --- /dev/null +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -0,0 +1,324 @@ +//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass that performs some optimizations with LEA +// instructions in order to improve code size. +// Currently, it does one thing: +// 1) Address calculations in load and store instructions are replaced by +// existing LEA def registers where possible. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-optimize-LEAs" + +STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); + +namespace { +class OptimizeLEAPass : public MachineFunctionPass { +public: + OptimizeLEAPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { return "X86 LEA Optimize"; } + + /// \brief Loop over all of the basic blocks, replacing address + /// calculations in load and store instructions, if it's already + /// been calculated by LEA. Also, remove redundant LEAs. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Returns a distance between two instructions inside one basic block. + /// Negative result means, that instructions occur in reverse order. + int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); + + /// \brief Choose the best \p LEA instruction from the \p List to replace + /// address calculation in \p MI instruction. Return the address displacement + /// and the distance between \p MI and the choosen \p LEA in \p AddrDispShift + /// and \p Dist. + bool chooseBestLEA(const SmallVectorImpl &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist); + + /// \brief Returns true if two machine operand are identical and they are not + /// physical registers. + bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); + + /// \brief Returns true if the instruction is LEA. + bool isLEA(const MachineInstr &MI); + + /// \brief Returns true if two instructions have memory operands that only + /// differ by displacement. The numbers of the first memory operands for both + /// instructions are specified through \p N1 and \p N2. The address + /// displacement is returned through AddrDispShift. + bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift); + + /// \brief Find all LEA instructions in the basic block. + void findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl &List); + + /// \brief Removes redundant address calculations. 
+ bool removeRedundantAddrCalc(const SmallVectorImpl &List); + + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + const X86RegisterInfo *TRI; + + static char ID; +}; +char OptimizeLEAPass::ID = 0; +} + +FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } + +int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, + const MachineInstr &Last) { + const MachineBasicBlock *MBB = First.getParent(); + + // Both instructions must be in the same basic block. + assert(Last.getParent() == MBB && + "Instructions are in different basic blocks"); + + return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - + std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); +} + +// Find the best LEA instruction in the List to replace address recalculation in +// MI. Such LEA must meet these requirements: +// 1) The address calculated by the LEA differs only by the displacement from +// the address used in MI. +// 2) The register class of the definition of the LEA is compatible with the +// register class of the address base register of MI. +// 3) Displacement of the new memory operand should fit in 1 byte if possible. +// 4) The LEA should be as close to MI as possible, and prior to it if +// possible. +bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist) { + const MachineFunction *MF = MI.getParent()->getParent(); + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + LEA = nullptr; + + // Loop over all LEA instructions. + for (auto DefMI : List) { + int64_t AddrDispShiftTemp = 0; + + // Compare instructions memory operands. + if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp)) + continue; + + // Make sure address displacement fits 4 bytes. + if (!isInt<32>(AddrDispShiftTemp)) + continue; + + // Check that LEA def register can be used as MI address base. Some + // instructions can use a limited set of registers as address base, for + // example MOV8mr_NOREX. We could constrain the register class of the LEA + // def to suit MI, however since this case is very rare and hard to + // reproduce in a test it's just more reliable to skip the LEA. + if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != + MRI->getRegClass(DefMI->getOperand(0).getReg())) + continue; + + // Choose the closest LEA instruction from the list, prior to MI if + // possible. Note that we took into account resulting address displacement + // as well. Also note that the list is sorted by the order in which the LEAs + // occur, so the break condition is pretty simple. + int DistTemp = calcInstrDist(*DefMI, MI); + assert(DistTemp != 0 && + "The distance between two different instructions cannot be zero"); + if (DistTemp > 0 || LEA == nullptr) { + // Do not update return LEA, if the current one provides a displacement + // which fits in 1 byte, while the new candidate does not. + if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) && + isInt<8>(AddrDispShift)) + continue; + + LEA = DefMI; + AddrDispShift = AddrDispShiftTemp; + Dist = DistTemp; + } + + // FIXME: Maybe we should not always stop at the first LEA after MI. 
+ if (DistTemp < 0) + break; + } + + return LEA != nullptr; +} + +bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1, + const MachineOperand &MO2) { + return MO1.isIdenticalTo(MO2) && + (!MO1.isReg() || + !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); +} + +bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +// Check if MI1 and MI2 have memory operands which represent addresses that +// differ only by displacement. +bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift) { + // Address base, scale, index and segment operands must be identical. + static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt, + X86::AddrIndexReg, X86::AddrSegmentReg}; + for (auto &N : IdenticalOpNums) + if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N))) + return false; + + // Address displacement operands may differ by a constant. + const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp); + const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp); + if (!isIdenticalOp(*Op1, *Op2)) { + if (Op1->isImm() && Op2->isImm()) + AddrDispShift = Op1->getImm() - Op2->getImm(); + else if (Op1->isGlobal() && Op2->isGlobal() && + Op1->getGlobal() == Op2->getGlobal()) + AddrDispShift = Op1->getOffset() - Op2->getOffset(); + else + return false; + } + + return true; +} + +void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl &List) { + for (auto &MI : MBB) { + if (isLEA(MI)) + List.push_back(const_cast(&MI)); + } +} + +// Try to find load and store instructions which recalculate addresses already +// calculated by some LEA and replace their memory operands with its def +// register. +bool OptimizeLEAPass::removeRedundantAddrCalc( + const SmallVectorImpl &List) { + bool Changed = false; + + assert(List.size() > 0); + MachineBasicBlock *MBB = List[0]->getParent(); + + // Process all instructions in basic block. + for (auto I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr &MI = *I++; + unsigned Opcode = MI.getOpcode(); + + // Instruction must be load or store. + if (!MI.mayLoadOrStore()) + continue; + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode); + + // If instruction has no memory operand - skip it. + if (MemOpNo < 0) + continue; + + MemOpNo += X86II::getOperandBias(Desc); + + // Get the best LEA instruction to replace address calculation. + MachineInstr *DefMI; + int64_t AddrDispShift; + int Dist; + if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist)) + continue; + + // If LEA occurs before current instruction, we can freely replace + // the instruction. If LEA occurs after, we can lift LEA above the + // instruction and this way to be able to replace it. Since LEA and the + // instruction have similar memory operands (thus, the same def + // instructions for these operands), we can always do that, without + // worries of using registers before their defs. + if (Dist < 0) { + DefMI->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + } + + // Since we can possibly extend register lifetime, clear kill flags. 
+ MRI->clearKillFlags(DefMI->getOperand(0).getReg()); + + ++NumSubstLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump();); + + // Change instruction operands. + MI.getOperand(MemOpNo + X86::AddrBaseReg) + .ChangeToRegister(DefMI->getOperand(0).getReg(), false); + MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1); + MI.getOperand(MemOpNo + X86::AddrIndexReg) + .ChangeToRegister(X86::NoRegister, false); + MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift); + MI.getOperand(MemOpNo + X86::AddrSegmentReg) + .ChangeToRegister(X86::NoRegister, false); + + DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump();); + + Changed = true; + } + + return Changed; +} + +bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + bool OptSize = MF.getFunction()->optForSize(); + bool MinSize = MF.getFunction()->optForMinSize(); + + // Perform this optimization only if we care about code size. + if (!OptSize && !MinSize) + return false; + + MRI = &MF.getRegInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + + // Process all basic blocks. + for (auto &MBB : MF) { + SmallVector LEAs; + + // Find all LEA instructions in basic block. + findLEAs(MBB, LEAs); + + // If current basic block has no LEAs, move on to the next one. + if (LEAs.empty()) + continue; + + // Remove redundant address calculations. + Changed |= removeRedundantAddrCalc(LEAs); + } + + return Changed; +} diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 2e869eb7c3cd..0e7e4c0c84a9 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -254,6 +254,9 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createX86OptimizeLEAs()); + addPass(createX86CallFrameOptimization()); } diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll new file mode 100644 index 000000000000..c105b31995b3 --- /dev/null +++ b/test/CodeGen/X86/lea-opt.ll @@ -0,0 +1,131 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s + +%struct.anon1 = type { i32, i32, i32 } +%struct.anon2 = type { i32, [32 x i32], i32 } + +@arr1 = external global [65 x %struct.anon1], align 16 +@arr2 = external global [65 x %struct.anon2], align 16 + +define void @test1(i64 %x) nounwind { +entry: + %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0 + %tmp = load i32, i32* %a, align 4 + %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1 + %tmp1 = load i32, i32* %b, align 4 + %sub = sub i32 %tmp, %tmp1 + %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2 + %tmp2 = load i32, i32* %c, align 4 + %add = add nsw i32 %sub, %tmp2 + switch i32 %add, label %sw.epilog [ + i32 1, label %sw.bb.1 + i32 2, label %sw.bb.2 + ] + +sw.bb.1: ; preds = %entry + store i32 111, i32* %b, align 4 + store i32 222, i32* %c, align 4 + br label %sw.epilog + +sw.bb.2: ; preds = %entry + store i32 333, i32* %b, align 4 + store i32 444, i32* %c, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry + ret void +; CHECK-LABEL: test1: +; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]] +; CHECK: movl arr1(,[[REG1]],4), {{.*}} +; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]] +; CHECK: subl arr1+4(,[[REG1]],4), {{.*}} +; CHECK: leaq arr1+8(,[[REG1]],4), 
[[REG3:%[a-z]+]] +; CHECK: addl arr1+8(,[[REG1]],4), {{.*}} +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +} + +define void @test2(i64 %x) nounwind optsize { +entry: + %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0 + %tmp = load i32, i32* %a, align 4 + %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1 + %tmp1 = load i32, i32* %b, align 4 + %sub = sub i32 %tmp, %tmp1 + %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2 + %tmp2 = load i32, i32* %c, align 4 + %add = add nsw i32 %sub, %tmp2 + switch i32 %add, label %sw.epilog [ + i32 1, label %sw.bb.1 + i32 2, label %sw.bb.2 + ] + +sw.bb.1: ; preds = %entry + store i32 111, i32* %b, align 4 + store i32 222, i32* %c, align 4 + br label %sw.epilog + +sw.bb.2: ; preds = %entry + store i32 333, i32* %b, align 4 + store i32 444, i32* %c, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry + ret void +; CHECK-LABEL: test2: +; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]] +; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]] +; CHECK: movl -4([[REG2]]), {{.*}} +; CHECK: subl ([[REG2]]), {{.*}} +; CHECK: leaq arr1+8(,[[REG1]],4), [[REG3:%[a-z]+]] +; CHECK: addl ([[REG3]]), {{.*}} +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +} + +; Check that LEA optimization pass takes into account a resultant address +; displacement when choosing a LEA instruction for replacing a redundant +; address recalculation. + +define void @test3(i64 %x) nounwind optsize { +entry: + %a = getelementptr inbounds [65 x %struct.anon2], [65 x %struct.anon2]* @arr2, i64 0, i64 %x, i32 2 + %tmp = load i32, i32* %a, align 4 + %b = getelementptr inbounds [65 x %struct.anon2], [65 x %struct.anon2]* @arr2, i64 0, i64 %x, i32 0 + %tmp1 = load i32, i32* %b, align 4 + %add = add nsw i32 %tmp, %tmp1 + switch i32 %add, label %sw.epilog [ + i32 1, label %sw.bb.1 + i32 2, label %sw.bb.2 + ] + +sw.bb.1: ; preds = %entry + store i32 111, i32* %a, align 4 + store i32 222, i32* %b, align 4 + br label %sw.epilog + +sw.bb.2: ; preds = %entry + store i32 333, i32* %a, align 4 + store i32 444, i32* %b, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry + ret void +; CHECK-LABEL: test3: +; CHECK: imulq {{.*}}, [[REG1:%[a-z]+]] +; CHECK: leaq arr2+132([[REG1]]), [[REG2:%[a-z]+]] +; CHECK: leaq arr2([[REG1]]), [[REG3:%[a-z]+]] + +; REG3's definition is closer to movl than REG2's, but the pass still chooses +; REG2 because it provides the resultant address displacement fitting 1 byte. + +; CHECK: movl ([[REG2]]), {{.*}} +; CHECK: addl ([[REG3]]), {{.*}} +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +} From c9119982e4c708faaab5f76a0f4bb81f42afeec6 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 4 Dec 2015 12:48:51 +0000 Subject: [PATCH 061/364] [SystemZ] Bugfix: Don't add CC twice to new three-address instruction. Since BuildMI() automatically adds the implicit operands for a new instruction, adding the old instructions CC operand resulted in that there were two CC imp-def operands, where only one was marked as dead. 
This caused buildSchedGraph() to miss dependencies on the CC reg. Review by Ulrich Weigand git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254714 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 85aa983da3a6..e6b5fc8e6235 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -676,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LiveVariables *LV) const { MachineInstr *MI = MBBI; MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned Opcode = MI->getOpcode(); unsigned NumOps = MI->getNumOperands(); @@ -703,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); if (ThreeOperandOpcode >= 0) { - MachineInstrBuilder MIB = - BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) - .addOperand(Dest); + // Create three address instruction without adding the implicit + // operands. Those will instead be copied over from the original + // instruction by the loop below. + MachineInstrBuilder MIB(*MF, + MF->CreateMachineInstr(get(ThreeOperandOpcode), + MI->getDebugLoc(), /*NoImplicit=*/true)); + MIB.addOperand(Dest); // Keep the kill state, but drop the tied flag. MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) MIB.addOperand(MI->getOperand(I)); + MBB->insert(MI, MIB); return finishConvertToThreeAddress(MI, MIB, LV); } } From fe8212f64b7b754d41d9d5e06718f8d44e13cf5e Mon Sep 17 00:00:00 2001 From: Colin LeMahieu Date: Fri, 4 Dec 2015 15:48:45 +0000 Subject: [PATCH 062/364] [Hexagon] Using multiply instead of shift on signed number which can be UB git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 5e78762b994a..64050824c10b 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -554,13 +554,13 @@ struct HexagonOperand : public MCParsedAsmOperand { void adds4_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *CE = dyn_cast(getImm()); - Inst.addOperand(MCOperand::createImm(CE->getValue() << 6)); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } void adds3_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *CE = dyn_cast(getImm()); - Inst.addOperand(MCOperand::createImm(CE->getValue() << 6)); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } StringRef getToken() const { From e67b3e872170aa4f2cacf12cc7c6c3aed7373399 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 4 Dec 2015 16:10:48 +0000 Subject: [PATCH 063/364] ARM/AArch64: update reference documentation. There's a more comprehensive ACLE and a real v8 ARM ARM now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254720 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CompilerWriterInfo.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 900ba24e230f..6c3ff4b10f1e 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -22,14 +22,16 @@ ARM * `ABI Addenda and Errata `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ AArch64 ------- +* `ARMv8 Architecture Reference Manual `_ + * `ARMv8 Instruction Set Overview `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ Itanium (ia64) -------------- From 608d7b4f4a08cf0ac4529ef1260667d82f3926c4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 16:14:31 +0000 Subject: [PATCH 064/364] Modernize the C++ APIs for creating LTO modules. This is a continuation of r253367. These functions return is owned by the caller, so they return std::unique_ptr now. The call can fail, so the return is wrapped in ErrorOr. They have a context where to report diagnostics, so they don't need to take a string out parameter. With this there are no call to getGlobalContext in lib/LTO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254721 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LTO/LTOModule.h | 42 +++++------ lib/LTO/LTOModule.cpp | 131 ++++++++++++++++------------------- tools/llvm-lto/llvm-lto.cpp | 49 +++++++------ tools/lto/lto.cpp | 70 +++++++++++++++---- 4 files changed, 165 insertions(+), 127 deletions(-) diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h index 83a523613a76..97b5865bd47f 100644 --- a/include/llvm/LTO/LTOModule.h +++ b/include/llvm/LTO/LTOModule.h @@ -91,25 +91,24 @@ struct LTOModule { /// InitializeAllTargetMCs(); /// InitializeAllAsmPrinters(); /// InitializeAllAsmParsers(); - static LTOModule *createFromFile(const char *path, TargetOptions options, - std::string &errMsg); - static LTOModule *createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg); - static LTOModule *createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg); - static LTOModule *createFromBuffer(const void *mem, size_t length, - TargetOptions options, std::string &errMsg, - StringRef path = ""); - - static LTOModule *createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path); - static LTOModule *createInContext(const void *mem, size_t length, - TargetOptions options, std::string &errMsg, - StringRef path, LLVMContext *Context); + static ErrorOr> + createFromFile(LLVMContext &Context, const char *path, TargetOptions options); + static ErrorOr> + createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options); + static ErrorOr> + createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path, + size_t map_size, off_t offset, TargetOptions options); + static ErrorOr> + createFromBuffer(LLVMContext &Context, const void *mem, size_t length, + TargetOptions options, StringRef path = ""); + + static ErrorOr> + createInLocalContext(const void *mem, size_t length, TargetOptions options, + StringRef path); + static ErrorOr> + createInContext(const void *mem, size_t length, TargetOptions options, + StringRef path, LLVMContext *Context); const Module &getModule() const { return const_cast(this)->getModule(); @@ -207,8 
+206,9 @@ struct LTOModule { bool objcClassNameFromExpression(const Constant *c, std::string &name); /// Create an LTOModule (private version). - static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, - std::string &errMsg, LLVMContext *Context); + static ErrorOr> + makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context); }; } #endif diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 42a568b54c7b..a6a3002e457d 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -100,89 +100,72 @@ std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { return getBitcodeProducerString(*BCOrErr, Context); } -LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromFile(LLVMContext &Context, const char *path, + TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(path); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg) { - return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); +ErrorOr> +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options) { + return createFromOpenFileSlice(Context, fd, path, size, 0, options); } -LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, + const char *path, size_t map_size, + off_t offset, TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path) { - return createInContext(mem, length, options, errMsg, path, - &getGlobalContext()); +ErrorOr> +LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, + size_t length, TargetOptions options, + StringRef path) { + return createInContext(mem, length, options, path, &Context); } -LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, - StringRef path) { - return createInContext(mem, length, options, errMsg, path, nullptr); +ErrorOr> +LTOModule::createInLocalContext(const void *mem, size_t length, + TargetOptions options, StringRef path) { + return createInContext(mem, length, options, path, nullptr); } -LTOModule *LTOModule::createInContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path, - LLVMContext *Context) { +ErrorOr> +LTOModule::createInContext(const void *mem, size_t 
length, + TargetOptions options, StringRef path, + LLVMContext *Context) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); - return makeLTOModule(Buffer, options, errMsg, Context); + return makeLTOModule(Buffer, options, Context); } -static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, - LLVMContext &Context, - bool ShouldBeLazy, - std::string &ErrMsg) { +static ErrorOr> +parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldBeLazy) { // Find the buffer. ErrorOr MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) { - ErrMsg = EC.message(); - return nullptr; - } - - std::function DiagnosticHandler = - [&ErrMsg](const DiagnosticInfo &DI) { - raw_string_ostream Stream(ErrMsg); - DiagnosticPrinterRawOStream DP(Stream); - DI.print(DP); - }; + if (std::error_code EC = MBOrErr.getError()) + return EC; if (!ShouldBeLazy) { // Parse the full file. - ErrorOr> M = - parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler); - if (!M) - return nullptr; + ErrorOr> M = parseBitcodeFile(*MBOrErr, Context); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } @@ -190,16 +173,16 @@ static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, std::unique_ptr LightweightBuf = MemoryBuffer::getMemBuffer(*MBOrErr, false); ErrorOr> M = - getLazyBitcodeModule(std::move(LightweightBuf), Context, - DiagnosticHandler, true /*ShouldLazyLoadMetadata*/); - if (!M) - return nullptr; + getLazyBitcodeModule(std::move(LightweightBuf), Context, nullptr, + true /*ShouldLazyLoadMetadata*/); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } -LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, - TargetOptions options, std::string &errMsg, - LLVMContext *Context) { +ErrorOr> +LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context) { std::unique_ptr OwnedContext; if (!Context) { OwnedContext = llvm::make_unique(); @@ -208,11 +191,12 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, // If we own a context, we know this is being used only for symbol // extraction, not linking. Be lazy in that case. 
- std::unique_ptr M = parseBitcodeFileImpl( - Buffer, *Context, - /* ShouldBeLazy */ static_cast(OwnedContext), errMsg); - if (!M) - return nullptr; + ErrorOr> MOrErr = + parseBitcodeFileImpl(Buffer, *Context, + /* ShouldBeLazy */ static_cast(OwnedContext)); + if (std::error_code EC = MOrErr.getError()) + return EC; + std::unique_ptr &M = *MOrErr; std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) @@ -220,9 +204,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, llvm::Triple Triple(TripleStr); // find machine architecture for this module + std::string errMsg; const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) - return nullptr; + return std::unique_ptr(nullptr); // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; @@ -246,16 +231,16 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, std::unique_ptr IRObj( new object::IRObjectFile(Buffer, std::move(M))); - LTOModule *Ret; + std::unique_ptr Ret; if (OwnedContext) - Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext)); + Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext))); else - Ret = new LTOModule(std::move(IRObj), target); + Ret.reset(new LTOModule(std::move(IRObj), target)); Ret->parseSymbols(); Ret->parseMetadata(); - return Ret; + return std::move(Ret); } /// Create a MemoryBuffer from a memory range with an optional name. diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 07cd9bb1eaf8..4bc692279b9e 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -124,23 +124,27 @@ static void handleDiagnostics(lto_codegen_diagnostic_severity_t Severity, errs() << Msg << "\n"; } +static std::string CurrentActivity; static void diagnosticHandler(const DiagnosticInfo &DI) { raw_ostream &OS = errs(); OS << "llvm-lto: "; switch (DI.getSeverity()) { case DS_Error: - OS << "error: "; + OS << "error"; break; case DS_Warning: - OS << "warning: "; + OS << "warning"; break; case DS_Remark: - OS << "remark: "; + OS << "remark"; break; case DS_Note: - OS << "note: "; + OS << "note"; break; } + if (!CurrentActivity.empty()) + OS << ' ' << CurrentActivity; + OS << ": "; DiagnosticPrinterRawOStream DP(OS); DI.print(DP); @@ -150,6 +154,11 @@ static void diagnosticHandler(const DiagnosticInfo &DI) { exit(1); } +static void diagnosticHandlerWithContenxt(const DiagnosticInfo &DI, + void *Context) { + diagnosticHandler(DI); +} + static void error(const Twine &Msg) { errs() << "llvm-lto: " << Msg << '\n'; exit(1); @@ -172,12 +181,11 @@ getLocalLTOModule(StringRef Path, std::unique_ptr &Buffer, MemoryBuffer::getFile(Path); error(BufferOrErr, "error loading file '" + Path + "'"); Buffer = std::move(BufferOrErr.get()); - std::string Error; - std::unique_ptr Ret(LTOModule::createInLocalContext( - Buffer->getBufferStart(), Buffer->getBufferSize(), Options, Error, Path)); - if (!Error.empty()) - error("error loading file '" + Path + "' " + Error); - return Ret; + CurrentActivity = ("loading file '" + Path + "'").str(); + ErrorOr> Ret = LTOModule::createInLocalContext( + Buffer->getBufferStart(), Buffer->getBufferSize(), Options, Path); + CurrentActivity = ""; + return std::move(*Ret); } /// \brief List symbols in each IR file. 
@@ -207,10 +215,11 @@ static void createCombinedFunctionIndex() { FunctionInfoIndex CombinedIndex; uint64_t NextModuleId = 0; for (auto &Filename : InputFilenames) { + CurrentActivity = "loading file '" + Filename + "'"; ErrorOr> IndexOrErr = llvm::getFunctionIndexForFile(Filename, diagnosticHandler); - error(IndexOrErr, "error loading file '" + Filename + "'"); std::unique_ptr Index = std::move(IndexOrErr.get()); + CurrentActivity = ""; // Skip files without a function summary. if (!Index) continue; @@ -257,7 +266,10 @@ int main(int argc, char **argv) { unsigned BaseArg = 0; - LTOCodeGenerator CodeGen(getGlobalContext()); + LLVMContext Context; + Context.setDiagnosticHandler(diagnosticHandlerWithContenxt, nullptr, true); + + LTOCodeGenerator CodeGen(Context); if (UseDiagnosticHandler) CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr); @@ -274,14 +286,11 @@ int main(int argc, char **argv) { std::vector KeptDSOSyms; for (unsigned i = BaseArg; i < InputFilenames.size(); ++i) { - std::string error; - std::unique_ptr Module( - LTOModule::createFromFile(InputFilenames[i].c_str(), Options, error)); - if (!error.empty()) { - errs() << argv[0] << ": error loading file '" << InputFilenames[i] - << "': " << error << "\n"; - return 1; - } + CurrentActivity = "loading file '" + InputFilenames[i] + "'"; + ErrorOr> ModuleOrErr = + LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options); + std::unique_ptr &Module = *ModuleOrErr; + CurrentActivity = ""; unsigned NumSyms = Module->getSymbolCount(); for (unsigned I = 0; I < NumSyms; ++I) { diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 6058fb1197eb..d13de57e830c 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -15,6 +15,8 @@ #include "llvm-c/lto.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CommandFlags.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" @@ -64,6 +66,24 @@ static bool initialized = false; // Holds the command-line option parsing state of the LTO module. static bool parsedOptions = false; +static LLVMContext *LTOContext = nullptr; + +static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) { + if (DI.getSeverity() != DS_Error) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << '\n'; + return; + } + sLastErrorString = ""; + { + raw_string_ostream Stream(sLastErrorString); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + sLastErrorString += '\n'; +} + // Initialize the configured targets if they have not been initialized. static void lto_initialize() { if (!initialized) { @@ -79,6 +99,9 @@ static void lto_initialize() { InitializeAllAsmParsers(); InitializeAllAsmPrinters(); InitializeAllDisassemblers(); + + LTOContext = &getGlobalContext(); + LTOContext->setDiagnosticHandler(diagnosticHandler, nullptr, true); initialized = true; } } @@ -95,7 +118,7 @@ static void handleLibLTODiagnostic(lto_codegen_diagnostic_severity_t Severity, // libLTO API semantics, which require that the code generator owns the object // file. 
struct LibLTOCodeGenerator : LTOCodeGenerator { - LibLTOCodeGenerator() : LTOCodeGenerator(getGlobalContext()) { + LibLTOCodeGenerator() : LTOCodeGenerator(*LTOContext) { setDiagnosticHandler(handleLibLTODiagnostic, nullptr); } LibLTOCodeGenerator(std::unique_ptr Context) : LTOCodeGenerator(*Context), OwnedContext(std::move(Context)) { @@ -166,14 +189,21 @@ lto_module_is_object_file_in_memory_for_target(const void* mem, lto_module_t lto_module_create(const char* path) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap(LTOModule::createFromFile(path, Options, sLastErrorString)); + ErrorOr> M = + LTOModule::createFromFile(*LTOContext, path, Options); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap( - LTOModule::createFromOpenFile(fd, path, size, Options, sLastErrorString)); + ErrorOr> M = + LTOModule::createFromOpenFile(*LTOContext, fd, path, size, Options); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, @@ -182,14 +212,21 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, off_t offset) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap(LTOModule::createFromOpenFileSlice(fd, path, map_size, offset, - Options, sLastErrorString)); + ErrorOr> M = LTOModule::createFromOpenFileSlice( + *LTOContext, fd, path, map_size, offset, Options); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_from_memory(const void* mem, size_t length) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap(LTOModule::createFromBuffer(mem, length, Options, sLastErrorString)); + ErrorOr> M = + LTOModule::createFromBuffer(*LTOContext, mem, length, Options); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_from_memory_with_path(const void* mem, @@ -197,16 +234,22 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem, const char *path) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap( - LTOModule::createFromBuffer(mem, length, Options, sLastErrorString, path)); + ErrorOr> M = + LTOModule::createFromBuffer(*LTOContext, mem, length, Options, path); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_in_local_context(const void *mem, size_t length, const char *path) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap(LTOModule::createInLocalContext(mem, length, Options, - sLastErrorString, path)); + ErrorOr> M = + LTOModule::createInLocalContext(mem, length, Options, path); + if (!M) + return nullptr; + return wrap(M->release()); } lto_module_t lto_module_create_in_codegen_context(const void *mem, @@ -215,8 +258,9 @@ lto_module_t lto_module_create_in_codegen_context(const void *mem, lto_code_gen_t cg) { lto_initialize(); llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); - return wrap(LTOModule::createInContext(mem, length, Options, sLastErrorString, - path, &unwrap(cg)->getContext())); + ErrorOr> M = LTOModule::createInContext( + mem, length, Options, path, &unwrap(cg)->getContext()); + return wrap(M->release()); } void 
lto_module_dispose(lto_module_t mod) { delete unwrap(mod); } From 42a2b123da9cb79b3ffad472b64acdb981c1e850 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 4 Dec 2015 16:18:15 +0000 Subject: [PATCH 065/364] [Hexagon] Simplify LowerCONCAT_VECTORS, handle different types better git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254724 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 113 ++++++++++----------- 1 file changed, 55 insertions(+), 58 deletions(-) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index b59fe6b67044..0a89ef424dd2 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1520,7 +1520,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, Subtarget(ST) { bool IsV4 = !Subtarget.hasV5TOps(); auto &HRI = *Subtarget.getRegisterInfo(); - bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVXDbl = Subtarget.useHVXDblOps(); setPrefLoopAlignment(4); setPrefFunctionAlignment(4); @@ -1808,17 +1810,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); if (UseHVX) { - if(!UseHVXDbl) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); - } - else { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); + if (UseHVXSgl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + } else if (UseHVXDbl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + } else { + llvm_unreachable("Unrecognized HVX mode"); } } // Subtarget-specific operation actions. @@ -2212,8 +2215,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned Size = VT.getSizeInBits(); - // A vector larger than 64 bits cannot be represented in Hexagon. - // Expand will split the vector. + // Only handle vectors of 64 bits or shorter. 
if (Size > 64) return SDValue(); @@ -2350,63 +2352,58 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, bool UseHVX = Subtarget.useHVXOps(); EVT VT = Op.getValueType(); unsigned NElts = Op.getNumOperands(); - SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); - SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64); - SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, - DAG.getConstant(32, dl, MVT::i64)); - SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64); - - ConstantSDNode *W = dyn_cast(Width); - ConstantSDNode *S = dyn_cast(Shifted); - - if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v2i16 to a single v4i16. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + SDValue Vec0 = Op.getOperand(0); + EVT VecVT = Vec0.getValueType(); + unsigned Width = VecVT.getSizeInBits(); + + if (NElts == 2) { + MVT ST = VecVT.getSimpleVT(); + // We are trying to concat two v2i16 to a single v4i16, or two v4i8 + // into a single v8i8. + if (ST == MVT::v2i16 || ST == MVT::v4i8) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); + + if (UseHVX) { + assert((Width == 64*8 && Subtarget.useHVXSglOps()) || + (Width == 128*8 && Subtarget.useHVXDblOps())); + SDValue Vec1 = Op.getOperand(1); + MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32; + SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); + SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); + SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); + return DAG.getNode(ISD::BITCAST, dl, VT, VC); } } - if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v4i8 to a single v8i8. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); - } - } + if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) + return SDValue(); + + SDValue C0 = DAG.getConstant(0, dl, MVT::i64); + SDValue C32 = DAG.getConstant(32, dl, MVT::i64); + SDValue W = DAG.getConstant(Width, dl, MVT::i64); + // Create the "width" part of the argument to insert_rp/insertp_rp. 
+ SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); + SDValue V = C0; - if (UseHVX) { - SDValue Vec0 = Op.getOperand(1); - assert((VecVT.getSizeInBits() == 64*8 && Subtarget.useHVXSglOps()) || - (VecVT.getSizeInBits() == 128*8 && Subtarget.useHVXDblOps())); - SDValue Combined = DAG.getNode(HexagonISD::VCOMBINE, dl, VT, Vec0, Vec); - return Combined; - } for (unsigned i = 0, e = NElts; i != e; ++i) { - unsigned OpIdx = NElts - i - 1; - SDValue Operand = Op.getOperand(OpIdx); + unsigned N = NElts-i-1; + SDValue OpN = Op.getOperand(N); - if (VT.getSizeInBits() == 64 && - Operand.getValueType().getSizeInBits() == 32) { + if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); - Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); } - - SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); - SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); - SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); - const SDValue Ops[] = {ConstVal, Operand, Combined}; - + SDValue Idx = DAG.getConstant(N, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); if (VT.getSizeInBits() == 32) - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); else - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); } - return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); + return DAG.getNode(ISD::BITCAST, dl, VT, V); } SDValue From 3bf2abe88cac804262355d2070803d6334863c6b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 17:09:42 +0000 Subject: [PATCH 066/364] [WebAssembly] Use SelectionDAG::getUNDEF. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 2485df1ab5d2..9f0860550533 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -400,7 +400,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, DAG.getTargetConstant(InVals.size(), DL, MVT::i32)) - : DAG.getNode(ISD::UNDEF, DL, In.VT)); + : DAG.getUNDEF(In.VT)); // Record the number and types of arguments. MF.getInfo()->addParam(In.VT); From 0771e45d22709610a9ee404063de8de05b9d92a8 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 17:12:52 +0000 Subject: [PATCH 067/364] [WebAssembly] Check for more unsupported ABI flags. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254727 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 9f0860550533..c54ffb0654a6 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -289,6 +289,18 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (Ins.size() > 1) fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); + SmallVectorImpl &Outs = CLI.Outs; + for (const ISD::OutputArg &Out : Outs) { + assert(!Out.Flags.isByVal() && "byval is not valid for return values"); + assert(!Out.Flags.isNest() && "nest is not valid for return values"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); + } + bool IsVarArg = CLI.IsVarArg; if (IsVarArg) fail(DL, DAG, "WebAssembly doesn't support varargs yet"); @@ -309,8 +321,21 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.append(OutVals.begin(), OutVals.end()); SmallVector Tys; - for (const auto &In : Ins) + for (const auto &In : Ins) { + if (In.Flags.isByVal()) + fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); + if (In.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (In.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (In.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (In.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + // Ignore In.getOrigAlign() because all our arguments are passed in + // registers. Tys.push_back(In.VT); + } Tys.push_back(MVT::Other); SDVTList TyList = DAG.getVTList(Tys); SDValue Res = From 1951dce526cca48df403afbb46de939be15acb73 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 17:16:07 +0000 Subject: [PATCH 068/364] [WebAssembly] Factor out the list of supported calling conventions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254728 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index c54ffb0654a6..79d7cbbefa2b 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -257,6 +257,16 @@ static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue())); } +// Test whether the given calling convention is supported. +static bool +CallingConvSupported(CallingConv::ID CallConv) { + // We currently support the language-independent target-independent + // conventions. 
+ return CallConv == CallingConv::C || + CallConv == CallingConv::Fast || + CallConv == CallingConv::Cold; +} + SDValue WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -267,8 +277,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallConv = CLI.CallConv; - if (CallConv != CallingConv::C && CallConv != CallingConv::Fast && - CallConv != CallingConv::Cold) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support language-specific or target-specific " "calling conventions yet"); @@ -367,7 +376,7 @@ SDValue WebAssemblyTargetLowering::LowerReturn( const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); - if (CallConv != CallingConv::C) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); if (IsVarArg) fail(DL, DAG, "WebAssembly doesn't support varargs yet"); @@ -399,7 +408,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - if (CallConv != CallingConv::C) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); if (IsVarArg) fail(DL, DAG, "WebAssembly doesn't support varargs yet"); From e0b2e5de57f8488a3c01cf55161cb9ec946b5fec Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 17:18:32 +0000 Subject: [PATCH 069/364] [WebAssembly] clang-format CallingConvSupported. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254729 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 79d7cbbefa2b..b651855eea7a 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -258,12 +258,10 @@ static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { } // Test whether the given calling convention is supported. -static bool -CallingConvSupported(CallingConv::ID CallConv) { +static bool CallingConvSupported(CallingConv::ID CallConv) { // We currently support the language-independent target-independent // conventions. - return CallConv == CallingConv::C || - CallConv == CallingConv::Fast || + return CallConv == CallingConv::C || CallConv == CallingConv::Fast || CallConv == CallingConv::Cold; } From 6326d04e5969570e0c7f883e1fadbcf6a1fc4e1b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 17:19:44 +0000 Subject: [PATCH 070/364] [WebAssembly] Give names to the callseq begin and end instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254730 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyInstrCall.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 4028460bd231..018d26cfacda 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -17,10 +17,10 @@ let Defs = [ARGUMENTS] in { // The call sequence start/end LLVM-isms isn't useful to WebAssembly since it's // a virtual ISA. 
let isCodeGenOnly = 1 in { -def : I<(outs), (ins i64imm:$amt), - [(WebAssemblycallseq_start timm:$amt)]>; -def : I<(outs), (ins i64imm:$amt1, i64imm:$amt2), - [(WebAssemblycallseq_end timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKDOWN : I<(outs), (ins i64imm:$amt), + [(WebAssemblycallseq_start timm:$amt)]>; +def ADJCALLSTACKUP : I<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(WebAssemblycallseq_end timm:$amt1, timm:$amt2)]>; } // isCodeGenOnly = 1 multiclass CALL { From 2a3cc32f7191837dc796338f42d9d75d188b4ce6 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 4 Dec 2015 17:20:04 +0000 Subject: [PATCH 071/364] [llvm-dwp] Retrieve the DWOID from the CU for the cu_index entry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254731 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 3 + lib/DebugInfo/DWARF/DWARFFormValue.cpp | 10 ++- test/tools/llvm-dwp/X86/simple.test | 6 +- tools/llvm-dwp/llvm-dwp.cpp | 63 ++++++++++++++++++- 4 files changed, 76 insertions(+), 6 deletions(-) diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 7ddcc0d81d59..3c32a3e5b794 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -84,6 +84,9 @@ class DWARFFormValue { const DWARFUnit *u) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *u); + static bool skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize); static ArrayRef getFixedFormSizes(uint8_t AddrSize, uint16_t Version); diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index a11b00a926d9..3dc58423df68 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -261,6 +261,12 @@ DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr, bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *cu) { + return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(), + cu->getAddressByteSize()); +} +bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize) { bool indirect = false; do { switch (form) { @@ -295,10 +301,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // Compile unit address sized values case DW_FORM_addr: - *offset_ptr += cu->getAddressByteSize(); + *offset_ptr += AddrSize; return true; case DW_FORM_ref_addr: - *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + *offset_ptr += getRefAddrSize(AddrSize, Version); return true; // 0 byte values - implied from the form. 
diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test index 6ee19697442d..1c7b1040bd31 100644 --- a/test/tools/llvm-dwp/X86/simple.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -28,6 +28,7 @@ CHECK: .debug_info.dwo contents: CHECK: 0x00000000: Compile Unit: length = 0x00000025 version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000029) CHECK: DW_TAG_compile_unit CHECK: DW_AT_name {{.*}} "a.cpp" +CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOA:.*]]) CHECK: DW_TAG_variable CHECK: DW_AT_name {{.*}} "a" CHECK: DW_TAG_structure_type @@ -35,6 +36,7 @@ CHECK: DW_AT_name {{.*}} "foo" CHECK: 0x00000029: Compile Unit: length = 0x00000031 version = 0x0004 abbr_offset = 0x0031 addr_size = 0x08 (next unit at 0x0000005e) CHECK: DW_AT_name {{.*}} "b.cpp" +CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOB:.*]]) CHECK: DW_TAG_structure_type CHECK: DW_AT_name {{.*}} "bar" CHECK: DW_TAG_subprogram @@ -45,8 +47,8 @@ CHECK: .debug_cu_index contents: Ensure only the relevant/contained sections are included in the table: CHECK: Index Signature INFO ABBREV STR_OFFSETS Don't bother checking the Signatures, they aren't correct yet. -CHECK: [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000010) -CHECK: [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000010, 0x00000024) +CHECK: 1 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000010) +CHECK: 2 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000010, 0x00000024) CHECK: .debug_str.dwo contents: CHECK: "clang version diff --git a/tools/llvm-dwp/llvm-dwp.cpp b/tools/llvm-dwp/llvm-dwp.cpp index e6a90cf8a3cf..b68ba437f830 100644 --- a/tools/llvm-dwp/llvm-dwp.cpp +++ b/tools/llvm-dwp/llvm-dwp.cpp @@ -1,5 +1,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -82,6 +83,52 @@ writeStringsAndOffsets(MCStreamer &Out, StringMap &Strings, return std::error_code(); } +static uint32_t getCUAbbrev(StringRef Abbrev, uint64_t AbbrCode) { + uint64_t CurCode; + uint32_t Offset = 0; + DataExtractor AbbrevData(Abbrev, true, 0); + while ((CurCode = AbbrevData.getULEB128(&Offset)) != AbbrCode) { + // Tag + AbbrevData.getULEB128(&Offset); + // DW_CHILDREN + AbbrevData.getU8(&Offset); + // Attributes + while (AbbrevData.getULEB128(&Offset) | AbbrevData.getULEB128(&Offset)) + ; + } + return Offset; +} + +static uint64_t getCUSignature(StringRef Abbrev, StringRef Info) { + uint32_t Offset = 0; + DataExtractor InfoData(Info, true, 0); + InfoData.getU32(&Offset); // Length + uint16_t Version = InfoData.getU16(&Offset); + InfoData.getU32(&Offset); // Abbrev offset (should be zero) + uint8_t AddrSize = InfoData.getU8(&Offset); + + uint32_t AbbrCode = InfoData.getULEB128(&Offset); + + DataExtractor AbbrevData(Abbrev, true, 0); + uint32_t AbbrevOffset = getCUAbbrev(Abbrev, AbbrCode); + uint64_t Tag = AbbrevData.getULEB128(&AbbrevOffset); + (void)Tag; + // FIXME: Real error handling + assert(Tag == dwarf::DW_TAG_compile_unit); + // DW_CHILDREN + AbbrevData.getU8(&AbbrevOffset); + uint32_t Name; + uint32_t Form; + while ((Name = AbbrevData.getULEB128(&AbbrevOffset)) | + (Form = AbbrevData.getULEB128(&AbbrevOffset)) && + Name != dwarf::DW_AT_GNU_dwo_id) { + DWARFFormValue::skipValue(Form, InfoData, &Offset, Version, AddrSize); + } + // FIXME: Real error handling + assert(Name == dwarf::DW_AT_GNU_dwo_id); + return 
InfoData.getU64(&Offset); +} + static std::error_code write(MCStreamer &Out, ArrayRef<std::string> Inputs) { const auto &MCOFI = *Out.getContext().getObjectFileInfo(); MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); @@ -104,7 +151,6 @@ static std::error_code write(MCStreamer &Out, ArrayRef<std::string> Inputs) { StringMap<uint32_t> Strings; uint32_t StringOffset = 0; - uint64_t UnitIndex = 0; uint32_t ContributionOffsets[8] = {}; for (const auto &Input : Inputs) { @@ -114,10 +160,11 @@ static std::error_code write(MCStreamer &Out, ArrayRef<std::string> Inputs) { IndexEntries.emplace_back(); UnitIndexEntry &CurEntry = IndexEntries.back(); - CurEntry.Signature = UnitIndex++; StringRef CurStrSection; StringRef CurStrOffsetSection; + StringRef InfoSection; + StringRef AbbrevSection; for (const auto &Section : ErrOrObj->getBinary()->sections()) { StringRef Name; @@ -138,6 +185,14 @@ static std::error_code write(MCStreamer &Out, ArrayRef<std::string> Inputs) { CurEntry.Contributions[Index].Offset = ContributionOffsets[Index]; ContributionOffsets[Index] += (CurEntry.Contributions[Index].Length = Contents.size()); + + if (Kind == DW_SECT_INFO) { + assert(InfoSection.empty()); + InfoSection = Contents; + } else if (Kind == DW_SECT_ABBREV) { + assert(AbbrevSection.empty()); + AbbrevSection = Contents; + } } MCSection *OutSection = SectionPair->second.first; @@ -151,6 +206,10 @@ static std::error_code write(MCStreamer &Out, ArrayRef<std::string> Inputs) { } } + assert(!AbbrevSection.empty()); + assert(!InfoSection.empty()); + CurEntry.Signature = getCUSignature(AbbrevSection, InfoSection); + if (auto Err = writeStringsAndOffsets(Out, Strings, StringOffset, StrSection, StrOffsetSection, CurStrSection, CurStrOffsetSection)) From cd2103de5a0dcbe429010509d91d7648aaca6f59 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 4 Dec 2015 17:40:13 +0000 Subject: [PATCH 072/364] [CXX TLS calling convention] Add CXX TLS calling convention. This commit adds a new target-independent calling convention for C++ TLS access functions. It aims to minimize overhead in the caller by preserving as many registers as possible. The target-specific implementation for X86-64 is defined as follows: Arguments are passed as for the default C calling convention The same applies for the return value(s) The callee preserves all GPRs - except RAX and RDI The access function makes C-style TLS function calls in the entry and exit block; C-style TLS functions save a lot more registers than normal calls. The added calling convention ties into the existing implementation of the C-style TLS functions, so we can't simply use existing calling conventions such as preserve_mostcc. rdar://9001553 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254737 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/BitCodeFormat.rst | 1 + docs/LangRef.rst | 10 +++++ include/llvm/IR/CallingConv.h | 3 ++ lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 2 + lib/AsmParser/LLToken.h | 1 + lib/IR/AsmWriter.cpp | 1 + lib/Target/X86/X86RegisterInfo.cpp | 8 ++++ test/CodeGen/X86/cxx_tlscc64.ll | 71 ++++++++++++++++++++++++++++++ 9 files changed, 98 insertions(+) create mode 100644 test/CodeGen/X86/cxx_tlscc64.ll diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 62d66f85d557..d6e3099bdb63 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -756,6 +756,7 @@ function.
The operand fields are: * ``anyregcc``: code 13 * ``preserve_mostcc``: code 14 * ``preserve_allcc``: code 15 + * ``cxx_fast_tlscc``: code 17 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cf1ceab1f1c6..82b33557c128 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -406,6 +406,16 @@ added in the future: This calling convention, like the `PreserveMost` calling convention, will be used by a future version of the ObjectiveC runtime and should be considered experimental at this time. +"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + This calling convention aims to minimize overhead in the caller by + preserving as many registers as possible. This calling convention behaves + identical to the `C` calling convention on how arguments and return values + are passed, but it uses a different set of caller/callee-saved registers. + Given that C-style TLS on Darwin has its own special CSRs, we can't use the + existing `PreserveMost`. + + - On X86-64 the callee preserves all general purpose registers, except for + RDI and RAX. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index ac7cc9b74ab9..8204d3e2e812 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -72,6 +72,9 @@ namespace CallingConv { // Swift - Calling convention for Swift. Swift = 16, + // CXX_FAST_TLS - Calling convention for access functions. + CXX_FAST_TLS = 17, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
FirstTargetCC = 64, diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 372c56588864..f95a763e3dae 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -591,6 +591,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ghccc); KEYWORD(hhvmcc); KEYWORD(hhvm_ccc); + KEYWORD(cxx_fast_tlscc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 307ed397834c..145b5eaaceca 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1544,6 +1544,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'ghccc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' +/// ::= 'cxx_fast_tlscc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1574,6 +1575,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; + case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6a9a1de0b850..48abeac95066 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -99,6 +99,7 @@ namespace lltok { kw_preserve_mostcc, kw_preserve_allcc, kw_ghccc, kw_hhvmcc, kw_hhvm_ccc, + kw_cxx_fast_tlscc, // Attributes: kw_attributes, diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index fae1ebee5f2a..f8040a7b5f86 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -304,6 +304,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::AnyReg: Out << "anyregcc"; break; case CallingConv::PreserveMost: Out << "preserve_mostcc"; break; case CallingConv::PreserveAll: Out << "preserve_allcc"; break; + case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 39de5004143e..888437634789 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -248,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_SaveList; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -310,6 +314,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_RegMask; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll new file mode 100644 index 000000000000..c229521cc9a4 --- /dev/null +++ b/test/CodeGen/X86/cxx_tlscc64.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = 
external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare void @_ZN1SC1Ev(%struct.S*) +declare void @_ZN1SD1Ev(%struct.S*) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +; Every GPR should be saved - except rdi, rax, and rsp +; CHECK-LABEL: _ZTW2sg +; CHECK: pushq %r11 +; CHECK: pushq %r10 +; CHECK: pushq %r9 +; CHECK: pushq %r8 +; CHECK: pushq %rsi +; CHECK: pushq %rdx +; CHECK: pushq %rcx +; CHECK: pushq %rbx +; CHECK: callq +; CHECK: jne +; CHECK: callq +; CHECK: tlv_atexit +; CHECK: callq +; CHECK: popq %rbx +; CHECK: popq %rcx +; CHECK: popq %rdx +; CHECK: popq %rsi +; CHECK: popq %r8 +; CHECK: popq %r9 +; CHECK: popq %r10 +; CHECK: popq %r11 +; SHRINK-LABEL: _ZTW2sg +; SHRINK: callq +; SHRINK: jne +; SHRINK: pushq %r11 +; SHRINK: pushq %r10 +; SHRINK: pushq %r9 +; SHRINK: pushq %r8 +; SHRINK: pushq %rsi +; SHRINK: pushq %rdx +; SHRINK: pushq %rcx +; SHRINK: pushq %rbx +; SHRINK: callq +; SHRINK: tlv_atexit +; SHRINK: popq %rbx +; SHRINK: popq %rcx +; SHRINK: popq %rdx +; SHRINK: popq %rsi +; SHRINK: popq %r8 +; SHRINK: popq %r9 +; SHRINK: popq %r10 +; SHRINK: popq %r11 +; SHRINK: LBB{{.*}}: +; SHRINK: callq +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2 + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2 + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} From 015255c0401cdd5f699fc2c0edd6f0239253a6f1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Dec 2015 17:51:55 +0000 Subject: [PATCH 073/364] fix formatting; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254739 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SelectionDAGInfo.cpp | 43 +++++++++++--------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ce79fcf9ad81..b1a01614b4a1 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } -SDValue -X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); @@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); - if (const char *bzeroEntry = V && + if (const char *bzeroEntry = V && V->isNullValue() ? 
Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 0) .setDiscardResult(); - std::pair CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); + std::pair CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } @@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, BytesLeft = SizeVal % UBytes; } - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), + InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; @@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; @@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, + Dst, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); From d3e1404b7b4160d3b5b5979b6fc6ee99ac4daf75 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Dec 2015 17:54:31 +0000 Subject: [PATCH 074/364] don't repeat function names in comments; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254740 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSelectionDAGInfo.h | 45 +++++++++----------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h index 53db5aa84292..a7143ac3fa66 100644 --- a/include/llvm/Target/TargetSelectionDAGInfo.h +++ b/include/llvm/Target/TargetSelectionDAGInfo.h @@ -21,7 +21,7 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the +/// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. 
/// class TargetSelectionDAGInfo { @@ -32,8 +32,8 @@ class TargetSelectionDAGInfo { explicit TargetSelectionDAGInfo() = default; virtual ~TargetSelectionDAGInfo(); - /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a - /// memcpy. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memcpy. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -56,8 +56,8 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemmove - Emit target-specific code that performs a - /// memmove. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memmove. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -72,8 +72,8 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemset - Emit target-specific code that performs a - /// memset. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memset. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple stores and can be more /// efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -87,11 +87,10 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a - /// memcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memcmp, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memcmp and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair<SDValue, SDValue> EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -101,11 +100,10 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForMemchr - Emit target-specific code that performs a - /// memchr, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memchr and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memchr, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memchr and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used.
virtual std::pair<SDValue, SDValue> EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Src, SDValue Char, SDValue Length, @@ -113,8 +111,8 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a - /// strcpy or stpcpy, in cases where that is faster than a libcall. + /// Emit target-specific code that performs a strcpy or stpcpy, in cases + /// where that is faster than a libcall. /// The first returned SDValue is the result of the copy (the start /// of the destination string for strcpy, a pointer to the null terminator /// for stpcpy) and the second is the chain. Both SDValues can be null @@ -128,11 +126,10 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcmp - Emit target-specific code that performs a - /// strcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the strcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a strcmp, in cases where that is + /// faster than a libcall. + /// The first returned SDValue is the result of the strcmp and the second is + /// the chain. Both SDValues can be null if a normal libcall should be used. virtual std::pair<SDValue, SDValue> EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, From 90c200464e00b6004e9f91b18a33cfd51202f3d8 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 18:27:03 +0000 Subject: [PATCH 075/364] [WebAssembly] Add several more calling conventions to the supported list. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254741 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index b651855eea7a..a7eba5611134 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -260,9 +260,14 @@ static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { // Test whether the given calling convention is supported. static bool CallingConvSupported(CallingConv::ID CallConv) { // We currently support the language-independent target-independent - // conventions. + // conventions. We don't yet have a way to annotate calls with properties like + // "cold", and we don't have any call-clobbered registers, so these are mostly + // all handled the same. return CallConv == CallingConv::C || CallConv == CallingConv::Fast || - CallConv == CallingConv::Cold; + CallConv == CallingConv::Cold || + CallConv == CallingConv::PreserveMost || + CallConv == CallingConv::PreserveAll || + CallConv == CallingConv::CXX_FAST_TLS; } SDValue From 4d585624ebb50ff6dae91402180356c43aa9ed25 Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Fri, 4 Dec 2015 18:35:37 +0000 Subject: [PATCH 076/364] sancov -not-covered-functions. Summary: The command prints out a list of functions that were not entered. To do this, addresses are first converted to function locations. Set operations are used for function locations.
Differential Revision: http://reviews.llvm.org/D14889 review git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254742 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/sancov/covered_functions.test | 11 +- test/tools/sancov/not_covered_functions.test | 7 + tools/sancov/CMakeLists.txt | 4 + tools/sancov/sancov.cc | 389 ++++++++++++++----- 4 files changed, 317 insertions(+), 94 deletions(-) create mode 100644 test/tools/sancov/not_covered_functions.test diff --git a/test/tools/sancov/covered_functions.test b/test/tools/sancov/covered_functions.test index 02dd30210238..5e0696bf8615 100644 --- a/test/tools/sancov/covered_functions.test +++ b/test/tools/sancov/covered_functions.test @@ -1,14 +1,13 @@ REQUIRES: x86_64-linux -RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered_functions %p/Inputs/test-linux_x86_64.sancov | FileCheck %s -RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered_functions %p/Inputs/test-linux_x86_64-1.sancov | FileCheck --check-prefix=MULTIPLE_FILES %s -RUN: sancov -obj %p/Inputs/test-linux_x86_64 -demangle=0 -covered_functions %p/Inputs/test-linux_x86_64.sancov | FileCheck --check-prefix=NO_DEMANGLE %s +RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck %s +RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered-functions -strip_path_prefix=Inputs/ %p/Inputs/test-linux_x86_64.sancov | FileCheck --check-prefix=STRIP_PATH %s +RUN: sancov -obj %p/Inputs/test-linux_x86_64 -demangle=0 -covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck --check-prefix=NO_DEMANGLE %s CHECK: Inputs{{[/\\]}}test.cpp:12 bar(std::string) CHECK: Inputs{{[/\\]}}test.cpp:14 main -MULTIPLE_FILES: {{^}}foo.cpp:5 foo() -MULTIPLE_FILES: {{^}}test.cpp:12 bar(std::string) -MULTIPLE_FILES: {{^}}test.cpp:14 main +STRIP_PATH: {{^}}test.cpp:12 bar(std::string) +STRIP_PATH: {{^}}test.cpp:14 main NO_DEMANGLE: test.cpp:12 _Z3barSs NO_DEMANGLE: test.cpp:14 main diff --git a/test/tools/sancov/not_covered_functions.test b/test/tools/sancov/not_covered_functions.test new file mode 100644 index 000000000000..b82f9e22d5d8 --- /dev/null +++ b/test/tools/sancov/not_covered_functions.test @@ -0,0 +1,7 @@ +REQUIRES: x86_64-linux +RUN: sancov -obj %p/Inputs/test-linux_x86_64 -not-covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck %s +RUN: sancov -obj %p/Inputs/test-linux_x86_64 -not-covered-functions %p/Inputs/test-linux_x86_64-1.sancov | FileCheck --check-prefix=CHECK1 --allow-empty %s + +CHECK: Inputs{{[/\\]}}foo.cpp:5 foo() +CHECK1-NOT: {{.}}* + diff --git a/tools/sancov/CMakeLists.txt b/tools/sancov/CMakeLists.txt index f891f779e8c9..5ce589b37373 100644 --- a/tools/sancov/CMakeLists.txt +++ b/tools/sancov/CMakeLists.txt @@ -1,4 +1,8 @@ set(LLVM_LINK_COMPONENTS + AllTargetsAsmPrinters + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos DebugInfoDWARF DebugInfoPDB Object diff --git a/tools/sancov/sancov.cc b/tools/sancov/sancov.cc index 9b54575028ad..450c21bd1690 100644 --- a/tools/sancov/sancov.cc +++ b/tools/sancov/sancov.cc @@ -12,6 +12,18 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" 
+#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorOr.h" @@ -22,11 +34,14 @@ #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include #include +#include #include using namespace llvm; @@ -35,13 +50,19 @@ namespace { // --------- COMMAND LINE FLAGS --------- -enum ActionType { PrintAction, CoveredFunctionsAction }; +enum ActionType { + PrintAction, + CoveredFunctionsAction, + NotCoveredFunctionsAction +}; cl::opt Action( cl::desc("Action (required)"), cl::Required, cl::values(clEnumValN(PrintAction, "print", "Print coverage addresses"), - clEnumValN(CoveredFunctionsAction, "covered_functions", + clEnumValN(CoveredFunctionsAction, "covered-functions", "Print all covered funcions."), + clEnumValN(NotCoveredFunctionsAction, "not-covered-functions", + "Print all not covered funcions."), clEnumValEnd)); static cl::list ClInputFiles(cl::Positional, cl::OneOrMore, @@ -55,6 +76,10 @@ static cl::opt ClDemangle("demangle", cl::init(true), cl::desc("Print demangled function name.")); +static cl::opt ClStripPathPrefix( + "strip_path_prefix", cl::init(""), + cl::desc("Strip this prefix from file paths in reports.")); + // --------- FORMAT SPECIFICATION --------- struct FileHeader { @@ -68,31 +93,256 @@ static const uint32_t Bitness64 = 0xFFFFFF64; // --------- +static void FailIfError(std::error_code Error) { + if (!Error) + return; + errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n"; + exit(1); +} + template static void FailIfError(const ErrorOr &E) { - if (E) + FailIfError(E.getError()); +} + +static void FailIfNotEmpty(const std::string &E) { + if (E.empty()) return; + errs() << "Error: " << E << "\n"; + exit(1); +} - auto Error = E.getError(); - errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n"; - exit(-2); +template +static void FailIfEmpty(const std::unique_ptr &Ptr, + const std::string &Message) { + if (Ptr.get()) + return; + errs() << "Error: " << Message << "\n"; + exit(1); } template static void readInts(const char *Start, const char *End, - std::vector *V) { + std::set *Ints) { const T *S = reinterpret_cast(Start); const T *E = reinterpret_cast(End); - V->reserve(E - S); - std::copy(S, E, std::back_inserter(*V)); + std::copy(S, E, std::inserter(*Ints, Ints->end())); +} + +struct FileLoc { + bool operator<(const FileLoc &RHS) const { + return std::tie(FileName, Line) < std::tie(RHS.FileName, RHS.Line); + } + + std::string FileName; + uint32_t Line; +}; + +struct FunctionLoc { + bool operator<(const FunctionLoc &RHS) const { + return std::tie(Loc, FunctionName) < std::tie(RHS.Loc, RHS.FunctionName); + } + + FileLoc Loc; + std::string FunctionName; +}; + +std::string stripPathPrefix(std::string Path) { + if (ClStripPathPrefix.empty()) + return Path; + size_t Pos = Path.find(ClStripPathPrefix); + if (Pos == std::string::npos) + return Path; + return Path.substr(Pos + ClStripPathPrefix.size()); +} + +// Compute [FileLoc -> FunctionName] map for given addresses. 
+static std::map +computeFunctionsMap(const std::set &Addrs) { + std::map Fns; + + symbolize::LLVMSymbolizer::Options SymbolizerOptions; + SymbolizerOptions.Demangle = ClDemangle; + SymbolizerOptions.UseSymbolTable = true; + symbolize::LLVMSymbolizer Symbolizer(SymbolizerOptions); + + // Fill in Fns map. + for (auto Addr : Addrs) { + auto InliningInfo = Symbolizer.symbolizeInlinedCode(ClBinaryName, Addr); + FailIfError(InliningInfo); + for (uint32_t i = 0; i < InliningInfo->getNumberOfFrames(); ++i) { + auto FrameInfo = InliningInfo->getFrame(i); + SmallString<256> FileName(FrameInfo.FileName); + sys::path::remove_dots(FileName, /* remove_dot_dot */ true); + FileLoc Loc = {FileName.str(), FrameInfo.Line}; + Fns[Loc] = FrameInfo.FunctionName; + } + } + + return Fns; +} + +// Compute functions for given addresses. It keeps only the first +// occurence of a function within a file. +std::set computeFunctionLocs(const std::set &Addrs) { + std::map Fns = computeFunctionsMap(Addrs); + + std::set result; + std::string LastFileName; + std::set ProcessedFunctions; + + for (const auto &P : Fns) { + std::string FileName = P.first.FileName; + std::string FunctionName = P.second; + + if (LastFileName != FileName) + ProcessedFunctions.clear(); + LastFileName = FileName; + + if (!ProcessedFunctions.insert(FunctionName).second) + continue; + + result.insert(FunctionLoc{P.first, P.second}); + } + + return result; +} + +// Locate __sanitizer_cov function address. +static uint64_t findSanitizerCovFunction(const object::ObjectFile &O) { + for (const object::SymbolRef &Symbol : O.symbols()) { + ErrorOr AddressOrErr = Symbol.getAddress(); + FailIfError(AddressOrErr); + + ErrorOr Name = Symbol.getName(); + FailIfError(Name); + + if (Name.get() == "__sanitizer_cov") { + return AddressOrErr.get(); + } + } + FailIfNotEmpty("__sanitizer_cov not found"); + return 0; // not reachable. +} + +// Locate addresses of all coverage points in a file. Coverage point +// is defined as the 'address of instruction following __sanitizer_cov +// call - 1'. +static void getObjectCoveragePoints(const object::ObjectFile &O, + std::set *Addrs) { + Triple TheTriple("unknown-unknown-unknown"); + TheTriple.setArch(Triple::ArchType(O.getArch())); + auto TripleName = TheTriple.getTriple(); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + FailIfNotEmpty(Error); + + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, "", "")); + FailIfEmpty(STI, "no subtarget info for target " + TripleName); + + std::unique_ptr MRI( + TheTarget->createMCRegInfo(TripleName)); + FailIfEmpty(MRI, "no register info for target " + TripleName); + + std::unique_ptr AsmInfo( + TheTarget->createMCAsmInfo(*MRI, TripleName)); + FailIfEmpty(AsmInfo, "no asm info for target " + TripleName); + + std::unique_ptr MOFI(new MCObjectFileInfo); + MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); + std::unique_ptr DisAsm( + TheTarget->createMCDisassembler(*STI, Ctx)); + FailIfEmpty(DisAsm, "no disassembler info for target " + TripleName); + + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + FailIfEmpty(MII, "no instruction info for target " + TripleName); + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + FailIfEmpty(MIA, "no instruction analysis info for target " + TripleName); + + uint64_t SanCovAddr = findSanitizerCovFunction(O); + + for (const auto Section : O.sections()) { + if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same. 
+ continue; + uint64_t SectionAddr = Section.getAddress(); + uint64_t SectSize = Section.getSize(); + if (!SectSize) + continue; + + StringRef SectionName; + FailIfError(Section.getName(SectionName)); + + StringRef BytesStr; + FailIfError(Section.getContents(BytesStr)); + ArrayRef Bytes(reinterpret_cast(BytesStr.data()), + BytesStr.size()); + + for (uint64_t Index = 0, Size = 0; Index < Section.getSize(); + Index += Size) { + MCInst Inst; + if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), + SectionAddr + Index, nulls(), nulls())) { + if (Size == 0) + Size = 1; + continue; + } + uint64_t Target; + if (MIA->isCall(Inst) && + MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) { + if (Target == SanCovAddr) { + // Sanitizer coverage uses the address of the next instruction - 1. + Addrs->insert(Index + SectionAddr + Size - 1); + } + } + } + } } -static std::string CommonPrefix(std::string A, std::string B) { - if (A.size() > B.size()) - return std::string(B.begin(), - std::mismatch(B.begin(), B.end(), A.begin()).first); +static void getArchiveCoveragePoints(const object::Archive &A, + std::set *Addrs) { + for (auto &ErrorOrChild : A.children()) { + FailIfError(ErrorOrChild); + const object::Archive::Child &C = *ErrorOrChild; + ErrorOr> ChildOrErr = C.getAsBinary(); + FailIfError(ChildOrErr); + if (object::ObjectFile *O = + dyn_cast(&*ChildOrErr.get())) + getObjectCoveragePoints(*O, Addrs); + else + FailIfError(object::object_error::invalid_file_type); + } +} + +// Locate addresses of all coverage points in a file. Coverage point +// is defined as the 'address of instruction following __sanitizer_cov +// call - 1'. +std::set getCoveragePoints(std::string FileName) { + std::set Result; + + ErrorOr> BinaryOrErr = + object::createBinary(FileName); + FailIfError(BinaryOrErr); + + object::Binary &Binary = *BinaryOrErr.get().getBinary(); + if (object::Archive *A = dyn_cast(&Binary)) + getArchiveCoveragePoints(*A, &Result); + else if (object::ObjectFile *O = dyn_cast(&Binary)) + getObjectCoveragePoints(*O, &Result); else - return std::string(A.begin(), - std::mismatch(A.begin(), A.end(), B.begin()).first); + FailIfError(object::object_error::invalid_file_type); + + return Result; +} + +static void printFunctionLocs(const std::set &FnLocs, + raw_ostream &OS) { + for (const FunctionLoc &FnLoc : FnLocs) { + OS << stripPathPrefix(FnLoc.Loc.FileName) << ":" << FnLoc.Loc.Line << " " + << FnLoc.FunctionName << "\n"; + } } class CoverageData { @@ -116,7 +366,7 @@ class CoverageData { return make_error_code(errc::illegal_byte_sequence); } - auto Addrs = llvm::make_unique>(); + auto Addrs = llvm::make_unique>(); switch (Header->Bitness) { case Bitness64: @@ -138,15 +388,12 @@ class CoverageData { // Merge multiple coverage data together. static std::unique_ptr merge(const std::vector> &Covs) { - std::set Addrs; + auto Addrs = llvm::make_unique>(); for (const auto &Cov : Covs) - Addrs.insert(Cov->Addrs->begin(), Cov->Addrs->end()); + Addrs->insert(Cov->Addrs->begin(), Cov->Addrs->end()); - auto AddrsVector = llvm::make_unique>( - Addrs.begin(), Addrs.end()); - return std::unique_ptr( - new CoverageData(std::move(AddrsVector))); + return std::unique_ptr(new CoverageData(std::move(Addrs))); } // Read list of files and merges their coverage info. @@ -163,83 +410,39 @@ class CoverageData { } // Print coverage addresses. 
- void printAddrs(raw_ostream &out) { + void printAddrs(raw_ostream &OS) { for (auto Addr : *Addrs) { - out << "0x"; - out.write_hex(Addr); - out << "\n"; + OS << "0x"; + OS.write_hex(Addr); + OS << "\n"; } } // Print list of covered functions. // Line format: : - void printCoveredFunctions(raw_ostream &out) { - if (Addrs->empty()) - return; - symbolize::LLVMSymbolizer::Options SymbolizerOptions; - SymbolizerOptions.Demangle = ClDemangle; - symbolize::LLVMSymbolizer Symbolizer(SymbolizerOptions); - - struct FileLoc { - std::string FileName; - uint32_t Line; - bool operator<(const FileLoc &Rhs) const { - return std::tie(FileName, Line) < std::tie(Rhs.FileName, Rhs.Line); - } - }; - - // FileLoc -> FunctionName - std::map Fns; - - // Fill in Fns map. - for (auto Addr : *Addrs) { - auto InliningInfo = Symbolizer.symbolizeInlinedCode(ClBinaryName, Addr); - FailIfError(InliningInfo); - for (uint32_t i = 0; i < InliningInfo->getNumberOfFrames(); ++i) { - auto FrameInfo = InliningInfo->getFrame(i); - SmallString<256> FileName(FrameInfo.FileName); - sys::path::remove_dots(FileName, /* remove_dot_dot */ true); - FileLoc Loc = { FileName.str(), FrameInfo.Line }; - Fns[Loc] = FrameInfo.FunctionName; - } - } - - // Compute file names common prefix. - std::string FilePrefix = Fns.begin()->first.FileName; - for (const auto &P : Fns) - FilePrefix = CommonPrefix(FilePrefix, P.first.FileName); - - // Print first function occurence in a file. - { - std::string LastFileName; - std::set ProcessedFunctions; - - for (const auto &P : Fns) { - std::string FileName = P.first.FileName; - std::string FunctionName = P.second; - uint32_t Line = P.first.Line; - - if (LastFileName != FileName) - ProcessedFunctions.clear(); - LastFileName = FileName; - - if (!ProcessedFunctions.insert(FunctionName).second) - continue; - - // Don't strip prefix if we only have a single file. - if (FileName.size() > FilePrefix.size()) - FileName = FileName.substr(FilePrefix.size()); + void printCoveredFunctions(raw_ostream &OS) { + printFunctionLocs(computeFunctionLocs(*Addrs), OS); + } - out << FileName << ":" << Line << " " << FunctionName << "\n"; - } - } + // Print list of not covered functions. + // Line format: : + void printNotCoveredFunctions(raw_ostream &OS) { + std::set AllFns = + computeFunctionLocs(getCoveragePoints(ClBinaryName)); + std::set CoveredFns = computeFunctionLocs(*Addrs); + + std::set NotCoveredFns; + std::set_difference(AllFns.begin(), AllFns.end(), CoveredFns.begin(), + CoveredFns.end(), + std::inserter(NotCoveredFns, NotCoveredFns.end())); + printFunctionLocs(NotCoveredFns, OS); } - private: - explicit CoverageData(std::unique_ptr> Addrs) +private: + explicit CoverageData(std::unique_ptr> Addrs) : Addrs(std::move(Addrs)) {} - std::unique_ptr> Addrs; + std::unique_ptr> Addrs; }; } // namespace @@ -249,6 +452,10 @@ int main(int argc, char **argv) { PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
+ llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllDisassemblers(); + cl::ParseCommandLineOptions(argc, argv, "Sanitizer Coverage Processing Tool"); auto CovData = CoverageData::readAndMerge(ClInputFiles); @@ -263,5 +470,11 @@ int main(int argc, char **argv) { CovData.get()->printCoveredFunctions(outs()); return 0; } + case NotCoveredFunctionsAction: { + CovData.get()->printNotCoveredFunctions(outs()); + return 0; + } } + + llvm_unreachable("unsupported action"); } From 76bae99a6129f228dd70d0f94d2c68e186d84664 Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Fri, 4 Dec 2015 18:50:18 +0000 Subject: [PATCH 077/364] adding MC dependencies in hopes to pacify the hexagon build. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254745 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/sancov/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/sancov/CMakeLists.txt b/tools/sancov/CMakeLists.txt index 5ce589b37373..e92b1fcbb862 100644 --- a/tools/sancov/CMakeLists.txt +++ b/tools/sancov/CMakeLists.txt @@ -5,6 +5,8 @@ set(LLVM_LINK_COMPONENTS AllTargetsInfos DebugInfoDWARF DebugInfoPDB + MC + MCDisassembler Object Support Symbolize From 03d76d50e6b92734a489341ddf52371eb14e660e Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Fri, 4 Dec 2015 19:11:54 +0000 Subject: [PATCH 078/364] fixing Makefile git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254749 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/sancov/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/sancov/Makefile b/tools/sancov/Makefile index 1114fe052d4d..7dba1a7a594a 100644 --- a/tools/sancov/Makefile +++ b/tools/sancov/Makefile @@ -9,7 +9,8 @@ LEVEL := ../.. TOOLNAME := sancov -LINK_COMPONENTS := DebugInfoDWARF DebugInfoPDB Object Support Symbolize +LINK_COMPONENTS := all-targets DebugInfoDWARF DebugInfoPDB MC MCParser \ + MCDisassembler Object Support Symbolize # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 From 2b50cdde679409dac8b96b9cf15f8560b34d0741 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Fri, 4 Dec 2015 19:27:58 +0000 Subject: [PATCH 079/364] [llvm-readobj/ELF] Simplify Verdef handling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254751 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/ELFDumper.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index ef07a85f885d..02397f382848 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -433,17 +433,11 @@ StringRef ELFDumper::getSymbolVersion(StringRef StrTab, if (entry.isVerdef()) { // The first Verdaux entry holds the name. name_offset = entry.getVerdef()->getAux()->vda_name; - } else { - name_offset = entry.getVernaux()->vna_name; - } - - // Set IsDefault - if (entry.isVerdef()) { IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN); } else { + name_offset = entry.getVernaux()->vna_name; IsDefault = false; } - if (name_offset >= StrTab.size()) reportError("Invalid string offset"); return StringRef(StrTab.data() + name_offset); From f6712106fa164c50ba9751d21139bd902016feef Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Fri, 4 Dec 2015 19:29:49 +0000 Subject: [PATCH 080/364] [llvm-readobj] reportError() never returns. Mark with the correct attribute. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254752 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/llvm-readobj.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 63cec03438cc..5406afff241e 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -231,7 +231,7 @@ namespace opts { namespace llvm { -void reportError(Twine Msg) { +LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) { outs() << "\nError reading file: " << Msg << ".\n"; outs().flush(); exit(1); From c57a70c32743e51e0823918a0f6fdcd60c14a0ca Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 4 Dec 2015 19:54:24 +0000 Subject: [PATCH 081/364] ScheduleDAGInstrs: Move LiveIntervals field to ScheduleDAGMI Now that ScheduleDAGInstrs doesn't need it anymore we can move the field down the class hierarchy to ScheduleDAGMI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254759 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineScheduler.h | 11 ++++++++--- include/llvm/CodeGen/ScheduleDAGInstrs.h | 8 -------- lib/CodeGen/ScheduleDAGInstrs.cpp | 3 +-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 3a510084f65a..358fd5a3732a 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -228,6 +228,7 @@ class ScheduleDAGMutation { class ScheduleDAGMI : public ScheduleDAGInstrs { protected: AliasAnalysis *AA; + LiveIntervals *LIS; std::unique_ptr<MachineSchedStrategy> SchedImpl; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -255,9 +256,10 @@ class ScheduleDAGMI : public ScheduleDAGInstrs { public: ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, bool RemoveKillFlags) - : ScheduleDAGInstrs(*C->MF, C->MLI, C->LIS, RemoveKillFlags), - AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(), - CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) { + : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), + LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), + CurrentTop(), CurrentBottom(), NextClusterPred(nullptr), + NextClusterSucc(nullptr) { #ifndef NDEBUG NumInstrsScheduled = 0; #endif @@ -266,6 +268,9 @@ class ScheduleDAGMI : public ScheduleDAGInstrs { // Provide a vtable anchor ~ScheduleDAGMI() override; + // Returns LiveIntervals instance for use in DAG mutators and such. + LiveIntervals *getLIS() const { return LIS; } + /// Return true if this DAG supports VReg liveness and RegPressure. virtual bool hasVRegLiveness() const { return false; } diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index c715e0f79205..c574df094911 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -26,7 +26,6 @@ namespace llvm { class MachineFrameInfo; class MachineLoopInfo; class MachineDominatorTree; - class LiveIntervals; class RegPressureTracker; class PressureDiffs; @@ -92,9 +91,6 @@ namespace llvm { const MachineLoopInfo *MLI; const MachineFrameInfo *MFI; - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - /// TargetSchedModel provides an interface to the machine model.
TargetSchedModel SchedModel; @@ -172,14 +168,10 @@ namespace llvm { public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - LiveIntervals *LIS = nullptr, bool RemoveKillFlags = false); ~ScheduleDAGInstrs() override {} - /// \brief Expose LiveIntervals for use in DAG mutators and such. - LiveIntervals *getLIS() const { return LIS; } - /// \brief Get the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 9d588ff24f61..fb82ab7a5555 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -51,9 +51,8 @@ static cl::opt UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - LiveIntervals *LIS, bool RemoveKillFlags) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS), + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), TrackLaneMasks(false), FirstDbgValue(nullptr) { DbgValues.clear(); From 697fe024f6dad16e21c9b8e41bb27463971939c0 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 4 Dec 2015 20:05:04 +0000 Subject: [PATCH 082/364] [LegacyPassManager] Reduce memory usage for AnalysisUsage The LegacyPassManager was storing an instance of AnalysisUsage for each instance of each pass. In practice, most instances of a single pass class share the same dependencies. We can't rely on this because passes can (and some do) have dynamic dependencies based on instance options. We can exploit the likely commonality by uniqueing the usage information after querying the pass, but before storing it into the pass manager. This greatly reduces memory consumption by the AnalysisUsage objects. For a long pass pipeline, I measured a decrease in memory consumption for this storage of about 50%. I have not measured on the default O3 pipeline, but I suspect it will see some benefit as well since many passes are repeated (e.g. InstCombine). Differential Revision: http://reviews.llvm.org/D14677 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254760 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/LegacyPassManagers.h | 36 +++++++++++++++++++++++++++- lib/IR/LegacyPassManager.cpp | 32 ++++++++++++++++++------- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index 3a0385581509..af045585691b 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Pass.h" @@ -250,7 +251,40 @@ class PMTopLevelManager { /// Map from ID to immutable passes. SmallDenseMap ImmutablePassMap; - DenseMap AnUsageMap; + + /// A wrapper around AnalysisUsage for the purpose of uniqueing. The wrapper + /// is used to avoid needing to make AnalysisUsage itself a folding set node. 
+ struct AUFoldingSetNode : public FoldingSetNode { + AnalysisUsage AU; + AUFoldingSetNode(const AnalysisUsage &AU) : AU(AU) {} + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, AU); + } + static void Profile(FoldingSetNodeID &ID, const AnalysisUsage &AU) { + // TODO: We could consider sorting the dependency arrays within the + // AnalysisUsage (since they are conceptually unordered). + ID.AddBoolean(AU.getPreservesAll()); + for (auto &Vec : {AU.getRequiredSet(), AU.getRequiredTransitiveSet(), + AU.getPreservedSet(), AU.getUsedSet()}) { + ID.AddInteger(Vec.size()); + for(AnalysisID AID : Vec) + ID.AddPointer(AID); + } + } + }; + + // Contains all of the unique combinations of AnalysisUsage. This is helpful + // when we have multiple instances of the same pass since they'll usually + // have the same analysis usage and can share storage. + FoldingSet UniqueAnalysisUsages; + + // Allocator used for allocating UAFoldingSetNodes. This handles deletion of + // all allocated nodes in one fell swoop. + BumpPtrAllocator AUFoldingSetNodeAllocator; + + // Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does + // not own the storage associated with either key or value.. + DenseMap AnUsageMap; /// Collection of PassInfo objects found via analysis IDs and in this top /// level manager. This is used to memoize queries to the pass registry. diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 69f402029c81..08e8906e88db 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -569,13 +569,33 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { AnalysisUsage *AnUsage = nullptr; - DenseMap::iterator DMI = AnUsageMap.find(P); + auto DMI = AnUsageMap.find(P); if (DMI != AnUsageMap.end()) AnUsage = DMI->second; else { - AnUsage = new AnalysisUsage(); - P->getAnalysisUsage(*AnUsage); - AnUsageMap[P] = AnUsage; + // Look up the analysis usage from the pass instance (different instances + // of the same pass can produce different results), but unique the + // resulting object to reduce memory usage. This helps to greatly reduce + // memory usage when we have many instances of only a few pass types + // (e.g. instcombine, simplifycfg, etc...) which tend to share a fixed set + // of dependencies. 
+ AnalysisUsage AU; + P->getAnalysisUsage(AU); + + AUFoldingSetNode* Node = nullptr; + FoldingSetNodeID ID; + AUFoldingSetNode::Profile(ID, AU); + void *IP = nullptr; + if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) + Node = N; + else { + Node = new (AUFoldingSetNodeAllocator) AUFoldingSetNode(AU); + UniqueAnalysisUsages.InsertNode(Node, IP); + } + assert(Node && "cached analysis usage must be non null"); + + AnUsageMap[P] = &Node->AU; + AnUsage = &Node->AU;; } return AnUsage; } @@ -798,10 +818,6 @@ PMTopLevelManager::~PMTopLevelManager() { for (SmallVectorImpl::iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) delete *I; - - for (DenseMap::iterator DMI = AnUsageMap.begin(), - DME = AnUsageMap.end(); DMI != DME; ++DMI) - delete DMI->second; } //===----------------------------------------------------------------------===// From fb25c75967e3b30f6687737616613a56c9d442ca Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 4 Dec 2015 20:34:37 +0000 Subject: [PATCH 083/364] [OperandBundles] Allow operand-specific attributes in operand bundles Currently `OperandBundleUse::operandsHaveAttr` computes its result without being given a specific operand. This is problematic because it forces us to say that, e.g., even non-pointer operands in `"deopt"` operand bundles are `readonly`, which doesn't make sense. This commit changes `operandsHaveAttr` to work in the context of a specific operand, so that we can give the operand attributes that make sense for the operands's `llvm::Type`. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254764 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/InstrTypes.h | 38 ++++++++++++++++++++++++------------ lib/IR/Instructions.cpp | 4 ++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 81de6999cdb1..58bc7c1ee10a 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -1121,14 +1121,12 @@ struct OperandBundleUse { explicit OperandBundleUse(StringMapEntry *Tag, ArrayRef Inputs) : Inputs(Inputs), Tag(Tag) {} - /// \brief Return true if all the operands in this operand bundle have the - /// attribute A. - /// - /// Currently there is no way to have attributes on operand bundles differ on - /// a per operand granularity. - bool operandsHaveAttr(Attribute::AttrKind A) const { + /// \brief Return true if the operand at index \p Idx in this operand bundle + /// has the attribute A. + bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const { if (isDeoptOperandBundle()) - return A == Attribute::ReadOnly || A == Attribute::NoCapture; + if (A == Attribute::ReadOnly || A == Attribute::NoCapture) + return Inputs[Idx]->getType()->isPointerTy(); // Conservative answer: no operands have any attributes. return false; @@ -1351,11 +1349,7 @@ template class OperandBundleUser { /// It is an error to call this with an OpIdx that does not correspond to an /// bundle operand. 
OperandBundleUse getOperandBundleForOperand(unsigned OpIdx) const { - for (auto &BOI : bundle_op_infos()) - if (BOI.Begin <= OpIdx && OpIdx < BOI.End) - return operandBundleFromBundleOpInfo(BOI); - - llvm_unreachable("Did not find operand bundle for operand!"); + return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx)); } /// \brief Return true if this operand bundle user has operand bundles that @@ -1382,6 +1376,14 @@ template class OperandBundleUser { return false; } + /// \brief Return true if the bundle operand at index \p OpIdx has the + /// attribute \p A. + bool bundleOperandHasAttr(unsigned OpIdx, Attribute::AttrKind A) const { + auto &BOI = getBundleOpInfoForOperand(OpIdx); + auto OBU = operandBundleFromBundleOpInfo(BOI); + return OBU.operandHasAttr(OpIdx - BOI.Begin, A); + } + protected: /// \brief Is the function attribute S disallowed by some operand bundle on /// this operand bundle user? @@ -1518,6 +1520,18 @@ template class OperandBundleUser { return It; } + /// \brief Return the BundleOpInfo for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + const BundleOpInfo &getBundleOpInfoForOperand(unsigned OpIdx) const { + for (auto &BOI : bundle_op_infos()) + if (BOI.Begin <= OpIdx && OpIdx < BOI.End) + return BOI; + + llvm_unreachable("Did not find operand bundle for operand!"); + } + /// \brief Return the total number of values used in \p Bundles. static unsigned CountBundleInputs(ArrayRef Bundles) { unsigned Total = 0; diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index b8c72dd7e39d..bba0ef2d7d34 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -369,7 +369,7 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i, assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either a call argument or an operand bundle!"); - return getOperandBundleForOperand(i - 1).operandsHaveAttr(A); + return bundleOperandHasAttr(i - 1, A); } /// IsConstantOne - Return true only if val is constant int 1 @@ -646,7 +646,7 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either an invoke argument or an operand bundle!"); - return getOperandBundleForOperand(i - 1).operandsHaveAttr(A); + return bundleOperandHasAttr(i - 1, A); } void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) { From 19d1511e67cb88a242419ad529e01d66739f283d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 4 Dec 2015 21:16:42 +0000 Subject: [PATCH 084/364] [llvm-dwp] Include the debug_line.dwo section This probably shouldn't be generated in the .dwo file for CUs, only for TUs, but it's in the sample .dwos (generated by clang) so dwp should reflect that. Arguably the DWP tool could be smart enough to know that the CUs shouldn't need a debug_line.dwo section and skip that even when it's legitimately generated for TUs, but that's a bit more off-book. 
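To make the index layout in the test below easier to read, here is a rough model of what one row of the dumped .debug_cu_index table carries; the struct and field names are invented for illustration and are not llvm-dwp's actual types, they only mirror the Signature plus per-section [begin, end) columns that the CHECK lines verify.

#include <cstdint>

// Hypothetical model of one .debug_cu_index row: a unit signature plus a
// half-open [Begin, End) contribution for each section kind in the package.
struct SectionContribution {
  uint32_t Begin = 0; // offset of this unit's slice within the section
  uint32_t End = 0;   // one past the last byte of the slice
};

struct CuIndexRow {
  uint64_t Signature = 0;         // DWO id of the unit
  SectionContribution Info;       // .debug_info.dwo
  SectionContribution Abbrev;     // .debug_abbrev.dwo
  SectionContribution Line;       // .debug_line.dwo, newly included here
  SectionContribution StrOffsets; // .debug_str_offsets.dwo
};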
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254767 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-dwp/X86/simple.test | 6 +++--- tools/llvm-dwp/llvm-dwp.cpp | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test index 1c7b1040bd31..3c9795fbb934 100644 --- a/test/tools/llvm-dwp/X86/simple.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -45,10 +45,10 @@ CHECK: DW_TAG_formal_parameter CHECK: .debug_cu_index contents: Ensure only the relevant/contained sections are included in the table: -CHECK: Index Signature INFO ABBREV STR_OFFSETS +CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS Don't bother checking the Signatures, they aren't correct yet. -CHECK: 1 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000010) -CHECK: 2 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000010, 0x00000024) +CHECK: 1 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000011) [0x00000000, 0x00000010) +CHECK: 2 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024) CHECK: .debug_str.dwo contents: CHECK: "clang version diff --git a/tools/llvm-dwp/llvm-dwp.cpp b/tools/llvm-dwp/llvm-dwp.cpp index b68ba437f830..f67ecbf3437f 100644 --- a/tools/llvm-dwp/llvm-dwp.cpp +++ b/tools/llvm-dwp/llvm-dwp.cpp @@ -139,6 +139,7 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { {"debug_str_offsets.dwo", {StrOffsetSection, DW_SECT_STR_OFFSETS}}, {"debug_str.dwo", {StrSection, static_cast(0)}}, {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_LOC}}, + {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}}, {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}}}; struct UnitIndexEntry { From efb247f17cbc37fd4ff105b4316d6528db81564f Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 4 Dec 2015 21:29:53 +0000 Subject: [PATCH 085/364] Fix llvm-readobj build on Windows, match noreturn attribute on reportError in headers git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254769 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/llvm-readobj.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 58c50f58d750..5a103920c165 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -11,6 +11,7 @@ #define LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include namespace llvm { @@ -19,7 +20,7 @@ namespace llvm { } // Various helper functions. 
- void reportError(Twine Msg); + LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg); void error(std::error_code ec); bool relocAddressLess(object::RelocationRef A, object::RelocationRef B); From a2b2c5abb0d5f55744b113e893ede123c1a41d6c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 4 Dec 2015 21:30:23 +0000 Subject: [PATCH 086/364] [llvm-dwp] Implement the required on-disk probed hash table git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254770 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-dwp/X86/simple.test | 4 ++-- tools/llvm-dwp/llvm-dwp.cpp | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test index 3c9795fbb934..2ed8e611844a 100644 --- a/test/tools/llvm-dwp/X86/simple.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -47,8 +47,8 @@ CHECK: .debug_cu_index contents: Ensure only the relevant/contained sections are included in the table: CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS Don't bother checking the Signatures, they aren't correct yet. -CHECK: 1 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000011) [0x00000000, 0x00000010) -CHECK: 2 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024) +CHECK: 3 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000011) [0x00000000, 0x00000010) +CHECK: 4 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024) CHECK: .debug_str.dwo contents: CHECK: "clang version diff --git a/tools/llvm-dwp/llvm-dwp.cpp b/tools/llvm-dwp/llvm-dwp.cpp index f67ecbf3437f..b4aaea3b238d 100644 --- a/tools/llvm-dwp/llvm-dwp.cpp +++ b/tools/llvm-dwp/llvm-dwp.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/TargetSelect.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -222,20 +223,30 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { if (C) ++Columns; + std::vector Buckets(NextPowerOf2(3 * IndexEntries.size() / 2)); + uint64_t Mask = Buckets.size() - 1; + for (size_t i = 0; i != IndexEntries.size(); ++i) { + auto S = IndexEntries[i].Signature; + auto H = S & Mask; + while (Buckets[H]) + H += ((S >> 32) & Mask) | 1; + Buckets[H] = i + 1; + } + Out.SwitchSection(MCOFI.getDwarfCUIndexSection()); Out.EmitIntValue(2, 4); // Version Out.EmitIntValue(Columns, 4); // Columns Out.EmitIntValue(IndexEntries.size(), 4); // Num Units // FIXME: This is not the right number of buckets for a real hash. - Out.EmitIntValue(IndexEntries.size(), 4); // Num Buckets + Out.EmitIntValue(Buckets.size(), 4); // Num Buckets // Write the signatures. - for (const auto &E : IndexEntries) - Out.EmitIntValue(E.Signature, 8); + for (const auto &I : Buckets) + Out.EmitIntValue(I ? IndexEntries[I - 1].Signature : 0, 8); // Write the indexes. 
- for (size_t i = 0; i != IndexEntries.size(); ++i) - Out.EmitIntValue(i + 1, 4); + for (const auto &I : Buckets) + Out.EmitIntValue(I, 4); // Write the column headers (which sections will appear in the table) for (size_t i = 0; i != array_lengthof(ContributionOffsets); ++i) From fca82775a2b75d8aaaf8f30226acaa3b3607f356 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 4 Dec 2015 21:38:39 +0000 Subject: [PATCH 087/364] [llvm-dwp] Remove some out of date comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254772 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-dwp/X86/simple.test | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test index 2ed8e611844a..5cc626334680 100644 --- a/test/tools/llvm-dwp/X86/simple.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -44,9 +44,7 @@ CHECK: DW_AT_name {{.*}} "b" CHECK: DW_TAG_formal_parameter CHECK: .debug_cu_index contents: -Ensure only the relevant/contained sections are included in the table: CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS -Don't bother checking the Signatures, they aren't correct yet. CHECK: 3 [[DWOA]] [0x00000000, 0x00000029) [0x00000000, 0x00000031) [0x00000000, 0x00000011) [0x00000000, 0x00000010) CHECK: 4 [[DWOB]] [0x00000029, 0x0000005e) [0x00000031, 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024) From b590f81c153265bc3ce6173161642b7fbc5b8a1c Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Dec 2015 21:38:44 +0000 Subject: [PATCH 088/364] [AArch64] Expand vector SDIVREM/UDIVREM operations. http://reviews.llvm.org/D15214 Patch by Ana Pazos ! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++++ test/CodeGen/AArch64/divrem.ll | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 test/CodeGen/AArch64/divrem.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f0fb03451b2a..9340e7f0a55c 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -237,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); diff --git a/test/CodeGen/AArch64/divrem.ll b/test/CodeGen/AArch64/divrem.ll new file mode 100644 index 000000000000..9f648eb63eac --- /dev/null +++ b/test/CodeGen/AArch64/divrem.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s + +; SDIVREM/UDIVREM DAG nodes are generated but expanded when lowering and +; should not generate select error. 
+define <2 x i32> @test_udivrem(<2 x i32> %x, < 2 x i32> %y, < 2 x i32>* %z) { +; CHECK-LABEL: test_udivrem +; CHECK-DAG: udivrem +; CHECK-NOT: LLVM ERROR: Cannot select + %div = udiv <2 x i32> %x, %y + store <2 x i32> %div, <2 x i32>* %z + %1 = urem <2 x i32> %x, %y + ret <2 x i32> %1 +} + +define <4 x i32> @test_sdivrem(<4 x i32> %x, <4 x i32>* %y) { +; CHECK-LABEL: test_sdivrem +; CHECK-DAG: sdivrem + %div = sdiv <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + store <4 x i32> %div, <4 x i32>* %y + %1 = srem <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + ret <4 x i32> %1 +} From 6f41c1352b5e860ce6fd737f1d69a13f810c0311 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 4 Dec 2015 21:56:46 +0000 Subject: [PATCH 089/364] [llc/opt] Add an option to run all passes twice Summary: Lately, I have submitted a number of patches to fix bugs that only occurred when using the same pass manager to compile multiple modules (generally these bugs are failure to reset some persistent state). Unfortunately I don't think there is currently a way to test that from the command line. This adds a very simple flag to both llc and opt, under which the tools will simply re-run their respective pass pipelines using the same pass manager on (a clone of the same module). Additionally, we verify that both outputs are bitwise the same. Reviewers: yaron.keren Subscribers: loladiro, yaron.keren, kcc, llvm-commits Differential Revision: http://reviews.llvm.org/D14965 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254774 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ELF/empty-twice.ll | 6 +++++ test/Other/opt-twice.ll | 14 ++++++++++ tools/llc/llc.cpp | 52 +++++++++++++++++++++++++++++++++++--- tools/opt/opt.cpp | 48 +++++++++++++++++++++++++++++++---- 4 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 test/MC/ELF/empty-twice.ll create mode 100644 test/Other/opt-twice.ll diff --git a/test/MC/ELF/empty-twice.ll b/test/MC/ELF/empty-twice.ll new file mode 100644 index 000000000000..c24bd629c416 --- /dev/null +++ b/test/MC/ELF/empty-twice.ll @@ -0,0 +1,6 @@ +; Check that there is no persistent state in the ELF emitter that crashes us +; when we try to reuse the pass manager +; RUN: llc -compile-twice -filetype=obj %s -o - + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" diff --git a/test/Other/opt-twice.ll b/test/Other/opt-twice.ll new file mode 100644 index 000000000000..6bff52e34e35 --- /dev/null +++ b/test/Other/opt-twice.ll @@ -0,0 +1,14 @@ +; The pass here doesn't matter (we use deadargelim), but test +; that the -run-twice options exists, generates output, and +; doesn't crash +; RUN: opt -run-twice -deadargelim -S < %s | FileCheck %s + +; CHECK: define internal void @test +define internal {} @test() { + ret {} undef +} + +define void @caller() { + call {} @test() + ret void +} diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index c51c012391b2..531aba1f64bf 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/Cloning.h" #include using namespace llvm; @@ -96,6 +97,12 @@ static cl::opt AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(true)); +static cl::opt + CompileTwice("compile-twice", cl::Hidden, + cl::desc("Run everything twice, 
re-using the same pass " + "manager and verify the the result is the same."), + cl::init(false)); + static int compileModule(char **, LLVMContext &); static std::unique_ptr @@ -325,10 +332,15 @@ static int compileModule(char **argv, LLVMContext &Context) { { raw_pwrite_stream *OS = &Out->os(); - std::unique_ptr BOS; - if (FileType != TargetMachine::CGFT_AssemblyFile && - !Out->os().supportsSeeking()) { - BOS = make_unique(*OS); + + // Manually do the buffering rather than using buffer_ostream, + // so we can memcmp the contents in CompileTwice mode + SmallVector Buffer; + std::unique_ptr BOS; + if ((FileType != TargetMachine::CGFT_AssemblyFile && + !Out->os().supportsSeeking()) || + CompileTwice) { + BOS = make_unique(Buffer); OS = BOS.get(); } @@ -378,7 +390,39 @@ static int compileModule(char **argv, LLVMContext &Context) { // Before executing passes, print the final values of the LLVM options. cl::PrintOptionValues(); + // If requested, run the pass manager over the same module again, + // to catch any bugs due to persistent state in the passes. Note that + // opt has the same functionality, so it may be worth abstracting this out + // in the future. + SmallVector CompileTwiceBuffer; + if (CompileTwice) { + std::unique_ptr M2(llvm::CloneModule(M.get())); + PM.run(*M2); + CompileTwiceBuffer = Buffer; + Buffer.clear(); + } + PM.run(*M); + + // Compare the two outputs and make sure they're the same + if (CompileTwice) { + if (Buffer.size() != CompileTwiceBuffer.size() || + (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) != + 0)) { + errs() + << "Running the pass manager twice changed the output.\n" + "Writing the result of the second run to the specified output\n" + "To generate the one-run comparison binary, just run without\n" + "the compile-twice option\n"; + Out->os() << Buffer; + Out->keep(); + return 1; + } + } + + if (BOS) { + Out->os() << Buffer; + } } // Declare success. diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 5fe2f034c6e2..c1510a7fb259 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassNameParser.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -36,7 +37,6 @@ #include "llvm/LinkAllIR.h" #include "llvm/LinkAllPasses.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -51,6 +51,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/Cloning.h" #include #include using namespace llvm; @@ -190,6 +191,11 @@ static cl::opt PreserveAssemblyUseListOrder( cl::desc("Preserve use-list order when writing LLVM assembly."), cl::init(false), cl::Hidden); +static cl::opt + RunTwice("run-twice", + cl::desc("Run all passes twice, re-using the same pass manager."), + cl::init(false), cl::Hidden); + static inline void addPass(legacy::PassManagerBase &PM, Pass *P) { // Add the pass to the pass manager... PM.add(P); @@ -582,14 +588,25 @@ int main(int argc, char **argv) { if (!NoVerify && !VerifyEach) Passes.add(createVerifierPass()); + // In run twice mode, we want to make sure the output is bit-by-bit + // equivalent if we run the pass manager again, so setup two buffers and + // a stream to write to them. 
Note that llc does something similar and it + // may be worth to abstract this out in the future. + SmallVector Buffer; + SmallVector CompileTwiceBuffer; + std::unique_ptr BOS; + raw_ostream *OS = &Out->os(); + if (RunTwice) { + BOS = make_unique(Buffer); + OS = BOS.get(); + } + // Write bitcode or assembly to the output as the last step... if (!NoOutput && !AnalyzeOnly) { if (OutputAssembly) - Passes.add( - createPrintModulePass(Out->os(), "", PreserveAssemblyUseListOrder)); + Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder)); else - Passes.add( - createBitcodeWriterPass(Out->os(), PreserveBitcodeUseListOrder)); + Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder)); } // Before executing passes, print the final values of the LLVM options. @@ -598,6 +615,27 @@ int main(int argc, char **argv) { // Now that we have all of the passes ready, run them. Passes.run(*M); + // If requested, run all passes again with the same pass manager to catch + // bugs caused by persistent state in the passes + if (RunTwice) { + CompileTwiceBuffer = Buffer; + Buffer.clear(); + std::unique_ptr M2(CloneModule(M.get())); + Passes.run(*M2); + if (Buffer.size() != CompileTwiceBuffer.size() || + (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) != + 0)) { + errs() << "Running the pass manager twice changed the output.\n" + "Writing the result of the second run to the specified output." + "To generate the one-run comparison binary, just run without\n" + "the compile-twice option\n"; + Out->os() << BOS->str(); + Out->keep(); + return 1; + } + Out->os() << BOS->str(); + } + // Declare success. if (!NoOutput || PrintBreakpoints) Out->keep(); From b6cc95afa7478cecc423bac61fa54aa7fc3f6efe Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Fri, 4 Dec 2015 21:59:04 +0000 Subject: [PATCH 090/364] Fix incorrect quote. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254775 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/GettingStarted.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 0d3115513453..8fb4daa5e26e 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -853,7 +853,7 @@ with the latest Xcode: .. code-block:: console - % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES=“armv7;armv7s;arm64" + % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64" -DCMAKE_TOOLCHAIN_FILE=/cmake/platforms/iOS.cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options] From cc87069c319aed2f95394a76dccfcd6360c08b80 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Fri, 4 Dec 2015 22:00:47 +0000 Subject: [PATCH 091/364] [SimplifyLibCalls] Optimization for pow(x, n) where n is some constant Summary: In order to avoid calling pow function we generate repeated fmul when n is a positive or negative whole number. For each exponent we pre-compute Addition Chains in order to minimize the no. of fmuls. Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html We pre-compute addition chains for exponents upto 32 (which results in a max of 7 fmuls). For eg: 4 = 2+2 5 = 2+3 6 = 3+3 and so on Hence, pow(x, 4.0) ==> y = fmul x, x x = fmul y, y ret x For negative exponents, we simply compute the reciprocal of the final result. Note: This transformation is only enabled under fast-math. 
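To make the addition-chain idea above concrete, here is a small standalone sketch (not the routine added by this patch, and the toy table only covers exponents up to 7): each exponent is split into two smaller exponents whose powers have already been computed, so pow(x, n) turns into a short sequence of multiplies.

#include <cassert>
#include <cstdio>

// Illustrative only: expand x^Exp into repeated multiplies using a tiny
// addition-chain table, where Exp == AddChain[Exp][0] + AddChain[Exp][1].
static const unsigned AddChain[8][2] = {
    {0, 0}, {0, 0}, {1, 1}, {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}};

static double powChain(double X, unsigned Exp, double Memo[8]) {
  assert(Exp >= 1 && Exp <= 7 && "toy table only covers exponents 1..7");
  if (Exp == 1)
    return X;
  if (Memo[Exp] != 0.0) // memoize intermediate powers so each is built once
    return Memo[Exp];
  Memo[Exp] = powChain(X, AddChain[Exp][0], Memo) *
              powChain(X, AddChain[Exp][1], Memo);
  return Memo[Exp];
}

int main() {
  double Memo[8] = {0};
  // x^7 = x^2 * x^5, x^5 = x^2 * x^3, x^3 = x * x^2: four multiplies total.
  std::printf("%f\n", powChain(2.0, 7, Memo)); // prints 128.000000
  return 0;
}

The real transform additionally guards on fast-math, caps the exponent at 32 (at most 7 fmuls), and handles negative exponents by taking the reciprocal of the result, as the diff below shows.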
Patch by Mandeep Singh Grang Reviewers: weimingz, majnemer, escha, davide, scanon, joerg Subscribers: probinson, escha, llvm-commits Differential Revision: http://reviews.llvm.org/D13994 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254776 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 51 +++++++++ test/Transforms/InstCombine/pow-4.ll | 120 ++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 test/Transforms/InstCombine/pow-4.ll diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83afb1a65ac0..df75ed96893d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1058,6 +1058,31 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. + // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). + {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1156,6 +1181,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } diff --git a/test/Transforms/InstCombine/pow-4.ll b/test/Transforms/InstCombine/pow-4.ll new file mode 100644 index 000000000000..76ef4c5de923 --- /dev/null +++ b/test/Transforms/InstCombine/pow-4.ll @@ -0,0 +1,120 @@ +; Test that the pow library call simplifier works correctly. 
+ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; Function Attrs: nounwind readnone +declare double @llvm.pow.f64(double, double) +declare float @llvm.pow.f32(float, float) + +; pow(x, 4.0f) +define float @test_simplify_4f(float %x) #0 { +; CHECK-LABEL: @test_simplify_4f( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul float %x, %x +; CHECK-NEXT: %2 = fmul float %1, %1 +; CHECK-NEXT: ret float %2 + %1 = call float @llvm.pow.f32(float %x, float 4.000000e+00) + ret float %1 +} + +; pow(x, 3.0) +define double @test_simplify_3(double %x) #0 { +; CHECK-LABEL: @test_simplify_3( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: ret double %2 + %1 = call double @llvm.pow.f64(double %x, double 3.000000e+00) + ret double %1 +} + +; pow(x, 4.0) +define double @test_simplify_4(double %x) #0 { +; CHECK-LABEL: @test_simplify_4( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: ret double %2 + %1 = call double @llvm.pow.f64(double %x, double 4.000000e+00) + ret double %1 +} + +; pow(x, 15.0) +define double @test_simplify_15(double %x) #0 { +; CHECK-LABEL: @test_simplify_15( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %2, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double 1.500000e+01) + ret double %1 +} + +; pow(x, -7.0) +define double @test_simplify_neg_7(double %x) #0 { +; CHECK-LABEL: @test_simplify_neg_7( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: %3 = fmul double %1, %2 +; CHECK-NEXT: %4 = fmul double %1, %3 +; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double -7.000000e+00) + ret double %1 +} + +; pow(x, -19.0) +define double @test_simplify_neg_19(double %x) #0 { +; CHECK-LABEL: @test_simplify_neg_19( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %1, %4 +; CHECK-NEXT: %6 = fmul double %5, %x +; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6 +; CHECK-NEXT: ret double %7 + %1 = call double @llvm.pow.f64(double %x, double -1.900000e+01) + ret double %1 +} + +; pow(x, 11.23) +define double @test_simplify_11_23(double %x) #0 { +; CHECK-LABEL: @test_simplify_11_23( +; CHECK-NOT: fmul +; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01) +; CHECK-NEXT: ret double %1 + %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01) + ret double %1 +} + +; pow(x, 32.0) +define double @test_simplify_32(double %x) #0 { +; CHECK-LABEL: @test_simplify_32( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %4, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double 3.200000e+01) + ret double %1 +} + +; pow(x, 33.0) +define double @test_simplify_33(double %x) #0 { +; CHECK-LABEL: @test_simplify_33( +; CHECK-NOT: fmul +; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01) +; CHECK-NEXT: ret double %1 + %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01) + ret double %1 +} 
+ +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" } From c55f4fb8055591bee9ed577794299c0dd3ff791e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 22:08:53 +0000 Subject: [PATCH 092/364] Always pass a diagnostic handler to the linker. Before this patch the diagnostic handler was optional. If it was not passed, the one in the LLVMContext was used. That is probably not a pattern we want to follow. If each area has an optional callback, there is a sea of callbacks and it is hard to follow which one is called. Doing this also found cases where the callback is a nice addition, like testing that no errors or warnings are reported. The other option is to always use the diagnostic handler in the LLVMContext. That has a few problems * To implement the C API we would have to set the diag handler and then set it back to the original value. * Code that creates the context might be far away from code that wants the diagnostics. I do have a patch that implements the second option and will send that as an RFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254777 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Linker/Linker.h | 6 +++--- lib/LTO/LTOCodeGenerator.cpp | 10 ++++++---- lib/Linker/LinkModules.cpp | 10 ---------- tools/bugpoint/BugDriver.cpp | 9 ++++++++- tools/bugpoint/Miscompilation.cpp | 19 +++++++++++++++---- tools/gold/gold-plugin.cpp | 2 +- unittests/Linker/LinkModulesTest.cpp | 8 +++++--- 7 files changed, 38 insertions(+), 26 deletions(-) diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h index f9890935126e..f0c8ad979ab6 100644 --- a/include/llvm/Linker/Linker.h +++ b/include/llvm/Linker/Linker.h @@ -69,7 +69,6 @@ class Linker { }; Linker(Module &M, DiagnosticHandlerFunction DiagnosticHandler); - Linker(Module &M); /// \brief Link \p Src into the composite. The source is destroyed. 
/// @@ -88,8 +87,9 @@ class Linker { DiagnosticHandlerFunction DiagnosticHandler, unsigned Flags = Flags::None); - static bool linkModules(Module &Dest, Module &Src, - unsigned Flags = Flags::None); + DiagnosticHandlerFunction getDiagnosticHandler() const { + return DiagnosticHandler; + } private: Module &Composite; diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index b0dae74c13d4..25c150b27840 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -65,9 +65,10 @@ const char* LTOCodeGenerator::getVersionString() { } LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) - : Context(Context), - MergedModule(new Module("ld-temp.o", Context)), - IRLinker(new Linker(*MergedModule)) { + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), + IRLinker(new Linker(*MergedModule, [this](const DiagnosticInfo &DI) { + MergedModule->getContext().diagnose(DI); + })) { initializeLTOPasses(); } @@ -123,7 +124,8 @@ void LTOCodeGenerator::setModule(std::unique_ptr Mod) { AsmUndefinedRefs.clear(); MergedModule = Mod->takeModule(); - IRLinker = make_unique(*MergedModule); + IRLinker = + make_unique(*MergedModule, IRLinker->getDiagnosticHandler()); const std::vector &Undefs = Mod->getAsmUndefinedRefs(); for (int I = 0, E = Undefs.size(); I != E; ++I) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 55ab1824740b..88b8e443c489 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -2030,11 +2030,6 @@ Linker::Linker(Module &M, DiagnosticHandlerFunction DiagnosticHandler) } } -Linker::Linker(Module &M) - : Linker(M, [this](const DiagnosticInfo &DI) { - Composite.getContext().diagnose(DI); - }) {} - bool Linker::linkInModule(Module &Src, unsigned Flags, const FunctionInfoIndex *Index, DenseSet *FunctionsToImport) { @@ -2061,11 +2056,6 @@ bool Linker::linkModules(Module &Dest, Module &Src, return L.linkInModule(Src, Flags); } -bool Linker::linkModules(Module &Dest, Module &Src, unsigned Flags) { - Linker L(Dest); - return L.linkInModule(Src, Flags); -} - //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp index 39887d5d59dc..9edc242d470e 100644 --- a/tools/bugpoint/BugDriver.cpp +++ b/tools/bugpoint/BugDriver.cpp @@ -15,6 +15,7 @@ #include "BugDriver.h" #include "ToolRunner.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" @@ -112,6 +113,12 @@ std::unique_ptr llvm::parseInputFile(StringRef Filename, return Result; } +static void diagnosticHandler(const DiagnosticInfo &DI) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << '\n'; +} + // This method takes the specified list of LLVM input files, attempts to load // them, either as assembly or bitcode, then link them together. 
It returns // true on failure (if, for example, an input bitcode file could not be @@ -132,7 +139,7 @@ bool BugDriver::addSources(const std::vector &Filenames) { if (!M.get()) return true; outs() << "Linking in input file: '" << Filenames[i] << "'\n"; - if (Linker::linkModules(*Program, *M)) + if (Linker::linkModules(*Program, *M, diagnosticHandler)) return true; } diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp index e7eae40ec95a..0b61b0969855 100644 --- a/tools/bugpoint/Miscompilation.cpp +++ b/tools/bugpoint/Miscompilation.cpp @@ -18,6 +18,7 @@ #include "llvm/Config/config.h" // for HAVE_LINK_R #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -207,6 +208,14 @@ namespace { }; } +static void diagnosticHandler(const DiagnosticInfo &DI) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << '\n'; + if (DI.getSeverity() == DS_Error) + exit(1); +} + /// TestMergedProgram - Given two modules, link them together and run the /// program, checking to see if the program matches the diff. If there is /// an error, return NULL. If not, return the merged module. The Broken argument @@ -222,7 +231,7 @@ static Module *TestMergedProgram(const BugDriver &BD, Module *M1, Module *M2, M1 = CloneModule(M1); M2 = CloneModule(M2); } - if (Linker::linkModules(*M1, *M2)) + if (Linker::linkModules(*M1, *M2, diagnosticHandler)) exit(1); delete M2; // We are done with this module. @@ -390,7 +399,8 @@ static bool ExtractLoops(BugDriver &BD, MisCompFunctions.emplace_back(F->getName(), F->getFunctionType()); } - if (Linker::linkModules(*ToNotOptimize, *ToOptimizeLoopExtracted)) + if (Linker::linkModules(*ToNotOptimize, *ToOptimizeLoopExtracted, + diagnosticHandler)) exit(1); MiscompiledFunctions.clear(); @@ -418,7 +428,8 @@ static bool ExtractLoops(BugDriver &BD, // extraction both didn't break the program, and didn't mask the problem. // Replace the current program with the loop extracted version, and try to // extract another loop. - if (Linker::linkModules(*ToNotOptimize, *ToOptimizeLoopExtracted)) + if (Linker::linkModules(*ToNotOptimize, *ToOptimizeLoopExtracted, + diagnosticHandler)) exit(1); delete ToOptimizeLoopExtracted; @@ -594,7 +605,7 @@ static bool ExtractBlocks(BugDriver &BD, if (!I->isDeclaration()) MisCompFunctions.emplace_back(I->getName(), I->getFunctionType()); - if (Linker::linkModules(*ProgClone, *Extracted)) + if (Linker::linkModules(*ProgClone, *Extracted, diagnosticHandler)) exit(1); // Set the new program and delete the old one. 
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 1bd2f8afb290..8eacdc3ff235 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -938,7 +938,7 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) { } std::unique_ptr Combined(new Module("ld-temp.o", Context)); - Linker L(*Combined); + Linker L(*Combined, diagnosticHandler); std::string DefaultTriple = sys::getDefaultTargetTriple(); diff --git a/unittests/Linker/LinkModulesTest.cpp b/unittests/Linker/LinkModulesTest.cpp index 4eba718e2663..e56a692125ec 100644 --- a/unittests/Linker/LinkModulesTest.cpp +++ b/unittests/Linker/LinkModulesTest.cpp @@ -71,6 +71,8 @@ class LinkModuleTest : public testing::Test { BasicBlock *ExitBB; }; +static void expectNoDiags(const DiagnosticInfo &DI) { EXPECT_TRUE(false); } + TEST_F(LinkModuleTest, BlockAddress) { IRBuilder<> Builder(EntryBB); @@ -93,7 +95,7 @@ TEST_F(LinkModuleTest, BlockAddress) { Builder.CreateRet(ConstantPointerNull::get(Type::getInt8PtrTy(Ctx))); Module *LinkedModule = new Module("MyModuleLinked", Ctx); - Linker::linkModules(*LinkedModule, *M); + Linker::linkModules(*LinkedModule, *M, expectNoDiags); // Delete the original module. M.reset(); @@ -169,13 +171,13 @@ static Module *getInternal(LLVMContext &Ctx) { TEST_F(LinkModuleTest, EmptyModule) { std::unique_ptr InternalM(getInternal(Ctx)); std::unique_ptr EmptyM(new Module("EmptyModule1", Ctx)); - Linker::linkModules(*EmptyM, *InternalM); + Linker::linkModules(*EmptyM, *InternalM, expectNoDiags); } TEST_F(LinkModuleTest, EmptyModule2) { std::unique_ptr InternalM(getInternal(Ctx)); std::unique_ptr EmptyM(new Module("EmptyModule1", Ctx)); - Linker::linkModules(*InternalM, *EmptyM); + Linker::linkModules(*InternalM, *EmptyM, expectNoDiags); } TEST_F(LinkModuleTest, TypeMerge) { From c5aab87e68369e95e4fbe83f6ed2bd38ec9f41d6 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 4 Dec 2015 22:09:19 +0000 Subject: [PATCH 093/364] [Orc] Move some code up into the JITCompileCallbackManager base class. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254778 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ExecutionEngine/Orc/IndirectionUtils.h | 39 ++++++++++--------- .../Orc/CompileOnDemandLayerTest.cpp | 12 +----- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index b5b258e7a05c..3bfff059110c 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -84,7 +84,11 @@ class JITCompileCallbackManager { } /// @brief Reserve a compile callback. - virtual CompileCallbackInfo getCompileCallback() = 0; + CompileCallbackInfo getCompileCallback() { + TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); + auto &Compile = this->ActiveTrampolines[TrampolineAddr]; + return CompileCallbackInfo(TrampolineAddr, Compile); + } /// @brief Get a CompileCallbackInfo for an existing callback. 
CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) { @@ -113,6 +117,20 @@ class JITCompileCallbackManager { std::vector AvailableTrampolines; private: + + TargetAddress getAvailableTrampolineAddr() { + if (this->AvailableTrampolines.empty()) + grow(); + assert(!this->AvailableTrampolines.empty() && + "Failed to grow available trampolines."); + TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); + this->AvailableTrampolines.pop_back(); + return TrampolineAddr; + } + + // Create new trampolines - to be implemented in subclasses. + virtual void grow() = 0; + virtual void anchor(); }; @@ -145,13 +163,6 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { assert(!EC && "Failed to mprotect resolver block"); } - /// @brief Get/create a compile callback with the given signature. - CompileCallbackInfo getCompileCallback() final { - TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); - auto &Compile = this->ActiveTrampolines[TrampolineAddr]; - return CompileCallbackInfo(TrampolineAddr, Compile); - } - private: static TargetAddress reenter(void *CCMgr, void *TrampolineId) { @@ -162,17 +173,7 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { reinterpret_cast(TrampolineId))); } - TargetAddress getAvailableTrampolineAddr() { - if (this->AvailableTrampolines.empty()) - grow(); - assert(!this->AvailableTrampolines.empty() && - "Failed to grow available trampolines."); - TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); - this->AvailableTrampolines.pop_back(); - return TrampolineAddr; - } - - void grow() { + void grow() override { assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); std::error_code EC; diff --git a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp index 4a30cfc42971..ca508d0a7561 100644 --- a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp @@ -18,17 +18,9 @@ namespace { class DummyCallbackManager : public orc::JITCompileCallbackManager { public: - DummyCallbackManager() - : JITCompileCallbackManager(0), NextStubAddress(0), - UniversalCompile([]() { return 0; }) { - } - - CompileCallbackInfo getCompileCallback() override { - return CompileCallbackInfo(++NextStubAddress, UniversalCompile); - } + DummyCallbackManager() : JITCompileCallbackManager(0) { } public: - TargetAddress NextStubAddress; - CompileFtor UniversalCompile; + void grow() override { llvm_unreachable("not implemented"); } }; class DummyStubsManager : public orc::IndirectStubsManagerBase { From f8e0f06f7c0ce41dc03400ad65dd005531234579 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Dec 2015 22:26:21 +0000 Subject: [PATCH 094/364] MSVC complains about this being ambiguous. 
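The log does not record the exact diagnostic, but a plausible reconstruction of the ambiguity is sketched below: once an argument drags namespace std into argument-dependent lookup, a standard library that ships std::make_unique offers a second viable candidate next to llvm::make_unique, and the unqualified call no longer resolves. The helper and types here are stand-ins, not code from the tree; qualifying the call as llvm::make_unique, as the diff below does, sidesteps the problem.

#include <functional>
#include <memory>
#include <utility>

namespace llvm {
// Stand-in for llvm::make_unique, the pre-C++14 helper from STLExtras.h.
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args &&... Params) {
  return std::unique_ptr<T>(new T(std::forward<Args>(Params)...));
}
struct Widget {
  explicit Widget(std::function<void(int)>) {}
};
} // namespace llvm
using namespace llvm;

int main() {
  std::function<void(int)> Handler = [](int) {};
  // The std::function argument associates namespace std with the call, so a
  // library that provides std::make_unique sees two candidates here:
  //   auto W = make_unique<Widget>(Handler);  // ambiguous on such compilers
  auto W = llvm::make_unique<Widget>(Handler); // explicit qualification is fine
  return W ? 0 : 1;
}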
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254782 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOCodeGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 25c150b27840..bf3cde59443d 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -124,8 +124,8 @@ void LTOCodeGenerator::setModule(std::unique_ptr Mod) { AsmUndefinedRefs.clear(); MergedModule = Mod->takeModule(); - IRLinker = - make_unique(*MergedModule, IRLinker->getDiagnosticHandler()); + IRLinker = llvm::make_unique(*MergedModule, + IRLinker->getDiagnosticHandler()); const std::vector &Undefs = Mod->getAsmUndefinedRefs(); for (int I = 0, E = Undefs.size(); I != E; ++I) From 3f8065b694d883dfc4f8ced0d90d49e76b4f2c0b Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Fri, 4 Dec 2015 22:29:39 +0000 Subject: [PATCH 095/364] [libFuzzer] compute base64 in-process instead of using an external lib. Since libFuzzer should not depend on anything, just re-implement base64 encoder. PR25746 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254784 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Fuzzer/FuzzerIO.cpp | 5 ----- lib/Fuzzer/FuzzerInternal.h | 2 +- lib/Fuzzer/FuzzerLoop.cpp | 6 ++---- lib/Fuzzer/FuzzerUtil.cpp | 29 +++++++++++++++++++++++++++++ lib/Fuzzer/test/FuzzerUnittest.cpp | 13 +++++++++++++ 5 files changed, 45 insertions(+), 10 deletions(-) diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index abc444a3d471..043fad396d51 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -91,11 +91,6 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + "/" + FileName; } -void PrintFileAsBase64(const std::string &Path) { - std::string Cmd = "base64 -w 0 < " + Path + "; echo"; - ExecuteCommand(Cmd); -} - void Printf(const char *Fmt, ...) { va_list ap; va_start(ap, Fmt); diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 2c382b2ef314..bc6bec7473d3 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -42,7 +42,7 @@ void Print(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); -void PrintFileAsBase64(const std::string &Path); +std::string Base64(const Unit &U); int ExecuteCommand(const std::string &Command); // Private copy of SHA1 implementation. 
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 9c52a4dbe774..ca7f82b55607 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -302,10 +302,8 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { WriteToFile(U, Path); Printf("artifact_prefix='%s'; Test unit written to %s\n", Options.ArtifactPrefix.c_str(), Path.c_str()); - if (U.size() <= kMaxUnitSizeToPrint) { - Printf("Base64: "); - PrintFileAsBase64(Path); - } + if (U.size() <= kMaxUnitSizeToPrint) + Printf("Base64: %s\n", Base64(U).c_str()); } void Fuzzer::SaveCorpus() { diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index 20a41e0d4fbb..6c1133fffd37 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -167,4 +167,33 @@ bool ParseDictionaryFile(const std::string &Text, std::vector *Units) { int GetPid() { return getpid(); } + +std::string Base64(const Unit &U) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Res; + size_t i; + for (i = 0; i + 2 < U.size(); i += 3) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += Table[x & 63]; + } + if (i + 1 == U.size()) { + uint32_t x = (U[i] << 16); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += "=="; + } else if (i + 2 == U.size()) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += "="; + } + return Res; +} + } // namespace fuzzer diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 4a96468f8d7a..b92e61877c6c 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -360,3 +360,16 @@ TEST(FuzzerDictionary, ParseDictionaryFile) { EXPECT_EQ(Units, std::vector({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})})); } + +TEST(FuzzerUtil, Base64) { + EXPECT_EQ("", Base64({})); + EXPECT_EQ("YQ==", Base64({'a'})); + EXPECT_EQ("eA==", Base64({'x'})); + EXPECT_EQ("YWI=", Base64({'a', 'b'})); + EXPECT_EQ("eHk=", Base64({'x', 'y'})); + EXPECT_EQ("YWJj", Base64({'a', 'b', 'c'})); + EXPECT_EQ("eHl6", Base64({'x', 'y', 'z'})); + EXPECT_EQ("YWJjeA==", Base64({'a', 'b', 'c', 'x'})); + EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'})); + EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'})); +} From a7a95fed0f4e64e5a0c8836db68be8964fdc10c7 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Fri, 4 Dec 2015 22:47:58 +0000 Subject: [PATCH 096/364] Add TransformUtils to list of required libraries for llc This dependency was added in r254774 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254786 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llc/CMakeLists.txt | 1 + tools/llc/LLVMBuild.txt | 2 +- tools/llc/Makefile | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt index ff5a89e1da44..136cf4be1c12 100644 --- a/tools/llc/CMakeLists.txt +++ b/tools/llc/CMakeLists.txt @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS SelectionDAG Support Target + TransformUtils ) # Support plugins. 
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt index 38660cf27a46..c1f5cebea859 100644 --- a/tools/llc/LLVMBuild.txt +++ b/tools/llc/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llc parent = Tools -required_libraries = AsmParser BitReader IRReader MIRParser all-targets +required_libraries = AsmParser BitReader IRReader MIRParser TransformUtils all-targets diff --git a/tools/llc/Makefile b/tools/llc/Makefile index ae64c9a5b57c..cd34c80d840a 100644 --- a/tools/llc/Makefile +++ b/tools/llc/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llc -LINK_COMPONENTS := all-targets bitreader asmparser irreader mirparser +LINK_COMPONENTS := all-targets bitreader asmparser irreader mirparser transformutils # Support plugins. NO_DEAD_STRIP := 1 From 35cba4cf6a352382826c0daf86108898807e4910 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 4 Dec 2015 23:00:33 +0000 Subject: [PATCH 097/364] X86: Don't emit SAHF/LAHF for 64-bit targets unless explicitly supported These instructions are not supported by all CPUs in 64-bit mode. Emitting them causes Chromium to crash on start-up for users with such chips. (GCC puts these instructions behind -msahf on 64-bit for the same reason.) This patch adds FeatureLAHFSAHF, enables it by default for 32-bit targets and modern CPUs, and changes X86InstrInfo::copyPhysReg back to the lowering from before r244503 when the instructions are not available. Differential Revision: http://reviews.llvm.org/D15240 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254793 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 58 ++++++++++----- lib/Target/X86/X86ISelLowering.cpp | 3 + lib/Target/X86/X86InstrInfo.cpp | 29 ++++++-- lib/Target/X86/X86InstrInfo.td | 7 +- lib/Target/X86/X86Subtarget.cpp | 10 +++ lib/Target/X86/X86Subtarget.h | 4 ++ test/CodeGen/X86/cmpxchg-clobber-flags.ll | 72 +++++++++++++------ .../X86/peephole-na-phys-copy-folding.ll | 2 +- 8 files changed, 136 insertions(+), 49 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7d9f396c1e96..dc5ab1bf65d4 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -182,6 +182,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", + "Support LAHF and SAHF instructions">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", @@ -273,7 +275,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureSlowBTMem, + FeatureLAHFSAHF ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSlowUAMem16, @@ -281,7 +284,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureSlowBTMem, + FeatureLAHFSAHF ]>; // Atom CPUs. @@ -299,7 +303,8 @@ class BonnellProc : ProcessorModel; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. @@ -319,7 +324,8 @@ class SilvermontProc : ProcessorModel; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. 
@@ -331,7 +337,8 @@ class NehalemProc : ProcessorModel; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; @@ -346,7 +353,8 @@ class WestmereProc : ProcessorModel; def : WestmereProc<"westmere">; @@ -363,7 +371,8 @@ class SandyBridgeProc : ProcessorModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. @@ -382,7 +391,8 @@ class IvyBridgeProc : ProcessorModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. @@ -408,7 +418,8 @@ class HaswellProc : ProcessorModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. @@ -436,7 +447,8 @@ class BroadwellProc : ProcessorModel; def : BroadwellProc<"broadwell">; @@ -465,7 +477,8 @@ class KnightsLandingProc : ProcessorModel; def : KnightsLandingProc<"knl">; @@ -500,7 +513,8 @@ class SkylakeProc : ProcessorModel; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. @@ -547,7 +561,7 @@ def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; // Bobcat def : Proc<"btver1", [ @@ -560,7 +574,8 @@ def : Proc<"btver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Jaguar @@ -580,7 +595,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [ FeaturePOPCNT, FeatureXSAVE, FeatureXSAVEOPT, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Bulldozer @@ -598,7 +614,8 @@ def : Proc<"bdver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Piledriver def : Proc<"bdver2", [ @@ -619,7 +636,8 @@ def : Proc<"bdver2", [ FeatureBMI, FeatureTBM, FeatureFMA, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Steamroller @@ -643,7 +661,8 @@ def : Proc<"bdver3", [ FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, - FeatureFSGSBase + FeatureFSGSBase, + FeatureLAHFSAHF ]>; // Excavator @@ -666,7 +685,8 @@ def : Proc<"bdver4", [ FeatureTBM, FeatureFMA, FeatureXSAVEOPT, - FeatureFSGSBase + FeatureFSGSBase, + FeatureLAHFSAHF ]>; def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2cf1d4ba30ee..c07bca8fe52a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13930,6 +13930,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + + // Some 64-bit targets lack SAHF support, but they do support FCOMI. + assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e9d36f8ce2f1..ebe329064c50 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4385,7 +4385,32 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Reg = FromEFLAGS ? DestReg : SrcReg; bool is32 = X86::GR32RegClass.contains(Reg); bool is64 = X86::GR64RegClass.contains(Reg); + if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { + int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; + int Push = is64 ? 
X86::PUSH64r : X86::PUSH32r; + int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; + int Pop = is64 ? X86::POP64r : X86::POP32r; + int PopF = is64 ? X86::POPF64 : X86::POPF32; + int AX = is64 ? X86::RAX : X86::EAX; + + if (!Subtarget.hasLAHFSAHF()) { + assert(is64 && "Not having LAHF/SAHF only happens on 64-bit."); + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(PushF)); + BuildMI(MBB, MI, DL, get(Pop), DestReg); + } + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Push)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(PopF)); + } + return; + } + // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is // inefficient. Instead: // - Save the overflow flag OF into AL using SETO, and restore it using a @@ -4407,10 +4432,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Notice that we have to adjust the stack if we don't want to clobber the // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; - int Push = is64 ? X86::PUSH64r : X86::PUSH32r; - int Pop = is64 ? X86::POP64r : X86::POP32r; - int AX = is64 ? X86::RAX : X86::EAX; bool AXDead = (Reg == AX); // FIXME: The above could figure out that AX is dead in more cases with: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1e66739026e2..1c21a098bc6c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -799,6 +799,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; +def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasMPX : Predicate<"Subtarget->hasMPX()">; @@ -1502,10 +1503,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", - [(set EFLAGS, (X86sahf AH))], IIC_AHF>; + [(set EFLAGS, (X86sahf AH))], IIC_AHF>, + Requires<[HasLAHFSAHF]>; let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], - IIC_AHF>; // AH = flags + IIC_AHF>, // AH = flags + Requires<[HasLAHFSAHF]>; } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 44a46b7e07a2..f90a0b0d04f1 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -189,6 +189,15 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } + // LAHF/SAHF are always supported in non-64-bit mode. + if (!In64BitMode) { + if (!FullFS.empty()) + FullFS = "+sahf," + FullFS; + else + FullFS = "+sahf"; + } + + // Parse features string and set the CPU. 
ParseSubtargetFeatures(CPUName, FullFS); @@ -264,6 +273,7 @@ void X86Subtarget::initializeEnvironment() { HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasLAHFSAHF = false; HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 353b4f7f5ebd..83bc640976ac 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -152,6 +152,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor has RDSEED instructions. bool HasRDSEED; + /// Processor has LAHF/SAHF instructions. + bool HasLAHFSAHF; + /// True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -374,6 +377,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } + bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index 791edba89c44..c294dee40135 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,7 +1,11 @@ ; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 ; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f + ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 ; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf ; FIXME: X86InstrInfo::copyPhysReg had code which figured out whether AX was ; live or not to avoid save / restore when it's not needed. See FIXME in @@ -56,21 +60,31 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { ; x8664-LABEL: test_intervening_call: ; x8664: cmpxchgq -; x8664: pushq %rax -; x8664-NEXT: seto %al -; x8664-NEXT: lahf -; x8664-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-NEXT: popq %rax +; x8664: pushfq +; x8664-NEXT: popq [[FLAGS:%.*]] ; x8664-NEXT: movq %rax, %rdi ; x8664-NEXT: callq bar +; x8664-NEXT: pushq [[FLAGS]] +; x8664-NEXT: popfq +; x8664-NEXT: jne + +; x8664-sahf-LABEL: test_intervening_call: +; x8664-sahf: cmpxchgq +; x8664-sahf: pushq %rax +; x8664-sahf-NEXT: seto %al +; x8664-sahf-NEXT: lahf +; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] +; x8664-sahf-NEXT: popq %rax +; x8664-sahf-NEXT: movq %rax, %rdi +; x8664-sahf-NEXT: callq bar ; ** FIXME Next line isn't actually necessary. ** -; x8664-NEXT: pushq %rax -; x8664-NEXT: movq [[FLAGS]], %rax -; x8664-NEXT: addb $127, %al -; x8664-NEXT: sahf +; x8664-sahf-NEXT: pushq %rax +; x8664-sahf-NEXT: movq [[FLAGS]], %rax +; x8664-sahf-NEXT: addb $127, %al +; x8664-sahf-NEXT: sahf ; ** FIXME Next line isn't actually necessary. 
** -; x8664-NEXT: popq %rax -; x8664-NEXT: jne +; x8664-sahf-NEXT: popq %rax +; x8664-sahf-NEXT: jne %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst %v = extractvalue { i64, i1 } %cx, 0 @@ -99,6 +113,10 @@ define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) { ; x8664: cmpxchg ; x8664-NEXT: jne +; x8664-sahf-LABEL: test_control_flow: +; x8664-sahf: cmpxchg +; x8664-sahf-NEXT: jne + entry: %cmp = icmp sgt i32 %i, %j br i1 %cmp, label %loop_start, label %cond.end @@ -165,20 +183,28 @@ define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; i386f-NEXT: popl %eax ; x8664-LABEL: test_feed_cmov: -; x8664: cmpxchgl +; x8664: cmpxchg +; x8664: pushfq +; x8664-NEXT: popq [[FLAGS:%.*]] +; x8664-NEXT: callq foo +; x8664-NEXT: pushq [[FLAGS]] +; x8664-NEXT: popfq + +; x8664-sahf-LABEL: test_feed_cmov: +; x8664-sahf: cmpxchgl ; ** FIXME Next line isn't actually necessary. ** -; x8664: pushq %rax -; x8664: seto %al -; x8664-NEXT: lahf -; x8664-NEXT: movq %rax, [[FLAGS:%.*]] +; x8664-sahf: pushq %rax +; x8664-sahf: seto %al +; x8664-sahf-NEXT: lahf +; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] ; ** FIXME Next line isn't actually necessary. ** -; x8664-NEXT: popq %rax -; x8664-NEXT: callq foo -; x8664-NEXT: pushq %rax -; x8664-NEXT: movq [[FLAGS]], %rax -; x8664-NEXT: addb $127, %al -; x8664-NEXT: sahf -; x8664-NEXT: popq %rax +; x8664-sahf-NEXT: popq %rax +; x8664-sahf-NEXT: callq foo +; x8664-sahf-NEXT: pushq %rax +; x8664-sahf-NEXT: movq [[FLAGS]], %rax +; x8664-sahf-NEXT: addb $127, %al +; x8664-sahf-NEXT: sahf +; x8664-sahf-NEXT: popq %rax %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1 diff --git a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index 891a925611cf..a8df33454e92 100644 --- a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s ; FIXME Add -verify-machineinstrs back when PR24535 is fixed. From a026cdc11a64e7c87b44d09a53c72fdf9e2163ea Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 4 Dec 2015 23:00:54 +0000 Subject: [PATCH 098/364] CodeGen: Move the SlotIndexes BumpPtrAllocator before the list it allocates When a `SlotIndexes` is destroyed, `ileAllocator` will currently be destructed before `IndexList`, but all of `IndexList`'s storage has been allocated by `ileAllocator`. This means we'll call destructors on garbage data, which is very bad. This can be avoided by putting the BumpPtrAllocator earlier in the class than anything it allocates. Unfortunately, I don't know how to test this. It depends very much on memory layout, and the only evidence I have that this is actually happening in practice are backtraces that might be explained by this. By inspection though, the code is obviously dangerous/wrong, and this is the right thing to do. I'll follow up later with a patch that calls clearAndLeakNodesUnsafely on the list, since there isn't much point in destructing them when they're allocated in a BPA anyway, but I figured it makes sense to commit the correctness fix separately from that optimization. 
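For illustration, here is a minimal standalone C++ sketch of the rule this fix relies on (the type names are invented for the example, not the LLVM classes): non-static data members are constructed in declaration order and destroyed in the reverse order, so declaring the allocator before the container it backs guarantees the allocator outlives the container's destructor.

#include <cstdio>

struct Allocator {
  ~Allocator() { std::printf("Allocator destroyed\n"); }
};

struct List {
  ~List() { std::printf("List destroyed\n"); }
};

// Members are destroyed in reverse declaration order: the allocator is
// declared first, so it is destroyed last and its memory stays valid while
// the list's destructor runs.
struct Safe {
  Allocator allocator; // declared first  -> destroyed last
  List list;           // declared second -> destroyed first
};

int main() {
  Safe s; // prints "List destroyed" then "Allocator destroyed"
  return 0;
}

With the two members declared the other way around, the list would be torn down after its backing memory had already been reclaimed, which is exactly the garbage-destructor scenario described above.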
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254794 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SlotIndexes.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index bd3a9062fb90..5dc42e20debe 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -333,6 +333,8 @@ namespace llvm { /// This pass assigns indexes to each instruction. class SlotIndexes : public MachineFunctionPass { private: + // IndexListEntry allocator. + BumpPtrAllocator ileAllocator; typedef ilist IndexList; IndexList indexList; @@ -353,9 +355,6 @@ namespace llvm { /// and MBB id. SmallVector idx2MBBMap; - // IndexListEntry allocator. - BumpPtrAllocator ileAllocator; - IndexListEntry* createEntry(MachineInstr *mi, unsigned index) { IndexListEntry *entry = static_cast( From 9ac3ec43b3eb2c18007542c1fe43d93519606c32 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 4 Dec 2015 23:06:33 +0000 Subject: [PATCH 099/364] Address a memory leak in 254760 The issue appears to have been that the copy constructor of the SmallVector was being invoked and this was somehow leading to leaked memory. This patch avoids the symptom, but likely doesn't address the underlying problem. I'm still investigating the root cause, but wanted to avoid the memory leak in the mean time. Even with the underlying fix, avoiding the redundant allocation is worthwhile. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254795 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/LegacyPassManagers.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index af045585691b..418702c0b781 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -264,12 +264,15 @@ class PMTopLevelManager { // TODO: We could consider sorting the dependency arrays within the // AnalysisUsage (since they are conceptually unordered). ID.AddBoolean(AU.getPreservesAll()); - for (auto &Vec : {AU.getRequiredSet(), AU.getRequiredTransitiveSet(), - AU.getPreservedSet(), AU.getUsedSet()}) { + auto ProfileVec = [&](const SmallVectorImpl& Vec) { ID.AddInteger(Vec.size()); for(AnalysisID AID : Vec) ID.AddPointer(AID); - } + }; + ProfileVec(AU.getRequiredSet()); + ProfileVec(AU.getRequiredTransitiveSet()); + ProfileVec(AU.getPreservedSet()); + ProfileVec(AU.getUsedSet()); } }; From 16d4cc83c30485f28bb37715930c4302749d23b1 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Dec 2015 23:22:35 +0000 Subject: [PATCH 100/364] [WebAssembly] Initial varargs support. Full varargs support will depend on prologue/epilogue support, but this patch gets us started with most of the basic infrastructure. 
Differential Revision: http://reviews.llvm.org/D15231 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254799 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 100 +++++++++++--- .../WebAssembly/WebAssemblyISelLowering.h | 1 + .../WebAssembly/WebAssemblyInstrCall.td | 4 +- .../WebAssembly/WebAssemblyInstrInfo.cpp | 4 +- .../WebAssembly/WebAssemblyRegStackify.cpp | 4 + .../WebAssembly/WebAssemblyRegisterInfo.cpp | 9 ++ .../WebAssembly/WebAssemblyRegisterInfo.h | 4 + test/CodeGen/WebAssembly/varargs.ll | 122 ++++++++++++++++++ 8 files changed, 230 insertions(+), 18 deletions(-) create mode 100644 test/CodeGen/WebAssembly/varargs.ll diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a7eba5611134..65d2b1967b13 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -118,6 +118,13 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); + // Take the default expansion for va_arg, va_copy, and va_end. There is no + // default action for va_start, so we do that custom. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + for (auto T : {MVT::f32, MVT::f64}) { // Don't expand the floating-point types to constant pools. setOperationAction(ISD::ConstantFP, T, Legal); @@ -314,23 +321,67 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, } bool IsVarArg = CLI.IsVarArg; - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); + unsigned NumFixedArgs = CLI.NumFixedArgs; + auto PtrVT = getPointerTy(MF.getDataLayout()); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - unsigned NumBytes = CCInfo.getNextStackOffset(); - auto PtrVT = getPointerTy(MF.getDataLayout()); - auto Zero = DAG.getConstant(0, DL, PtrVT, true); + if (IsVarArg) { + // Outgoing non-fixed arguments are placed at the top of the stack. First + // compute their offsets and the total amount of argument stack space + // needed. + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + EVT VT = Arg.getValueType(); + assert(VT != MVT::iPTR && "Legalized args should be concrete"); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Offset = + CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty), + MF.getDataLayout().getABITypeAlignment(Ty)); + CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), + Offset, VT.getSimpleVT(), + CCValAssign::Full)); + } + } + + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); + auto NB = DAG.getConstant(NumBytes, DL, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, NB, DL); + if (IsVarArg) { + // For non-fixed arguments, next emit stores to store the argument values + // to the stack at the offsets computed above. 
+ SDValue SP = DAG.getCopyFromReg( + Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT); + unsigned ValNo = 0; + SmallVector Chains; + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + assert(ArgLocs[ValNo].getValNo() == ValNo && + "ArgLocs should remain in order and only hold varargs args"); + unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); + SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP, + DAG.getConstant(Offset, DL, PtrVT)); + Chains.push_back(DAG.getStore(Chain, DL, Arg, Add, + MachinePointerInfo::getStack(MF, Offset), + false, false, 0)); + } + if (!Chains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + // Compute the operands for the CALLn node. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); - Ops.append(OutVals.begin(), OutVals.end()); + + // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs + // isn't reliable. + Ops.append(OutVals.begin(), + IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); SmallVector Tys; for (const auto &In : Ins) { @@ -360,7 +411,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = Res.getValue(1); } - Chain = DAG.getCALLSEQ_END(Chain, NB, Zero, SDValue(), DL); + SDValue Unused = DAG.getUNDEF(PtrVT); + Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL); return Chain; } @@ -374,15 +426,13 @@ bool WebAssemblyTargetLowering::CanLowerReturn( } SDValue WebAssemblyTargetLowering::LowerReturn( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); SmallVector RetOps(1, Chain); RetOps.append(OutVals.begin(), OutVals.end()); @@ -392,29 +442,26 @@ SDValue WebAssemblyTargetLowering::LowerReturn( for (const ISD::OutputArg &Out : Outs) { assert(!Out.Flags.isByVal() && "byval is not valid for return values"); assert(!Out.Flags.isNest() && "nest is not valid for return values"); + assert(Out.IsFixed && "non-fixed return value is not valid"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); - if (!Out.IsFixed) - fail(DL, DAG, "WebAssembly doesn't support non-fixed results yet"); } return Chain; } SDValue WebAssemblyTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); // Set up the incoming ARGUMENTS value, which serves to represent the liveness // of the incoming values before they're represented by virtual registers. 
@@ -443,6 +490,9 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( MF.getInfo()->addParam(In.VT); } + // Incoming varargs arguments are on the stack and will be accessed through + // va_arg, so we don't need to do anything for them here. + return Chain; } @@ -464,6 +514,8 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, return LowerJumpTable(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); } } @@ -529,6 +581,24 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops); } +SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); + + // The incoming non-fixed arguments are placed on the top of the stack, with + // natural alignment, at the point of the call, so the base pointer is just + // the current frame pointer. + DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); + unsigned FP = + static_cast(Subtarget->getRegisterInfo()) + ->getFrameRegister(DAG.getMachineFunction()); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); +} + //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index af5eab671f27..b6b54bb13ea6 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -77,6 +77,7 @@ class WebAssemblyTargetLowering final : public TargetLowering { SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; }; namespace WebAssembly { diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 018d26cfacda..6b7d03da4897 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -19,8 +19,8 @@ let Defs = [ARGUMENTS] in { let isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : I<(outs), (ins i64imm:$amt), [(WebAssemblycallseq_start timm:$amt)]>; -def ADJCALLSTACKUP : I<(outs), (ins i64imm:$amt1, i64imm:$amt2), - [(WebAssemblycallseq_end timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : I<(outs), (ins i64imm:$amt), + [(WebAssemblycallseq_end timm:$amt, undef)]>; } // isCodeGenOnly = 1 multiclass CALL { diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 82296b3cdace..bd06bc396dcd 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -28,7 +28,9 @@ using namespace llvm; #include "WebAssemblyGenInstrInfo.inc" WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) - : RI(STI.getTargetTriple()) {} + : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, + WebAssembly::ADJCALLSTACKUP), + RI(STI.getTargetTriple()) {} void 
WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ecbbc5c72243..7abc20a8387e 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -204,6 +204,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; unsigned VReg = MO.getReg(); + // Don't stackify physregs like SP or FP. + if (!TargetRegisterInfo::isVirtualRegister(VReg)) + continue; + if (MFI.isVRegStackified(VReg)) { if (MO.isDef()) Stack.push_back(VReg); diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 6c74098aff10..f87b547e3f57 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -67,3 +67,12 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); return Regs[TFI->hasFP(MF)][TT.isArch64Bit()]; } + +const TargetRegisterClass * +WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + assert(Kind == 0 && "Only one kind of pointer on WebAssembly"); + if (MF.getSubtarget().hasAddr64()) + return &WebAssembly::I64RegClass; + return &WebAssembly::I32RegClass; +} diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index bd1485317160..ad1d71eebf22 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -41,6 +41,10 @@ class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo { // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; }; } // end namespace llvm diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll new file mode 100644 index 000000000000..ccc7c1f9ce43 --- /dev/null +++ b/test/CodeGen/WebAssembly/varargs.ll @@ -0,0 +1,122 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test varargs constructs. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Test va_start. + +; TODO: Test va_start. + +;define void @start(i8** %ap, ...) { +;entry: +; %0 = bitcast i8** %ap to i8* +; call void @llvm.va_start(i8* %0) +; ret void +;} + +; Test va_end. + +; CHECK-LABEL: end: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: return{{$}} +define void @end(i8** %ap) { +entry: + %0 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %0) + ret void +} + +; Test va_copy. + +; CHECK-LABEL: copy: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: i32.load $push0=, $1{{$}} +; CHECK-NEXT: i32.store $discard=, $0, $pop0{{$}} +; CHECK-NEXT: return{{$}} +define void @copy(i8** %ap, i8** %bp) { +entry: + %0 = bitcast i8** %ap to i8* + %1 = bitcast i8** %bp to i8* + call void @llvm.va_copy(i8* %0, i8* %1) + ret void +} + +; Test va_arg with an i8 argument. 
+ +; CHECK-LABEL: arg_i8: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: .local i32{{$}} +; CHECK-NEXT: i32.load $1=, $0{{$}} +; CHECK-NEXT: i32.const $push0=, 4{{$}} +; CHECK-NEXT: i32.add $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, $0, $pop1{{$}} +; CHECK-NEXT: i32.load $push2=, $1{{$}} +; CHECK-NEXT: return $pop2{{$}} +define i8 @arg_i8(i8** %ap) { +entry: + %t = va_arg i8** %ap, i8 + ret i8 %t +} + +; Test va_arg with an i32 argument. + +; CHECK-LABEL: arg_i32: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: .local i32{{$}} +; CHECK-NEXT: i32.load $push0=, $0{{$}} +; CHECK-NEXT: i32.const $push1=, 3{{$}} +; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}} +; CHECK-NEXT: i32.const $push3=, -4{{$}} +; CHECK-NEXT: i32.and $1=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.const $push4=, 4{{$}} +; CHECK-NEXT: i32.add $push5=, $1, $pop4{{$}} +; CHECK-NEXT: i32.store $discard=, $0, $pop5{{$}} +; CHECK-NEXT: i32.load $push6=, $1{{$}} +; CHECK-NEXT: return $pop6{{$}} +define i32 @arg_i32(i8** %ap) { +entry: + %t = va_arg i8** %ap, i32 + ret i32 %t +} + +; Test va_arg with an i128 argument. + +; CHECK-LABEL: arg_i128: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: .local +; CHECK: i32.and +; CHECK: i64.load +; CHECK: i64.load +; CHECK: return{{$}} +define i128 @arg_i128(i8** %ap) { +entry: + %t = va_arg i8** %ap, i128 + ret i128 %t +} + +; Test a varargs call with no actual arguments. + +declare void @callee(...) + +; CHECK-LABEL: caller_none: +; CHECK-NEXT: call callee{{$}} +; CHECK-NEXT: return{{$}} +define void @caller_none() { + call void (...) @callee() + ret void +} + +; TODO: Test a varargs call with actual arguments. + +;define void @caller_some() { +; call void (...) @callee(i32 0, double 2.0) +; ret void +;} + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) From 2174f151dc0d0a5f615786ba30a27dc65f42aa85 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 4 Dec 2015 23:32:19 +0000 Subject: [PATCH 101/364] Add FeatureLAHFSAHF to amdfam10 as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254801 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index dc5ab1bf65d4..7fc4def15fe5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -558,7 +558,7 @@ def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureSlowSHLD]>; def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; From 5143703795b66599df93a9f35462e522f925c0f8 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 4 Dec 2015 23:40:22 +0000 Subject: [PATCH 102/364] [ThinLTO] Helper for performing renaming/promotion on a module Creates a module and performs necessary renaming/promotion of locals that may be exported to another module. Split out of D15024. 
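As a usage sketch only (the parsing boilerplate and the promoteForThinLTO wrapper below are illustrative assumptions, not part of this patch; the helper itself is the renameModuleForThinLTO declared in the diff that follows), a ThinLTO driver might call the new entry point roughly like this:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;

// Diagnostic callback; a real tool would print or collect the message.
static void handleDiagnostics(const DiagnosticInfo &DI) {}

// Parse an IR file and hand it to the helper. Returns the renamed/promoted
// copy, or nullptr if parsing or the internal linkInModule step failed.
static std::unique_ptr<Module>
promoteForThinLTO(StringRef Path, const FunctionInfoIndex *Index,
                  LLVMContext &Ctx) {
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseIRFile(Path, Err, Ctx);
  if (!M)
    return nullptr;
  return renameModuleForThinLTO(M, Index, handleDiagnostics);
}

Errors are reported through the same DiagnosticHandlerFunction that the Linker itself uses, as the implementation in LinkModules.cpp below shows.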
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254802 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Linker/Linker.h | 7 +++++++ lib/Linker/LinkModules.cpp | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h index f0c8ad979ab6..aa4300942947 100644 --- a/include/llvm/Linker/Linker.h +++ b/include/llvm/Linker/Linker.h @@ -99,6 +99,13 @@ class Linker { DiagnosticHandlerFunction DiagnosticHandler; }; +/// Create a new module with exported local functions renamed and promoted +/// for ThinLTO. +std::unique_ptr +renameModuleForThinLTO(std::unique_ptr &M, + const FunctionInfoIndex *Index, + DiagnosticHandlerFunction DiagnosticHandler); + } // End llvm namespace #endif diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 88b8e443c489..627137ba3abd 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -2056,6 +2056,18 @@ bool Linker::linkModules(Module &Dest, Module &Src, return L.linkInModule(Src, Flags); } +std::unique_ptr +llvm::renameModuleForThinLTO(std::unique_ptr &M, + const FunctionInfoIndex *Index, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr RenamedModule( + new llvm::Module(M->getModuleIdentifier(), M->getContext())); + Linker L(*RenamedModule.get(), DiagnosticHandler); + if (L.linkInModule(*M.get(), llvm::Linker::Flags::None, Index)) + return nullptr; + return RenamedModule; +} + //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// From f79b7835d821236a302090fa9eb05a3a1cc47c31 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 4 Dec 2015 23:48:19 +0000 Subject: [PATCH 103/364] [PassManager] Ensure destructors of cached AnalysisUsage objects are run In 254760, I introduced the usage of a BumpPtrAllocator for the AnalysisUsage instances held by the PassManger. This turns out to have been incorrect since a BumpPtrAllocator does not run the destructors of objects when deallocating memory. Since a few of our SmallVector's had grown beyond their small size, we end up with some leaked memory. We need to use a SpecificBumpPtrAllocator instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254803 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/LegacyPassManagers.h | 2 +- lib/IR/LegacyPassManager.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index 418702c0b781..b8e33478d6a9 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -283,7 +283,7 @@ class PMTopLevelManager { // Allocator used for allocating UAFoldingSetNodes. This handles deletion of // all allocated nodes in one fell swoop. - BumpPtrAllocator AUFoldingSetNodeAllocator; + SpecificBumpPtrAllocator AUFoldingSetNodeAllocator; // Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does // not own the storage associated with either key or value.. 
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 08e8906e88db..f2e0c7d32c02 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -589,7 +589,7 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) Node = N; else { - Node = new (AUFoldingSetNodeAllocator) AUFoldingSetNode(AU); + Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); UniqueAnalysisUsages.InsertNode(Node, IP); } assert(Node && "cached analysis usage must be non null"); From 357108cbea3d64a0f31b931121d4a381ee6c1a3b Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 5 Dec 2015 00:06:37 +0000 Subject: [PATCH 104/364] [opt] Fix sanitizer complaints about r254774 `Out` can be null if no output is requested, so move any access to it inside the conditional. Thanks to Justin Bogner for finding this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254804 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/opt/opt.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index c1510a7fb259..fc31beb48154 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -595,14 +595,16 @@ int main(int argc, char **argv) { SmallVector Buffer; SmallVector CompileTwiceBuffer; std::unique_ptr BOS; - raw_ostream *OS = &Out->os(); - if (RunTwice) { - BOS = make_unique(Buffer); - OS = BOS.get(); - } + raw_ostream *OS = nullptr; // Write bitcode or assembly to the output as the last step... if (!NoOutput && !AnalyzeOnly) { + assert(Out); + OS = &Out->os(); + if (RunTwice) { + BOS = make_unique(Buffer); + OS = BOS.get(); + } if (OutputAssembly) Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder)); else @@ -618,6 +620,7 @@ int main(int argc, char **argv) { // If requested, run all passes again with the same pass manager to catch // bugs caused by persistent state in the passes if (RunTwice) { + assert(Out); CompileTwiceBuffer = Buffer; Buffer.clear(); std::unique_ptr M2(CloneModule(M.get())); From 3817e67f7f0807234396b92d1a8884798da2f6f8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sat, 5 Dec 2015 00:18:33 +0000 Subject: [PATCH 105/364] [EarlyCSE] IsSimple vs IsVolatile naming clarification (NFC) When the notion of target specific memory intrinsics was introduced to EarlyCSE, the commit confused the notions of volatile and simple memory access. Since I'm about to start working on this area, cleanup the naming so that patches aren't horribly confusing. Note that the actual implementation was always bailing if the load or store wasn't simple. Reminder: - "volatile" - C++ volatile, can't remove any memory operations, but in principal unordered - "ordered" - imposes ordering constraints on other nearby memory operations - "atomic" - can't be split or sheared. In LLVM terms, all "ordered" operations are also atomic so the predicate "isAtomic" is often used. - "simple" - a load which is none of the above. These are normal loads and what most of the optimizer works with. 
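To make the four buckets concrete, here is a small sketch (not part of the patch) written against the existing LoadInst predicates; StoreInst exposes the same queries:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Map a load onto the terminology above using the existing IR predicates.
static const char *classifyLoad(const LoadInst &LI) {
  if (LI.isSimple())
    return "simple";           // neither volatile nor atomic
  if (LI.isVolatile())
    return "volatile";         // may additionally be atomic; checked first here
  if (LI.isUnordered())
    return "unordered atomic"; // atomic, but imposes no ordering on neighbors
  return "ordered atomic";     // monotonic or stronger ordering
}

EarlyCSE only performs CSE/DSE on the first bucket, which is what the renamed IsSimple flag now states directly.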
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254805 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 6 +++-- .../AArch64/AArch64TargetTransformInfo.cpp | 4 ++-- lib/Transforms/Scalar/EarlyCSE.cpp | 22 +++++++++---------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 98458f1c3f3b..35c756b362d6 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -42,11 +42,13 @@ class Value; /// \brief Information about a load/store intrinsic defined by the target. struct MemIntrinsicInfo { MemIntrinsicInfo() - : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0), + : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), NumMemRefs(0), PtrVal(nullptr) {} bool ReadMem; bool WriteMem; - bool Vol; + /// True only if this memory operation is non-volatile, non-atomic, and + /// unordered. (See LoadInst/StoreInst for details on each) + bool IsSimple; // Same Id is set by the target for corresponding load/store intrinsics. unsigned short MatchingId; int NumMemRefs; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 81402a854f6a..e803ef949b9d 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -538,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_ld4: Info.ReadMem = true; Info.WriteMem = false; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(0); break; @@ -547,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_st4: Info.ReadMem = false; Info.WriteMem = true; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); break; diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index de539d53a4f5..b055044ba6d0 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -388,8 +388,8 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), Vol(false), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { + : Load(false), Store(false), IsSimple(true), MayReadFromMemory(false), + MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { MayReadFromMemory = Inst->mayReadFromMemory(); MayWriteToMemory = Inst->mayWriteToMemory(); if (IntrinsicInst *II = dyn_cast(Inst)) { @@ -402,22 +402,22 @@ class EarlyCSE { MatchingId = Info.MatchingId; MayReadFromMemory = Info.ReadMem; MayWriteToMemory = Info.WriteMem; - Vol = Info.Vol; + IsSimple = Info.IsSimple; Ptr = Info.PtrVal; } } else if (LoadInst *LI = dyn_cast(Inst)) { Load = true; - Vol = !LI->isSimple(); + IsSimple = LI->isSimple(); Ptr = LI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(Inst)) { Store = true; - Vol = !SI->isSimple(); + IsSimple = SI->isSimple(); Ptr = SI->getPointerOperand(); } } bool isLoad() const { return Load; } bool isStore() const { return Store; } - bool isVolatile() const { return Vol; } + bool isSimple() const { return IsSimple; } bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; } @@ -430,7 +430,7 
@@ class EarlyCSE { private: bool Load; bool Store; - bool Vol; + bool IsSimple; bool MayReadFromMemory; bool MayWriteToMemory; // For regular (non-intrinsic) loads/stores, this is set to -1. For @@ -554,8 +554,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ParseMemoryInst MemInst(Inst, TTI); // If this is a non-volatile load, process it. if (MemInst.isValid() && MemInst.isLoad()) { - // Ignore volatile loads. - if (MemInst.isVolatile()) { + // Ignore volatile or ordered loads. + if (!MemInst.isSimple()) { LastStore = nullptr; // Don't CSE across synchronization boundaries. if (Inst->mayWriteToMemory()) @@ -662,8 +662,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { MemInst.getPtr(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); - // Remember that this was the last store we saw for DSE. - if (!MemInst.isVolatile()) + // Remember that this was the last normal store we saw for DSE. + if (MemInst.isSimple()) LastStore = Inst; } } From a9143d4647d9eca6d9ed32e76a56368ed4935969 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Sat, 5 Dec 2015 00:26:39 +0000 Subject: [PATCH 106/364] [WebAssembly] Support constant offsets on loads and stores This is just prototype for load/store for i32 types. I'll add them to the rest of the types if we like this direction. Differential Revision: http://reviews.llvm.org/D15197 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254807 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyInstrMemory.td | 159 ++++++++++-------- .../WebAssembly/WebAssemblyStoreResults.cpp | 2 +- test/CodeGen/WebAssembly/cfg-stackify.ll | 2 +- test/CodeGen/WebAssembly/global.ll | 2 +- test/CodeGen/WebAssembly/load-ext.ll | 20 +-- test/CodeGen/WebAssembly/load-store-i1.ll | 12 +- test/CodeGen/WebAssembly/load.ll | 8 +- test/CodeGen/WebAssembly/store-results.ll | 6 +- test/CodeGen/WebAssembly/store-trunc.ll | 10 +- test/CodeGen/WebAssembly/store.ll | 8 +- test/CodeGen/WebAssembly/varargs.ll | 16 +- 11 files changed, 128 insertions(+), 117 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 700a196fa29c..fbb3df2f7b82 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -25,59 +25,63 @@ let Defs = [ARGUMENTS] in { // Basic load. -def LOAD_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (load I32:$addr))], - "i32.load\t$dst, $addr">; -def LOAD_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (load I32:$addr))], - "i64.load\t$dst, $addr">; -def LOAD_F32 : I<(outs F32:$dst), (ins I32:$addr), - [(set F32:$dst, (load I32:$addr))], - "f32.load\t$dst, $addr">; -def LOAD_F64 : I<(outs F64:$dst), (ins I32:$addr), - [(set F64:$dst, (load I32:$addr))], - "f64.load\t$dst, $addr">; +def LOAD_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load\t$dst, $off($addr)">; +def LOAD_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load\t$dst, $off($addr)">; +def LOAD_F32 : I<(outs F32:$dst), (ins I32:$off, I32:$addr), [], + "f32.load\t$dst, $off($addr)">; +def LOAD_F64 : I<(outs F64:$dst), (ins I32:$off, I32:$addr), [], + "f64.load\t$dst, $off($addr)">; // Extending load. 
-def LOAD8_S_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (sextloadi8 I32:$addr))], - "i32.load8_s\t$dst, $addr">; -def LOAD8_U_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (zextloadi8 I32:$addr))], - "i32.load8_u\t$dst, $addr">; -def LOAD16_S_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (sextloadi16 I32:$addr))], - "i32.load16_s\t$dst, $addr">; -def LOAD16_U_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (zextloadi16 I32:$addr))], - "i32.load16_u\t$dst, $addr">; -def LOAD8_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi8 I32:$addr))], - "i64.load8_s\t$dst, $addr">; -def LOAD8_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi8 I32:$addr))], - "i64.load8_u\t$dst, $addr">; -def LOAD16_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi16 I32:$addr))], - "i64.load16_s\t$dst, $addr">; -def LOAD16_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi16 I32:$addr))], - "i64.load16_u\t$dst, $addr">; -def LOAD32_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi32 I32:$addr))], - "i64.load32_s\t$dst, $addr">; -def LOAD32_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi32 I32:$addr))], - "i64.load32_u\t$dst, $addr">; +def LOAD8_S_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load8_s\t$dst, $off($addr)">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load8_u\t$dst, $off($addr)">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load16_s\t$dst, $off($addr)">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load16_u\t$dst, $off($addr)">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load8_s\t$dst, $off($addr)">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load8_u\t$dst, $off($addr)">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load16_s\t$dst, $off($addr)">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load16_u\t$dst, $off($addr)">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load32_s\t$dst, $off($addr)">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load32_u\t$dst, $off($addr)">; } // Defs = [ARGUMENTS] +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select extending loads with no constant offset. +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + // "Don't care" extending load become zero-extending load. 
-def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 $addr)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 $addr)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 $addr)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 $addr)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 $addr)>; +def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; let Defs = [ARGUMENTS] in { @@ -87,48 +91,55 @@ let Defs = [ARGUMENTS] in { // instruction definition patterns that don't reference all of the output // operands. // Note: WebAssembly inverts SelectionDAG's usual operand order. -def STORE_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store\t$dst, $addr, $val">; -def STORE_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store\t$dst, $addr, $val">; -def STORE_F32 : I<(outs F32:$dst), (ins I32:$addr, F32:$val), [], - "f32.store\t$dst, $addr, $val">; -def STORE_F64 : I<(outs F64:$dst), (ins I32:$addr, F64:$val), [], - "f64.store\t$dst, $addr, $val">; +def STORE_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store\t$dst, $off($addr), $val">; +def STORE_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store\t$dst, $off($addr), $val">; +def STORE_F32 : I<(outs F32:$dst), (ins I32:$off, I32:$addr, F32:$val), [], + "f32.store\t$dst, $off($addr), $val">; +def STORE_F64 : I<(outs F64:$dst), (ins I32:$off, I32:$addr, F64:$val), [], + "f64.store\t$dst, $off($addr), $val">; } // Defs = [ARGUMENTS] -def : Pat<(store I32:$val, I32:$addr), (STORE_I32 I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, I32:$addr), (STORE_I64 I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, I32:$addr), (STORE_F32 I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, I32:$addr), (STORE_F64 I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; + +// FIXME: This pattern matches an immediate to actually use the offset field +// in the store instruction; however only unsigned offsets are supported in +// wasm, so we need to constrain the immediate we match. This may require +// custom code rather than a simple pattern. +// def : Pat<(store I32:$val, (add I32:$addr, (i32 imm:$off))), +// (STORE_I32 imm:$off, I32:$addr, I32:$val)>; let Defs = [ARGUMENTS] in { // Truncating store. 
-def STORE8_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store8\t$dst, $addr, $val">; -def STORE16_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store16\t$dst, $addr, $val">; -def STORE8_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store8\t$dst, $addr, $val">; -def STORE16_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store16\t$dst, $addr, $val">; -def STORE32_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store32\t$dst, $addr, $val">; +def STORE8_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store8\t$dst, $off($addr), $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store16\t$dst, $off($addr), $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store8\t$dst, $off($addr), $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store16\t$dst, $off($addr), $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store32\t$dst, $off($addr), $val">; } // Defs = [ARGUMENTS] def : Pat<(truncstorei8 I32:$val, I32:$addr), - (STORE8_I32 I32:$addr, I32:$val)>; + (STORE8_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), - (STORE16_I32 I32:$addr, I32:$val)>; + (STORE16_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, I32:$addr), - (STORE8_I64 I32:$addr, I64:$val)>; + (STORE8_I64 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, I32:$addr), - (STORE16_I64 I32:$addr, I64:$val)>; + (STORE16_I64 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, I32:$addr), - (STORE32_I64 I32:$addr, I64:$val)>; + (STORE32_I64 0, I32:$addr, I64:$val)>; let Defs = [ARGUMENTS] in { diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 4a8fc09878c4..b67453bee708 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -88,7 +88,7 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = MI.getOperand(2).getReg(); + unsigned FromReg = MI.getOperand(3).getReg(); for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll index 4a53f6e4bb93..b8ac48bf49dc 100644 --- a/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -184,7 +184,7 @@ entry: ; CHECK-LABEL: minimal_loop: ; CHECK-NOT: br ; CHECK: BB7_1: -; CHECK: i32.store $discard=, $0, $pop{{[0-9]+}}{{$}} +; CHECK: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}} ; CHECK: br BB7_1{{$}} ; CHECK: BB7_2: define i32 @minimal_loop(i32* %p) { diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll index 818c454a4914..ffc73e3c1e35 100644 --- a/test/CodeGen/WebAssembly/global.ll +++ b/test/CodeGen/WebAssembly/global.ll @@ -11,7 +11,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK: foo: ; CHECK: i32.const $push0=, answer{{$}} -; CHECK-NEXT: i32.load $push1=, $pop0{{$}} +; CHECK-NEXT: i32.load $push1=, 0($pop0){{$}} ; CHECK-NEXT: return $pop1{{$}} define i32 @foo() { %a = load i32, i32* @answer diff --git a/test/CodeGen/WebAssembly/load-ext.ll 
b/test/CodeGen/WebAssembly/load-ext.ll index bdccfff1d161..0ffcd38a8666 100644 --- a/test/CodeGen/WebAssembly/load-ext.ll +++ b/test/CodeGen/WebAssembly/load-ext.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: sext_i8_i32: -; CHECK: i32.load8_s $push0=, $0{{$}} +; CHECK: i32.load8_s $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i32 @sext_i8_i32(i8 *%p) { %v = load i8, i8* %p @@ -15,7 +15,7 @@ define i32 @sext_i8_i32(i8 *%p) { } ; CHECK-LABEL: zext_i8_i32: -; CHECK: i32.load8_u $push0=, $0{{$}} +; CHECK: i32.load8_u $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i32 @zext_i8_i32(i8 *%p) { %v = load i8, i8* %p @@ -24,7 +24,7 @@ define i32 @zext_i8_i32(i8 *%p) { } ; CHECK-LABEL: sext_i16_i32: -; CHECK: i32.load16_s $push0=, $0{{$}} +; CHECK: i32.load16_s $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i32 @sext_i16_i32(i16 *%p) { %v = load i16, i16* %p @@ -33,7 +33,7 @@ define i32 @sext_i16_i32(i16 *%p) { } ; CHECK-LABEL: zext_i16_i32: -; CHECK: i32.load16_u $push0=, $0{{$}} +; CHECK: i32.load16_u $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i32 @zext_i16_i32(i16 *%p) { %v = load i16, i16* %p @@ -42,7 +42,7 @@ define i32 @zext_i16_i32(i16 *%p) { } ; CHECK-LABEL: sext_i8_i64: -; CHECK: i64.load8_s $push0=, $0{{$}} +; CHECK: i64.load8_s $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i64 @sext_i8_i64(i8 *%p) { %v = load i8, i8* %p @@ -51,7 +51,7 @@ define i64 @sext_i8_i64(i8 *%p) { } ; CHECK-LABEL: zext_i8_i64: -; CHECK: i64.load8_u $push0=, $0{{$}} +; CHECK: i64.load8_u $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i64 @zext_i8_i64(i8 *%p) { %v = load i8, i8* %p @@ -60,7 +60,7 @@ define i64 @zext_i8_i64(i8 *%p) { } ; CHECK-LABEL: sext_i16_i64: -; CHECK: i64.load16_s $push0=, $0{{$}} +; CHECK: i64.load16_s $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i64 @sext_i16_i64(i16 *%p) { %v = load i16, i16* %p @@ -69,7 +69,7 @@ define i64 @sext_i16_i64(i16 *%p) { } ; CHECK-LABEL: zext_i16_i64: -; CHECK: i64.load16_u $push0=, $0{{$}} +; CHECK: i64.load16_u $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i64 @zext_i16_i64(i16 *%p) { %v = load i16, i16* %p @@ -78,7 +78,7 @@ define i64 @zext_i16_i64(i16 *%p) { } ; CHECK-LABEL: sext_i32_i64: -; CHECK: i64.load32_s $push0=, $0{{$}} +; CHECK: i64.load32_s $push0=, 0($0){{$}} ; CHECK-NEXT: return $pop0{{$}} define i64 @sext_i32_i64(i32 *%p) { %v = load i32, i32* %p @@ -87,7 +87,7 @@ define i64 @sext_i32_i64(i32 *%p) { } ; CHECK-LABEL: zext_i32_i64: -; CHECK: i64.load32_u $push0=, $0{{$}} +; CHECK: i64.load32_u $push0=, 0($0){{$}} ; CHECK: return $pop0{{$}} define i64 @zext_i32_i64(i32 *%p) { %v = load i32, i32* %p diff --git a/test/CodeGen/WebAssembly/load-store-i1.ll b/test/CodeGen/WebAssembly/load-store-i1.ll index 33d3aeecc582..1acdfc0dbdeb 100644 --- a/test/CodeGen/WebAssembly/load-store-i1.ll +++ b/test/CodeGen/WebAssembly/load-store-i1.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: load_u_i1_i32: -; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}} +; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM0]]{{$}} define i32 @load_u_i1_i32(i1* %p) { %v = load i1, i1* %p @@ -15,7 +15,7 @@ define i32 @load_u_i1_i32(i1* %p) { } ; CHECK-LABEL: load_s_i1_i32: -; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}} +; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: 
i32.const $[[NUM1:[0-9]+]]=, 31{{$}} ; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}} ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}} @@ -27,7 +27,7 @@ define i32 @load_s_i1_i32(i1* %p) { } ; CHECK-LABEL: load_u_i1_i64: -; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}} +; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM0]]{{$}} define i64 @load_u_i1_i64(i1* %p) { %v = load i1, i1* %p @@ -36,7 +36,7 @@ define i64 @load_u_i1_i64(i1* %p) { } ; CHECK-LABEL: load_s_i1_i64: -; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}} +; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: i64.const $[[NUM1:[0-9]+]]=, 63{{$}} ; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}} ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}} @@ -50,7 +50,7 @@ define i64 @load_s_i1_i64(i1* %p) { ; CHECK-LABEL: store_i32_i1: ; CHECK: i32.const $push[[NUM0:[0-9]+]]=, 1{{$}} ; CHECK-NEXT: i32.and $push[[NUM1:[0-9]+]]=, $1, $pop[[NUM0]]{{$}} -; CHECK-NEXT: i32.store8 $discard=, $0, $pop[[NUM1]]{{$}} +; CHECK-NEXT: i32.store8 $discard=, 0($0), $pop[[NUM1]]{{$}} define void @store_i32_i1(i1* %p, i32 %v) { %t = trunc i32 %v to i1 store i1 %t, i1* %p @@ -60,7 +60,7 @@ define void @store_i32_i1(i1* %p, i32 %v) { ; CHECK-LABEL: store_i64_i1: ; CHECK: i64.const $push[[NUM0:[0-9]+]]=, 1{{$}} ; CHECK-NEXT: i64.and $push[[NUM1:[0-9]+]]=, $1, $pop[[NUM0]]{{$}} -; CHECK-NEXT: i64.store8 $discard=, $0, $pop[[NUM1]]{{$}} +; CHECK-NEXT: i64.store8 $discard=, 0($0), $pop[[NUM1]]{{$}} define void @store_i64_i1(i1* %p, i64 %v) { %t = trunc i64 %v to i1 store i1 %t, i1* %p diff --git a/test/CodeGen/WebAssembly/load.ll b/test/CodeGen/WebAssembly/load.ll index 1017167d5227..aa8ae689e0d1 100644 --- a/test/CodeGen/WebAssembly/load.ll +++ b/test/CodeGen/WebAssembly/load.ll @@ -8,7 +8,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: ldi32: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result i32{{$}} -; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @ldi32(i32 *%p) { %v = load i32, i32* %p @@ -18,7 +18,7 @@ define i32 @ldi32(i32 *%p) { ; CHECK-LABEL: ldi64: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result i64{{$}} -; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} define i64 @ldi64(i64 *%p) { %v = load i64, i64* %p @@ -28,7 +28,7 @@ define i64 @ldi64(i64 *%p) { ; CHECK-LABEL: ldf32: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result f32{{$}} -; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} define float @ldf32(float *%p) { %v = load float, float* %p @@ -38,7 +38,7 @@ define float @ldf32(float *%p) { ; CHECK-LABEL: ldf64: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result f64{{$}} -; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} define double @ldf64(double *%p) { %v = load double, double* %p diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll index c05ed3a04be3..5bb7eafa4b1a 100644 --- a/test/CodeGen/WebAssembly/store-results.ll +++ b/test/CodeGen/WebAssembly/store-results.ll @@ -9,7 +9,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: 
single_block: ; CHECK-NOT: .local ; CHECK: i32.const $push{{[0-9]+}}=, 0 -; CHECK: i32.store $push[[STORE:[0-9]+]]=, $0, $pop{{[0-9]+}} +; CHECK: i32.store $push[[STORE:[0-9]+]]=, 0($0), $pop{{[0-9]+}} ; CHECK: return $pop[[STORE]]{{$}} define i32 @single_block(i32* %p) { entry: @@ -26,7 +26,7 @@ entry: @pos = global %class.Vec3 zeroinitializer, align 4 ; CHECK-LABEL: foo: -; CHECK: i32.store $discard=, $pop0, $0 +; CHECK: i32.store $discard=, 0($pop0), $0 define void @foo() { for.body.i: br label %for.body5.i @@ -44,7 +44,7 @@ for.cond.cleanup4.i: } ; CHECK-LABEL: bar: -; CHECK: i32.store $discard=, $0, $pop0 +; CHECK: i32.store $discard=, 0($0), $pop0 define void @bar() { for.body.i: br label %for.body5.i diff --git a/test/CodeGen/WebAssembly/store-trunc.ll b/test/CodeGen/WebAssembly/store-trunc.ll index e3587a5ff170..c12b716dfd59 100644 --- a/test/CodeGen/WebAssembly/store-trunc.ll +++ b/test/CodeGen/WebAssembly/store-trunc.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: trunc_i8_i32: -; CHECK: i32.store8 $discard=, $0, $1{{$}} +; CHECK: i32.store8 $discard=, 0($0), $1{{$}} define void @trunc_i8_i32(i8 *%p, i32 %v) { %t = trunc i32 %v to i8 store i8 %t, i8* %p @@ -14,7 +14,7 @@ define void @trunc_i8_i32(i8 *%p, i32 %v) { } ; CHECK-LABEL: trunc_i16_i32: -; CHECK: i32.store16 $discard=, $0, $1{{$}} +; CHECK: i32.store16 $discard=, 0($0), $1{{$}} define void @trunc_i16_i32(i16 *%p, i32 %v) { %t = trunc i32 %v to i16 store i16 %t, i16* %p @@ -22,7 +22,7 @@ define void @trunc_i16_i32(i16 *%p, i32 %v) { } ; CHECK-LABEL: trunc_i8_i64: -; CHECK: i64.store8 $discard=, $0, $1{{$}} +; CHECK: i64.store8 $discard=, 0($0), $1{{$}} define void @trunc_i8_i64(i8 *%p, i64 %v) { %t = trunc i64 %v to i8 store i8 %t, i8* %p @@ -30,7 +30,7 @@ define void @trunc_i8_i64(i8 *%p, i64 %v) { } ; CHECK-LABEL: trunc_i16_i64: -; CHECK: i64.store16 $discard=, $0, $1{{$}} +; CHECK: i64.store16 $discard=, 0($0), $1{{$}} define void @trunc_i16_i64(i16 *%p, i64 %v) { %t = trunc i64 %v to i16 store i16 %t, i16* %p @@ -38,7 +38,7 @@ define void @trunc_i16_i64(i16 *%p, i64 %v) { } ; CHECK-LABEL: trunc_i32_i64: -; CHECK: i64.store32 $discard=, $0, $1{{$}} +; CHECK: i64.store32 $discard=, 0($0), $1{{$}} define void @trunc_i32_i64(i32 *%p, i64 %v) { %t = trunc i64 %v to i32 store i32 %t, i32* %p diff --git a/test/CodeGen/WebAssembly/store.ll b/test/CodeGen/WebAssembly/store.ll index a2164d4ae6a9..442caedef3a7 100644 --- a/test/CodeGen/WebAssembly/store.ll +++ b/test/CodeGen/WebAssembly/store.ll @@ -7,7 +7,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: sti32: ; CHECK-NEXT: .param i32, i32{{$}} -; CHECK-NEXT: i32.store $discard=, $0, $1{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0), $1{{$}} ; CHECK-NEXT: return{{$}} define void @sti32(i32 *%p, i32 %v) { store i32 %v, i32* %p @@ -16,7 +16,7 @@ define void @sti32(i32 *%p, i32 %v) { ; CHECK-LABEL: sti64: ; CHECK-NEXT: .param i32, i64{{$}} -; CHECK-NEXT: i64.store $discard=, $0, $1{{$}} +; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}} ; CHECK-NEXT: return{{$}} define void @sti64(i64 *%p, i64 %v) { store i64 %v, i64* %p @@ -25,7 +25,7 @@ define void @sti64(i64 *%p, i64 %v) { ; CHECK-LABEL: stf32: ; CHECK-NEXT: .param i32, f32{{$}} -; CHECK-NEXT: f32.store $discard=, $0, $1{{$}} +; CHECK-NEXT: f32.store $discard=, 0($0), $1{{$}} ; CHECK-NEXT: return{{$}} define void @stf32(float *%p, float %v) { store float %v, float* %p @@ -34,7 +34,7 @@ define void @stf32(float *%p, float %v) { ; 
CHECK-LABEL: stf64: ; CHECK-NEXT: .param i32, f64{{$}} -; CHECK-NEXT: f64.store $discard=, $0, $1{{$}} +; CHECK-NEXT: f64.store $discard=, 0($0), $1{{$}} ; CHECK-NEXT: return{{$}} define void @stf64(double *%p, double %v) { store double %v, double* %p diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll index ccc7c1f9ce43..bda0dd779e65 100644 --- a/test/CodeGen/WebAssembly/varargs.ll +++ b/test/CodeGen/WebAssembly/varargs.ll @@ -32,8 +32,8 @@ entry: ; CHECK-LABEL: copy: ; CHECK-NEXT: .param i32, i32{{$}} -; CHECK-NEXT: i32.load $push0=, $1{{$}} -; CHECK-NEXT: i32.store $discard=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push0=, 0($1){{$}} +; CHECK-NEXT: i32.store $discard=, 0($0), $pop0{{$}} ; CHECK-NEXT: return{{$}} define void @copy(i8** %ap, i8** %bp) { entry: @@ -49,11 +49,11 @@ entry: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result i32{{$}} ; CHECK-NEXT: .local i32{{$}} -; CHECK-NEXT: i32.load $1=, $0{{$}} +; CHECK-NEXT: i32.load $1=, 0($0){{$}} ; CHECK-NEXT: i32.const $push0=, 4{{$}} ; CHECK-NEXT: i32.add $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.store $discard=, $0, $pop1{{$}} -; CHECK-NEXT: i32.load $push2=, $1{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0), $pop1{{$}} +; CHECK-NEXT: i32.load $push2=, 0($1){{$}} ; CHECK-NEXT: return $pop2{{$}} define i8 @arg_i8(i8** %ap) { entry: @@ -67,15 +67,15 @@ entry: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result i32{{$}} ; CHECK-NEXT: .local i32{{$}} -; CHECK-NEXT: i32.load $push0=, $0{{$}} +; CHECK-NEXT: i32.load $push0=, 0($0){{$}} ; CHECK-NEXT: i32.const $push1=, 3{{$}} ; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}} ; CHECK-NEXT: i32.const $push3=, -4{{$}} ; CHECK-NEXT: i32.and $1=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.const $push4=, 4{{$}} ; CHECK-NEXT: i32.add $push5=, $1, $pop4{{$}} -; CHECK-NEXT: i32.store $discard=, $0, $pop5{{$}} -; CHECK-NEXT: i32.load $push6=, $1{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0), $pop5{{$}} +; CHECK-NEXT: i32.load $push6=, 0($1){{$}} ; CHECK-NEXT: return $pop6{{$}} define i32 @arg_i32(i8** %ap) { entry: From 83eafdfffcd7fc709e4738213c3355d9b2d933b9 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Sat, 5 Dec 2015 00:39:14 +0000 Subject: [PATCH 107/364] CodeGen: Let the BumpPtrAllocator free the elements of indexList The indexList's nodes are all allocated on a BumpPtrAllocator, so it's more efficient to let them be freed when it goes away, rather than deleting them directly. This is a follow up to r254794. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254808 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SlotIndexes.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 5dc42e20debe..7b621bee259f 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -376,6 +376,11 @@ namespace llvm { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); } + ~SlotIndexes() { + // The indexList's nodes are all allocated in the BumpPtrAllocator. 
+ indexList.clearAndLeakNodesUnsafely(); + } + void getAnalysisUsage(AnalysisUsage &au) const override; void releaseMemory() override; From ca526959c1fc8d8172b780d916cdeb3f03a33836 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 00:51:40 +0000 Subject: [PATCH 108/364] [WebAssembly] Fix scheduling dependencies in register-stackified code Add physical register defs to instructions used from stackified instructions to prevent them from being scheduled into the middle of a stack sequence. This is a conservative measure which may be loosened in the future. Differential Revision: http://reviews.llvm.org/D15252 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254811 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyRegStackify.cpp | 51 +++++++++++++++---- test/CodeGen/WebAssembly/load-store-i1.ll | 8 +-- test/CodeGen/WebAssembly/reg-stackify.ll | 28 ++++++++++ 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 7abc20a8387e..ac016a7b9b0a 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -61,15 +61,41 @@ FunctionPass *llvm::createWebAssemblyRegStackify() { } // Decorate the given instruction with implicit operands that enforce the -// expression stack ordering constraints. -static void ImposeStackOrdering(MachineInstr *MI) { - // Read and write the opaque EXPR_STACK register. - MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, - /*isDef=*/true, - /*isImp=*/true)); +// expression stack ordering constraints needed for an instruction which is +// consumed by an instruction using the expression stack. +static void ImposeStackInputOrdering(MachineInstr *MI) { + // Write the opaque EXPR_STACK register. + if (!MI->definesRegister(WebAssembly::EXPR_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + /*isDef=*/true, + /*isImp=*/true)); +} + +// Decorate the given instruction with implicit operands that enforce the +// expression stack ordering constraints for an instruction which is on +// the expression stack. +static void ImposeStackOrdering(MachineInstr *MI, MachineRegisterInfo &MRI) { + ImposeStackInputOrdering(MI); + + // Also read the opaque EXPR_STACK register. MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, /*isDef=*/false, /*isImp=*/true)); + + // Also, mark any inputs to this instruction as being consumed by an + // instruction on the expression stack. + // TODO: Find a lighter way to describe the appropriate constraints. + for (MachineOperand &MO : MI->uses()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def->getOpcode() == TargetOpcode::PHI) + continue; + ImposeStackInputOrdering(Def); + } } // Test whether it's safe to move Def to just before Insert. Note that this @@ -126,8 +152,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; unsigned Reg = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // An instruction with a physical register. Conservatively mark it as + // an expression stack input so that it isn't reordered with anything + // in an expression stack which might use it (physical registers + // aren't in SSA form so it's not trivial to determine this). 
+ // TODO: Be less conservative. + ImposeStackInputOrdering(Insert); continue; + } // Only consider registers with a single definition. // TODO: Eventually we may relax this, to stackify phi transfers. @@ -178,11 +211,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { MBB.insert(MachineBasicBlock::instr_iterator(Insert), Def->removeFromParent()); MFI.stackifyVReg(Reg); - ImposeStackOrdering(Def); + ImposeStackOrdering(Def, MRI); Insert = Def; } if (AnyStackified) - ImposeStackOrdering(&MI); + ImposeStackOrdering(&MI, MRI); } } diff --git a/test/CodeGen/WebAssembly/load-store-i1.ll b/test/CodeGen/WebAssembly/load-store-i1.ll index 1acdfc0dbdeb..37b514729479 100644 --- a/test/CodeGen/WebAssembly/load-store-i1.ll +++ b/test/CodeGen/WebAssembly/load-store-i1.ll @@ -15,8 +15,8 @@ define i32 @load_u_i1_i32(i1* %p) { } ; CHECK-LABEL: load_s_i1_i32: -; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: i32.const $[[NUM1:[0-9]+]]=, 31{{$}} +; CHECK: i32.const $[[NUM1:[0-9]+]]=, 31{{$}} +; CHECK-NEXT: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}} ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}} ; CHECK-NEXT: return $pop[[NUM3]]{{$}} @@ -36,8 +36,8 @@ define i64 @load_u_i1_i64(i1* %p) { } ; CHECK-LABEL: load_s_i1_i64: -; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: i64.const $[[NUM1:[0-9]+]]=, 63{{$}} +; CHECK: i64.const $[[NUM1:[0-9]+]]=, 63{{$}} +; CHECK-NEXT: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}} ; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}} ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}} ; CHECK-NEXT: return $pop[[NUM3]]{{$}} diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll index f3000aab70ad..180d70e2e4a7 100644 --- a/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/test/CodeGen/WebAssembly/reg-stackify.ll @@ -44,4 +44,32 @@ define i32 @yes1(i32* %q) { ret i32 %t } +; Don't schedule stack uses into the stack. To reduce register pressure, the +; scheduler might be tempted to move the definition of $2 down. However, this +; would risk getting incorrect liveness if the instructions are later +; rearranged to make the stack contiguous. + +; CHECK-LABEL: stack_uses: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: local i32, i32{{$}} +; CHECK-NEXT: i32.const $1=, 1{{$}} +; CHECK-NEXT: i32.const $2=, 0{{$}} +; CHECK-NEXT: i32.and $push0=, $0, $1{{$}} +; CHECK-NEXT: i32.eq $push1=, $pop0, $2{{$}} +; CHECK-NEXT: block BB4_2{{$}} +; CHECK-NEXT: br_if $pop1, BB4_2{{$}} +; CHECK-NEXT: return $2{{$}} +; CHECK-NEXT: BB4_2:{{$}} +; CHECK-NEXT: return $1{{$}} +define i32 @stack_uses(i32 %x) { +entry: + %c = trunc i32 %x to i1 + br i1 %c, label %true, label %false +true: + ret i32 0 +false: + ret i32 1 +} + !0 = !{} From c5cf58b8a73357b0c4309f1e8dd10ba0f72accd2 Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Sat, 5 Dec 2015 01:00:22 +0000 Subject: [PATCH 109/364] Fix a typo in LoopVectorize.cpp. NFC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c5b8b5b073d6..917f2d55f6cb 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5188,7 +5188,7 @@ LoopVectorizationCostModel::calculateRegisterUsage( continue; } - // Count the number of live intevals. + // Count the number of live intervals. unsigned RegUsage = 0; for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); From 517455ae01552736121c387b27362e2f0244de7e Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 5 Dec 2015 01:02:53 +0000 Subject: [PATCH 110/364] [MC] Add a test for state reset in MCMachOStreamer This was fixed in r254751, but untestable until r254774, which added the necessary command line flag to llc. Add a test now to make sure this doesn't regress again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254814 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/MachO/empty-twice.ll | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 test/MC/MachO/empty-twice.ll diff --git a/test/MC/MachO/empty-twice.ll b/test/MC/MachO/empty-twice.ll new file mode 100644 index 000000000000..6914c73a58d1 --- /dev/null +++ b/test/MC/MachO/empty-twice.ll @@ -0,0 +1,12 @@ +; Check that there is no persistent state in the MachO emitter that crashes +; us when reusing the pass manager. +; RUN: llc -mtriple=x86_64-apple-darwin -compile-twice -filetype=obj %s -o - + +; Force the creation of a DWARF section +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: true) +!1 = !DIFile(filename: "", directory: "/") +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} From 3687382a3485127a701ef31dabed4a32cf231276 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 5 Dec 2015 01:38:12 +0000 Subject: [PATCH 111/364] [opt] Fix run-twice option for non-idempotent passes Cloning the module was supposed to guard against the possibility that the passes may be non-idempotent. However, for some reason I decided to put that AFTER the passes had already run on the module, defeating the point entirely. Fix that by moving up the CloneModule as is done in llc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254819 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/opt/opt.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index fc31beb48154..fe1605aa8436 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -614,22 +614,26 @@ int main(int argc, char **argv) { // Before executing passes, print the final values of the LLVM options. cl::PrintOptionValues(); + // If requested, run all passes again with the same pass manager to catch + // bugs caused by persistent state in the passes + if (RunTwice) { + std::unique_ptr M2(CloneModule(M.get())); + Passes.run(*M2); + CompileTwiceBuffer = Buffer; + Buffer.clear(); + } + // Now that we have all of the passes ready, run them. 
Passes.run(*M); - // If requested, run all passes again with the same pass manager to catch - // bugs caused by persistent state in the passes + // Compare the two outputs and make sure they're the same if (RunTwice) { assert(Out); - CompileTwiceBuffer = Buffer; - Buffer.clear(); - std::unique_ptr M2(CloneModule(M.get())); - Passes.run(*M2); if (Buffer.size() != CompileTwiceBuffer.size() || (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) != 0)) { errs() << "Running the pass manager twice changed the output.\n" - "Writing the result of the second run to the specified output." + "Writing the result of the second run to the specified output.\n" "To generate the one-run comparison binary, just run without\n" "the compile-twice option\n"; Out->os() << BOS->str(); From 830355b8521ba547c5bbb7e5225f362af62db0e8 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 5 Dec 2015 01:44:20 +0000 Subject: [PATCH 112/364] Whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254821 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ExecutionEngine/Orc/IndirectionUtils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 3bfff059110c..aa75b3f46b4a 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -202,7 +202,7 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { sys::Memory::MF_READ | sys::Memory::MF_EXEC); assert(!EC && "Failed to mprotect trampoline block"); - + TrampolineBlocks.push_back(std::move(TrampolineBlock)); } @@ -404,7 +404,7 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, ValueMaterializer *Materializer = nullptr, GlobalVariable *NewGV = nullptr); -/// @brief Clone +/// @brief Clone GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, ValueToValueMapTy &VMap); From d33fc70ecf45feddde1f7675730ff170c017d650 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Sat, 5 Dec 2015 02:09:22 +0000 Subject: [PATCH 113/364] [libFuzzer] don't reload the corpus more than once every second git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Fuzzer/FuzzerLoop.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index ca7f82b55607..889c30c87489 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -468,10 +468,15 @@ void Fuzzer::Drill() { } void Fuzzer::Loop() { + system_clock::time_point LastCorpusReload = system_clock::now(); while (true) { size_t J1 = ChooseUnitIdxToMutate();; SyncCorpus(); - RereadOutputCorpus(); + auto Now = system_clock::now(); + if (duration_cast(Now - LastCorpusReload).count()) { + RereadOutputCorpus(); + LastCorpusReload = Now; + } if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) break; if (Options.MaxTotalTimeSec > 0 && From 35be75843e0a6e2b7f6431b7d960379380aa345d Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Sat, 5 Dec 2015 02:23:49 +0000 Subject: [PATCH 114/364] [libFuzzer] one more trophie git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254825 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LibFuzzer.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index eb79b2e490c4..e14ea67d652c 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -483,6 +483,8 @@ Trophies * 
`Python `_ +* OpenSSL/BoringSSL: `[1] `_ + * `Libxml2 `_ From 8893466777ecbe91ff69206072bc95a3e38e1a0d Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 03:03:35 +0000 Subject: [PATCH 115/364] [WebAssembly] Implement ReverseBranchCondition, and re-enable MachineBlockPlacement This patch introduces a codegen-only instruction currently named br_unless, which makes it convenient to implement ReverseBranchCondition and re-enable the MachineBlockPlacement pass. Then in a late pass, it lowers br_unless back into br_if. Differential Revision: http://reviews.llvm.org/D14995 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254826 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/CMakeLists.txt | 1 + lib/Target/WebAssembly/WebAssembly.h | 1 + .../WebAssembly/WebAssemblyInstrControl.td | 19 ++- .../WebAssembly/WebAssemblyInstrInfo.cpp | 32 +++-- .../WebAssembly/WebAssemblyLowerBrUnless.cpp | 133 ++++++++++++++++++ .../WebAssembly/WebAssemblyPeephole.cpp | 2 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 7 +- test/CodeGen/WebAssembly/cfg-stackify.ll | 125 ++++++++++++++-- test/CodeGen/WebAssembly/reg-stackify.ll | 41 ++++-- test/CodeGen/WebAssembly/switch.ll | 5 +- 10 files changed, 321 insertions(+), 45 deletions(-) create mode 100644 lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 6a5894958e32..5d1a27a6f093 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp WebAssemblyInstrInfo.cpp + WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp WebAssemblyOptimizeReturned.cpp diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index 001f9f9d4a72..6705b22a376f 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -32,6 +32,7 @@ FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); FunctionPass *createWebAssemblyCFGStackify(); +FunctionPass *createWebAssemblyLowerBrUnless(); FunctionPass *createWebAssemblyRegNumbering(); FunctionPass *createWebAssemblyPeephole(); diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 840f7d669314..708d902e99e1 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -15,9 +15,13 @@ let Defs = [ARGUMENTS] in { let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -def BR_IF : I<(outs), (ins I32:$a, bb_op:$dst), - [(brcond I32:$a, bb:$dst)], - "br_if \t$a, $dst">; +// The condition operand is a boolean value which WebAssembly represents as i32. 
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst), + [(brcond I32:$cond, bb:$dst)], + "br_if \t$cond, $dst">; +let isCodeGenOnly = 1 in +def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [], + "br_unless\t$cond, $dst">; let isBarrier = 1 in { def BR : I<(outs), (ins bb_op:$dst), [(br bb:$dst)], @@ -25,6 +29,15 @@ def BR : I<(outs), (ins bb_op:$dst), } // isBarrier = 1 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 +} // Defs = [ARGUMENTS] + +def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), + (BR_IF I32:$cond, bb_op:$dst)>; +def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), + (BR_UNLESS I32:$cond, bb_op:$dst)>; + +let Defs = [ARGUMENTS] in { + // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode // currently. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index bd06bc396dcd..3b219f4a901a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -71,6 +71,15 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_IF: if (HaveCond) return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); HaveCond = true; @@ -113,8 +122,6 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *FBB, ArrayRef Cond, DebugLoc DL) const { - assert(Cond.size() <= 1); - if (Cond.empty()) { if (!TBB) return 0; @@ -123,7 +130,17 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, return 1; } - BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addOperand(Cond[0]).addMBB(TBB); + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + if (Cond[0].getImm()) { + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) + .addOperand(Cond[1]) + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) + .addOperand(Cond[1]) + .addMBB(TBB); + } if (!FBB) return 1; @@ -133,10 +150,7 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, bool WebAssemblyInstrInfo::ReverseBranchCondition( SmallVectorImpl &Cond) const { - assert(Cond.size() == 1); - - // TODO: Add branch reversal here... And re-enable MachineBlockPlacementID - // when we do. - - return true; + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; } diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..846f6eb1e5cf --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,133 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. 
This pass allows LLVM to use it, for now. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end(); ) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + unsigned Cond = MI->getOperand(0).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. 
+ if (MFI.isVRegStackified(Cond)) { + assert(MRI.hasOneDef(Cond)); + MachineInstr *Def = MRI.getVRegDef(Cond); + switch (Def->getOpcode()) { + using namespace WebAssembly; + case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; + case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; + case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; + case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; + case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; + case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; + case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; + case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; + case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; + case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; + case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; + case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; + case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; + case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; + case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; + case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; + case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; + case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; + case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; + case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; + case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; + case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; + case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; + case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + default: break; + } + } + + // If we weren't able to invert the condition in place. Insert an + // expression to invert it. + if (!Inverted) { + unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(Tmp); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) + .addReg(Cond) + .addReg(ZeroReg); + Cond = Tmp; + Inverted = true; + } + + // The br_unless condition has now been inverted. Insert a br_if and + // delete the br_unless. + assert(Inverted); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) + .addReg(Cond) + .addMBB(MI->getOperand(1).getMBB()); + MBB.erase(MI); + } + } + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index e149d9cce719..11f44775b5ea 100644 --- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -64,7 +64,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { // can use $discard instead. 
MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); - if (OldReg == MI.getOperand(2).getReg()) { + if (OldReg == MI.getOperand(3).getReg()) { unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 29fb89c6e18f..b54699243bd4 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -171,10 +171,6 @@ void WebAssemblyPassConfig::addPostRegAlloc() { // Fails with: should be run after register allocation. disablePass(&MachineCopyPropagationID); - // TODO: Until we get ReverseBranchCondition support, MachineBlockPlacement - // can create ugly-looking control flow. - disablePass(&MachineBlockPlacementID); - // Run the register coloring pass to reduce the total number of registers. addPass(createWebAssemblyRegColoring()); } @@ -183,6 +179,9 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Put the CFG in structured form; insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); + // Lower br_unless into br_if. + addPass(createWebAssemblyLowerBrUnless()); + // Create a mapping from LLVM CodeGen virtual registers to wasm registers. addPass(createWebAssemblyRegNumbering()); diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll index b8ac48bf49dc..c615ebb0db9d 100644 --- a/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-block-placement | FileCheck %s +; RUN: llc < %s -asm-verbose=false | FileCheck -check-prefix=OPT %s ; Test the CFG stackifier pass. 
@@ -12,10 +13,21 @@ declare void @something() ; CHECK-LABEL: test0: ; CHECK: loop ; CHECK: i32.add +; CHECK-NOT: br ; CHECK: br_if +; CHECK-NOT: br ; CHECK: call ; CHECK: br BB0_1{{$}} ; CHECK: return{{$}} +; OPT-LABEL: test0: +; OPT: loop +; OPT: i32.add +; OPT-NOT: br +; OPT: br_if +; OPT-NOT: br +; OPT: call +; OPT: br BB0_1{{$}} +; OPT: return{{$}} define void @test0(i32 %n) { entry: br label %header @@ -40,10 +52,21 @@ back: ; CHECK-LABEL: test1: ; CHECK: loop ; CHECK: i32.add +; CHECK-NOT: br ; CHECK: br_if +; CHECK-NOT: br ; CHECK: call ; CHECK: br BB1_1{{$}} ; CHECK: return{{$}} +; OPT-LABEL: test1: +; OPT: loop +; OPT: i32.add +; OPT-NOT: br +; OPT: br_if +; OPT-NOT: br +; OPT: call +; OPT: br BB1_1{{$}} +; OPT: return{{$}} define void @test1(i32 %n) { entry: br label %header @@ -69,9 +92,16 @@ back: ; CHECK: block BB2_2{{$}} ; CHECK: br_if {{.*}}, BB2_2{{$}} ; CHECK: BB2_1: -; CHECK: br_if $pop{{[0-9]+}}, BB2_1{{$}} +; CHECK: br_if ${{[0-9]+}}, BB2_1{{$}} ; CHECK: BB2_2: ; CHECK: return{{$}} +; OPT-LABEL: test2: +; OPT: block BB2_2{{$}} +; OPT: br_if {{.*}}, BB2_2{{$}} +; OPT: BB2_1: +; OPT: br_if ${{[0-9]+}}, BB2_1{{$}} +; OPT: BB2_2: +; OPT: return{{$}} define void @test2(double* nocapture %p, i32 %n) { entry: %cmp.4 = icmp sgt i32 %n, 0 @@ -100,13 +130,23 @@ for.end: ; CHECK-LABEL: doublediamond: ; CHECK: block BB3_5{{$}} ; CHECK: block BB3_2{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB3_2{{$}} +; CHECK: br_if $0, BB3_2{{$}} ; CHECK: block BB3_4{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB3_4{{$}} +; CHECK: br_if $1, BB3_4{{$}} ; CHECK: br BB3_5{{$}} ; CHECK: BB3_4: ; CHECK: BB3_5: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: doublediamond: +; OPT: block BB3_5{{$}} +; OPT: block BB3_4{{$}} +; OPT: br_if {{.*}}, BB3_4{{$}} +; OPT: block BB3_3{{$}} +; OPT: br_if {{.*}}, BB3_3{{$}} +; OPT: br BB3_5{{$}} +; OPT: BB3_4: +; OPT: BB3_5: +; OPT: return ${{[0-9]+}}{{$}} define i32 @doublediamond(i32 %a, i32 %b, i32* %p) { entry: %c = icmp eq i32 %a, 0 @@ -132,9 +172,14 @@ exit: ; CHECK-LABEL: triangle: ; CHECK: block BB4_2{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB4_2{{$}} +; CHECK: br_if $1, BB4_2{{$}} ; CHECK: BB4_2: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: triangle: +; OPT: block BB4_2{{$}} +; OPT: br_if $1, BB4_2{{$}} +; OPT: BB4_2: +; OPT: return ${{[0-9]+}}{{$}} define i32 @triangle(i32* %p, i32 %a) { entry: %c = icmp eq i32 %a, 0 @@ -151,11 +196,19 @@ exit: ; CHECK-LABEL: diamond: ; CHECK: block BB5_3{{$}} ; CHECK: block BB5_2{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB5_2{{$}} +; CHECK: br_if $1, BB5_2{{$}} ; CHECK: br BB5_3{{$}} ; CHECK: BB5_2: ; CHECK: BB5_3: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: diamond: +; OPT: block BB5_3{{$}} +; OPT: block BB5_2{{$}} +; OPT: br_if {{.*}}, BB5_2{{$}} +; OPT: br BB5_3{{$}} +; OPT: BB5_2: +; OPT: BB5_3: +; OPT: return ${{[0-9]+}}{{$}} define i32 @diamond(i32* %p, i32 %a) { entry: %c = icmp eq i32 %a, 0 @@ -175,6 +228,9 @@ exit: ; CHECK-LABEL: single_block: ; CHECK-NOT: br ; CHECK: return $pop{{[0-9]+}}{{$}} +; OPT-LABEL: single_block: +; OPT-NOT: br +; OPT: return $pop{{[0-9]+}}{{$}} define i32 @single_block(i32* %p) { entry: store volatile i32 0, i32* %p @@ -187,6 +243,12 @@ entry: ; CHECK: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}} ; CHECK: br BB7_1{{$}} ; CHECK: BB7_2: +; OPT-LABEL: minimal_loop: +; OPT-NOT: br +; OPT: BB7_1: +; OPT: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}} +; OPT: br BB7_1{{$}} +; OPT: BB7_2: define i32 @minimal_loop(i32* %p) { entry: store volatile i32 0, i32* %p @@ -203,6 +265,13 @@ loop: ; CHECK: br_if 
$pop{{[0-9]+}}, BB8_1{{$}} ; CHECK: BB8_2: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: simple_loop: +; OPT-NOT: br +; OPT: BB8_1: +; OPT: loop BB8_2{{$}} +; OPT: br_if {{.*}}, BB8_1{{$}} +; OPT: BB8_2: +; OPT: return ${{[0-9]+}}{{$}} define i32 @simple_loop(i32* %p, i32 %a) { entry: %c = icmp eq i32 %a, 0 @@ -218,12 +287,20 @@ exit: ; CHECK-LABEL: doubletriangle: ; CHECK: block BB9_4{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB9_4{{$}} +; CHECK: br_if $0, BB9_4{{$}} ; CHECK: block BB9_3{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB9_3{{$}} +; CHECK: br_if $1, BB9_3{{$}} ; CHECK: BB9_3: ; CHECK: BB9_4: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: doubletriangle: +; OPT: block BB9_4{{$}} +; OPT: br_if $0, BB9_4{{$}} +; OPT: block BB9_3{{$}} +; OPT: br_if $1, BB9_3{{$}} +; OPT: BB9_3: +; OPT: BB9_4: +; OPT: return ${{[0-9]+}}{{$}} define i32 @doubletriangle(i32 %a, i32 %b, i32* %p) { entry: %c = icmp eq i32 %a, 0 @@ -247,12 +324,21 @@ exit: ; CHECK-LABEL: ifelse_earlyexits: ; CHECK: block BB10_4{{$}} ; CHECK: block BB10_2{{$}} -; CHECK: br_if $pop{{[0-9]+}}, BB10_2{{$}} +; CHECK: br_if $0, BB10_2{{$}} ; CHECK: br BB10_4{{$}} ; CHECK: BB10_2: -; CHECK: br_if $pop{{[0-9]+}}, BB10_4{{$}} +; CHECK: br_if $1, BB10_4{{$}} ; CHECK: BB10_4: ; CHECK: return ${{[0-9]+}}{{$}} +; OPT-LABEL: ifelse_earlyexits: +; OPT: block BB10_4{{$}} +; OPT: block BB10_3{{$}} +; OPT: br_if {{.*}}, BB10_3{{$}} +; OPT: br_if $1, BB10_4{{$}} +; OPT: br BB10_4{{$}} +; OPT: BB10_3: +; OPT: BB10_4: +; OPT: return ${{[0-9]+}}{{$}} define i32 @ifelse_earlyexits(i32 %a, i32 %b, i32* %p) { entry: %c = icmp eq i32 %a, 0 @@ -278,16 +364,31 @@ exit: ; CHECK: loop BB11_7{{$}} ; CHECK: block BB11_6{{$}} ; CHECK: block BB11_3{{$}} -; CHECK: br_if $pop{{.*}}, BB11_3{{$}} +; CHECK: br_if $0, BB11_3{{$}} ; CHECK: br BB11_6{{$}} ; CHECK: BB11_3: ; CHECK: block BB11_5{{$}} -; CHECK: br_if $pop{{.*}}, BB11_5{{$}} +; CHECK: br_if $1, BB11_5{{$}} ; CHECK: br BB11_6{{$}} ; CHECK: BB11_5: ; CHECK: BB11_6: ; CHECK: br BB11_1{{$}} ; CHECK: BB11_7: +; OPT-LABEL: doublediamond_in_a_loop: +; OPT: BB11_1: +; OPT: loop BB11_7{{$}} +; OPT: block BB11_6{{$}} +; OPT: block BB11_5{{$}} +; OPT: br_if {{.*}}, BB11_5{{$}} +; OPT: block BB11_4{{$}} +; OPT: br_if {{.*}}, BB11_4{{$}} +; OPT: br BB11_6{{$}} +; OPT: BB11_4: +; OPT: br BB11_6{{$}} +; OPT: BB11_5: +; OPT: BB11_6: +; OPT: br BB11_1{{$}} +; OPT: BB11_7: define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) { entry: br label %header diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll index 180d70e2e4a7..af4a3501531b 100644 --- a/test/CodeGen/WebAssembly/reg-stackify.ll +++ b/test/CodeGen/WebAssembly/reg-stackify.ll @@ -50,22 +50,35 @@ define i32 @yes1(i32* %q) { ; rearranged to make the stack contiguous. 
; CHECK-LABEL: stack_uses: -; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .param i32, i32, i32, i32{{$}} ; CHECK-NEXT: .result i32{{$}} -; CHECK-NEXT: local i32, i32{{$}} -; CHECK-NEXT: i32.const $1=, 1{{$}} -; CHECK-NEXT: i32.const $2=, 0{{$}} -; CHECK-NEXT: i32.and $push0=, $0, $1{{$}} -; CHECK-NEXT: i32.eq $push1=, $pop0, $2{{$}} -; CHECK-NEXT: block BB4_2{{$}} -; CHECK-NEXT: br_if $pop1, BB4_2{{$}} -; CHECK-NEXT: return $2{{$}} -; CHECK-NEXT: BB4_2:{{$}} -; CHECK-NEXT: return $1{{$}} -define i32 @stack_uses(i32 %x) { +; CHECK-NEXT: .local i32, i32{{$}} +; CHECK-NEXT: i32.const $4=, 1{{$}} +; CHECK-NEXT: i32.const $5=, 2{{$}} +; CHECK-NEXT: i32.lt_s $push0=, $0, $4{{$}} +; CHECK-NEXT: i32.lt_s $push1=, $1, $5{{$}} +; CHECK-NEXT: i32.xor $push4=, $pop0, $pop1{{$}} +; CHECK-NEXT: i32.lt_s $push2=, $2, $4{{$}} +; CHECK-NEXT: i32.lt_s $push3=, $3, $5{{$}} +; CHECK-NEXT: i32.xor $push5=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.xor $push6=, $pop4, $pop5{{$}} +; CHECK-NEXT: i32.ne $push7=, $pop6, $4{{$}} +; CHECK-NEXT: block BB4_2{{$}} +; CHECK-NEXT: br_if $pop7, BB4_2{{$}} +; CHECK-NEXT: i32.const $push8=, 0{{$}} +; CHECK-NEXT: return $pop8{{$}} +; CHECK-NEXT: BB4_2: +; CHECK-NEXT: return $4{{$}} +define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) { entry: - %c = trunc i32 %x to i1 - br i1 %c, label %true, label %false + %c = icmp sle i32 %x, 0 + %d = icmp sle i32 %y, 1 + %e = icmp sle i32 %z, 0 + %f = icmp sle i32 %w, 1 + %g = xor i1 %c, %d + %h = xor i1 %e, %f + %i = xor i1 %g, %h + br i1 %i, label %true, label %false true: ret i32 0 false: diff --git a/test/CodeGen/WebAssembly/switch.ll b/test/CodeGen/WebAssembly/switch.ll index 41c5b357d068..c62333c336fa 100644 --- a/test/CodeGen/WebAssembly/switch.ll +++ b/test/CodeGen/WebAssembly/switch.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -asm-verbose=false | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-block-placement | FileCheck %s -; Test switch instructions. +; Test switch instructions. Block placement is disabled because it reorders +; the blocks in a way that isn't interesting here. target datalayout = "e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" From 543e02b4381244c28463696f159170004c787628 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 5 Dec 2015 03:05:45 +0000 Subject: [PATCH 116/364] [llvm-dwp] Support debug_tu_index git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254827 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCObjectFileInfo.h | 2 + lib/MC/MCObjectFileInfo.cpp | 10 ++ test/tools/llvm-dwp/Inputs/simple/a.dwo | Bin 1193 -> 1369 bytes test/tools/llvm-dwp/Inputs/simple/b.dwo | Bin 1241 -> 1409 bytes test/tools/llvm-dwp/X86/simple.test | 51 ++++++-- tools/llvm-dwp/llvm-dwp.cpp | 163 ++++++++++++++++-------- 6 files changed, 160 insertions(+), 66 deletions(-) diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 388a208fb4a0..cf2c3f12bb6b 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -118,6 +118,7 @@ class MCObjectFileInfo { // These are for Fission DWP files. MCSection *DwarfCUIndexSection; + MCSection *DwarfTUIndexSection; /// Section for newer gnu pubnames. 
MCSection *DwarfGnuPubNamesSection; @@ -266,6 +267,7 @@ class MCObjectFileInfo { MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } + MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } MCSection *getCOFFDebugSymbolsSection() const { return COFFDebugSymbolsSection; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 41e28698b1cc..dbedd73a4325 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -262,6 +262,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { DwarfCUIndexSection = Ctx->getMachOSection("__DWARF", "__debug_cu_index", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + DwarfTUIndexSection = + Ctx->getMachOSection("__DWARF", "__debug_tu_index", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0, SectionKind::getMetadata()); @@ -537,6 +540,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) { // DWP Sections DwarfCUIndexSection = Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0); + DwarfTUIndexSection = + Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0); StackMapSection = Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); @@ -725,6 +730,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + DwarfTUIndexSection = Ctx->getCOFFSection( + ".debug_tu_index", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfAccelNamesSection = Ctx->getCOFFSection( ".apple_names", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/test/tools/llvm-dwp/Inputs/simple/a.dwo b/test/tools/llvm-dwp/Inputs/simple/a.dwo index 7bdb2a7b9f826e0dfcbafb94b64ef9285b42f4c6..1fc71ca8d17560f4ce433262fedc9bcb4fc14a2f 100644 GIT binary patch delta 328 zcmZ388{}|hxlj$xh!BI4n`n@kt5*BqcHZv%uGzmKv6~} zrr!+Af0A#>JUUQn!JvRp3=@-s39&G+PTa0#W+RyiRK?=RC?GD(%D})X%)s1a$IWfU zZOsi-DF(z$3<5xsk?}VlBSWHIazO!*H}Ovve#j`#SUCA0qd4aih!BG_SgaW=rVjO_)MQ0wb52JnZ}Q|sW^>UQ5HB(u lfa>D{(j1f5GHbI&03FEy)Gs{w1M^M^Ae)(i9jXY40suf}IPw4h delta 204 zcmcb~wUTp!hIj)b0~|PjSq=0@DFV`5}vWMC9zRBf^oW@T=&+vvp3z`!KTU?Z8SF!7_h z0jndUfVePFHJdO4P@0?DirbnSs8tMznHU6sBqQT*K1PNlz2t%dAaAk;VT#m;uP-=C &TypeIndexEntries, + uint32_t OutTypesOffset, StringRef Types, + const UnitIndexEntry &CUEntry) { + uint32_t Offset = 0; + DataExtractor Data(Types, true, 0); + while (Data.isValidOffset(Offset)) { + TypeIndexEntries.push_back(CUEntry); + auto &Entry = TypeIndexEntries.back(); + // Zero out the debug_info contribution + Entry.Contributions[0] = {}; + auto &C = Entry.Contributions[DW_SECT_TYPES - DW_SECT_INFO]; + C.Offset = OutTypesOffset + Offset; + auto PrevOffset = Offset; + // Length of the unit, including the 4 byte length field. 
+ C.Length = Data.getU32(&Offset) + 4; + + Data.getU16(&Offset); // Version + Data.getU32(&Offset); // Abbrev offset + Data.getU8(&Offset); // Address size + Entry.Signature = Data.getU64(&Offset); + Offset = PrevOffset + C.Length; + } +} + +static void +writeIndexTable(MCStreamer &Out, ArrayRef ContributionOffsets, + ArrayRef IndexEntries, + uint32_t DWARFUnitIndex::Entry::SectionContribution::*Field) { + for (const auto &E : IndexEntries) + for (size_t i = 0; i != array_lengthof(E.Contributions); ++i) + if (ContributionOffsets[i]) + Out.EmitIntValue(E.Contributions[i].*Field, 4); +} + +static void writeIndex(MCStreamer &Out, MCSection *Section, + ArrayRef ContributionOffsets, + ArrayRef IndexEntries) { + unsigned Columns = 0; + for (auto &C : ContributionOffsets) + if (C) + ++Columns; + + std::vector Buckets(NextPowerOf2(3 * IndexEntries.size() / 2)); + uint64_t Mask = Buckets.size() - 1; + for (size_t i = 0; i != IndexEntries.size(); ++i) { + auto S = IndexEntries[i].Signature; + auto H = S & Mask; + while (Buckets[H]) + H += ((S >> 32) & Mask) | 1; + Buckets[H] = i + 1; + } + + Out.SwitchSection(Section); + Out.EmitIntValue(2, 4); // Version + Out.EmitIntValue(Columns, 4); // Columns + Out.EmitIntValue(IndexEntries.size(), 4); // Num Units + Out.EmitIntValue(Buckets.size(), 4); // Num Buckets + + // Write the signatures. + for (const auto &I : Buckets) + Out.EmitIntValue(I ? IndexEntries[I - 1].Signature : 0, 8); + + // Write the indexes. + for (const auto &I : Buckets) + Out.EmitIntValue(I, 4); + + // Write the column headers (which sections will appear in the table) + for (size_t i = 0; i != ContributionOffsets.size(); ++i) + if (ContributionOffsets[i]) + Out.EmitIntValue(i + DW_SECT_INFO, 4); + + // Write the offsets. + writeIndexTable(Out, ContributionOffsets, IndexEntries, + &DWARFUnitIndex::Entry::SectionContribution::Offset); + + // Write the lengths. 
+ writeIndexTable(Out, ContributionOffsets, IndexEntries, + &DWARFUnitIndex::Entry::SectionContribution::Length); +} static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { const auto &MCOFI = *Out.getContext().getObjectFileInfo(); MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); @@ -143,12 +227,8 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}}, {"debug_abbrev.dwo", {MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}}}; - struct UnitIndexEntry { - uint64_t Signature; - DWARFUnitIndex::Entry::SectionContribution Contributions[8]; - }; - std::vector IndexEntries; + std::vector TypeIndexEntries; StringMap Strings; uint32_t StringOffset = 0; @@ -167,6 +247,9 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { StringRef CurStrOffsetSection; StringRef InfoSection; StringRef AbbrevSection; + StringRef TypesSection; + + auto TypesOffset = ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]; for (const auto &Section : ErrOrObj->getBinary()->sections()) { StringRef Name; @@ -188,12 +271,18 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { ContributionOffsets[Index] += (CurEntry.Contributions[Index].Length = Contents.size()); - if (Kind == DW_SECT_INFO) { - assert(InfoSection.empty()); + switch (Kind) { + case DW_SECT_INFO: InfoSection = Contents; - } else if (Kind == DW_SECT_ABBREV) { - assert(AbbrevSection.empty()); + break; + case DW_SECT_ABBREV: AbbrevSection = Contents; + break; + case DW_SECT_TYPES: + TypesSection = Contents; + break; + default: + break; } } @@ -211,6 +300,7 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { assert(!AbbrevSection.empty()); assert(!InfoSection.empty()); CurEntry.Signature = getCUSignature(AbbrevSection, InfoSection); + addAllTypes(TypeIndexEntries, TypesOffset, TypesSection, CurEntry); if (auto Err = writeStringsAndOffsets(Out, Strings, StringOffset, StrSection, StrOffsetSection, @@ -218,52 +308,19 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { return Err; } - unsigned Columns = 0; - for (auto &C : ContributionOffsets) - if (C) - ++Columns; - - std::vector Buckets(NextPowerOf2(3 * IndexEntries.size() / 2)); - uint64_t Mask = Buckets.size() - 1; - for (size_t i = 0; i != IndexEntries.size(); ++i) { - auto S = IndexEntries[i].Signature; - auto H = S & Mask; - while (Buckets[H]) - H += ((S >> 32) & Mask) | 1; - Buckets[H] = i + 1; - } + // Lie about there being no info contributions so the TU index only includes + // the type unit contribution + ContributionOffsets[0] = 0; + writeIndex(Out, MCOFI.getDwarfTUIndexSection(), ContributionOffsets, + TypeIndexEntries); - Out.SwitchSection(MCOFI.getDwarfCUIndexSection()); - Out.EmitIntValue(2, 4); // Version - Out.EmitIntValue(Columns, 4); // Columns - Out.EmitIntValue(IndexEntries.size(), 4); // Num Units - // FIXME: This is not the right number of buckets for a real hash. - Out.EmitIntValue(Buckets.size(), 4); // Num Buckets + // Lie about the type contribution + ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO] = 0; + // Unlie about the info contribution + ContributionOffsets[0] = 1; - // Write the signatures. - for (const auto &I : Buckets) - Out.EmitIntValue(I ? IndexEntries[I - 1].Signature : 0, 8); - - // Write the indexes. 
- for (const auto &I : Buckets) - Out.EmitIntValue(I, 4); - - // Write the column headers (which sections will appear in the table) - for (size_t i = 0; i != array_lengthof(ContributionOffsets); ++i) - if (ContributionOffsets[i]) - Out.EmitIntValue(i + DW_SECT_INFO, 4); - - // Write the offsets. - for (const auto &E : IndexEntries) - for (size_t i = 0; i != array_lengthof(E.Contributions); ++i) - if (ContributionOffsets[i]) - Out.EmitIntValue(E.Contributions[i].Offset, 4); - - // Write the lengths. - for (const auto &E : IndexEntries) - for (size_t i = 0; i != array_lengthof(E.Contributions); ++i) - if (ContributionOffsets[i]) - Out.EmitIntValue(E.Contributions[i].Length, 4); + writeIndex(Out, MCOFI.getDwarfCUIndexSection(), ContributionOffsets, + IndexEntries); return std::error_code(); } From 32ad075fd5d405f2273dd6609164cb95af5c3b4b Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 5 Dec 2015 03:06:30 +0000 Subject: [PATCH 117/364] [llvm-dwp] clang-format this to catch anything I've missed along the way git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254828 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-dwp/llvm-dwp.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/llvm-dwp/llvm-dwp.cpp b/tools/llvm-dwp/llvm-dwp.cpp index 608eca152d94..2583e2e20818 100644 --- a/tools/llvm-dwp/llvm-dwp.cpp +++ b/tools/llvm-dwp/llvm-dwp.cpp @@ -1,7 +1,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -11,17 +12,16 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Options.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Options.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" -#include "llvm/Support/MathExtras.h" -#include #include +#include #include using namespace llvm; @@ -32,8 +32,10 @@ OptionCategory DwpCategory("Specific Options"); static list InputFiles(Positional, OneOrMore, desc(""), cat(DwpCategory)); -static opt OutputFilename(Required, "o", desc("Specify the output file."), - value_desc("filename"), cat(DwpCategory)); +static opt OutputFilename(Required, "o", + desc("Specify the output file."), + value_desc("filename"), + cat(DwpCategory)); static int error(const Twine &Error, const Twine &Context) { errs() << Twine("while processing ") + Context + ":\n"; @@ -191,7 +193,7 @@ static void writeIndex(MCStreamer &Out, MCSection *Section, Out.EmitIntValue(2, 4); // Version Out.EmitIntValue(Columns, 4); // Columns Out.EmitIntValue(IndexEntries.size(), 4); // Num Units - Out.EmitIntValue(Buckets.size(), 4); // Num Buckets + Out.EmitIntValue(Buckets.size(), 4); // Num Buckets // Write the signatures. 
for (const auto &I : Buckets) @@ -325,7 +327,7 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { return std::error_code(); } -int main(int argc, char** argv) { +int main(int argc, char **argv) { ParseCommandLineOptions(argc, argv, "merge split dwarf (.dwo) files"); @@ -357,8 +359,7 @@ int main(int argc, char** argv) { MCObjectFileInfo MOFI; MCContext MC(MAI.get(), MRI.get(), &MOFI); - MOFI.InitMCObjectFileInfo(TheTriple, Reloc::Default, CodeModel::Default, - MC); + MOFI.InitMCObjectFileInfo(TheTriple, Reloc::Default, CodeModel::Default, MC); auto MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, ""); if (!MAB) From eaf992ce08c4f3d08215149750f5342a628656ad Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 5 Dec 2015 03:10:05 +0000 Subject: [PATCH 118/364] [llvm-dwp] Rename the sufficiently-modified test to reflect it's non-simplicity git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254829 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm-dwp/Inputs/{simple => type_units}/a.cpp | 0 .../llvm-dwp/Inputs/{simple => type_units}/a.dwo | Bin .../llvm-dwp/Inputs/{simple => type_units}/b.cpp | 0 .../llvm-dwp/Inputs/{simple => type_units}/b.dwo | Bin .../llvm-dwp/X86/{simple.test => type_units.test} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename test/tools/llvm-dwp/Inputs/{simple => type_units}/a.cpp (100%) rename test/tools/llvm-dwp/Inputs/{simple => type_units}/a.dwo (100%) rename test/tools/llvm-dwp/Inputs/{simple => type_units}/b.cpp (100%) rename test/tools/llvm-dwp/Inputs/{simple => type_units}/b.dwo (100%) rename test/tools/llvm-dwp/X86/{simple.test => type_units.test} (100%) diff --git a/test/tools/llvm-dwp/Inputs/simple/a.cpp b/test/tools/llvm-dwp/Inputs/type_units/a.cpp similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/a.cpp rename to test/tools/llvm-dwp/Inputs/type_units/a.cpp diff --git a/test/tools/llvm-dwp/Inputs/simple/a.dwo b/test/tools/llvm-dwp/Inputs/type_units/a.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/a.dwo rename to test/tools/llvm-dwp/Inputs/type_units/a.dwo diff --git a/test/tools/llvm-dwp/Inputs/simple/b.cpp b/test/tools/llvm-dwp/Inputs/type_units/b.cpp similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/b.cpp rename to test/tools/llvm-dwp/Inputs/type_units/b.cpp diff --git a/test/tools/llvm-dwp/Inputs/simple/b.dwo b/test/tools/llvm-dwp/Inputs/type_units/b.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/b.dwo rename to test/tools/llvm-dwp/Inputs/type_units/b.dwo diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/type_units.test similarity index 100% rename from test/tools/llvm-dwp/X86/simple.test rename to test/tools/llvm-dwp/X86/type_units.test From 28683ac9d56e053056b176b8ddcbe104e6e5b434 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 5 Dec 2015 03:11:17 +0000 Subject: [PATCH 119/364] [llvm-dwp] Fix the type_units.test since I renamed its inputs as well git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254830 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-dwp/X86/type_units.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tools/llvm-dwp/X86/type_units.test b/test/tools/llvm-dwp/X86/type_units.test index 5502ef23d29f..c49d60d47bb3 100644 --- a/test/tools/llvm-dwp/X86/type_units.test +++ b/test/tools/llvm-dwp/X86/type_units.test @@ -1,4 +1,4 @@ -RUN: llvm-dwp %p/../Inputs/simple/a.dwo %p/../Inputs/simple/b.dwo -o %t +RUN: llvm-dwp %p/../Inputs/type_units/a.dwo 
%p/../Inputs/type_units/b.dwo -o %t RUN: llvm-dwarfdump %t | FileCheck %s FIXME: For some reason, piping straight from llvm-dwp to llvm-dwarfdump doesn't behave well - looks like dwarfdump is reading/closes before dwp has finished. From 2b762697564ca1e12e0e974e93ceeb4c3420505c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 5 Dec 2015 03:41:53 +0000 Subject: [PATCH 120/364] [llvm-dwp] Add coverage for both the presence and absence of type units, and fix/remove the emission of a broken tu_index when no type units are present git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254833 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-dwp/Inputs/simple/a.dwo | Bin 0 -> 1193 bytes test/tools/llvm-dwp/Inputs/simple/b.dwo | Bin 0 -> 1241 bytes .../X86/{type_units.test => simple.test} | 58 +++++++++++------- tools/llvm-dwp/llvm-dwp.cpp | 12 ++-- 4 files changed, 42 insertions(+), 28 deletions(-) create mode 100644 test/tools/llvm-dwp/Inputs/simple/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/b.dwo rename test/tools/llvm-dwp/X86/{type_units.test => simple.test} (54%) diff --git a/test/tools/llvm-dwp/Inputs/simple/a.dwo b/test/tools/llvm-dwp/Inputs/simple/a.dwo new file mode 100644 index 0000000000000000000000000000000000000000..7bdb2a7b9f826e0dfcbafb94b64ef9285b42f4c6 GIT binary patch literal 1193 zcmbtTT}s115T4ytTP=#EFN!Y#5n3N&wTf1;EsBC5;tgtAQ!S=RNqaHDd+~HrgfYZvW8JtxbWTT1RyNqS7Wk=hfB>29j%Xx`; ztjk<&eH3I?6(f32BQ!qp%mZ2`ghABrI>U;t@#a?^&{jhjM7`I?imvhIPce}cC_(S# zbta1nev7=PXgt;DIj34rK*K)(`l2?8$Uoe;;P`O|#`UQC`(i%oJ~8?yuwUDTBP@Dj W+#_lezMoh|yOF`TrpWUre(OK6DqH*j literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/b.dwo b/test/tools/llvm-dwp/Inputs/simple/b.dwo new file mode 100644 index 0000000000000000000000000000000000000000..f41243dc722b011d346dccede7fd91268525d578 GIT binary patch literal 1241 zcmbtU%`O8`6h8M(>#rfHT2yRiBf(;{8ll7(f`nLDk=RtHrZmQkOsiOl+FI}gEG>Bo zPayFgmNw4my)(wmBo=OR?m6FgzVkC}r`PM-8=Nr+4+9f^P{;zj#7L`4TQC5#95{n^(LJ-yMq zI&D>L^sPYn<1jjO?DM0F>(`9K3m*&tF{!|cU#~ZvRx?gFyt*I9Rof1ncARk=p3}v+ z!@tiMc8GnIQ9mn+eg~>{3e&_De#^j;cw6Da`p}m#AI5R%odhFsGC{kOz){H)U3g}N zi*9w+0cKQ=#P4+BSrab0Rc(M7l_T+}B(qVa2vjZQHAO>kN(wtlPOT@*6%E0HcuC+{ z=K9gokGD@^@nQfFpEUnN)+f!AE&g;`;wMA%k~StjX%ph*GRi~^(h4J>_#04C-$$xq AOaK4? literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/X86/type_units.test b/test/tools/llvm-dwp/X86/simple.test similarity index 54% rename from test/tools/llvm-dwp/X86/type_units.test rename to test/tools/llvm-dwp/X86/simple.test index c49d60d47bb3..962e270a594e 100644 --- a/test/tools/llvm-dwp/X86/type_units.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -1,5 +1,8 @@ +RUN: llvm-dwp %p/../Inputs/simple/a.dwo %p/../Inputs/simple/b.dwo -o %t +RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=NOTYP %s +RUN: llvm-objdump -h %t | FileCheck --check-prefix=NOTYPOBJ %s RUN: llvm-dwp %p/../Inputs/type_units/a.dwo %p/../Inputs/type_units/b.dwo -o %t -RUN: llvm-dwarfdump %t | FileCheck %s +RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=TYPES %s FIXME: For some reason, piping straight from llvm-dwp to llvm-dwarfdump doesn't behave well - looks like dwarfdump is reading/closes before dwp has finished. 
@@ -28,7 +31,7 @@ CHECK: DW_TAG_formal_parameter CHECK: .debug_info.dwo contents: CHECK: [[AOFF:0x[0-9a-f]*]]: -CHECK-LABEL: Compile Unit: length = 0x00000029 version = 0x0004 abbr_offset = +CHECK-LABEL: Compile Unit: length = {{.*}} version = 0x0004 abbr_offset = CHECK: 0x[[AAOFF]] addr_size = 0x08 (next unit at [[BOFF:.*]]) CHECK: DW_TAG_compile_unit CHECK: DW_AT_name {{.*}} "a.cpp" @@ -36,42 +39,51 @@ CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOA:.*]]) CHECK: DW_TAG_variable CHECK: DW_AT_name {{.*}} "a" CHECK: DW_TAG_structure_type -CHECK: DW_AT_signature {{.*}} ([[FOOSIG:.*]]) +NOTYP: DW_AT_name {{.*}} "foo" +TYPES: DW_AT_signature {{.*}} ([[FOOSIG:.*]]) CHECK: [[BOFF]]: -CHECK-LABEL: Compile Unit: length = 0x00000035 version = 0x0004 abbr_offset = +CHECK-LABEL: Compile Unit: length = {{.*}} version = 0x0004 abbr_offset = CHECK: 0x[[BAOFF]] addr_size = 0x08 (next unit at [[XOFF:.*]]) CHECK: DW_AT_name {{.*}} "b.cpp" CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOB:.*]]) CHECK: DW_TAG_structure_type -CHECK: DW_AT_signature {{.*}} ([[BARSIG:.*]]) +NOTYP: DW_AT_name {{.*}} "bar" +TYPES: DW_AT_signature {{.*}} ([[BARSIG:.*]]) CHECK: DW_TAG_subprogram CHECK: DW_AT_name {{.*}} "b" CHECK: DW_TAG_formal_parameter -CHECK-LABEL: .debug_types.dwo contents: -CHECK: [[FOOUOFF:0x[0-9a-f]*]]: -CHECK-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset = -CHECK: 0x[[AAOFF]] addr_size = 0x08 type_signature = [[FOOSIG]] type_offset = 0x[[FOOOFF:.*]] (next unit at [[BARUOFF:.*]]) -CHECK: DW_TAG_type_unit -CHECK: [[FOOOFF]]: DW_TAG_structure_type -CHECK: DW_AT_name {{.*}} "foo" -CHECK: [[BARUOFF]]: -CHECK-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset = -CHECK: 0x[[BAOFF]] addr_size = 0x08 type_signature = [[BARSIG]] type_offset = 0x001e (next unit at [[XUOFF:.*]]) -CHECK: DW_TAG_type_unit -CHECK: 0x00000042: DW_TAG_structure_type -CHECK: DW_AT_name {{.*}} "bar" +NOTYP-NOT: .debug_types.dwo contents: +TYPES-LABEL: .debug_types.dwo contents: +TYPES: [[FOOUOFF:0x[0-9a-f]*]]: +TYPES-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset = +TYPES: 0x[[AAOFF]] addr_size = 0x08 type_signature = [[FOOSIG]] type_offset = 0x[[FOOOFF:.*]] (next unit at [[BARUOFF:.*]]) +TYPES: DW_TAG_type_unit +TYPES: [[FOOOFF]]: DW_TAG_structure_type +TYPES: DW_AT_name {{.*}} "foo" +TYPES: [[BARUOFF]]: +TYPES-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset = +TYPES: 0x[[BAOFF]] addr_size = 0x08 type_signature = [[BARSIG]] type_offset = 0x001e (next unit at [[XUOFF:.*]]) +TYPES: DW_TAG_type_unit +TYPES: 0x00000042: DW_TAG_structure_type +TYPES: DW_AT_name {{.*}} "bar" CHECK-LABEL: .debug_cu_index contents: CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS -CHECK: 1 [[DWOA]] {{\[}}[[AOFF]], [[BOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) -CHECK: 3 [[DWOB]] {{\[}}[[BOFF]], [[XOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) +TYPES: 1 [[DWOA]] {{\[}}[[AOFF]], [[BOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) +TYPES: 3 [[DWOB]] {{\[}}[[BOFF]], [[XOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) +NOTYP: 3 [[DWOA]] {{\[}}[[AOFF]], [[BOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x00000011) [0x00000000, 0x00000010) +NOTYP: 4 [[DWOB]] {{\[}}[[BOFF]], [[XOFF]]) [0x0000[[BAOFF]], 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024) CHECK-LABEL: .debug_tu_index contents: -CHECK: Index Signature TYPES ABBREV 
LINE STR_OFFSETS -CHECK: 1 [[FOOSIG]] {{\[}}[[FOOUOFF]], [[BARUOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) -CHECK: 4 [[BARSIG]] {{\[}}[[BARUOFF]], [[XUOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) +NOTYP-NOT: Index +TYPES: Index Signature TYPES ABBREV LINE STR_OFFSETS +TYPES: 1 [[FOOSIG]] {{\[}}[[FOOUOFF]], [[BARUOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010) +TYPES: 4 [[BARSIG]] {{\[}}[[BARUOFF]], [[XUOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024) + +Ensure we do not create a debug_tu_index, even an empty or malformed one. +NOTYPOBJ-NOT: .debug_tu_index CHECK-LABEL: .debug_str.dwo contents: CHECK: "clang version diff --git a/tools/llvm-dwp/llvm-dwp.cpp b/tools/llvm-dwp/llvm-dwp.cpp index 2583e2e20818..9a9440574e85 100644 --- a/tools/llvm-dwp/llvm-dwp.cpp +++ b/tools/llvm-dwp/llvm-dwp.cpp @@ -310,11 +310,13 @@ static std::error_code write(MCStreamer &Out, ArrayRef Inputs) { return Err; } - // Lie about there being no info contributions so the TU index only includes - // the type unit contribution - ContributionOffsets[0] = 0; - writeIndex(Out, MCOFI.getDwarfTUIndexSection(), ContributionOffsets, - TypeIndexEntries); + if (!TypeIndexEntries.empty()) { + // Lie about there being no info contributions so the TU index only includes + // the type unit contribution + ContributionOffsets[0] = 0; + writeIndex(Out, MCOFI.getDwarfTUIndexSection(), ContributionOffsets, + TypeIndexEntries); + } // Lie about the type contribution ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO] = 0; From c7d0ead45b3f81bdca4b3abea9675fcf790a871c Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Sat, 5 Dec 2015 05:00:55 +0000 Subject: [PATCH 121/364] Normalize successors' probabilities when building MBBs for jump table. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254837 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 ++ test/CodeGen/X86/switch-edge-weight.ll | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 85e7e3c1bc8c..867b9562019b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8259,12 +8259,14 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, JumpProb += DefaultProb / 2; FallthroughProb -= DefaultProb / 2; JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); break; } } addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. 
diff --git a/test/CodeGen/X86/switch-edge-weight.ll b/test/CodeGen/X86/switch-edge-weight.ll index 6f594868c7ad..b8cb7b1280ad 100644 --- a/test/CodeGen/X86/switch-edge-weight.ll +++ b/test/CodeGen/X86/switch-edge-weight.ll @@ -111,7 +111,7 @@ sw.epilog: ; BB#8 to BB#3: {11} (10) ; BB#8 to BB#4: {12} (10) ; BB#8 to BB#5: {13, 14} (20) -; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}14.29%) BB#6({{[0-9a-fx/= ]+}}7.14%) BB#2({{[0-9a-fx/= ]+}}14.29%) BB#3({{[0-9a-fx/= ]+}}14.29%) BB#4({{[0-9a-fx/= ]+}}14.29%) BB#5({{[0-9a-fx/= ]+}}28.57%) +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}15.38%) BB#6({{[0-9a-fx/= ]+}}7.69%) BB#2({{[0-9a-fx/= ]+}}15.38%) BB#3({{[0-9a-fx/= ]+}}15.38%) BB#4({{[0-9a-fx/= ]+}}15.38%) BB#5({{[0-9a-fx/= ]+}}30.77%) } ; CHECK-LABEL: test3 From a8bc4db3b2a16181b6373980abd142b7c1cb6e91 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Sat, 5 Dec 2015 05:16:36 +0000 Subject: [PATCH 122/364] [PGO] Add version to getPGOFuncName method Different version of indexed format may use different name uniquing schemes for static functions. Pass the version info to the name interface so that different schmes can be picked (for profile lookup). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254838 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ProfileData/InstrProf.h | 11 +++++++---- include/llvm/ProfileData/InstrProfReader.h | 3 +++ lib/ProfileData/InstrProf.cpp | 8 +++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 956485119102..3e711bb60cf0 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -16,9 +16,9 @@ #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ #define LLVM_PROFILEDATA_INSTRPROF_H_ +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/InstrProfData.inc" #include "llvm/Support/Endian.h" @@ -30,6 +30,7 @@ #include #include +#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { class Function; @@ -132,7 +133,8 @@ inline StringRef getInstrProfFileOverriderFuncName() { /// Return the modified name for function \c F suitable to be /// used the key for profile lookup. -std::string getPGOFuncName(const Function &F); +std::string getPGOFuncName(const Function &F, + uint64_t Version = INSTR_PROF_INDEX_VERSION); /// Return the modified name for a function suitable to be /// used the key for profile lookup. The function's original @@ -140,7 +142,8 @@ std::string getPGOFuncName(const Function &F); /// The function is defined in module \c FileName. std::string getPGOFuncName(StringRef RawFuncName, GlobalValue::LinkageTypes Linkage, - StringRef FileName); + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); /// Create and return the global variable for function name used in PGO /// instrumentation. 
\c FuncName is the name of the function returned @@ -504,7 +507,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) { } const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 3; +const uint64_t Version = INSTR_PROF_INDEX_VERSION; const HashT HashType = HashT::MD5; // This structure defines the file header of the LLVM profile diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 318981f75e18..2837e421ba87 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -279,6 +279,7 @@ struct InstrProfReaderIndexBase { virtual bool atEnd() const = 0; virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual ~InstrProfReaderIndexBase() {} + virtual uint64_t getVersion() const = 0; }; typedef OnDiskIterableChainedHashTable @@ -312,6 +313,7 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { HashTable->getInfoObj().setValueProfDataEndianness(Endianness); } ~InstrProfReaderIndex() override {} + uint64_t getVersion() const override { return FormatVersion; } }; /// Reader for the indexed binary instrprof format. @@ -328,6 +330,7 @@ class IndexedInstrProfReader : public InstrProfReader { IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; public: + uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) : DataBuffer(std::move(DataBuffer)), Index(nullptr) {} diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 530be8ac044a..a965a1208b51 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -74,7 +74,8 @@ namespace llvm { std::string getPGOFuncName(StringRef RawFuncName, GlobalValue::LinkageTypes Linkage, - StringRef FileName) { + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { // Function names may be prefixed with a binary '1' to indicate // that the backend should not modify the symbols due to any platform @@ -96,8 +97,9 @@ std::string getPGOFuncName(StringRef RawFuncName, return FuncName; } -std::string getPGOFuncName(const Function &F) { - return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName()); +std::string getPGOFuncName(const Function &F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); } GlobalVariable *createPGOFuncNameVar(Module &M, From fbbab8b9598762b23d1cc870d4a7a1cba4158792 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Dec 2015 07:07:42 +0000 Subject: [PATCH 123/364] [X86][FMA4] Explicitly set the domain of FMA4 float/double scalar instructions Both were defaulting to the float domain - now matches the packed instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 61 +++++++++++++++++--------------- test/CodeGen/X86/fma_patterns.ll | 2 +- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 0467a64d7e51..b11ff6e253fa 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -374,36 +374,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { } // isCodeGenOnly = 1 } -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, - X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, - X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, - X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, - X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; - let ExeDomain = SSEPackedSingle in { + // Scalar Instructions + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; + // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, @@ -419,6 +406,22 @@ let ExeDomain = SSEPackedSingle in { } let ExeDomain = SSEPackedDouble in { + // Scalar Instructions + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfnmadd_sd>; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + // Packed Instructions defm 
VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index 0f0dd20da040..c6a4954e51a8 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -1120,7 +1120,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; ; FMA4-LABEL: test_f64_fneg_fmul: ; FMA4: # BB#0: -; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 ; FMA4-NEXT: retq ; From 83f50fab5317ea62c8298a82c1de5054d5795ac9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Dec 2015 07:13:25 +0000 Subject: [PATCH 124/364] Use std::fill instead of memset to initialize an array to avoid hardcoded count and a multiply. The outputed code is identical. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254842 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index e247abcb2f75..e99c9f758f8e 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1293,7 +1293,7 @@ class TargetLoweringBase { /// Remove all register classes. void clearRegisterClasses() { - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*)); + std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); AvailableRegClasses.clear(); } From 79402ee6f96ff3be95b445286d91d0d87b5a3cc9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Dec 2015 07:13:35 +0000 Subject: [PATCH 125/364] Replace uint16_t with the MCPhysReg typedef in many places. A lot of physical register arrays already use this typedef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254843 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/CallingConvLower.h | 2 +- include/llvm/MC/MCInstrDesc.h | 28 ++++++++-------- lib/CodeGen/MIRParser/MIParser.cpp | 4 +-- lib/CodeGen/MachineInstr.cpp | 6 ++-- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4 +-- .../SelectionDAG/ScheduleDAGRRList.cpp | 12 +++---- lib/MC/MCInstrDesc.cpp | 2 +- lib/MC/MCParser/AsmParser.cpp | 4 +-- lib/Target/AArch64/AArch64CallingConvention.h | 32 +++++++++---------- lib/Target/ARM/ARMCallingConv.h | 18 +++++------ lib/Target/ARM/ARMFastISel.cpp | 2 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 2 +- .../Disassembler/HexagonDisassembler.cpp | 32 +++++++++---------- lib/Target/Hexagon/HexagonGenMux.cpp | 4 +-- lib/Target/Hexagon/HexagonISelLowering.cpp | 24 +++++++------- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 2 +- .../MCTargetDesc/HexagonMCCodeEmitter.cpp | 2 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 2 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 4 +-- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 ++-- utils/TableGen/InstrInfoEmitter.cpp | 2 +- 21 files changed, 98 insertions(+), 96 deletions(-) diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 9df41dd0257c..415abb90da57 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -369,7 +369,7 @@ class CCState { /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive /// registers. If this is not possible, return zero. 
Otherwise, return the first /// register of the block that were allocated, marking the entire block as allocated. - unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { + unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { if (RegsRequired > Regs.size()) return 0; diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 1baf82ee5c45..88aab73d4058 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -15,12 +15,12 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include namespace llvm { class MCInst; - class MCRegisterInfo; class MCSubtargetInfo; class FeatureBitset; @@ -137,16 +137,16 @@ enum Flag { /// directly to describe itself. class MCInstrDesc { public: - unsigned short Opcode; // The opcode number - unsigned short NumOperands; // Num of args (may be more if variable_ops) - unsigned char NumDefs; // Num of args that are definitions - unsigned char Size; // Number of bytes in encoding. - unsigned short SchedClass; // enum identifying instr sched class - uint64_t Flags; // Flags identifying machine instr class - uint64_t TSFlags; // Target Specific Flag values - const uint16_t *ImplicitUses; // Registers implicitly read by this instr - const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr - const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands + unsigned short Opcode; // The opcode number + unsigned short NumOperands; // Num of args (may be more if variable_ops) + unsigned char NumDefs; // Num of args that are definitions + unsigned char Size; // Number of bytes in encoding. + unsigned short SchedClass; // enum identifying instr sched class + uint64_t Flags; // Flags identifying machine instr class + uint64_t TSFlags; // Target Specific Flag values + const MCPhysReg *ImplicitUses; // Registers implicitly read by this instr + const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr + const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands // Subtarget feature that this is deprecated on, if any // -1 implies this is not deprecated by any single feature. It may still be // deprecated due to a "complex" reason, below. @@ -472,7 +472,7 @@ class MCInstrDesc { /// marked as implicitly reading the 'CL' register, which it always does. /// /// This method returns null if the instruction has no implicit uses. - const uint16_t *getImplicitUses() const { return ImplicitUses; } + const MCPhysReg *getImplicitUses() const { return ImplicitUses; } /// \brief Return the number of implicit uses this instruction has. unsigned getNumImplicitUses() const { @@ -494,7 +494,7 @@ class MCInstrDesc { /// EAX/EDX/EFLAGS registers. /// /// This method returns null if the instruction has no implicit defs. - const uint16_t *getImplicitDefs() const { return ImplicitDefs; } + const MCPhysReg *getImplicitDefs() const { return ImplicitDefs; } /// \brief Return the number of implicit defs this instruct has. unsigned getNumImplicitDefs() const { @@ -509,7 +509,7 @@ class MCInstrDesc { /// \brief Return true if this instruction implicitly /// uses the specified physical register. 
bool hasImplicitUseOfPhysReg(unsigned Reg) const { - if (const uint16_t *ImpUses = ImplicitUses) + if (const MCPhysReg *ImpUses = ImplicitUses) for (; *ImpUses; ++ImpUses) if (*ImpUses == Reg) return true; diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c9c2d62cec30..f2f6584fb6c8 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -745,11 +745,11 @@ bool MIParser::verifyImplicitOperands(ArrayRef Operands, // Gather all the expected implicit operands. SmallVector ImplicitOperands; if (MCID.ImplicitDefs) - for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID.ImplicitUses) - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpUses, false, true)); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e202810bf6e5..1eb2edcd7cec 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -631,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) - for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 78985e01ef9a..91024e672f9c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = 
MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1335,7 +1335,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -2720,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2739,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2758,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index 5be2fa1b30b6..ee55f3eff3ac 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -53,7 +53,7 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI, bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI) const { - if (const uint16_t *ImpDefs = ImplicitDefs) + if (const MCPhysReg *ImpDefs = ImplicitDefs) for (; *ImpDefs; ++ImpDefs) if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs))) return true; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 8e8be8e52f63..61f7d749b968 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -4753,8 +4753,8 @@ bool AsmParser::parseMSInlineAsm( } // Consider implicit defs to be clobbers. Think of cpuid and push. 
- ArrayRef ImpDefs(Desc.getImplicitDefs(), - Desc.getNumImplicitDefs()); + ArrayRef ImpDefs(Desc.getImplicitDefs(), + Desc.getNumImplicitDefs()); ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end()); } diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 68f9dcf3fc2a..bc44bc5f2461 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,21 +25,21 @@ namespace { using namespace llvm; -static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7}; -static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, - AArch64::H3, AArch64::H4, AArch64::H5, - AArch64::H6, AArch64::H7}; -static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, - AArch64::S3, AArch64::S4, AArch64::S5, - AArch64::S6, AArch64::S7}; -static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, - AArch64::D3, AArch64::D4, AArch64::D5, - AArch64::D6, AArch64::D7}; -static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, - AArch64::Q3, AArch64::Q4, AArch64::Q5, - AArch64::Q6, AArch64::Q7}; +static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; +static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, + AArch64::H3, AArch64::H4, AArch64::H5, + AArch64::H6, AArch64::H7}; +static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, + AArch64::S3, AArch64::S4, AArch64::S5, + AArch64::S6, AArch64::S7}; +static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, + AArch64::D3, AArch64::D4, AArch64::D5, + AArch64::D6, AArch64::D7}; +static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, + AArch64::Q3, AArch64::Q4, AArch64::Q5, + AArch64::Q6, AArch64::Q7}; static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, @@ -86,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. 
- ArrayRef RegList; + ArrayRef RegList; if (LocVT.SimpleTy == MVT::i64) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 3d216c0ed04a..a731d00883a1 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } -static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; +static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; -static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; -static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; +static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA @@ -203,7 +203,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, unsigned StackAlign = DL.getStackAlignment(); unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); - ArrayRef RegList; + ArrayRef RegList; switch (LocVT.SimpleTy) { case MVT::i32: { RegList = RRegList; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 175107450fc0..9bdf823c85bd 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -3036,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() { } - static const uint16_t GPRArgRegs[] = { + static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index f38fe1904055..bcd0e5751258 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -218,7 +218,7 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function Ftor) } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) + for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 1db59e1dd99d..4a9c3413cb29 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -471,7 +471,7 @@ extern const MCInstrDesc HexagonInsts[]; } static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - ArrayRef Table) { + ArrayRef Table) { if (RegNo < Table.size()) { Inst.addOperand(MCOperand::createReg(Table[RegNo])); return MCDisassembler::Success; @@ -489,7 +489,7 @@ static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - static const uint16_t IntRegDecoderTable[] = { + static const MCPhysReg 
IntRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, @@ -498,13 +498,13 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, Hexagon::R30, Hexagon::R31}; - return (DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); } static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecRegDecoderTable[] = { + static const MCPhysReg VecRegDecoderTable[] = { Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, @@ -513,25 +513,25 @@ static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, Hexagon::V30, Hexagon::V31}; - return (DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); } static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t DoubleRegDecoderTable[] = { + static const MCPhysReg DoubleRegDecoderTable[] = { Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable); } static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecDblRegDecoderTable[] = { + static const MCPhysReg VecDblRegDecoderTable[] = { Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, @@ -543,25 +543,25 @@ static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, - Hexagon::P2, Hexagon::P3}; + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; - return (DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); } static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, - Hexagon::Q2, Hexagon::Q3}; + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; - return (DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); } static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t CtrlRegDecoderTable[] = { + static const MCPhysReg CtrlRegDecoderTable[] = { 
Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, @@ -582,7 +582,7 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t CtrlReg64DecoderTable[] = { + static const MCPhysReg CtrlReg64DecoderTable[] = { Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, Hexagon::NoRegister, Hexagon::C7_6, Hexagon::NoRegister, diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index b4ebd9140e75..c059d566709e 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -120,10 +120,10 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, // First, get the implicit defs and uses for this instruction. unsigned Opc = MI->getOpcode(); const MCInstrDesc &D = HII->get(Opc); - if (const uint16_t *R = D.ImplicitDefs) + if (const MCPhysReg *R = D.ImplicitDefs) while (*R) expandReg(*R++, Defs); - if (const uint16_t *R = D.ImplicitUses) + if (const MCPhysReg *R = D.ImplicitUses) while (*R) expandReg(*R++, Uses); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0a89ef424dd2..f82fe7699e84 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -316,18 +316,18 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const uint16_t VecLstS[] = { Hexagon::V0, Hexagon::V1, - Hexagon::V2, Hexagon::V3, - Hexagon::V4, Hexagon::V5, - Hexagon::V6, Hexagon::V7, - Hexagon::V8, Hexagon::V9, - Hexagon::V10, Hexagon::V11, - Hexagon::V12, Hexagon::V13, - Hexagon::V14, Hexagon::V15}; - static const uint16_t VecLstD[] = { Hexagon::W0, Hexagon::W1, - Hexagon::W2, Hexagon::W3, - Hexagon::W4, Hexagon::W5, - Hexagon::W6, Hexagon::W7}; + static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, + Hexagon::V2, Hexagon::V3, + Hexagon::V4, Hexagon::V5, + Hexagon::V6, Hexagon::V7, + Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, + Hexagon::V12, Hexagon::V13, + Hexagon::V14, Hexagon::V15}; + static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, + Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, + Hexagon::W6, Hexagon::W7}; auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget(); bool UseHVX = HST.useHVXOps(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index c11abc1f42f6..fefe7543f397 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -85,7 +85,7 @@ void HexagonMCChecker::init(MCInst const& MCI) { } // Get implicit register definitions. 
- const uint16_t* ImpDefs = MCID.getImplicitDefs(); + const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); for (unsigned i = 0; i < MCID.getNumImplicitDefs(); ++i) { unsigned R = ImpDefs[i]; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 35e490afe41f..c2c6275e7e8d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -334,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) { if (*ImpUses == Hexagon::GP) { switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 174deb88bc5c..beab844c6025 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -30,7 +30,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. /// -static const uint16_t VRRegNo[] = { +static const MCPhysReg VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 51c85f6ea673..c17603a7718a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1748,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MI->setDesc(NewDesc); if (NewDesc.ImplicitDefs) - for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); *ImpDefs; ++ImpDefs) if (!MI->definesRegister(*ImpDefs)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpDefs, true, true)); if (NewDesc.ImplicitUses) - for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); *ImpUses; ++ImpUses) if (!MI->readsRegister(*ImpUses)) MI->addOperand(*MI->getParent()->getParent(), diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 2eba084fc147..914ccdfb49a1 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -639,13 +639,13 @@ class DarwinX86AsmBackend : public X86AsmBackend { /// \brief Get the compact unwind number for a given register. The number /// corresponds to the enum lists in compact_unwind_encoding.h. int getCompactUnwindRegNum(unsigned Reg) const { - static const uint16_t CU32BitRegs[7] = { + static const MCPhysReg CU32BitRegs[7] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; - static const uint16_t CU64BitRegs[] = { + static const MCPhysReg CU64BitRegs[] = { X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 }; - const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; + const MCPhysReg *CURegs = Is64Bit ? 
CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index a4302d09078b..a6583399fa20 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -74,7 +74,7 @@ class InstrInfoEmitter { static void PrintDefList(const std::vector &Uses, unsigned Num, raw_ostream &OS) { - OS << "static const uint16_t ImplicitList" << Num << "[] = { "; + OS << "static const MCPhysReg ImplicitList" << Num << "[] = { "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) OS << getQualifiedName(Uses[i]) << ", "; OS << "0 };\n"; From 092921b3d817e8c63e565a633067e2927aa5c85a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Dec 2015 07:27:50 +0000 Subject: [PATCH 126/364] [X86][ADX] Added memory folding patterns and stack folding tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254844 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 6 +++ test/CodeGen/X86/stack-folding-adx-x86_64.ll | 45 ++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 test/CodeGen/X86/stack-folding-adx-x86_64.ll diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ebe329064c50..34d4e90b3101 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1650,6 +1650,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + // AVX-512 foldable instructions { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, diff --git a/test/CodeGen/X86/stack-folding-adx-x86_64.ll b/test/CodeGen/X86/stack-folding-adx-x86_64.ll new file mode 100644 index 000000000000..5f109f09aa19 --- /dev/null +++ b/test/CodeGen/X86/stack-folding-adx-x86_64.ll @@ -0,0 +1,45 @@ +; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+adx < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +; Stack reload folding tests. +; +; By including a nop call with sideeffects we can force a partial register spill of the +; relevant registers and check that the reload is correctly folded into the instruction. 
+ +define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { + ;CHECK-LABEL: stack_fold_addcarry_u32 + ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.addcarry.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) + ret i8 %2; +} +declare i8 @llvm.x86.addcarry.u32(i8, i32, i32, i8*) + +define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { + ;CHECK-LABEL: stack_fold_addcarry_u64 + ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.addcarry.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) + ret i8 %2; +} +declare i8 @llvm.x86.addcarry.u64(i8, i64, i64, i8*) + +define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { + ;CHECK-LABEL: stack_fold_addcarryx_u32 + ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) + ret i8 %2; +} +declare i8 @llvm.x86.addcarryx.u32(i8, i32, i32, i8*) + +define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { + ;CHECK-LABEL: stack_fold_addcarryx_u64 + ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) + ret i8 %2; +} +declare i8 @llvm.x86.addcarryx.u64(i8, i64, i64, i8*) From b2387dc751b88b853072fd679fb85ae7dc92e003 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 5 Dec 2015 14:42:34 +0000 Subject: [PATCH 127/364] [ASAN] Add doFinalization to reset state Summary: If the same pass manager is used for multiple modules ASAN complains about GlobalsMD being initialized twice. Fix this by resetting GlobalsMD in a new doFinalization method to allow this use case. 
Reviewers: kcc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14962 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 11 +++++++++++ test/Instrumentation/AddressSanitizer/twice.ll | 8 ++++++++ 2 files changed, 19 insertions(+) create mode 100644 test/Instrumentation/AddressSanitizer/twice.ll diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index dea94a514fe8..a9df5e5898ae 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -280,6 +280,11 @@ class GlobalsMetadata { GlobalsMetadata() : inited_(false) {} + void reset() { + inited_ = false; + Entries.clear(); + } + void init(Module &M) { assert(!inited_); inited_ = true; @@ -450,6 +455,7 @@ struct AddressSanitizer : public FunctionPass { bool maybeInsertAsanInitAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } @@ -1521,6 +1527,11 @@ bool AddressSanitizer::doInitialization(Module &M) { return true; } +bool AddressSanitizer::doFinalization(Module &M) { + GlobalsMD.reset(); + return false; +} + bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. diff --git a/test/Instrumentation/AddressSanitizer/twice.ll b/test/Instrumentation/AddressSanitizer/twice.ll new file mode 100644 index 000000000000..9f7826f73952 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/twice.ll @@ -0,0 +1,8 @@ +; Check that the address sanitizer pass can be reused +; RUN: opt < %s -S -run-twice -asan + +define void @foo(i64* %b) nounwind uwtable sanitize_address { + entry: + store i64 0, i64* %b, align 1 + ret void +} From 5fa397629b43cf1428395698efdf913042a04ab7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Dec 2015 17:34:07 +0000 Subject: [PATCH 128/364] [Hexagon] Don't call getNumImplicitDefs and then iterate over the count. getNumImplicitDefs contains a loop so its better to just loop over the null terminated implicit def list. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254852 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index fefe7543f397..46b7b41fec3b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -85,32 +85,33 @@ void HexagonMCChecker::init(MCInst const& MCI) { } // Get implicit register definitions. - const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); - for (unsigned i = 0; i < MCID.getNumImplicitDefs(); ++i) { - unsigned R = ImpDefs[i]; + if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) + for (; *ImpDef; ++ImpDef) { + unsigned R = *ImpDef; - if (Hexagon::R31 != R && MCID.isCall()) - // Any register other than the LR and the PC are actually volatile ones - // as defined by the ABI, not modified implicitly by the call insn. 
- continue; - if (Hexagon::PC == R) - // Branches are the only insns that can change the PC, - // otherwise a read-only register. - continue; + if (Hexagon::R31 != R && MCID.isCall()) + // Any register other than the LR and the PC are actually volatile ones + // as defined by the ABI, not modified implicitly by the call insn. + continue; + if (Hexagon::PC == R) + // Branches are the only insns that can change the PC, + // otherwise a read-only register. + continue; - if (Hexagon::USR_OVF == R) - // Many insns change the USR implicitly, but only one or another flag. - // The instruction table models the USR.OVF flag, which can be implicitly - // modified more than once, but cannot be modified in the same packet - // with an instruction that modifies is explicitly. Deal with such situ- - // ations individually. - SoftDefs.insert(R); - else if (isPredicateRegister(R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) - // Include implicit late predicates. - LatePreds.insert(R); - else - Defs[R].insert(PredSense(PredReg, isTrue)); - } + if (Hexagon::USR_OVF == R) + // Many insns change the USR implicitly, but only one or another flag. + // The instruction table models the USR.OVF flag, which can be implicitly + // modified more than once, but cannot be modified in the same packet + // with an instruction that modifies is explicitly. Deal with such situ- + // ations individually. + SoftDefs.insert(R); + else if (isPredicateRegister(R) && + HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) + // Include implicit late predicates. + LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } // Figure out explicit register definitions. for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { From e05c0dfd574a41963e48413bb0524d2cc23645e5 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 19:15:57 +0000 Subject: [PATCH 129/364] [WebAssembly] Expand frem as a floating point library function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 65d2b1967b13..baefd8d0758d 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -133,7 +133,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, + ISD::FREM}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. From e7174bd9a604bce9023a25abddb802fd851d9f3a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 19:24:17 +0000 Subject: [PATCH 130/364] [WebAssembly] Call TargetPassConfig base class functions in overriding functions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254855 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index b54699243bd4..917dfacfe9d5 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -146,14 +146,20 @@ void WebAssemblyPassConfig::addIRPasses() { } bool WebAssemblyPassConfig::addInstSelector() { + (void)TargetPassConfig::addInstSelector(); addPass( createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); return false; } -bool WebAssemblyPassConfig::addILPOpts() { return true; } +bool WebAssemblyPassConfig::addILPOpts() { + (void)TargetPassConfig::addILPOpts(); + return true; +} void WebAssemblyPassConfig::addPreRegAlloc() { + TargetPassConfig::addPreRegAlloc(); + // Prepare store instructions for register stackifying. addPass(createWebAssemblyStoreResults()); @@ -173,9 +179,13 @@ void WebAssemblyPassConfig::addPostRegAlloc() { // Run the register coloring pass to reduce the total number of registers. addPass(createWebAssemblyRegColoring()); + + TargetPassConfig::addPostRegAlloc(); } void WebAssemblyPassConfig::addPreEmitPass() { + TargetPassConfig::addPreEmitPass(); + // Put the CFG in structured form; insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); From ecc456747ed00af4960e5f76cb0a75c56e67f776 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 19:27:18 +0000 Subject: [PATCH 131/364] [WebAssembly] Move useAA() out of line to make it more convenient to experiment with. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254856 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblySubtarget.cpp | 1 + lib/Target/WebAssembly/WebAssemblySubtarget.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 3d9e7aacbfbf..cb2d5a63a19f 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } +bool WebAssemblySubtarget::useAA() const { return true; } diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h index 9b17300e497d..f530a290fa0e 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -69,7 +69,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { } const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override; - bool useAA() const override { return true; } + bool useAA() const override; // Predicates used by WebAssemblyInstrInfo.td. bool hasAddr64() const { return TargetTriple.isArch64Bit(); } From 1f5f023fe6acfb5d3bf41cc19045e3e187707bf0 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Sat, 5 Dec 2015 19:36:33 +0000 Subject: [PATCH 132/364] WebAssembly: improve readme, add placeholder for tests. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254857 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/README.txt | 9 +++++++++ lib/Target/WebAssembly/known_gcc_test_failures.txt | 2 ++ 2 files changed, 11 insertions(+) create mode 100644 lib/Target/WebAssembly/known_gcc_test_failures.txt diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index bfb124d504eb..78b3123cde85 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,6 +12,15 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +The backend is built, tested and archived on the following waterfall: + https://build.chromium.org/p/client.wasm.llvm/console + +The backend's bringup is done using the GCC torture test suite first since it +doesn't require C library support. Current known failures are in +known_gcc_test_failures.txt, all other tests should pass. The waterfall will +turn red if not. Once most of these pass, further testing will use LLVM's own +test suite. + Interesting work that remains to be done: * Write a pass to restructurize irreducible control flow. This needs to be done before register allocation to be efficient, because it may duplicate basic diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt new file mode 100644 index 000000000000..6038b198abea --- /dev/null +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -0,0 +1,2 @@ +# Tests which are known to fail from the GCC torture test suite. +# FIXME: placeholder. The script which runs the tests needs a file here! From 64d85bf50cf9481a3979a1c7fdf0ecb58077a1f1 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 19:43:19 +0000 Subject: [PATCH 133/364] [WebAssembly] Update a stale comment. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254859 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyInstrCall.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 6b7d03da4897..84f70400f8a3 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -14,8 +14,8 @@ let Defs = [ARGUMENTS] in { -// The call sequence start/end LLVM-isms isn't useful to WebAssembly since it's -// a virtual ISA. +// Call sequence markers. These have an immediate which represents the amount of +// stack space to allocate or free, which is used for varargs lowering. 
let isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : I<(outs), (ins i64imm:$amt), [(WebAssemblycallseq_start timm:$amt)]>; From 26a006a71e4fc1431a1a3782979ac3482fdab736 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 5 Dec 2015 19:54:59 +0000 Subject: [PATCH 134/364] fix typo; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254860 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/sse-minmax.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index e4d0373299fb..f0341277851d 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of -; then can be matched provided that the operands are swapped. +; them can be matched provided that the operands are swapped. ; Some of them can't be matched at all and require a comparison ; and a conditional branch. From b975ecb43f74888e84d14d92ef95c9a86684549f Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 20:03:44 +0000 Subject: [PATCH 135/364] [WebAssembly] Support inline asm constraints of type i16 and similar. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254861 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 11 +++++++---- test/CodeGen/WebAssembly/inline-asm.ll | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index baefd8d0758d..6e1283b4d334 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -228,10 +228,13 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - if (VT == MVT::i32) - return std::make_pair(0U, &WebAssembly::I32RegClass); - if (VT == MVT::i64) - return std::make_pair(0U, &WebAssembly::I64RegClass); + assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (VT.isInteger() && !VT.isVector()) { + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &WebAssembly::I32RegClass); + if (VT.getSizeInBits() <= 64) + return std::make_pair(0U, &WebAssembly::I64RegClass); + } break; default: break; diff --git a/test/CodeGen/WebAssembly/inline-asm.ll b/test/CodeGen/WebAssembly/inline-asm.ll index e9d2ebf51f63..646ea779dc8f 100644 --- a/test/CodeGen/WebAssembly/inline-asm.ll +++ b/test/CodeGen/WebAssembly/inline-asm.ll @@ -56,6 +56,22 @@ entry: ret i64 %0 } +; CHECK-LABEL: X_i16: +; CHECK: foo $1{{$}} +; CHECK: i32.store16 $discard=, 0($0), $1{{$}} +define void @X_i16(i16 * %t) { + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* %t) + ret void +} + +; CHECK-LABEL: X_ptr: +; CHECK: foo $1{{$}} +; CHECK: i32.store $discard=, 0($0), $1 +define void @X_ptr(i16 ** %t) { + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16** %t) + ret void +} + attributes #0 = { nounwind } !0 = !{i32 47} From 27951cf8f8fdefbfd6b51e6dbc956f29de553d64 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 5 Dec 2015 20:27:10 +0000 Subject: [PATCH 136/364] Add vector fmaxnum tests that correspond to the existing fminnum tests Note: missing 256-bit tests for min and max should also be added. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254862 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/fmaxnum.ll | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/test/CodeGen/X86/fmaxnum.ll b/test/CodeGen/X86/fmaxnum.ll index 23678c46dba0..17bd3b9b45b4 100644 --- a/test/CodeGen/X86/fmaxnum.ll +++ b/test/CodeGen/X86/fmaxnum.ll @@ -7,6 +7,12 @@ declare float @llvm.maxnum.f32(float, float) declare double @llvm.maxnum.f64(double, double) declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) +declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) + + ; CHECK-LABEL: @test_fmaxf ; CHECK: calll fmaxf define float @test_fmaxf(float %x, float %y) { @@ -48,3 +54,44 @@ define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) { %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone ret x86_fp80 %z } + +; CHECK-LABEL: @test_intrinsic_fmax_v2f32 +; CHECK: calll fmaxf +; CHECK: calll fmaxf +define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) { + %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone + ret <2 x float> %z +} + +; CHECK-LABEL: @test_intrinsic_fmax_v4f32 +; CHECK: calll fmaxf +; CHECK: calll fmaxf +; CHECK: calll fmaxf +; CHECK: calll fmaxf +define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) { + %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone + ret <4 x float> %z +} + +; CHECK-LABEL: @test_intrinsic_fmax_v2f64 +; CHECK: calll fmax +; CHECK: calll fmax +define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) { + %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone + ret <2 x double> %z +} + +; CHECK-LABEL: @test_intrinsic_fmax_v8f64 +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +; CHECK: calll fmax +define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) { + %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone + ret <8 x double> %z +} + From 76e67ade5a49d8ba1285a249117d8242cf3476e3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 20:41:36 +0000 Subject: [PATCH 137/364] [WebAssembly] Implement direct calls to external symbols. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254863 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyInstrCall.td | 38 ++++++++++++++++--- .../WebAssembly/WebAssemblyInstrInfo.td | 1 - test/CodeGen/WebAssembly/frem.ll | 26 +++++++++++++ test/CodeGen/WebAssembly/global.ll | 3 +- 4 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/WebAssembly/frem.ll diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 84f70400f8a3..0587c0b6613e 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -12,20 +12,22 @@ /// //===----------------------------------------------------------------------===// +// TODO: addr64: These currently assume the callee address is 32-bit. + let Defs = [ARGUMENTS] in { // Call sequence markers. 
These have an immediate which represents the amount of // stack space to allocate or free, which is used for varargs lowering. let isCodeGenOnly = 1 in { -def ADJCALLSTACKDOWN : I<(outs), (ins i64imm:$amt), +def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt), [(WebAssemblycallseq_start timm:$amt)]>; -def ADJCALLSTACKUP : I<(outs), (ins i64imm:$amt), +def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt), [(WebAssemblycallseq_end timm:$amt, undef)]>; } // isCodeGenOnly = 1 multiclass CALL { - def CALL_#vt : I<(outs vt:$dst), (ins global:$callee, variable_ops), - [(set vt:$dst, (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee)))], + def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops), + [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))], "call \t$dst, $callee">; def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), [(set vt:$dst, (WebAssemblycall1 I32:$callee))], @@ -37,8 +39,8 @@ let Uses = [SP32, SP64], isCall = 1 in { defm : CALL; defm : CALL; - def CALL_VOID : I<(outs), (ins global:$callee, variable_ops), - [(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee))], + def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), + [(WebAssemblycall0 (i32 imm:$callee))], "call \t$callee">; def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), [(WebAssemblycall0 I32:$callee)], @@ -46,3 +48,27 @@ let Uses = [SP32, SP64], isCall = 1 in { } // Uses = [SP32,SP64], isCall = 1 } // Defs = [ARGUMENTS] + +// Patterns for matching a direct call to a global address. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I32 tglobaladdr:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I64 tglobaladdr:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F32 tglobaladdr:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), + (CALL_VOID tglobaladdr:$callee)>; + +// Patterns for matching a direct call to an external symbol. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I32 texternalsym:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I64 texternalsym:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F32 texternalsym:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F64 texternalsym:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), + (CALL_VOID texternalsym:$callee)>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index caffac1bc52b..c36a45fe91d7 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -68,7 +68,6 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", def bb_op : Operand; def tjumptable_op : Operand; -def global : Operand; //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. 
diff --git a/test/CodeGen/WebAssembly/frem.ll b/test/CodeGen/WebAssembly/frem.ll new file mode 100644 index 000000000000..43552a9bdf3e --- /dev/null +++ b/test/CodeGen/WebAssembly/frem.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test that the frem instruction works. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK-LABEL: frem32: +; CHECK-NEXT: .param f32, f32{{$}} +; CHECK-NEXT: .result f32{{$}} +; CHECK-NEXT: call $push0=, fmodf, $0, $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define float @frem32(float %x, float %y) { + %a = frem float %x, %y + ret float %a +} + +; CHECK-LABEL: frem64: +; CHECK-NEXT: .param f64, f64{{$}} +; CHECK-NEXT: .result f64{{$}} +; CHECK-NEXT: call $push0=, fmod, $0, $1{{$}} +; CHECK-NEXT: return $pop0{{$}} +define double @frem64(double %x, double %y) { + %a = frem double %x, %y + ret double %a +} diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll index ffc73e3c1e35..e00d32b972ec 100644 --- a/test/CodeGen/WebAssembly/global.ll +++ b/test/CodeGen/WebAssembly/global.ll @@ -21,8 +21,7 @@ define i32 @foo() { ; CHECK-LABEL: call_memcpy: ; CHECK-NEXT: .param i32, i32, i32{{$}} ; CHECK-NEXT: .result i32{{$}} -; CHECK-NEXT: i32.const $push0=, memcpy{{$}} -; CHECK-NEXT: call_indirect $pop0, $0, $1, $2{{$}} +; CHECK-NEXT: call memcpy, $0, $1, $2{{$}} ; CHECK-NEXT: return $0{{$}} declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) { From 39f84fda2f1b44f926a313e138721d4b14d00da7 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 20:46:53 +0000 Subject: [PATCH 138/364] [WebAssembly] Replace the fake JUMP_TABLE instruction with a def : Pat. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254864 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyInstrInfo.td | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index c36a45fe91d7..dafe6c1ed64b 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -67,7 +67,6 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", //===----------------------------------------------------------------------===// def bb_op : Operand; -def tjumptable_op : Operand; //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. @@ -135,13 +134,11 @@ def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)), (CONST_I32 tglobaladdr:$dst)>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)), (CONST_I32 texternalsym:$dst)>; +def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)), + (CONST_I32 tjumptable:$dst)>; let Defs = [ARGUMENTS] in { -def JUMP_TABLE : I<(outs I32:$dst), (ins tjumptable_op:$addr), - [(set I32:$dst, (WebAssemblywrapper tjumptable:$addr))], - "jump_table\t$dst, $addr">; - // Function signature and local variable declaration "instructions". def PARAM : I<(outs), (ins variable_ops), [], ".param \t">; def RESULT : I<(outs), (ins variable_ops), [], ".result \t">; From 9eb92586b28e7d951f0ac320045885a320f2a0ab Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 5 Dec 2015 22:12:39 +0000 Subject: [PATCH 139/364] [WebAssembly] Don't perform the returned-argument optimization on constants. 
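The new check skips arguments such as global addresses: there is little to gain from rewriting other uses of a Constant through the call result (a Constant's uses are not confined to the current function), so the pass now leaves such arguments alone. Condensed from the WebAssemblyOptimizeReturned.cpp hunk below:

  Value *Arg = CS.getArgOperand(i);
  if (isa<Constant>(Arg)) // constants, globals, undef, ...
    continue;             // leave their uses untouched
  // otherwise, replace dominated uses of Arg with the call's result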
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254866 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyOptimizeReturned.cpp | 3 +++ test/CodeGen/WebAssembly/returned.ll | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index dea419c5975c..4dc401a2c7cc 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -57,6 +57,9 @@ void OptimizeReturned::visitCallSite(CallSite CS) { if (CS.paramHasAttr(1 + i, Attribute::Returned)) { Instruction *Inst = CS.getInstruction(); Value *Arg = CS.getArgOperand(i); + // Ignore constants, globals, undef, etc. + if (isa(Arg)) + continue; // Like replaceDominatedUsesWith but using Instruction/Use dominance. for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { Use &U = *UI++; diff --git a/test/CodeGen/WebAssembly/returned.ll b/test/CodeGen/WebAssembly/returned.ll index d65e2a8bc3e5..9cfdc711a8a3 100644 --- a/test/CodeGen/WebAssembly/returned.ll +++ b/test/CodeGen/WebAssembly/returned.ll @@ -33,3 +33,17 @@ entry: %call = tail call i8* @memcpy(i8* %p, i8* %s, i32 %n) ret i8* %p } + +; Test that the optimization isn't performed on constant arguments. + +; CHECK-LABEL: test_constant_arg: +; CHECK-NEXT: i32.const $push0=, global{{$}} +; CHECK-NEXT: call $discard=, returns_arg, $pop0{{$}} +; CHECK-NEXT: return{{$}} +@global = external global i32 +@addr = global i32* @global +define void @test_constant_arg() { + %call = call i32* @returns_arg(i32* @global) + ret void +} +declare i32* @returns_arg(i32* returned) From 3aa1034794ef6a4c9b9196c00debbd318b36fdd3 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Sat, 5 Dec 2015 23:36:52 +0000 Subject: [PATCH 140/364] [llvm-readobj] report_error() does not return, so we can simplify. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254868 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/llvm-readobj.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 5406afff241e..2a75ababb2e8 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -296,10 +296,8 @@ static std::error_code createDumper(const ObjectFile *Obj, StreamWriter &Writer, static void dumpObject(const ObjectFile *Obj) { StreamWriter Writer(outs()); std::unique_ptr Dumper; - if (std::error_code EC = createDumper(Obj, Writer, Dumper)) { + if (std::error_code EC = createDumper(Obj, Writer, Dumper)) reportError(Obj->getFileName(), EC); - return; - } outs() << '\n'; outs() << "File: " << Obj->getFileName() << "\n"; @@ -414,10 +412,8 @@ static void dumpInput(StringRef File) { // Attempt to open the binary. ErrorOr> BinaryOrErr = createBinary(File); - if (std::error_code EC = BinaryOrErr.getError()) { + if (std::error_code EC = BinaryOrErr.getError()) reportError(File, EC); - return; - } Binary &Binary = *BinaryOrErr.get().getBinary(); if (Archive *Arc = dyn_cast(&Binary)) From a8231e7f59c2f43d11c387030e5e0c29670bd5a9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Sat, 5 Dec 2015 23:44:22 +0000 Subject: [PATCH 141/364] [InstCombine] Call getCmpPredicateForMinMax only with a valid SPF Summary: There are `SelectPatternFlavor`s that don't represent min or max idioms, and we should not be passing those to `getCmpPredicateForMinMax`. Fixes PR25745. 
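The flavors in question are SPF_ABS, SPF_NABS and SPF_UNKNOWN; only the genuine min/max flavors should be handed to getCmpPredicateForMinMax. A condensed sketch of the shape of the fix, with names as in the hunks below:

  SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
  auto SPF = SPR.Flavor;
  if (SelectPatternResult::isMinOrMax(SPF)) {
    // cast-hoisting canonicalization; this is the path that ends up calling
    // getCmpPredicateForMinMax(), so ABS/NABS/UNKNOWN no longer reach it
  }
  if (SPF) {
    // the MAX(MAX(a, b), a), ABS(ABS(a)), NABS(NABS(a)) folds still run for
    // every recognized flavor, min/max or not
  }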
Reviewers: majnemer Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15249 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254869 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ValueTracking.h | 5 +++++ .../InstCombine/InstCombineSelect.cpp | 6 +++++- test/Transforms/InstCombine/pr25745.ll | 20 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/InstCombine/pr25745.ll diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index b34d6bac1f34..eb2c000e07cd 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -412,6 +412,11 @@ namespace llvm { bool Ordered; /// When implementing this min/max pattern as /// fcmp; select, does the fcmp have to be /// ordered? + + /// \brief Return true if \p SPF is a min or a max pattern. + static bool isMinOrMax(SelectPatternFlavor SPF) { + return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS); + } }; /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind /// and providing the out parameter results if we successfully match. diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2baa131bc99c..776704d1efa9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1070,7 +1070,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); auto SPF = SPR.Flavor; - if (SPF) { + if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that type casts are outside select patterns. if (LHS->getType()->getPrimitiveSizeInBits() != SI.getType()->getPrimitiveSizeInBits()) { @@ -1091,11 +1091,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.getType()); return ReplaceInstUsesWith(SI, NewSI); } + } + if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a + // ABS(ABS(a)) -> ABS(a) + // NABS(NABS(a)) -> NABS(a) if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) diff --git a/test/Transforms/InstCombine/pr25745.ll b/test/Transforms/InstCombine/pr25745.ll new file mode 100644 index 000000000000..3bf9efc92b90 --- /dev/null +++ b/test/Transforms/InstCombine/pr25745.ll @@ -0,0 +1,20 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +; Checking for a crash + +declare void @use.i1(i1 %val) +declare void @use.i64(i64 %val) + +define i64 @f(i32 %x) { +; CHECK-LABEL: @f( + entry: + %x.wide = sext i32 %x to i64 + %minus.x = sub i32 0, %x + %minus.x.wide = sext i32 %minus.x to i64 + %c = icmp slt i32 %x, 0 + %val = select i1 %c, i64 %x.wide, i64 %minus.x.wide + call void @use.i1(i1 %c) + call void @use.i64(i64 %x.wide) + ret i64 %val +; CHECK: ret i64 %val +} From eb32659b9c004c7ad9270932a3cc7a40286b77d6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Dec 2015 05:07:58 +0000 Subject: [PATCH 142/364] Minor formatting fix. 
NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254871 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAGNodes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 66ed5703fe11..e8f63b2d928e 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -664,7 +664,7 @@ class SDNode : public FoldingSetNode, public ilist_node { /// to which the glue operand points. Otherwise return NULL. SDNode *getGluedNode() const { if (getNumOperands() != 0 && - getOperand(getNumOperands()-1).getValueType() == MVT::Glue) + getOperand(getNumOperands()-1).getValueType() == MVT::Glue) return getOperand(getNumOperands()-1).getNode(); return nullptr; } From e6bc7d1f0d765d1e67a1c5d6c7a4f36677810c8e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Dec 2015 05:08:07 +0000 Subject: [PATCH 143/364] Use make_range to reduce mentions of iterator type. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254872 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/LazyCallGraph.h | 5 +-- include/llvm/Analysis/LoopInfo.h | 2 +- include/llvm/CodeGen/MachineBasicBlock.h | 12 +++--- include/llvm/CodeGen/MachineInstr.h | 38 ++++++++--------- include/llvm/CodeGen/MachineRegisterInfo.h | 42 +++++++------------ include/llvm/CodeGen/SelectionDAG.h | 5 +-- include/llvm/CodeGen/SelectionDAGNodes.h | 8 ++-- include/llvm/IR/CallSite.h | 4 +- include/llvm/IR/DebugInfo.h | 10 ++--- include/llvm/IR/Function.h | 4 +- include/llvm/IR/InstrTypes.h | 6 +-- include/llvm/IR/Instructions.h | 28 ++++++------- include/llvm/IR/Metadata.h | 4 +- include/llvm/IR/Module.h | 18 ++++---- include/llvm/IR/Statepoint.h | 9 ++-- include/llvm/IR/User.h | 2 +- include/llvm/IR/Value.h | 8 ++-- include/llvm/Object/Archive.h | 5 +-- include/llvm/Object/ObjectFile.h | 3 +- include/llvm/Support/Registry.h | 2 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 2 +- lib/Object/MachOObjectFile.cpp | 12 ++---- .../MCTargetDesc/HexagonMCInstrInfo.cpp | 3 +- utils/TableGen/CodeGenSchedule.h | 10 ++--- utils/TableGen/CodeGenTarget.h | 2 +- 25 files changed, 105 insertions(+), 139 deletions(-) diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index 7cbc40f768eb..270a32621be7 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -235,7 +235,7 @@ class LazyCallGraph { parent_iterator parent_end() const { return ParentSCCs.end(); } iterator_range parents() const { - return iterator_range(parent_begin(), parent_end()); + return make_range(parent_begin(), parent_end()); } /// \brief Test if this SCC is a parent of \a C. 
@@ -410,8 +410,7 @@ class LazyCallGraph { } iterator_range postorder_sccs() { - return iterator_range(postorder_scc_begin(), - postorder_scc_end()); + return make_range(postorder_scc_begin(), postorder_scc_end()); } /// \brief Lookup a function in the graph which has already been scanned and diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index ac0a4b02f445..9196250233cd 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -141,7 +141,7 @@ class LoopBase { block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } inline iterator_range blocks() const { - return iterator_range(block_begin(), block_end()); + return make_range(block_begin(), block_end()); } /// getNumBlocks - Get the number of blocks in this loop in constant time. diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index ac87f4f901f5..57bd24ddddfe 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -272,10 +272,10 @@ class MachineBasicBlock } inline iterator_range terminators() { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } inline iterator_range terminators() const { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } // Machine-CFG iterators @@ -325,16 +325,16 @@ class MachineBasicBlock bool succ_empty() const { return Successors.empty(); } inline iterator_range predecessors() { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range predecessors() const { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range successors() { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } inline iterator_range successors() const { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } // LiveIn management methods. 
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 607e2781960f..07b1133b2a4a 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -296,48 +296,46 @@ class MachineInstr const_mop_iterator operands_end() const { return Operands + NumOperands; } iterator_range operands() { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range operands() const { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range explicit_operands() { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range explicit_operands() const { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range implicit_operands() { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } iterator_range implicit_operands() const { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } /// Returns a range over all explicit operands that are register definitions. /// Implicit definition are not included! iterator_range defs() { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } /// \copydoc defs() iterator_range defs() const { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } /// Returns a range that includes all operands that are register uses. /// This may include unrelated operands which are not register uses. iterator_range uses() { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// \copydoc uses() iterator_range uses() const { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// Returns the number of the operand iterator \p I points to. @@ -351,10 +349,10 @@ class MachineInstr bool memoperands_empty() const { return NumMemRefs == 0; } iterator_range memoperands() { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } iterator_range memoperands() const { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } /// Return true if this instruction has exactly one MachineMemOperand. 
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 0a1f62006327..04191bc1b74f 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -234,7 +234,7 @@ class MachineRegisterInfo { static reg_iterator reg_end() { return reg_iterator(nullptr); } inline iterator_range reg_operands(unsigned Reg) const { - return iterator_range(reg_begin(Reg), reg_end()); + return make_range(reg_begin(Reg), reg_end()); } /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses @@ -250,8 +250,7 @@ class MachineRegisterInfo { inline iterator_range reg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_begin(Reg), - reg_instr_end()); + return make_range(reg_instr_begin(Reg), reg_instr_end()); } /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses @@ -266,8 +265,7 @@ class MachineRegisterInfo { } inline iterator_range reg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_begin(Reg), - reg_bundle_end()); + return make_range(reg_bundle_begin(Reg), reg_bundle_end()); } /// reg_empty - Return true if there are no instructions using or defining the @@ -287,8 +285,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_operands(unsigned Reg) const { - return iterator_range(reg_nodbg_begin(Reg), - reg_nodbg_end()); + return make_range(reg_nodbg_begin(Reg), reg_nodbg_end()); } /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk @@ -305,8 +302,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_nodbg_begin(Reg), - reg_instr_nodbg_end()); + return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end()); } /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk @@ -323,8 +319,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_nodbg_begin(Reg), - reg_bundle_nodbg_end()); + return make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end()); } /// reg_nodbg_empty - Return true if the only instructions using or defining @@ -342,7 +337,7 @@ class MachineRegisterInfo { static def_iterator def_end() { return def_iterator(nullptr); } inline iterator_range def_operands(unsigned Reg) const { - return iterator_range(def_begin(Reg), def_end()); + return make_range(def_begin(Reg), def_end()); } /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the @@ -358,8 +353,7 @@ class MachineRegisterInfo { inline iterator_range def_instructions(unsigned Reg) const { - return iterator_range(def_instr_begin(Reg), - def_instr_end()); + return make_range(def_instr_begin(Reg), def_instr_end()); } /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the @@ -374,8 +368,7 @@ class MachineRegisterInfo { } inline iterator_range def_bundles(unsigned Reg) const { - return iterator_range(def_bundle_begin(Reg), - def_bundle_end()); + return make_range(def_bundle_begin(Reg), def_bundle_end()); } /// def_empty - Return true if there are no instructions defining the @@ -400,7 +393,7 @@ class MachineRegisterInfo { static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range use_operands(unsigned Reg) const { - return iterator_range(use_begin(Reg), use_end()); + return make_range(use_begin(Reg), use_end()); } /// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the @@ 
-416,8 +409,7 @@ class MachineRegisterInfo { inline iterator_range use_instructions(unsigned Reg) const { - return iterator_range(use_instr_begin(Reg), - use_instr_end()); + return make_range(use_instr_begin(Reg), use_instr_end()); } /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the @@ -432,8 +424,7 @@ class MachineRegisterInfo { } inline iterator_range use_bundles(unsigned Reg) const { - return iterator_range(use_bundle_begin(Reg), - use_bundle_end()); + return make_range(use_bundle_begin(Reg), use_bundle_end()); } /// use_empty - Return true if there are no instructions using the specified @@ -462,8 +453,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_operands(unsigned Reg) const { - return iterator_range(use_nodbg_begin(Reg), - use_nodbg_end()); + return make_range(use_nodbg_begin(Reg), use_nodbg_end()); } /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk @@ -480,8 +470,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_instructions(unsigned Reg) const { - return iterator_range(use_instr_nodbg_begin(Reg), - use_instr_nodbg_end()); + return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end()); } /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk @@ -498,8 +487,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_bundles(unsigned Reg) const { - return iterator_range(use_bundle_nodbg_begin(Reg), - use_bundle_nodbg_end()); + return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end()); } /// use_nodbg_empty - Return true if there are no non-Debug instructions diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 83464a6c9785..a21e9ae881a7 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -326,11 +326,10 @@ class SelectionDAG { } iterator_range allnodes() { - return iterator_range(allnodes_begin(), allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } iterator_range allnodes() const { - return iterator_range(allnodes_begin(), - allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } /// Return the root tag of the SelectionDAG. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index e8f63b2d928e..548549ab1353 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -576,10 +576,10 @@ class SDNode : public FoldingSetNode, public ilist_node { static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } inline iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } /// Return true if there are exactly NUSES uses of the indicated value. 
@@ -651,8 +651,8 @@ class SDNode : public FoldingSetNode, public ilist_node { }; iterator_range op_values() const { - return iterator_range(value_op_iterator(op_begin()), - value_op_iterator(op_end())); + return make_range(value_op_iterator(op_begin()), + value_op_iterator(op_end())); } SDVTList getVTList() const { diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index c87f1293330b..8556dda163b8 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -158,7 +158,7 @@ class CallSiteBase { IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } bool arg_empty() const { return arg_end() == arg_begin(); } unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } @@ -182,7 +182,7 @@ class CallSiteBase { return (*this)->op_end() - (isCall() ? 1 : 3); } iterator_range data_ops() const { - return iterator_range(data_operands_begin(), data_operands_end()); + return make_range(data_operands_begin(), data_operands_end()); } bool data_operands_empty() const { return data_operands_end() == data_operands_begin(); diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h index 59cabd326d28..4caceacbb58e 100644 --- a/include/llvm/IR/DebugInfo.h +++ b/include/llvm/IR/DebugInfo.h @@ -105,23 +105,23 @@ class DebugInfoFinder { typedef SmallVectorImpl::const_iterator scope_iterator; iterator_range compile_units() const { - return iterator_range(CUs.begin(), CUs.end()); + return make_range(CUs.begin(), CUs.end()); } iterator_range subprograms() const { - return iterator_range(SPs.begin(), SPs.end()); + return make_range(SPs.begin(), SPs.end()); } iterator_range global_variables() const { - return iterator_range(GVs.begin(), GVs.end()); + return make_range(GVs.begin(), GVs.end()); } iterator_range types() const { - return iterator_range(TYs.begin(), TYs.end()); + return make_range(TYs.begin(), TYs.end()); } iterator_range scopes() const { - return iterator_range(Scopes.begin(), Scopes.end()); + return make_range(Scopes.begin(), Scopes.end()); } unsigned compile_unit_count() const { return CUs.size(); } diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 71822a462daa..a55ff5cb6e2b 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -493,11 +493,11 @@ class Function : public GlobalObject, public ilist_node { } iterator_range args() { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// @} diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 58bc7c1ee10a..157cb27cefbb 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -1479,14 +1479,12 @@ template class OperandBundleUser { /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). iterator_range bundle_op_infos() { - return iterator_range(bundle_op_info_begin(), - bundle_op_info_end()); + return make_range(bundle_op_info_begin(), bundle_op_info_end()); } /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). 
iterator_range bundle_op_infos() const { - return iterator_range(bundle_op_info_begin(), - bundle_op_info_end()); + return make_range(bundle_op_info_begin(), bundle_op_info_end()); } /// \brief Populate the BundleOpInfo instances and the Use& vector from \p diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 5119749ba73c..ae06a5f641a1 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1547,14 +1547,12 @@ class CallInst : public Instruction, iterator_range arg_operands() { // The last operand in the op list is the callee - it's not one of the args // so we don't want to iterate over it. - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 1); + return make_range(op_begin(), op_end() - getNumTotalBundleOperands() - 1); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 1); + return make_range(op_begin(), op_end() - getNumTotalBundleOperands() - 1); } /// \brief Wrappers for getting the \c Use of a call argument. @@ -2213,7 +2211,7 @@ class ExtractValueInst : public UnaryInstruction { inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2330,7 +2328,7 @@ class InsertValueInst : public Instruction { inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -3105,12 +3103,12 @@ class SwitchInst : public TerminatorInst { /// cases - iteration adapter for range-for loops. iterator_range cases() { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// cases - iteration adapter for range-for loops. iterator_range cases() const { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// Returns an iterator that points to the default case. @@ -3454,14 +3452,12 @@ class InvokeInst : public TerminatorInst, /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 3); + return make_range(op_begin(), op_end() - getNumTotalBundleOperands() - 3); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 3); + return make_range(op_begin(), op_end() - getNumTotalBundleOperands() - 3); } /// \brief Wrappers for getting the \c Use of a invoke argument. @@ -3915,12 +3911,12 @@ class CatchPadInst : public TerminatorInst { /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() { - return iterator_range(op_begin(), op_end() - 2); + return make_range(op_begin(), op_end() - 2); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), op_end() - 2); + return make_range(op_begin(), op_end() - 2); } /// \brief Wrappers for getting the \c Use of a catchpad argument. 
@@ -4039,12 +4035,12 @@ class TerminatePadInst : public TerminatorInst { /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() { - return iterator_range(op_begin(), arg_end()); + return make_range(op_begin(), arg_end()); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), arg_end()); + return make_range(op_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a terminatepad argument. diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 8805cec1471b..276fa7d11885 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -1210,10 +1210,10 @@ class NamedMDNode : public ilist_node { const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); } inline iterator_range operands() { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } inline iterator_range operands() const { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } }; diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 6cf75e747e06..2378b6d83d87 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -515,10 +515,10 @@ class Module { bool global_empty() const { return GlobalList.empty(); } iterator_range globals() { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } iterator_range globals() const { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } /// @} @@ -537,10 +537,10 @@ class Module { bool empty() const { return FunctionList.empty(); } iterator_range functions() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } iterator_range functions() const { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// @} @@ -555,10 +555,10 @@ class Module { bool alias_empty() const { return AliasList.empty(); } iterator_range aliases() { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } iterator_range aliases() const { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } /// @} @@ -579,12 +579,10 @@ class Module { bool named_metadata_empty() const { return NamedMDList.empty(); } iterator_range named_metadata() { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } iterator_range named_metadata() const { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } /// Destroy ConstantArrays in LLVMContext if they are not used. diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index efe58e3e5a15..21b98a97a83c 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -173,7 +173,7 @@ class StatepointBase { /// range adapter for call arguments iterator_range call_args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// \brief Return true if the call or the callee has the given attribute. 
@@ -201,8 +201,7 @@ class StatepointBase { /// range adapter for GC transition arguments iterator_range gc_transition_args() const { - return iterator_range(gc_transition_args_begin(), - gc_transition_args_end()); + return make_range(gc_transition_args_begin(), gc_transition_args_end()); } /// Number of additional arguments excluding those intended @@ -225,7 +224,7 @@ class StatepointBase { /// range adapter for vm state arguments iterator_range vm_state_args() const { - return iterator_range(vm_state_begin(), vm_state_end()); + return make_range(vm_state_begin(), vm_state_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { @@ -241,7 +240,7 @@ class StatepointBase { /// range adapter for gc arguments iterator_range gc_args() const { - return iterator_range(gc_args_begin(), gc_args_end()); + return make_range(gc_args_begin(), gc_args_end()); } /// Get list of all gc reloactes linked to this statepoint diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 78a3b43c86d2..639dc5c01c8c 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -233,7 +233,7 @@ class User : public Value { return value_op_iterator(op_end()); } iterator_range operand_values() { - return iterator_range(value_op_begin(), value_op_end()); + return make_range(value_op_begin(), value_op_end()); } /// \brief Drop all references to operands. diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 9a87a7178866..7f11ba3d1f6d 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -283,10 +283,10 @@ class Value { use_iterator use_end() { return use_iterator(); } const_use_iterator use_end() const { return const_use_iterator(); } iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } bool user_empty() const { return UseList == nullptr; } @@ -300,10 +300,10 @@ class Value { User *user_back() { return *user_begin(); } const User *user_back() const { return *user_begin(); } iterator_range users() { - return iterator_range(user_begin(), user_end()); + return make_range(user_begin(), user_end()); } iterator_range users() const { - return iterator_range(user_begin(), user_end()); + return make_range(user_begin(), user_end()); } /// \brief Return true if there is exactly one user of this value. diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index a68f200ce3fb..8dd042a2533f 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -191,14 +191,13 @@ class Archive : public Binary { child_iterator child_begin(bool SkipInternal = true) const; child_iterator child_end() const; iterator_range children(bool SkipInternal = true) const { - return iterator_range(child_begin(SkipInternal), - child_end()); + return make_range(child_begin(SkipInternal), child_end()); } symbol_iterator symbol_begin() const; symbol_iterator symbol_end() const; iterator_range symbols() const { - return iterator_range(symbol_begin(), symbol_end()); + return make_range(symbol_begin(), symbol_end()); } // Cast methods. 
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 08131908e067..ce0c891ee0c2 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -100,8 +100,7 @@ class SectionRef { relocation_iterator relocation_begin() const; relocation_iterator relocation_end() const; iterator_range relocations() const { - return iterator_range(relocation_begin(), - relocation_end()); + return make_range(relocation_begin(), relocation_end()); } section_iterator getRelocatedSection() const; diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 2cb8f3c719fa..bbea97b289a6 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -119,7 +119,7 @@ namespace llvm { static iterator end() { return iterator(nullptr); } static iterator_range entries() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// Abstract base class for registry listeners, which are informed when new diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index f27aa39f2d5d..3c9d2fd50336 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -86,7 +86,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } ModulePtrSet::iterator end_added() { return AddedModules.end(); } iterator_range added() { - return iterator_range(begin_added(), end_added()); + return make_range(begin_added(), end_added()); } ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index e34c86542ab3..d1f79b225ee4 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1403,8 +1403,7 @@ MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Finish(Trie); Finish.moveToEnd(); - return iterator_range(export_iterator(Start), - export_iterator(Finish)); + return make_range(export_iterator(Start), export_iterator(Finish)); } iterator_range MachOObjectFile::exports() const { @@ -1574,8 +1573,7 @@ MachOObjectFile::rebaseTable(ArrayRef Opcodes, bool is64) { MachORebaseEntry Finish(Opcodes, is64); Finish.moveToEnd(); - return iterator_range(rebase_iterator(Start), - rebase_iterator(Finish)); + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } iterator_range MachOObjectFile::rebaseTable() const { @@ -1826,8 +1824,7 @@ MachOObjectFile::bindTable(ArrayRef Opcodes, bool is64, MachOBindEntry Finish(Opcodes, is64, BKind); Finish.moveToEnd(); - return iterator_range(bind_iterator(Start), - bind_iterator(Finish)); + return make_range(bind_iterator(Start), bind_iterator(Finish)); } iterator_range MachOObjectFile::bindTable() const { @@ -1857,8 +1854,7 @@ MachOObjectFile::end_load_commands() const { iterator_range MachOObjectFile::load_commands() const { - return iterator_range(begin_load_commands(), - end_load_commands()); + return make_range(begin_load_commands(), end_load_commands()); } StringRef diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 7adc0ba51da3..e6842076db2a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -45,8 +45,7 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, iterator_range HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); - return iterator_range( - 
MCI.begin() + bundleInstructionsOffset, MCI.end()); + return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end()); } size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index 6c34f51900cb..f5c50c992a92 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -257,18 +257,16 @@ class CodeGenSchedModels { class_iterator classes_end() { return SchedClasses.end(); } const_class_iterator classes_end() const { return SchedClasses.end(); } iterator_range classes() { - return iterator_range(classes_begin(), classes_end()); + return make_range(classes_begin(), classes_end()); } iterator_range classes() const { - return iterator_range(classes_begin(), classes_end()); + return make_range(classes_begin(), classes_end()); } iterator_range explicit_classes() { - return iterator_range( - classes_begin(), classes_begin() + NumInstrSchedClasses); + return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses); } iterator_range explicit_classes() const { - return iterator_range( - classes_begin(), classes_begin() + NumInstrSchedClasses); + return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses); } Record *getModelOrItinDef(Record *ProcDef) const { diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 24b38514260c..cf4a0bbe5bd9 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -173,7 +173,7 @@ class CodeGenTarget { inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();} inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); } iterator_range instructions() const { - return iterator_range(inst_begin(), inst_end()); + return make_range(inst_begin(), inst_end()); } From e407ee0520400e5fa7316aed15caf788990de2af Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Sun, 6 Dec 2015 11:35:18 +0000 Subject: [PATCH 144/364] AVX512: support AVX512BW Intrinsic in 32bit mode. Differential Revision: http://reviews.llvm.org/D15076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254873 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 128 +-- test/CodeGen/X86/avx512bw-intrinsics.ll | 1064 ++++++++++++++++++++++- 2 files changed, 1139 insertions(+), 53 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c07bca8fe52a..364a8c260ba1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1710,8 +1710,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - if (!Subtarget->is64Bit()) + if (!Subtarget->is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. 
@@ -15983,58 +15985,83 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } +/// \brief Return Mask with the necessary casting or extending +/// for \p Mask according to \p MaskVT when lowering masking intrinsics +static SDValue getMaskNode(SDValue Mask, MVT MaskVT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG, SDLoc dl) { + + if (MaskVT.bitsGT(Mask.getSimpleValueType())) { + // Mask should be extended + Mask = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); + } + + if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { + assert(MaskVT == MVT::v64i1 && "Unexpected mask VT!"); + assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + // In case 32bit mode, bitcast i64 is illegal, extend/split it. + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(0, dl, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(1, dl, MVT::i32)); + + Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Hi, Lo); + + } else { + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } +} + /// \brief Return (and \p Op, \p Mask) for compare instructions or /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue VMask; - unsigned OpcodeSelect = ISD::VSELECT; - SDLoc dl(Op); + SDValue PreservedSrc, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + unsigned OpcodeSelect = ISD::VSELECT; + SDLoc dl(Op); - if (isAllOnesConstant(Mask)) - return Op; + if (isAllOnesConstant(Mask)) + return Op; - if (MaskVT.bitsGT(Mask.getSimpleValueType())) { - MVT newMaskVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); - VMask = DAG.getBitcast(MaskVT, - DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask)); - } else { - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); - // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements - // are extracted by EXTRACT_SUBVECTOR. 
- VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - } + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - switch (Op.getOpcode()) { - default: break; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - case X86ISD::VFPCLASS: + switch (Op.getOpcode()) { + default: break; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return DAG.getNode(ISD::AND, dl, VT, Op, VMask); + case X86ISD::VFPCLASS: case X86ISD::VFPCLASSS: - return DAG.getNode(ISD::OR, dl, VT, Op, VMask); - case X86ISD::VTRUNC: - case X86ISD::VTRUNCS: - case X86ISD::VTRUNCUS: - // We can't use ISD::VSELECT here because it is not always "Legal" - // for the destination type. For example vpmovqb require only AVX512 - // and vselect that can operate on byte element type require BWI - OpcodeSelect = X86ISD::SELECT; - break; - } - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); + return DAG.getNode(ISD::OR, dl, VT, Op, VMask); + case X86ISD::VTRUNC: + case X86ISD::VTRUNCS: + case X86ISD::VTRUNCUS: + // We can't use ISD::VSELECT here because it is not always "Legal" + // for the destination type. For example vpmovqb require only AVX512 + // and vselect that can operate on byte element type require BWI + OpcodeSelect = X86ISD::SELECT; + break; + } + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. 
@@ -16569,12 +16596,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Mask = Op.getOperand(3); MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } @@ -19978,6 +20000,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results); } } + case ISD::INTRINSIC_WO_CHAIN: { + Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)); + return; + } case ISD::READCYCLECOUNTER: { return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 6b032e0e6d78..0eba131a67c4 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW - +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { ; AVX512BW-LABEL: test_pcmpeq_b: @@ -8,6 +8,18 @@ define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpeq_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp0: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) ret i64 %res } @@ -19,6 +31,21 @@ define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpeq_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp1: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) ret i64 %res } @@ -31,6 +58,12 @@ define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpeq_w: +; 
AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) ret i32 %res } @@ -42,6 +75,13 @@ define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpeq_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) ret i32 %res } @@ -54,6 +94,18 @@ define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) { ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpgt_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp2: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) ret i64 %res } @@ -65,6 +117,21 @@ define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpgt_b: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp3: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) ret i64 %res } @@ -77,6 +144,12 @@ define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_pcmpgt_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) ret i32 %res } @@ -88,6 +161,13 @@ define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_pcmpgt_w: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) ret i32 %res } @@ -121,6 +201,46 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: addq %rdx, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_cmp_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $68, %esp +; 
AVX512F-32-NEXT: .Ltmp4: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: addl (%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $68, %esp +; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) %ret1 = add i64 %res0, %res1 @@ -167,6 +287,49 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: addq %rdx, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_cmp_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $68, %esp +; AVX512F-32-NEXT: .Ltmp5: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnleb 
%zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $68, %esp +; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) %ret1 = add i64 %res0, %res1 @@ -214,6 +377,46 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: addq %rdx, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_ucmp_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $68, %esp +; AVX512F-32-NEXT: .Ltmp6: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: addl (%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $68, %esp +; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) %ret1 = add i64 %res0, %res1 @@ -260,6 +463,49 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512BW-NEXT: kmovq %k0, %rax ; AVX512BW-NEXT: addq %rdx, %rax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $68, %esp +; AVX512F-32-NEXT: .Ltmp7: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} +; 
AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $68, %esp +; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) %ret1 = add i64 %res0, %res1 @@ -307,6 +553,33 @@ define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: addl %edx, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_cmp_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %edx +; AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: retl %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) %ret1 = add i32 %res0, %res1 @@ -353,6 +626,34 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: addl %edx, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_cmp_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; 
AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %edx +; AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: retl %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) %ret1 = add i32 %res0, %res1 @@ -400,6 +701,33 @@ define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: addl %edx, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_ucmp_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %edx +; AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: retl %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) %ret1 = add i32 %res0, %res1 @@ -446,6 +774,34 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: addl %edx, %eax ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_ucmp_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: addl %eax, %ecx +; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %edx +; 
AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: retl %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) %ret1 = add i32 %res0, %res1 @@ -474,6 +830,12 @@ define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i1 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_x86_mask_blend_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -485,6 +847,14 @@ define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a ; AVX512BW-NEXT: kmovq %rdi, %k1 ; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_x86_mask_blend_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1] ret <64 x i8> %res } @@ -494,6 +864,11 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -505,6 +880,13 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -515,6 +897,12 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -524,6 +912,12 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_ ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512: +; AVX512F-32: # 
BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -536,6 +930,14 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -547,6 +949,13 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -557,6 +966,12 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -571,6 +986,14 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <3 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -584,6 +1007,13 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector 
<16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -598,6 +1028,11 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res } @@ -609,6 +1044,15 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) ret <64 x i8> %res } @@ -619,6 +1063,14 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: kmovq %rdi, %k1 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) ret <64 x i8> %res } @@ -628,6 +1080,12 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res @@ -640,6 +1098,16 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) ret <64 x i8> %res @@ -651,6 +1119,15 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: kmovq %rsi, %k1 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: +; 
AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) ret <64 x i8> %res @@ -664,6 +1141,11 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -675,6 +1157,13 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -685,6 +1174,12 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -694,6 +1189,12 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -706,6 +1207,14 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -717,6 +1226,13 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %p ; AVX512BW-NEXT: 
kmovd %esi, %k1 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -727,6 +1243,12 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -741,6 +1263,14 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, < ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -754,6 +1284,13 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -768,6 +1305,11 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res } @@ -779,6 +1321,15 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call 
<64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) ret <64 x i8> %res } @@ -789,6 +1340,14 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; AVX512BW-NEXT: kmovq %rdi, %k1 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) ret <64 x i8> %res } @@ -798,6 +1357,12 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res @@ -810,6 +1375,16 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) ret <64 x i8> %res @@ -821,6 +1396,15 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt ; AVX512BW-NEXT: kmovq %rsi, %k1 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) ret <64 x i8> %res @@ -833,6 +1417,11 @@ define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -844,6 +1433,13 @@ define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} ; 
AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -854,6 +1450,12 @@ define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -863,6 +1465,12 @@ define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -875,6 +1483,14 @@ define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -886,6 +1502,13 @@ define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -898,6 +1521,11 @@ define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -909,6 +1537,13 @@ define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vpsubsw 
%zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -919,6 +1554,12 @@ define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -928,6 +1569,12 @@ define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -940,6 +1587,14 @@ define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -951,6 +1606,13 @@ define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -963,6 +1625,11 @@ define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -974,6 +1641,13 @@ define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -984,6 +1658,12 @@ define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -993,6 +1673,12 @@ define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -1005,6 +1691,14 @@ define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -1016,6 +1710,13 @@ define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -1028,6 +1729,11 @@ define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -1039,6 +1745,13 @@ define <32 x i16> 
@test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -1049,6 +1762,12 @@ define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -1058,6 +1777,12 @@ define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -1070,6 +1795,14 @@ define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -1081,6 +1814,13 @@ define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -1098,6 +1838,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, 
%zmm2 {%k1} +; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1114,6 +1864,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1130,6 +1888,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1146,6 +1914,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1162,6 +1938,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = 
call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1178,6 +1964,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1194,6 +1988,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1210,6 +2014,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1227,6 +2039,15 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 +; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} +; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1244,6 +2065,15 @@ define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <3 ; AVX512BW-NEXT: vpermt2w 
%zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 +; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1261,6 +2091,15 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 +; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1} +; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1277,6 +2116,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1293,6 +2142,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1309,6 +2166,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), 
%k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1325,6 +2192,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpabsw %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1341,6 +2216,16 @@ define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x ; AVX512BW-NEXT: vpabsb %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1357,6 +2242,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i1 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1373,6 +2266,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> 
%x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1389,6 +2290,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1407,6 +2316,16 @@ define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1} +; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} +; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512F-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1424,6 +2343,14 @@ define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) +; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1} +; AVX512F-32-NEXT: retl call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1441,6 +2368,16 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1} +; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} +; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512F-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1458,6 +2395,15 @@ define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpmovswb %zmm0, 
(%rdi) {%k1} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) +; AVX512F-32-NEXT: kmovd %eax, %k1 +; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1475,6 +2421,16 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} +; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} +; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512F-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1492,6 +2448,15 @@ define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1 ; AVX512BW-NEXT: kmovd %esi, %k1 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) +; AVX512F-32-NEXT: kmovd %eax, %k1 +; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1507,6 +2472,14 @@ define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1523,6 +2496,14 @@ define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i1 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <16 x i32> 
@llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) %res2 = add <16 x i32> %res, %res1 @@ -1539,6 +2520,16 @@ define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63] +; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1555,6 +2546,16 @@ define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; 
AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55] +; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 @@ -1571,6 +2572,14 @@ define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31] +; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1587,6 +2596,14 @@ define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm0 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27] +; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = add <32 x i16> %res, %res1 @@ -1605,6 +2622,18 @@ define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddb %zmm3, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4) %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4) %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1) @@ -1625,6 +2654,16 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) @@ 
-1642,6 +2681,13 @@ define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) { ; AVX512BW-NEXT: vpslldq $4, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpslldq $8, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpslldq $4, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) %res2 = add <8 x i64> %res, %res1 @@ -1657,6 +2703,13 @@ define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) { ; AVX512BW-NEXT: vpsrldq $4, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsrldq $8, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpsrldq $4, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) %res2 = add <8 x i64> %res, %res1 @@ -1671,6 +2724,13 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 +; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) %res2 = add <8 x i64> %res, %res1 From f7fc15ed79ddcb8cb26f00b61d32b757c62a24b3 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Sun, 6 Dec 2015 13:06:20 +0000 Subject: [PATCH 145/364] [X86] Always generate precise CFA adjustments. This removes the code path that generate "synchronous" (only correct at call site) CFA. We will probably want to re-introduce it once we are capable of emitting different .eh_frame and .debug_frame sections. Differential Revision: http://reviews.llvm.org/D14948 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254874 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineModuleInfo.h | 11 +++--- lib/Target/X86/X86CallFrameOptimization.cpp | 3 +- lib/Target/X86/X86FrameLowering.cpp | 21 +++++++----- lib/Target/X86/X86MCInstLower.cpp | 6 ++-- test/CodeGen/X86/push-cfi.ll | 37 ++++++++++++++++----- test/CodeGen/X86/tls-pie.ll | 8 +++++ test/CodeGen/X86/win32-pic-jumptable.ll | 4 +++ 7 files changed, 65 insertions(+), 25 deletions(-) diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 43b9f5203c50..acd6440eb358 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -161,6 +161,12 @@ class MachineModuleInfo : public ImmutablePass { bool CallsUnwindInit; bool HasEHFunclets; + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using. + // -g. 
At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only. + /// DbgInfoAvailable - True if debugging information is available /// in this module. bool DbgInfoAvailable; @@ -235,11 +241,6 @@ class MachineModuleInfo : public ImmutablePass { bool hasDebugInfo() const { return DbgInfoAvailable; } void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; } - // Returns true if we need to generate precise CFI. Currently - // this is equivalent to hasDebugInfo(), but if we ever implement - // async EH, it will require precise CFI as well. - bool usePreciseUnwindInfo() const { return hasDebugInfo(); } - bool callsEHReturn() const { return CallsEHReturn; } void setCallsEHReturn(bool b) { CallsEHReturn = b; } diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 23990b01ba18..fc6ee1752f1f 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -500,7 +500,8 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, // For debugging, when using SP-based CFA, we need to adjust the CFA // offset after each push. - if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo()) + // TODO: This is needed only if we require precise CFA. + if (!TFL->hasFP(MF)) TFL->BuildCFI(MBB, std::next(Push), DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 682f75c7f51c..2e7ed58e340a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -2524,10 +2524,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // (Pushes of argument for frame setup, callee pops for frame destroy) Amount -= InternalAmt; - // If this is a callee-pop calling convention, and we're emitting precise - // SP-based CFI, emit a CFA adjust for the amount the callee popped. - if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && - MMI.usePreciseUnwindInfo()) + // TODO: This is needed only if we require precise CFA. + // If this is a callee-pop calling convention, emit a CFA adjust for + // the amount the callee popped. + if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); @@ -2548,11 +2548,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // offset to be correct at each call site, while for debugging we want // it to be more precise. int CFAOffset = Amount; - if (!MMI.usePreciseUnwindInfo()) - CFAOffset += InternalAmt; - CFAOffset = isDestroy ? -CFAOffset : CFAOffset; - BuildCFI(MBB, I, DL, - MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + // TODO: When not using precise CFA, we also need to adjust for the + // InternalAmt here. + + if (CFAOffset) { + CFAOffset = isDestroy ? 
-CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } } return; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 8878c9f169b5..af386807cd70 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1143,8 +1143,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const X86FrameLowering* FrameLowering = MF->getSubtarget().getFrameLowering(); bool hasFP = FrameLowering->hasFP(*MF); - - bool NeedsDwarfCFI = MMI->usePreciseUnwindInfo(); + + // TODO: This is needed only if we require precise CFA. + bool NeedsDwarfCFI = + (MMI->hasDebugInfo() || MF->getFunction()->needsUnwindTableEntry()); int stackGrowth = -RI->getSlotSize(); if (NeedsDwarfCFI && !hasFP) { diff --git a/test/CodeGen/X86/push-cfi.ll b/test/CodeGen/X86/push-cfi.ll index 4d07a1d8181b..6389708f42cc 100644 --- a/test/CodeGen/X86/push-cfi.ll +++ b/test/CodeGen/X86/push-cfi.ll @@ -6,17 +6,24 @@ declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) declare void @empty() -; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE -; with size 16 before the invocation. Without FP, we expect.cfi_adjust_cfa_offset -; before and after. -; Darwin should not generate pushes in neither circumstance. +; When we use an invoke, we expect a .cfi_escape GNU_ARGS_SIZE +; with size 16 before the invocation. Without FP, we also expect +; .cfi_adjust_cfa_offset after each push. +; Darwin should not generate pushes in either circumstance. ; CHECK-LABEL: test1_nofp: ; LINUX: .cfi_escape 0x2e, 0x10 -; LINUX: .cfi_adjust_cfa_offset 16 ; LINUX-NEXT: pushl $4 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $3 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $2 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $1 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: call ; LINUX-NEXT: addl $16, %esp ; LINUX: .cfi_adjust_cfa_offset -16 @@ -62,11 +69,18 @@ cleanup: ; so darwin should not generate pushes. 
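; A minimal illustrative sketch of the pattern the CHECK lines below verify
; (hypothetical callee "foo" taking two stack arguments; not taken from the
; patch): with SP-based CFA, every argument push is now followed by an exact
; .cfi_adjust_cfa_offset, and the post-call stack cleanup is matched by a
; negative adjustment, e.g.
;   pushl $2
;   .cfi_adjust_cfa_offset 4
;   pushl $1
;   .cfi_adjust_cfa_offset 4
;   calll foo
;   addl $8, %esp
;   .cfi_adjust_cfa_offset -8
; so the unwind info stays precise between the pushes and the call, not only
; at the call site itself.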
; CHECK-LABEL: test2_nofp: ; LINUX-NOT: .cfi_escape -; LINUX: .cfi_adjust_cfa_offset 16 -; LINUX-NEXT: pushl $4 +; LINUX: pushl $4 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $3 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $2 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $1 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: call ; LINUX-NEXT: addl $16, %esp ; LINUX: .cfi_adjust_cfa_offset -16 @@ -170,11 +184,18 @@ cleanup: ; without parameters, but don't need to adjust the cfa offset ; CHECK-LABEL: test5_nofp: ; LINUX: .cfi_escape 0x2e, 0x10 -; LINUX: .cfi_adjust_cfa_offset 16 ; LINUX-NEXT: pushl $4 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $3 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $2 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: pushl $1 +; LINUX-NEXT: Ltmp{{[0-9]+}}: +; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: call ; LINUX-NEXT: addl $16, %esp ; LINUX: .cfi_adjust_cfa_offset -16 diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll index 10fe1e94bbdc..235230e3c6a8 100644 --- a/test/CodeGen/X86/tls-pie.ll +++ b/test/CodeGen/X86/tls-pie.ll @@ -36,9 +36,13 @@ entry: define i32 @f3() { ; X32-LABEL: f3: ; X32: calll .L{{[0-9]+}}$pb +; X32-NEXT: .Ltmp{{[0-9]+}}: +; X32-NEXT: .cfi_adjust_cfa_offset 4 ; X32-NEXT: .L{{[0-9]+}}$pb: ; X32-NEXT: popl %eax ; X32-NEXT: .Ltmp{{[0-9]+}}: +; X32-NEXT: .cfi_adjust_cfa_offset -4 +; X32-NEXT: .Ltmp{{[0-9]+}}: ; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %eax ; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax ; X32-NEXT: movl %gs:(%eax), %eax @@ -56,9 +60,13 @@ entry: define i32* @f4() { ; X32-LABEL: f4: ; X32: calll .L{{[0-9]+}}$pb +; X32-NEXT: .Ltmp{{[0-9]+}}: +; X32-NEXT: .cfi_adjust_cfa_offset 4 ; X32-NEXT: .L{{[0-9]+}}$pb: ; X32-NEXT: popl %ecx ; X32-NEXT: .Ltmp{{[0-9]+}}: +; X32-NEXT: .cfi_adjust_cfa_offset -4 +; X32-NEXT: .Ltmp{{[0-9]+}}: ; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %ecx ; X32-NEXT: movl %gs:0, %eax ; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax diff --git a/test/CodeGen/X86/win32-pic-jumptable.ll b/test/CodeGen/X86/win32-pic-jumptable.ll index 1a90b6238f26..3a8ef2d0b916 100644 --- a/test/CodeGen/X86/win32-pic-jumptable.ll +++ b/test/CodeGen/X86/win32-pic-jumptable.ll @@ -1,8 +1,12 @@ ; RUN: llc < %s -relocation-model=pic | FileCheck %s ; CHECK: calll L0$pb +; CHECK-NEXT: Ltmp{{[0-9]+}}: +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: L0$pb: ; CHECK-NEXT: popl %eax +; CHECK-NEXT: Ltmp{{[0-9]+}}: +; CHECK-NEXT: .cfi_adjust_cfa_offset -4 ; CHECK-NEXT: addl LJTI0_0(,%ecx,4), %eax ; CHECK-NEXT: jmpl *%eax From 023610af4f6a40d16801fac7fc0effdd329108cd Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Sun, 6 Dec 2015 13:26:56 +0000 Subject: [PATCH 146/364] [X86][AVX512] add vmovss/sd missing encoding Differential Revision: http://reviews.llvm.org/D14701 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254875 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 7 +++ lib/Target/X86/X86InstrAVX512.td | 83 +++++++++++++------------ lib/Target/X86/X86IntrinsicsInfo.h | 6 +- test/CodeGen/X86/avx512-intrinsics.ll | 45 ++++++++++++++ test/MC/X86/avx512-encodings.s | 55 +++++++++++++++++ test/MC/X86/intel-syntax-avx512.s | 88 
+++++++++++++++++++++++++++ 6 files changed, 245 insertions(+), 39 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 57ad278a68bd..1c028dea601f 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1840,6 +1840,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d15d0dc96e6f..452e9f05f84a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2973,53 +2973,60 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { - let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, (vt (OpNode VR128X:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in - def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), - !strconcat(asm, - "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), - [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; - def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, - EVEX, VEX_LIG; +multiclass avx512_move_scalar { + defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), + asm, "$src2, $src1","$src1, $src2", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2))), + IIC_SSE_MOV_S_RR>, EVEX_4V; + let Constraints = "$src1 = $dst" , mayLoad = 1 in + defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _, + (outs _.RC:$dst), + (ins _.ScalarMemOp:$src), + asm,"$src","$src", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src)))))>, EVEX; + let isCodeGenOnly = 1 in { + def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src1, _.FRC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))))], + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; + let mayLoad = 1 in + def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + } let mayStore = 1 in { - def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG; - def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, 
"\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG, EVEX_K; + def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, + EVEX; + def mrk: AVX512PI<0x11, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; } // mayStore - } //hasSideEffects = 0 } -let ExeDomain = SSEPackedSingle in -defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem, - loadf32>, XS, EVEX_CD8<32, CD8VT1>; +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, + VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, - loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, + VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), - (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), - (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index bb2f7248b0e9..cc53d5f3ce5a 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -143,7 +143,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), @@ -807,6 +807,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::MOVDDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK, X86ISD::MOVDDUP, 0), + X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK, + X86ISD::MOVSD, 0), + X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK, + X86ISD::MOVSS, 0), X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK, X86ISD::MOVSHDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK, diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index c47027eed2b2..c01f1adce360 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -6234,3 +6234,48 @@ define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { } declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) +declare <4 x 
float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk: +; CHECK: vmovss %xmm1, %xmm0, %xmm2 {%k1} + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + ret <4 x float> %res +} + +define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz: +; CHECK: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2) + ret <4 x float> %res +} + +define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr: +; CHECK: vmovss %xmm1, %xmm0, %xmm0 + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1) + ret <4 x float> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) +define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr: +; CHECK: vmovsd %xmm1, %xmm0, %xmm0 + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1) + ret <2 x double> %res +} + +define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz: +; CHECK: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2) + ret <2 x double> %res +} + +define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk: +; CHECK: vmovsd %xmm1, %xmm0, %xmm2 {%k1} + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + ret <2 x double> %res +} + diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 2043100bf3e6..d8806effb0e3 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -19220,3 +19220,58 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: vucomiss -516(%rdx), %xmm22 // CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff] vucomiss -516(%rdx), %xmm22 +// CHECK: vmovsd (%rcx), %xmm25 {%k3} +// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09] + vmovsd (%rcx), %xmm25 {%k3} + +// CHECK: vmovsd (%rcx), %xmm25 {%k3} {z} +// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09] + vmovsd (%rcx), %xmm25 {%k3} {z} + +// CHECK: vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0xe7,0x8b,0x10,0xdb] + vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z} + +// CHECK: vmovss (%rcx), %xmm2 {%k4} +// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11] + vmovss (%rcx), %xmm2 {%k4} + +// CHECK: vmovss (%rcx), %xmm2 {%k4} {z} +// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11] + vmovss (%rcx), %xmm2 {%k4} {z} + +// CHECK: vmovss %xmm26, %xmm9, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x36,0x8c,0x10,0xe2] + vmovss %xmm26, %xmm9, %xmm28 {%k4} {z} + +// CHECK: vmovsd %xmm15, %xmm22, 
%xmm21 {%k7} {z} +// CHECK: encoding: [0x62,0xc1,0xcf,0x87,0x10,0xef] + vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z} + +// CHECK: vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z} +// CHECK: encoding: [0x62,0xd1,0x97,0x8d,0x10,0xd8] + vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z} + +// CHECK: vmovss %xmm2, %xmm27, %xmm17 {%k2} {z} +// CHECK: encoding: [0x62,0xe1,0x26,0x82,0x10,0xca] + vmovss %xmm2, %xmm27, %xmm17 {%k2} {z} + +// CHECK: vmovss %xmm23, %xmm19, %xmm10 {%k3} {z} +// CHECK: encoding: [0x62,0x31,0x66,0x83,0x10,0xd7] + vmovss %xmm23, %xmm19, %xmm10 {%k3} {z} + +// CHECK: vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z} +// CHECK: encoding: [0x62,0xf1,0x87,0x8e,0x10,0xe4] + vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z} + +// CHECK: vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xc1,0xef,0x8f,0x10,0xe6] + vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z} + +// CHECK: vmovss %xmm19, %xmm11, %xmm21 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x26,0x8b,0x10,0xeb] + vmovss %xmm19, %xmm11, %xmm21 {%k3} {z} + +// CHECK: vmovss %xmm24, %xmm27, %xmm15 {%k2} {z} +// CHECK: encoding: [0x62,0x11,0x26,0x82,0x10,0xf8] + vmovss %xmm24, %xmm27, %xmm15 {%k2} {z} diff --git a/test/MC/X86/intel-syntax-avx512.s b/test/MC/X86/intel-syntax-avx512.s index 6340f853b553..c5ab7dde1106 100644 --- a/test/MC/X86/intel-syntax-avx512.s +++ b/test/MC/X86/intel-syntax-avx512.s @@ -264,3 +264,91 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae} // CHECK: vcomiss xmm16, dword ptr [rcx] // CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2f,0x01] vcomiss xmm16, DWORD PTR [rcx] + +// CHECK: vmovss dword ptr [rcx] {k2}, xmm13 +// CHECK: encoding: [0x62,0x71,0x7e,0x0a,0x11,0x29] + vmovss dword ptr [rcx]{k2},xmm13 + +// CHECK: vmovss dword ptr [rax + 8*r14 + 4660], xmm13 +// CHECK: encoding: [0xc4,0x21,0x7a,0x11,0xac,0xf0,0x34,0x12,0x00,0x00] + vmovss dword ptr [rax+r14*8+0x1234],xmm13 + +// CHECK: vmovss dword ptr [rdx + 508], xmm13 +// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0x01,0x00,0x00] + vmovss dword ptr [rdx+0x1fc],xmm13 + +// CHECK: vmovss dword ptr [rdx + 512], xmm13 +// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0x02,0x00,0x00] + vmovss dword ptr [rdx+0x200],xmm13 + +// CHECK: vmovss dword ptr [rdx - 512], xmm13 +// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0xfe,0xff,0xff] + vmovss dword ptr [rdx-0x200],xmm13 + +// CHECK: vmovss dword ptr [rdx - 516], xmm13 +// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0xfd,0xff,0xff] + vmovss dword ptr [rdx-0x204],xmm13 + +// CHECK: vmovss dword ptr [rdx + 508], xmm5 +// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0x01,0x00,0x00] + vmovss dword ptr [rdx+0x1fc],xmm5 + +// CHECK: vmovss dword ptr [rdx + 512], xmm5 +// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0x02,0x00,0x00] + vmovss dword ptr [rdx+0x200],xmm5 + +// CHECK: vmovss dword ptr [rdx - 512], xmm5 +// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0xfe,0xff,0xff] + vmovss dword ptr [rdx-0x200], xmm5 + +// CHECK: vmovss dword ptr [rdx - 516], xmm5 +// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0xfd,0xff,0xff] + vmovss dword ptr [rdx-0x204],xmm5 + +// CHECK: vmovss dword ptr [rcx], xmm13 +// CHECK: encoding: [0xc5,0x7a,0x11,0x29] + vmovss dword ptr [rcx],xmm13 + +// CHECK: vmovss xmm2, dword ptr [rcx] +// CHECK: encoding: [0xc5,0xfa,0x10,0x11] + vmovss xmm2, dword ptr [rcx] + +// CHECK: vmovss xmm2 {k4}, dword ptr [rcx] +// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11] + vmovss xmm2{k4}, dword ptr [rcx] + +// CHECK: vmovss xmm2 {k4} {z}, dword ptr [rcx] +// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11] + vmovss xmm2{k4} {z}, dword ptr [rcx] + +// CHECK: vmovsd xmm25 , 
qword ptr [rcx] +// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x09] + vmovsd xmm25, qword ptr [rcx] + +// CHECK: vmovsd xmm25 {k3}, qword ptr [rcx] +// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09] + vmovsd xmm25{k3}, qword ptr [rcx] + +// CHECK: vmovsd xmm25 {k3} {z}, qword ptr [rcx] +// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09] + vmovsd xmm25{k3} {z}, qword ptr [rcx] + +// CHECK: vmovsd xmm25 , qword ptr [rax + 8*r14 + 291] +// CHECK: encoding: [0x62,0x21,0xff,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00] + vmovsd xmm25, qword ptr [rax+r14*8+0x123] + +// CHECK: vmovsd xmm25 , qword ptr [rdx + 1016] +// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x7f] + vmovsd xmm25, qword ptr [rdx+0x3f8] + +// CHECK: vmovsd xmm25 , qword ptr [rdx + 1024] +// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0x00,0x04,0x00,0x00] + vmovsd xmm25, qword ptr [rdx+0x400] + +// CHECK: vmovsd xmm25 , qword ptr [rdx - 1024] +// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x80] + vmovsd xmm25, qword ptr [rdx-0x400] + +// CHECK: vmovsd xmm25 , qword ptr [rdx - 1032] +// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff] + vmovsd xmm25, qword ptr [rdx-0x408] From 1c73aa0b63c49b8fbbc9cd0cf35ebcc027174969 Mon Sep 17 00:00:00 2001 From: Marina Yatsina Date: Sun, 6 Dec 2015 15:31:47 +0000 Subject: [PATCH 147/364] [X86] Add support for loopz, loopnz for Intel syntax According to x86 spec, loopz and loopnz should be supported for Intel syntax, where loopz is equivalent to loope and loopnz is equivalent to loopne. Differential Revision: http://reviews.llvm.org/D15148 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254877 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 4 ++-- test/MC/X86/intel-syntax.s | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1c21a098bc6c..4a4ceaca88f4 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2572,8 +2572,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope", "att">; -def : MnemonicAlias<"loopnz", "loopne", "att">; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 71bf6cc0ffdb..214d827168b8 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -741,3 +741,8 @@ fcomip st, st(2) fucomip st, st(2) // CHECK: fcompi %st(2) // CHECK: fucompi %st(2) + +loopz _foo +loopnz _foo +// CHECK: loope _foo +// CHECK: loopne _foo From e89ea49c12448ba0f7909013048ca1ae7e8dd98d Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Sun, 6 Dec 2015 16:18:25 +0000 Subject: [PATCH 148/364] Create llvm.global_ctors in the new format. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254878 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/ModuleUtils.cpp | 4 ++-- test/Instrumentation/SanitizerCoverage/coverage.ll | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index c5abf76d93fa..9ec28a3f3d47 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array, } GVCtor->eraseFromParent(); } else { - // Use a simple two-field struct if there isn't one already. + // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - nullptr); + IRB.getInt8PtrTy(), nullptr); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. diff --git a/test/Instrumentation/SanitizerCoverage/coverage.ll b/test/Instrumentation/SanitizerCoverage/coverage.ll index 659c03040f2f..71fdbbb5ada7 100644 --- a/test/Instrumentation/SanitizerCoverage/coverage.ll +++ b/test/Instrumentation/SanitizerCoverage/coverage.ll @@ -29,8 +29,8 @@ entry: } ; CHECK0-NOT: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor } -; CHECK1: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor } -; CHECK2: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor } +; CHECK1: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor, i8* null } +; CHECK2: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor, i8* null } ; CHECK0-NOT: call void @__sanitizer_cov( ; CHECK0-NOT: call void @__sanitizer_cov_module_init( From 180d5cb8e1b2fb4d9fdba85669c972de4cf6b734 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Dec 2015 18:05:12 +0000 Subject: [PATCH 149/364] [x86] add missing maxnum/minnum tests for 256-bit vectors Also, switch to x86-64 because once we can lower these to something more reasonable, there will be less noise in the checks. And add AVX runs because those will be different than SSE. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254879 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/fmaxnum.ll | 58 ++++++++++++++++++++++--------------- test/CodeGen/X86/fminnum.ll | 50 ++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/test/CodeGen/X86/fmaxnum.ll b/test/CodeGen/X86/fmaxnum.ll index 17bd3b9b45b4..7aa087f92bdc 100644 --- a/test/CodeGen/X86/fmaxnum.ll +++ b/test/CodeGen/X86/fmaxnum.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=x86 -mtriple=i386-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s declare float @fmaxf(float, float) declare double @fmax(double, double) @@ -10,86 +11,97 @@ declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) ; CHECK-LABEL: @test_fmaxf -; CHECK: calll fmaxf +; CHECK: jmp fmaxf define float @test_fmaxf(float %x, float %y) { %z = call float @fmaxf(float %x, float %y) readnone ret float %z } ; CHECK-LABEL: @test_fmax -; CHECK: calll fmax +; CHECK: jmp fmax define double @test_fmax(double %x, double %y) { %z = call double @fmax(double %x, double %y) readnone ret double %z } ; CHECK-LABEL: @test_fmaxl -; CHECK: calll fmaxl +; CHECK: callq fmaxl define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) { %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone ret x86_fp80 %z } ; CHECK-LABEL: @test_intrinsic_fmaxf -; CHECK: calll fmaxf +; CHECK: jmp fmaxf define float @test_intrinsic_fmaxf(float %x, float %y) { %z = call float @llvm.maxnum.f32(float %x, float %y) readnone ret float %z } ; CHECK-LABEL: @test_intrinsic_fmax -; CHECK: calll fmax +; CHECK: jmp fmax define double @test_intrinsic_fmax(double %x, double %y) { %z = call double @llvm.maxnum.f64(double %x, double %y) readnone ret double %z } ; CHECK-LABEL: @test_intrinsic_fmaxl -; CHECK: calll fmaxl +; CHECK: callq fmaxl define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) { %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone ret x86_fp80 %z } ; CHECK-LABEL: @test_intrinsic_fmax_v2f32 -; CHECK: calll fmaxf -; CHECK: calll fmaxf +; CHECK: callq fmaxf +; CHECK: callq fmaxf define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) { %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone ret <2 x float> %z } ; CHECK-LABEL: @test_intrinsic_fmax_v4f32 -; CHECK: calll fmaxf -; CHECK: calll fmaxf -; CHECK: calll fmaxf -; CHECK: calll fmaxf +; CHECK: callq fmaxf +; CHECK: callq fmaxf +; CHECK: callq fmaxf +; CHECK: callq fmaxf define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) { %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone ret <4 x float> %z } ; CHECK-LABEL: @test_intrinsic_fmax_v2f64 -; CHECK: calll fmax -; CHECK: calll fmax +; CHECK: callq fmax +; CHECK: callq fmax define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) { %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone ret <2 x double> %z } +; CHECK-LABEL: @test_intrinsic_fmax_v4f64 +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: 
callq fmax +; CHECK: callq fmax +define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) { + %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone + ret <4 x double> %z +} + ; CHECK-LABEL: @test_intrinsic_fmax_v8f64 -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax -; CHECK: calll fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax +; CHECK: callq fmax define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) { %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone ret <8 x double> %z diff --git a/test/CodeGen/X86/fminnum.ll b/test/CodeGen/X86/fminnum.ll index 1e33cf4696af..e89ed32ad618 100644 --- a/test/CodeGen/X86/fminnum.ll +++ b/test/CodeGen/X86/fminnum.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=x86 -mtriple=i386-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s declare float @fminf(float, float) declare double @fmin(double, double) @@ -10,6 +11,7 @@ declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80) declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) ; CHECK-LABEL: @test_fminf @@ -27,7 +29,7 @@ define double @test_fmin(double %x, double %y) { } ; CHECK-LABEL: @test_fminl -; CHECK: calll fminl +; CHECK: callq fminl define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) { %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone ret x86_fp80 %z @@ -48,47 +50,57 @@ define double @test_intrinsic_fmin(double %x, double %y) { } ; CHECK-LABEL: @test_intrinsic_fminl -; CHECK: calll fminl +; CHECK: callq fminl define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) { %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone ret x86_fp80 %z } ; CHECK-LABEL: @test_intrinsic_fmin_v2f32 -; CHECK: calll fminf -; CHECK: calll fminf +; CHECK: callq fminf +; CHECK: callq fminf define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) { %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone ret <2 x float> %z } ; CHECK-LABEL: @test_intrinsic_fmin_v4f32 -; CHECK: calll fminf -; CHECK: calll fminf -; CHECK: calll fminf -; CHECK: calll fminf +; CHECK: callq fminf +; CHECK: callq fminf +; CHECK: callq fminf +; CHECK: callq fminf define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) { %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone ret <4 x float> %z } ; CHECK-LABEL: @test_intrinsic_fmin_v2f64 -; CHECK: calll fmin -; CHECK: calll fmin +; CHECK: callq fmin +; CHECK: callq fmin define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) { %z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone ret <2 x double> %z } +; CHECK-LABEL: @test_intrinsic_fmin_v4f64 +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +define <4 x double> @test_intrinsic_fmin_v4f64(<4 x double> %x, <4 x 
double> %y) { + %z = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone + ret <4 x double> %z +} + ; CHECK-LABEL: @test_intrinsic_fmin_v8f64 -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin -; CHECK: calll fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin +; CHECK: callq fmin define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) { %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone ret <8 x double> %z From 51b079cd28aacf110bf1b53c89bca4a2b23c4d22 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sun, 6 Dec 2015 19:29:54 +0000 Subject: [PATCH 150/364] [WebAssembly] Add some more ideas to README.txt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/README.txt | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 78b3123cde85..97072ab1cc15 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -47,3 +47,46 @@ expression stack across the jump (sometimes). We should (a) model this, and (b) extend the stackifier to utilize it. //===---------------------------------------------------------------------===// + +The min/max operators aren't exactly a Date: Sun, 6 Dec 2015 19:31:44 +0000 Subject: [PATCH 151/364] [WebAssembly] Tighten up some testcase regular expressions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254881 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/WebAssembly/cfg-stackify.ll | 18 +++++++++--------- test/CodeGen/WebAssembly/cpus.ll | 4 ++-- test/CodeGen/WebAssembly/switch.ll | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll index c615ebb0db9d..4c86c55a8f85 100644 --- a/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -90,14 +90,14 @@ back: ; CHECK-LABEL: test2: ; CHECK: block BB2_2{{$}} -; CHECK: br_if {{.*}}, BB2_2{{$}} +; CHECK: br_if {{[^,]*}}, BB2_2{{$}} ; CHECK: BB2_1: ; CHECK: br_if ${{[0-9]+}}, BB2_1{{$}} ; CHECK: BB2_2: ; CHECK: return{{$}} ; OPT-LABEL: test2: ; OPT: block BB2_2{{$}} -; OPT: br_if {{.*}}, BB2_2{{$}} +; OPT: br_if {{[^,]*}}, BB2_2{{$}} ; OPT: BB2_1: ; OPT: br_if ${{[0-9]+}}, BB2_1{{$}} ; OPT: BB2_2: @@ -140,9 +140,9 @@ for.end: ; OPT-LABEL: doublediamond: ; OPT: block BB3_5{{$}} ; OPT: block BB3_4{{$}} -; OPT: br_if {{.*}}, BB3_4{{$}} +; OPT: br_if {{[^,]*}}, BB3_4{{$}} ; OPT: block BB3_3{{$}} -; OPT: br_if {{.*}}, BB3_3{{$}} +; OPT: br_if {{[^,]*}}, BB3_3{{$}} ; OPT: br BB3_5{{$}} ; OPT: BB3_4: ; OPT: BB3_5: @@ -204,7 +204,7 @@ exit: ; OPT-LABEL: diamond: ; OPT: block BB5_3{{$}} ; OPT: block BB5_2{{$}} -; OPT: br_if {{.*}}, BB5_2{{$}} +; OPT: br_if {{[^,]*}}, BB5_2{{$}} ; OPT: br BB5_3{{$}} ; OPT: BB5_2: ; OPT: BB5_3: @@ -269,7 +269,7 @@ loop: ; OPT-NOT: br ; OPT: BB8_1: ; OPT: loop BB8_2{{$}} -; OPT: br_if {{.*}}, BB8_1{{$}} +; OPT: br_if {{[^,]*}}, BB8_1{{$}} ; OPT: BB8_2: ; OPT: return ${{[0-9]+}}{{$}} define i32 @simple_loop(i32* %p, i32 %a) { @@ -333,7 +333,7 @@ exit: ; OPT-LABEL: ifelse_earlyexits: ; OPT: block BB10_4{{$}} ; OPT: block BB10_3{{$}} -; OPT: br_if {{.*}}, BB10_3{{$}} +; OPT: 
br_if {{[^,]*}}, BB10_3{{$}} ; OPT: br_if $1, BB10_4{{$}} ; OPT: br BB10_4{{$}} ; OPT: BB10_3: @@ -379,9 +379,9 @@ exit: ; OPT: loop BB11_7{{$}} ; OPT: block BB11_6{{$}} ; OPT: block BB11_5{{$}} -; OPT: br_if {{.*}}, BB11_5{{$}} +; OPT: br_if {{[^,]*}}, BB11_5{{$}} ; OPT: block BB11_4{{$}} -; OPT: br_if {{.*}}, BB11_4{{$}} +; OPT: br_if {{[^,]*}}, BB11_4{{$}} ; OPT: br BB11_6{{$}} ; OPT: BB11_4: ; OPT: br BB11_6{{$}} diff --git a/test/CodeGen/WebAssembly/cpus.ll b/test/CodeGen/WebAssembly/cpus.ll index bbc9c8fe4f31..2b77c5f475c8 100644 --- a/test/CodeGen/WebAssembly/cpus.ll +++ b/test/CodeGen/WebAssembly/cpus.ll @@ -9,8 +9,8 @@ ; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID -; CHECK-NOT: {{.*}} is not a recognized processor for this target -; INVALID: {{.*}} is not a recognized processor for this target +; CHECK-NOT: {{.*}} is not a recognized processor for this target +; INVALID: {{.*}} is not a recognized processor for this target define i32 @f(i32 %i_like_the_web) { ret i32 %i_like_the_web diff --git a/test/CodeGen/WebAssembly/switch.ll b/test/CodeGen/WebAssembly/switch.ll index c62333c336fa..b146a239b419 100644 --- a/test/CodeGen/WebAssembly/switch.ll +++ b/test/CodeGen/WebAssembly/switch.ll @@ -21,7 +21,7 @@ declare void @foo5() ; CHECK: block BB0_4{{$}} ; CHECK: block BB0_3{{$}} ; CHECK: block BB0_2{{$}} -; CHECK: tableswitch {{.*}}, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_5, BB0_6, BB0_7{{$}} +; CHECK: tableswitch {{[^,]*}}, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_5, BB0_6, BB0_7{{$}} ; CHECK: BB0_2: ; CHECK: call foo0 ; CHECK: BB0_3: @@ -101,7 +101,7 @@ sw.epilog: ; preds = %entry, %sw.bb.5, %s ; CHECK: block BB1_4{{$}} ; CHECK: block BB1_3{{$}} ; CHECK: block BB1_2{{$}} -; CHECK: tableswitch {{.*}}, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_5, BB1_6, BB1_7{{$}} +; CHECK: tableswitch {{[^,]*}}, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_5, BB1_6, BB1_7{{$}} ; CHECK: BB1_2: ; CHECK: call foo0 ; CHECK: BB1_3: From 4693393907a37825c48332f7dad147cb2764441a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sun, 6 Dec 2015 19:33:32 +0000 Subject: [PATCH 152/364] [WebAssembly] Enable folding of offsets into global variable addresses. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254882 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 10 ++--- .../WebAssembly/WebAssemblyMCInstLower.cpp | 9 +++- test/CodeGen/WebAssembly/offset-folding.ll | 45 +++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/WebAssembly/offset-folding.ll diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6e1283b4d334..85fb753ed0e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -196,9 +196,8 @@ FastISel *WebAssemblyTargetLowering::createFastISel( bool WebAssemblyTargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode * /*GA*/) const { - // The WebAssembly target doesn't support folding offsets into global - // addresses. - return false; + // All offsets can be folded. + return true; } MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, @@ -528,13 +527,12 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(Op); const auto *GA = cast(Op); EVT VT = Op.getValueType(); - assert(GA->getOffset() == 0 && - "offsets on global addresses are forbidden by isOffsetFoldingLegal"); assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT)); + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, + GA->getOffset())); } SDValue diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index dd9358035a88..2d2adeb93d2d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -39,11 +39,16 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { + assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - if (!MO.isJTI() && MO.getOffset()) - llvm_unreachable("unknown symbol op"); + int64_t Offset = MO.getOffset(); + if (Offset != 0) { + assert(!MO.isJTI() && "Unexpected offset with jump table index"); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } return MCOperand::createExpr(Expr); } diff --git a/test/CodeGen/WebAssembly/offset-folding.ll b/test/CodeGen/WebAssembly/offset-folding.ll new file mode 100644 index 000000000000..19b110fcfa8a --- /dev/null +++ b/test/CodeGen/WebAssembly/offset-folding.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test that constant offsets can be folded into global addresses. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +@x = external global [0 x i32] +@y = global [50 x i32] zeroinitializer + +; Test basic constant offsets of both defined and external symbols. 
+ +; CHECK-LABEL: test0: +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.const $push0=, x+188{{$}} +; CHECK=NEXT: return $pop0{{$}} +define i32* @test0() { + ret i32* getelementptr ([0 x i32], [0 x i32]* @x, i32 0, i32 47) +} + +; CHECK-LABEL: test1: +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.const $push0=, y+188{{$}} +; CHECK=NEXT: return $pop0{{$}} +define i32* @test1() { + ret i32* getelementptr ([50 x i32], [50 x i32]* @y, i32 0, i32 47) +} + +; Test zero offsets. + +; CHECK-LABEL: test2: +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.const $push0=, x{{$}} +; CHECK=NEXT: return $pop0{{$}} +define i32* @test2() { + ret i32* getelementptr ([0 x i32], [0 x i32]* @x, i32 0, i32 0) +} + +; CHECK-LABEL: test3: +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.const $push0=, y{{$}} +; CHECK=NEXT: return $pop0{{$}} +define i32* @test3() { + ret i32* getelementptr ([50 x i32], [50 x i32]* @y, i32 0, i32 0) +} From 577f887f754894edf6d8093ddca72ff69f0659ec Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sun, 6 Dec 2015 19:34:57 +0000 Subject: [PATCH 153/364] [WebAssembly] Make tableswitch's 'default' operand explicit. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254883 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyInstrControl.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 708d902e99e1..9a9468bb3909 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -42,12 +42,12 @@ let Defs = [ARGUMENTS] in { // jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode // currently. let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { -def TABLESWITCH_I32 : I<(outs), (ins I32:$index, variable_ops), - [(WebAssemblytableswitch I32:$index)], - "tableswitch\t$index">; -def TABLESWITCH_I64 : I<(outs), (ins I64:$index, variable_ops), - [(WebAssemblytableswitch I64:$index)], - "tableswitch\t$index">; +def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I32:$index, bb:$default)], + "tableswitch\t$index, $default">; +def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I64:$index, bb:$default)], + "tableswitch\t$index, $default">; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 // Placemarkers to indicate the start of a block or loop scope. From 001f3417071d4d6b08cc0dcd1dc03f5f90fe7623 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sun, 6 Dec 2015 19:42:29 +0000 Subject: [PATCH 154/364] [WebAssembly] Factor out a TypeToString function, since we need it in multiple places. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254884 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstPrinter/WebAssemblyInstPrinter.cpp | 32 +++++++++---------- .../InstPrinter/WebAssemblyInstPrinter.h | 7 ++++ .../WebAssembly/WebAssemblyAsmPrinter.cpp | 16 +--------- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 9b94806c9533..3a151dec16f3 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -98,22 +98,7 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case WebAssembly::PARAM: case WebAssembly::RESULT: case WebAssembly::LOCAL: - switch (Op.getImm()) { - case MVT::i32: - O << "i32"; - break; - case MVT::i64: - O << "i64"; - break; - case MVT::f32: - O << "f32"; - break; - case MVT::f64: - O << "f64"; - break; - default: - llvm_unreachable("unexpected type"); - } + O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); break; default: O << Op.getImm(); @@ -126,3 +111,18 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, Op.getExpr()->print(O, &MAI); } } + +const char *llvm::WebAssembly::TypeToString(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return "i32"; + case MVT::i64: + return "i64"; + case MVT::f32: + return "f32"; + case MVT::f64: + return "f64"; + default: + llvm_unreachable("unsupported type"); + } +} diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 319c8ee1d5d9..20569da0b110 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -17,6 +17,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineValueType.h" namespace llvm { @@ -40,6 +41,12 @@ class WebAssemblyInstPrinter final : public MCInstPrinter { static const char *getRegisterName(unsigned RegNo); }; +namespace WebAssembly { + +const char *TypeToString(MVT Ty); + +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index cfabc21ea4d9..1b175a7f8d5b 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -115,21 +115,7 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { } const char *WebAssemblyAsmPrinter::toString(MVT VT) const { - switch (VT.SimpleTy) { - default: - break; - case MVT::f32: - return "f32"; - case MVT::f64: - return "f64"; - case MVT::i32: - return "i32"; - case MVT::i64: - return "i64"; - } - DEBUG(dbgs() << "Invalid type " << EVT(VT).getEVTString() << '\n'); - llvm_unreachable("invalid type"); - return ""; + return WebAssembly::TypeToString(VT); } //===----------------------------------------------------------------------===// From 1ecc6c0df2f39cc3c79ebc4f23f77be7e009f9df Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 6 Dec 2015 19:44:45 +0000 Subject: [PATCH 155/364] [Orc] Rename IndirectStubsManagerBase to IndirectStubsManager. No functional change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254885 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h | 2 +- include/llvm/ExecutionEngine/Orc/IndirectionUtils.h | 6 +++--- lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 2 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 3 ++- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 2 +- tools/lli/OrcLazyJIT.cpp | 3 ++- unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp | 2 +- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index b7ee9b5937f7..7dab5d1bc67f 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -39,7 +39,7 @@ namespace orc { /// compiled and executed. template + typename IndirectStubsMgrT = IndirectStubsManager> class CompileOnDemandLayer { private: diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index aa75b3f46b4a..e490d894390b 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -211,13 +211,13 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager { }; /// @brief Base class for managing collections of named indirect stubs. -class IndirectStubsManagerBase { +class IndirectStubsManager { public: /// @brief Map type for initializing the manager. See init. typedef StringMap> StubInitsMap; - virtual ~IndirectStubsManagerBase() {} + virtual ~IndirectStubsManager() {} /// @brief Create a single stub with the given name, target address and flags. virtual std::error_code createStub(StringRef StubName, TargetAddress StubAddr, @@ -244,7 +244,7 @@ class IndirectStubsManagerBase { /// @brief IndirectStubsManager implementation for a concrete target, e.g. /// OrcX86_64. (See OrcTargetSupport.h). 
template -class IndirectStubsManager : public IndirectStubsManagerBase { +class LocalIndirectStubsManager : public IndirectStubsManager { public: std::error_code createStub(StringRef StubName, TargetAddress StubAddr, diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index dd6e3a3b29ae..34564e42b10f 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -20,7 +20,7 @@ namespace llvm { namespace orc { void JITCompileCallbackManager::anchor() {} -void IndirectStubsManagerBase::anchor() {} +void IndirectStubsManager::anchor() {} Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { Constant *AddrIntVal = diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index d1af56d84867..e519c7f30920 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -36,7 +36,8 @@ OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) { case Triple::x86_64: return [](){ - return llvm::make_unique>(); + return llvm::make_unique< + orc::LocalIndirectStubsManager>(); }; } } diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index d2f7fe4ac0ef..2e17624ff474 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -268,7 +268,7 @@ class OrcCBindingsStack { CompileLayerT CompileLayer; CODLayerT CODLayer; - std::unique_ptr IndirectStubsMgr; + std::unique_ptr IndirectStubsMgr; std::vector> GenericHandles; std::vector FreeHandleIndexes; diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp index edac10b86556..4235145ee7a5 100644 --- a/tools/lli/OrcLazyJIT.cpp +++ b/tools/lli/OrcLazyJIT.cpp @@ -65,7 +65,8 @@ OrcLazyJIT::createIndirectStubsMgrBuilder(Triple T) { case Triple::x86_64: return [](){ - return llvm::make_unique>(); + return llvm::make_unique< + orc::LocalIndirectStubsManager>(); }; } } diff --git a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp index ca508d0a7561..a27e649b616f 100644 --- a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp @@ -23,7 +23,7 @@ class DummyCallbackManager : public orc::JITCompileCallbackManager { void grow() override { llvm_unreachable("not implemented"); } }; -class DummyStubsManager : public orc::IndirectStubsManagerBase { +class DummyStubsManager : public orc::IndirectStubsManager { public: std::error_code createStub(StringRef StubName, TargetAddress InitAddr, JITSymbolFlags Flags) override { From 3b8cbdadafcf6879c1e6544ee8dd8182e6cd4133 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Dec 2015 20:12:19 +0000 Subject: [PATCH 156/364] [X86][AVX] Tidied up BROADCASTPD/BROADCASTPS tests Regenerate tests using update_llc_test_checks.py git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254886 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-vbroadcast.ll | 117 ++++++++++++++++++----------- 1 file changed, 72 insertions(+), 45 deletions(-) diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index 8b8c11b85875..bfc9149b107d 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -1,7 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s 
-mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -; CHECK: vbroadcastsd (% define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: A: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i64, i64* %ptr, align 8 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -11,8 +15,11 @@ entry: ret <4 x i64> %vecinit6.i } -; CHECK: vbroadcastss (% define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: B: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i32, i32* %ptr, align 4 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -22,8 +29,11 @@ entry: ret <8 x i32> %vecinit6.i } -; CHECK: vbroadcastsd (% define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: C: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load double, double* %ptr, align 8 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 @@ -33,8 +43,11 @@ entry: ret <4 x double> %vecinit6.i } -; CHECK: vbroadcastss (% define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: D: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load float, float* %ptr, align 4 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -46,8 +59,11 @@ entry: ;;;; 128-bit versions -; CHECK: vbroadcastss (% define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: e: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load float, float* %ptr, align 4 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -57,12 +73,14 @@ entry: ret <4 x float> %vecinit6.i } - -; CHECK: _e2 -; CHECK-NOT: vbroadcastss -; CHECK: ret +; Don't broadcast constants on pre-AVX2 hardware. 
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { - %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0 +; CHECK-LABEL: _e2: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03] +; CHECK-NEXT: retq +entry: + %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0 %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3 @@ -70,8 +88,11 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { } -; CHECK: vbroadcastss (% define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: F: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load i32, i32* %ptr, align 4 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -83,10 +104,12 @@ entry: ; Unsupported vbroadcasts -; CHECK: _G -; CHECK-NOT: broadcast (% -; CHECK: ret define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: G: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; CHECK-NEXT: retq entry: %q = load i64, i64* %ptr, align 8 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 @@ -94,18 +117,21 @@ entry: ret <2 x i64> %vecinit2.i } -; CHECK: _H -; CHECK-NOT: broadcast -; CHECK: ret define <4 x i32> @H(<4 x i32> %a) { +; CHECK-LABEL: H: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: retq +entry: %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> ret <4 x i32> %x } -; CHECK: _I -; CHECK-NOT: broadcast (% -; CHECK: ret define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: I: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; CHECK-NEXT: retq entry: %q = load double, double* %ptr, align 4 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 @@ -113,10 +139,13 @@ entry: ret <2 x double> %vecinit2.i } -; CHECK: _RR -; CHECK: vbroadcastss (% -; CHECK: ret define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp { +; CHECK-LABEL: _RR: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl %eax, (%rax) +; CHECK-NEXT: retq entry: %q = load float, float* %ptr, align 4 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -129,11 +158,11 @@ entry: ret <4 x float> %vecinit6.i } - -; CHECK: _RR2 -; CHECK: vbroadcastss (% -; CHECK: ret define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp { +; CHECK-LABEL: _RR2: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load float, float* %ptr, align 4 %v = insertelement <4 x float> undef, float %q, i32 0 @@ -141,16 +170,15 @@ entry: ret <4 x float> %t } - ; These tests check that a vbroadcast instruction is used when we have a splat ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs ; (via the insertelements). 
-; CHECK-LABEL: splat_concat1 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastss (% -; CHECK-NEXT: ret define <8 x float> @splat_concat1(float* %p) { +; CHECK-LABEL: splat_concat1: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 +; CHECK-NEXT: retq %1 = load float, float* %p, align 4 %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = insertelement <4 x float> %2, float %1, i32 1 @@ -160,11 +188,11 @@ define <8 x float> @splat_concat1(float* %p) { ret <8 x float> %6 } -; CHECK-LABEL: splat_concat2 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastss (% -; CHECK-NEXT: ret define <8 x float> @splat_concat2(float* %p) { +; CHECK-LABEL: splat_concat2: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 +; CHECK-NEXT: retq %1 = load float, float* %p, align 4 %2 = insertelement <4 x float> undef, float %1, i32 0 %3 = insertelement <4 x float> %2, float %1, i32 1 @@ -178,11 +206,11 @@ define <8 x float> @splat_concat2(float* %p) { ret <8 x float> %10 } -; CHECK-LABEL: splat_concat3 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastsd (% -; CHECK-NEXT: ret define <4 x double> @splat_concat3(double* %p) { +; CHECK-LABEL: splat_concat3: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 +; CHECK-NEXT: retq %1 = load double, double* %p, align 8 %2 = insertelement <2 x double> undef, double %1, i32 0 %3 = insertelement <2 x double> %2, double %1, i32 1 @@ -190,11 +218,11 @@ define <4 x double> @splat_concat3(double* %p) { ret <4 x double> %4 } -; CHECK-LABEL: splat_concat4 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastsd (% -; CHECK-NEXT: ret define <4 x double> @splat_concat4(double* %p) { +; CHECK-LABEL: splat_concat4: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 +; CHECK-NEXT: retq %1 = load double, double* %p, align 8 %2 = insertelement <2 x double> undef, double %1, i32 0 %3 = insertelement <2 x double> %2, double %1, i32 1 @@ -203,4 +231,3 @@ define <4 x double> @splat_concat4(double* %p) { %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> ret <4 x double> %6 } - From 41e546b231af7f317b84d2b342125cea73a1bb46 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 6 Dec 2015 23:05:38 +0000 Subject: [PATCH 157/364] [Verifier] Fix !dbg validation if Scope is the Subprogram Summary: We are inserting both Scope and SP into the Seen map and check whether it was already there in which case we skip the validation (the idea being that we already checked this Subprogram before). However, if (Scope == SP) as MDNodes, then inserting the Scope, will trigger the Seen check causing us to incorrectly not validate this !dbg attachment. Fix this by not performing the SP Seen check if Scope == SP Reviewers: pcc, dexonsmith, dblaikie Subscribers: dblaikie, llvm-commits Differential Revision: http://reviews.llvm.org/D14697 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254887 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 5 ++++- test/CodeGen/X86/2010-05-28-Crash.ll | 2 +- test/Verifier/func-dbg.ll | 25 +++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 test/Verifier/func-dbg.ll diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5cbb597ca269..96b8a779577d 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -1814,7 +1814,10 @@ void Verifier::visitFunction(const Function &F) { continue; DISubprogram *SP = Scope ? 
Scope->getSubprogram() : nullptr; - if (SP && !Seen.insert(SP).second) + + // Scope and SP could be the same MDNode and we don't want to skip + // validation in that case + if (SP && ((Scope != SP) && !Seen.insert(SP).second)) continue; // FIXME: Once N is canonical, check "SP == &N". diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index 678f1befad1d..7967d45c2ee8 100644 --- a/test/CodeGen/X86/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll @@ -16,7 +16,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon define i32 @bar(i32 %x) nounwind optsize ssp !dbg !8 { entry: tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !DIExpression()), !dbg !DILocation(scope: !8) - tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1) + tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1, inlinedAt: !DILocation(scope: !8)) %0 = tail call i32 (...) @zoo(i32 1) nounwind, !dbg !12 ; [#uses=1] %1 = add nsw i32 %0, %x, !dbg !13 ; [#uses=1] ret i32 %1, !dbg !13 diff --git a/test/Verifier/func-dbg.ll b/test/Verifier/func-dbg.ll new file mode 100644 index 000000000000..e56de94d18c9 --- /dev/null +++ b/test/Verifier/func-dbg.ll @@ -0,0 +1,25 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +define i32 @foo() !dbg !4 { +entry: + ret i32 0, !dbg !6 +} + +define i32 @bar() !dbg !5 { +entry: +; CHECK: !dbg attachment points at wrong subprogram for function + ret i32 0, !dbg !6 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 0, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!1 = !DIFile(filename: "dwarf-test.c", directory: "test") +!2 = !{} +!3 = !{!4, !5} +!4 = distinct !DISubprogram(name: "foo", scope: !0, isDefinition: true) +!5 = distinct !DISubprogram(name: "bar", scope: !0, isDefinition: true) +!6 = !DILocation(line: 7, scope: !4) +!7 = !{i32 2, !"Dwarf Version", i32 3} +!8 = !{i32 1, !"Debug Info Version", i32 3} From 0511c3bfdd84c473ea36ae52feacd6f40d1422d0 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 7 Dec 2015 00:03:28 +0000 Subject: [PATCH 158/364] [llvm-objdump/MachoDump] Make code much more concise. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254888 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/MachODump.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 3fea0b2a4eee..b270057151e1 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -7608,26 +7608,11 @@ static void PrintUuidLoadCommand(MachO::uuid_command uuid) { else outs() << "\n"; outs() << " uuid "; - outs() << format("%02" PRIX32, uuid.uuid[0]); - outs() << format("%02" PRIX32, uuid.uuid[1]); - outs() << format("%02" PRIX32, uuid.uuid[2]); - outs() << format("%02" PRIX32, uuid.uuid[3]); - outs() << "-"; - outs() << format("%02" PRIX32, uuid.uuid[4]); - outs() << format("%02" PRIX32, uuid.uuid[5]); - outs() << "-"; - outs() << format("%02" PRIX32, uuid.uuid[6]); - outs() << format("%02" PRIX32, uuid.uuid[7]); - outs() << "-"; - outs() << format("%02" PRIX32, uuid.uuid[8]); - outs() << format("%02" PRIX32, uuid.uuid[9]); - outs() << "-"; - outs() << format("%02" PRIX32, uuid.uuid[10]); - outs() << format("%02" PRIX32, uuid.uuid[11]); - outs() << format("%02" PRIX32, uuid.uuid[12]); - outs() << format("%02" PRIX32, uuid.uuid[13]); - outs() << format("%02" PRIX32, uuid.uuid[14]); - outs() << format("%02" PRIX32, uuid.uuid[15]); + for (int i = 0; i < 16; ++i) { + outs() << format("%02" PRIX32, uuid.uuid[i]); + if (i == 3 || i == 5 || i == 7 || i == 9) + outs() << "-"; + } outs() << "\n"; } From 260f72a26498347915e351d16b642a679abf64a9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Dec 2015 06:01:51 +0000 Subject: [PATCH 159/364] Add uint8_t size to LegalizeAction enum so we can use the enum type directly and remove some casts. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254893 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index e99c9f758f8e..7e981776290d 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -83,7 +83,7 @@ class TargetLoweringBase { public: /// This enum indicates whether operations are valid for a target, and if not, /// what action should be used to make them valid. - enum LegalizeAction { + enum LegalizeAction : uint8_t { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. Expand, // Try to expand this to other ops, otherwise use a libcall. @@ -550,8 +550,7 @@ class TargetLoweringBase { // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. if (Op > array_lengthof(OpActions[0])) return Custom; - unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - return (LegalizeAction)OpActions[I][Op]; + return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } /// Return true if the specified operation is legal on this target or can be @@ -595,7 +594,7 @@ class TargetLoweringBase { unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType]; + return LoadExtActions[ValI][MemI][ExtType]; } /// Return true if the specified load with extension is legal on this target. 
@@ -621,7 +620,7 @@ class TargetLoweringBase { unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)TruncStoreActions[ValI][MemI]; + return TruncStoreActions[ValI][MemI]; } /// Return true if the specified store with truncation is legal on this @@ -1316,7 +1315,7 @@ class TargetLoweringBase { void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); - OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; + OpActions[(unsigned)VT.SimpleTy][Op] = Action; } /// Indicate that the specified load with extension does not work with the @@ -1325,7 +1324,7 @@ class TargetLoweringBase { LegalizeAction Action) { assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action; + LoadExtActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = Action; } /// Indicate that the specified truncating store does not work with the @@ -1333,7 +1332,7 @@ class TargetLoweringBase { void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action; + TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } /// Indicate that the specified indexed load does or does not work with the @@ -1886,17 +1885,17 @@ class TargetLoweringBase { /// operations are Legal (aka, supported natively by the target), but /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. - uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; + LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; /// For each load extension type and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with a load of a /// specific value type and extension type. - uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] - [ISD::LAST_LOADEXT_TYPE]; + LegalizeAction LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] + [ISD::LAST_LOADEXT_TYPE]; /// For each value type pair keep a LegalizeAction that indicates whether a /// truncating store of a specific value type and truncating type is legal. - uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; + LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; /// For each indexed mode and each value type, keep a pair of LegalizeAction /// that indicates how instruction selection should deal with the load / From 1632a3b15247d3710382f81dc1ba6b4debf7c531 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Dec 2015 06:31:41 +0000 Subject: [PATCH 160/364] Add uint8_t size to LegalizeTypeAction enum and use the enum type directly to remove some typecasts. 
NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254895 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7e981776290d..f7152565f914 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -93,7 +93,7 @@ class TargetLoweringBase { /// This enum indicates whether a types are legal for a target, and if not, /// what action should be used to make them valid. - enum LegalizeTypeAction { + enum LegalizeTypeAction : uint8_t { TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. @@ -413,20 +413,20 @@ class TargetLoweringBase { class ValueTypeActionImpl { /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum /// that indicates how instruction selection should deal with the type. - uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; public: ValueTypeActionImpl() { - std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 0); + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); } LegalizeTypeAction getTypeAction(MVT VT) const { - return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy]; + return ValueTypeActions[VT.SimpleTy]; } void setTypeAction(MVT VT, LegalizeTypeAction Action) { - unsigned I = VT.SimpleTy; - ValueTypeActions[I] = Action; + ValueTypeActions[VT.SimpleTy] = Action; } }; From 01654c4941dcf2447b293500ef34ca9c299ee8f0 Mon Sep 17 00:00:00 2001 From: Zlatko Buljan Date: Mon, 7 Dec 2015 08:29:31 +0000 Subject: [PATCH 161/364] [mips][microMIPS] Implement LH, LHE, LHU and LHUE instructions Differential Revision: http://reviews.llvm.org/D9824 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254897 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 12 ++++--- lib/Target/Mips/MipsInstrInfo.td | 35 +++++++++++++++---- .../Disassembler/Mips/micromips32r6/valid.txt | 4 +++ .../Disassembler/Mips/micromips64r6/valid.txt | 4 +++ test/MC/Mips/micromips-invalid.s | 12 +++++++ test/MC/Mips/micromips32r6/invalid.s | 12 +++++++ test/MC/Mips/micromips32r6/valid.s | 4 +++ test/MC/Mips/micromips64r6/invalid.s | 12 +++++++ test/MC/Mips/micromips64r6/valid.s | 4 +++ test/MC/Mips/mips32r6/invalid.s | 12 +++++++ test/MC/Mips/mips64r6/invalid.s | 12 +++++++ 11 files changed, 113 insertions(+), 10 deletions(-) diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 175a9559e004..5745601b32e4 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -738,8 +738,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { let DecoderMethod = "DecodeMemMMImm16" in { def LB_MM : Load<"lb", GPR32Opnd>, MMRel, LW_FM_MM<0x7>; def LBu_MM : Load<"lbu", GPR32Opnd>, MMRel, LW_FM_MM<0x5>; - def LH_MM : Load<"lh", GPR32Opnd>, MMRel, LW_FM_MM<0xf>; - def LHu_MM : Load<"lhu", GPR32Opnd>, MMRel, LW_FM_MM<0xd>; + def LH_MM : LoadMemory<"lh", GPR32Opnd, mem_simm16gpr>, MMRel, + LW_FM_MM<0xf>; + def LHu_MM : LoadMemory<"lhu", GPR32Opnd, mem_simm16gpr>, MMRel, + LW_FM_MM<0xd>; def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>; def SB_MM : Store<"sb", GPR32Opnd>, MMRel, LW_FM_MM<0x6>; def SH_MM : Store<"sh", 
GPR32Opnd>, MMRel, LW_FM_MM<0xe>; @@ -749,8 +751,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { let DecoderMethod = "DecodeMemMMImm9" in { def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>; def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>; - def LHE_MM : Load<"lhe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>; - def LHuE_MM : Load<"lhue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>; + def LHE_MM : LoadMemory<"lhe", GPR32Opnd, mem_simm9gpr>, + POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>; + def LHuE_MM : LoadMemory<"lhue", GPR32Opnd, mem_simm9gpr>, + POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>; def LWE_MM : Load<"lwe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>; def SBE_MM : Store<"sbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>; def SHE_MM : Store<"she", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e0b317e9bf2d..f3be7fc46187 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -598,6 +598,14 @@ def MipsMemSimm16AsmOperand : AsmOperandClass { let PredicateMethod = "isMemWithSimmOffset<16>"; } +def MipsMemSimm16GPRAsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm16GPR"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffsetGPR<16>"; +} + def MipsInvertedImmoperand : AsmOperandClass { let Name = "InvNum"; let RenderMethod = "addImmOperands"; @@ -653,6 +661,12 @@ def mem_simm16 : mem_generic { let ParserMatchClass = MipsMemSimm16AsmOperand; } +def mem_simm16gpr : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm16); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm16GPRAsmOperand; +} + def mem_ea : Operand { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops ptr_rc, simm16); @@ -820,15 +834,21 @@ class LoadUpper: } // Memory Load/Store -class Load : - InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), +class LoadMemory : + InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RO:$rt, (OpNode Addr:$addr))], Itin, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; let mayLoad = 1; } +class Load : + LoadMemory; + class StoreMemory : @@ -1374,9 +1394,12 @@ def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, II_ROTRV, rotr>, def LB : Load<"lb", GPR32Opnd, sextloadi8, II_LB>, MMRel, LW_FM<0x20>; def LBu : Load<"lbu", GPR32Opnd, zextloadi8, II_LBU, addrDefault>, MMRel, LW_FM<0x24>; -def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel, - LW_FM<0x21>; -def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>; +let AdditionalPredicates = [NotInMicroMips] in { + def LH : LoadMemory<"lh", GPR32Opnd, mem_simm16gpr, sextloadi16, II_LH, + addrDefault>, MMRel, LW_FM<0x21>; + def LHu : LoadMemory<"lhu", GPR32Opnd, mem_simm16gpr, zextloadi16, II_LHU>, + MMRel, LW_FM<0x25>; +} let AdditionalPredicates = [NotInMicroMips] in { def LW : StdMMR6Rel, Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel, LW_FM<0x23>; diff --git a/test/MC/Disassembler/Mips/micromips32r6/valid.txt b/test/MC/Disassembler/Mips/micromips32r6/valid.txt index 5fa2138262a4..d96a243eef8c 100644 --- a/test/MC/Disassembler/Mips/micromips32r6/valid.txt +++ b/test/MC/Disassembler/Mips/micromips32r6/valid.txt @@ -253,3 +253,7 @@ 0x55 0x04 0x12 0x78 # CHECK: selnez.d $f2, $f4, $f8 0x54 
0x62 0x00 0x60 # CHECK: class.s $f2, $f3 0x54 0x82 0x02 0x60 # CHECK: class.d $f2, $f4 +0x3c 0x44 0x00 0x08 # CHECK: lh $2, 8($4) +0x60 0x82 0x6a 0x08 # CHECK: lhe $4, 8($2) +0x34 0x82 0x00 0x08 # CHECK: lhu $4, 8($2) +0x60 0x82 0x62 0x08 # CHECK: lhue $4, 8($2) diff --git a/test/MC/Disassembler/Mips/micromips64r6/valid.txt b/test/MC/Disassembler/Mips/micromips64r6/valid.txt index 10a9687384ea..fadd61c9ecd7 100644 --- a/test/MC/Disassembler/Mips/micromips64r6/valid.txt +++ b/test/MC/Disassembler/Mips/micromips64r6/valid.txt @@ -166,3 +166,7 @@ 0x55 0x04 0x12 0x78 # CHECK: selnez.d $f2, $f4, $f8 0x54 0x62 0x00 0x60 # CHECK: class.s $f2, $f3 0x54 0x82 0x02 0x60 # CHECK: class.d $f2, $f4 +0x3c 0x44 0x00 0x08 # CHECK: lh $2, 8($4) +0x60 0x82 0x6a 0x08 # CHECK: lhe $4, 8($2) +0x34 0x82 0x00 0x08 # CHECK: lhu $4, 8($2) +0x60 0x82 0x62 0x08 # CHECK: lhue $4, 8($2) diff --git a/test/MC/Mips/micromips-invalid.s b/test/MC/Mips/micromips-invalid.s index ed0ab1bdc233..63a1c914301a 100644 --- a/test/MC/Mips/micromips-invalid.s +++ b/test/MC/Mips/micromips-invalid.s @@ -91,3 +91,15 @@ jraddiusp 33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4 jraddiusp 125 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4 jraddiusp 132 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4 + lh $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $34, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $35, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $36, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 8($33) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 8($35) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 8($37) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 65536($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 65536($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/test/MC/Mips/micromips32r6/invalid.s b/test/MC/Mips/micromips32r6/invalid.s index 14259eadaeac..8f1e64acf777 100644 --- a/test/MC/Mips/micromips32r6/invalid.s +++ b/test/MC/Mips/micromips32r6/invalid.s @@ -109,3 +109,15 @@ swm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction swm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction swm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $34, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $35, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $36, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 8($33) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 8($35) # 
CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 8($37) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 65536($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 65536($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/test/MC/Mips/micromips32r6/valid.s b/test/MC/Mips/micromips32r6/valid.s index 194b15e1a4f6..85958bc49179 100644 --- a/test/MC/Mips/micromips32r6/valid.s +++ b/test/MC/Mips/micromips32r6/valid.s @@ -246,3 +246,7 @@ selnez.d $f2, $f4, $f8 # CHECK: selnez.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x78] class.s $f2, $f3 # CHECK: class.s $f2, $f3 # encoding: [0x54,0x62,0x00,0x60] class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x60] + lh $2, 8($4) # CHECK: lh $2, 8($4) # encoding: [0x3c,0x44,0x00,0x08] + lhe $4, 8($2) # CHECK: lhe $4, 8($2) # encoding: [0x60,0x82,0x6a,0x08] + lhu $4, 8($2) # CHECK: lhu $4, 8($2) # encoding: [0x34,0x82,0x00,0x08] + lhue $4, 8($2) # CHECK: lhue $4, 8($2) # encoding: [0x60,0x82,0x62,0x08] diff --git a/test/MC/Mips/micromips64r6/invalid.s b/test/MC/Mips/micromips64r6/invalid.s index 51b9f7530485..27b5146af3eb 100644 --- a/test/MC/Mips/micromips64r6/invalid.s +++ b/test/MC/Mips/micromips64r6/invalid.s @@ -118,3 +118,15 @@ swm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction swm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction swm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $34, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $35, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $36, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 8($33) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 8($35) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 8($37) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 65536($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 65536($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/test/MC/Mips/micromips64r6/valid.s b/test/MC/Mips/micromips64r6/valid.s index 1c8781b6e966..edee56adeda4 100644 --- a/test/MC/Mips/micromips64r6/valid.s +++ b/test/MC/Mips/micromips64r6/valid.s @@ -146,5 +146,9 @@ a: selnez.d $f2, $f4, $f8 # CHECK: selnez.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x78] class.s $f2, $f3 # CHECK: class.s $f2, $f3 # encoding: [0x54,0x62,0x00,0x60] class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x60] + lh $2, 8($4) # CHECK: lh $2, 8($4) # encoding: [0x3c,0x44,0x00,0x08] + lhe $4, 8($2) # CHECK: lhe $4, 8($2) # encoding: [0x60,0x82,0x6a,0x08] + lhu $4, 8($2) # CHECK: lhu $4, 8($2) # encoding: 
[0x34,0x82,0x00,0x08] + lhue $4, 8($2) # CHECK: lhue $4, 8($2) # encoding: [0x60,0x82,0x62,0x08] 1: diff --git a/test/MC/Mips/mips32r6/invalid.s b/test/MC/Mips/mips32r6/invalid.s index 452cd3a5ee62..56edcb372a4a 100644 --- a/test/MC/Mips/mips32r6/invalid.s +++ b/test/MC/Mips/mips32r6/invalid.s @@ -19,6 +19,18 @@ local_label: break 1024, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction break 7, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction break 1024, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $34, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $35, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $36, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 8($33) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 8($35) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 8($37) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 65536($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 65536($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction // FIXME: Following tests are temporarely disabled, until "PredicateControl not in hierarchy" problem is resolved bltl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled bltul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64r6/invalid.s b/test/MC/Mips/mips64r6/invalid.s index 8d68b51c5111..c615b06e4bfd 100644 --- a/test/MC/Mips/mips64r6/invalid.s +++ b/test/MC/Mips/mips64r6/invalid.s @@ -17,6 +17,18 @@ local_label: break 1024, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction break 7, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction break 1024, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $34, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $35, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $36, 8($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 8($33) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 8($35) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 8($37) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lh $2, 65536($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhe $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhu $4, 65536($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction + lhue $4, 512($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for 
instruction // FIXME: Following tests are temporarely disabled, until "PredicateControl not in hierarchy" problem is resolved bltl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled bltul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled From 0239b7553bd7dc0071475e9bf63effa983690666 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Dec 2015 09:09:54 +0000 Subject: [PATCH 162/364] [X86][AVX] Added tests to load+broadcast non-zero'th vector elements Baseline for an upcoming patch for PR23022 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254898 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-vbroadcast.ll | 153 +++++++++++++++++++ test/CodeGen/X86/avx2-vbroadcast.ll | 225 +++++++++++++++++++++++++++- 2 files changed, 375 insertions(+), 3 deletions(-) diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index bfc9149b107d..5c0f43da876d 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -102,6 +102,159 @@ entry: ret <4 x i32> %vecinit6.i } +; FIXME: Pointer adjusted broadcasts + +define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i32_4i32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> + ret <4 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_4i32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> + ret <8 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_8i32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i32>, <8 x i32>* %ptr + %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> + ret <8 x i32> %ret +} + +define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f32_4f32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> + ret <4 x float> %ret +} + +define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8f32_4f32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> + ret <8 x float> %ret +} + +define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: 
load_splat_8f32_8f32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x float>, <8 x float>* %ptr + %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> + ret <8 x float> %ret +} + +define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3] +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> + ret <2 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> + ret <4 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_4i64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovapd (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i64>, <4 x i64>* %ptr + %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> + ret <4 x i64> %ret +} + +define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> + ret <2 x double> %ret +} + +define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> + ret <4 x double> %ret +} + +define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_4f64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovapd (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x double>, <4 x double>* %ptr + %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> + ret <4 x double> %ret +} + ; Unsupported vbroadcasts define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp { diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 418707cdc237..186f50873650 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -172,6 +172,225 @@ entry: ret <4 x i64> 
%q3 } +; FIXME: Pointer adjusted broadcasts + +define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <16 x i8>, <16 x i8>* %ptr + %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> + ret <16 x i8> %ret +} + +define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <16 x i8>, <16 x i8>* %ptr + %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> + ret <32 x i8> %ret +} + +define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <32 x i8>, <32 x i8>* %ptr + %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> + ret <32 x i8> %ret +} + +define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i16_8i16_11111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] +; CHECK-NEXT: retq +entry: + %ld = load <8 x i16>, <8 x i16>* %ptr + %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> + ret <8 x i16> %ret +} + +define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i16>, <8 x i16>* %ptr + %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> + ret <16 x i16> %ret +} + +define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <16 x i16>, <16 x i16>* %ptr + %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> + ret <16 x i16> %ret +} + +define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i32_4i32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> + ret <4 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind 
uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_4i32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpbroadcastd LCPI15_0(%rip), %ymm1 +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> + ret <8 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_8i32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastd LCPI16_0(%rip), %ymm0 +; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i32>, <8 x i32>* %ptr + %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> + ret <8 x i32> %ret +} + +define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f32_4f32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> + ret <4 x float> %ret +} + +define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8f32_4f32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1 +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> + ret <8 x float> %ret +} + +define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8f32_8f32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss LCPI19_0(%rip), %ymm0 +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x float>, <8 x float>* %ptr + %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> + ret <8 x float> %ret +} + +define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3] +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> + ret <2 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> + ret <4 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_4i64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = mem[2,2,2,2] +; CHECK-NEXT: retq +entry: + %ld = load <4 x i64>, <4 x i64>* %ptr + %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> + ret <4 x i64> %ret +} + +define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; 
CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> + ret <2 x double> %ret +} + +define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] +; CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> + ret <4 x double> %ret +} + +define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_4f64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,2,2,2] +; CHECK-NEXT: retq +entry: + %ld = load <4 x double>, <4 x double>* %ptr + %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> + ret <4 x double> %ret +} + ; make sure that we still don't support broadcast double into 128-bit vector ; this used to crash define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp { @@ -242,13 +461,13 @@ define void @crash() nounwind alwaysinline { ; CHECK: ## BB#0: ## %WGLoopsEntry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB13_1 +; CHECK-NEXT: je LBB31_1 ; CHECK-NEXT: ## BB#2: ## %ret ; CHECK-NEXT: retq ; CHECK-NEXT: .align 4, 0x90 -; CHECK-NEXT: LBB13_1: ## %footer349VF +; CHECK-NEXT: LBB31_1: ## %footer349VF ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB13_1 +; CHECK-NEXT: jmp LBB31_1 WGLoopsEntry: br i1 undef, label %ret, label %footer329VF From 8205637a28004f0cb652634f13a154006e187c12 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Mon, 7 Dec 2015 10:54:36 +0000 Subject: [PATCH 163/364] [ARM] Flag vcvt{t,b} with an f16 type specifier as part of the FP16 extension Additionally correct the Cortex-R7 definition to allow the FP16 feature. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254900 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 1 + lib/Target/ARM/ARMInstrVFP.td | 12 ++++++++---- test/CodeGen/ARM/build-attributes.ll | 2 +- test/MC/ARM/neon-vcvt-fp16.s | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 test/MC/ARM/neon-vcvt-fp16.s diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index dd33c3614b1a..a44dc830a673 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -585,6 +585,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureVFP3, FeatureVFPOnlySP, FeatureD16, + FeatureFP16, FeatureMP, FeatureSlowFPBrcc, FeatureHWDivARM, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 2aea73a6336e..050cd1a445ad 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -540,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // FIXME: Verify encoding after integrated assembler is working. 
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index b80191d76012..bf502b3ae077 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -1105,7 +1105,7 @@ ; CORTEX-R7: .eabi_attribute 25, 1 ; CORTEX-R7: .eabi_attribute 27, 1 ; CORTEX-R7-NOT: .eabi_attribute 28 -; CORTEX-R7-NOT: .eabi_attribute 36 +; CORTEX-R7: .eabi_attribute 36, 1 ; CORTEX-R7: .eabi_attribute 38, 1 ; CORTEX-R7: .eabi_attribute 42, 1 ; CORTEX-R7: .eabi_attribute 44, 2 diff --git a/test/MC/ARM/neon-vcvt-fp16.s b/test/MC/ARM/neon-vcvt-fp16.s new file mode 100644 index 000000000000..a23be061c0fa --- /dev/null +++ b/test/MC/ARM/neon-vcvt-fp16.s @@ -0,0 +1,18 @@ +@ RUN: llvm-mc -mcpu=cortex-r7 -triple arm -show-encoding < %s 2>&1| \ +@ RUN: FileCheck %s --check-prefix=CHECK-FP16 +@ RUN: not llvm-mc -mcpu=cortex-r5 -triple arm -show-encoding < %s 2>&1 | \ +@ RUN: FileCheck %s --check-prefix=CHECK-NOFP16 + +@ CHECK-FP16: vcvtt.f32.f16 s7, s1 @ encoding: [0xe0,0x3a,0xf2,0xee] +@ CHECK-NOFP16: instruction requires: half-float conversions + vcvtt.f32.f16 s7, s1 +@ CHECK-FP16: vcvtt.f16.f32 s1, s7 @ encoding: [0xe3,0x0a,0xf3,0xee] +@ CHECK-NOFP16: instruction requires: half-float conversions + vcvtt.f16.f32 s1, s7 + +@ CHECK-FP16: vcvtb.f32.f16 s7, s1 @ encoding: [0x60,0x3a,0xf2,0xee] +@ CHECK-NOFP16: instruction requires: half-float conversions + vcvtb.f32.f16 s7, s1 +@ CHECK-FP16: vcvtb.f16.f32 s1, s7 @ encoding: [0x63,0x0a,0xf3,0xee] +@ CHECK-NOFP16: instruction requires: half-float conversions + vcvtb.f16.f32 s1, s7 From 5c239f408dd6152787aac57f38ab16e5e1d0c94f Mon Sep 17 00:00:00 2001 From: Marina Yatsina Date: Mon, 7 Dec 2015 13:09:20 +0000 Subject: [PATCH 164/364] [X86] Adding support for FWORD type for MS inline asm Adding support for FWORD type for MS inline asm. 
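(An illustrative, hypothetical use of the new operand size, not taken from this patch: in MS-style inline assembly the FWORD qualifier names a 48-bit memory operand, which is what sidt writes on a 32-bit x86 target. The sketch assumes a compiler that accepts MS inline-asm blocks, e.g. clang with -fms-extensions -fasm-blocks, or MSVC.)

    #include <stdint.h>

    #pragma pack(push, 1)
    typedef struct {
      uint16_t Limit;   // bytes 0-1
      uint32_t Base;    // bytes 2-5 -> 48 bits total, i.e. one FWORD
    } DescriptorTableReg;
    #pragma pack(pop)

    uint32_t ReadIDTBase(void) {
      DescriptorTableReg IDTR;
      __asm {
        sidt fword ptr [IDTR]   // store the IDT register into a 48-bit memory operand
      }
      return IDTR.Base;
    }
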
Differential Revision: http://reviews.llvm.org/D15268 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254904 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 1 + test/MC/X86/intel-syntax.s | 3 +++ 2 files changed, 4 insertions(+) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d53ab71f3d5a..f2efefd35c52 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1049,6 +1049,7 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { .Cases("BYTE", "byte", 8) .Cases("WORD", "word", 16) .Cases("DWORD", "dword", 32) + .Cases("FWORD", "fword", 48) .Cases("QWORD", "qword", 64) .Cases("MMWORD","mmword", 64) .Cases("XWORD", "xword", 80) diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 214d827168b8..001a26d07019 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -746,3 +746,6 @@ loopz _foo loopnz _foo // CHECK: loope _foo // CHECK: loopne _foo + +sidt fword ptr [eax] +// CHECK: sidtq (%eax) From fbb911506eb0aeca8a269d0fa80ac279d932b44f Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Mon, 7 Dec 2015 13:14:14 +0000 Subject: [PATCH 165/364] [avx512] rename gcc intrinsics to be align with gcc format rename the gcc intrinsics suffix : _mask ->_round Differential Revision: http://reviews.llvm.org/D15285 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254905 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 1c028dea601f..29377d136797 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -5205,40 +5205,40 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_mask">, + def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_mask">, + def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_mask">, + def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_mask">, + def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_mask">, + def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_mask">, + def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_mask">, + def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_mask">, + def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_mask">, + def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_mask">, + def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_mask">, + def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_mask">, + def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; From 35ecd26675997eeb0954dfb0edffb4ce85cf20f1 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 
7 Dec 2015 13:24:23 +0000 Subject: [PATCH 166/364] Change how the linker handles the old llvm.global_ctors. Now instead of changing it to the new format and then linking, it just handles the old format while copying it over. The main differences are: * There is no rauw in the source module. * An old format input is always upgraded. The first item helps with having a sane API that passes in a GV list to the linker. The second one is a small step in deprecating the old format. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254907 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 126 ++++++++++--------------------------- test/Linker/ctors5.ll | 8 +++ 2 files changed, 42 insertions(+), 92 deletions(-) create mode 100644 test/Linker/ctors5.ll diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 627137ba3abd..a1c3162bf796 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -509,9 +509,6 @@ class ModuleLinker { void computeTypeMapping(); - void upgradeMismatchedGlobalArray(StringRef Name); - void upgradeMismatchedGlobals(); - bool linkIfNeeded(GlobalValue &GV); Constant *linkAppendingVarProto(GlobalVariable *DstGV, const GlobalVariable *SrcGV); @@ -1190,83 +1187,6 @@ void ModuleLinker::computeTypeMapping() { TypeMap.linkDefinedTypeBodies(); } -static void upgradeGlobalArray(GlobalVariable *GV) { - ArrayType *ATy = cast(GV->getType()->getElementType()); - StructType *OldTy = cast(ATy->getElementType()); - assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements"); - - // Get the upgraded 3 element type. - PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo(); - Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1), - VoidPtrTy}; - StructType *NewTy = StructType::get(GV->getContext(), Tys, false); - - // Build new constants with a null third field filled in. - Constant *OldInitC = GV->getInitializer(); - ConstantArray *OldInit = dyn_cast(OldInitC); - if (!OldInit && !isa(OldInitC)) - // Invalid initializer; give up. - return; - std::vector Initializers; - if (OldInit && OldInit->getNumOperands()) { - Value *Null = Constant::getNullValue(VoidPtrTy); - for (Use &U : OldInit->operands()) { - ConstantStruct *Init = cast(U.get()); - Initializers.push_back(ConstantStruct::get( - NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr)); - } - } - assert(Initializers.size() == ATy->getNumElements() && - "Failed to copy all array elements"); - - // Replace the old GV with a new one. - ATy = ArrayType::get(NewTy, Initializers.size()); - Constant *NewInit = ConstantArray::get(ATy, Initializers); - GlobalVariable *NewGV = new GlobalVariable( - *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "", - GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), - GV->isExternallyInitialized()); - NewGV->copyAttributesFrom(GV); - NewGV->takeName(GV); - assert(GV->use_empty() && "program cannot use initializer list"); - GV->eraseFromParent(); -} - -void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) { - // Look for the global arrays. - auto *DstGV = dyn_cast_or_null(DstM.getNamedValue(Name)); - if (!DstGV) - return; - auto *SrcGV = dyn_cast_or_null(SrcM.getNamedValue(Name)); - if (!SrcGV) - return; - - // Check if the types already match. - auto *DstTy = cast(DstGV->getType()->getElementType()); - auto *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - if (DstTy == SrcTy) - return; - - // Grab the element types. 
We can only upgrade an array of a two-field - // struct. Only bother if the other one has three-fields. - auto *DstEltTy = cast(DstTy->getElementType()); - auto *SrcEltTy = cast(SrcTy->getElementType()); - if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) { - upgradeGlobalArray(DstGV); - return; - } - if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2) - upgradeGlobalArray(SrcGV); - - // We can't upgrade any other differences. -} - -void ModuleLinker::upgradeMismatchedGlobals() { - upgradeMismatchedGlobalArray("llvm.global_ctors"); - upgradeMismatchedGlobalArray("llvm.global_dtors"); -} - static void getArrayElements(const Constant *C, SmallVectorImpl &Dest) { unsigned NumElements = cast(C->getType())->getNumElements(); @@ -1279,9 +1199,25 @@ static void getArrayElements(const Constant *C, /// Return true on error. Constant *ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, const GlobalVariable *SrcGV) { - ArrayType *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - Type *EltTy = SrcTy->getElementType(); + Type *EltTy = cast(TypeMap.get(SrcGV->getType()->getElementType())) + ->getElementType(); + + StringRef Name = SrcGV->getName(); + bool IsNewStructor = false; + bool IsOldStructor = false; + if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") { + if (cast(EltTy)->getNumElements() == 3) + IsNewStructor = true; + else + IsOldStructor = true; + } + + PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo(); + if (IsOldStructor) { + auto &ST = *cast(EltTy); + Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; + EltTy = StructType::get(SrcGV->getContext(), Tys, false); + } if (DstGV) { ArrayType *DstTy = cast(DstGV->getType()->getElementType()); @@ -1335,10 +1271,6 @@ Constant *ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, SmallVector SrcElements; getArrayElements(SrcGV->getInitializer(), SrcElements); - StringRef Name = SrcGV->getName(); - bool IsNewStructor = - (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") && - cast(EltTy)->getNumElements() == 3; if (IsNewStructor) SrcElements.erase( std::remove_if(SrcElements.begin(), SrcElements.end(), @@ -1367,8 +1299,21 @@ Constant *ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, ValueMap[SrcGV] = Ret; for (auto *V : SrcElements) { - DstElements.push_back( - MapValue(V, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer)); + Constant *NewV; + if (IsOldStructor) { + auto *S = cast(V); + auto *E1 = MapValue(S->getOperand(0), ValueMap, RF_MoveDistinctMDs, + &TypeMap, &ValMaterializer); + auto *E2 = MapValue(S->getOperand(1), ValueMap, RF_MoveDistinctMDs, + &TypeMap, &ValMaterializer); + Value *Null = Constant::getNullValue(VoidPtrTy); + NewV = + ConstantStruct::get(cast(EltTy), E1, E2, Null, nullptr); + } else { + NewV = + MapValue(V, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer); + } + DstElements.push_back(NewV); } NG->setInitializer(ConstantArray::get(NewType, DstElements)); @@ -1877,9 +1822,6 @@ bool ModuleLinker::run() { ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); } - // Upgrade mismatched global arrays. 
- upgradeMismatchedGlobals(); - for (GlobalVariable &GV : SrcM.globals()) if (const Comdat *SC = GV.getComdat()) ComdatMembers[SC].push_back(&GV); diff --git a/test/Linker/ctors5.ll b/test/Linker/ctors5.ll new file mode 100644 index 000000000000..99124061bb32 --- /dev/null +++ b/test/Linker/ctors5.ll @@ -0,0 +1,8 @@ +; RUN: llvm-link -S %s | FileCheck %s + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }] +; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null }] + +define void @f() { + ret void +} From eea645e49f194c23d90a465967cee1561cc997ea Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Mon, 7 Dec 2015 13:25:18 +0000 Subject: [PATCH 167/364] AVX-512: implement kunpck intrinsics. Differential Revision: http://reviews.llvm.org/D14821 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254908 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 6 + lib/Target/X86/X86ISelLowering.cpp | 50 ++++++--- lib/Target/X86/X86InstrAVX512.td | 13 +-- lib/Target/X86/X86IntrinsicsInfo.h | 8 +- test/CodeGen/X86/avx512-intrinsics.ll | 141 ++++++++++++++++-------- test/CodeGen/X86/avx512bw-intrinsics.ll | 50 +++++++++ 6 files changed, 194 insertions(+), 74 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 29377d136797..c287a3a1928e 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4026,6 +4026,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 364a8c260ba1..f38ca2956ff3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15998,19 +15998,26 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, } if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { - assert(MaskVT == MVT::v64i1 && "Unexpected mask VT!"); - assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); - // In case 32bit mode, bitcast i64 is illegal, extend/split it. - SDValue Lo, Hi; - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(0, dl, MVT::i32)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, - DAG.getConstant(1, dl, MVT::i32)); - - Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Lo); - Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Hi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Hi, Lo); + if (MaskVT == MVT::v64i1) { + assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + // In case 32bit mode, bitcast i64 is illegal, extend/split it. 
+ SDValue Lo, Hi; + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(0, dl, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(1, dl, MVT::i32)); + + Lo = DAG.getBitcast(MVT::v32i1, Lo); + Hi = DAG.getBitcast(MVT::v32i1, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); + } else { + // MaskVT require < 64bit. Truncate mask (should succeed in any case), + // and bitcast. + MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); + return DAG.getBitcast(MaskVT, + DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask)); + } } else { MVT BitcastVT = MVT::getVectorVT(MVT::i1, @@ -16600,6 +16607,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } + case KUNPCK: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + // Arguments should be swapped. + SDValue Res = DAG.getNode(IntrData->Opc0, dl, + MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), + Src2, Src1); + return DAG.getBitcast(VT, Res); + } default: break; } @@ -20001,8 +20020,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } } case ISD::INTRINSIC_WO_CHAIN: { - Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)); - return; + if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)) + Results.push_back(V); + return; } case ISD::READCYCLECOUNTER: { return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 452e9f05f84a..60238f6ab23d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2396,16 +2396,6 @@ defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD; defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; -multiclass avx512_mask_unpck_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"BWrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; -} -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; - // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, Predicate prd> { @@ -2496,6 +2486,9 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>; + def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index cc53d5f3ce5a..8f8a100cea04 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,7 +30,7 @@ enum IntrinsicType { COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, - TERLOG_OP_MASK, 
TERLOG_OP_MASKZ, BROADCASTM + TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK }; struct IntrinsicData { @@ -341,7 +341,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), - + X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0), + X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0), + X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD, @@ -1827,7 +1829,7 @@ static void verifyIntrinsicTables() { "Intrinsic data tables should have unique entries"); } -// X86 specific compare constants. +// X86 specific compare constants. // They must be kept in synch with avxintrin.h #define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ #define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index c01f1adce360..4a4032570e7c 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -65,9 +65,9 @@ declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone define i16 @unpckbw_test(i16 %a0, i16 %a1) { ; CHECK-LABEL: unpckbw_test: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k0 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: kunpckbw %k0, %k1, %k0 +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kunpckbw %k1, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) @@ -6160,76 +6160,103 @@ define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x } define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae -; CHECK: vcomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sete %al - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) +; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae: +; CHECK: ## BB#0: +; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) ret i32 %res } define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae -; CHECK: vucomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sete %al - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) +; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae: +; CHECK: ## BB#0: +; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) ret i32 %res } define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_comi_sd_eq -; CHECK: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) +; CHECK-LABEL: test_x86_avx512_comi_sd_eq: +; CHECK: ## BB#0: +; 
CHECK-NEXT: vcomisd %xmm1, %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) ret i32 %res } define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq -; CHECK: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) +; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq: +; CHECK: ## BB#0: +; CHECK-NEXT: vucomisd %xmm1, %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) ret i32 %res } define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae -; CHECK: vcomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) +; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae: +; CHECK: ## BB#0: +; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) ret i32 %res } define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae -; CHECK: vucomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) +; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae: +; CHECK: ## BB#0: +; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) ret i32 %res } define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_comi_sd_lt -; CHECK: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) +; CHECK-LABEL: test_x86_avx512_comi_sd_lt: +; CHECK: ## BB#0: +; CHECK-NEXT: vcomisd %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) ret i32 %res } define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt -; CHECK: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax - %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) +; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt: +; CHECK: ## BB#0: +; CHECK-NEXT: vucomisd %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) ret i32 %res } -declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) +declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt -; CHECK: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax - %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) +; CHECK-LABEL: 
test_x86_avx512_ucomi_ss_lt: +; CHECK: ## BB#0: +; CHECK-NEXT: vucomiss %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq + %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) ret i32 %res } @@ -6238,21 +6265,32 @@ declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk: -; CHECK: vmovss %xmm1, %xmm0, %xmm2 {%k1} +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) ret <4 x float> %res } define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz: -; CHECK: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2) ret <4 x float> %res } define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr: -; CHECK: vmovss %xmm1, %xmm0, %xmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res } @@ -6260,21 +6298,32 @@ define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x flo declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr: -; CHECK: vmovsd %xmm1, %xmm0, %xmm0 +; CHECK: ## BB#0: +; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1) ret <2 x double> %res } define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz: -; CHECK: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2) ret <2 x double> %res } define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk: -; CHECK: vmovsd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK: ## BB#0: +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) ret <2 x double> %res } diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll 
b/test/CodeGen/X86/avx512bw-intrinsics.ll index 0eba131a67c4..c6ba0dd6eb42 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2736,3 +2736,53 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %res2 = add <8 x i64> %res, %res1 ret <8 x i64> %res2 } + +declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) + +define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1) + ret i32 %res +} + +declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) + +define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k0 +; AVX512BW-NEXT: kmovq %rsi, %k1 +; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .Ltmp8: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) + ret i64 %res +} From b06ff9b1e1a0e4e9b3a135efd2803424db3a2abf Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 7 Dec 2015 13:39:24 +0000 Subject: [PATCH 168/364] AVX-512: Fixed masked load / store instruction selection for KNL. Patterns were missing for KNL target for <8 x i32>, <8 x float> masked load/store. This intrinsic comes with all legal types: <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 align, <8 x i1> %mask, <8 x float> %passThru), but still requires lowering, because VMASKMOVPS, VMASKMOVDQU32 work with 512-bit vectors only. All data operands should be widened to 512-bit vector. The mask operand should be widened to v16i1 with zeroes. 
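As a minimal sketch of the case being fixed (the function and value names below are illustrative only, not taken from this patch), a target with AVX-512F but no VLX must widen a load like:

    define <8 x float> @load8f32(<8 x float>* %addr, <8 x i1> %mask) {
      ; Without VLX the <8 x float> data and the <8 x i1> mask are widened to
      ; <16 x float> and <16 x i1> (the mask padded with zeroes), a 512-bit
      ; masked load is emitted, and the low <8 x float> is extracted back out.
      %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4,
                                                      <8 x i1> %mask,
                                                      <8 x float> zeroinitializer)
      ret <8 x float> %res
    }
    declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
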
Differential Revision: http://reviews.llvm.org/D15265 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254909 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeVectorOps.cpp | 5 +- lib/Target/X86/X86ISelLowering.cpp | 105 ++++++++++++++++++ lib/Target/X86/X86InstrAVX512.td | 27 ----- test/CodeGen/X86/masked_memop.ll | 46 +++++++- 4 files changed, 151 insertions(+), 32 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 1fb7b160a671..8295b2a19dd2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -244,7 +244,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -344,6 +344,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f38ca2956ff3..fa6f5c8be88c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1384,6 +1384,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); + } else { + setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); + setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); @@ -1459,6 +1464,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); @@ -19685,6 +19691,47 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } +/// Widen a vector input to a vector of NVT. The +/// input vector must have the same element type as NVT. +static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, + bool FillWithZeroes = false) { + // Check if InOp already has the right width. 
+ MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) + Ops.push_back(InOp.getOperand(i)); + + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); + } + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} + static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && @@ -19714,6 +19761,62 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, return Op; } +static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + + MaskedLoadSDNode *N = cast(Op.getNode()); + MVT VT = Op.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(Op); + + if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { + // This operation is legal for targets with VLX, but without + // VLX the vector should be widened to 512 bit + unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); + SDValue Src0 = N->getSrc0(); + Src0 = ExtendToType(Src0, WideDataVT, DAG); + Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), + N->getBasePtr(), Mask, Src0, + N->getMemoryVT(), N->getMemOperand(), + N->getExtensionType()); + + SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + NewLoad.getValue(0), + DAG.getIntPtrConstant(0, dl)); + SDValue RetOps[] = {Exract, NewLoad.getValue(1)}; + return DAG.getMergeValues(RetOps, dl); + } + return Op; +} + +static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MaskedStoreSDNode *N = cast(Op.getNode()); + SDValue DataToStore = N->getValue(); + MVT VT = DataToStore.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(Op); + + if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { + // This operation is legal for targets with VLX, but without + // VLX the vector should be widened to 512 bit + unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); + DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); + Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), + Mask, N->getMemoryVT(), N->getMemOperand(), + 
N->isTruncatingStore()); + } + return Op; +} + static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && @@ -19873,6 +19976,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UMAX: case ISD::UMIN: return LowerMINMAX(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); + case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); + case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); case ISD::GC_TRANSITION_START: diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 60238f6ab23d..58206c6acaa6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2766,22 +2766,6 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} - defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, @@ -2843,17 +2827,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -// NoVLX patterns -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} // Move Int Doubleword to Packed Double Int // diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll index a720054c167c..1a9cf008e869 100644 --- a/test/CodeGen/X86/masked_memop.ll +++ b/test/CodeGen/X86/masked_memop.ll @@ -139,18 +139,55 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double ret <4 x double> %res } -; AVX2-LABEL: test11 +; AVX2-LABEL: test11a ; AVX2: vmaskmovps ; AVX2: vblendvps -; SKX-LABEL: test11 -; SKX: vmovaps {{.*}}{%k1} -define <8 x float> @test11(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +; SKX-LABEL: test11a +; SKX: vmovaps (%rdi), %ymm1 {%k1} +; AVX512-LABEL: test11a +; AVX512: kshiftlw $8 +; AVX512: kshiftrw $8 +; AVX512: vmovups (%rdi), %zmm1 {%k1} +define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { %mask = icmp eq <8 x i32> %trigger, zeroinitializer %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst) ret <8 x 
float> %res } +; SKX-LABEL: test11b +; SKX: vmovdqu32 (%rdi), %ymm1 {%k1} +; AVX512-LABEL: test11b +; AVX512: kshiftlw $8 +; AVX512: kshiftrw $8 +; AVX512: vmovdqu32 (%rdi), %zmm1 {%k1} +define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) { + %res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst) + ret <8 x i32> %res +} + +; SKX-LABEL: test11c +; SKX: vmovaps (%rdi), %ymm0 {%k1} {z} +; AVX512-LABEL: test11c +; AVX512: kshiftlw $8 +; AVX512: kshiftrw $8 +; AVX512: vmovups (%rdi), %zmm0 {%k1} {z} +define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) { + %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer) + ret <8 x float> %res +} + +; SKX-LABEL: test11d +; SKX: vmovdqu32 (%rdi), %ymm0 {%k1} {z} +; AVX512-LABEL: test11d +; AVX512: kshiftlw $8 +; AVX512: kshiftrw $8 +; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) { + %res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer) + ret <8 x i32> %res +} + ; AVX2-LABEL: test12 ; AVX2: vpmaskmovd %ymm @@ -291,6 +328,7 @@ declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>) declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>) declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>) +declare <8 x i32> @llvm.masked.load.v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>) declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>) From 0284291aa161a26a07c1da135ad34596672dd79a Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 7 Dec 2015 14:12:44 +0000 Subject: [PATCH 169/364] [mips][ias] Removed DSP/DSPr2 instructions from base architecture valid-xfail.s's. Summary: valid-xfail.s is for instructions that should be valid in the given ISA but incorrectly fail. DSP/DSPr2 instructions are correct to fail since DSP/DSPr2 is not enabled. 
Reviewers: vkalintiris Subscribers: dsanders, llvm-commits Differential Revision: http://reviews.llvm.org/D15072 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254911 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/Mips/mips32r2/invalid-dsp.s | 97 ++++++++++++++++++ test/MC/Mips/mips32r2/invalid-dspr2.s | 134 ++++++++++++++++++++++++ test/MC/Mips/mips32r2/valid-xfail.s | 123 ---------------------- test/MC/Mips/mips32r3/valid-xfail.s | 123 ---------------------- test/MC/Mips/mips32r5/valid-xfail.s | 123 ---------------------- test/MC/Mips/mips64r2/valid-xfail.s | 142 ++------------------------ test/MC/Mips/mips64r3/valid-xfail.s | 139 ++----------------------- test/MC/Mips/mips64r5/valid-xfail.s | 139 ++----------------------- 8 files changed, 255 insertions(+), 765 deletions(-) create mode 100644 test/MC/Mips/mips32r2/invalid-dsp.s create mode 100644 test/MC/Mips/mips32r2/invalid-dspr2.s diff --git a/test/MC/Mips/mips32r2/invalid-dsp.s b/test/MC/Mips/mips32r2/invalid-dsp.s new file mode 100644 index 000000000000..66e5f63129ac --- /dev/null +++ b/test/MC/Mips/mips32r2/invalid-dsp.s @@ -0,0 +1,97 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding \ +# RUN: -mcpu=mips32r2 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + absq_s.ph $8,$a0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + absq_s.w $s3,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq.ph $s1,$15,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq_s.ph $s3,$s6,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq_s.w $a2,$8,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addsc $s8,$15,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu.qb $s6,$v1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu_s.qb $s4,$s8,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addwc $k0,$s6,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + bitrev $14,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.eq.ph $s7,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.le.ph $8,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.lt.ph $k0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.eq.qb $14,$s6,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.le.qb $9,$a3,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.lt.qb $sp,$at,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.eq.qb $v0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.le.qb $s1,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.lt.qb $at,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpaq_sa.l.w 
$ac0,$a2,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpau.h.qbl $ac1,$10,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpau.h.qbr $ac1,$s7,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsq_s.w.ph $ac0,$gp,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsq_sa.l.w $ac0,$a3,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsu.h.qbl $ac2,$14,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsu.h.qbr $ac2,$a1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extpdpv $s6,$ac0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extpv $13,$ac0,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv.w $8,$ac3,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_r.w $8,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_rs.w $gp,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_s.h $s2,$ac1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + insv $s2,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lbux $9,$14($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lhx $sp,$k0($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lwx $12,$12($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + madd $ac2,$sp,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maddu $ac2,$a1,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_s.w.phl $ac2,$25,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_s.w.phr $ac0,$10,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_sa.w.phl $ac3,$a1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_sa.w.phr $ac1,$at,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mfhi $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mflo $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + modsub $a3,$12,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mthi $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mthlip $a3,$ac0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mtlo $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleq_s.w.phl $11,$s4,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + 
muleq_s.w.phr $s6,$a0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleu_s.ph.qbl $a2,$14,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleu_s.ph.qbr $a1,$ra,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulq_rs.ph $s2,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulsaq_s.w.ph $ac0,$ra,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mult $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + multu $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + packrl.ph $ra,$24,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + pick.ph $ra,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + pick.qb $11,$a0,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbl $s7,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbla $a0,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbr $ra,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbra $24,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbl $sp,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbla $s6,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbr $gp,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbra $k1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq.ph.w $14,$s8,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq.qb.ph $a2,$12,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq_rs.ph.w $a1,$k0,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrqu_s.qb.ph $zero,$gp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + raddu.w.qb $25,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + repl.ph $at,-307 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + replv.ph $v1,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + replv.qb $25,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shilo $ac1,26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shilov $ac2,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv.ph $10,$s0,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv.qb $gp,$v1,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction 
requires a CPU feature not currently enabled + shllv_s.ph $k1,$at,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv_s.w $s1,$ra,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav.ph $25,$s2,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav_r.ph $s3,$11,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav_r.w $s7,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrlv.qb $a2,$s2,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq.ph $ra,$9,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq_s.ph $13,$s8,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq_s.w $k1,$a2,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu.qb $s6,$a2,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu_s.qb $s1,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips32r2/invalid-dspr2.s b/test/MC/Mips/mips32r2/invalid-dspr2.s new file mode 100644 index 000000000000..5c31b465ca1f --- /dev/null +++ b/test/MC/Mips/mips32r2/invalid-dspr2.s @@ -0,0 +1,134 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding \ +# RUN: -mcpu=mips32r2 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + absq_s.ph $8,$a0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + absq_s.qb $15,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + absq_s.w $s3,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq.ph $s1,$15,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq_s.ph $s3,$s6,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addq_s.w $a2,$8,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addqh.ph $s4,$14,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addqh_r.ph $sp,$25,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addsc $s8,$15,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu.ph $a2,$14,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu.qb $s6,$v1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu_s.ph $a3,$s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addu_s.qb $s4,$s8,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + adduh.qb $a1,$a1,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + adduh_r.qb $a0,$9,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + addwc $k0,$s6,$s7 # CHECK: 
:[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + bitrev $14,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.eq.ph $s7,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.le.ph $8,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmp.lt.ph $k0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgdu.eq.qb $s3,$zero,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgdu.le.qb $v1,$15,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgdu.lt.qb $s0,$gp,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.eq.qb $14,$s6,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.le.qb $9,$a3,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpgu.lt.qb $sp,$at,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.eq.qb $v0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.le.qb $s1,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + cmpu.lt.qb $at,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpa.w.ph $ac1,$s7,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpaq_sa.l.w $ac0,$a2,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpaqx_s.w.ph $ac3,$a0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpaqx_sa.w.ph $ac1,$zero,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpau.h.qbl $ac1,$10,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpau.h.qbr $ac1,$s7,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpax.w.ph $ac3,$a0,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dps.w.ph $ac1,$a3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsq_s.w.ph $ac0,$gp,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsq_sa.l.w $ac0,$a3,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsqx_s.w.ph $ac3,$13,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsqx_sa.w.ph $ac3,$sp,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsu.h.qbl $ac2,$14,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsu.h.qbr $ac2,$a1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + dpsx.w.ph $ac0,$s7,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extpdpv $s6,$ac0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires 
a CPU feature not currently enabled + extpv $13,$ac0,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv.w $8,$ac3,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_r.w $8,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_rs.w $gp,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + extrv_s.h $s2,$ac1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + insv $s2,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lbux $9,$14($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lhx $sp,$k0($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + lwx $12,$12($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + madd $ac2,$sp,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maddu $ac2,$a1,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_s.w.phl $ac2,$25,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_s.w.phr $ac0,$10,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_sa.w.phl $ac3,$a1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + maq_sa.w.phr $ac1,$at,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mfhi $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mflo $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + modsub $a3,$12,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mthi $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mthlip $a3,$ac0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mtlo $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mul.ph $10,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mul.ph $s4,$24,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mul_s.ph $10,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleq_s.w.phl $11,$s4,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleq_s.w.phr $s6,$a0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleu_s.ph.qbl $a2,$14,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + muleu_s.ph.qbr $a1,$ra,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulq_rs.ph $s2,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulq_rs.w $at,$s4,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU 
feature not currently enabled + mulq_s.ph $s0,$k1,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulq_s.w $9,$a3,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulsa.w.ph $ac1,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mulsaq_s.w.ph $ac0,$ra,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + mult $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + multu $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + packrl.ph $ra,$24,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + pick.ph $ra,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + pick.qb $11,$a0,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbl $s7,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbla $a0,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbr $ra,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precequ.ph.qbra $24,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbl $sp,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbla $s6,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbr $gp,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + preceu.ph.qbra $k1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precr.qb.ph $v0,$12,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq.ph.w $14,$s8,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq.qb.ph $a2,$12,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrq_rs.ph.w $a1,$k0,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + precrqu_s.qb.ph $zero,$gp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + raddu.w.qb $25,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + repl.ph $at,-307 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + replv.ph $v1,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + replv.qb $25,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shilo $ac1,26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shilov $ac2,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv.ph $10,$s0,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv.qb $gp,$v1,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: 
error: instruction requires a CPU feature not currently enabled + shllv_s.ph $k1,$at,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shllv_s.w $s1,$ra,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav.ph $25,$s2,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav.qb $zero,$24,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav_r.ph $s3,$11,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav_r.qb $a0,$sp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrav_r.w $s7,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrlv.ph $14,$10,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + shrlv.qb $a2,$s2,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq.ph $ra,$9,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq_s.ph $13,$s8,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subq_s.w $k1,$a2,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subqh.ph $10,$at,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subqh.w $v0,$a2,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subqh_r.ph $a0,$12,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subqh_r.w $10,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu.ph $9,$s6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu.qb $s6,$a2,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu_s.ph $v1,$a1,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subu_s.qb $s1,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subuh.qb $zero,$gp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + subuh_r.qb $s4,$s8,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips32r2/valid-xfail.s b/test/MC/Mips/mips32r2/valid-xfail.s index 658f172aec3d..5a61eb6cbfb7 100644 --- a/test/MC/Mips/mips32r2/valid-xfail.s +++ b/test/MC/Mips/mips32r2/valid-xfail.s @@ -8,27 +8,10 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -76,18 +59,6 @@ ceil.l.d $f1,$f3 ceil.l.s $f18,$f13 cfcmsa $s6,$19 - 
cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 ctcmsa $31,$s7 cvt.d.l $f4,$f16 cvt.ps.s $f3,$f18,$f19 @@ -95,112 +66,44 @@ cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmt $k0 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph $ac0,$s7,$gp dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 floor.l.d $f26,$f7 floor.l.s $f12,$f5 fork $s2,$8,$a0 - insv $s2,$at iret lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) lle $gp,-237($ra) lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 movt.ps $f20,$f25,$fcc2 movz.ps $f18,$f17,$ra - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d $f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -209,33 +112,7 @@ sbe $s7,33($s1) sce $sp,189($10) she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w $s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 swe $24,94($k0) swle $v1,-209($gp) swre 
$k0,-202($s2) diff --git a/test/MC/Mips/mips32r3/valid-xfail.s b/test/MC/Mips/mips32r3/valid-xfail.s index 09e19e8bb3b6..defa388c4df2 100644 --- a/test/MC/Mips/mips32r3/valid-xfail.s +++ b/test/MC/Mips/mips32r3/valid-xfail.s @@ -8,27 +8,10 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -76,18 +59,6 @@ ceil.l.d $f1,$f3 ceil.l.s $f18,$f13 cfcmsa $s6,$19 - cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 ctcmsa $31,$s7 cvt.d.l $f4,$f16 cvt.ps.s $f3,$f18,$f19 @@ -95,112 +66,44 @@ cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmt $k0 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph $ac0,$s7,$gp dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 floor.l.d $f26,$f7 floor.l.s $f12,$f5 fork $s2,$8,$a0 - insv $s2,$at iret lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) lle $gp,-237($ra) lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 movt.ps $f20,$f25,$fcc2 movz.ps $f18,$f17,$ra - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d 
$f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -209,33 +112,7 @@ sbe $s7,33($s1) sce $sp,189($10) she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w $s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 swe $24,94($k0) swle $v1,-209($gp) swre $k0,-202($s2) diff --git a/test/MC/Mips/mips32r5/valid-xfail.s b/test/MC/Mips/mips32r5/valid-xfail.s index 30fc4b98e056..c1bf7a4b3a29 100644 --- a/test/MC/Mips/mips32r5/valid-xfail.s +++ b/test/MC/Mips/mips32r5/valid-xfail.s @@ -8,27 +8,10 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -76,18 +59,6 @@ ceil.l.d $f1,$f3 ceil.l.s $f18,$f13 cfcmsa $s6,$19 - cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 ctcmsa $31,$s7 cvt.d.l $f4,$f16 cvt.ps.s $f3,$f18,$f19 @@ -95,112 +66,44 @@ cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmt $k0 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph $ac0,$s7,$gp dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 floor.l.d $f26,$f7 floor.l.s $f12,$f5 fork $s2,$8,$a0 - insv $s2,$at iret lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) lle $gp,-237($ra) lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 movt.ps $f20,$f25,$fcc2 movz.ps $f18,$f17,$ra - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - 
muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d $f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -209,33 +112,7 @@ sbe $s7,33($s1) sce $sp,189($10) she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w $s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 swe $24,94($k0) swle $v1,-209($gp) swre $k0,-202($s2) diff --git a/test/MC/Mips/mips64r2/valid-xfail.s b/test/MC/Mips/mips64r2/valid-xfail.s index 5faa29d6468e..bf17b35c446c 100644 --- a/test/MC/Mips/mips64r2/valid-xfail.s +++ b/test/MC/Mips/mips64r2/valid-xfail.s @@ -8,30 +8,10 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 - alnv.ob $v22,$v19,$v30,$v1 - alnv.ob $v31,$v23,$v30,$at - alnv.ob $v8,$v17,$v30,$a1 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -77,18 +57,6 @@ c.un.ps $fcc4,$f2,$f26 c.un.s $fcc1,$f30,$f4 cvt.ps.s $f3,$f18,$f19 - cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmfc0 $10,c0_watchhi,2 @@ -96,54 +64,22 @@ dmt $k0 dmtc0 $15,c0_datalo dmtgc0 $a2,c0_watchlo,2 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph 
$ac0,$s7,$gp drorv $at,$a1,$s7 dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 fork $s2,$8,$a0 - insv $s2,$at iret - lbe $14,122($9) + lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) - lhe $s6,219($v1) + lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) - lle $gp,-237($ra) - lwe $ra,-145($14) + lle $gp,-237($ra) + lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 @@ -151,92 +87,30 @@ movz.ps $f18,$f17,$ra msgn.qh $v0,$v24,$v20 msgn.qh $v12,$v21,$v0[1] - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d $f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 rsqrt.d $f3,$f28 rsqrt.s $f4,$f8 - sbe $s7,33($s1) - sce $sp,189($10) - she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w $s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 + sbe $s7,33($s1) + sce $sp,189($10) + she $24,105($v0) sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 - swe $24,94($k0) + swe $24,94($k0) swle $v1,-209($gp) swre $k0,-202($s2) tlbginv diff --git a/test/MC/Mips/mips64r3/valid-xfail.s b/test/MC/Mips/mips64r3/valid-xfail.s index dcf66bf97d68..7e94200dfd62 100644 --- a/test/MC/Mips/mips64r3/valid-xfail.s +++ b/test/MC/Mips/mips64r3/valid-xfail.s @@ -8,30 +8,13 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - 
adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 alnv.ob $v22,$v19,$v30,$v1 alnv.ob $v31,$v23,$v30,$at alnv.ob $v8,$v17,$v30,$a1 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -77,18 +60,6 @@ c.un.ps $fcc4,$f2,$f26 c.un.s $fcc1,$f30,$f4 cvt.ps.s $f3,$f18,$f19 - cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmfc0 $10,c0_watchhi,2 @@ -96,54 +67,22 @@ dmt $k0 dmtc0 $15,c0_datalo dmtgc0 $a2,c0_watchlo,2 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph $ac0,$s7,$gp drorv $at,$a1,$s7 dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 fork $s2,$8,$a0 - insv $s2,$at iret - lbe $14,122($9) + lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) - lhe $s6,219($v1) + lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) - lle $gp,-237($ra) - lwe $ra,-145($14) + lle $gp,-237($ra) + lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 @@ -151,92 +90,30 @@ movz.ps $f18,$f17,$ra msgn.qh $v0,$v24,$v20 msgn.qh $v12,$v21,$v0[1] - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d $f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 rsqrt.d $f3,$f28 rsqrt.s $f4,$f8 - sbe $s7,33($s1) - sce $sp,189($10) - she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w 
$s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 + sbe $s7,33($s1) + sce $sp,189($10) + she $24,105($v0) sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 - swe $24,94($k0) + swe $24,94($k0) swle $v1,-209($gp) swre $k0,-202($s2) tlbginv diff --git a/test/MC/Mips/mips64r5/valid-xfail.s b/test/MC/Mips/mips64r5/valid-xfail.s index 0f7788359cf2..b5ecdcbfb726 100644 --- a/test/MC/Mips/mips64r5/valid-xfail.s +++ b/test/MC/Mips/mips64r5/valid-xfail.s @@ -8,30 +8,13 @@ .set noat abs.ps $f22,$f8 - absq_s.ph $8,$a0 - absq_s.qb $15,$s1 - absq_s.w $s3,$ra add.ps $f25,$f27,$f13 - addq.ph $s1,$15,$at - addq_s.ph $s3,$s6,$s2 - addq_s.w $a2,$8,$at - addqh.ph $s4,$14,$s1 addqh.w $s7,$s7,$k1 - addqh_r.ph $sp,$25,$s8 addqh_r.w $8,$v1,$zero - addsc $s8,$15,$12 - addu.ph $a2,$14,$s3 - addu.qb $s6,$v1,$v1 - addu_s.ph $a3,$s3,$gp - addu_s.qb $s4,$s8,$s1 - adduh.qb $a1,$a1,$at - adduh_r.qb $a0,$9,$12 - addwc $k0,$s6,$s7 alnv.ob $v22,$v19,$v30,$v1 alnv.ob $v31,$v23,$v30,$at alnv.ob $v8,$v17,$v30,$a1 alnv.ps $f12,$f18,$f30,$12 - bitrev $14,$at c.eq.d $fcc1,$f15,$f15 c.eq.ps $fcc5,$f0,$f9 c.eq.s $fcc5,$f24,$f17 @@ -77,18 +60,6 @@ c.un.ps $fcc4,$f2,$f26 c.un.s $fcc1,$f30,$f4 cvt.ps.s $f3,$f18,$f19 - cmp.eq.ph $s7,$14 - cmp.le.ph $8,$14 - cmp.lt.ph $k0,$sp - cmpgdu.eq.qb $s3,$zero,$k0 - cmpgdu.le.qb $v1,$15,$s2 - cmpgdu.lt.qb $s0,$gp,$sp - cmpgu.eq.qb $14,$s6,$s8 - cmpgu.le.qb $9,$a3,$s4 - cmpgu.lt.qb $sp,$at,$8 - cmpu.eq.qb $v0,$24 - cmpu.le.qb $s1,$a1 - cmpu.lt.qb $at,$a3 cvt.s.pl $f30,$f1 cvt.s.pu $f14,$f25 dmfc0 $10,c0_watchhi,2 @@ -96,54 +67,22 @@ dmt $k0 dmtc0 $15,c0_datalo dmtgc0 $a2,c0_watchlo,2 - dpa.w.ph $ac1,$s7,$k0 - dpaq_s.w.ph $ac2,$a0,$13 - dpaq_sa.l.w $ac0,$a2,$14 - dpaqx_s.w.ph $ac3,$a0,$24 - dpaqx_sa.w.ph $ac1,$zero,$s5 - dpau.h.qbl $ac1,$10,$24 - dpau.h.qbr $ac1,$s7,$s6 - dpax.w.ph $ac3,$a0,$k0 - dps.w.ph $ac1,$a3,$a1 - dpsq_s.w.ph $ac0,$gp,$k0 - dpsq_sa.l.w $ac0,$a3,$15 - dpsqx_s.w.ph $ac3,$13,$a3 - dpsqx_sa.w.ph $ac3,$sp,$s2 - dpsu.h.qbl $ac2,$14,$10 - dpsu.h.qbr $ac2,$a1,$s6 - dpsx.w.ph $ac0,$s7,$gp drorv $at,$a1,$s7 dvpe $s6 emt $8 evpe $v0 - extpdpv $s6,$ac0,$s8 - extpv $13,$ac0,$14 - extrv.w $8,$ac3,$at - extrv_r.w $8,$ac1,$s6 - extrv_rs.w $gp,$ac1,$s6 - extrv_s.h $s2,$ac1,$14 fork $s2,$8,$a0 - insv $s2,$at iret - lbe $14,122($9) + lbe $14,122($9) lbue $11,-108($10) - lbux $9,$14($v0) - lhe $s6,219($v1) + lhe $s6,219($v1) lhue $gp,118($11) - lhx $sp,$k0($15) - lle $gp,-237($ra) - lwe $ra,-145($14) + lle $gp,-237($ra) + lwe $ra,-145($14) lwle $11,-42($11) lwre $sp,-152($24) - lwx $12,$12($s4) madd.ps $f22,$f3,$f14,$f3 - maq_s.w.phl $ac2,$25,$11 - maq_s.w.phr $ac0,$10,$25 - maq_sa.w.phl $ac3,$a1,$v1 - maq_sa.w.phr $ac1,$at,$10 mfgc0 $s6,c0_datahi1 - mflo $9,$ac2 - modsub $a3,$12,$a3 mov.ps $f22,$f17 movf.ps $f10,$f28,$fcc6 movn.ps $f31,$f31,$s3 @@ -151,92 +90,30 @@ movz.ps $f18,$f17,$ra msgn.qh $v0,$v24,$v20 msgn.qh $v12,$v21,$v0[1] - msub $ac2,$sp,$14 msub.ps $f12,$f14,$f29,$f17 - msubu $ac2,$a1,$24 mtc0 $9,c0_datahi1 mtgc0 $s4,$21,7 - mthi $v0,$ac1 - mthlip $a3,$ac0 - mul.ph $s4,$24,$s0 mul.ps $f14,$f0,$f16 - mul_s.ph $10,$14,$15 - muleq_s.w.phl $11,$s4,$s4 - muleq_s.w.phr $s6,$a0,$s8 - muleu_s.ph.qbl $a2,$14,$8 - muleu_s.ph.qbr $a1,$ra,$9 - mulq_rs.ph $s2,$14,$15 - mulq_rs.w $at,$s4,$25 - mulq_s.ph $s0,$k1,$15 - 
mulq_s.w $9,$a3,$s0 - mulsa.w.ph $ac1,$s4,$s6 - mulsaq_s.w.ph $ac0,$ra,$s2 neg.ps $f19,$f13 nmadd.ps $f27,$f4,$f9,$f25 nmsub.ps $f6,$f12,$f14,$f17 - packrl.ph $ra,$24,$14 - pick.ph $ra,$a2,$gp - pick.qb $11,$a0,$gp pll.ps $f25,$f9,$f30 plu.ps $f1,$f26,$f29 preceq.w.phl $s8,$gp preceq.w.phr $s5,$15 - precequ.ph.qbl $s7,$ra - precequ.ph.qbla $a0,$9 - precequ.ph.qbr $ra,$s3 - precequ.ph.qbra $24,$8 - preceu.ph.qbl $sp,$8 - preceu.ph.qbla $s6,$11 - preceu.ph.qbr $gp,$s1 - preceu.ph.qbra $k1,$s0 - precr.qb.ph $v0,$12,$s8 - precrq.ph.w $14,$s8,$24 - precrq.qb.ph $a2,$12,$12 - precrq_rs.ph.w $a1,$k0,$a3 - precrqu_s.qb.ph $zero,$gp,$s5 pul.ps $f9,$f30,$f26 puu.ps $f24,$f9,$f2 - raddu.w.qb $25,$s3 rdpgpr $s3,$9 recip.d $f19,$f6 recip.s $f3,$f30 - repl.ph $at,-307 - replv.ph $v1,$s7 - replv.qb $25,$12 rorv $13,$a3,$s5 rsqrt.d $f3,$f28 rsqrt.s $f4,$f8 - sbe $s7,33($s1) - sce $sp,189($10) - she $24,105($v0) - shilo $ac1,26 - shilov $ac2,$10 - shllv.ph $10,$s0,$s0 - shllv.qb $gp,$v1,$zero - shllv_s.ph $k1,$at,$13 - shllv_s.w $s1,$ra,$k0 - shrav.ph $25,$s2,$s1 - shrav.qb $zero,$24,$11 - shrav_r.ph $s3,$11,$25 - shrav_r.qb $a0,$sp,$s5 - shrav_r.w $s7,$s4,$s6 - shrlv.ph $14,$10,$9 - shrlv.qb $a2,$s2,$11 + sbe $s7,33($s1) + sce $sp,189($10) + she $24,105($v0) sub.ps $f5,$f14,$f26 - subq.ph $ra,$9,$s8 - subq_s.ph $13,$s8,$s5 - subq_s.w $k1,$a2,$a3 - subqh.ph $10,$at,$9 - subqh.w $v0,$a2,$zero - subqh_r.ph $a0,$12,$s6 - subqh_r.w $10,$a2,$gp - subu.ph $9,$s6,$s4 - subu.qb $s6,$a2,$s6 - subu_s.ph $v1,$a1,$s3 - subu_s.qb $s1,$at,$ra - subuh.qb $zero,$gp,$gp - subuh_r.qb $s4,$s8,$s6 - swe $24,94($k0) + swe $24,94($k0) swle $v1,-209($gp) swre $k0,-202($s2) tlbginv From fad998fc360e9d7140354e14ef71de8426446ead Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Mon, 7 Dec 2015 14:22:39 +0000 Subject: [PATCH 170/364] [ARM] Generate ABI_optimization_goals build attribute, as described in the ARM ARM. Summary: This reverts r254234, and adds a simple fix for the annoying case of use-after-free. 
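As a rough illustration (a sketch distilled from the tests added below, not additional committed code), a module built for arm-none-none-eabi whose functions all share one optimization goal now gets a single Tag_ABI_optimization_goals value:

    ; All functions carry minsize+optsize, so the module-wide goal is
    ; "Aggressive Size" and the asm printer emits:
    ;   .eabi_attribute 30, 4   @ Tag_ABI_optimization_goals
    ; (optnone maps to 6, plain optsize to 3, -O1/-O2 to 1, -O3 to 2, -O0 to 5;
    ;  a module mixing different goals collapses to 0 and no attribute is emitted.)
    define i32 @f(i64 %z) minsize optsize {
      ret i32 0
    }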
Reviewers: rengolin Subscribers: aemerson, llvm-commits, rengolin Differential Revision: http://reviews.llvm.org/D15236 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254912 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 45 +++++++++++++++++-- lib/Target/ARM/ARMAsmPrinter.h | 5 +++ .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 6 +-- .../build-attributes-optimization-minsize.ll | 18 ++++++++ .../build-attributes-optimization-mixed.ll | 23 ++++++++++ .../build-attributes-optimization-optnone.ll | 18 ++++++++ .../build-attributes-optimization-optsize.ll | 18 ++++++++ .../ARM/build-attributes-optimization.ll | 23 ++++++++++ test/MC/ARM/data-in-code.ll | 16 +++---- 9 files changed, 157 insertions(+), 15 deletions(-) create mode 100644 test/CodeGen/ARM/build-attributes-optimization-minsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-mixed.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optnone.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization.ll diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 67ebfa2b581d..61141c0031df 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -60,7 +60,7 @@ using namespace llvm; ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr), - InConstantPool(false) {} + InConstantPool(false), OptimizationGoals(-1) {} void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end @@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); SetupMachineFunction(MF); + const Function* F = MF.getFunction(); + const TargetMachine& TM = MF.getTarget(); + + // Calculate this function's optimization goal. + unsigned OptimizationGoal; + if (F->hasFnAttribute(Attribute::OptimizeNone)) + // For best debugging illusion, speed and small size sacrificed + OptimizationGoal = 6; + else if (F->optForMinSize()) + // Aggressively for small size, speed and debug illusion sacrificed + OptimizationGoal = 4; + else if (F->optForSize()) + // For small size, but speed and debugging illusion preserved + OptimizationGoal = 3; + else if (TM.getOptLevel() == CodeGenOpt::Aggressive) + // Aggressively for speed, small size and debug illusion sacrificed + OptimizationGoal = 2; + else if (TM.getOptLevel() > CodeGenOpt::None) + // For speed, but small size and good debug illusion preserved + OptimizationGoal = 1; + else // TM.getOptLevel() == CodeGenOpt::None + // For good debugging, but speed and small size preserved + OptimizationGoal = 5; + + // Combine a new optimization goal with existing ones. + if (OptimizationGoals == -1) // uninitialized goals + OptimizationGoals = OptimizationGoal; + else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals + OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = MF.getFunction()->hasInternalLinkage(); + bool Internal = F->hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -506,6 +535,16 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. 
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // The last attribute to be emitted is ABI_optimization_goals + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + if (OptimizationGoals > 0) + ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); + OptimizationGoals = -1; + + ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -798,8 +837,6 @@ void ARMAsmPrinter::emitAttributes() { else if (STI.hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowVirtualization); - - ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index fb925f162f70..ed7be2de51ca 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// labels used for ARMv4t thumb code to make register indirect calls. SmallVector, 4> ThumbIndirectPads; + /// OptimizationGoals - Maintain a combined optimization goal for all + /// functions in a module: one of Tag_ABI_optimization_goals values, + /// -1 if uninitialized, 0 if conflicting goals + int OptimizationGoals; + public: explicit ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index f316ad17576a..6084f22c8470 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, - StringRef StrinValue) override; + StringRef StringValue) override; void emitArch(unsigned Arch) override; void emitArchExtension(unsigned ArchExt) override; void emitObjectArch(unsigned Arch) override; @@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, class ARMTargetELFStreamer : public ARMTargetStreamer { private: // This structure holds all attributes, accounting for - // their string/numeric value, so we can later emmit them + // their string/numeric value, so we can later emit them // in declaration order, keeping all in the same vector struct AttributeItem { enum { @@ -254,7 +254,7 @@ class ARMTargetELFStreamer : public ARMTargetStreamer { } Type; unsigned Tag; unsigned IntValue; - StringRef StringValue; + std::string StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { // The conformance tag must be emitted first when serialised diff --git a/test/CodeGen/ARM/build-attributes-optimization-minsize.ll b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll new file mode 100644 index 000000000000..4cfb6012f439 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck 
%s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 4 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Aggressive Size + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { minsize optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-mixed.ll b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll new file mode 100644 index 000000000000..8009fc6e28f8 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s + +; CHECK-NOT: .eabi_attribute 30 +; CHECK-NOT: Tag_ABI_optimization_goals + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +define i32 @g(i64 %z) #1 { + ret i32 1 +} + +attributes #0 = { noinline optnone } + +attributes #1 = { minsize optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-optnone.ll b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll new file mode 100644 index 000000000000..cbdb915045c6 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 6 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Best Debugging + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { noinline optnone } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-optsize.ll b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll new file mode 100644 index 000000000000..bab210aa8d01 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi 
-mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 3 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Size + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization.ll b/test/CodeGen/ARM/build-attributes-optimization.ll new file mode 100644 index 000000000000..21b7b3c3ab0c --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s --check-prefix=NONE +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s --check-prefix=SPEED +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s --check-prefix=MAXSPEED + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=NONE-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=SPEED-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=MAXSPEED-OBJ + +; NONE: .eabi_attribute 30, 5 @ Tag_ABI_optimization_goals +; SPEED: .eabi_attribute 30, 1 @ Tag_ABI_optimization_goals +; MAXSPEED: .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals + +; NONE-OBJ: TagName: ABI_optimization_goals +; NONE-OBJ-NEXT: Description: Debugging +; SPEED-OBJ: TagName: ABI_optimization_goals +; SPEED-OBJ-NEXT: Description: Speed +; MAXSPEED-OBJ: TagName: ABI_optimization_goals +; MAXSPEED-OBJ-NEXT: Description: Aggressive Speed + +define i32 @f(i64 %z) { + ret i32 0 +} + diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll index c4910ff20e61..10657a3fed39 100644 --- a/test/MC/ARM/data-in-code.ll +++ b/test/MC/ARM/data-in-code.ll @@ -51,13 +51,6 @@ exit: ;; ARM-NEXT: Other: ;; ARM-NEXT: Section: [[MIXED_SECT]] -;; ARM: Symbol { -;; ARM: Name: $d -;; ARM-NEXT: Value: 0 -;; ARM-NEXT: Size: 0 -;; ARM-NEXT: Binding: Local -;; ARM-NEXT: Type: None - ;; ARM: Symbol { ;; ARM: Name: $d ;; ARM-NEXT: Value: 0x{{[0-9A-F]+}} @@ -77,10 +70,17 @@ exit: ;; ARM-NEXT: Section: .ARM.exidx ;; ARM-NEXT: } +;; ARM: Symbol { +;; ARM: Name: $d +;; ARM-NEXT: Value: 0 +;; ARM-NEXT: Size: 0 +;; ARM-NEXT: Binding: Local +;; ARM-NEXT: Type: None + ;; ARM-NOT: ${{[atd]}} ;; TMB: Symbol { -;; TMB: Name: $d.2 +;; TMB: Name: $d.1 ;; TMB-NEXT: Value: 0x{{[0-9A-F]+}} ;; TMB-NEXT: Size: 0 ;; TMB-NEXT: Binding: Local From 3b45f263c3669c65a76f7033ee24093a9870cfcf Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 7 Dec 2015 14:33:34 +0000 Subject: [PATCH 171/364] VX-512: Fixed a bug in FP logic operation lowering FP logic instructions are supported in DQ extension on AVX-512 target. I use integer operations instead. Added tests. I also enabled FABS in this patch in order to check ANDPS. The operations are FOR, FXOR, FAND, FANDN. 
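As a rough sketch of the motivating case (taken from the fabs test added in this patch, not new code), a 512-bit fabs needs a packed AND with the sign mask, and without DQ that AND is now emitted in the integer domain (vpandd/vpandq) rather than as the unavailable 512-bit vandps/vandpd:

    declare <16 x float> @llvm.fabs.v16f32(<16 x float>)

    define <16 x float> @fabs_v16f32(<16 x float> %p) {
      ; AVX512F (no DQ): vpandd {{.*}}(%rip), %zmm0, %zmm0
      ; AVX512DQ / SKX:  vandps {{.*}}(%rip), %zmm0, %zmm0
      %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
      ret <16 x float> %t
    }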
The instructions, that supported for 512-bit vector under DQ are: VORPS/PD, VXORPS/PD, VANDPS/PD, FANDNPS/PD. Differential Revision: http://reviews.llvm.org/D15110 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254913 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 55 +++++++++++++++-------- lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 2 +- test/CodeGen/X86/avx-logic.ll | 1 + test/CodeGen/X86/avx512-arith.ll | 71 ++++++++++++++++++++++++++++++ test/CodeGen/X86/vec_fabs.ll | 2 +- 6 files changed, 111 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fa6f5c8be88c..21bca74353c4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1340,6 +1340,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); + setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); @@ -1347,6 +1348,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); + setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); @@ -26339,6 +26341,31 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT.is512BitVector() && !Subtarget->hasDQI()) { + // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention. + // These logic operations may be executed in the integer domain. + SDLoc dl(N); + MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); + + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); + unsigned IntOpcode = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: IntOpcode = ISD::OR; break; + case X86ISD::FXOR: IntOpcode = ISD::XOR; break; + case X86ISD::FAND: IntOpcode = ISD::AND; break; + case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + } + SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); + return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); + } + return SDValue(); +} /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -26354,19 +26381,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, if (C->getValueAPF().isPosZero()) return N->getOperand(0); - EVT VT = N->getValueType(0); - if (VT.is512BitVector() && !Subtarget->hasDQI()) { - SDLoc dl(N); - MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); - - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); - unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? 
ISD::OR : ISD::XOR; - SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); - return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); - } - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. @@ -26391,7 +26406,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { } /// Do target-specific dag combines on X86ISD::FAND nodes. -static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26402,11 +26418,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26417,7 +26434,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, @@ -27233,8 +27250,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); - case X86ISD::FAND: return PerformFANDCombine(N, DAG); - case X86ISD::FANDN: return PerformFANDNCombine(N, DAG); + case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget); + case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ANY_EXTEND: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4a4ceaca88f4..b412f8fb3ecb 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -770,6 +770,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">, AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">; def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; +def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a93240bd717c..a545335dd5dd 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in { // Multiclass for vectors using the X86 logical operation aliases for FP. 
multiclass sse12_fp_packed_vector_logical_alias< bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index a91fe7e0c523..e9e7d5aea273 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { ; CHECK-LABEL: andpd256: diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index d7da77a5eb54..9220e4f269cd 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s @@ -823,3 +824,73 @@ define <16 x float> @test_fxor(<16 x float> %a) { ret <16 x float>%res } +define <8 x float> @test_fxor_8f32(<8 x float> %a) { +; CHECK-LABEL: test_fxor_8f32: +; CHECK: ## BB#0: +; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = fsub <8 x float> , %a + ret <8 x float>%res +} + +define <8 x double> @fabs_v8f64(<8 x double> %p) +; AVX512F-LABEL: fabs_v8f64: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fabs_v8f64: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: fabs_v8f64: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: fabs_v8f64: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: fabs_v8f64: +; SKX: ## BB#0: +; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq +{ + %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) + ret <8 x double> %t +} +declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) + +define <16 x float> @fabs_v16f32(<16 x float> %p) +; AVX512F-LABEL: fabs_v16f32: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fabs_v16f32: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: fabs_v16f32: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: fabs_v16f32: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: fabs_v16f32: +; SKX: ## BB#0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq +{ + %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) + ret <16 x float> %t +} +declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll index 960b5f27cf53..54f33b2bd224 100644 
--- a/test/CodeGen/X86/vec_fabs.ll +++ b/test/CodeGen/X86/vec_fabs.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s - +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s define <2 x double> @fabs_v2f64(<2 x double> %p) { From fda49fdbc8fadb4d3fdf839da37d8543085eb738 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 7 Dec 2015 15:05:44 +0000 Subject: [PATCH 172/364] [ThinLTO] Support cloning of temporary DILocation metadata This is needed to support linking of module-level metadata as a postpass after function importing, where we will be leaving temporary metadata on imported instructions until the postpass metadata import. Also added unittest. Split from D14838. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254914 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/DebugInfoMetadata.h | 6 ++++-- unittests/IR/MetadataTest.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 68a497745ae7..0b3fe06f3576 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -1118,8 +1118,10 @@ class DILocation : public MDNode { } TempDILocation cloneImpl() const { - return getTemporary(getContext(), getLine(), getColumn(), getScope(), - getInlinedAt()); + // Get the raw scope/inlinedAt since it is possible to invoke this on + // a DILocation containing temporary metadata. + return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), + getRawInlinedAt()); } // Disallow replacing operands. diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp index da4271a30aea..8f346f53a2d2 100644 --- a/unittests/IR/MetadataTest.cpp +++ b/unittests/IR/MetadataTest.cpp @@ -813,6 +813,14 @@ TEST_F(DILocationTest, getTemporary) { EXPECT_FALSE(L->isResolved()); } +TEST_F(DILocationTest, cloneTemporary) { + MDNode *N = MDNode::get(Context, None); + auto L = DILocation::getTemporary(Context, 2, 7, N); + EXPECT_TRUE(L->isTemporary()); + auto L2 = L->clone(); + EXPECT_TRUE(L2->isTemporary()); +} + typedef MetadataTest GenericDINodeTest; TEST_F(GenericDINodeTest, get) { From c4724f60d7d2d5bbea55e60e5711f2ed5f3ec6ce Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Mon, 7 Dec 2015 15:44:34 +0000 Subject: [PATCH 173/364] Silence all C4592 warnings with MSVC 2015 Update 1. This warning produces false positives that Microsoft says will be fixed in Update 2. Until this produces reliable diagnostics, it is safe to disable the diagnostic -- the compiler is not doing anything different than it previously did aside from issuing the diagnostic. (Note, this silences at least one false positive in LLVM with FeatureBitset uses.) 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254915 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/HandleLLVMOptions.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 9e4c4c5078af..6c3109c4de76 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -317,6 +317,9 @@ if( MSVC ) -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. + -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't # support the 'aligned' attribute in the way that clang sources requires (for From 7218e37dabb5cc8b3b61b08cbaf0da49da65a479 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 7 Dec 2015 16:01:40 +0000 Subject: [PATCH 174/364] Simplify test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254916 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Linker/2003-05-31-LinkerRename.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll index e10e239071ad..ee070b6dbdf0 100644 --- a/test/Linker/2003-05-31-LinkerRename.ll +++ b/test/Linker/2003-05-31-LinkerRename.ll @@ -1,6 +1,4 @@ -; RUN: llvm-as %S/Inputs/2003-05-31-LinkerRename.ll -o %t.1.bc -; RUN: llvm-as %s -o %t.2.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s +; RUN: llvm-link %S/Inputs/2003-05-31-LinkerRename.ll %s -S | FileCheck %s ; CHECK: @bar = global i32 ()* @foo.2 From 0178d23ade9b549a91e3409dd15833ea8a451cb4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 7 Dec 2015 16:31:41 +0000 Subject: [PATCH 175/364] Link declaration lazily. We already linked available_externally and linkonce lazily, this just adds declarations to the list. 
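A minimal sketch of the visible effect (mirroring the test updates below, not an exhaustive description of the change): a bare declaration in the source module is now only copied into the destination when something references it, which is why the affected tests grow an explicit user:

    declare void @foo()

    ; Without this caller the lazily linked @foo would not be pulled into the
    ; linked module, so the updated tests add a use like this one.
    define void @use_foo() {
      call void @foo()
      ret void
    }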
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254917 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 3 +++ test/Linker/2003-04-23-LinkOnceLost.ll | 4 ++++ test/Linker/2003-05-31-LinkerRename.ll | 4 ++-- test/Linker/ConstantGlobals.ll | 4 ++++ test/Linker/Inputs/opaque.ll | 8 ++++++++ test/Linker/Inputs/testlink.ll | 4 +++- test/Linker/Inputs/type-unique-dst-types2.ll | 4 ++++ test/Linker/Inputs/type-unique-dst-types3.ll | 4 ++++ test/Linker/opaque.ll | 4 ++++ test/Linker/testlink.ll | 9 ++++++++- test/Linker/type-unique-dst-types.ll | 4 ++++ test/Linker/type-unique-src-type.ll | 4 ++-- test/Linker/unnamed-addr1-a.ll | 10 ++++++++++ test/Linker/weakextern.ll | 4 ++++ 14 files changed, 64 insertions(+), 6 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a1c3162bf796..8e0904a858bc 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1753,6 +1753,9 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { GV.hasAvailableExternallyLinkage())) return false; + if (GV.isDeclaration()) + return false; + if (const Comdat *SC = GV.getComdat()) { bool LinkFromSrc; Comdat::SelectionKind SK; diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll index c699d1eb058e..e2b600c877c0 100644 --- a/test/Linker/2003-04-23-LinkOnceLost.ll +++ b/test/Linker/2003-04-23-LinkOnceLost.ll @@ -4,3 +4,7 @@ declare void @foo() +define void @use_foo() { + call void @foo() + ret void +} diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll index ee070b6dbdf0..f511be1bf22f 100644 --- a/test/Linker/2003-05-31-LinkerRename.ll +++ b/test/Linker/2003-05-31-LinkerRename.ll @@ -6,13 +6,13 @@ ; CHECK-NEXT: ret i32 7 ; CHECK-NEXT: } -; CHECK: declare i32 @foo() - ; CHECK: define i32 @test() { ; CHECK-NEXT: %X = call i32 @foo() ; CHECK-NEXT: ret i32 %X ; CHECK-NEXT: } +; CHECK: declare i32 @foo() + declare i32 @foo() define i32 @test() { diff --git a/test/Linker/ConstantGlobals.ll b/test/Linker/ConstantGlobals.ll index 49f86a51bd7f..58c0d711a07e 100644 --- a/test/Linker/ConstantGlobals.ll +++ b/test/Linker/ConstantGlobals.ll @@ -6,3 +6,7 @@ ; CHECK-DAG: @Y = external global [1 x i32] @Y = external global [1 x i32] + +define [1 x i32]* @use-Y() { + ret [1 x i32] *@Y +} diff --git a/test/Linker/Inputs/opaque.ll b/test/Linker/Inputs/opaque.ll index f164abd586d1..a5f27cba418e 100644 --- a/test/Linker/Inputs/opaque.ll +++ b/test/Linker/Inputs/opaque.ll @@ -11,3 +11,11 @@ define void @f1() { getelementptr %A, %A* null, i32 0 ret void } + +define %A* @use_g2() { + ret %A* @g2 +} + +define %B* @use_g3() { + ret %B* @g3 +} diff --git a/test/Linker/Inputs/testlink.ll b/test/Linker/Inputs/testlink.ll index 263d9e77d1ab..22a66399be09 100644 --- a/test/Linker/Inputs/testlink.ll +++ b/test/Linker/Inputs/testlink.ll @@ -53,4 +53,6 @@ define internal void @testIntern() { ret void } -declare void @VecSizeCrash1(%VecSize) +define void @VecSizeCrash1(%VecSize) { + ret void +} diff --git a/test/Linker/Inputs/type-unique-dst-types2.ll b/test/Linker/Inputs/type-unique-dst-types2.ll index b565c6d73649..7770ea3cca07 100644 --- a/test/Linker/Inputs/type-unique-dst-types2.ll +++ b/test/Linker/Inputs/type-unique-dst-types2.ll @@ -1,3 +1,7 @@ %A.11 = type { %B } %B = type { i8 } @g1 = external global %A.11 + +define %A.11* @use_g1() { + ret %A.11* @g1 +} diff --git a/test/Linker/Inputs/type-unique-dst-types3.ll b/test/Linker/Inputs/type-unique-dst-types3.ll index c5794ad839a2..8a5ac2694791 
100644 --- a/test/Linker/Inputs/type-unique-dst-types3.ll +++ b/test/Linker/Inputs/type-unique-dst-types3.ll @@ -1,2 +1,6 @@ %A.11 = type opaque @g2 = external global %A.11 + +define %A.11* @use_g2() { + ret %A.11* @g2 +} diff --git a/test/Linker/opaque.ll b/test/Linker/opaque.ll index 4f3f398f8f1b..6fd1ae90d4f4 100644 --- a/test/Linker/opaque.ll +++ b/test/Linker/opaque.ll @@ -19,3 +19,7 @@ %C = type { %A } @g1 = external global %B + +define %B* @use_g1() { + ret %B* @g1 +} diff --git a/test/Linker/testlink.ll b/test/Linker/testlink.ll index 82a2229f57a0..6a316a3bf846 100644 --- a/test/Linker/testlink.ll +++ b/test/Linker/testlink.ll @@ -32,6 +32,11 @@ ; CHECK-DAG: @0 = external global i32 @0 = external global i32 + +define i32* @use0() { + ret i32* @0 +} + ; CHECK-DAG: @Inte = global i32 1 @Inte = global i32 1 @@ -101,4 +106,6 @@ define void @testIntern() { ret void } -declare void @VecSizeCrash(%VecSize) +define void @VecSizeCrash(%VecSize) { + ret void +} diff --git a/test/Linker/type-unique-dst-types.ll b/test/Linker/type-unique-dst-types.ll index 30aecbb970cb..1adad49de91d 100644 --- a/test/Linker/type-unique-dst-types.ll +++ b/test/Linker/type-unique-dst-types.ll @@ -17,3 +17,7 @@ %A = type { %B } %B = type { i8 } @g3 = external global %A + +define %A* @use_g3() { + ret %A* @g3 +} diff --git a/test/Linker/type-unique-src-type.ll b/test/Linker/type-unique-src-type.ll index 110ecc87e1b1..ab7322892e07 100644 --- a/test/Linker/type-unique-src-type.ll +++ b/test/Linker/type-unique-src-type.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: %B = type { %A } ; CHECK-NEXT: %A = type { i8 } -; CHECK: @g1 = external global %C.0 +; CHECK: @g1 = global %C.0 zeroinitializer ; CHECK: getelementptr %C.0, %C.0* null, i64 0, i32 0, i32 0 %A = type { i8 } @@ -21,4 +21,4 @@ define void @f1() { getelementptr %C, %C* null, i64 0, i32 0, i32 0 ret void } -@g1 = external global %C.0 +@g1 = global %C.0 zeroinitializer diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll index 6ff0fd90b687..a96f089a99c0 100644 --- a/test/Linker/unnamed-addr1-a.ll +++ b/test/Linker/unnamed-addr1-a.ll @@ -15,6 +15,11 @@ define weak void @func-b() unnamed_addr { ret void } @global-c = common unnamed_addr global i32 0 ; CHECK-DAG: @global-c = common unnamed_addr global i32 0 @global-d = external global i32 + +define i32* @use-global-d() { + ret i32* @global-d +} + ; CHECK-DAG: @global-d = global i32 42 @global-e = external unnamed_addr global i32 ; CHECK-DAG: @global-e = unnamed_addr global i32 42 @@ -27,6 +32,11 @@ define weak void @func-b() unnamed_addr { ret void } ; CHECK-DAG: @alias-b = unnamed_addr alias i32, i32* @global-f declare void @func-c() +define void @use-func-c() { + call void @func-c() + ret void +} + ; CHECK-DAG: define weak void @func-c() { define weak void @func-d() { ret void } ; CHECK-DAG: define weak void @func-d() { diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll index e1754e60547e..814550a907bb 100644 --- a/test/Linker/weakextern.ll +++ b/test/Linker/weakextern.ll @@ -6,3 +6,7 @@ @kallsyms_names = extern_weak global [0 x i8] @MyVar = extern_weak global i32 @Inte = extern_weak global i32 + +define weak [0 x i8]* @use_kallsyms_names() { + ret [0 x i8]* @kallsyms_names +} From 07b637e04d1e88688c2456da1bd02eef08682cf7 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 7 Dec 2015 17:35:56 +0000 Subject: [PATCH 176/364] [Orc] Removing traces of takeOwnershipOfBuffers left after r251560. Patch by Joshua Gerrard. Thanks Joshua! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254919 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Orc/ObjectTransformLayer.h | 8 ------ tools/llvm-rtdyld/llvm-rtdyld.cpp | 12 --------- .../Orc/ObjectTransformLayerTest.cpp | 27 ------------------- 3 files changed, 47 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index 7af662085474..f96e83ed5a1a 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -87,14 +87,6 @@ class ObjectTransformLayer { BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr); } - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. - template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs)); - } - /// @brief Access the transform functor directly. TransformFtor &getTransform() { return Transform; } diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp index 59c9a0c990f3..6ee3a44b63bf 100644 --- a/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -388,11 +388,6 @@ static int executeInput() { doPreallocation(MemMgr); RuntimeDyld Dyld(MemMgr, MemMgr); - // FIXME: Preserve buffers until resolveRelocations time to work around a bug - // in RuntimeDyldELF. - // This fixme should be fixed ASAP. This is a very brittle workaround. - std::vector> InputBuffers; - // If we don't have any input files, read from stdin. if (!InputFileList.size()) InputFileList.push_back("-"); @@ -409,7 +404,6 @@ static int executeInput() { return Error("unable to create object file: '" + EC.message() + "'"); ObjectFile &Obj = **MaybeObj; - InputBuffers.push_back(std::move(*InputBuffer)); // Load the object file Dyld.loadObject(Obj); @@ -656,11 +650,6 @@ static int linkAndVerify() { RuntimeDyldChecker Checker(Dyld, Disassembler.get(), InstPrinter.get(), llvm::dbgs()); - // FIXME: Preserve buffers until resolveRelocations time to work around a bug - // in RuntimeDyldELF. - // This fixme should be fixed ASAP. This is a very brittle workaround. - std::vector> InputBuffers; - // If we don't have any input files, read from stdin. 
if (!InputFileList.size()) InputFileList.push_back("-"); @@ -679,7 +668,6 @@ static int linkAndVerify() { return Error("unable to create object file: '" + EC.message() + "'"); ObjectFile &Obj = **MaybeObj; - InputBuffers.push_back(std::move(*InputBuffer)); // Load the object file Dyld.loadObject(Obj); diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp index 41b2307cadd8..c88c94f17b1c 100644 --- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp @@ -157,21 +157,6 @@ class MockBaseLayer { resetExpectations(); } - template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - EXPECT_EQ(MockObjSetHandle, H); - EXPECT_EQ(MockBufferSet, *MBs); - LastCalled = "takeOwnershipOfBuffers"; - } - void expectTakeOwnershipOfBuffers(ObjSetHandleT H, MockMemoryBufferSet *MBs) { - MockObjSetHandle = H; - MockBufferSet = *MBs; - } - void verifyTakeOwnershipOfBuffers() { - EXPECT_EQ("takeOwnershipOfBuffers", LastCalled); - resetExpectations(); - } - private: // Backing fields for remembering parameter/return values std::string LastCalled; @@ -275,18 +260,6 @@ TEST(ObjectTransformLayerTest, Main) { T1.mapSectionAddress(H, Buffer, MockAddress); M.verifyMapSectionAddress(); - // Test takeOwnershipOfBuffers, using unique pointer to buffer set - auto MockBufferSetPtr = llvm::make_unique(366); - M.expectTakeOwnershipOfBuffers(H, MockBufferSetPtr.get()); - T2.takeOwnershipOfBuffers(H, std::move(MockBufferSetPtr)); - M.verifyTakeOwnershipOfBuffers(); - - // Test takeOwnershipOfBuffers, using naked pointer to buffer set - MockMemoryBufferSet MockBufferSet = 266; - M.expectTakeOwnershipOfBuffers(H, &MockBufferSet); - T1.takeOwnershipOfBuffers(H, &MockBufferSet); - M.verifyTakeOwnershipOfBuffers(); - // Verify transform getter (non-const) MockObjectFile Mutatee = 277; MockObjectFile *Out = T2.getTransform()(&Mutatee); From bd58adf18d5a517be128887b57b67bd3ea976456 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Dec 2015 17:39:48 +0000 Subject: [PATCH 177/364] Tighten checks so we can see existing codegen The 2-element vector case shows a surprising bug: we failed to eliminate ops on undefs, so there are 4 fmax calls even though there can only be 2 valid elements in the inputs. 
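For reference, the 2-element case is the llvm.maxnum.v2f32 test in this file
(copied below from the test itself); presumably the <2 x float> operands get
widened so the upper lanes are undef, yet the scalarized lowering still emits
an fmaxf libcall per lane, which is why the tightened checks expect four calls
where two would suffice:

  define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
    %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
    ret <2 x float> %z
  }
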
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254920 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/fmaxnum.ll | 114 +++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 8 deletions(-) diff --git a/test/CodeGen/X86/fmaxnum.ll b/test/CodeGen/X86/fmaxnum.ll index 7aa087f92bdc..19041c9ff677 100644 --- a/test/CodeGen/X86/fmaxnum.ll +++ b/test/CodeGen/X86/fmaxnum.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX declare float @fmaxf(float, float) declare double @fmax(double, double) @@ -58,18 +58,116 @@ define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) { } ; CHECK-LABEL: @test_intrinsic_fmax_v2f32 -; CHECK: callq fmaxf -; CHECK: callq fmaxf +; SSE: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; SSE: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; SSE: movaps %xmm1, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX-NEXT: callq fmaxf +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovshdup {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vmovshdup {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vpermilps $231, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vpermilps $231, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm1[0,1,2],xmm0[0] +; AVX-NEXT: addq $56, %rsp +; AVX-NEXT: retq define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) { %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone ret <2 x float> %z } ; CHECK-LABEL: @test_intrinsic_fmax_v4f32 -; CHECK: callq fmaxf -; CHECK: callq fmaxf -; CHECK: callq fmaxf -; CHECK: callq fmaxf +; SSE: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; SSE: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] +; SSE-NEXT: callq fmaxf +; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; SSE: movaps %xmm1, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX-NEXT: callq fmaxf +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovshdup {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vmovshdup {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vpermilps $231, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload +; AVX: vpermilps $231, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; AVX: callq fmaxf +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX-NEXT: addq $56, %rsp +; AVX-NEXT: retq define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) { %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone ret <4 x float> %z From f8a6223dd008c680193a723e685acbe46ea61836 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 7 Dec 2015 18:46:41 +0000 Subject: [PATCH 178/364] [llvm-dwp] Restructure inputs for test case so they're all grouped together git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254922 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm-dwp/Inputs/simple/{ => notypes}/a.dwo | Bin .../llvm-dwp/Inputs/simple/{ => notypes}/b.dwo | Bin 
.../Inputs/{type_units => simple/types}/a.dwo | Bin .../Inputs/{type_units => simple/types}/b.dwo | Bin test/tools/llvm-dwp/X86/simple.test | 4 ++-- 5 files changed, 2 insertions(+), 2 deletions(-) rename test/tools/llvm-dwp/Inputs/simple/{ => notypes}/a.dwo (100%) rename test/tools/llvm-dwp/Inputs/simple/{ => notypes}/b.dwo (100%) rename test/tools/llvm-dwp/Inputs/{type_units => simple/types}/a.dwo (100%) rename test/tools/llvm-dwp/Inputs/{type_units => simple/types}/b.dwo (100%) diff --git a/test/tools/llvm-dwp/Inputs/simple/a.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/a.dwo rename to test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo diff --git a/test/tools/llvm-dwp/Inputs/simple/b.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/simple/b.dwo rename to test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo diff --git a/test/tools/llvm-dwp/Inputs/type_units/a.dwo b/test/tools/llvm-dwp/Inputs/simple/types/a.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/type_units/a.dwo rename to test/tools/llvm-dwp/Inputs/simple/types/a.dwo diff --git a/test/tools/llvm-dwp/Inputs/type_units/b.dwo b/test/tools/llvm-dwp/Inputs/simple/types/b.dwo similarity index 100% rename from test/tools/llvm-dwp/Inputs/type_units/b.dwo rename to test/tools/llvm-dwp/Inputs/simple/types/b.dwo diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test index 962e270a594e..d7365c814435 100644 --- a/test/tools/llvm-dwp/X86/simple.test +++ b/test/tools/llvm-dwp/X86/simple.test @@ -1,7 +1,7 @@ -RUN: llvm-dwp %p/../Inputs/simple/a.dwo %p/../Inputs/simple/b.dwo -o %t +RUN: llvm-dwp %p/../Inputs/simple/notypes/a.dwo %p/../Inputs/simple/notypes/b.dwo -o %t RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=NOTYP %s RUN: llvm-objdump -h %t | FileCheck --check-prefix=NOTYPOBJ %s -RUN: llvm-dwp %p/../Inputs/type_units/a.dwo %p/../Inputs/type_units/b.dwo -o %t +RUN: llvm-dwp %p/../Inputs/simple/types/a.dwo %p/../Inputs/simple/types/b.dwo -o %t RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=TYPES %s FIXME: For some reason, piping straight from llvm-dwp to llvm-dwarfdump doesn't behave well - looks like dwarfdump is reading/closes before dwp has finished. From 55aaa984cb3318df455b998f362646bf72414e10 Mon Sep 17 00:00:00 2001 From: Ron Lieberman Date: Mon, 7 Dec 2015 18:52:39 +0000 Subject: [PATCH 179/364] [Hexagon] Adding v60 test, vasr in particular. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254923 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/Hexagon/v60Vasr.ll | 247 ++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 test/CodeGen/Hexagon/v60Vasr.ll diff --git a/test/CodeGen/Hexagon/v60Vasr.ll b/test/CodeGen/Hexagon/v60Vasr.ll new file mode 100644 index 000000000000..fb177f614f72 --- /dev/null +++ b/test/CodeGen/Hexagon/v60Vasr.ll @@ -0,0 +1,247 @@ +; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s + +; CHECK: vasr(v{{[0-9]+}}.h,v{{[0-9]+}}.h,r{{[0-7]+}}):sat + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32" +target triple = "hexagon-unknown--elf" + +%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] } + +; Function Attrs: norecurse nounwind +define i32 @__test_vasr(%struct.buffer_t* noalias nocapture %f.buffer, %struct.buffer_t* noalias nocapture %g.buffer, %struct.buffer_t* noalias nocapture %res.buffer) #0 { +entry: + %buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 1 + %f.host = load i8*, i8** %buf_host, align 4 + %buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 0 + %f.dev = load i64, i64* %buf_dev, align 8 + %0 = icmp eq i8* %f.host, null + %1 = icmp eq i64 %f.dev, 0 + %f.host_and_dev_are_null = and i1 %0, %1 + %buf_min = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 0 + %f.min.0 = load i32, i32* %buf_min, align 4 + %buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 1 + %g.host = load i8*, i8** %buf_host10, align 4 + %buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 0 + %g.dev = load i64, i64* %buf_dev11, align 8 + %2 = icmp eq i8* %g.host, null + %3 = icmp eq i64 %g.dev, 0 + %g.host_and_dev_are_null = and i1 %2, %3 + %buf_min22 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 0 + %g.min.0 = load i32, i32* %buf_min22, align 4 + %buf_host27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 1 + %res.host = load i8*, i8** %buf_host27, align 4 + %buf_dev28 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 0 + %res.dev = load i64, i64* %buf_dev28, align 8 + %4 = icmp eq i8* %res.host, null + %5 = icmp eq i64 %res.dev, 0 + %res.host_and_dev_are_null = and i1 %4, %5 + %buf_extent31 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 0 + %res.extent.0 = load i32, i32* %buf_extent31, align 4 + %buf_min39 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 0 + %res.min.0 = load i32, i32* %buf_min39, align 4 + %6 = add nsw i32 %res.extent.0, -1 + %7 = and i32 %6, -64 + %8 = add i32 %res.min.0, 63 + %9 = add i32 %8, %7 + %10 = add nsw i32 %res.min.0, %res.extent.0 + %11 = add nsw i32 %10, -1 + %12 = icmp slt i32 %9, %11 + %13 = select i1 %12, i32 %9, i32 %11 + %14 = add nsw i32 %10, -64 + %15 = icmp slt i32 %res.min.0, %14 + %16 = select i1 %15, i32 %res.min.0, i32 %14 + %f.extent.0.required.s = sub nsw i32 %13, %16 + br i1 %f.host_and_dev_are_null, label %true_bb, label %after_bb + +true_bb: ; preds = %entry + %buf_elem_size44 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size44, 
align 4 + store i32 %16, i32* %buf_min, align 4 + %17 = add nsw i32 %f.extent.0.required.s, 1 + %buf_extent46 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 0 + store i32 %17, i32* %buf_extent46, align 4 + %buf_stride47 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride47, align 4 + %buf_min48 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min48, align 4 + %buf_extent49 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent49, align 4 + %buf_stride50 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride50, align 4 + %buf_min51 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min51, align 4 + %buf_extent52 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent52, align 4 + %buf_stride53 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride53, align 4 + %buf_min54 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min54, align 4 + %buf_extent55 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent55, align 4 + %buf_stride56 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride56, align 4 + br label %after_bb + +after_bb: ; preds = %true_bb, %entry + br i1 %g.host_and_dev_are_null, label %true_bb57, label %after_bb59 + +true_bb57: ; preds = %after_bb + %buf_elem_size60 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size60, align 4 + store i32 %16, i32* %buf_min22, align 4 + %18 = add nsw i32 %f.extent.0.required.s, 1 + %buf_extent62 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 0 + store i32 %18, i32* %buf_extent62, align 4 + %buf_stride63 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride63, align 4 + %buf_min64 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min64, align 4 + %buf_extent65 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent65, align 4 + %buf_stride66 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride66, align 4 + %buf_min67 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min67, align 4 + %buf_extent68 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent68, align 4 + %buf_stride69 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride69, align 4 + %buf_min70 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min70, align 4 + %buf_extent71 = getelementptr inbounds %struct.buffer_t, 
%struct.buffer_t* %g.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent71, align 4 + %buf_stride72 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride72, align 4 + br label %after_bb59 + +after_bb59: ; preds = %true_bb57, %after_bb + br i1 %res.host_and_dev_are_null, label %after_bb75.thread, label %after_bb75 + +after_bb75.thread: ; preds = %after_bb59 + %buf_elem_size76 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size76, align 4 + store i32 %16, i32* %buf_min39, align 4 + %19 = add nsw i32 %f.extent.0.required.s, 1 + store i32 %19, i32* %buf_extent31, align 4 + %buf_stride79 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride79, align 4 + %buf_min80 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min80, align 4 + %buf_extent81 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent81, align 4 + %buf_stride82 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride82, align 4 + %buf_min83 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min83, align 4 + %buf_extent84 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent84, align 4 + %buf_stride85 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride85, align 4 + %buf_min86 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min86, align 4 + %buf_extent87 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent87, align 4 + %buf_stride88 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride88, align 4 + br label %destructor_block + +after_bb75: ; preds = %after_bb59 + %20 = or i1 %f.host_and_dev_are_null, %g.host_and_dev_are_null + br i1 %20, label %destructor_block, label %"produce res" + +"produce res": ; preds = %after_bb75 + %21 = ashr i32 %res.extent.0, 6 + %22 = icmp sgt i32 %21, 0 + br i1 %22, label %"for res.s0.x.x", label %"end for res.s0.x.x", !prof !4 + +"for res.s0.x.x": ; preds = %"for res.s0.x.x", %"produce res" + %res.s0.x.x = phi i32 [ %41, %"for res.s0.x.x" ], [ 0, %"produce res" ] + %23 = shl nsw i32 %res.s0.x.x, 6 + %24 = add nsw i32 %23, %res.min.0 + %25 = sub nsw i32 %24, %f.min.0 + %26 = getelementptr inbounds i8, i8* %f.host, i32 %25 + %27 = bitcast i8* %26 to <16 x i32>* + %28 = load <16 x i32>, <16 x i32>* %27, align 1, !tbaa !5 + %29 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %28) + %30 = sub nsw i32 %24, %g.min.0 + %31 = getelementptr inbounds i8, i8* %g.host, i32 %30 + %32 = bitcast i8* %31 to <16 x i32>* + %33 = load <16 x i32>, <16 x i32>* %32, align 1, !tbaa !8 + %34 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %33) + %35 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %29, <32 x i32> %34) + %36 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %35) + %37 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %35) + %38 = tail 
call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %36, <16 x i32> %37, i32 4) + %39 = getelementptr inbounds i8, i8* %res.host, i32 %23 + %40 = bitcast i8* %39 to <16 x i32>* + store <16 x i32> %38, <16 x i32>* %40, align 1, !tbaa !10 + %41 = add nuw nsw i32 %res.s0.x.x, 1 + %42 = icmp eq i32 %41, %21 + br i1 %42, label %"end for res.s0.x.x", label %"for res.s0.x.x" + +"end for res.s0.x.x": ; preds = %"for res.s0.x.x", %"produce res" + %43 = add nsw i32 %res.extent.0, 63 + %44 = ashr i32 %43, 6 + %45 = icmp sgt i32 %44, %21 + br i1 %45, label %"for res.s0.x.x92.preheader", label %destructor_block, !prof !4 + +"for res.s0.x.x92.preheader": ; preds = %"end for res.s0.x.x" + %46 = sub i32 -64, %f.min.0 + %47 = add i32 %46, %10 + %48 = getelementptr inbounds i8, i8* %f.host, i32 %47 + %49 = bitcast i8* %48 to <16 x i32>* + %50 = load <16 x i32>, <16 x i32>* %49, align 1 + %51 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %50) + %52 = sub i32 -64, %g.min.0 + %53 = add i32 %52, %10 + %54 = getelementptr inbounds i8, i8* %g.host, i32 %53 + %55 = bitcast i8* %54 to <16 x i32>* + %56 = load <16 x i32>, <16 x i32>* %55, align 1 + %57 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %56) + %58 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %51, <32 x i32> %57) + %59 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %58) + %60 = add nsw i32 %res.extent.0, -64 + %61 = getelementptr inbounds i8, i8* %res.host, i32 %60 + %62 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %58) + %63 = tail call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %62, <16 x i32> %59, i32 4) + %64 = bitcast i8* %61 to <16 x i32>* + store <16 x i32> %63, <16 x i32>* %64, align 1, !tbaa !10 + br label %destructor_block + +destructor_block: ; preds = %"for res.s0.x.x92.preheader", %"end for res.s0.x.x", %after_bb75, %after_bb75.thread + ret i32 0 +} + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32>, <32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32>, <16 x i32>, i32) #1 + +attributes #0 = { norecurse nounwind } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{!"Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.8.0)"} +!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0} +!2 = !{i32 2, !"halide_mcpu", !"hexagonv60"} +!3 = !{i32 2, !"halide_mattrs", !"+hvx"} +!4 = !{!"branch_weights", i32 1073741824, i32 0} +!5 = !{!6, !6, i64 0} +!6 = !{!"f", !7} +!7 = !{!"Halide buffer"} +!8 = !{!9, !9, i64 0} +!9 = !{!"g", !7} +!10 = !{!11, !11, i64 0} +!11 = !{!"res", !7} From fbee4fa427e8b0959a95654924714449c7e076ba Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Dec 2015 19:13:40 +0000 Subject: [PATCH 180/364] remove redundant check: optForSize() includes a check for the minsize attribute; NFCI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86OptimizeLEAs.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index 
9171786707d8..da83c8ad8248 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -294,11 +294,9 @@ bool OptimizeLEAPass::removeRedundantAddrCalc( bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - bool OptSize = MF.getFunction()->optForSize(); - bool MinSize = MF.getFunction()->optForMinSize(); // Perform this optimization only if we care about code size. - if (!OptSize && !MinSize) + if (!MF.getFunction()->optForSize()) return false; MRI = &MF.getRegInfo(); From 5d0d98f6ec23fc83ec1b4f0816bcba9d393db1c4 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 7 Dec 2015 19:21:11 +0000 Subject: [PATCH 181/364] [ThinLTO] Support for specifying function index from pass manager Summary: Add a field on the PassManagerBuilder that clang or gold can use to pass down a pointer to the function index in memory to use for importing when the ThinLTO backend is triggered. Add support to supply this to the function import pass. Reviewers: joker.eph, dexonsmith Subscribers: davidxl, llvm-commits, joker.eph Differential Revision: http://reviews.llvm.org/D15024 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254926 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/LinkAllPasses.h | 1 + include/llvm/Transforms/IPO.h | 5 +++ .../llvm/Transforms/IPO/PassManagerBuilder.h | 5 +++ lib/Transforms/IPO/FunctionImport.cpp | 39 +++++++++++++------ lib/Transforms/IPO/PassManagerBuilder.cpp | 5 +++ 5 files changed, 44 insertions(+), 11 deletions(-) diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index fbc112ba45b7..1b22d01a3a25 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -86,6 +86,7 @@ namespace { (void) llvm::createDomViewerPass(); (void) llvm::createGCOVProfilerPass(); (void) llvm::createInstrProfilingPass(); + (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 96ddc6eceed2..eabf0556babd 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -20,6 +20,7 @@ namespace llvm { +class FunctionInfoIndex; class ModulePass; class Pass; class Function; @@ -85,6 +86,10 @@ ModulePass *createEliminateAvailableExternallyPass(); ModulePass *createGVExtractionPass(std::vector& GVs, bool deleteFn = false); +//===----------------------------------------------------------------------===// +/// This pass performs iterative function importing from other modules. +ModulePass *createFunctionImportPass(FunctionInfoIndex *Index = nullptr); + //===----------------------------------------------------------------------===// /// createFunctionInliningPass - Return a new pass object that uses a heuristic /// to inline direct function calls to small functions. diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index ef01fa350531..70b785f9efa3 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -15,9 +15,11 @@ #ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H #define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#include #include namespace llvm { +class FunctionInfoIndex; class Pass; class TargetLibraryInfoImpl; class TargetMachine; @@ -114,6 +116,9 @@ class PassManagerBuilder { /// added to the per-module passes. 
Pass *Inliner; + /// The function summary index to use for function importing. + FunctionInfoIndex *FunctionIndex; + bool DisableTailCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index c2359a8a172e..67d77adb650a 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -256,23 +256,38 @@ getFunctionIndexForFile(StringRef Path, std::string &Error, /// Pass that performs cross-module function import provided a summary file. class FunctionImportPass : public ModulePass { + /// Optional function summary index to use for importing, otherwise + /// the summary-file option must be specified. + FunctionInfoIndex *Index; public: /// Pass identification, replacement for typeid static char ID; - explicit FunctionImportPass() : ModulePass(ID) {} + /// Specify pass name for debug output + const char *getPassName() const override { + return "Function Importing"; + } + + explicit FunctionImportPass(FunctionInfoIndex *Index = nullptr) + : ModulePass(ID), Index(Index) {} bool runOnModule(Module &M) override { - if (SummaryFile.empty()) { - report_fatal_error("error: -function-import requires -summary-file\n"); - } - std::string Error; - std::unique_ptr Index = - getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); - if (!Index) { - errs() << "Error loading file '" << SummaryFile << "': " << Error << "\n"; - return false; + if (SummaryFile.empty() && !Index) + report_fatal_error("error: -function-import requires -summary-file or " + "file from frontend\n"); + std::unique_ptr IndexPtr; + if (!SummaryFile.empty()) { + if (Index) + report_fatal_error("error: -summary-file and index from frontend\n"); + std::string Error; + IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + if (!IndexPtr) { + errs() << "Error loading file '" << SummaryFile << "': " << Error + << "\n"; + return false; + } + Index = IndexPtr.get(); } // Perform the import now. @@ -293,5 +308,7 @@ INITIALIZE_PASS_END(FunctionImportPass, "function-import", "Summary Based Function Import", false, false) namespace llvm { -Pass *createFunctionImportPass() { return new FunctionImportPass(); } +Pass *createFunctionImportPass(FunctionInfoIndex *Index = nullptr) { + return new FunctionImportPass(Index); +} } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index ec6f21e8c64f..b8d1b7e78e35 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/Verifier.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" @@ -108,6 +109,7 @@ PassManagerBuilder::PassManagerBuilder() { SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; + FunctionIndex = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -476,6 +478,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + // Propagate constants at call sites into the functions they call. 
This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. From 322ee9e42128124fc21f69d0d1126bd74593b380 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Dec 2015 19:21:39 +0000 Subject: [PATCH 182/364] fix 'the the '; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254928 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ValueTracking.h | 2 +- lib/Analysis/LoopAccessAnalysis.cpp | 2 +- lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 2 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 2 +- tools/dsymutil/DwarfLinker.cpp | 2 +- tools/llc/llc.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index eb2c000e07cd..8e0291068472 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -286,7 +286,7 @@ namespace llvm { /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. - /// Memory dependence arises for example if the the instruction reads from + /// Memory dependence arises for example if the instruction reads from /// memory or may produce effects or undefined behaviour. Memory dependent /// instructions generally cannot be reorderd with respect to other memory /// dependent instructions or moved into non-dominated basic blocks. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 49b28078c976..b2670bf48dd8 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -289,7 +289,7 @@ void RuntimePointerChecking::groupChecks( // don't process them twice. SmallSet Seen; - // Go through all equivalence classes, get the the "pointer check groups" + // Go through all equivalence classes, get the "pointer check groups" // and add them to the overall solution. We use the order in which accesses // appear in 'Pointers' to enforce determinism. for (unsigned I = 0; I < Pointers.size(); ++I) { diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 64050824c10b..a8622a96527c 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -1592,7 +1592,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // not use the other opcode as it is a legacy artifact of TD files. int64_t Value; if (MO.getExpr()->evaluateAsAbsolute(Value)) { - // if the the operand can fit within a 7:2 field + // if the operand can fit within a 7:2 field if (Value < (1 << 8) && Value >= -(1 << 8)) { SMLoc myLoc = Operands[2]->getStartLoc(); // # is left in startLoc in the case of ## diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3dc49337ecb5..2862468563c2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1475,7 +1475,7 @@ bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { } -// Return true if the the instruction is a compund branch instruction. +// Return true if the instruction is a compund branch instruction. 
bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); } diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index 7ac6f8ed5e3d..e8877a5da894 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -2826,7 +2826,7 @@ void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit, uint64_t OrigLowPc = OrigUnitDie->getAttributeValueAsAddress( &OrigUnit, dwarf::DW_AT_low_pc, -1ULL); // Ranges addresses are based on the unit's low_pc. Compute the - // offset we need to apply to adapt to the the new unit's low_pc. + // offset we need to apply to adapt to the new unit's low_pc. int64_t UnitPcOffset = 0; if (OrigLowPc != -1ULL) UnitPcOffset = int64_t(OrigLowPc) - Unit.getLowPc(); diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 531aba1f64bf..bffa39fd9e5c 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -100,7 +100,7 @@ static cl::opt AsmVerbose("asm-verbose", static cl::opt CompileTwice("compile-twice", cl::Hidden, cl::desc("Run everything twice, re-using the same pass " - "manager and verify the the result is the same."), + "manager and verify the result is the same."), cl::init(false)); static int compileModule(char **, LLVMContext &); From 3317c77cea4b033fa0dc447e6b008dc1e9278094 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Dec 2015 19:31:34 +0000 Subject: [PATCH 183/364] don't repeat function names in comments; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.h | 53 +++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 6bdb07d1df04..fbec6626d99d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -23,54 +23,47 @@ class FunctionPass; class ImmutablePass; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a -/// X86-specific DAG, ready for instruction scheduling. -/// +/// This pass converts a legalized DAG into a X86-specific DAG, ready for +/// instruction scheduling. FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); -/// createX86GlobalBaseRegPass - This pass initializes a global base -/// register for PIC on x86-32. +/// This pass initializes a global base register for PIC on x86-32. FunctionPass* createX86GlobalBaseRegPass(); -/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses -/// to local-dynamic TLS variables so that the TLS base address for the module -/// is only fetched once per execution path through the function. +/// This pass combines multiple accesses to local-dynamic TLS variables so that +/// the TLS base address for the module is only fetched once per execution path +/// through the function. FunctionPass *createCleanupLocalDynamicTLSPass(); -/// createX86FloatingPointStackifierPass - This function returns a pass which -/// converts floating point register references and pseudo instructions into -/// floating point stack references and physical instructions. -/// +/// This function returns a pass which converts floating-point register +/// references and pseudo instructions into floating-point stack references and +/// physical instructions. 
FunctionPass *createX86FloatingPointStackifierPass(); -/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions -/// before each call to avoid transition penalty between functions encoded with -/// AVX and SSE. +/// This pass inserts AVX vzeroupper instructions before each call to avoid +/// transition penalty between functions encoded with AVX and SSE. FunctionPass *createX86IssueVZeroUpperPass(); -/// createX86PadShortFunctions - Return a pass that pads short functions -/// with NOOPs. This will prevent a stall when returning on the Atom. +/// Return a pass that pads short functions with NOOPs. +/// This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); -/// createX86FixupLEAs - Return a a pass that selectively replaces -/// certain instructions (like add, sub, inc, dec, some shifts, -/// and some multiplies) by equivalent LEA instructions, in order -/// to eliminate execution delays in some Atom processors. + +/// Return a a pass that selectively replaces certain instructions (like add, +/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA +/// instructions, in order to eliminate execution delays in some processors. FunctionPass *createX86FixupLEAs(); -/// createX86OptimizeLEAs() - Return a pass that removes redundant -/// address recalculations. +/// Return a pass that removes redundant address recalculations. FunctionPass *createX86OptimizeLEAs(); -/// createX86CallFrameOptimization - Return a pass that optimizes -/// the code-size of x86 call sequences. This is done by replacing -/// esp-relative movs with pushes. +/// Return a pass that optimizes the code-size of x86 call sequences. This is +/// done by replacing esp-relative movs with pushes. FunctionPass *createX86CallFrameOptimization(); -/// createX86WinEHStatePass - Return an IR pass that inserts EH registration -/// stack objects and explicit EH state updates. This pass must run after EH -/// preparation, which does Windows-specific but architecture-neutral -/// preparation. +/// Return an IR pass that inserts EH registration stack objects and explicit +/// EH state updates. This pass must run after EH preparation, which does +/// Windows-specific but architecture-neutral preparation. FunctionPass *createX86WinEHStatePass(); /// Return a Machine IR pass that expands X86-specific pseudo From a16511be95e9579b1a60497ae5fe70ac41b552b1 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 7 Dec 2015 19:53:38 +0000 Subject: [PATCH 184/364] Fix function return type in declaration (bot errors from r254926). Try to fix bot build errors from r254926 by correcting the function return type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254934 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/IPO.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index eabf0556babd..04032d3b328a 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -88,7 +88,7 @@ ModulePass *createGVExtractionPass(std::vector& GVs, bool //===----------------------------------------------------------------------===// /// This pass performs iterative function importing from other modules. 
-ModulePass *createFunctionImportPass(FunctionInfoIndex *Index = nullptr); +Pass *createFunctionImportPass(FunctionInfoIndex *Index = nullptr); //===----------------------------------------------------------------------===// /// createFunctionInliningPass - Return a new pass object that uses a heuristic From 7d9752d1d3cd23f31a258804ecbb7e04b9813865 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Dec 2015 20:36:00 +0000 Subject: [PATCH 185/364] Fix line endings git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254939 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx2-vbroadcast.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 186f50873650..a18a587e4a66 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -300,8 +300,8 @@ entry: define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp { ; CHECK-LABEL: load_splat_8f32_4f32_33333333: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vmovaps (%rdi), %xmm0 -; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1 +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq entry: From 4e2c148ec86b66853f64326a7ee8e85afd479ae1 Mon Sep 17 00:00:00 2001 From: Kit Barton Date: Mon, 7 Dec 2015 20:50:29 +0000 Subject: [PATCH 186/364] [PPC64] Convert bool literals to i32 Convert i1 values to i32 values if they should be allocated in GPRs instead of CRs. Phabricator: http://reviews.llvm.org/D14064 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254942 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/CMakeLists.txt | 1 + lib/Target/PowerPC/PPC.h | 2 + lib/Target/PowerPC/PPCBoolRetToInt.cpp | 253 +++++++++++++++++++++++ lib/Target/PowerPC/PPCTargetMachine.cpp | 5 + test/CodeGen/PowerPC/BoolRetToIntTest.ll | 203 ++++++++++++++++++ 5 files changed, 464 insertions(+) create mode 100644 lib/Target/PowerPC/PPCBoolRetToInt.cpp create mode 100644 test/CodeGen/PowerPC/BoolRetToIntTest.ll diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index df6e08db8d35..c31ababafbe7 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen + PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp PPCCTRLoops.cpp diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index e157fd37c6e1..a259ed3fd327 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -45,10 +45,12 @@ namespace llvm { FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); + FunctionPass *createPPCBoolRetToIntPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); void initializePPCVSXFMAMutatePass(PassRegistry&); + void initializePPCBoolRetToIntPass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp new file mode 100644 index 000000000000..7920240bc2b9 --- /dev/null +++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -0,0 +1,253 @@ +//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==// +// 
+// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting i1 values to i32 if they could be more +// profitably allocated as GPRs rather than CRs. This pass will become totally +// unnecessary if Register Bank Allocation and Global Instruction Selection ever +// go upstream. +// +// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// transitive closure of their uses includes only PHINodes, CallInsts, and +// ReturnInsts. The rational is that arguments are generally passed and returned +// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// actually save casts at the Machine Instruction level. +// +// It might be useful to expand this pass to add bit-wise operations to the list +// of safe transitive closure types. Also, we miss some opportunities when LLVM +// represents logical AND and OR operations with control flow rather than data +// flow. For example by lowering the expression: return (A && B && C) +// +// as: return A ? true : B && C. +// +// There's code in SimplifyCFG that code be used to turn control flow in data +// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so +// this probably isn't good in general, but for the special case of i1, the +// Selects could be further lowered to bit operations that are fast everywhere. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +#define DEBUG_TYPE "bool-ret-to-int" + +STATISTIC(NumBoolRetPromotion, + "Number of times a bool feeding a RetInst was promoted to an int"); +STATISTIC(NumBoolCallPromotion, + "Number of times a bool feeding a CallInst was promoted to an int"); +STATISTIC(NumBoolToIntPromotion, + "Total number of times a bool was promoted to an int"); + +class PPCBoolRetToInt : public FunctionPass { + + static SmallPtrSet findAllDefs(Value *V) { + SmallPtrSet Defs; + SmallVector WorkList; + WorkList.push_back(V); + Defs.insert(V); + while (!WorkList.empty()) { + Value *Curr = WorkList.back(); + WorkList.pop_back(); + if (User *CurrUser = dyn_cast(Curr)) + for (auto &Op : CurrUser->operands()) + if (Defs.insert(Op).second) + WorkList.push_back(Op); + } + return Defs; + } + + // Translate a i1 value to an equivalent i32 value: + static Value *translate(Value *V) { + Type *Int32Ty = Type::getInt32Ty(V->getContext()); + if (Constant *C = dyn_cast(V)) + return ConstantExpr::getZExt(C, Int32Ty); + if (PHINode *P = dyn_cast(V)) { + // Temporarily set the operands to 0. We'll fix this later in + // runOnUse. + Value *Zero = Constant::getNullValue(Int32Ty); + PHINode *Q = + PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + for (unsigned i = 0; i < P->getNumOperands(); ++i) + Q->addIncoming(Zero, P->getIncomingBlock(i)); + return Q; + } + + Argument *A = dyn_cast(V); + Instruction *I = dyn_cast(V); + assert((A || I) && "Unknown value type"); + + auto InstPt = + A ? 
&*A->getParent()->getEntryBlock().begin() : I->getNextNode(); + return new ZExtInst(V, Int32Ty, "", InstPt); + } + + typedef SmallPtrSet PHINodeSet; + + // A PHINode is Promotable if: + // 1. Its type is i1 AND + // 2. All of its uses are ReturnInt, CallInst, PHINode, or DbgInfoIntrinsic + // AND + // 3. All of its operands are Constant or Argument or + // CallInst or PHINode AND + // 4. All of its PHINode uses are Promotable AND + // 5. All of its PHINode operands are Promotable + static PHINodeSet getPromotablePHINodes(const Function &F) { + PHINodeSet Promotable; + // Condition 1 + for (auto &BB : F) + for (auto &I : BB) + if (const PHINode *P = dyn_cast(&I)) + if (P->getType()->isIntegerTy(1)) + Promotable.insert(P); + + SmallVector ToRemove; + for (const auto &P : Promotable) { + // Condition 2 and 3 + auto IsValidUser = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + auto IsValidOperand = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsValidUser) || + !std::all_of(Operands.begin(), Operands.end(), IsValidOperand)) + ToRemove.push_back(P); + } + + // Iterate to convergence + auto IsPromotable = [&Promotable] (const Value *V) -> bool { + const PHINode *Phi = dyn_cast(V); + return !Phi || Promotable.count(Phi); + }; + while (!ToRemove.empty()) { + for (auto &User : ToRemove) + Promotable.erase(User); + ToRemove.clear(); + + for (const auto &P : Promotable) { + // Condition 4 and 5 + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsPromotable) || + !std::all_of(Operands.begin(), Operands.end(), IsPromotable)) + ToRemove.push_back(P); + } + } + + return Promotable; + } + + typedef DenseMap B2IMap; + + public: + static char ID; + PPCBoolRetToInt() : FunctionPass(ID) { + initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) { + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); + B2IMap Bool2IntMap; + bool Changed = false; + for (auto &BB : F) { + for (auto &I : BB) { + if (ReturnInst *R = dyn_cast(&I)) + if (F.getReturnType()->isIntegerTy(1)) + Changed |= + runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap); + + if (CallInst *CI = dyn_cast(&I)) + for (auto &U : CI->operands()) + if (U->getType()->isIntegerTy(1)) + Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap); + } + } + + return Changed; + } + + static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + B2IMap &BoolToIntMap) { + auto Defs = findAllDefs(U); + + // If the values are all Constants or Arguments, don't bother + if (!std::any_of(Defs.begin(), Defs.end(), isa)) + return false; + + // Presently, we only know how to handle PHINode, Constant, and Arguments. + // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension + // could also be handled in the future. + for (const auto &V : Defs) + if (!isa(V) && !isa(V) && !isa(V)) + return false; + + for (const auto &V : Defs) + if (const PHINode *P = dyn_cast(V)) + if (!PromotablePHINodes.count(P)) + return false; + + if (isa(U.getUser())) + ++NumBoolRetPromotion; + if (isa(U.getUser())) + ++NumBoolCallPromotion; + ++NumBoolToIntPromotion; + + for (const auto &V : Defs) + if (!BoolToIntMap.count(V)) + BoolToIntMap[V] = translate(V); + + // Replace the operands of the translated instructions. 
There were set to + // zero in the translate function. + for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char PPCBoolRetToInt::ID = 0; +INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", + false, false) + +FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 24a9ef0ef077..946e0f10cddd 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -71,6 +71,9 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine A(ThePPC32Target); RegisterTargetMachine B(ThePPC64Target); RegisterTargetMachine C(ThePPC64LETarget); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializePPCBoolRetToIntPass(PR); } /// Return the datalayout string of a subtarget. @@ -286,6 +289,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { } void PPCPassConfig::addIRPasses() { + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createPPCBoolRetToIntPass()); addPass(createAtomicExpandPass(&getPPCTargetMachine())); // For the BG/Q (or if explicitly requested), add explicit data prefetch diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll new file mode 100644 index 000000000000..a7b79789b4ca --- /dev/null +++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll @@ -0,0 +1,203 @@ +; RUN: opt -bool-ret-to-int -S -o - < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK-LABEL: notBoolRet +define signext i32 @notBoolRet() { +entry: +; CHECK: ret i32 1 + ret i32 1 +} + +; CHECK-LABEL: find +define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, 
%cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: retFalse +define zeroext i1 @retFalse() { +entry: +; CHECK: ret i1 false + ret i1 false +} + +; CHECK-LABEL: retCvtFalse +define zeroext i1 @retCvtFalse() { +entry: +; CHECK: ret i1 false + ret i1 trunc(i32 0 to i1) +} + +; CHECK-LABEL: find_cont +define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) + ret void +} + +; CHECK-LABEL: find_cont_ret +define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: arg_operand +define zeroext i1 @arg_operand(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo ], [ %operand, %entry ] + ret i1 %result +} + +; CHECK-LABEL: bad_use +define 
zeroext i1 @bad_use(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo], [ true, %entry ] + %0 = icmp eq i1 %result, %operand + ret i1 %result +} + +; CHECK-LABEL: bad_use_closure +define zeroext i1 @bad_use_closure(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + %0 = icmp eq i1 %bar, %operand + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 [ true +; CHECK: ret i1 [[REG]] + %result = phi i1 [ true, %entry ], [ %bar, %foo] + ret i1 %result +} + +; CHECK-LABEL: arg_test +define zeroext i1 @arg_test(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + br label %cleanup + +; CHECK-LABEL: cleanup +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ %bar, %foo], [ %operand, %entry ] + ret i1 %result +} + +declare zeroext i1 @return_i1() + +; CHECK-LABEL: call_test +define zeroext i1 @call_test() { +; CHECK: [[REG:%.+]] = call i1 + %result = call i1 @return_i1() +; CHECK: ret i1 [[REG]] + ret i1 %result +} \ No newline at end of file From aa464aada41ae2b8ef9f3ef4147c6ef09d2879e2 Mon Sep 17 00:00:00 2001 From: Easwaran Raman Date: Mon, 7 Dec 2015 21:21:20 +0000 Subject: [PATCH 187/364] Use updated threshold for indirect call bonus When considering foo->bar inlining, if there is an indirect call in foo which gets resolved to a direct call (say baz), then we try to inline baz into bar with a threshold T and subtract max(T - Cost(bar->baz), 0) from Cost(foo->bar). This patch uses max(Threshold(bar->baz) - Cost(bar->baz)) instead, where Thresheld(bar->baz) could be different from T due to bonuses or subtractions. Threshold(bar->baz) - Cost(bar->baz) better represents the desirability of inlining baz into bar. Differential Revision: http://reviews.llvm.org/D14309 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254945 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InlineCost.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 6d7d74999061..cebc8731d4d3 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -834,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } return Base::visitCallSite(CS); From 3f8a9448c501898a912830554762408460b9a61d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 7 Dec 2015 21:27:15 +0000 Subject: [PATCH 188/364] [EarlyCSE] Simplify and invert ParseMemoryInst [NFCI] Restructure ParseMemoryInst - which was introduced to abstract over target specific load and stores instructions - to just query the underlying instructions. In theory, this could be slightly slower than caching the results, but in practice, it's very unlikely to be measurable. The simple query scheme makes it far easier to understand, and much easier to extend with new queries. 
Given I'm about to need to add new query types, doing the cleanup first seemed worthwhile. Do we still believe the target specific intrinsic handling is worthwhile in EarlyCSE? It adds quite a bit of complexity and makes the code harder to read. Being able to delete the abstraction entirely would be wonderful. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/EarlyCSE.cpp | 89 +++++++++++++++--------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index b055044ba6d0..4c28d4bc5f7d 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -388,57 +388,58 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), IsSimple(true), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { - MayReadFromMemory = Inst->mayReadFromMemory(); - MayWriteToMemory = Inst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - MemIntrinsicInfo Info; - if (!TTI.getTgtMemIntrinsic(II, Info)) - return; - if (Info.NumMemRefs == 1) { - Store = Info.WriteMem; - Load = Info.ReadMem; - MatchingId = Info.MatchingId; - MayReadFromMemory = Info.ReadMem; - MayWriteToMemory = Info.WriteMem; - IsSimple = Info.IsSimple; - Ptr = Info.PtrVal; - } - } else if (LoadInst *LI = dyn_cast(Inst)) { - Load = true; - IsSimple = LI->isSimple(); - Ptr = LI->getPointerOperand(); + : IsTargetMemInst(false), Inst(Inst) { + if (IntrinsicInst *II = dyn_cast(Inst)) + if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1) + IsTargetMemInst = true; + } + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa(Inst); + } + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa(Inst); + } + bool isSimple() const { + if (IsTargetMemInst) return Info.IsSimple; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isSimple(); } else if (StoreInst *SI = dyn_cast(Inst)) { - Store = true; - IsSimple = SI->isSimple(); - Ptr = SI->getPointerOperand(); + return SI->isSimple(); } + return Inst->isAtomic(); } - bool isLoad() const { return Load; } - bool isStore() const { return Store; } - bool isSimple() const { return IsSimple; } bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { - return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); } - bool isValid() const { return Ptr != nullptr; } - int getMatchingId() const { return MatchingId; } - Value *getPtr() const { return Ptr; } - bool mayReadFromMemory() const { return MayReadFromMemory; } - bool mayWriteToMemory() const { return MayWriteToMemory; } + bool isValid() const { return getPointerOperand() != nullptr; } - private: - bool Load; - bool Store; - bool IsSimple; - bool MayReadFromMemory; - bool MayWriteToMemory; // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. 
- int MatchingId; - Value *Ptr; + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->getPointerOperand(); + } + return nullptr; + } + bool mayReadFromMemory() const { return Inst->mayReadFromMemory(); } + bool mayWriteToMemory() const { return Inst->mayWriteToMemory(); } + + private: + bool IsTargetMemInst; + MemIntrinsicInfo Info; + Instruction *Inst; }; bool processNode(DomTreeNode *Node); @@ -565,7 +566,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If we have an available version of this load, and if it is the right // generation, replace this instruction. - LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && InVal.MatchingId == MemInst.getMatchingId()) { Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); @@ -583,7 +584,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads.insert( - MemInst.getPtr(), + MemInst.getPointerOperand(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); LastStore = nullptr; continue; @@ -659,7 +660,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // to non-volatile loads, so we don't have to check for volatility of // the store. AvailableLoads.insert( - MemInst.getPtr(), + MemInst.getPointerOperand(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); // Remember that this was the last normal store we saw for DSE. From 73036cee732953e35eef06d6eb3597ba084b8a4c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 7 Dec 2015 21:28:22 +0000 Subject: [PATCH 189/364] Simplify the error handling a bit. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254952 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 8e0904a858bc..3e1416d8d506 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1559,7 +1559,6 @@ bool ModuleLinker::linkModuleFlagsMetadata() { // Merge in the flags from the source module, and also collect its set of // requirements. - bool HasErr = false; for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { MDNode *SrcOp = SrcModFlags->getOperand(I); ConstantInt *SrcBehavior = @@ -1597,8 +1596,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() { // Diagnose inconsistent flags which both have override behavior. if (SrcBehaviorValue == Module::Override && SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting override values"); + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting override values"); } continue; } else if (SrcBehaviorValue == Module::Override) { @@ -1610,8 +1609,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() { // Diagnose inconsistent merge behavior types. 
if (SrcBehaviorValue != DstBehaviorValue) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors"); + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting behaviors"); continue; } @@ -1630,8 +1629,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() { case Module::Error: { // Emit an error if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting values"); } continue; } @@ -1676,13 +1675,13 @@ bool ModuleLinker::linkModuleFlagsMetadata() { MDNode *Op = Flags[Flag].first; if (!Op || Op->getOperand(2) != ReqValue) { - HasErr |= emitError("linking module flags '" + Flag->getString() + - "': does not have the required value"); + emitError("linking module flags '" + Flag->getString() + + "': does not have the required value"); continue; } } - return HasErr; + return HasError; } // This function returns true if the triples match. From 8d6e45b694ca4b18cdb3223f70145736fcd3cd54 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Mon, 7 Dec 2015 21:40:09 +0000 Subject: [PATCH 190/364] Update doc for C++ TLS calling convention. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254953 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 82b33557c128..ca0939e53575 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -407,12 +407,22 @@ added in the future: used by a future version of the ObjectiveC runtime and should be considered experimental at this time. "``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + Clang generates an access function to access C++-style TLS. The access + function generally has an entry block, an exit block and an initialization + block that is run at the first time. The entry and exit blocks can access + a few TLS IR variables, each access will be lowered to a platform-specific + sequence. + This calling convention aims to minimize overhead in the caller by - preserving as many registers as possible. This calling convention behaves - identical to the `C` calling convention on how arguments and return values - are passed, but it uses a different set of caller/callee-saved registers. - Given that C-style TLS on Darwin has its own special CSRs, we can't use the - existing `PreserveMost`. + preserving as many registers as possible (all the registers that are + perserved on the fast path, composed of the entry and exit blocks). + + This calling convention behaves identical to the `C` calling convention on + how arguments and return values are passed, but it uses a different set of + caller/callee-saved registers. + + Given that each platform has its own lowering sequence, hence its own set + of preserved registers, we can't use the existing `PreserveMost`. - On X86-64 the callee preserves all general purpose registers, except for RDI and RAX. From 1440f53307347d3f3ee6da62b21e00cfd4cc968a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 7 Dec 2015 21:41:29 +0000 Subject: [PATCH 191/364] Revert 254950 It's causing test failures on AArch64. Due to a bad build config on my part, I apparently wasn't running the tests I thought I was. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254954 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/EarlyCSE.cpp | 89 +++++++++++++++--------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 4c28d4bc5f7d..b055044ba6d0 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -388,58 +388,57 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : IsTargetMemInst(false), Inst(Inst) { - if (IntrinsicInst *II = dyn_cast(Inst)) - if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1) - IsTargetMemInst = true; - } - bool isLoad() const { - if (IsTargetMemInst) return Info.ReadMem; - return isa(Inst); - } - bool isStore() const { - if (IsTargetMemInst) return Info.WriteMem; - return isa(Inst); - } - bool isSimple() const { - if (IsTargetMemInst) return Info.IsSimple; - if (LoadInst *LI = dyn_cast(Inst)) { - return LI->isSimple(); + : Load(false), Store(false), IsSimple(true), MayReadFromMemory(false), + MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { + MayReadFromMemory = Inst->mayReadFromMemory(); + MayWriteToMemory = Inst->mayWriteToMemory(); + if (IntrinsicInst *II = dyn_cast(Inst)) { + MemIntrinsicInfo Info; + if (!TTI.getTgtMemIntrinsic(II, Info)) + return; + if (Info.NumMemRefs == 1) { + Store = Info.WriteMem; + Load = Info.ReadMem; + MatchingId = Info.MatchingId; + MayReadFromMemory = Info.ReadMem; + MayWriteToMemory = Info.WriteMem; + IsSimple = Info.IsSimple; + Ptr = Info.PtrVal; + } + } else if (LoadInst *LI = dyn_cast(Inst)) { + Load = true; + IsSimple = LI->isSimple(); + Ptr = LI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(Inst)) { - return SI->isSimple(); + Store = true; + IsSimple = SI->isSimple(); + Ptr = SI->getPointerOperand(); } - return Inst->isAtomic(); } + bool isLoad() const { return Load; } + bool isStore() const { return Store; } + bool isSimple() const { return IsSimple; } bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { - return (getPointerOperand() == Inst.getPointerOperand() && - getMatchingId() == Inst.getMatchingId()); + return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; } - bool isValid() const { return getPointerOperand() != nullptr; } + bool isValid() const { return Ptr != nullptr; } + int getMatchingId() const { return MatchingId; } + Value *getPtr() const { return Ptr; } + bool mayReadFromMemory() const { return MayReadFromMemory; } + bool mayWriteToMemory() const { return MayWriteToMemory; } + private: + bool Load; + bool Store; + bool IsSimple; + bool MayReadFromMemory; + bool MayWriteToMemory; // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. 
- int getMatchingId() const { - if (IsTargetMemInst) return Info.MatchingId; - return -1; - } - Value *getPointerOperand() const { - if (IsTargetMemInst) return Info.PtrVal; - if (LoadInst *LI = dyn_cast(Inst)) { - return LI->getPointerOperand(); - } else if (StoreInst *SI = dyn_cast(Inst)) { - return SI->getPointerOperand(); - } - return nullptr; - } - bool mayReadFromMemory() const { return Inst->mayReadFromMemory(); } - bool mayWriteToMemory() const { return Inst->mayWriteToMemory(); } - - private: - bool IsTargetMemInst; - MemIntrinsicInfo Info; - Instruction *Inst; + int MatchingId; + Value *Ptr; }; bool processNode(DomTreeNode *Node); @@ -566,7 +565,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If we have an available version of this load, and if it is the right // generation, replace this instruction. - LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && InVal.MatchingId == MemInst.getMatchingId()) { Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); @@ -584,7 +583,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads.insert( - MemInst.getPointerOperand(), + MemInst.getPtr(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); LastStore = nullptr; continue; @@ -660,7 +659,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // to non-volatile loads, so we don't have to check for volatility of // the store. AvailableLoads.insert( - MemInst.getPointerOperand(), + MemInst.getPtr(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); // Remember that this was the last normal store we saw for DSE. From 3ff1b160f82fb7887a77f41d5c1f47d9e9126af9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 7 Dec 2015 22:27:19 +0000 Subject: [PATCH 192/364] Remove useless hack that avoids calling LLVMLinkInInterpreter() This is supposed to force-link the Interpreter, by inserting a dead call to LLVMLinkInInterpreter(). Since it is actually an empty function, there is no reason for the call to be dead. From: Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254956 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ExecutionEngine/Interpreter.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h index f49d0c487fe9..a14707840ad8 100644 --- a/include/llvm/ExecutionEngine/Interpreter.h +++ b/include/llvm/ExecutionEngine/Interpreter.h @@ -16,22 +16,12 @@ #define LLVM_EXECUTIONENGINE_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include extern "C" void LLVMLinkInInterpreter(); namespace { struct ForceInterpreterLinking { - ForceInterpreterLinking() { - // We must reference the interpreter in such a way that compilers will not - // delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. As the compiler isn't smart enough - // to know that getenv() never returns -1, this will do the job. - if (std::getenv("bar") != (char*) -1) - return; - - LLVMLinkInInterpreter(); - } + ForceInterpreterLinking() { LLVMLinkInInterpreter(); } } ForceInterpreterLinking; } From 0890b95b60c353074d8d3a2e11906e66282e30da Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 7 Dec 2015 22:41:23 +0000 Subject: [PATCH 193/364] Reapply 254950 w/fix 254950 ended up being not NFC. 
The previous code was overriding the flags for whether an instruction read or wrote memory using the target specific flags returned via TTI. I'd missed this in my refactoring. Since I mistakenly built only x86 and didn't notice the number of unsupported tests, I didn't catch that before the original checkin. This raises an interesting issue though. Given we have function attributes (i.e. readonly, readnone, argmemonly) which describe the aliasing of intrinsics, why does TTI have this information overriding the instruction definition at all? I see no reason for this, but decided to preserve existing behavior for the moment. The root issue might be that we don't have a "writeonly" attribute. Original commit message: [EarlyCSE] Simplify and invert ParseMemoryInst [NFCI] Restructure ParseMemoryInst - which was introduced to abstract over target specific load and stores instructions - to just query the underlying instructions. In theory, this could be slightly slower than caching the results, but in practice, it's very unlikely to be measurable. The simple query scheme makes it far easier to understand, and much easier to extend with new queries. Given I'm about to need to add new query types, doing the cleanup first seemed worthwhile. Do we still believe the target specific intrinsic handling is worthwhile in EarlyCSE? It adds quite a bit of complexity and makes the code harder to read. Being able to delete the abstraction entirely would be wonderful. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254957 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/LegacyPassManager.cpp | 6 ++ lib/Transforms/Scalar/EarlyCSE.cpp | 95 ++++++++++++++++-------------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index f2e0c7d32c02..3fa11f0d51ae 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -589,6 +589,12 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) Node = N; else { +#if 0 + dbgs() << AU.getRequiredSet().size() << " " + << AU.getRequiredTransitiveSet().size() << " " + << AU.getPreservedSet().size() << " " + << AU.getUsedSet().size() << "\n"; +#endif Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); UniqueAnalysisUsages.InsertNode(Node, IP); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index b055044ba6d0..7e3703de25e4 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -388,57 +388,64 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), IsSimple(true), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { - MayReadFromMemory = Inst->mayReadFromMemory(); - MayWriteToMemory = Inst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - MemIntrinsicInfo Info; - if (!TTI.getTgtMemIntrinsic(II, Info)) - return; - if (Info.NumMemRefs == 1) { - Store = Info.WriteMem; - Load = Info.ReadMem; - MatchingId = Info.MatchingId; - MayReadFromMemory = Info.ReadMem; - MayWriteToMemory = Info.WriteMem; - IsSimple = Info.IsSimple; - Ptr = Info.PtrVal; - } - } else if (LoadInst *LI = dyn_cast(Inst)) { - Load = true; - IsSimple = LI->isSimple(); - Ptr = LI->getPointerOperand(); + : IsTargetMemInst(false), Inst(Inst) { + if (IntrinsicInst *II = dyn_cast(Inst)) + if (TTI.getTgtMemIntrinsic(II, 
Info) && Info.NumMemRefs == 1) + IsTargetMemInst = true; + } + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa(Inst); + } + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa(Inst); + } + bool isSimple() const { + if (IsTargetMemInst) return Info.IsSimple; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isSimple(); } else if (StoreInst *SI = dyn_cast(Inst)) { - Store = true; - IsSimple = SI->isSimple(); - Ptr = SI->getPointerOperand(); + return SI->isSimple(); } + return Inst->isAtomic(); } - bool isLoad() const { return Load; } - bool isStore() const { return Store; } - bool isSimple() const { return IsSimple; } bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { - return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); } - bool isValid() const { return Ptr != nullptr; } - int getMatchingId() const { return MatchingId; } - Value *getPtr() const { return Ptr; } - bool mayReadFromMemory() const { return MayReadFromMemory; } - bool mayWriteToMemory() const { return MayWriteToMemory; } + bool isValid() const { return getPointerOperand() != nullptr; } - private: - bool Load; - bool Store; - bool IsSimple; - bool MayReadFromMemory; - bool MayWriteToMemory; // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. - int MatchingId; - Value *Ptr; + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->getPointerOperand(); + } + return nullptr; + } + bool mayReadFromMemory() const { + if (IsTargetMemInst) return Info.ReadMem; + return Inst->mayReadFromMemory(); + } + bool mayWriteToMemory() const { + if (IsTargetMemInst) return Info.WriteMem; + return Inst->mayWriteToMemory(); + } + + private: + bool IsTargetMemInst; + MemIntrinsicInfo Info; + Instruction *Inst; }; bool processNode(DomTreeNode *Node); @@ -565,7 +572,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If we have an available version of this load, and if it is the right // generation, replace this instruction. - LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && InVal.MatchingId == MemInst.getMatchingId()) { Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); @@ -583,7 +590,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads.insert( - MemInst.getPtr(), + MemInst.getPointerOperand(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); LastStore = nullptr; continue; @@ -659,7 +666,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // to non-volatile loads, so we don't have to check for volatility of // the store. AvailableLoads.insert( - MemInst.getPtr(), + MemInst.getPointerOperand(), LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); // Remember that this was the last normal store we saw for DSE. 
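The trade-off described in the commit message above is between caching every answer up front in the constructor and recomputing it on demand. A minimal standalone sketch of the query-on-demand style, using toy stand-in types rather than LLVM's Instruction and TargetTransformInfo classes, shows why adding a new query is just one more small member function:

#include <iostream>

// Toy stand-ins for the real IR classes; illustration only, not LLVM code.
struct Inst { virtual ~Inst() = default; };
struct Load  : Inst { void *Ptr; explicit Load(void *P)  : Ptr(P) {} };
struct Store : Inst { void *Ptr; explicit Store(void *P) : Ptr(P) {} };

// Query-on-demand wrapper: nothing is cached in the constructor, so every
// predicate is a one-liner that asks the wrapped instruction directly.
class ParsedMemOp {
  const Inst *I;
public:
  explicit ParsedMemOp(const Inst *In) : I(In) {}
  bool isLoad()  const { return dynamic_cast<const Load  *>(I) != nullptr; }
  bool isStore() const { return dynamic_cast<const Store *>(I) != nullptr; }
  const void *pointerOperand() const {
    if (auto *L = dynamic_cast<const Load  *>(I)) return L->Ptr;
    if (auto *S = dynamic_cast<const Store *>(I)) return S->Ptr;
    return nullptr;
  }
  bool isValid() const { return pointerOperand() != nullptr; }
};

int main() {
  int X = 0;
  Load  L(&X);
  Store S(&X);
  std::cout << ParsedMemOp(&L).isLoad() << ' '        // 1
            << ParsedMemOp(&S).isStore() << ' '       // 1
            << (ParsedMemOp(&L).pointerOperand() ==
                ParsedMemOp(&S).pointerOperand())     // 1: same location
            << '\n';
}

The cost is a repeated type test on each query (a repeated isa/dyn_cast in the real pass), which is exactly the "slightly slower in theory, unlikely to be measurable in practice" point made in the commit message.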
From ba40ae0275ee81cf25c087abcf0dac035b7a4547 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 7 Dec 2015 22:43:56 +0000 Subject: [PATCH 194/364] Remove debug output that snuck into 254957 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254960 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/LegacyPassManager.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 3fa11f0d51ae..f2e0c7d32c02 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -589,12 +589,6 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) Node = N; else { -#if 0 - dbgs() << AU.getRequiredSet().size() << " " - << AU.getRequiredTransitiveSet().size() << " " - << AU.getPreservedSet().size() << " " - << AU.getUsedSet().size() << "\n"; -#endif Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); UniqueAnalysisUsages.InsertNode(Node, IP); } From 5b1522a8d1ff60a987137c8d1d585959e30d70d0 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 7 Dec 2015 23:07:16 +0000 Subject: [PATCH 195/364] Let llvm-lto installed. A few tests in clang/test are using it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254963 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-lto/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/llvm-lto/Makefile b/tools/llvm-lto/Makefile index f1801b4b20cc..f8ca7e1cac5d 100644 --- a/tools/llvm-lto/Makefile +++ b/tools/llvm-lto/Makefile @@ -14,6 +14,4 @@ LINK_COMPONENTS := lto ipo scalaropts linker bitreader bitwriter mcdisassembler # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 -NO_INSTALL := 1 - include $(LEVEL)/Makefile.common From d5841c9f1f378e13de5bb4eccffaf1c213662e11 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 7 Dec 2015 23:12:26 +0000 Subject: [PATCH 196/364] Support: Teach Asan about BumpPtrAllocator Based on patch by Pete Cooper. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254964 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Allocator.h | 15 ++++++++++++++- include/llvm/Support/Compiler.h | 3 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index f9b5cf22f97d..c608736fa956 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -222,6 +222,8 @@ class BumpPtrAllocatorImpl // Without this, MemorySanitizer messages for values originated from here // will point to the allocation of the entire slab. __msan_allocated_memory(AlignedPtr, Size); + // Similarly, tell ASan about this space. + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -229,12 +231,16 @@ class BumpPtrAllocatorImpl size_t PaddedSize = Size + Alignment - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); + // We own the new slab and don't want anyone reading anyting other than + // pieces returned from this method. So poison the whole slab. 
+ __asan_poison_memory_region(NewSlab, PaddedSize); CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize)); uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment); assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize); char *AlignedPtr = (char*)AlignedAddr; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -246,13 +252,16 @@ class BumpPtrAllocatorImpl char *AlignedPtr = (char*)AlignedAddr; CurPtr = AlignedPtr + Size; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } // Pull in base class overloads. using AllocatorBase::Allocate; - void Deallocate(const void * /*Ptr*/, size_t /*Size*/) {} + void Deallocate(const void *Ptr, size_t Size) { + __asan_poison_memory_region(Ptr, Size); + } // Pull in base class overloads. using AllocatorBase::Deallocate; @@ -310,6 +319,10 @@ class BumpPtrAllocatorImpl size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0); + // We own the new slab and don't want anyone reading anything other than + // pieces returned from this method. So poison the whole slab. + __asan_poison_memory_region(NewSlab, AllocatedSlabSize); + Slabs.push_back(NewSlab); CurPtr = (char *)(NewSlab); End = ((char *)NewSlab) + AllocatedSlabSize; diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 99a02f7ada03..b3416bbfffb6 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -361,8 +361,11 @@ /// \brief Whether LLVM itself is built with AddressSanitizer instrumentation. #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) # define LLVM_ADDRESS_SANITIZER_BUILD 1 +# include #else # define LLVM_ADDRESS_SANITIZER_BUILD 0 +# define __asan_poison_memory_region(p, size) +# define __asan_unpoison_memory_region(p, size) #endif /// \macro LLVM_THREAD_SANITIZER_BUILD From b69b6b56339cffdb675817ff4bab206254141bbe Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 7 Dec 2015 23:15:57 +0000 Subject: [PATCH 197/364] Stabilize llvm/test/Object/archive-update.test a bit. A manipulation (in this case, mkdir) can make slack between creating and touching %t.older/evenlen. I would make this rewrote with python if this were still unstable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254965 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/archive-update.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/Object/archive-update.test b/test/Object/archive-update.test index ef1a71f60f43..fd1ea4113c39 100644 --- a/test/Object/archive-update.test +++ b/test/Object/archive-update.test @@ -7,12 +7,13 @@ Create a file named evenlen that is newer than the evenlen on the source dir. RUN: mkdir -p %t.older RUN: echo older > %t.older/evenlen +RUN: mkdir -p %t.newer + Either the shell supports the 'touch' command with a flag to manually set the mtime or we sleep for over two seconds so that the mtime is definitely observable. RUN: touch -m -t 200001010000 %t.older/evenlen || sleep 2.1 -RUN: mkdir -p %t.newer RUN: echo newer > %t.newer/evenlen RUN: touch %t.newer/evenlen From e826a2ef86c0e89c60d7a4fa792dbbb0358e030e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 7 Dec 2015 23:32:39 +0000 Subject: [PATCH 198/364] Factor two calls to a common location. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254967 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 3e1416d8d506..a2bc95602210 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -451,8 +451,7 @@ class ModuleLinker { /// Handles cloning of a global values from the source module into /// the destination module, including setting the attributes and visibility. - GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, - const GlobalValue *DGV, bool ForDefinition); + GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, bool ForDefinition); /// Check if we should promote the given local value to global scope. bool doPromoteLocalToGlobal(const GlobalValue *SGV); @@ -816,7 +815,6 @@ void ModuleLinker::setVisibility(GlobalValue *NewGV, const GlobalValue *SGV, } GlobalValue *ModuleLinker::copyGlobalValueProto(const GlobalValue *SGV, - const GlobalValue *DGV, bool ForDefinition) { GlobalValue *NewGV; if (auto *SGVar = dyn_cast(SGV)) { @@ -842,7 +840,6 @@ GlobalValue *ModuleLinker::copyGlobalValueProto(const GlobalValue *SGV, NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); copyGVAttributes(NewGV, SGV); - setVisibility(NewGV, SGV, DGV); return NewGV; } @@ -1364,8 +1361,6 @@ Constant *ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { GlobalValue *NewGV; if (!LinkFromSrc && DGV) { NewGV = DGV; - // When linking from source we setVisibility from copyGlobalValueProto. - setVisibility(NewGV, SGV, DGV); } else { // If we are done linking global value bodies (i.e. we are performing // metadata linking), don't link in the global value due to this @@ -1373,9 +1368,10 @@ Constant *ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { if (DoneLinkingBodies) return nullptr; - NewGV = copyGlobalValueProto(SGV, DGV, LinkFromSrc); + NewGV = copyGlobalValueProto(SGV, LinkFromSrc); } + setVisibility(NewGV, SGV, DGV); NewGV->setUnnamedAddr(HasUnnamedAddr); if (auto *NewGO = dyn_cast(NewGV)) { From afd3f07154466b8707b767bfb97ce5672a3a1f17 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Dec 2015 23:34:30 +0000 Subject: [PATCH 199/364] fix return values to match bool return type; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254968 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 10fb334c4c60..97a9646c3232 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3888,10 +3888,10 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; + return false; ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with From a187d6f327f3882f1925c95b7e9ac553d62d4a9f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 8 Dec 2015 00:10:56 +0000 Subject: [PATCH 200/364] [PassManager] Tuning Memory Usage of AnalysisUsage We were using unneccessarily large initial sizes for these SmallVectors. 
This was wasting around 50kb of memory for the O3 pipeline, even after the uniquing changes. We're still using around 20kb which is a bit much, but it's definitely better. This is about a 6% improvement in total O3 memory usage. Note: The raw data on structure size which were used to pick these thresholds can be found in the review thread. Differential Revision: http://reviews.llvm.org/D15244 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254974 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/PassAnalysisSupport.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index f6265b62cbf5..492a4ef464f8 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -36,11 +36,17 @@ namespace llvm { /// class AnalysisUsage { public: - typedef SmallVector VectorType; + typedef SmallVectorImpl VectorType; private: /// Sets of analyses required and preserved by a pass - VectorType Required, RequiredTransitive, Preserved, Used; + // TODO: It's not clear that SmallVector is an appropriate data structure for + // this usecase. The sizes were picked to minimize wasted space, but are + // otherwise fairly meaningless. + SmallVector Required; + SmallVector RequiredTransitive; + SmallVector Preserved; + SmallVector Used; bool PreservesAll; public: From c11338cf10a01b70961536250358b0dc6ebd4438 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:12 +0000 Subject: [PATCH 201/364] Add Instruction::getFunction; NFC Will be used in a upcoming patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254975 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Instruction.h | 7 +++++++ lib/IR/Instruction.cpp | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index c7ba8721fe06..77ba87c6b664 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -66,6 +66,13 @@ class Instruction : public User, const Module *getModule() const; Module *getModule(); + /// \brief Return the function this instruction belongs to. + /// + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + const Function *getFunction() const; + Function *getFunction(); + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. /// diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index b5a30a4969b3..7bd50328b126 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,6 +62,11 @@ Module *Instruction::getModule() { return getParent()->getModule(); } +Function *Instruction::getFunction() { return getParent()->getParent(); } + +const Function *Instruction::getFunction() const { + return getParent()->getParent(); +} void Instruction::removeFromParent() { getParent()->getInstList().remove(getIterator()); From 51d40aea3b6e74dea4b3c6680f00921441a30522 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:17 +0000 Subject: [PATCH 202/364] [SCEVExpander] Have hoistIVInc preserve LCSSA Summary: (Note: the problematic invocation of hoistIVInc that caused PR24804 came from IndVarSimplify, not from SCEVExpander itself) Fixes PR24804. Test case by David Majnemer. 
Reviewers: hfinkel, majnemer, atrick, mzolotukhin Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15058 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254976 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/LoopInfo.h | 73 +++++++++++++++++++++++ lib/Analysis/ScalarEvolutionExpander.cpp | 3 + test/Transforms/IndVarSimplify/pr24804.ll | 25 ++++++++ 3 files changed, 101 insertions(+) create mode 100644 test/Transforms/IndVarSimplify/pr24804.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 9196250233cd..616d6ad1761a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,6 +37,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -681,6 +682,78 @@ class LoopInfo : public LoopInfoBase { // it as a replacement will not break LCSSA form. return ToLoop->contains(getLoopFor(From->getParent())); } + + /// \brief Checks if moving a specific instruction can break LCSSA in any + /// loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast(U.getUser()); + auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. + + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. + if (isa(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } }; // Allow clients to walk the list of nested loops... 
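The LoopInfo helper added above is meant to be consulted before any cross-loop code motion. A minimal sketch of the intended call pattern follows, with a hypothetical wrapper name; the real caller is the SCEVExpander::hoistIVInc change in the next file of this patch:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical hoisting guard, illustration only: move I in front of
// InsertPos only when LoopInfo can prove the move keeps the function in
// LCSSA form; otherwise leave I alone and let the caller pick another spot.
static bool tryMoveBefore(Instruction *I, Instruction *InsertPos,
                          LoopInfo &LI) {
  if (!LI.movementPreservesLCSSAForm(I, InsertPos))
    return false;
  I->moveBefore(InsertPos);
  return true;
}

As the assertion at the top of the helper requires, both instructions must already be inserted into the same function before the check is made.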
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 8c5805e9d168..abfcfbafb32e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -933,6 +933,9 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) return false; + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) + return false; + // Check that the chain of IV operands leading back to Phi can be hoisted. SmallVector IVIncs; for(;;) { diff --git a/test/Transforms/IndVarSimplify/pr24804.ll b/test/Transforms/IndVarSimplify/pr24804.ll new file mode 100644 index 000000000000..6f89481853ad --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr24804.ll @@ -0,0 +1,25 @@ +; RUN: opt -indvars -loop-idiom -loop-deletion -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Checking for a crash + +define void @f(i32* %a) { +; CHECK-LABEL: @f( +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %for.cond, %entry + %iv = phi i32 [ 0, %entry ], [ %add, %for.inc ], [ %iv, %for.cond ] + %add = add nsw i32 %iv, 1 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom + br i1 undef, label %for.cond, label %for.inc + +for.inc: ; preds = %for.cond + br i1 undef, label %for.cond, label %for.end + +for.end: ; preds = %for.inc + ret void +} From ca40161d4a0f48fcb5c60d984ba1bc774dc0a45c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:21 +0000 Subject: [PATCH 203/364] [IndVars] Have getInsertPointForUses preserve LCSSA Summary: Also add a stricter post-condition for IndVarSimplify. Fixes PR25578. Test case by Michael Zolotukhin. Reviewers: hfinkel, atrick, mzolotukhin Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15059 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254977 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/LoopInfo.h | 3 ++ lib/Analysis/LoopInfo.cpp | 9 +++++ lib/Transforms/Scalar/IndVarSimplify.cpp | 42 ++++++++++++++------- test/Transforms/IndVarSimplify/pr25578.ll | 45 +++++++++++++++++++++++ 4 files changed, 85 insertions(+), 14 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/pr25578.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 616d6ad1761a..57695b46d640 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -402,6 +402,9 @@ class Loop : public LoopBase { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; + /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT) const; + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. 
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index e679b7ad7b86..67a82b192e56 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -200,6 +200,15 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { return true; } +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { + if (!isLCSSAForm(DT)) + return false; + + return std::all_of(begin(), end(), [&](const Loop *L) { + return L->isRecursivelyLCSSAForm(DT); + }); +} + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 4ea92df9924f..308c8f8f7c6d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT) { + DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast(User); if (!PHI) return User; @@ -234,10 +235,21 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - assert((!isa(Def) || - DT->dominates(cast(Def), InsertPt)) && - "def does not dominate all uses"); - return InsertPt; + + auto *DefI = dyn_cast(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); } //===----------------------------------------------------------------------===// @@ -528,8 +540,8 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm(*DT)); + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1167,10 +1179,11 @@ const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 
-static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1207,7 +1220,8 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. @@ -1229,7 +1243,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", @@ -1297,7 +1311,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to @@ -2165,9 +2179,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + // Check a post-condition. - assert(L->isLCSSAForm(*DT) && - "Indvars did not leave the loop in lcssa form!"); + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. 
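The heart of the getInsertPointForUses change is the climb up the dominator tree: instead of inserting directly before a PHI's incoming terminator, which may sit in a deeper loop than the definition, the walk stops at the first dominating block that is back in the definition's own loop. Roughly the same idiom, pulled out as a free-standing helper for illustration (hypothetical name; like the pass, it assumes DefI dominates the candidate block):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustration only: walk immediate dominators from CandidateBB until we
// reach a block whose innermost loop is DefI's loop, and insert before that
// block's terminator so the new user does not end up in a deeper loop.
static Instruction *insertPointInLoopOf(Instruction *DefI,
                                        BasicBlock *CandidateBB,
                                        DominatorTree &DT, LoopInfo &LI) {
  Loop *L = LI.getLoopFor(DefI->getParent());
  for (DomTreeNode *DTN = DT[CandidateBB]; DTN; DTN = DTN->getIDom())
    if (LI.getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();
  // Not reached when DefI dominates CandidateBB; fall back to the original
  // insertion point otherwise.
  return CandidateBB->getTerminator();
}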
diff --git a/test/Transforms/IndVarSimplify/pr25578.ll b/test/Transforms/IndVarSimplify/pr25578.ll new file mode 100644 index 000000000000..bc648b517bbe --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr25578.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @foo +define void @foo() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +; CHECK: L2_header: +; CHECK: %[[INDVAR:.*]] = phi i64 +; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32 +L2_header: + %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ] + %i_prom = sext i32 %i to i64 + br label %L3_header + +L3_header: + br i1 undef, label %L3_latch, label %L2_exiting_1 + +L3_latch: + br i1 undef, label %L3_header, label %L2_exiting_2 + +L2_exiting_1: + br i1 undef, label %L2_latch, label %L1_latch + +L2_exiting_2: + br i1 undef, label %L2_latch, label %L1_latch + +L2_latch: + %i_next = add nsw i32 %i, 1 + br label %L2_header + +L1_latch: +; CHECK: L1_latch: +; CHECK: %i_lcssa = phi i32 [ %[[TRUNC]], %L2_exiting_1 ], [ %[[TRUNC]], %L2_exiting_2 ] + + %i_lcssa = phi i32 [ %i, %L2_exiting_1 ], [ %i, %L2_exiting_2 ] + br i1 undef, label %exit, label %L1_header + +exit: + ret void +} From 23ae77267189ac95510c0915de0adc1f056f9427 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Tue, 8 Dec 2015 00:14:38 +0000 Subject: [PATCH 204/364] [CXX TLS calling convention] Add support for AArch64. rdar://9001553 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254978 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64CallingConvention.td | 9 +++ lib/Target/AArch64/AArch64RegisterInfo.cpp | 4 + test/CodeGen/AArch64/cxx-tlscc.ll | 77 +++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 test/CodeGen/AArch64/cxx-tlscc.ll diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 948b9ddb5df6..66d92100e637 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -279,6 +279,15 @@ def CSR_AArch64_TLS_Darwin FP, (sequence "Q%u", 0, 31))>; +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. 
def CSR_AArch64_TLS_ELF diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1aef31baad20..763b2337de12 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -46,6 +46,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_SaveList; else return CSR_AArch64_AAPCS_SaveList; } @@ -58,6 +60,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; + if (CC == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_RegMask; else return CSR_AArch64_AAPCS_RegMask; } diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll new file mode 100644 index 000000000000..39f6c0fbec94 --- /dev/null +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; Shrink wrapping currently does not kick in because we have a TLS CALL +; in the entry block and it will clobber the link register. + +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) +declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg) + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} + +; CHECK-LABEL: _ZTW2sg +; CHECK-DAG: stp d31, d30 +; CHECK-DAG: stp d29, d28 +; CHECK-DAG: stp d27, d26 +; CHECK-DAG: stp d25, d24 +; CHECK-DAG: stp d23, d22 +; CHECK-DAG: stp d21, d20 +; CHECK-DAG: stp d19, d18 +; CHECK-DAG: stp d17, d16 +; CHECK-DAG: stp d7, d6 +; CHECK-DAG: stp d5, d4 +; CHECK-DAG: stp d3, d2 +; CHECK-DAG: stp d1, d0 +; CHECK-DAG: stp x20, x19 +; CHECK-DAG: stp x14, x13 +; CHECK-DAG: stp x12, x11 +; CHECK-DAG: stp x10, x9 +; CHECK-DAG: stp x8, x7 +; CHECK-DAG: stp x6, x5 +; CHECK-DAG: stp x4, x3 +; CHECK-DAG: stp x2, x1 +; CHECK-DAG: stp x29, x30 +; CHECK: blr +; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK: blr +; CHECK: tlv_atexit +; CHECK: [[BB_end]]: +; CHECK: blr +; CHECK-DAG: ldp x2, x1 +; CHECK-DAG: ldp x4, x3 +; CHECK-DAG: ldp x6, x5 +; CHECK-DAG: ldp x8, x7 +; CHECK-DAG: ldp x10, x9 +; CHECK-DAG: ldp x12, x11 +; CHECK-DAG: ldp x14, x13 +; CHECK-DAG: ldp x20, x19 +; CHECK-DAG: ldp d1, d0 +; CHECK-DAG: ldp d3, d2 +; CHECK-DAG: ldp d5, d4 +; CHECK-DAG: ldp d7, d6 +; CHECK-DAG: ldp d17, d16 +; CHECK-DAG: ldp d19, d18 +; CHECK-DAG: ldp d21, d20 +; CHECK-DAG: ldp d23, d22 +; CHECK-DAG: ldp d25, 
d24 +; CHECK-DAG: ldp d27, d26 +; CHECK-DAG: ldp d29, d28 +; CHECK-DAG: ldp d31, d30 From 4609cb778ad34d55e05762b7159f3339c4054145 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 8 Dec 2015 02:29:45 +0000 Subject: [PATCH 205/364] Simplify test. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254987 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Linker/link-flags.ll | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/Linker/link-flags.ll b/test/Linker/link-flags.ll index d03503aa4548..c901b699575a 100644 --- a/test/Linker/link-flags.ll +++ b/test/Linker/link-flags.ll @@ -1,9 +1,7 @@ -; RUN: llvm-as %S/Inputs/linkage.b.ll -o %t.b.bc -; RUN: llvm-as %S/Inputs/linkage.c.ll -o %t.c.bc -; RUN: llvm-link -S %t.b.bc %t.c.bc | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CU -; RUN: llvm-link -S -only-needed %t.b.bc %t.c.bc | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CN -; RUN: llvm-link -S -internalize %t.b.bc %t.c.bc | FileCheck %s -check-prefix=B -check-prefix=CI -; RUN: llvm-link -S -internalize -only-needed %t.b.bc %t.c.bc | FileCheck %s -check-prefix=B -check-prefix=CN +; RUN: llvm-link -S %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CU +; RUN: llvm-link -S -only-needed %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CN +; RUN: llvm-link -S -internalize %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=CI +; RUN: llvm-link -S -internalize -only-needed %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=CN C-LABEL: @X = global i32 5 CI-LABEL: @X = internal global i32 5 From cbf2c65b9e200ee1b7827171773faa1e4c399fb0 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 8 Dec 2015 02:37:48 +0000 Subject: [PATCH 206/364] AsmPrinter: Use emitGlobalConstantFP to emit elements of constant data It's strange to duplicate the logic for emitting FP values into emitGlobalConstantDataSequential, and it's even stranger that we end up printing the verbose assembly comments differently between the two paths. Just call into emitGlobalConstantFP rather than crudely duplicating its logic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254988 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 20 ++---- test/CodeGen/ARM/constants.ll | 6 +- test/CodeGen/Mips/sitofp-selectcc-opt.ll | 3 +- .../X86/copysign-constant-magnitude.ll | 24 +++---- test/CodeGen/X86/fadd-combines.ll | 64 +++++++++---------- test/CodeGen/X86/fmul-combines.ll | 40 ++++++------ test/CodeGen/X86/vec_uint_to_fp-fastmath.ll | 16 ++--- test/CodeGen/X86/vec_uint_to_fp.ll | 8 +-- .../X86/x86-setcc-int-to-fp-combine.ll | 16 ++--- 9 files changed, 92 insertions(+), 105 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b8604240b5d9..f1f3547750b4 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1873,6 +1873,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, const Constant *BaseCV = nullptr, uint64_t Offset = 0); +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. 
@@ -1951,22 +1953,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, ElementByteSize); } } else { - // FP Constants are printed as integer constants to avoid losing precision. - for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { - APFloat Num = CDS->getElementAsAPFloat(I); - if (AP.isVerbose()) { - if (ElementByteSize == 4) - AP.OutStreamer->GetCommentOS() << "float " << Num.convertToFloat() - << '\n'; - else if (ElementByteSize == 8) - AP.OutStreamer->GetCommentOS() << "double " << Num.convertToDouble() - << '\n'; - else - llvm_unreachable("Unexpected float width"); - } - AP.OutStreamer->EmitIntValue(Num.bitcastToAPInt().getLimitedValue(), - ElementByteSize); - } + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast(CDS->getElementAsConstant(I)), AP); } unsigned Size = DL.getTypeAllocSize(CDS->getType()); diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll index 3baa103e3d5d..75a90bbf0caa 100644 --- a/test/CodeGen/ARM/constants.ll +++ b/test/CodeGen/ARM/constants.ll @@ -63,7 +63,7 @@ define i32 @f8() nounwind { float 3.000000e+00> }, align 16 ; CHECK: const1 ; CHECK: .zero 16 -; CHECK: float 1.0 -; CHECK: float 2.0 -; CHECK: float 3.0 +; CHECK: float 1 +; CHECK: float 2 +; CHECK: float 3 ; CHECK: .zero 4 diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll index c60fceb1a04c..751fba46d72f 100644 --- a/test/CodeGen/Mips/sitofp-selectcc-opt.ll +++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll @@ -7,7 +7,7 @@ entry: ; check that this transformation doesn't happen: ; (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) ; -; CHECK-NOT: # double -1.000000e+00 +; CHECK-NOT: # double -1 %tobool1 = icmp ne i32 %a, 0 %not.tobool = icmp ne i64 %b, 0 @@ -19,4 +19,3 @@ entry: store double %add, double* @foo12.d4, align 8 ret double %add } - diff --git a/test/CodeGen/X86/copysign-constant-magnitude.ll b/test/CodeGen/X86/copysign-constant-magnitude.ll index 537d6298ddf4..6c577a2cfcc7 100644 --- a/test/CodeGen/X86/copysign-constant-magnitude.ll +++ b/test/CodeGen/X86/copysign-constant-magnitude.ll @@ -5,13 +5,13 @@ target triple = "x86_64-apple-macosx10.10.0" define void @test_copysign_const_magnitude_d(double %X) { ; CHECK: [[SIGNMASK:L.+]]: -; CHECK-NEXT: .quad -9223372036854775808 ## double -0.000000e+00 -; CHECK-NEXT: .quad 0 ## double 0.000000e+00 +; CHECK-NEXT: .quad -9223372036854775808 ## double -0 +; CHECK-NEXT: .quad 0 ## double 0 ; CHECK: [[ZERO:L.+]]: ; CHECK-NEXT: .space 16 ; CHECK: [[ONE:L.+]]: -; CHECK-NEXT: .quad 4607182418800017408 ## double 1.000000e+00 -; CHECK-NEXT: .quad 0 ## double 0.000000e+00 +; CHECK-NEXT: .quad 4607182418800017408 ## double 1 +; CHECK-NEXT: .quad 0 ## double 0 ; CHECK-LABEL: test_copysign_const_magnitude_d: ; CHECK: id @@ -50,17 +50,17 @@ define void @test_copysign_const_magnitude_d(double %X) { define void @test_copysign_const_magnitude_f(float %X) { ; CHECK: [[SIGNMASK:L.+]]: -; CHECK-NEXT: .long 2147483648 ## float -0.000000e+00 -; CHECK-NEXT: .long 0 ## float 0.000000e+00 -; CHECK-NEXT: .long 0 ## float 0.000000e+00 -; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 2147483648 ## float -0 +; CHECK-NEXT: .long 0 ## float 0 +; CHECK-NEXT: .long 0 ## float 0 +; CHECK-NEXT: .long 0 ## float 0 ; CHECK: [[ZERO:L.+]]: ; CHECK-NEXT: .space 16 ; CHECK: [[ONE:L.+]]: -; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00 -; CHECK-NEXT: .long 0 ## float 0.000000e+00 -; CHECK-NEXT: .long 0 ## float 0.000000e+00 
-; CHECK-NEXT: .long 0 ## float 0.000000e+00 +; CHECK-NEXT: .long 1065353216 ## float 1 +; CHECK-NEXT: .long 0 ## float 0 +; CHECK-NEXT: .long 0 ## float 0 +; CHECK-NEXT: .long 0 ## float 0 ; CHECK-LABEL: test_copysign_const_magnitude_f: ; CHECK: id diff --git a/test/CodeGen/X86/fadd-combines.ll b/test/CodeGen/X86/fadd-combines.ll index 6b389f4099c2..2df0e06dc252 100644 --- a/test/CodeGen/X86/fadd-combines.ll +++ b/test/CodeGen/X86/fadd-combines.ll @@ -28,10 +28,10 @@ define float @fadd_2const_f32(float %x) #0 { ret float %z } -; CHECK: float 5.000000e+00 -; CHECK: float 5.000000e+00 -; CHECK: float 5.000000e+00 -; CHECK: float 5.000000e+00 +; CHECK: float 5 +; CHECK: float 5 +; CHECK: float 5 +; CHECK: float 5 define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_2const_4f32: ; CHECK: # BB#0: @@ -53,10 +53,10 @@ define float @fadd_x_fmul_x_c_f32(float %x) #0 { ret float %z } -; CHECK: float 2.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 5.000000e+00 +; CHECK: float 2 +; CHECK: float 3 +; CHECK: float 4 +; CHECK: float 5 define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_x_fmul_x_c_4f32: ; CHECK: # BB#0: @@ -78,10 +78,10 @@ define float @fadd_fmul_x_c_x_f32(float %x) #0 { ret float %z } -; CHECK: float 2.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 5.000000e+00 +; CHECK: float 2 +; CHECK: float 3 +; CHECK: float 4 +; CHECK: float 5 define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_x_4f32: ; CHECK: # BB#0: @@ -104,10 +104,10 @@ define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 { ret float %w } -; CHECK: float 3.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 5.000000e+00 -; CHECK: float 6.000000e+00 +; CHECK: float 3 +; CHECK: float 4 +; CHECK: float 5 +; CHECK: float 6 define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32: ; CHECK: # BB#0: @@ -131,10 +131,10 @@ define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 { ret float %w } -; CHECK: float 3.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 5.000000e+00 -; CHECK: float 6.000000e+00 +; CHECK: float 3 +; CHECK: float 4 +; CHECK: float 5 +; CHECK: float 6 define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32: ; CHECK: # BB#0: @@ -157,10 +157,10 @@ define float @fadd_x_fadd_x_x_f32(float %x) #0 { ret float %z } -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 +; CHECK: float 3 +; CHECK: float 3 +; CHECK: float 3 +; CHECK: float 3 define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_x_fadd_x_x_4f32: ; CHECK: # BB#0: @@ -182,10 +182,10 @@ define float @fadd_fadd_x_x_x_f32(float %x) #0 { ret float %z } -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 -; CHECK: float 3.000000e+00 +; CHECK: float 3 +; CHECK: float 3 +; CHECK: float 3 +; CHECK: float 3 define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 { ; CHECK-LABEL: fadd_fadd_x_x_x_4f32: ; CHECK: # BB#0: @@ -207,10 +207,10 @@ define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 { ret float %z } -; CHECK: float 4.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 4.000000e+00 -; CHECK: float 4.000000e+00 +; CHECK: float 4 +; CHECK: float 4 +; CHECK: float 4 +; CHECK: float 4 define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 
{ ; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32: ; CHECK: # BB#0: diff --git a/test/CodeGen/X86/fmul-combines.ll b/test/CodeGen/X86/fmul-combines.ll index 42e538646ffe..564ce42fdb75 100644 --- a/test/CodeGen/X86/fmul-combines.ll +++ b/test/CodeGen/X86/fmul-combines.ll @@ -56,10 +56,10 @@ define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 { } ; We should be able to pre-multiply the two constant vectors. -; CHECK: float 5.000000e+00 -; CHECK: float 1.200000e+01 -; CHECK: float 2.100000e+01 -; CHECK: float 3.200000e+01 +; CHECK: float 5 +; CHECK: float 12 +; CHECK: float 21 +; CHECK: float 32 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat: ; CHECK: mulps ; CHECK-NOT: mulps @@ -71,10 +71,10 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 { } ; Same as above, but reverse operands to make sure non-canonical form is also handled. -; CHECK: float 5.000000e+00 -; CHECK: float 1.200000e+01 -; CHECK: float 2.100000e+01 -; CHECK: float 3.200000e+01 +; CHECK: float 5 +; CHECK: float 12 +; CHECK: float 21 +; CHECK: float 32 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical: ; CHECK: mulps ; CHECK-NOT: mulps @@ -87,10 +87,10 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) ; More than one use of a constant multiply should not inhibit the optimization. ; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. -; CHECK: float 6.000000e+00 -; CHECK: float 1.400000e+01 -; CHECK: float 2.400000e+01 -; CHECK: float 3.600000e+01 +; CHECK: float 6 +; CHECK: float 14 +; CHECK: float 24 +; CHECK: float 36 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use: ; CHECK: mulps ; CHECK: ret @@ -110,10 +110,10 @@ define <4 x float> @PR22698_splats(<4 x float> %a) #0 { %mul3 = fmul fast <4 x float> %a, %mul2 ret <4 x float> %mul3 -; CHECK: float 2.400000e+01 -; CHECK: float 2.400000e+01 -; CHECK: float 2.400000e+01 -; CHECK: float 2.400000e+01 +; CHECK: float 24 +; CHECK: float 24 +; CHECK: float 24 +; CHECK: float 24 ; CHECK-LABEL: PR22698_splats: ; CHECK: mulps ; CHECK: ret @@ -126,10 +126,10 @@ define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 { %mul3 = fmul fast <4 x float> %a, %mul2 ret <4 x float> %mul3 -; CHECK: float 4.500000e+01 -; CHECK: float 1.200000e+02 -; CHECK: float 2.310000e+02 -; CHECK: float 3.840000e+02 +; CHECK: float 45 +; CHECK: float 120 +; CHECK: float 231 +; CHECK: float 384 ; CHECK-LABEL: PR22698_no_splats: ; CHECK: mulps ; CHECK: ret diff --git a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll index 0d67ac4bc25a..1f36d064f873 100644 --- a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll +++ b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll @@ -14,10 +14,10 @@ ; CST-NEXT: .long 65535 # 0xffff ; CST: [[FPMASKCSTADDR:.LCPI[0-9_]+]]: -; CST-NEXT: .long 1199570944 # float 6.553600e+04 -; CST-NEXT: .long 1199570944 # float 6.553600e+04 -; CST-NEXT: .long 1199570944 # float 6.553600e+04 -; CST-NEXT: .long 1199570944 # float 6.553600e+04 +; CST-NEXT: .long 1199570944 # float 65536 +; CST-NEXT: .long 1199570944 # float 65536 +; CST-NEXT: .long 1199570944 # float 65536 +; CST-NEXT: .long 1199570944 # float 65536 ; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]: ; AVX2-NEXT: .long 1199570944 # float 65536 @@ -69,10 +69,10 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) { ; AVX-NEXT: .long 65535 # 0xffff ; AVX: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]: -; AVX-NEXT: .long 1199570944 # float 6.553600e+04 -; AVX-NEXT: .long 1199570944 # float 6.553600e+04 -; 
AVX-NEXT: .long 1199570944 # float 6.553600e+04 -; AVX-NEXT: .long 1199570944 # float 6.553600e+04 +; AVX-NEXT: .long 1199570944 # float 65536 +; AVX-NEXT: .long 1199570944 # float 65536 +; AVX-NEXT: .long 1199570944 # float 65536 +; AVX-NEXT: .long 1199570944 # float 65536 ; AVX2: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]: ; AVX2-NEXT: .long 1199570944 # float 65536 diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll index 46cfcd9a9a12..ce0c11b2fa2a 100644 --- a/test/CodeGen/X86/vec_uint_to_fp.ll +++ b/test/CodeGen/X86/vec_uint_to_fp.ll @@ -23,10 +23,10 @@ ; CST-NEXT: .long 1392508928 ## 0x53000000 ; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]: -; CST-NEXT: .long 3539992704 ## float -5.497642e+11 -; CST-NEXT: .long 3539992704 ## float -5.497642e+11 -; CST-NEXT: .long 3539992704 ## float -5.497642e+11 -; CST-NEXT: .long 3539992704 ## float -5.497642e+11 +; CST-NEXT: .long 3539992704 ## float -5.49764202E+11 +; CST-NEXT: .long 3539992704 ## float -5.49764202E+11 +; CST-NEXT: .long 3539992704 ## float -5.49764202E+11 +; CST-NEXT: .long 3539992704 ## float -5.49764202E+11 ; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]: ; AVX2-NEXT: .long 1258291200 ## 0x4b000000 diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll index 248a9202e997..99b27efe7f54 100644 --- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll +++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll @@ -39,10 +39,10 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwin ; Also test the general purpose constant folding of int->fp. define void @foo2(<4 x float>* noalias %result) nounwind { ; CHECK-LABEL: LCPI2_0: -; CHECK-NEXT: .long 1082130432 ## float 4.000000e+00 -; CHECK-NEXT: .long 1084227584 ## float 5.000000e+00 -; CHECK-NEXT: .long 1086324736 ## float 6.000000e+00 -; CHECK-NEXT: .long 1088421888 ## float 7.000000e+00 +; CHECK-NEXT: .long 1082130432 ## float 4 +; CHECK-NEXT: .long 1084227584 ## float 5 +; CHECK-NEXT: .long 1086324736 ## float 6 +; CHECK-NEXT: .long 1088421888 ## float 7 ; CHECK-LABEL: foo2: ; CHECK: movaps LCPI2_0(%rip), %xmm0 @@ -72,10 +72,10 @@ define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind { ; Test the general purpose constant folding of uint->fp. define void @foo4(<4 x float>* noalias %result) nounwind { ; CHECK-LABEL: LCPI4_0: -; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00 -; CHECK-NEXT: .long 1123942400 ## float 1.270000e+02 -; CHECK-NEXT: .long 1124073472 ## float 1.280000e+02 -; CHECK-NEXT: .long 1132396544 ## float 2.550000e+02 +; CHECK-NEXT: .long 1065353216 ## float 1 +; CHECK-NEXT: .long 1123942400 ## float 127 +; CHECK-NEXT: .long 1124073472 ## float 128 +; CHECK-NEXT: .long 1132396544 ## float 255 ; CHECK-LABEL: foo4: ; CHECK: movaps LCPI4_0(%rip), %xmm0 From fcb0893d8819bd6e1aecb360c49d5476b3b2ac24 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 8 Dec 2015 02:38:14 +0000 Subject: [PATCH 207/364] Add a test showing that we internalize lazily linked GVs. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254989 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Linker/Inputs/internalize-lazy.ll | 8 ++++++++ test/Linker/internalize-lazy.ll | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 test/Linker/Inputs/internalize-lazy.ll create mode 100644 test/Linker/internalize-lazy.ll diff --git a/test/Linker/Inputs/internalize-lazy.ll b/test/Linker/Inputs/internalize-lazy.ll new file mode 100644 index 000000000000..43f9a7ab7455 --- /dev/null +++ b/test/Linker/Inputs/internalize-lazy.ll @@ -0,0 +1,8 @@ +define linkonce_odr void @g() { + ret void +} + +define void @f() { + call void @g() + ret void +} diff --git a/test/Linker/internalize-lazy.ll b/test/Linker/internalize-lazy.ll new file mode 100644 index 000000000000..480335927b51 --- /dev/null +++ b/test/Linker/internalize-lazy.ll @@ -0,0 +1,4 @@ +; RUN: llvm-link -S -internalize %s %p/Inputs/internalize-lazy.ll | FileCheck %s + +; CHECK: define internal void @f +; CHECK: define internal void @g From f2765767e6131c9b9b50acd53d58b54717fbd7c7 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Tue, 8 Dec 2015 02:45:59 +0000 Subject: [PATCH 208/364] [llvm-objdump/MachO] Don't cut'n'paste the same code over and over. Use the appropriate helper instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254990 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/MachODump.cpp | 28 ++++++++-------------------- tools/llvm-objdump/llvm-objdump.cpp | 2 +- tools/llvm-objdump/llvm-objdump.h | 1 + 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index b270057151e1..61567436a1cc 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -1495,11 +1495,8 @@ void llvm::ParseInputMachO(StringRef Filename) { printArchiveHeaders(A, !NonVerbose, ArchiveMemberOffsets); for (Archive::child_iterator I = A->child_begin(), E = A->child_end(); I != E; ++I) { - if (std::error_code EC = I->getError()) { - errs() << "llvm-objdump: '" << Filename << "': " << EC.message() - << ".\n"; - exit(1); - } + if (std::error_code EC = I->getError()) + report_error(Filename, EC); auto &C = I->get(); ErrorOr> ChildOrErr = C.getAsBinary(); if (ChildOrErr.getError()) @@ -1549,11 +1546,8 @@ void llvm::ParseInputMachO(StringRef Filename) { for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end(); AI != AE; ++AI) { - if (std::error_code EC = AI->getError()) { - errs() << "llvm-objdump: '" << Filename - << "': " << EC.message() << ".\n"; - exit(1); - } + if (std::error_code EC = AI->getError()) + report_error(Filename, EC); auto &C = AI->get(); ErrorOr> ChildOrErr = C.getAsBinary(); if (ChildOrErr.getError()) @@ -1597,11 +1591,8 @@ void llvm::ParseInputMachO(StringRef Filename) { for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end(); AI != AE; ++AI) { - if (std::error_code EC = AI->getError()) { - errs() << "llvm-objdump: '" << Filename << "': " << EC.message() - << ".\n"; - exit(1); - } + if (std::error_code EC = AI->getError()) + report_error(Filename, EC); auto &C = AI->get(); ErrorOr> ChildOrErr = C.getAsBinary(); if (ChildOrErr.getError()) @@ -1639,11 +1630,8 @@ void llvm::ParseInputMachO(StringRef Filename) { printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets); for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end(); AI != AE; ++AI) { - if (std::error_code EC = AI->getError()) { - errs() << "llvm-objdump: '" << Filename << "': " << 
EC.message() - << ".\n"; - exit(1); - } + if (std::error_code EC = AI->getError()) + report_error(Filename, EC); auto &C = AI->get(); ErrorOr> ChildOrErr = C.getAsBinary(); if (ChildOrErr.getError()) diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 069425429d16..a2c43e11a78e 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -252,7 +252,7 @@ void llvm::error(std::error_code EC) { exit(1); } -static void report_error(StringRef File, std::error_code EC) { +void llvm::report_error(StringRef File, std::error_code EC) { assert(EC); errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n"; exit(1); diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index 3c5e7b365825..f74ed010d1d1 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -78,6 +78,7 @@ void PrintRelocations(const object::ObjectFile *o); void PrintSectionHeaders(const object::ObjectFile *o); void PrintSectionContents(const object::ObjectFile *o); void PrintSymbolTable(const object::ObjectFile *o); +void report_error(StringRef File, std::error_code EC); } // end namespace llvm From e32f0e20e5af6fe16869bde63c121b5571133550 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 8 Dec 2015 03:01:16 +0000 Subject: [PATCH 209/364] IR: Allow vectors of halfs to be ConstantDataVectors Currently, vectors of halfs end up as ConstantVectors, but there isn't a good reason they can't be ConstantDataVectors. This should save some memory. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254991 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Constants.cpp | 18 +++++++++++++++--- test/CodeGen/X86/float-asmprint.ll | 15 +++++++++++++++ unittests/IR/ConstantsTest.cpp | 28 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index b4a07a1b6b4a..509783fff8bd 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -899,7 +899,9 @@ static Constant *getSequenceIfElementsMatch(Constant *C, else if (CI->getType()->isIntegerTy(64)) return getIntSequenceIfElementsMatch(V); } else if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) + if (CFP->getType()->isHalfTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isFloatTy()) return getFPSequenceIfElementsMatch(V); else if (CFP->getType()->isDoubleTy()) return getFPSequenceIfElementsMatch(V); @@ -2365,7 +2367,7 @@ StringRef ConstantDataSequential::getRawDataValues() const { /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. 
bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) { - if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; if (auto *IT = dyn_cast(Ty)) { switch (IT->getBitWidth()) { case 8: @@ -2637,6 +2639,11 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { } if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType()->isHalfTy()) { + SmallVector Elts( + NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + return getFP(V->getContext(), Elts); + } if (CFP->getType()->isFloatTy()) { SmallVector Elts( NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); @@ -2682,6 +2689,10 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { switch (getElementType()->getTypeID()) { default: llvm_unreachable("Accessor can only be used when element is float/double!"); + case Type::HalfTyID: { + auto EltVal = *reinterpret_cast(EltPtr); + return APFloat(APFloat::IEEEhalf, APInt(16, EltVal)); + } case Type::FloatTyID: { auto EltVal = *reinterpret_cast(EltPtr); return APFloat(APFloat::IEEEsingle, APInt(32, EltVal)); @@ -2716,7 +2727,8 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { /// Note that this has to compute a new constant to return, so it isn't as /// efficient as getElementAsInteger/Float/Double. Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { - if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) + if (getElementType()->isHalfTy() || getElementType()->isFloatTy() || + getElementType()->isDoubleTy()) return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll index 5de9700fc064..0108430ee93e 100644 --- a/test/CodeGen/X86/float-asmprint.ll +++ b/test/CodeGen/X86/float-asmprint.ll @@ -9,6 +9,8 @@ @var64 = global double -0.0, align 8 @var32 = global float -0.0, align 4 @var16 = global half -0.0, align 2 +@var4f32 = global <4 x float> +@var4f16 = global <4 x half> ; CHECK: var128: ; CHECK-NEXT: .quad 0 # fp128 -0 @@ -39,3 +41,16 @@ ; CHECK-NEXT: .short 32768 # half -0 ; CHECK-NEXT: .size +; CHECK: var4f32: +; CHECK-NEXT: .long 2147483648 # float -0 +; CHECK-NEXT: .long 0 # float 0 +; CHECK-NEXT: .long 1065353216 # float 1 +; CHECK-NEXT: .long 1073741824 # float 2 +; CHECK-NEXT: .size + +; CHECK: var4f16: +; CHECK-NEXT: .short 32768 # half -0 +; CHECK-NEXT: .short 0 # half 0 +; CHECK-NEXT: .short 15360 # half 1 +; CHECK-NEXT: .short 16384 # half 2 +; CHECK-NEXT: .size diff --git a/unittests/IR/ConstantsTest.cpp b/unittests/IR/ConstantsTest.cpp index 7741b448fa8d..8c33453d293d 100644 --- a/unittests/IR/ConstantsTest.cpp +++ b/unittests/IR/ConstantsTest.cpp @@ -382,5 +382,33 @@ TEST(ConstantsTest, AliasCAPI) { ASSERT_EQ(unwrap(AliasRef)->getAliasee(), Aliasee); } +static std::string getNameOfType(Type *T) { + std::string S; + raw_string_ostream RSOS(S); + T->print(RSOS); + return S; +} + +TEST(ConstantsTest, BuildConstantDataVectors) { + LLVMContext Context; + std::unique_ptr M(new Module("MyModule", Context)); + + for (Type *T : {Type::getInt8Ty(Context), Type::getInt16Ty(Context), + Type::getInt32Ty(Context), Type::getInt64Ty(Context)}) { + Constant *Vals[] = {ConstantInt::get(T, 0), ConstantInt::get(T, 1)}; + Constant *CDV = ConstantVector::get(Vals); + ASSERT_TRUE(dyn_cast(CDV) != nullptr) + << " T = " << getNameOfType(T); + } + + for 
(Type *T : {Type::getHalfTy(Context), Type::getFloatTy(Context), + Type::getDoubleTy(Context)}) { + Constant *Vals[] = {ConstantFP::get(T, 0), ConstantFP::get(T, 1)}; + Constant *CDV = ConstantVector::get(Vals); + ASSERT_TRUE(dyn_cast(CDV) != nullptr) + << " T = " << getNameOfType(T); + } +} + } // end anonymous namespace } // end namespace llvm From c2d82ef29f45abe3b75069b1fbdf9d0d9f8d8335 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:22:33 +0000 Subject: [PATCH 210/364] [WebAssembly] Remove the override of haveFastSqrt. The default implementation in BasicTTI already checks TLI and does the right thing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp | 5 ----- lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h | 1 - 2 files changed, 6 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index ea7044d58834..356631711921 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,8 +25,3 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TargetTransformInfo::PSK_FastHardware; } - -bool WebAssemblyTTIImpl::haveFastSqrt(Type *Ty) const { - assert(Ty->isFPOrFPVectorTy() && "Ty must be floating point"); - return true; -} diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 84f9f0e3e55e..26dc388cc922 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -55,7 +55,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { // TODO: Implement more Scalar TTI for WebAssembly TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; - bool haveFastSqrt(Type *Ty) const; /// @} From 579ccfd983f4deb9d2374176fbb0119d73f546e2 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:25:35 +0000 Subject: [PATCH 211/364] [WebAssembly] Trim some unneeded #includes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h | 1 - lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 1 - lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h | 2 -- 3 files changed, 4 deletions(-) diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 20569da0b110..c585d44f0150 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/CodeGen/MachineValueType.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 6ef01ffade43..b13dd148adf3 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #include "llvm/Support/DataTypes.h" -#include namespace llvm { diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 62c5f33cfad7..af4dabb2c6c3 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -16,8 +16,6 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H -#include "WebAssemblyRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { From 55a29f75fd81ea0a107fe3cfab9fe4000082cc9a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:30:42 +0000 Subject: [PATCH 212/364] [WebAssembly] Assert MRI.isSSA() in passes that depend on SSA form. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 2 ++ lib/Target/WebAssembly/WebAssemblyStoreResults.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ac016a7b9b0a..9fbde70634ac 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -127,6 +127,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo(); AliasAnalysis &AA = getAnalysis().getAAResults(); + assert(MRI.isSSA() && "RegStackify depends on SSA form"); + // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. 
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index b67453bee708..21122ba2b2ea 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -72,6 +72,8 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineDominatorTree &MDT = getAnalysis(); + assert(MRI.isSSA() && "StoreResults depends on SSA form"); + for (auto &MBB : MF) { DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); for (auto &MI : MBB) From 1acb0660c303fc422ca4f5bfe7e8147941409fd6 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:33:51 +0000 Subject: [PATCH 213/364] [WebAssembly] Convert a file-level comment to doxygen style. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssembly.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td index 53dd9290348a..551ad9345154 100644 --- a/lib/Target/WebAssembly/WebAssembly.td +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This is a target description file for the WebAssembly architecture, which is -// also known as "wasm". -// +/// +/// \file +/// \brief This is a target description file for the WebAssembly architecture, +/// which is also known as "wasm". +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// From 7710c66ebd1a584c72a803f6528e42cb892507b2 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:36:00 +0000 Subject: [PATCH 214/364] [WebAssembly] Fix an emacs syntax highlighting comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254997 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyInstrFormats.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 62a108c0d400..8008dd32353a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -1,4 +1,4 @@ -// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-// +//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // From ca9fa31c8cddf8adbe019dbc12871a7b5703d0a4 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:42:50 +0000 Subject: [PATCH 215/364] [WebAssembly] Remove an unneeded static_cast. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254998 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 85fb753ed0e0..4883d83647db 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -593,8 +593,7 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, // the current frame pointer. 
DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); unsigned FP = - static_cast(Subtarget->getRegisterInfo()) - ->getFrameRegister(DAG.getMachineFunction()); + Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1), From 4474471834be491ad1c6de19da0fa82bf667e70a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Dec 2015 03:43:03 +0000 Subject: [PATCH 216/364] [WebAssembly] Fix a typo in a comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 9fbde70634ac..d890310ac501 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -15,7 +15,7 @@ /// are then marked as "stackified", meaning references to them are replaced by /// "push" and "pop" from the stack. /// -/// This is primarily a code size optimiation, since temporary values on the +/// This is primarily a code size optimization, since temporary values on the /// expression don't need to be named. /// //===----------------------------------------------------------------------===// From 43a68c82e51919cc6d38187f95d30fef456e6ad7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 03:50:32 +0000 Subject: [PATCH 217/364] [OperandBundles] Remove unncessary constructor The StringRef constructor is unnecessary (since we're converting to std::string anyway), and having it requires an explicit call to StringRef's or std::string's constructor. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255000 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/InstrTypes.h | 3 --- lib/Transforms/Utils/InlineFunction.cpp | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 157cb27cefbb..2a0927266656 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -1167,9 +1167,6 @@ template class OperandBundleDefT { std::vector Inputs; public: - explicit OperandBundleDefT(StringRef Tag, std::vector Inputs) - : Tag(Tag), Inputs(std::move(Inputs)) {} - explicit OperandBundleDefT(std::string Tag, std::vector Inputs) : Tag(std::move(Tag)), Inputs(std::move(Inputs)) {} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index aee84c07d593..52bde6797dbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1191,7 +1191,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), ChildOB.Inputs.end()); - OpDefs.emplace_back(StringRef("deopt"), std::move(MergedDeoptArgs)); + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); } Instruction *NewI = nullptr; From 354f2216a71dd030ec818aa637b53ee192dbfa1a Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 04:32:51 +0000 Subject: [PATCH 218/364] [SCEV] Fix indentation; NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 300 +++++++++++++++---------------- 1 file changed, 150 insertions(+), 150 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 9a0570d47f02..59c845baa289 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -446,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { //===----------------------------------------------------------------------===// namespace { - /// SCEVComplexityCompare - Return true if the complexity of the LHS is less - /// than the complexity of the RHS. This comparator is used to canonicalize - /// expressions. - class SCEVComplexityCompare { - const LoopInfo *const LI; - public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; - } - - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; - - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; - - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast(LHS); - const SCEVUnknown *RU = cast(RHS); - - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. 
- const Value *LV = LU->getValue(), *RV = RU->getValue(); - - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. - bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; - - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; - - // Sort arguments by their position. - if (const Argument *LA = dyn_cast(LV)) { - const Argument *RA = cast(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } - - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast(LV)) { - const Instruction *RInst = cast(RV); - - // Compare loop depths. - const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } - - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS. This comparator is used to canonicalize +/// expressions. +class SCEVComplexityCompare { + const LoopInfo *const LI; +public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - return 0; - } + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } - case scConstant: { - const SCEVConstant *LC = cast(LHS); - const SCEVConstant *RC = cast(RHS); - - // Compare constant values. - const APInt &LA = LC->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast(LHS); + const SCEVUnknown *RU = cast(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. 
+ unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LV)) { + const Argument *RA = cast(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast(LHS); - const SCEVAddRecExpr *RA = cast(RHS); - - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast(LV)) { + const Instruction *RInst = cast(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } - // Lexicographically compare. - for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } + return 0; + } + + case scConstant: { + const SCEVConstant *LC = cast(LHS); + const SCEVConstant *RC = cast(RHS); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } - return 0; + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast(LHS); + const SCEVAddRecExpr *RA = cast(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; } - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast(LHS); - const SCEVNAryExpr *RC = cast(RHS); - - // Lexicographically compare n-ary expressions. - unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. 
+ for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast(LHS); - const SCEVUDivExpr *RC = cast(RHS); + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); - // Lexicographically compare udiv expressions. - long X = compare(LC->getLHS(), RC->getLHS()); + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } + return (int)LNumOps - (int)RNumOps; + } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast(LHS); - const SCEVCastExpr *RC = cast(RHS); + case scUDivExpr: { + const SCEVUDivExpr *LC = cast(LHS); + const SCEVUDivExpr *RC = cast(RHS); - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast(LHS); + const SCEVCastExpr *RC = cast(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); } - }; -} + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); + } +}; +} // end anonymous namespace /// GroupByComplexity - Given a list of SCEV objects, order them by their /// complexity, and group objects of the same complexity together by value. From 711641834a5e07534b823880a932747d0dca174c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 04:32:54 +0000 Subject: [PATCH 219/364] [SCEV] Move some struct declarations inside functions; NFC Reduces the scope over which the struct is visible, making its usages obvious. I did not move structs in cases where this wasn't a clear win (the struct is too large, or is grouped in some other interesting way). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255003 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 117 ++++++++++++++----------------- 1 file changed, 54 insertions(+), 63 deletions(-) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 59c845baa289..6c8f3ba9c6e0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -666,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl &Ops, } } -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. 
+ return true; + } + bool isDone() const { + return false; + } + }; + FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); @@ -1929,14 +1927,6 @@ CollectAddOperandsWithScales(DenseMap &M, return Interesting; } -namespace { - struct APIntCompare { - bool operator()(const APInt &LHS, const APInt &RHS) const { - return LHS.ult(RHS); - } - }; -} - // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. @@ -2149,6 +2139,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops.data(), Ops.size(), APInt(BitWidth, 1), *this)) { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; + // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. @@ -3289,7 +3285,8 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const { // Helper class working with SCEVTraversal to figure out if a SCEV contains // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne // is set iff if find such SCEVUnknown. @@ -3311,9 +3308,7 @@ namespace { } bool isDone() const { return FindOne; } }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const { FindInvalidSCEVUnknown F; SCEVTraversal ST(F); ST.visitAll(S); @@ -8721,30 +8716,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, return true; } -namespace { -struct FindParameter { - bool FoundParameter; - FindParameter() : FoundParameter(false) {} - - bool follow(const SCEV *S) { - if (isa(S)) { - FoundParameter = true; - // Stop recursion: we found a parameter. - return false; - } - // Keep looking. - return true; - } - bool isDone() const { - // Stop recursion if we have found a parameter. - return FoundParameter; - } -}; -} - // Returns true when S contains at least a SCEVUnknown parameter. static inline bool containsParameters(const SCEV *S) { + struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} + + bool follow(const SCEV *S) { + if (isa(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; + } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; + } + }; + FindParameter F; SCEVTraversal ST(F); ST.visitAll(S); @@ -9406,24 +9399,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } -namespace { -// Search for a SCEV expression node within an expression tree. -// Implements SCEVTraversal::Visitor. -struct SCEVSearch { - const SCEV *Node; - bool IsFound; +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + // Search for a SCEV expression node within an expression tree. + // Implements SCEVTraversal::Visitor. 
+ struct SCEVSearch { + const SCEV *Node; + bool IsFound; - SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} - bool follow(const SCEV *S) { - IsFound |= (S == Node); - return !IsFound; - } - bool isDone() const { return IsFound; } -}; -} + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; + } + bool isDone() const { return IsFound; } + }; -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { SCEVSearch Search(Op); visitAll(S, Search); return Search.IsFound; From 2450ea130a3776fe6285f19feea9ae330e2a7ba7 Mon Sep 17 00:00:00 2001 From: Michael Zuckerman Date: Tue, 8 Dec 2015 12:00:24 +0000 Subject: [PATCH 220/364] dding test for fnstsw continue of Wrong FNSTSW size operator url: http://reviews.llvm.org/D14953 Differential Revision: http://reviews.llvm.org/D15155 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255007 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/X86/intel-syntax.s | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 001a26d07019..c7ec77eadfee 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -489,10 +489,12 @@ test [ECX], AL // CHECK: fnstsw %ax // CHECK: fnstsw %ax // CHECK: fnstsw %ax +// CHECK: fnstsw (%eax) fnstsw fnstsw AX fnstsw EAX fnstsw AL +fnstsw WORD PTR [EAX] // CHECK: faddp %st(1) // CHECK: fmulp %st(1) From 59ad77e46ed43d552c2dee1ffd8e5bd522022919 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 8 Dec 2015 12:16:10 +0000 Subject: [PATCH 221/364] [AArch64] Add ARMv8.2-A FP16 vector instructions ARMv8.2-A adds 16-bit floating point versions of all existing SIMD floating-point instructions. This is an optional extension, so all of these instructions require the FeatureFullFP16 subtarget feature. Note that VFP without SIMD is not a valid combination for any version of ARMv8-A, but I have ensured that these instructions all depend on both FeatureNEON and FeatureFullFP16 for consistency. The ".2h" vector type specifier is now legal (for the scalar pairwise reduction instructions), so some unrelated tests have been modified as different error messages are emitted. This is not a problem as the invalid operands are still caught. 
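For illustration only (not part of the original commit): with this patch, forms such as the following are expected to assemble once both features are enabled, e.g. via llvm-mc -triple=aarch64 -mattr=+neon,+fullfp16 (the "+fullfp16" spelling is assumed here from the FeatureFullFP16 subtarget feature; the specific register choices are arbitrary):

  fadd    v0.4h, v1.4h, v2.4h   // FP16 three-same vector add
  fcmgt   v3.8h, v4.8h, v5.8h   // FP16 vector compare greater-than
  fmaxnmp h6, v7.2h             // scalar pairwise reduction using the now-legal ".2h" specifier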
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrFormats.td | 576 ++++++++++++------ lib/Target/AArch64/AArch64InstrInfo.td | 152 +++-- lib/Target/AArch64/AArch64RegisterInfo.td | 2 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 2 + test/MC/AArch64/arm64-advsimd.s | 153 ++++- test/MC/AArch64/armv8.1a-rdma.s | 18 - test/MC/AArch64/fullfp16-diagnostics.s | 42 ++ test/MC/AArch64/fullfp16-neon-neg.s | 382 ++++++++++++ test/MC/AArch64/neon-2velem.s | 18 +- test/MC/AArch64/neon-aba-abd.s | 4 +- test/MC/AArch64/neon-across.s | 18 +- test/MC/AArch64/neon-add-pairwise.s | 6 +- test/MC/AArch64/neon-add-sub-instructions.s | 10 +- test/MC/AArch64/neon-compare-instructions.s | 62 +- test/MC/AArch64/neon-diagnostics.s | 132 ++-- test/MC/AArch64/neon-facge-facgt.s | 18 +- test/MC/AArch64/neon-frsqrt-frecp.s | 10 +- test/MC/AArch64/neon-max-min-pairwise.s | 18 +- test/MC/AArch64/neon-max-min.s | 18 +- test/MC/AArch64/neon-mla-mls-instructions.s | 10 +- test/MC/AArch64/neon-scalar-abs.s | 4 +- test/MC/AArch64/neon-scalar-by-elem-mla.s | 6 +- test/MC/AArch64/neon-scalar-by-elem-mul.s | 6 +- test/MC/AArch64/neon-scalar-cvt.s | 34 +- test/MC/AArch64/neon-scalar-fp-compare.s | 32 +- test/MC/AArch64/neon-scalar-mul.s | 4 +- test/MC/AArch64/neon-scalar-recip.s | 12 +- test/MC/AArch64/neon-scalar-reduce-pairwise.s | 7 +- test/MC/AArch64/neon-simd-misc.s | 98 ++- test/MC/AArch64/neon-simd-shift.s | 18 +- .../AArch64/fullfp16-neon-neg.txt | 382 ++++++++++++ 31 files changed, 1917 insertions(+), 337 deletions(-) create mode 100644 test/MC/AArch64/fullfp16-diagnostics.s create mode 100644 test/MC/AArch64/fullfp16-neon-neg.s create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 5eef82153e39..101b0f7e1d3a 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -4315,7 +4315,7 @@ let Predicates = [HasNEON] in { //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, +class BaseSIMDThreeSameVector size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -4329,8 +4329,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4339,7 +4338,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, } let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, +class BaseSIMDThreeSameVectorTied size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, @@ -4353,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4365,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, // All operand sizes distinguished in the encoding. 
multiclass SIMDThreeSameVector opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; } @@ -4391,49 +4389,49 @@ multiclass SIMDThreeSameVector opc, string asm, // As above, but D sized elements unsupported. multiclass SIMDThreeSameVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; } multiclass SIMDThreeSameVectorBHSTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : 
BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; @@ -4442,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied opc, string asm, // As above, but only B sized elements supported. multiclass SIMDThreeSameVectorB opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; } -// As above, but only S and D sized floating point elements supported. -multiclass SIMDThreeSameVectorFP opc, +// As above, but only floating point elements supported. +multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPCmp opc, +multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, 
{S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPTied opc, +multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; @@ -4498,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied opc, // As above, but D and B sized elements unsupported. multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; } @@ -4515,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS opc, string asm, // Logical three vector ops share opcode bits, and only use B sized elements. 
multiclass SIMDLogicalThreeVector size, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; @@ -4539,11 +4563,11 @@ multiclass SIMDLogicalThreeVector size, string asm, multiclass SIMDLogicalThreeVectorTied size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), @@ -4583,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied size, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, @@ -4596,7 +4620,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4605,8 +4631,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list pattern> : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, @@ -4618,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4628,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, // Supports B, H, and S element sizes. 
multiclass SIMDTwoVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4686,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS { // Supports all element sizes. multiclass SIMDLongTwoVector opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } multiclass SIMDLongTwoVectorTied opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 
0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v4i32 V128:$Rn)))]>; @@ -4737,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied opc, string asm, // Supports all element sizes, except 1xD. multiclass SIMDTwoVectorBHSDTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; } multiclass SIMDTwoVectorBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), 
(OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4789,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD opc, string asm, // Supports only B element sizes. multiclass SIMDTwoVectorB size, bits<5> opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; @@ -4801,16 +4830,16 @@ multiclass SIMDTwoVectorB size, bits<5> opc, string asm, // Supports only B and H element sizes. multiclass SIMDTwoVectorBH opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; } @@ -4819,13 +4848,21 @@ multiclass SIMDTwoVectorBH opc, string asm, // as an extra opcode bit. multiclass SIMDTwoVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } @@ -4833,10 +4870,10 @@ multiclass SIMDTwoVectorFP opc, string asm, // Supports only S element size. 
multiclass SIMDTwoVectorS opc, string asm, SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4844,26 +4881,42 @@ multiclass SIMDTwoVectorS opc, string asm, multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4942,10 +4995,10 @@ multiclass SIMDMixedTwoVector opc, string asm, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; } -class BaseSIMDCmpTwoVector size, bits<5> opcode, - RegisterOperand regtype, - string asm, string kind, string zero, - ValueType dty, ValueType sty, SDNode OpNode> +class BaseSIMDCmpTwoVector size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", @@ -4958,7 +5011,9 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = 
Rn; @@ -4968,49 +5023,69 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, // Comparisons support all element sizes, except 1xD. multiclass SIMDCmpTwoVector opc, string asm, SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, asm, ".8b", "0", v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, asm, ".16b", "0", v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, asm, ".4h", "0", v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, asm, ".8h", "0", v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, asm, ".2s", "0", v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, asm, ".4s", "0", v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, asm, ".2d", "0", v2i64, v2i64, OpNode>; } -// FP Comparisons support only S and D element sizes. +// FP Comparisons support only S and D element sizes (and H for v8.2a). multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, asm, ".2s", "0.0", v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, asm, ".4s", "0.0", v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; def : InstAliasopc, string asm, //---------------------------------------------------------------------------- let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, +class BaseSIMDThreeScalar size, bits<5> opcode, RegisterClass regtype, string asm, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -5573,8 +5648,7 @@ class BaseSIMDThreeScalar size, bits<5> opcode, let Inst{31-30} = 0b01; let Inst{29} = U; let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; 
let Inst{10} = 1; @@ -5605,17 +5679,17 @@ class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarBHSD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; @@ -5625,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD opc, string asm, multiclass SIMDThreeScalarHS opc, string asm, SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarHSTied opc, string asm, @@ -5640,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied opc, string asm, asm, []>; } -multiclass SIMDThreeScalarSD opc, string asm, +multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; } -multiclass SIMDThreeScalarFPCmp opc, string asm, +multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), @@ -5718,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS opc, string asm, //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<5> opcode, +class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, RegisterClass regtype2, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, @@ -5730,7 +5812,9 @@ class BaseSIMDTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5759,7 +5843,7 @@ class BaseSIMDTwoScalarTied size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<5> opcode, +class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, string asm, string zero> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn, #" # zero, "", []>, @@ -5770,7 +5854,9 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5792,7 +5878,7 @@ class SIMDInexactCvtTwoScalar opcode, string asm> multiclass SIMDCmpTwoScalarD opc, string asm, 
SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; def : Pat<(v1i64 (OpNode FPR64:$Rn)), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5800,13 +5886,20 @@ multiclass SIMDCmpTwoScalarD opc, string asm, multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar; + } def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5814,7 +5907,7 @@ multiclass SIMDFPCmpTwoScalar opc, string asm, multiclass SIMDTwoScalarD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), @@ -5822,27 +5915,34 @@ multiclass SIMDTwoScalarD opc, string asm, } multiclass SIMDFPTwoScalar opc, string asm> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } } -multiclass SIMDTwoScalarCVTSD opc, string asm, +multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar; + } } multiclass SIMDTwoScalarBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), @@ -5869,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied opc, string asm, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in multiclass SIMDTwoScalarMixedBHS opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } //---------------------------------------------------------------------------- @@ -5904,10 +6004,14 @@ multiclass SIMDPairwiseScalarD opc, string asm> { asm, ".2d">; } -multiclass SIMDFPPairwiseScalar opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar; } @@ -5963,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { asm, ".4s", []>; } -multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, +multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + 
} // Predicates = [HasNEON, HasFullFP16] def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, asm, ".4s", [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; @@ -6451,7 +6563,7 @@ multiclass SIMDScalarCPY { // AdvSIMD modified immediate instructions //---------------------------------------------------------------------------- -class BaseSIMDModifiedImm pattern> : I, @@ -6463,16 +6575,17 @@ class BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6520,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied b15_b12, class BaseSIMDModifiedImmVectorShiftHalf b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6585,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied hw_cmode, class SIMDModifiedImmMoveMSL cmode, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<1> shift; @@ -6593,18 +6706,18 @@ class SIMDModifiedImmMoveMSL cmode, let Inst{12} = shift; } -class SIMDModifiedImmVectorNoShift cmode, +class SIMDModifiedImmVectorNoShift cmode, RegisterOperand vectype, Operand imm_type, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { let Inst{15-12} = cmode; } class SIMDModifiedImmScalarNoShift cmode, string asm, list pattern> - : BaseSIMDModifiedImm { let Inst{15-12} = cmode; let DecoderMethod = "DecodeModImmInstruction"; @@ -6676,6 +6789,34 @@ class BaseSIMDIndexedTied size, bits<4> opc, multiclass SIMDFPIndexed opc, string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, @@ -6712,6 +6853,21 @@ multiclass SIMDFPIndexed opc, string asm, let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, asm, ".s", "", "", ".s", @@ -6790,6 +6946,27 @@ multiclass SIMDFPIndexedTiedPatterns { } multiclass SIMDFPIndexedTied opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let 
Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, asm, ".2s", ".2s", ".2s", ".s", []> { @@ -6816,6 +6993,16 @@ multiclass SIMDFPIndexedTied opc, string asm> { let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, @@ -7353,7 +7540,13 @@ class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, } -multiclass SIMDScalarRShiftSD opc, string asm> { +multiclass SIMDFPScalarRShift opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] def s : BaseSIMDScalarShift { let Inst{20-16} = imm{4-0}; @@ -7533,6 +7726,23 @@ class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, multiclass SIMDVectorRShiftSD opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -7558,8 +7768,26 @@ multiclass SIMDVectorRShiftSD opc, string asm, } } -multiclass SIMDVectorRShiftSDToFP opc, string asm, +multiclass SIMDVectorRShiftToFP opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -8840,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in { class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> - : BaseSIMDThreeSameVectorTied { - let Inst{21}=0; } multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, SDPatternOperator Accum> { @@ -9277,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">; def : TokenAlias<".4S", ".4s">; def : TokenAlias<".2D", ".2d">; def : TokenAlias<".1Q", ".1q">; +def : TokenAlias<".2H", ".2h">; def : 
TokenAlias<".B", ".b">; def : TokenAlias<".H", ".h">; def : TokenAlias<".S", ".s">; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 881f55ebeef9..cfb0c1b578da 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2857,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", fmaxnan>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", fminnum>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", fminnan>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>; +defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", +defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; // The following def pats catch the case where the LHS of an FMA is negated. @@ -2893,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", @@ -3081,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # "|cmlt.2d\t$dst, $src1, $src2}", (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # "|fcmle.2s\t$dst, $src1, $src2}", (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3091,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # "|fcmle.2d\t$dst, $src1, $src2}", (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # "|fcmlt.2s\t$dst, $src1, $src2}", (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3101,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # "|fcmlt.2d\t$dst, $src1, $src2}", (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # "|facle.2s\t$dst, $src1, $src2}", (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3111,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 
"|facle.2d\t$dst, $src1, $src2}", (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # "|faclt.2s\t$dst, $src1, $src2}", (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3132,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -3248,14 +3280,14 @@ defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", int_aarch64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; defm UQXTN : 
SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; @@ -3620,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">; //---------------------------------------------------------------------------- defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDFPPairwiseScalar<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDFPPairwiseScalar<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDFPPairwiseScalar<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDFPPairwiseScalar<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDFPPairwiseScalar<1, 1, 0b01111, "fminp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), @@ -3976,10 +4008,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. 
@@ -4226,15 +4258,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d", [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s", [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] // AdvSIMD MOVI @@ -4262,7 +4302,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 in the pattern let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d", [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; @@ -4323,10 +4363,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; @@ -4526,10 +4566,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), //---------------------------------------------------------------------------- // AdvSIMD scalar shift instructions //---------------------------------------------------------------------------- -defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; -defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; +defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; +defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; +defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; +defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; // Codegen patterns for the above. We don't put these directly on the // instructions because TableGen's type inference can't handle the truth. // Having the same base pattern for fp <--> int totally freaks it out. 
@@ -4602,7 +4642,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", //---------------------------------------------------------------------------- defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", int_aarch64_neon_rshrn>; @@ -4637,7 +4677,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b2efca023372..a8c8b176efa9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64", // The lower 16 vector registers. Some instructions can only take registers // in this range. def FPR128_lo : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (trunc FPR128, 16)>; // Pairs, triples, and quads of 64-bit vector registers. diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f0ad855ed5e6..394c8e78581f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1921,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) { .Case(".h", true) .Case(".s", true) .Case(".d", true) + // Needed for fp16 scalar pairwise reductions + .Case(".2h", true) .Default(false); } diff --git a/test/MC/AArch64/arm64-advsimd.s b/test/MC/AArch64/arm64-advsimd.s index c627de708d31..294f09082916 100644 --- a/test/MC/AArch64/arm64-advsimd.s +++ b/test/MC/AArch64/arm64-advsimd.s @@ -1,4 +1,4 @@ -; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s +; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto,fullfp16 -output-asm-variant=1 -show-encoding < %s | FileCheck %s foo: @@ -440,6 +440,106 @@ foo: ; CHECK: urshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x2e] ; CHECK: ushl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x2e] + fabd.4h v0, v0, v0 + facge.4h v0, v0, v0 + facgt.4h v0, v0, v0 + faddp.4h v0, v0, v0 + fadd.4h v0, v0, v0 + fcmeq.4h v0, v0, v0 + fcmge.4h v0, v0, v0 + fcmgt.4h v0, v0, v0 + fdiv.4h v0, v0, v0 + fmaxnmp.4h v0, v0, v0 + fmaxnm.4h v0, v0, v0 + fmaxp.4h v0, v0, v0 + fmax.4h v0, v0, v0 + fminnmp.4h v0, v0, v0 + fminnm.4h v0, v0, v0 + fminp.4h v0, v0, v0 + fmin.4h v0, v0, v0 + fmla.4h v0, v0, v0 + fmls.4h v0, v0, v0 + fmulx.4h v0, v0, v0 + fmul.4h v0, v0, v0 + frecps.4h v0, v0, v0 + frsqrts.4h v0, v0, v0 + fsub.4h v0, v0, v0 + +; CHECK: fabd.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x2e] +; CHECK: facge.4h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x2e] +; CHECK: facgt.4h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x2e] +; CHECK: faddp.4h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x2e] +; CHECK: fadd.4h 
v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x0e] +; CHECK: fcmeq.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x0e] +; CHECK: fcmge.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x2e] +; CHECK: fcmgt.4h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x2e] +; CHECK: fdiv.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x2e] +; CHECK: fmaxnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x2e] +; CHECK: fmaxnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x0e] +; CHECK: fmaxp.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x2e] +; CHECK: fmax.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x0e] +; CHECK: fminnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x2e] +; CHECK: fminnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x0e] +; CHECK: fminp.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x2e] +; CHECK: fmin.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x0e] +; CHECK: fmla.4h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x0e] +; CHECK: fmls.4h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x0e] +; CHECK: fmulx.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x0e] +; CHECK: fmul.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x2e] +; CHECK: frecps.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x0e] +; CHECK: frsqrts.4h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x0e] +; CHECK: fsub.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x0e] + + fabd.8h v0, v0, v0 + facge.8h v0, v0, v0 + facgt.8h v0, v0, v0 + faddp.8h v0, v0, v0 + fadd.8h v0, v0, v0 + fcmeq.8h v0, v0, v0 + fcmge.8h v0, v0, v0 + fcmgt.8h v0, v0, v0 + fdiv.8h v0, v0, v0 + fmaxnmp.8h v0, v0, v0 + fmaxnm.8h v0, v0, v0 + fmaxp.8h v0, v0, v0 + fmax.8h v0, v0, v0 + fminnmp.8h v0, v0, v0 + fminnm.8h v0, v0, v0 + fminp.8h v0, v0, v0 + fmin.8h v0, v0, v0 + fmla.8h v0, v0, v0 + fmls.8h v0, v0, v0 + fmulx.8h v0, v0, v0 + fmul.8h v0, v0, v0 + frecps.8h v0, v0, v0 + frsqrts.8h v0, v0, v0 + fsub.8h v0, v0, v0 + +; CHECK: fabd.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x6e] +; CHECK: facge.8h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x6e] +; CHECK: facgt.8h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x6e] +; CHECK: faddp.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x6e] +; CHECK: fadd.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x4e] +; CHECK: fcmeq.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x4e] +; CHECK: fcmge.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x6e] +; CHECK: fcmgt.8h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x6e] +; CHECK: fdiv.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x6e] +; CHECK: fmaxnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x6e] +; CHECK: fmaxnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x4e] +; CHECK: fmaxp.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x6e] +; CHECK: fmax.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x4e] +; CHECK: fminnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x6e] +; CHECK: fminnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x4e] +; CHECK: fminp.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x6e] +; CHECK: fmin.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x4e] +; CHECK: fmla.8h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x4e] +; CHECK: fmls.8h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x4e] +; CHECK: fmulx.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x4e] +; CHECK: fmul.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x6e] +; CHECK: frecps.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x4e] +; CHECK: frsqrts.8h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x4e] +; CHECK: fsub.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x4e] + bif.8b v0, v0, v0 bit.8b v0, v0, v0 bsl.8b v0, v0, v0 @@ -568,6 +668,57 @@ foo: ; CHECK: shll2.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x6e] ; CHECK: shll2.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x6e] + fabs.4h v0, 
v0 + fneg.4h v0, v0 + frecpe.4h v0, v0 + frinta.4h v0, v0 + frintx.4h v0, v0 + frinti.4h v0, v0 + frintm.4h v0, v0 + frintn.4h v0, v0 + frintp.4h v0, v0 + frintz.4h v0, v0 + frsqrte.4h v0, v0 + fsqrt.4h v0, v0 + +; CHECK: fabs.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x0e] +; CHECK: fneg.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x2e] +; CHECK: frecpe.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x0e] +; CHECK: frinta.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x2e] +; CHECK: frintx.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x2e] +; CHECK: frinti.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x2e] +; CHECK: frintm.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x0e] +; CHECK: frintn.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x0e] +; CHECK: frintp.4h v0, v0 ; encoding: [0x00,0x88,0xf9,0x0e] +; CHECK: frintz.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x0e] +; CHECK: frsqrte.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x2e] +; CHECK: fsqrt.4h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x2e] + + fabs.8h v0, v0 + fneg.8h v0, v0 + frecpe.8h v0, v0 + frinta.8h v0, v0 + frintx.8h v0, v0 + frinti.8h v0, v0 + frintm.8h v0, v0 + frintn.8h v0, v0 + frintp.8h v0, v0 + frintz.8h v0, v0 + frsqrte.8h v0, v0 + fsqrt.8h v0, v0 + +; CHECK: fabs.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x4e] +; CHECK: fneg.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x6e] +; CHECK: frecpe.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x4e] +; CHECK: frinta.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x6e] +; CHECK: frintx.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x6e] +; CHECK: frinti.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x6e] +; CHECK: frintm.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x4e] +; CHECK: frintn.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x4e] +; CHECK: frintp.8h v0, v0 ; encoding: [0x00,0x88,0xf9,0x4e] +; CHECK: frintz.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x4e] +; CHECK: frsqrte.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x6e] +; CHECK: fsqrt.8h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x6e] cmeq.8b v0, v0, #0 cmeq.16b v0, v0, #0 diff --git a/test/MC/AArch64/armv8.1a-rdma.s b/test/MC/AArch64/armv8.1a-rdma.s index 1de2a0fb15dd..36158428d6c4 100644 --- a/test/MC/AArch64/armv8.1a-rdma.s +++ b/test/MC/AArch64/armv8.1a-rdma.s @@ -26,27 +26,9 @@ sqrdmlsh v0.8s, v1.8s, v2.8s sqrdmlah v0.2s, v1.4h, v2.8h sqrdmlsh v0.4s, v1.8h, v2.2s -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid vector kind qualifier -// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h -// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/fullfp16-diagnostics.s b/test/MC/AArch64/fullfp16-diagnostics.s new file mode 100644 index 000000000000..190b6e25a4b1 --- /dev/null +++ b/test/MC/AArch64/fullfp16-diagnostics.s @@ -0,0 +1,42 @@ +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t +// RUN: FileCheck < %t %s + + fmla v0.4h, v1.4h, v16.h[3] + fmla 
v2.8h, v3.8h, v17.h[6] + +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmla v0.4h, v1.4h, v16.h[3] +// CHECK-NEXT: ^ +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmla v2.8h, v3.8h, v17.h[6] +// CHECK-NEXT: ^ + + fmls v0.4h, v1.4h, v16.h[3] + fmls v2.8h, v3.8h, v17.h[6] + +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmls v0.4h, v1.4h, v16.h[3] +// CHECK-NEXT: ^ +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmls v2.8h, v3.8h, v17.h[6] +// CHECK-NEXT: ^ + + fmul v0.4h, v1.4h, v16.h[3] + fmul v2.8h, v3.8h, v17.h[6] + +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmul v0.4h, v1.4h, v16.h[3] +// CHECK-NEXT: ^ +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmul v2.8h, v3.8h, v17.h[6] +// CHECK-NEXT: ^ + + fmulx v0.4h, v1.4h, v16.h[3] + fmulx v2.8h, v3.8h, v17.h[6] + +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmulx v0.4h, v1.4h, v16.h[3] +// CHECK-NEXT: ^ +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: fmulx v2.8h, v3.8h, v17.h[6] +// CHECK-NEXT: ^ diff --git a/test/MC/AArch64/fullfp16-neon-neg.s b/test/MC/AArch64/fullfp16-neon-neg.s new file mode 100644 index 000000000000..0913ecb7e9ab --- /dev/null +++ b/test/MC/AArch64/fullfp16-neon-neg.s @@ -0,0 +1,382 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+neon,-fullfp16 -show-encoding < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -mattr=-neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s + + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabs.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fneg.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpe.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinta.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintx.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinti.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintm.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintn.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintp.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintz.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrte.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsqrt.4h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabs.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fneg.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpe.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinta.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintx.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinti.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintm.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintn.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintp.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintz.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrte.8h v0, v0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsqrt.8h v0, v0 +// CHECK: 
:[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmla v0.4h, v1.4h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmla v3.8h, v8.8h, v2.h[1] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmls v0.4h, v1.4h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmls v3.8h, v8.8h, v2.h[1] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmul v0.4h, v1.4h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmul v0.8h, v1.8h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmulx v0.4h, v1.4h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmulx v0.8h, v1.8h, v2.h[2] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabd v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxnmv h0, v1.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminnmv h0, v1.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxv h0, v1.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminv h0, v1.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + faddp v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + faddp v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fadd v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fadd v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsub v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsub v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v0.4h, v31.4h, v16.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v4.8h, v7.8h, v15.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v3.4h, v8.4h, v12.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v31.8h, v29.8h, v28.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v3.4h, v12.4h, v8.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v31.8h, v28.8h, v29.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt v0.4h, v31.4h, v16.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt v4.8h, v7.8h, v15.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v0.4h, v16.4h, v31.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v4.8h, v15.8h, v7.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v0.4h, v31.4h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v4.8h, v7.8h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v0.4h, v31.4h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq v4.8h, v7.8h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v3.4h, v8.4h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v31.8h, v29.8h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v3.4h, v8.4h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge v31.8h, v29.8h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt v0.4h, v31.4h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction 
requires: + fcmgt v4.8h, v7.8h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt v0.4h, v31.4h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt v4.8h, v7.8h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v3.4h, v20.4h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v1.8h, v8.8h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v3.4h, v20.4h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle v1.8h, v8.8h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v16.4h, v2.4h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v15.8h, v4.8h, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v16.4h, v2.4h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt v15.8h, v4.8h, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facge v0.4h, v31.4h, v16.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facge v4.8h, v7.8h, v15.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facle v0.4h, v16.4h, v31.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facle v4.8h, v15.8h, v7.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facgt v3.4h, v8.4h, v12.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facgt v31.8h, v29.8h, v28.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + faclt v3.4h, v12.4h, v8.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + faclt v31.8h, v28.8h, v29.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrts v0.4h, v31.4h, v16.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrts v4.8h, v7.8h, v15.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecps v3.4h, v8.4h, v12.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecps v31.8h, v29.8h, v28.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxp v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxp v31.8h, v15.8h, v16.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminp v10.4h, v15.4h, v22.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminp v3.8h, v5.8h, v6.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxnmp v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxnmp v31.8h, v15.8h, v16.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminnmp v10.4h, v15.4h, v22.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminnmp v3.8h, v5.8h, v6.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmax v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmax v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmin v10.4h, v15.4h, v22.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmin v10.8h, v15.8h, v22.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxnm v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmaxnm v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fminnm v10.4h, v15.4h, v22.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: 
error: instruction requires: + fminnm v10.8h, v15.8h, v22.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmla v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmla v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmls v0.4h, v1.4h, v2.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmls v0.8h, v1.8h, v2.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabd h29, h24, h20 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmla h0, h1, v1.h[5] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmls h2, h3, v4.h[5] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmul h0, h1, v1.h[5] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmulx h6, h2, v8.h[5] +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzs h21, h12, #1 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzu h21, h12, #1 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtas h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtau h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtms h22, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtmu h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtns h22, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtnu h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtps h22, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtpu h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzs h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzu h12, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq h10, h11, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq h10, h11, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmeq h10, h11, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge h10, h11, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge h10, h11, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmge h10, h11, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt h10, h11, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt h10, h11, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmgt h10, h11, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle h10, h11, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmle h10, h11, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt h10, h11, #0.0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcmlt h10, h11, #0 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facge h10, h11, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + facgt h10, h11, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fmulx h20, h22, h15 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecps h21, h16, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrts h21, h5, h12 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpe h19, h14 +// CHECK: 
:[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpx h18, h10 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrte h22, h13 +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + faddp h18, v3.2h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabs v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fabs v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fneg v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fneg v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintn v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintn v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinta v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinta v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintp v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintp v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintm v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintm v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintx v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintx v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintz v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frintz v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinti v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frinti v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtns v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtns v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtnu v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtnu v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtps v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtps v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtpu v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtpu v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtms v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtms v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtmu v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtmu v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzs v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzs v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzu v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtzu v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtas v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtas v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtau v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fcvtau v6.8h, v8.8h +// CHECK: 
:[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpe v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frecpe v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrte v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + frsqrte v6.8h, v8.8h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsqrt v4.4h, v0.4h +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: + fsqrt v6.8h, v8.8h + +// CHECK-NOT: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires: diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s index 04841d0164f2..ed55ad0b1363 100644 --- a/test/MC/AArch64/neon-2velem.s +++ b/test/MC/AArch64/neon-2velem.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -46,6 +46,8 @@ // CHECK: mls v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x48,0x72,0x6f] // CHECK: mls v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x48,0x6e,0x6f] + fmla v0.4h, v1.4h, v2.h[2] + fmla v3.8h, v8.8h, v2.h[1] fmla v0.2s, v1.2s, v2.s[2] fmla v0.2s, v1.2s, v22.s[2] fmla v3.4s, v8.4s, v2.s[1] @@ -53,6 +55,8 @@ fmla v0.2d, v1.2d, v2.d[1] fmla v0.2d, v1.2d, v22.d[1] +// CHECK: fmla v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x10,0x22,0x0f] +// CHECK: fmla v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x11,0x12,0x4f] // CHECK: fmla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x18,0x82,0x0f] // CHECK: fmla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x18,0x96,0x0f] // CHECK: fmla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x11,0xa2,0x4f] @@ -60,6 +64,8 @@ // CHECK: fmla v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x18,0xc2,0x4f] // CHECK: fmla v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x18,0xd6,0x4f] + fmls v0.4h, v1.4h, v2.h[2] + fmls v3.8h, v8.8h, v2.h[1] fmls v0.2s, v1.2s, v2.s[2] fmls v0.2s, v1.2s, v22.s[2] fmls v3.4s, v8.4s, v2.s[1] @@ -67,6 +73,8 @@ fmls v0.2d, v1.2d, v2.d[1] fmls v0.2d, v1.2d, v22.d[1] +// CHECK: fmls v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x50,0x22,0x0f] +// CHECK: fmls v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x51,0x12,0x4f] // CHECK: fmls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x58,0x82,0x0f] // CHECK: fmls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x58,0x96,0x0f] // CHECK: fmls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x51,0xa2,0x4f] @@ -172,6 +180,8 @@ // CHECK: mul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x88,0x82,0x4f] // CHECK: mul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x88,0x96,0x4f] + fmul v0.4h, v1.4h, v2.h[2] + fmul v0.8h, v1.8h, v2.h[2] fmul v0.2s, v1.2s, v2.s[2] fmul v0.2s, v1.2s, v22.s[2] fmul v0.4s, v1.4s, v2.s[2] @@ -179,6 +189,8 @@ fmul v0.2d, v1.2d, v2.d[1] fmul v0.2d, v1.2d, v22.d[1] +// CHECK: fmul v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x0f] +// CHECK: fmul v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x4f] // CHECK: fmul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x0f] // CHECK: fmul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x0f] // CHECK: fmul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x4f] @@ -186,6 +198,8 @@ // CHECK: fmul v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x4f] // CHECK: fmul v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x4f] + fmulx v0.4h, v1.4h, v2.h[2] + fmulx v0.8h, v1.8h, v2.h[2] fmulx v0.2s, v1.2s, v2.s[2] fmulx v0.2s, v1.2s, v22.s[2] fmulx v0.4s, v1.4s, v2.s[2] 
@@ -193,6 +207,8 @@ fmulx v0.2d, v1.2d, v2.d[1] fmulx v0.2d, v1.2d, v22.d[1] +// CHECK: fmulx v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x2f] +// CHECK: fmulx v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x6f] // CHECK: fmulx v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x2f] // CHECK: fmulx v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x2f] // CHECK: fmulx v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x6f] diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s index 178eb26f64c2..b3a90bb14895 100644 --- a/test/MC/AArch64/neon-aba-abd.s +++ b/test/MC/AArch64/neon-aba-abd.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -68,10 +68,12 @@ //---------------------------------------------------------------------- // Vector Absolute Difference (Floating Point) //---------------------------------------------------------------------- + fabd v0.4h, v1.4h, v2.4h fabd v0.2s, v1.2s, v2.2s fabd v31.4s, v15.4s, v16.4s fabd v7.2d, v8.2d, v25.2d +// CHECK: fabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0xc2,0x2e] // CHECK: fabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x2e] // CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e] // CHECK: fabd v7.2d, v8.2d, v25.2d // encoding: [0x07,0xd5,0xf9,0x6e] diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s index 60b766d8c881..74edc519a475 100644 --- a/test/MC/AArch64/neon-across.s +++ b/test/MC/AArch64/neon-across.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -90,11 +90,27 @@ // CHECK: addv h0, v1.8h // encoding: [0x20,0xb8,0x71,0x4e] // CHECK: addv s0, v1.4s // encoding: [0x20,0xb8,0xb1,0x4e] + fmaxnmv h0, v1.4h + fminnmv h0, v1.4h + fmaxv h0, v1.4h + fminv h0, v1.4h + fmaxnmv h0, v1.8h + fminnmv h0, v1.8h + fmaxv h0, v1.8h + fminv h0, v1.8h fmaxnmv s0, v1.4s fminnmv s0, v1.4s fmaxv s0, v1.4s fminv s0, v1.4s +// CHECK: fmaxnmv h0, v1.4h // encoding: [0x20,0xc8,0x30,0x0e] +// CHECK: fminnmv h0, v1.4h // encoding: [0x20,0xc8,0xb0,0x0e] +// CHECK: fmaxv h0, v1.4h // encoding: [0x20,0xf8,0x30,0x0e] +// CHECK: fminv h0, v1.4h // encoding: [0x20,0xf8,0xb0,0x0e] +// CHECK: fmaxnmv h0, v1.8h // encoding: [0x20,0xc8,0x30,0x4e] +// CHECK: fminnmv h0, v1.8h // encoding: [0x20,0xc8,0xb0,0x4e] +// CHECK: fmaxv h0, v1.8h // encoding: [0x20,0xf8,0x30,0x4e] +// CHECK: fminv h0, v1.8h // encoding: [0x20,0xf8,0xb0,0x4e] // CHECK: fmaxnmv s0, v1.4s // encoding: [0x20,0xc8,0x30,0x6e] // CHECK: fminnmv s0, v1.4s // encoding: [0x20,0xc8,0xb0,0x6e] // CHECK: fmaxv s0, v1.4s // encoding: [0x20,0xf8,0x30,0x6e] diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index df9938b07e52..3d77c6e2790f 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -25,10 +25,14 @@ 
//------------------------------------------------------------------------------ // Vector Add Pairwise (Floating Point //------------------------------------------------------------------------------ + faddp v0.4h, v1.4h, v2.4h + faddp v0.8h, v1.8h, v2.8h faddp v0.2s, v1.2s, v2.2s faddp v0.4s, v1.4s, v2.4s faddp v0.2d, v1.2d, v2.2d +// CHECK: faddp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x42,0x2e] +// CHECK: faddp v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x42,0x6e] // CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e] // CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e] // CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e] diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index 68f169b3dd90..0d8416537022 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -44,10 +44,14 @@ //------------------------------------------------------------------------------ // Vector Floating-Point Add //------------------------------------------------------------------------------ + fadd v0.4h, v1.4h, v2.4h + fadd v0.8h, v1.8h, v2.8h fadd v0.2s, v1.2s, v2.2s fadd v0.4s, v1.4s, v2.4s fadd v0.2d, v1.2d, v2.2d +// CHECK: fadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x42,0x0e] +// CHECK: fadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x42,0x4e] // CHECK: fadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x0e] // CHECK: fadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x4e] // CHECK: fadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x4e] @@ -56,10 +60,14 @@ //------------------------------------------------------------------------------ // Vector Floating-Point Sub //------------------------------------------------------------------------------ + fsub v0.4h, v1.4h, v2.4h + fsub v0.8h, v1.8h, v2.8h fsub v0.2s, v1.2s, v2.2s fsub v0.4s, v1.4s, v2.4s fsub v0.2d, v1.2d, v2.2d +// CHECK: fsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0xc2,0x0e] +// CHECK: fsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0xc2,0x4e] // CHECK: fsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x0e] // CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e] // CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e] diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s index 19cfaf1f4d36..ffa88e50e0ce 100644 --- a/test/MC/AArch64/neon-compare-instructions.s +++ b/test/MC/AArch64/neon-compare-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -194,10 +194,14 @@ // Vector Compare Mask Equal (Floating Point) //---------------------------------------------------------------------- + fcmeq v0.4h, v31.4h, v16.4h + fcmeq v4.8h, v7.8h, v15.8h fcmeq v0.2s, v31.2s, v16.2s fcmeq v4.4s, v7.4s, v15.4s fcmeq v29.2d, v2.2d, v5.2d +// CHECK: fcmeq v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0x50,0x0e] +// CHECK: fcmeq v4.8h, v7.8h, v15.8h // encoding: 
[0xe4,0x24,0x4f,0x4e] // CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e] // CHECK: fcmeq v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0x2f,0x4e] // CHECK: fcmeq v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0x65,0x4e] @@ -208,6 +212,10 @@ // FCMLE is alias for FCMGE with operands reversed. //---------------------------------------------------------------------- + fcmge v3.4h, v8.4h, v12.4h + fcmge v31.8h, v29.8h, v28.8h + fcmle v3.4h, v12.4h, v8.4h + fcmle v31.8h, v28.8h, v29.8h fcmge v31.4s, v29.4s, v28.4s fcmge v3.2s, v8.2s, v12.2s fcmge v17.2d, v15.2d, v13.2d @@ -215,6 +223,10 @@ fcmle v3.2s, v12.2s, v8.2s fcmle v17.2d, v13.2d, v15.2d +// CHECK: fcmge v3.4h, v8.4h, v12.4h // encoding: [0x03,0x25,0x4c,0x2e] +// CHECK: fcmge v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x27,0x5c,0x6e] +// CHECK: fcmge v3.4h, v8.4h, v12.4h // encoding: [0x03,0x25,0x4c,0x2e] +// CHECK: fcmge v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x27,0x5c,0x6e] // CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e] // CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e] // CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e] @@ -228,6 +240,10 @@ // FCMLT is alias for FCMGT with operands reversed. //---------------------------------------------------------------------- + fcmgt v0.4h, v31.4h, v16.4h + fcmgt v4.8h, v7.8h, v15.8h + fcmlt v0.4h, v16.4h, v31.4h + fcmlt v4.8h, v15.8h, v7.8h fcmgt v0.2s, v31.2s, v16.2s fcmgt v4.4s, v7.4s, v15.4s fcmgt v29.2d, v2.2d, v5.2d @@ -235,6 +251,10 @@ fcmlt v4.4s, v15.4s, v7.4s fcmlt v29.2d, v5.2d, v2.2d +// CHECK: fcmgt v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0xd0,0x2e] +// CHECK: fcmgt v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x24,0xcf,0x6e] +// CHECK: fcmgt v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0xd0,0x2e] +// CHECK: fcmgt v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x24,0xcf,0x6e] // CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e] // CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e] // CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e] @@ -343,16 +363,24 @@ //---------------------------------------------------------------------- // Vector Compare Mask Equal to Zero (Floating Point) //---------------------------------------------------------------------- + fcmeq v0.4h, v31.4h, #0.0 + fcmeq v4.8h, v7.8h, #0.0 fcmeq v0.2s, v31.2s, #0.0 fcmeq v4.4s, v7.4s, #0.0 fcmeq v29.2d, v2.2d, #0.0 + fcmeq v0.4h, v31.4h, #0 + fcmeq v4.8h, v7.8h, #0 fcmeq v0.2s, v31.2s, #0 fcmeq v4.4s, v7.4s, #0 fcmeq v29.2d, v2.2d, #0 +// CHECK: fcmeq v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xdb,0xf8,0x0e] +// CHECK: fcmeq v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xd8,0xf8,0x4e] // CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e] // CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e] // CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e] +// CHECK: fcmeq v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xdb,0xf8,0x0e] +// CHECK: fcmeq v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xd8,0xf8,0x4e] // CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e] // CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e] // CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e] @@ -360,16 +388,24 @@ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than or Equal to Zero (Floating Point) //---------------------------------------------------------------------- + fcmge v3.4h, 
v8.4h, #0.0 + fcmge v31.8h, v29.8h, #0.0 fcmge v31.4s, v29.4s, #0.0 fcmge v3.2s, v8.2s, #0.0 fcmge v17.2d, v15.2d, #0.0 + fcmge v3.4h, v8.4h, #0 + fcmge v31.8h, v29.8h, #0 fcmge v31.4s, v29.4s, #0 fcmge v3.2s, v8.2s, #0 fcmge v17.2d, v15.2d, #0 +// CHECK: fcmge v3.4h, v8.4h, #0.0 // encoding: [0x03,0xc9,0xf8,0x2e] +// CHECK: fcmge v31.8h, v29.8h, #0.0 // encoding: [0xbf,0xcb,0xf8,0x6e] // CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e] // CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e] // CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e] +// CHECK: fcmge v3.4h, v8.4h, #0.0 // encoding: [0x03,0xc9,0xf8,0x2e] +// CHECK: fcmge v31.8h, v29.8h, #0.0 // encoding: [0xbf,0xcb,0xf8,0x6e] // CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e] // CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e] // CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e] @@ -377,16 +413,24 @@ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than Zero (Floating Point) //---------------------------------------------------------------------- + fcmgt v0.4h, v31.4h, #0.0 + fcmgt v4.8h, v7.8h, #0.0 fcmgt v0.2s, v31.2s, #0.0 fcmgt v4.4s, v7.4s, #0.0 fcmgt v29.2d, v2.2d, #0.0 + fcmgt v0.4h, v31.4h, #0 + fcmgt v4.8h, v7.8h, #0 fcmgt v0.2s, v31.2s, #0 fcmgt v4.4s, v7.4s, #0 fcmgt v29.2d, v2.2d, #0 +// CHECK: fcmgt v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xcb,0xf8,0x0e] +// CHECK: fcmgt v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xc8,0xf8,0x4e] // CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e] // CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e] // CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e] +// CHECK: fcmgt v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xcb,0xf8,0x0e] +// CHECK: fcmgt v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xc8,0xf8,0x4e] // CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e] // CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e] // CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e] @@ -394,16 +438,24 @@ //---------------------------------------------------------------------- // Vector Compare Mask Less Than or Equal To Zero (Floating Point) //---------------------------------------------------------------------- + fcmle v3.4h, v20.4h, #0.0 + fcmle v1.8h, v8.8h, #0.0 fcmle v1.4s, v8.4s, #0.0 fcmle v3.2s, v20.2s, #0.0 fcmle v7.2d, v13.2d, #0.0 + fcmle v3.4h, v20.4h, #0 + fcmle v1.8h, v8.8h, #0 fcmle v1.4s, v8.4s, #0 fcmle v3.2s, v20.2s, #0 fcmle v7.2d, v13.2d, #0 +// CHECK: fcmle v3.4h, v20.4h, #0.0 // encoding: [0x83,0xda,0xf8,0x2e] +// CHECK: fcmle v1.8h, v8.8h, #0.0 // encoding: [0x01,0xd9,0xf8,0x6e] // CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e] // CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e] // CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e] +// CHECK: fcmle v3.4h, v20.4h, #0.0 // encoding: [0x83,0xda,0xf8,0x2e] +// CHECK: fcmle v1.8h, v8.8h, #0.0 // encoding: [0x01,0xd9,0xf8,0x6e] // CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e] // CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e] // CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e] @@ -411,16 +463,24 @@ //---------------------------------------------------------------------- // Vector Compare Mask Less Than Zero (Floating Point) 
//---------------------------------------------------------------------- + fcmlt v16.4h, v2.4h, #0.0 + fcmlt v15.8h, v4.8h, #0.0 fcmlt v16.2s, v2.2s, #0.0 fcmlt v15.4s, v4.4s, #0.0 fcmlt v5.2d, v29.2d, #0.0 + fcmlt v16.4h, v2.4h, #0 + fcmlt v15.8h, v4.8h, #0 fcmlt v16.2s, v2.2s, #0 fcmlt v15.4s, v4.4s, #0 fcmlt v5.2d, v29.2d, #0 +// CHECK: fcmlt v16.4h, v2.4h, #0.0 // encoding: [0x50,0xe8,0xf8,0x0e] +// CHECK: fcmlt v15.8h, v4.8h, #0.0 // encoding: [0x8f,0xe8,0xf8,0x4e] // CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e] // CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e] // CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e] +// CHECK: fcmlt v16.4h, v2.4h, #0.0 // encoding: [0x50,0xe8,0xf8,0x0e] +// CHECK: fcmlt v15.8h, v4.8h, #0.0 // encoding: [0x8f,0xe8,0xf8,0x4e] // CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e] // CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e] // CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e] diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 973acb8249ad..6ded6e40bfb9 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -341,7 +341,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fabd v0.2s, v1.4s, v2.2d // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fabd v0.4h, v1.4h, v2.4h // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -385,7 +385,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frecps v0.4s, v1.2d, v2.4s // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frecps v0.8h, v1.8h, v2.8h // CHECK-ERROR: ^ @@ -400,7 +400,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frsqrts v0.2d, v1.2d, v2.2s // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frsqrts v0.4h, v1.4h, v2.4h // CHECK-ERROR: ^ @@ -417,7 +417,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: facge v0.2d, v1.2s, v2.2d // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: facge v0.4h, v1.4h, v2.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -435,7 +435,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: facgt v0.2d, v1.2d, v2.4s // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: facgt v0.8h, v1.8h, v2.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -1092,7 +1092,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmin v0.4s, v1.4s, v2.2d // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmin v0.8h, v1.8h, v2.8h // CHECK-ERROR: ^ @@ -1177,7 +1177,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fminp v0.4s, v1.4s, v2.2d // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fminp v0.8h, 
v1.8h, v2.8h // CHECK-ERROR: ^ @@ -1283,7 +1283,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v21.2s, v5.2s, v13.2d // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmulx v1.4h, v25.4h, v3.4h // CHECK-ERROR: ^ @@ -3023,10 +3023,10 @@ fmla v0.2d, v1.2d, v2.d[2] fmla v0.2d, v1.2d, v22.d[2] -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmla v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ // CHECK-ERROR: vector lane must be an integer in range @@ -3057,10 +3057,10 @@ fmls v0.2d, v1.2d, v2.d[2] fmls v0.2d, v1.2d, v22.d[2] -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmls v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ // CHECK-ERROR: vector lane must be an integer in range @@ -3428,7 +3428,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mul v0.2d, v1.2d, v2.d[1] // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ // CHECK-ERROR: vector lane must be an integer in range @@ -3458,7 +3458,7 @@ fmulx v0.2d, v1.2d, v2.d[2] fmulx v0.2d, v1.2d, v22.d[2] -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ // CHECK-ERROR: vector lane must be an integer in range @@ -3837,16 +3837,16 @@ fmaxv h0, v1.8h fminv h0, v1.8h -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmaxnmv h0, v1.8h // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fminnmv h0, v1.8h // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fmaxv h0, v1.8h // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fminv h0, v1.8h // CHECK-ERROR: ^ @@ -5594,13 +5594,13 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fabs v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fabs v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fabs v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fabs v13.4h, v21.4h // CHECK-ERROR: ^ @@ -5616,13 +5616,13 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fneg v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fneg v2.8h, v4.8h // CHECK-ERROR: ^ // 
CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fneg v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fneg v13.4h, v21.4h // CHECK-ERROR: ^ @@ -5978,205 +5978,205 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintn v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintn v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintn v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintn v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frinta v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frinta v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frinta v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frinta v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintp v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintp v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintp v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintp v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintm v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintm v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintm v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintm v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintx v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintx v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintx v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintx v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintz v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frintz v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frintz v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: 
fullfp16 // CHECK-ERROR: frintz v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frinti v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frinti v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frinti v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frinti v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtns v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtns v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtns v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtns v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtnu v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtnu v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtnu v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtnu v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtps v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtps v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtps v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtps v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtpu v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtpu v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtpu v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtpu v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtms v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtms v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtms v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtms v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtmu v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// 
CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtmu v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtmu v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtmu v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtzs v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtzs v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtzs v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtzs v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtzu v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtzu v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtzu v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtzu v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtas v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtas v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtas v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtas v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtau v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtau v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcvtau v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fcvtau v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -6212,61 +6212,61 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: scvtf v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: scvtf v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: scvtf v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: scvtf v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ucvtf v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: ucvtf v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // 
CHECK-ERROR: ucvtf v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: ucvtf v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frecpe v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frecpe v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frecpe v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frecpe v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frsqrte v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frsqrte v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: frsqrte v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: frsqrte v13.4h, v21.4h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fsqrt v0.16b, v31.16b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fsqrt v2.8h, v4.8h // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fsqrt v1.8b, v9.8b // CHECK-ERROR: ^ -// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: instruction requires: fullfp16 // CHECK-ERROR: fsqrt v13.4h, v21.4h // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s index 212eda2f2092..9c10caa0f7c2 100644 --- a/test/MC/AArch64/neon-facge-facgt.s +++ b/test/MC/AArch64/neon-facge-facgt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -6,16 +6,24 @@ // Vector Absolute Compare Mask Less Than Or Equal (Floating Point) // FACLE is alias for FACGE with operands reversed //---------------------------------------------------------------------- + facge v0.4h, v31.4h, v16.4h + facge v4.8h, v7.8h, v15.8h facge v0.2s, v31.2s, v16.2s facge v4.4s, v7.4s, v15.4s facge v29.2d, v2.2d, v5.2d + facle v0.4h, v16.4h, v31.4h + facle v4.8h, v15.8h, v7.8h facle v0.2s, v16.2s, v31.2s facle v4.4s, v15.4s, v7.4s facle v29.2d, v5.2d, v2.2d +// CHECK: facge v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x2f,0x50,0x2e] +// CHECK: facge v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x2c,0x4f,0x6e] // CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e] // CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e] // CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e] +// CHECK: facge v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x2f,0x50,0x2e] +// CHECK: facge v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x2c,0x4f,0x6e] // CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e] // CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e] // CHECK: facge v29.2d, v2.2d, 
v5.2d // encoding: [0x5d,0xec,0x65,0x6e] @@ -24,16 +32,24 @@ // Vector Absolute Compare Mask Less Than (Floating Point) // FACLT is alias for FACGT with operands reversed //---------------------------------------------------------------------- + facgt v3.4h, v8.4h, v12.4h + facgt v31.8h, v29.8h, v28.8h facgt v31.4s, v29.4s, v28.4s facgt v3.2s, v8.2s, v12.2s facgt v17.2d, v15.2d, v13.2d + faclt v3.4h, v12.4h, v8.4h + faclt v31.8h, v28.8h, v29.8h faclt v31.4s, v28.4s, v29.4s faclt v3.2s, v12.2s, v8.2s faclt v17.2d, v13.2d, v15.2d +// CHECK: facgt v3.4h, v8.4h, v12.4h // encoding: [0x03,0x2d,0xcc,0x2e] +// CHECK: facgt v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x2f,0xdc,0x6e] // CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e] // CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e] // CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e] +// CHECK: facgt v3.4h, v8.4h, v12.4h // encoding: [0x03,0x2d,0xcc,0x2e] +// CHECK: facgt v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x2f,0xdc,0x6e] // CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e] // CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e] // CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e] diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s index 79fe5da5e76f..67a1340ecc32 100644 --- a/test/MC/AArch64/neon-frsqrt-frecp.s +++ b/test/MC/AArch64/neon-frsqrt-frecp.s @@ -1,14 +1,18 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 //---------------------------------------------------------------------- // Vector Reciprocal Square Root Step (Floating Point) //---------------------------------------------------------------------- + frsqrts v0.4h, v31.4h, v16.4h + frsqrts v4.8h, v7.8h, v15.8h frsqrts v0.2s, v31.2s, v16.2s frsqrts v4.4s, v7.4s, v15.4s frsqrts v29.2d, v2.2d, v5.2d +// CHECK: frsqrts v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x3f,0xd0,0x0e] +// CHECK: frsqrts v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x3c,0xcf,0x4e] // CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e] // CHECK: frsqrts v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xfc,0xaf,0x4e] // CHECK: frsqrts v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xfc,0xe5,0x4e] @@ -16,10 +20,14 @@ //---------------------------------------------------------------------- // Vector Reciprocal Step (Floating Point) //---------------------------------------------------------------------- + frecps v3.4h, v8.4h, v12.4h + frecps v31.8h, v29.8h, v28.8h frecps v31.4s, v29.4s, v28.4s frecps v3.2s, v8.2s, v12.2s frecps v17.2d, v15.2d, v13.2d +// CHECK: frecps v3.4h, v8.4h, v12.4h // encoding: [0x03,0x3d,0x4c,0x0e] +// CHECK: frecps v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x3f,0x5c,0x4e] // CHECK: frecps v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xff,0x3c,0x4e] // CHECK: frecps v3.2s, v8.2s, v12.2s // encoding: [0x03,0xfd,0x2c,0x0e] // CHECK: frecps v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xfd,0x6d,0x4e] diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s index 8d2dadb1997f..27cf4c8d830a 100644 --- a/test/MC/AArch64/neon-max-min-pairwise.s +++ b/test/MC/AArch64/neon-max-min-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < 
%s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -67,10 +67,14 @@ //---------------------------------------------------------------------- // Vector Maximum Pairwise (Floating Point) //---------------------------------------------------------------------- + fmaxp v0.4h, v1.4h, v2.4h + fmaxp v31.8h, v15.8h, v16.8h fmaxp v0.2s, v1.2s, v2.2s fmaxp v31.4s, v15.4s, v16.4s fmaxp v7.2d, v8.2d, v25.2d +// CHECK: fmaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x34,0x42,0x2e] +// CHECK: fmaxp v31.8h, v15.8h, v16.8h // encoding: [0xff,0x35,0x50,0x6e] // CHECK: fmaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x2e] // CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e] // CHECK: fmaxp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x6e] @@ -78,10 +82,14 @@ //---------------------------------------------------------------------- // Vector Minimum Pairwise (Floating Point) //---------------------------------------------------------------------- + fminp v10.4h, v15.4h, v22.4h + fminp v3.8h, v5.8h, v6.8h fminp v10.2s, v15.2s, v22.2s fminp v3.4s, v5.4s, v6.4s fminp v17.2d, v13.2d, v2.2d +// CHECK: fminp v10.4h, v15.4h, v22.4h // encoding: [0xea,0x35,0xd6,0x2e] +// CHECK: fminp v3.8h, v5.8h, v6.8h // encoding: [0xa3,0x34,0xc6,0x6e] // CHECK: fminp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x2e] // CHECK: fminp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x6e] // CHECK: fminp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x6e] @@ -89,10 +97,14 @@ //---------------------------------------------------------------------- // Vector maxNum Pairwise (Floating Point) //---------------------------------------------------------------------- + fmaxnmp v0.4h, v1.4h, v2.4h + fmaxnmp v31.8h, v15.8h, v16.8h fmaxnmp v0.2s, v1.2s, v2.2s fmaxnmp v31.4s, v15.4s, v16.4s fmaxnmp v7.2d, v8.2d, v25.2d +// CHECK: fmaxnmp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x42,0x2e] +// CHECK: fmaxnmp v31.8h, v15.8h, v16.8h // encoding: [0xff,0x05,0x50,0x6e] // CHECK: fmaxnmp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x2e] // CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e] // CHECK: fmaxnmp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x6e] @@ -100,10 +112,14 @@ //---------------------------------------------------------------------- // Vector minNum Pairwise (Floating Point) //---------------------------------------------------------------------- + fminnmp v10.4h, v15.4h, v22.4h + fminnmp v3.8h, v5.8h, v6.8h fminnmp v10.2s, v15.2s, v22.2s fminnmp v3.4s, v5.4s, v6.4s fminnmp v17.2d, v13.2d, v2.2d +// CHECK: fminnmp v10.4h, v15.4h, v22.4h // encoding: [0xea,0x05,0xd6,0x2e] +// CHECK: fminnmp v3.8h, v5.8h, v6.8h // encoding: [0xa3,0x04,0xc6,0x6e] // CHECK: fminnmp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x2e] // CHECK: fminnmp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x6e] // CHECK: fminnmp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x6e] diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s index 6d1efde5077f..c4bd74d98882 100644 --- a/test/MC/AArch64/neon-max-min.s +++ b/test/MC/AArch64/neon-max-min.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the 
documented syntax for AArch64 @@ -67,10 +67,14 @@ //---------------------------------------------------------------------- // Vector Maximum (Floating Point) //---------------------------------------------------------------------- + fmax v0.4h, v1.4h, v2.4h + fmax v0.8h, v1.8h, v2.8h fmax v0.2s, v1.2s, v2.2s fmax v31.4s, v15.4s, v16.4s fmax v7.2d, v8.2d, v25.2d +// CHECK: fmax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x34,0x42,0x0e] +// CHECK: fmax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x34,0x42,0x4e] // CHECK: fmax v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x0e] // CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e] // CHECK: fmax v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x4e] @@ -78,10 +82,14 @@ //---------------------------------------------------------------------- // Vector Minimum (Floating Point) //---------------------------------------------------------------------- + fmin v10.4h, v15.4h, v22.4h + fmin v10.8h, v15.8h, v22.8h fmin v10.2s, v15.2s, v22.2s fmin v3.4s, v5.4s, v6.4s fmin v17.2d, v13.2d, v2.2d +// CHECK: fmin v10.4h, v15.4h, v22.4h // encoding: [0xea,0x35,0xd6,0x0e] +// CHECK: fmin v10.8h, v15.8h, v22.8h // encoding: [0xea,0x35,0xd6,0x4e] // CHECK: fmin v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x0e] // CHECK: fmin v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x4e] // CHECK: fmin v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x4e] @@ -89,10 +97,14 @@ //---------------------------------------------------------------------- // Vector maxNum (Floating Point) //---------------------------------------------------------------------- + fmaxnm v0.4h, v1.4h, v2.4h + fmaxnm v0.8h, v1.8h, v2.8h fmaxnm v0.2s, v1.2s, v2.2s fmaxnm v31.4s, v15.4s, v16.4s fmaxnm v7.2d, v8.2d, v25.2d +// CHECK: fmaxnm v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x42,0x0e] +// CHECK: fmaxnm v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x42,0x4e] // CHECK: fmaxnm v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x0e] // CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e] // CHECK: fmaxnm v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x4e] @@ -100,10 +112,14 @@ //---------------------------------------------------------------------- // Vector minNum (Floating Point) //---------------------------------------------------------------------- + fminnm v10.4h, v15.4h, v22.4h + fminnm v10.8h, v15.8h, v22.8h fminnm v10.2s, v15.2s, v22.2s fminnm v3.4s, v5.4s, v6.4s fminnm v17.2d, v13.2d, v2.2d +// CHECK: fminnm v10.4h, v15.4h, v22.4h // encoding: [0xea,0x05,0xd6,0x0e] +// CHECK: fminnm v10.8h, v15.8h, v22.8h // encoding: [0xea,0x05,0xd6,0x4e] // CHECK: fminnm v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x0e] // CHECK: fminnm v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x4e] // CHECK: fminnm v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x4e] diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s index 3072e6f1200d..a510fc8c7b91 100644 --- a/test/MC/AArch64/neon-mla-mls-instructions.s +++ b/test/MC/AArch64/neon-mla-mls-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -40,10 +40,14 @@ //---------------------------------------------------------------------- // Vector Floating-Point Multiply-accumulate 
//---------------------------------------------------------------------- + fmla v0.4h, v1.4h, v2.4h + fmla v0.8h, v1.8h, v2.8h fmla v0.2s, v1.2s, v2.2s fmla v0.4s, v1.4s, v2.4s fmla v0.2d, v1.2d, v2.2d +// CHECK: fmla v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x42,0x0e] +// CHECK: fmla v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x42,0x4e] // CHECK: fmla v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0x22,0x0e] // CHECK: fmla v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0x22,0x4e] // CHECK: fmla v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0x62,0x4e] @@ -51,10 +55,14 @@ //---------------------------------------------------------------------- // Vector Floating-Point Multiply-subtract //---------------------------------------------------------------------- + fmls v0.4h, v1.4h, v2.4h + fmls v0.8h, v1.8h, v2.8h fmls v0.2s, v1.2s, v2.2s fmls v0.4s, v1.4s, v2.4s fmls v0.2d, v1.2d, v2.2d +// CHECK: fmls v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0xc2,0x0e] +// CHECK: fmls v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0xc2,0x4e] // CHECK: fmls v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0xa2,0x0e] // CHECK: fmls v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0xa2,0x4e] // CHECK: fmls v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0xe2,0x4e] diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s index d08756c0c10c..71130617848f 100644 --- a/test/MC/AArch64/neon-scalar-abs.s +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -14,9 +14,11 @@ // Scalar Floating-point Absolute Difference //---------------------------------------------------------------------- + fabd h29, h24, h20 fabd s29, s24, s20 fabd d29, d24, d20 +// CHECK: fabd h29, h24, h20 // encoding: [0x1d,0x17,0xd4,0x7e] // CHECK: fabd s29, s24, s20 // encoding: [0x1d,0xd7,0xb4,0x7e] // CHECK: fabd d29, d24, d20 // encoding: [0x1d,0xd7,0xf4,0x7e] diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s index fec9d12d8b8d..394fda673e20 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -1,8 +1,9 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point fused multiply-add (scalar, by element) //------------------------------------------------------------------------------ + fmla h0, h1, v1.h[5] fmla s0, s1, v1.s[0] fmla s30, s11, v1.s[1] fmla s4, s5, v7.s[2] @@ -10,6 +11,7 @@ fmla d0, d1, v1.d[0] fmla d30, d11, v1.d[1] +// CHECK: fmla h0, h1, v1.h[5] // encoding: [0x20,0x18,0x11,0x5f] // CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f] // CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f] // CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f] @@ -21,6 +23,7 @@ // Floating Point fused multiply-subtract (scalar, by element) //------------------------------------------------------------------------------ + fmls h2, h3, v4.h[5] fmls s2, s3, v4.s[0] fmls s29, s10, v28.s[1] fmls s5, s12, v23.s[2] @@ -28,6 +31,7 @@ fmls d0, d1, v1.d[0] fmls d30, d11, v1.d[1] +// CHECK: fmls h2, 
h3, v4.h[5] // encoding: [0x62,0x58,0x14,0x5f] // CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f] // CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f] // CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f] diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s index 8b8a3f57a9ca..0d832742a389 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -1,8 +1,9 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point multiply (scalar, by element) //------------------------------------------------------------------------------ + fmul h0, h1, v1.h[5] fmul s0, s1, v1.s[0] fmul s30, s11, v1.s[1] fmul s4, s5, v7.s[2] @@ -10,6 +11,7 @@ fmul d0, d1, v1.d[0] fmul d30, d11, v1.d[1] +// CHECK: fmul h0, h1, v1.h[5] // encoding: [0x20,0x98,0x11,0x5f] // CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f] // CHECK: fmul s30, s11, v1.s[1] // encoding: [0x7e,0x91,0xa1,0x5f] // CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f] @@ -21,6 +23,7 @@ //------------------------------------------------------------------------------ // Floating Point multiply extended (scalar, by element) //------------------------------------------------------------------------------ + fmulx h6, h2, v8.h[5] fmulx s6, s2, v8.s[0] fmulx s7, s3, v13.s[1] fmulx s9, s7, v9.s[2] @@ -28,6 +31,7 @@ fmulx d15, d9, v7.d[0] fmulx d13, d12, v11.d[1] +// CHECK: fmulx h6, h2, v8.h[5] // encoding: [0x46,0x98,0x18,0x7f] // CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f] // CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f] // CHECK: fmulx s9, s7, v9.s[2] // encoding: [0xe9,0x98,0x89,0x7f] diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s index 97416daf0801..3cbf6bae6758 100644 --- a/test/MC/AArch64/neon-scalar-cvt.s +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -6,9 +6,11 @@ // Scalar Signed Integer Convert To Floating-point //---------------------------------------------------------------------- + scvtf h23, h14 scvtf s22, s13 scvtf d21, d12 +// CHECK: scvtf h23, h14 // encoding: [0xd7,0xd9,0x79,0x5e] // CHECK: scvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x5e] // CHECK: scvtf d21, d12 // encoding: [0x95,0xd9,0x61,0x5e] @@ -16,9 +18,11 @@ // Scalar Unsigned Integer Convert To Floating-point //---------------------------------------------------------------------- + ucvtf h20, h12 ucvtf s22, s13 ucvtf d21, d14 +// CHECK: ucvtf h20, h12 // encoding: [0x94,0xd9,0x79,0x7e] // CHECK: ucvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x7e] // CHECK: ucvtf d21, d14 // encoding: [0xd5,0xd9,0x61,0x7e] @@ -26,9 +30,11 @@ // Scalar Signed Fixed-point Convert To Floating-Point (Immediate) //---------------------------------------------------------------------- + scvtf h22, h13, #16 scvtf s22, s13, #32 scvtf d21, d12, #64 +// CHECK: scvtf h22, h13, #16 // encoding: [0xb6,0xe5,0x10,0x5f] // CHECK: scvtf s22, s13, #32 // 
encoding: [0xb6,0xe5,0x20,0x5f] // CHECK: scvtf d21, d12, #64 // encoding: [0x95,0xe5,0x40,0x5f] @@ -36,9 +42,11 @@ // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) //---------------------------------------------------------------------- + ucvtf h22, h13, #16 ucvtf s22, s13, #32 ucvtf d21, d14, #64 +// CHECK: ucvtf h22, h13, #16 // encoding: [0xb6,0xe5,0x10,0x7f] // CHECK: ucvtf s22, s13, #32 // encoding: [0xb6,0xe5,0x20,0x7f] // CHECK: ucvtf d21, d14, #64 // encoding: [0xd5,0xe5,0x40,0x7f] @@ -46,9 +54,11 @@ // Scalar Floating-point Convert To Signed Fixed-point (Immediate) //---------------------------------------------------------------------- + fcvtzs h21, h12, #1 fcvtzs s21, s12, #1 fcvtzs d21, d12, #1 +// CHECK: fcvtzs h21, h12, #1 // encoding: [0x95,0xfd,0x1f,0x5f] // CHECK: fcvtzs s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x5f] // CHECK: fcvtzs d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x5f] @@ -56,9 +66,11 @@ // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) //---------------------------------------------------------------------- + fcvtzu h21, h12, #1 fcvtzu s21, s12, #1 fcvtzu d21, d12, #1 +// CHECK: fcvtzu h21, h12, #1 // encoding: [0x95,0xfd,0x1f,0x7f] // CHECK: fcvtzu s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x7f] // CHECK: fcvtzu d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x7f] @@ -76,9 +88,11 @@ // With Ties To Away //---------------------------------------------------------------------- + fcvtas h12, h13 fcvtas s12, s13 fcvtas d21, d14 +// CHECK: fcvtas h12, h13 // encoding: [0xac,0xc9,0x79,0x5e] // CHECK: fcvtas s12, s13 // encoding: [0xac,0xc9,0x21,0x5e] // CHECK: fcvtas d21, d14 // encoding: [0xd5,0xc9,0x61,0x5e] @@ -87,9 +101,11 @@ // Nearest With Ties To Away //---------------------------------------------------------------------- + fcvtau h12, h13 fcvtau s12, s13 fcvtau d21, d14 +// CHECK: fcvtau h12, h13 // encoding: [0xac,0xc9,0x79,0x7e] // CHECK: fcvtau s12, s13 // encoding: [0xac,0xc9,0x21,0x7e] // CHECK: fcvtau d21, d14 // encoding: [0xd5,0xc9,0x61,0x7e] @@ -98,9 +114,11 @@ // Minus Infinity //---------------------------------------------------------------------- + fcvtms h22, h13 fcvtms s22, s13 fcvtms d21, d14 +// CHECK: fcvtms h22, h13 // encoding: [0xb6,0xb9,0x79,0x5e] // CHECK: fcvtms s22, s13 // encoding: [0xb6,0xb9,0x21,0x5e] // CHECK: fcvtms d21, d14 // encoding: [0xd5,0xb9,0x61,0x5e] @@ -109,9 +127,11 @@ // Minus Infinity //---------------------------------------------------------------------- + fcvtmu h12, h13 fcvtmu s12, s13 fcvtmu d21, d14 +// CHECK: fcvtmu h12, h13 // encoding: [0xac,0xb9,0x79,0x7e] // CHECK: fcvtmu s12, s13 // encoding: [0xac,0xb9,0x21,0x7e] // CHECK: fcvtmu d21, d14 // encoding: [0xd5,0xb9,0x61,0x7e] @@ -120,9 +140,11 @@ // With Ties To Even //---------------------------------------------------------------------- + fcvtns h22, h13 fcvtns s22, s13 fcvtns d21, d14 +// CHECK: fcvtns h22, h13 // encoding: [0xb6,0xa9,0x79,0x5e] // CHECK: fcvtns s22, s13 // encoding: [0xb6,0xa9,0x21,0x5e] // CHECK: fcvtns d21, d14 // encoding: [0xd5,0xa9,0x61,0x5e] @@ -131,9 +153,11 @@ // Nearest With Ties To Even //---------------------------------------------------------------------- + fcvtnu h12, h13 fcvtnu s12, s13 fcvtnu d21, d14 +// CHECK: fcvtnu h12, h13 // encoding: [0xac,0xa9,0x79,0x7e] // CHECK: fcvtnu s12, s13 // encoding: [0xac,0xa9,0x21,0x7e] // CHECK: fcvtnu d21, d14 // encoding: [0xd5,0xa9,0x61,0x7e] @@ -142,9 +166,11 @@ // Positive Infinity 
//---------------------------------------------------------------------- + fcvtps h22, h13 fcvtps s22, s13 fcvtps d21, d14 +// CHECK: fcvtps h22, h13 // encoding: [0xb6,0xa9,0xf9,0x5e] // CHECK: fcvtps s22, s13 // encoding: [0xb6,0xa9,0xa1,0x5e] // CHECK: fcvtps d21, d14 // encoding: [0xd5,0xa9,0xe1,0x5e] @@ -153,9 +179,11 @@ // Positive Infinity //---------------------------------------------------------------------- + fcvtpu h12, h13 fcvtpu s12, s13 fcvtpu d21, d14 +// CHECK: fcvtpu h12, h13 // encoding: [0xac,0xa9,0xf9,0x7e] // CHECK: fcvtpu s12, s13 // encoding: [0xac,0xa9,0xa1,0x7e] // CHECK: fcvtpu d21, d14 // encoding: [0xd5,0xa9,0xe1,0x7e] @@ -163,9 +191,11 @@ // Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero //---------------------------------------------------------------------- + fcvtzs h12, h13 fcvtzs s12, s13 fcvtzs d21, d14 +// CHECK: fcvtzs h12, h13 // encoding: [0xac,0xb9,0xf9,0x5e] // CHECK: fcvtzs s12, s13 // encoding: [0xac,0xb9,0xa1,0x5e] // CHECK: fcvtzs d21, d14 // encoding: [0xd5,0xb9,0xe1,0x5e] @@ -174,8 +204,10 @@ // Zero //---------------------------------------------------------------------- + fcvtzu h12, h13 fcvtzu s12, s13 fcvtzu d21, d14 +// CHECK: fcvtzu h12, h13 // encoding: [0xac,0xb9,0xf9,0x7e] // CHECK: fcvtzu s12, s13 // encoding: [0xac,0xb9,0xa1,0x7e] // CHECK: fcvtzu d21, d14 // encoding: [0xd5,0xb9,0xe1,0x7e] diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s index b798b3410670..0b91d945a719 100644 --- a/test/MC/AArch64/neon-scalar-fp-compare.s +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -6,9 +6,11 @@ // Scalar Floating-point Compare Mask Equal //---------------------------------------------------------------------- + fcmeq h10, h11, h12 fcmeq s10, s11, s12 fcmeq d20, d21, d22 +// CHECK: fcmeq h10, h11, h12 // encoding: [0x6a,0x25,0x4c,0x5e] // CHECK: fcmeq s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x5e] // CHECK: fcmeq d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x5e] @@ -16,13 +18,17 @@ // Scalar Floating-point Compare Mask Equal To Zero //---------------------------------------------------------------------- + fcmeq h10, h11, #0.0 fcmeq s10, s11, #0.0 fcmeq d20, d21, #0.0 + fcmeq h10, h11, #0 fcmeq s10, s11, #0 fcmeq d20, d21, #0x0 +// CHECK: fcmeq h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x5e] // CHECK: fcmeq s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x5e] // CHECK: fcmeq d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x5e] +// CHECK: fcmeq h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x5e] // CHECK: fcmeq s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x5e] // CHECK: fcmeq d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x5e] @@ -30,9 +36,11 @@ // Scalar Floating-point Compare Mask Greater Than Or Equal //---------------------------------------------------------------------- + fcmge h10, h11, h12 fcmge s10, s11, s12 fcmge d20, d21, d22 +// CHECK: fcmge h10, h11, h12 // encoding: [0x6a,0x25,0x4c,0x7e] // CHECK: fcmge s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x7e] // CHECK: fcmge d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x7e] @@ -40,13 +48,17 @@ // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero //---------------------------------------------------------------------- + fcmge 
h10, h11, #0.0 fcmge s10, s11, #0.0 fcmge d20, d21, #0.0 + fcmge h10, h11, #0 fcmge s10, s11, #0 fcmge d20, d21, #0x0 +// CHECK: fcmge h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x7e] // CHECK: fcmge s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x7e] // CHECK: fcmge d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x7e] +// CHECK: fcmge h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x7e] // CHECK: fcmge s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x7e] // CHECK: fcmge d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x7e] @@ -54,9 +66,11 @@ // Scalar Floating-point Compare Mask Greather Than //---------------------------------------------------------------------- + fcmgt h10, h11, h12 fcmgt s10, s11, s12 fcmgt d20, d21, d22 +// CHECK: fcmgt h10, h11, h12 // encoding: [0x6a,0x25,0xcc,0x7e] // CHECK: fcmgt s10, s11, s12 // encoding: [0x6a,0xe5,0xac,0x7e] // CHECK: fcmgt d20, d21, d22 // encoding: [0xb4,0xe6,0xf6,0x7e] @@ -64,13 +78,17 @@ // Scalar Floating-point Compare Mask Greather Than Zero //---------------------------------------------------------------------- + fcmgt h10, h11, #0.0 fcmgt s10, s11, #0.0 fcmgt d20, d21, #0.0 + fcmgt h10, h11, #0 fcmgt s10, s11, #0 fcmgt d20, d21, #0x0 +// CHECK: fcmgt h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x5e] // CHECK: fcmgt s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x5e] // CHECK: fcmgt d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x5e] +// CHECK: fcmgt h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x5e] // CHECK: fcmgt s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x5e] // CHECK: fcmgt d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x5e] @@ -78,13 +96,17 @@ // Scalar Floating-point Compare Mask Less Than Or Equal To Zero //---------------------------------------------------------------------- + fcmle h10, h11, #0.0 fcmle s10, s11, #0.0 fcmle d20, d21, #0.0 + fcmle h10, h11, #0 fcmle s10, s11, #0 fcmle d20, d21, #0x0 +// CHECK: fcmle h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x7e] // CHECK: fcmle s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x7e] // CHECK: fcmle d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x7e] +// CHECK: fcmle h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x7e] // CHECK: fcmle s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x7e] // CHECK: fcmle d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x7e] @@ -92,13 +114,17 @@ // Scalar Floating-point Compare Mask Less Than //---------------------------------------------------------------------- + fcmlt h10, h11, #0.0 fcmlt s10, s11, #0.0 fcmlt d20, d21, #0.0 + fcmlt h10, h11, #0 fcmlt s10, s11, #0 fcmlt d20, d21, #0x0 +// CHECK: fcmlt h10, h11, #0.0 // encoding: [0x6a,0xe9,0xf8,0x5e] // CHECK: fcmlt s10, s11, #0.0 // encoding: [0x6a,0xe9,0xa0,0x5e] // CHECK: fcmlt d20, d21, #0.0 // encoding: [0xb4,0xea,0xe0,0x5e] +// CHECK: fcmlt h10, h11, #0.0 // encoding: [0x6a,0xe9,0xf8,0x5e] // CHECK: fcmlt s10, s11, #0.0 // encoding: [0x6a,0xe9,0xa0,0x5e] // CHECK: fcmlt d20, d21, #0.0 // encoding: [0xb4,0xea,0xe0,0x5e] @@ -106,9 +132,11 @@ // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal //---------------------------------------------------------------------- + facge h10, h11, h12 facge s10, s11, s12 facge d20, d21, d22 +// CHECK: facge h10, h11, h12 // encoding: [0x6a,0x2d,0x4c,0x7e] // CHECK: facge s10, s11, s12 // encoding: [0x6a,0xed,0x2c,0x7e] // CHECK: facge d20, d21, d22 // encoding: [0xb4,0xee,0x76,0x7e] @@ -116,8 +144,10 @@ // Scalar Floating-point Absolute Compare Mask Greater Than //---------------------------------------------------------------------- + facgt h10, h11, h12 facgt s10, s11, 
s12 facgt d20, d21, d22 +// CHECK: facgt h10, h11, h12 // encoding: [0x6a,0x2d,0xcc,0x7e] // CHECK: facgt s10, s11, s12 // encoding: [0x6a,0xed,0xac,0x7e] // CHECK: facgt d20, d21, d22 // encoding: [0xb4,0xee,0xf6,0x7e] diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s index e33bdad91a94..323fad206c4d 100644 --- a/test/MC/AArch64/neon-scalar-mul.s +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -26,9 +26,11 @@ // Floating-point Multiply Extended //---------------------------------------------------------------------- + fmulx h20, h22, h15 fmulx s20, s22, s15 fmulx d23, d11, d1 +// CHECK: fmulx h20, h22, h15 // encoding: [0xd4,0x1e,0x4f,0x5e] // CHECK: fmulx s20, s22, s15 // encoding: [0xd4,0xde,0x2f,0x5e] // CHECK: fmulx d23, d11, d1 // encoding: [0x77,0xdd,0x61,0x5e] diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index 7a886f3b4a73..923c3549d6f0 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -6,9 +6,11 @@ // Floating-point Reciprocal Step //---------------------------------------------------------------------- + frecps h21, h16, h13 frecps s21, s16, s13 frecps d22, d30, d21 +// CHECK: frecps h21, h16, h13 // encoding: [0x15,0x3e,0x4d,0x5e] // CHECK: frecps s21, s16, s13 // encoding: [0x15,0xfe,0x2d,0x5e] // CHECK: frecps d22, d30, d21 // encoding: [0xd6,0xff,0x75,0x5e] @@ -16,9 +18,11 @@ // Floating-point Reciprocal Square Root Step //---------------------------------------------------------------------- + frsqrts h21, h5, h12 frsqrts s21, s5, s12 frsqrts d8, d22, d18 +// CHECK: frsqrts h21, h5, h12 // encoding: [0xb5,0x3c,0xcc,0x5e] // CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e] // CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e] @@ -26,9 +30,11 @@ // Scalar Floating-point Reciprocal Estimate //---------------------------------------------------------------------- + frecpe h19, h14 frecpe s19, s14 frecpe d13, d13 +// CHECK: frecpe h19, h14 // encoding: [0xd3,0xd9,0xf9,0x5e] // CHECK: frecpe s19, s14 // encoding: [0xd3,0xd9,0xa1,0x5e] // CHECK: frecpe d13, d13 // encoding: [0xad,0xd9,0xe1,0x5e] @@ -36,9 +42,11 @@ // Scalar Floating-point Reciprocal Exponent //---------------------------------------------------------------------- + frecpx h18, h10 frecpx s18, s10 frecpx d16, d19 +// CHECK: frecpx h18, h10 // encoding: [0x52,0xf9,0xf9,0x5e] // CHECK: frecpx s18, s10 // encoding: [0x52,0xf9,0xa1,0x5e] // CHECK: frecpx d16, d19 // encoding: [0x70,0xfa,0xe1,0x5e] @@ -46,8 +54,10 @@ // Scalar Floating-point Reciprocal Square Root Estimate //---------------------------------------------------------------------- + frsqrte h22, h13 frsqrte s22, s13 frsqrte d21, d12 +// CHECK: frsqrte h22, h13 // encoding: [0xb6,0xd9,0xf9,0x7e] // CHECK: frsqrte s22, s13 // encoding: [0xb6,0xd9,0xa1,0x7e] // CHECK: frsqrte d21, d12 // encoding: [0x95,0xd9,0xe1,0x7e] diff --git 
a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s index 403a940ec2f2..dae61d0f0f32 100644 --- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s //---------------------------------------------------------------------- // Scalar Reduce Add Pairwise (Integer) @@ -10,7 +10,12 @@ //---------------------------------------------------------------------- // Scalar Reduce Add Pairwise (Floating Point) //---------------------------------------------------------------------- + faddp h18, v3.2h + faddp h18, v3.2H + faddp s19, v2.2s faddp d20, v1.2d +// CHECK: faddp h18, v3.2h // encoding: [0x72,0xd8,0x30,0x5e] +// CHECK: faddp s19, v2.2s // encoding: [0x53,0xd8,0x30,0x7e] // CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e] diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s index 6d1aafdd7725..32dd48629cd8 100644 --- a/test/MC/AArch64/neon-simd-misc.s +++ b/test/MC/AArch64/neon-simd-misc.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -298,10 +298,14 @@ // Floating-point absolute //------------------------------------------------------------------------------ + fabs v4.4h, v0.4h + fabs v6.8h, v8.8h fabs v6.4s, v8.4s fabs v6.2d, v8.2d fabs v4.2s, v0.2s +// CHECK: fabs v4.4h, v0.4h // encoding: [0x04,0xf8,0xf8,0x0e] +// CHECK: fabs v6.8h, v8.8h // encoding: [0x06,0xf9,0xf8,0x4e] // CHECK: fabs v6.4s, v8.4s // encoding: [0x06,0xf9,0xa0,0x4e] // CHECK: fabs v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x4e] // CHECK: fabs v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x0e] @@ -310,10 +314,14 @@ // Floating-point negate //------------------------------------------------------------------------------ + fneg v4.4h, v0.4h + fneg v6.8h, v8.8h fneg v6.4s, v8.4s fneg v6.2d, v8.2d fneg v4.2s, v0.2s +// CHECK: fneg v4.4h, v0.4h // encoding: [0x04,0xf8,0xf8,0x2e] +// CHECK: fneg v6.8h, v8.8h // encoding: [0x06,0xf9,0xf8,0x6e] // CHECK: fneg v6.4s, v8.4s // encoding: [0x06,0xf9,0xa0,0x6e] // CHECK: fneg v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x6e] // CHECK: fneg v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x2e] @@ -450,58 +458,86 @@ // Floating-point round to integral //------------------------------------------------------------------------------ + frintn v4.4h, v0.4h + frintn v6.8h, v8.8h frintn v6.4s, v8.4s frintn v6.2d, v8.2d frintn v4.2s, v0.2s +// CHECK: frintn v4.4h, v0.4h // encoding: [0x04,0x88,0x79,0x0e] +// CHECK: frintn v6.8h, v8.8h // encoding: [0x06,0x89,0x79,0x4e] // CHECK: frintn v6.4s, v8.4s // encoding: [0x06,0x89,0x21,0x4e] // CHECK: frintn v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x4e] // CHECK: frintn v4.2s, v0.2s // encoding: [0x04,0x88,0x21,0x0e] + frinta v4.4h, v0.4h + frinta v6.8h, v8.8h frinta v6.4s, v8.4s frinta v6.2d, v8.2d frinta v4.2s, v0.2s +// CHECK: frinta v4.4h, v0.4h // encoding: [0x04,0x88,0x79,0x2e] +// CHECK: frinta v6.8h, v8.8h // encoding: [0x06,0x89,0x79,0x6e] // CHECK: frinta v6.4s, v8.4s // encoding: [0x06,0x89,0x21,0x6e] // CHECK: frinta v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x6e] // CHECK: frinta v4.2s, v0.2s // 
encoding: [0x04,0x88,0x21,0x2e] + frintp v4.4h, v0.4h + frintp v6.8h, v8.8h frintp v6.4s, v8.4s frintp v6.2d, v8.2d frintp v4.2s, v0.2s +// CHECK: frintp v4.4h, v0.4h // encoding: [0x04,0x88,0xf9,0x0e] +// CHECK: frintp v6.8h, v8.8h // encoding: [0x06,0x89,0xf9,0x4e] // CHECK: frintp v6.4s, v8.4s // encoding: [0x06,0x89,0xa1,0x4e] // CHECK: frintp v6.2d, v8.2d // encoding: [0x06,0x89,0xe1,0x4e] // CHECK: frintp v4.2s, v0.2s // encoding: [0x04,0x88,0xa1,0x0e] + frintm v4.4h, v0.4h + frintm v6.8h, v8.8h frintm v6.4s, v8.4s frintm v6.2d, v8.2d frintm v4.2s, v0.2s +// CHECK: frintm v4.4h, v0.4h // encoding: [0x04,0x98,0x79,0x0e] +// CHECK: frintm v6.8h, v8.8h // encoding: [0x06,0x99,0x79,0x4e] // CHECK: frintm v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x4e] // CHECK: frintm v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x4e] // CHECK: frintm v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x0e] + frintx v4.4h, v0.4h + frintx v6.8h, v8.8h frintx v6.4s, v8.4s frintx v6.2d, v8.2d frintx v4.2s, v0.2s +// CHECK: frintx v4.4h, v0.4h // encoding: [0x04,0x98,0x79,0x2e] +// CHECK: frintx v6.8h, v8.8h // encoding: [0x06,0x99,0x79,0x6e] // CHECK: frintx v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x6e] // CHECK: frintx v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x6e] // CHECK: frintx v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x2e] + frintz v4.4h, v0.4h + frintz v6.8h, v8.8h frintz v6.4s, v8.4s frintz v6.2d, v8.2d frintz v4.2s, v0.2s +// CHECK: frintz v4.4h, v0.4h // encoding: [0x04,0x98,0xf9,0x0e] +// CHECK: frintz v6.8h, v8.8h // encoding: [0x06,0x99,0xf9,0x4e] // CHECK: frintz v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x4e] // CHECK: frintz v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x4e] // CHECK: frintz v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x0e] + frinti v4.4h, v0.4h + frinti v6.8h, v8.8h frinti v6.4s, v8.4s frinti v6.2d, v8.2d frinti v4.2s, v0.2s +// CHECK: frinti v4.4h, v0.4h // encoding: [0x04,0x98,0xf9,0x2e] +// CHECK: frinti v6.8h, v8.8h // encoding: [0x06,0x99,0xf9,0x6e] // CHECK: frinti v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x6e] // CHECK: frinti v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x6e] // CHECK: frinti v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x2e] @@ -510,83 +546,123 @@ // Floating-point convert to integer //------------------------------------------------------------------------------ + fcvtns v4.4h, v0.4h + fcvtns v6.8h, v8.8h fcvtns v6.4s, v8.4s fcvtns v6.2d, v8.2d fcvtns v4.2s, v0.2s +// CHECK: fcvtns v4.4h, v0.4h // encoding: [0x04,0xa8,0x79,0x0e] +// CHECK: fcvtns v6.8h, v8.8h // encoding: [0x06,0xa9,0x79,0x4e] // CHECK: fcvtns v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x4e] // CHECK: fcvtns v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x4e] // CHECK: fcvtns v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x0e] + fcvtnu v4.4h, v0.4h + fcvtnu v6.8h, v8.8h fcvtnu v6.4s, v8.4s fcvtnu v6.2d, v8.2d fcvtnu v4.2s, v0.2s +// CHECK: fcvtnu v4.4h, v0.4h // encoding: [0x04,0xa8,0x79,0x2e] +// CHECK: fcvtnu v6.8h, v8.8h // encoding: [0x06,0xa9,0x79,0x6e] // CHECK: fcvtnu v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x6e] // CHECK: fcvtnu v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x6e] // CHECK: fcvtnu v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x2e] + fcvtps v4.4h, v0.4h + fcvtps v6.8h, v8.8h fcvtps v6.4s, v8.4s fcvtps v6.2d, v8.2d fcvtps v4.2s, v0.2s +// CHECK: fcvtps v4.4h, v0.4h // encoding: [0x04,0xa8,0xf9,0x0e] +// CHECK: fcvtps v6.8h, v8.8h // encoding: [0x06,0xa9,0xf9,0x4e] // CHECK: fcvtps v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x4e] // CHECK: fcvtps v6.2d, v8.2d // encoding: 
[0x06,0xa9,0xe1,0x4e] // CHECK: fcvtps v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x0e] + fcvtpu v4.4h, v0.4h + fcvtpu v6.8h, v8.8h fcvtpu v6.4s, v8.4s fcvtpu v6.2d, v8.2d fcvtpu v4.2s, v0.2s +// CHECK: fcvtpu v4.4h, v0.4h // encoding: [0x04,0xa8,0xf9,0x2e] +// CHECK: fcvtpu v6.8h, v8.8h // encoding: [0x06,0xa9,0xf9,0x6e] // CHECK: fcvtpu v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x6e] // CHECK: fcvtpu v6.2d, v8.2d // encoding: [0x06,0xa9,0xe1,0x6e] // CHECK: fcvtpu v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x2e] + fcvtms v4.4h, v0.4h + fcvtms v6.8h, v8.8h fcvtms v6.4s, v8.4s fcvtms v6.2d, v8.2d fcvtms v4.2s, v0.2s +// CHECK: fcvtms v4.4h, v0.4h // encoding: [0x04,0xb8,0x79,0x0e] +// CHECK: fcvtms v6.8h, v8.8h // encoding: [0x06,0xb9,0x79,0x4e] // CHECK: fcvtms v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x4e] // CHECK: fcvtms v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x4e] // CHECK: fcvtms v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x0e] + fcvtmu v4.4h, v0.4h + fcvtmu v6.8h, v8.8h fcvtmu v6.4s, v8.4s fcvtmu v6.2d, v8.2d fcvtmu v4.2s, v0.2s +// CHECK: fcvtmu v4.4h, v0.4h // encoding: [0x04,0xb8,0x79,0x2e] +// CHECK: fcvtmu v6.8h, v8.8h // encoding: [0x06,0xb9,0x79,0x6e] // CHECK: fcvtmu v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x6e] // CHECK: fcvtmu v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x6e] // CHECK: fcvtmu v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x2e] + fcvtzs v4.4h, v0.4h + fcvtzs v6.8h, v8.8h fcvtzs v6.4s, v8.4s fcvtzs v6.2d, v8.2d fcvtzs v4.2s, v0.2s +// CHECK: fcvtzs v4.4h, v0.4h // encoding: [0x04,0xb8,0xf9,0x0e] +// CHECK: fcvtzs v6.8h, v8.8h // encoding: [0x06,0xb9,0xf9,0x4e] // CHECK: fcvtzs v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x4e] // CHECK: fcvtzs v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x4e] // CHECK: fcvtzs v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x0e] + fcvtzu v4.4h, v0.4h + fcvtzu v6.8h, v8.8h fcvtzu v6.4s, v8.4s fcvtzu v6.2d, v8.2d fcvtzu v4.2s, v0.2s +// CHECK: fcvtzu v4.4h, v0.4h // encoding: [0x04,0xb8,0xf9,0x2e] +// CHECK: fcvtzu v6.8h, v8.8h // encoding: [0x06,0xb9,0xf9,0x6e] // CHECK: fcvtzu v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x6e] // CHECK: fcvtzu v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x6e] // CHECK: fcvtzu v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x2e] + fcvtas v4.4h, v0.4h + fcvtas v6.8h, v8.8h fcvtas v6.4s, v8.4s fcvtas v6.2d, v8.2d fcvtas v4.2s, v0.2s +// CHECK: fcvtas v4.4h, v0.4h // encoding: [0x04,0xc8,0x79,0x0e] +// CHECK: fcvtas v6.8h, v8.8h // encoding: [0x06,0xc9,0x79,0x4e] // CHECK: fcvtas v6.4s, v8.4s // encoding: [0x06,0xc9,0x21,0x4e] // CHECK: fcvtas v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x4e] // CHECK: fcvtas v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x0e] + fcvtau v4.4h, v0.4h + fcvtau v6.8h, v8.8h fcvtau v6.4s, v8.4s fcvtau v6.2d, v8.2d fcvtau v4.2s, v0.2s +// CHECK: fcvtau v4.4h, v0.4h // encoding: [0x04,0xc8,0x79,0x2e] +// CHECK: fcvtau v6.8h, v8.8h // encoding: [0x06,0xc9,0x79,0x6e] // CHECK: fcvtau v6.4s, v8.4s // encoding: [0x06,0xc9,0x21,0x6e] // CHECK: fcvtau v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x6e] // CHECK: fcvtau v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x2e] @@ -603,42 +679,62 @@ // CHECK: ursqrte v6.4s, v8.4s // encoding: [0x06,0xc9,0xa1,0x6e] // CHECK: ursqrte v4.2s, v0.2s // encoding: [0x04,0xc8,0xa1,0x2e] + scvtf v4.4h, v0.4h + scvtf v6.8h, v8.8h scvtf v6.4s, v8.4s scvtf v6.2d, v8.2d scvtf v4.2s, v0.2s +// CHECK: scvtf v4.4h, v0.4h // encoding: [0x04,0xd8,0x79,0x0e] +// CHECK: scvtf v6.8h, v8.8h // encoding: [0x06,0xd9,0x79,0x4e] // CHECK: scvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x4e] // 
CHECK: scvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x4e] // CHECK: scvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x0e] + ucvtf v4.4h, v0.4h + ucvtf v6.8h, v8.8h ucvtf v6.4s, v8.4s ucvtf v6.2d, v8.2d ucvtf v4.2s, v0.2s +// CHECK: ucvtf v4.4h, v0.4h // encoding: [0x04,0xd8,0x79,0x2e] +// CHECK: ucvtf v6.8h, v8.8h // encoding: [0x06,0xd9,0x79,0x6e] // CHECK: ucvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x6e] // CHECK: ucvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x6e] // CHECK: ucvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x2e] + frecpe v4.4h, v0.4h + frecpe v6.8h, v8.8h frecpe v6.4s, v8.4s frecpe v6.2d, v8.2d frecpe v4.2s, v0.2s +// CHECK: frecpe v4.4h, v0.4h // encoding: [0x04,0xd8,0xf9,0x0e] +// CHECK: frecpe v6.8h, v8.8h // encoding: [0x06,0xd9,0xf9,0x4e] // CHECK: frecpe v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x4e] // CHECK: frecpe v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x4e] // CHECK: frecpe v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x0e] + frsqrte v4.4h, v0.4h + frsqrte v6.8h, v8.8h frsqrte v6.4s, v8.4s frsqrte v6.2d, v8.2d frsqrte v4.2s, v0.2s +// CHECK: frsqrte v4.4h, v0.4h // encoding: [0x04,0xd8,0xf9,0x2e] +// CHECK: frsqrte v6.8h, v8.8h // encoding: [0x06,0xd9,0xf9,0x6e] // CHECK: frsqrte v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x6e] // CHECK: frsqrte v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x6e] // CHECK: frsqrte v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x2e] + fsqrt v4.4h, v0.4h + fsqrt v6.8h, v8.8h fsqrt v6.4s, v8.4s fsqrt v6.2d, v8.2d fsqrt v4.2s, v0.2s +// CHECK: fsqrt v4.4h, v0.4h // encoding: [0x04,0xf8,0xf9,0x2e] +// CHECK: fsqrt v6.8h, v8.8h // encoding: [0x06,0xf9,0xf9,0x6e] // CHECK: fsqrt v6.4s, v8.4s // encoding: [0x06,0xf9,0xa1,0x6e] // CHECK: fsqrt v6.2d, v8.2d // encoding: [0x06,0xf9,0xe1,0x6e] // CHECK: fsqrt v4.2s, v0.2s // encoding: [0x04,0xf8,0xa1,0x2e] diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index a16432324efc..4638c535a6a7 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 @@ -400,16 +400,24 @@ //------------------------------------------------------------------------------ // Fixed-point convert to floating-point //------------------------------------------------------------------------------ + scvtf v0.4h, v1.4h, #3 + scvtf v0.8h, v1.8h, #3 scvtf v0.2s, v1.2s, #3 scvtf v0.4s, v1.4s, #3 scvtf v0.2d, v1.2d, #3 + ucvtf v0.4h, v1.4h, #3 + ucvtf v0.8h, v1.8h, #3 ucvtf v0.2s, v1.2s, #3 ucvtf v0.4s, v1.4s, #3 ucvtf v0.2d, v1.2d, #3 +// CHECK: scvtf v0.4h, v1.4h, #3 // encoding: [0x20,0xe4,0x1d,0x0f] +// CHECK: scvtf v0.8h, v1.8h, #3 // encoding: [0x20,0xe4,0x1d,0x4f] // CHECK: scvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x0f] // CHECK: scvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x4f] // CHECK: scvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x4f] +// CHECK: ucvtf v0.4h, v1.4h, #3 // encoding: [0x20,0xe4,0x1d,0x2f] +// CHECK: ucvtf v0.8h, v1.8h, #3 // encoding: [0x20,0xe4,0x1d,0x6f] // CHECK: ucvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x2f] // CHECK: ucvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x6f] // CHECK: ucvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x6f] @@ -417,17 +425,25 @@ 
//------------------------------------------------------------------------------ // Floating-point convert to fixed-point //------------------------------------------------------------------------------ + fcvtzs v0.4h, v1.4h, #3 + fcvtzs v0.8h, v1.8h, #3 fcvtzs v0.2s, v1.2s, #3 fcvtzs v0.4s, v1.4s, #3 fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.4h, v1.4h, #3 + fcvtzu v0.8h, v1.8h, #3 fcvtzu v0.2s, v1.2s, #3 fcvtzu v0.4s, v1.4s, #3 fcvtzu v0.2d, v1.2d, #3 +// CHECK: fcvtzs v0.4h, v1.4h, #3 // encoding: [0x20,0xfc,0x1d,0x0f] +// CHECK: fcvtzs v0.8h, v1.8h, #3 // encoding: [0x20,0xfc,0x1d,0x4f] // CHECK: fcvtzs v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x0f] // CHECK: fcvtzs v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x4f] // CHECK: fcvtzs v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x4f] +// CHECK: fcvtzu v0.4h, v1.4h, #3 // encoding: [0x20,0xfc,0x1d,0x2f] +// CHECK: fcvtzu v0.8h, v1.8h, #3 // encoding: [0x20,0xfc,0x1d,0x6f] // CHECK: fcvtzu v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x2f] // CHECK: fcvtzu v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x6f] // CHECK: fcvtzu v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x6f] diff --git a/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt new file mode 100644 index 000000000000..8b7e1c878002 --- /dev/null +++ b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt @@ -0,0 +1,382 @@ +# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=+neon,-fullfp16 < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,-fullfp16 < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,+fullfp16 < %s 2>&1 | FileCheck %s + +[0x00,0xf8,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0xf8,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0xd8,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x00,0xd8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0xf8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x00,0xf8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0xf8,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0x00,0xd8,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0x88,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0x98,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x00,0xd8,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x00,0xf8,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x20,0x10,0x22,0x0f] +# CHECK: warning: invalid instruction encoding +[0x03,0x11,0x12,0x4f] +# CHECK: warning: invalid instruction encoding 
+[0x20,0x50,0x22,0x0f] +# CHECK: warning: invalid instruction encoding +[0x03,0x51,0x12,0x4f] +# CHECK: warning: invalid instruction encoding +[0x20,0x90,0x22,0x0f] +# CHECK: warning: invalid instruction encoding +[0x20,0x90,0x22,0x4f] +# CHECK: warning: invalid instruction encoding +[0x20,0x90,0x22,0x2f] +# CHECK: warning: invalid instruction encoding +[0x20,0x90,0x22,0x6f] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0xc2,0x2e] +# CHECK: warning: invalid instruction encoding +[0x20,0xc8,0x30,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0xc8,0xb0,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0xf8,0x30,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0xf8,0xb0,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0x42,0x2e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0x42,0x6e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0x42,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0x42,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0xc2,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x14,0xc2,0x4e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x27,0x50,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0x24,0x4f,0x4e] +# CHECK: warning: invalid instruction encoding +[0x03,0x25,0x4c,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0x27,0x5c,0x6e] +# CHECK: warning: invalid instruction encoding +[0x03,0x25,0x4c,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0x27,0x5c,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x27,0xd0,0x2e] +# CHECK: warning: invalid instruction encoding +[0xe4,0x24,0xcf,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x27,0xd0,0x2e] +# CHECK: warning: invalid instruction encoding +[0xe4,0x24,0xcf,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0xdb,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0xd8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0xe0,0xdb,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0xd8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0x03,0xc9,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0xcb,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0x03,0xc9,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0xcb,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0xcb,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0xc8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0xe0,0xcb,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0xc8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0x83,0xda,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0x01,0xd9,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0x83,0xda,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0x01,0xd9,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0x50,0xe8,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0x8f,0xe8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0x50,0xe8,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0x8f,0xe8,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x2f,0x50,0x2e] +# CHECK: warning: invalid instruction encoding +[0xe4,0x2c,0x4f,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x2f,0x50,0x2e] +# CHECK: warning: invalid instruction encoding 
+[0xe4,0x2c,0x4f,0x6e] +# CHECK: warning: invalid instruction encoding +[0x03,0x2d,0xcc,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0x2f,0xdc,0x6e] +# CHECK: warning: invalid instruction encoding +[0x03,0x2d,0xcc,0x2e] +# CHECK: warning: invalid instruction encoding +[0xbf,0x2f,0xdc,0x6e] +# CHECK: warning: invalid instruction encoding +[0xe0,0x3f,0xd0,0x0e] +# CHECK: warning: invalid instruction encoding +[0xe4,0x3c,0xcf,0x4e] +# CHECK: warning: invalid instruction encoding +[0x03,0x3d,0x4c,0x0e] +# CHECK: warning: invalid instruction encoding +[0xbf,0x3f,0x5c,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x34,0x42,0x2e] +# CHECK: warning: invalid instruction encoding +[0xff,0x35,0x50,0x6e] +# CHECK: warning: invalid instruction encoding +[0xea,0x35,0xd6,0x2e] +# CHECK: warning: invalid instruction encoding +[0xa3,0x34,0xc6,0x6e] +# CHECK: warning: invalid instruction encoding +[0x20,0x04,0x42,0x2e] +# CHECK: warning: invalid instruction encoding +[0xff,0x05,0x50,0x6e] +# CHECK: warning: invalid instruction encoding +[0xea,0x05,0xd6,0x2e] +# CHECK: warning: invalid instruction encoding +[0xa3,0x04,0xc6,0x6e] +# CHECK: warning: invalid instruction encoding +[0x20,0x34,0x42,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x34,0x42,0x4e] +# CHECK: warning: invalid instruction encoding +[0xea,0x35,0xd6,0x0e] +# CHECK: warning: invalid instruction encoding +[0xea,0x35,0xd6,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x04,0x42,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x04,0x42,0x4e] +# CHECK: warning: invalid instruction encoding +[0xea,0x05,0xd6,0x0e] +# CHECK: warning: invalid instruction encoding +[0xea,0x05,0xd6,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x0c,0x42,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x0c,0x42,0x4e] +# CHECK: warning: invalid instruction encoding +[0x20,0x0c,0xc2,0x0e] +# CHECK: warning: invalid instruction encoding +[0x20,0x0c,0xc2,0x4e] +# CHECK: warning: invalid instruction encoding +[0x1d,0x17,0xd4,0x7e] +# CHECK: warning: invalid instruction encoding +[0x20,0x18,0x11,0x5f] +# CHECK: warning: invalid instruction encoding +[0x62,0x58,0x14,0x5f] +# CHECK: warning: invalid instruction encoding +[0x20,0x98,0x11,0x5f] +# CHECK: warning: invalid instruction encoding +[0x46,0x98,0x18,0x7f] +# CHECK: warning: invalid instruction encoding +[0x95,0xfd,0x1f,0x5f] +# CHECK: warning: invalid instruction encoding +[0x95,0xfd,0x1f,0x7f] +# CHECK: warning: invalid instruction encoding +[0xac,0xc9,0x79,0x5e] +# CHECK: warning: invalid instruction encoding +[0xac,0xc9,0x79,0x7e] +# CHECK: warning: invalid instruction encoding +[0xb6,0xb9,0x79,0x5e] +# CHECK: warning: invalid instruction encoding +[0xac,0xb9,0x79,0x7e] +# CHECK: warning: invalid instruction encoding +[0xb6,0xa9,0x79,0x5e] +# CHECK: warning: invalid instruction encoding +[0xac,0xa9,0x79,0x7e] +# CHECK: warning: invalid instruction encoding +[0xb6,0xa9,0xf9,0x5e] +# CHECK: warning: invalid instruction encoding +[0xac,0xa9,0xf9,0x7e] +# CHECK: warning: invalid instruction encoding +[0xac,0xb9,0xf9,0x5e] +# CHECK: warning: invalid instruction encoding +[0xac,0xb9,0xf9,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0x25,0x4c,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xd9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xd9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0x25,0x4c,0x7e] +# CHECK: warning: invalid instruction encoding 
+[0x6a,0xc9,0xf8,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xc9,0xf8,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0x25,0xcc,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xc9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xc9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xd9,0xf8,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xd9,0xf8,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xe9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0xe9,0xf8,0x5e] +# CHECK: warning: invalid instruction encoding +[0x6a,0x2d,0x4c,0x7e] +# CHECK: warning: invalid instruction encoding +[0x6a,0x2d,0xcc,0x7e] +# CHECK: warning: invalid instruction encoding +[0xd4,0x1e,0x4f,0x5e] +# CHECK: warning: invalid instruction encoding +[0x15,0x3e,0x4d,0x5e] +# CHECK: warning: invalid instruction encoding +[0xb5,0x3c,0xcc,0x5e] +# CHECK: warning: invalid instruction encoding +[0xd3,0xd9,0xf9,0x5e] +# CHECK: warning: invalid instruction encoding +[0x52,0xf9,0xf9,0x5e] +# CHECK: warning: invalid instruction encoding +[0xb6,0xd9,0xf9,0x7e] +# CHECK: warning: invalid instruction encoding +[0x72,0xd8,0x30,0x5e] +# CHECK: warning: invalid instruction encoding +[0x04,0xf8,0xf8,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xf9,0xf8,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xf8,0xf8,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xf9,0xf8,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0x88,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0x89,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0x88,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0x89,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0x88,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0x89,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0x98,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0x99,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0x98,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0x99,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0x98,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0x99,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0x98,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0x99,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xa8,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xa9,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xa8,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xa9,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xa8,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xa9,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xa8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xa9,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xb8,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xb9,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xb8,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xb9,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xb8,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xb9,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding 
+[0x04,0xb8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xb9,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xc8,0x79,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xc9,0x79,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xc8,0x79,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xc9,0x79,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xd8,0xf9,0x0e] +# CHECK: warning: invalid instruction encoding +[0x06,0xd9,0xf9,0x4e] +# CHECK: warning: invalid instruction encoding +[0x04,0xd8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xd9,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding +[0x04,0xf8,0xf9,0x2e] +# CHECK: warning: invalid instruction encoding +[0x06,0xf9,0xf9,0x6e] +# CHECK: warning: invalid instruction encoding + +# CHECK-NOT: warning: invalid instruction encoding From c14af0e1c8aff647498cf5e21362fec267b983e7 Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Tue, 8 Dec 2015 12:34:34 +0000 Subject: [PATCH 222/364] [x86][avx512] more changes in intrinsics to be align with gcc format Differential Revision: http://reviews.llvm.org/D15329 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255011 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index c287a3a1928e..370b527c6f7c 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -5248,27 +5248,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">, + def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">, + def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess_mask">, + def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd_mask">, + def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">, + def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">, + def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], 
[IntrNoMem]>; @@ -5297,10 +5297,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss_mask">, + def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd_mask">, + def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -5385,21 +5385,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem]>; def int_x86_avx512_mask_getmant_ss : - GCCBuiltin<"__builtin_ia32_getmantss_mask">, + GCCBuiltin<"__builtin_ia32_getmantss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_sd : - GCCBuiltin<"__builtin_ia32_getmantsd_mask">, + GCCBuiltin<"__builtin_ia32_getmantsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -5409,10 +5409,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -5436,11 +5436,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -5452,11 +5452,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; From 5a34e2eef2c83684f1220e3a38180b204a043f13 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 8 Dec 2015 13:49:19 +0000 Subject: [PATCH 223/364] [mips][ias] Range check uimm6 operands and fix a bug this revealed. Summary: We don't check the size operand on ext/dext*/ins/dins* yet because the permitted range depends on the pos argument and we can't check that using this mechanism. The bug was that dextu/dinsu accepted 0..31 in the pos operand instead of 32..63. Reviewers: vkalintiris Subscribers: llvm-commits, dsanders Differential Revision: http://reviews.llvm.org/D15190 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 3 + lib/Target/Mips/MicroMips64r6InstrInfo.td | 4 +- lib/Target/Mips/Mips64InstrInfo.td | 4 +- lib/Target/Mips/MipsInstrInfo.td | 25 ++-- lib/Target/Mips/MipsMSAInstrInfo.td | 126 ++++++++------------ test/MC/Mips/micromips64r6/invalid.s | 14 +++ test/MC/Mips/micromips64r6/valid.s | 2 +- test/MC/Mips/mips32r2/invalid.s | 14 +++ test/MC/Mips/mips64r2/invalid.s | 42 +++++++ test/MC/Mips/msa/invalid-64.s | 28 +++++ test/MC/Mips/msa/invalid.s | 24 ++++ 11 files changed, 193 insertions(+), 93 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 44f665a86c66..ac32a25a8b3c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -3647,6 +3647,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_UImm5_Lsl2: return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected both 7-bit unsigned immediate and multiple of 4"); + case Match_UImm6_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td index dfc54d75b361..f9e6a3c2f6ca 100644 --- a/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td @@ -66,9 +66,9 @@ class EXTBITS_DESC_BASE; -class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm6, +class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5, MipsExt>; -class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm6, +class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32, MipsExt>; class DALIGN_DESC_BASE, RDHWR_FM; let AdditionalPredicates = [NotInMicroMips] in { def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>; def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>; - def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>; + def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32>, EXT_FM<2>; } def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>; -def DINSU : 
InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>; +def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>; let isCodeGenOnly = 1, rs = 0, shamt = 0 in { diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f3be7fc46187..dae61c3c782d 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -394,8 +394,13 @@ class ConstantUImmAsmOperandClass Supers = [], let DiagnosticType = "UImm" # Bits # "_" # Offset; } +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, []>; +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>; def ConstantUImm5Plus32NormalizeAsmOperandClass - : ConstantUImmAsmOperandClass<5, [], 32> { + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; // We must also subtract 32 when we render the operand. let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; } @@ -403,19 +408,20 @@ def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { let Name = "UImm5Lsl2"; let RenderMethod = "addImmOperands"; let PredicateMethod = "isScaledUImm<5, 2>"; - let SuperClasses = []; + let SuperClasses = [ConstantUImm6AsmOperandClass]; let DiagnosticType = "UImm5_Lsl2"; } def ConstantUImm5ReportUImm6AsmOperandClass - : ConstantUImmAsmOperandClass<5, []> { + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> { let Name = "ConstantUImm5_0_Report_UImm6"; let DiagnosticType = "UImm5_0_Report_UImm6"; } def ConstantUImm5AsmOperandClass - : ConstantUImmAsmOperandClass<5, []>; + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>; def ConstantUImm4AsmOperandClass : ConstantUImmAsmOperandClass< 4, [ConstantUImm5AsmOperandClass, + ConstantUImm5Plus32AsmOperandClass, ConstantUImm5Plus32NormalizeAsmOperandClass]>; def ConstantUImm3AsmOperandClass : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>; @@ -506,7 +512,7 @@ def uimmz : Operand { } // Unsigned Operands -foreach I = {1, 2, 3, 4, 5} in +foreach I = {1, 2, 3, 4, 5, 6} in def uimm # I : Operand { let PrintMethod = "printUnsignedImm"; let ParserMatchClass = @@ -520,6 +526,11 @@ def uimm2_plus1 : Operand { let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; } +def uimm5_plus32 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass; +} + def uimm5_plus32_normalize : Operand { let PrintMethod = "printUnsignedImm"; let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; @@ -550,10 +561,6 @@ def uimm5_64_report_uimm6 : Operand { let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass; } -def uimm6 : Operand { - let PrintMethod = "printUnsignedImm"; -} - def uimm16 : Operand { let PrintMethod = "printUnsignedImm"; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index b74e967e6817..68e6ca1086bb 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1177,47 +1177,14 @@ class MSA_BIT_D_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm3:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. 
-class MSA_BIT_H_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_W_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm5:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_D_X_DESC_BASE { +class MSA_BIT_X_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm6:$m); + dag InOperandList = (ins ROWS:$ws, ImmOp:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))]; InstrItinClass Itinerary = itin; } @@ -1502,13 +1469,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2); + dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, - immZExt6:$n, + Imm:$n, ROWS:$ws, immz:$n2))]; InstrItinClass Itinerary = itin; @@ -2327,13 +2295,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, MSA128DOpnd>; class LD_DESC_BASE; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; -class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128BOpnd>; -class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128HOpnd>; -class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128WOpnd>; -class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128DOpnd>; - -class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128BOpnd>; -class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128HOpnd>; -class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - MSA128WOpnd>; -class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128DOpnd>; +class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_S_D_DESC : 
MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, + immZExt6, MSA128DOpnd>; + +class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, + immZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2633,14 +2601,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; -class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128BOpnd>; -class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128HOpnd>; -class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128WOpnd>; -class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128DOpnd>; +class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, + immZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2661,14 +2629,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; -class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128BOpnd>; -class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128HOpnd>; -class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128WOpnd>; -class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128DOpnd>; +class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, + immZExt6, MSA128DOpnd>; class ST_DESC_BASE Date: Tue, 8 Dec 2015 14:42:10 +0000 Subject: [PATCH 224/364] [mips][ias] Range check uimm8 operands Summary: Reviewers: vkalintiris Subscribers: llvm-commits, dsanders Differential Revision: http://reviews.llvm.org/D15226 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 3 +++ lib/Target/Mips/MipsInstrInfo.td | 6 ++++-- lib/Target/Mips/MipsMSAInstrInfo.td | 4 ---- test/MC/Mips/msa/invalid-64.s | 3 +++ test/MC/Mips/msa/invalid.s | 6 ++++++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 
ac32a25a8b3c..d04e8d4e4fa5 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -3650,6 +3650,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_UImm6_0: return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected 6-bit unsigned immediate"); + case Match_UImm8_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit unsigned immediate"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index dae61c3c782d..5dd01b538f15 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -394,8 +394,10 @@ class ConstantUImmAsmOperandClass Supers = [], let DiagnosticType = "UImm" # Bits # "_" # Offset; } +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, []>; def ConstantUImm6AsmOperandClass - : ConstantUImmAsmOperandClass<6, []>; + : ConstantUImmAsmOperandClass<6, [ConstantUImm8AsmOperandClass]>; def ConstantUImm5Plus32AsmOperandClass : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>; def ConstantUImm5Plus32NormalizeAsmOperandClass @@ -512,7 +514,7 @@ def uimmz : Operand { } // Unsigned Operands -foreach I = {1, 2, 3, 4, 5, 6} in +foreach I = {1, 2, 3, 4, 5, 6, 8} in def uimm # I : Operand { let PrintMethod = "printUnsignedImm"; let ParserMatchClass = diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 68e6ca1086bb..eacfcec78bc7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -78,10 +78,6 @@ def uimm6_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } -def uimm8 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def simm5 : Operand; def vsplat_uimm1 : Operand { diff --git a/test/MC/Mips/msa/invalid-64.s b/test/MC/Mips/msa/invalid-64.s index 90f19568b584..a15ee270bccf 100644 --- a/test/MC/Mips/msa/invalid-64.s +++ b/test/MC/Mips/msa/invalid-64.s @@ -37,6 +37,9 @@ sat_u.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate sat_u.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate sat_u.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate + shf.b $w19, $w30, -1 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate + shf.h $w17, $w8, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate + shf.w $w14, $w3, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate sldi.b $w0, $w29[-1] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate sldi.b $w0, $w29[16] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate sldi.d $w4, $w12[-1] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate diff --git a/test/MC/Mips/msa/invalid.s b/test/MC/Mips/msa/invalid.s index 22cec375e7e3..724d9c193e0a 100644 --- a/test/MC/Mips/msa/invalid.s +++ b/test/MC/Mips/msa/invalid.s @@ -35,6 +35,12 @@ sat_u.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate sat_u.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate sat_u.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate + shf.b $w19, $w30, -1 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate + shf.b $w19, $w30, 256 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate + shf.h $w17, $w8, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit 
unsigned immediate + shf.h $w17, $w8, 256 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate + shf.w $w14, $w3, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate + shf.w $w14, $w3, 256 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate sldi.b $w0, $w29[-1] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate sldi.b $w0, $w29[16] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate sldi.d $w4, $w12[-1] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate From 5c953e3267ebfc4b0430aea2b296348fa6320d38 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 8 Dec 2015 14:54:49 +0000 Subject: [PATCH 225/364] Move all private members together. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255021 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 45 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a2bc95602210..a9fcee7c98ce 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -420,29 +420,6 @@ class ModuleLinker { bool HasError = false; -public: - ModuleLinker(Module &DstM, Linker::IdentifiedStructTypeSet &Set, Module &SrcM, - DiagnosticHandlerFunction DiagnosticHandler, unsigned Flags, - const FunctionInfoIndex *Index = nullptr, - DenseSet *FunctionsToImport = nullptr) - : DstM(DstM), SrcM(SrcM), TypeMap(Set), ValMaterializer(this), - DiagnosticHandler(DiagnosticHandler), Flags(Flags), ImportIndex(Index), - ImportFunction(FunctionsToImport) { - assert((ImportIndex || !ImportFunction) && - "Expect a FunctionInfoIndex when importing"); - // If we have a FunctionInfoIndex but no function to import, - // then this is the primary module being compiled in a ThinLTO - // backend compilation, and we need to see if it has functions that - // may be exported to another backend compilation. - if (ImportIndex && !ImportFunction) - HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); - } - - bool run(); - Value *materializeDeclFor(Value *V); - void materializeInitFor(GlobalValue *New, GlobalValue *Old); - -private: bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; } bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; } bool shouldInternalizeLinkedSymbols() { @@ -556,6 +533,28 @@ class ModuleLinker { const GlobalValue *DGV = nullptr); void linkNamedMDNodes(); + +public: + ModuleLinker(Module &DstM, Linker::IdentifiedStructTypeSet &Set, Module &SrcM, + DiagnosticHandlerFunction DiagnosticHandler, unsigned Flags, + const FunctionInfoIndex *Index = nullptr, + DenseSet *FunctionsToImport = nullptr) + : DstM(DstM), SrcM(SrcM), TypeMap(Set), ValMaterializer(this), + DiagnosticHandler(DiagnosticHandler), Flags(Flags), ImportIndex(Index), + ImportFunction(FunctionsToImport) { + assert((ImportIndex || !ImportFunction) && + "Expect a FunctionInfoIndex when importing"); + // If we have a FunctionInfoIndex but no function to import, + // then this is the primary module being compiled in a ThinLTO + // backend compilation, and we need to see if it has functions that + // may be exported to another backend compilation. 
+ if (ImportIndex && !ImportFunction) + HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); + } + + bool run(); + Value *materializeDeclFor(Value *V); + void materializeInitFor(GlobalValue *New, GlobalValue *Old); }; } From b691e2ed749bb6b463573006ce05982fab86c873 Mon Sep 17 00:00:00 2001 From: Ron Lieberman Date: Tue, 8 Dec 2015 16:28:32 +0000 Subject: [PATCH 226/364] [Hexagon] Add NewValueJump support for C4_cmpneq, C4_cmplte, C4_cmplteu git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonNewValueJump.cpp | 18 +++++ test/CodeGen/Hexagon/NVJumpCmp.ll | 89 ++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 test/CodeGen/Hexagon/NVJumpCmp.ll diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 6415c6cc7906..20c4ab112b5f 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -342,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t : Hexagon::J4_cmpgtui_t_jumpnv_nt; + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + default: llvm_unreachable("Could not find matching New Value Jump instruction."); } diff --git a/test/CodeGen/Hexagon/NVJumpCmp.ll b/test/CodeGen/Hexagon/NVJumpCmp.ll new file mode 100644 index 000000000000..6b160d962ebb --- /dev/null +++ b/test/CodeGen/Hexagon/NVJumpCmp.ll @@ -0,0 +1,89 @@ +; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s + +; Look for an instruction, we really just do not want to see an abort. 
+; CHECK: trace_event +; REQUIRES: asserts + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32" +target triple = "hexagon-unknown--elf" + +; Function Attrs: nounwind +define void @_ZN6Halide7Runtime8Internal13default_traceEPvPK18halide_trace_event() #0 { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %while.cond + +while.cond: ; preds = %while.cond, %if.then + br i1 undef, label %while.cond, label %while.end + +while.end: ; preds = %while.cond + %add = add i32 undef, 48 + br i1 undef, label %if.end, label %if.then17 + +if.then17: ; preds = %while.end + unreachable + +if.end: ; preds = %while.end + %arrayidx21 = getelementptr inbounds [4096 x i8], [4096 x i8]* undef, i32 0, i32 8 + store i8 undef, i8* %arrayidx21, align 4, !tbaa !1 + br i1 undef, label %for.body42.preheader6, label %min.iters.checked + +for.body42.preheader6: ; preds = %vector.body.preheader, %min.iters.checked, %if.end + unreachable + +min.iters.checked: ; preds = %if.end + br i1 undef, label %for.body42.preheader6, label %vector.body.preheader + +vector.body.preheader: ; preds = %min.iters.checked + br i1 undef, label %for.cond48.preheader, label %for.body42.preheader6 + +for.cond48.preheader: ; preds = %vector.body.preheader + br i1 undef, label %while.cond.i, label %for.body61.lr.ph + +for.body61.lr.ph: ; preds = %for.cond48.preheader + br i1 undef, label %for.body61, label %min.iters.checked595 + +min.iters.checked595: ; preds = %for.body61.lr.ph + br i1 undef, label %for.body61, label %vector.memcheck608 + +vector.memcheck608: ; preds = %min.iters.checked595 + %scevgep600 = getelementptr [4096 x i8], [4096 x i8]* undef, i32 0, i32 %add + %bound0604 = icmp ule i8* %scevgep600, undef + %memcheck.conflict607 = and i1 undef, %bound0604 + br i1 %memcheck.conflict607, label %for.body61, label %vector.body590 + +vector.body590: ; preds = %vector.body590, %vector.memcheck608 + br i1 undef, label %middle.block591, label %vector.body590, !llvm.loop !4 + +middle.block591: ; preds = %vector.body590 + %cmp.n613 = icmp eq i32 undef, 0 + br i1 %cmp.n613, label %while.cond.i, label %for.body61 + +while.cond.i: ; preds = %for.body61, %while.cond.i, %middle.block591, %for.cond48.preheader + br i1 undef, label %_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit, label %while.cond.i + +_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit: ; preds = %while.cond.i + unreachable + +for.body61: ; preds = %for.body61, %middle.block591, %vector.memcheck608, %min.iters.checked595, %for.body61.lr.ph + %cmp59 = icmp ult i32 undef, undef + br i1 %cmp59, label %for.body61, label %while.cond.i, !llvm.loop !7 + +if.else: ; preds = %entry + unreachable +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"halide_mattrs", !"+hvx"} +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} +!4 = distinct !{!4, !5, !6} +!5 = !{!"llvm.loop.vectorize.width", i32 1} +!6 = !{!"llvm.loop.interleave.count", i32 1} +!7 = distinct !{!7, !5, !6} From 72f75faa283d31acd018a40226470b852e39df0b Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Tue, 8 Dec 2015 17:44:51 +0000 Subject: [PATCH 227/364] adding 
readability-identifier-naming to llvm clang-tidy configuration. Differential Revision: http://reviews.llvm.org/D15196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255028 91177308-0d34-0410-b5e6-96231b3b80d8 --- .clang-tidy | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 3186da43d43d..97fbe23333bd 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1 +1,13 @@ -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*' +Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming' +CheckOptions: + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.EnumCase + value: CamelCase + - key: readability-identifier-naming.FunctionCase + value: lowerCase + - key: readability-identifier-naming.UnionCase + value: CamelCase + - key: readability-identifier-naming.VariableCase + value: CamelCase + From a0beb06ac92b08c8e62c04b3f2d5279785e12e96 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Tue, 8 Dec 2015 18:10:58 +0000 Subject: [PATCH 228/364] [ARM] Allowing SP/PC for AND/BIC mod_imm_not AND/BIC instructions do accept SP/PC, so the register class should be more generic (rGPR -> GPR) to cope with that case. Adding more tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 8 ++++---- test/MC/ARM/basic-arm-instructions.s | 11 +++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4c7107aee6a2..2aa9475e6f47 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -5655,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, + (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", - (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, + (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", - (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, + (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rdn, $imm", - (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, + (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, mod_imm_neg" -> sub diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index a1f13b76dda3..99a3cfa7b29e 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -349,6 +349,8 @@ Lforward: and r6, r7, r8, ror r2 and r10, r1, r6, rrx and r2, r3, #0x7fffffff + and sp, sp, #0x7fffffff + and pc, pc, #0x7fffffff @ destination register is optional and r1, #0xf @@ -397,6 +399,8 @@ Lforward: @ CHECK: and r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0x07,0xe0] @ CHECK: and r10, r1, r6, rrx @ encoding: [0x66,0xa0,0x01,0xe0] @ CHECK: bic r2, r3, #-2147483648 @ encoding: [0x02,0x21,0xc3,0xe3] +@ CHECK: bic sp, sp, #-2147483648 @ encoding: [0x02,0xd1,0xcd,0xe3] +@ CHECK: bic pc, pc, #-2147483648 @ encoding: [0x02,0xf1,0xcf,0xe3] @ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2] @ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2] @@ -502,6 +506,10 @@ Lforward: bic r6, r7, r8, asr r2 bic r6, r7, r8, ror r2 bic r10, r1, r6, rrx + bic r2, r3, 
#0x7fffffff + bic sp, sp, #0x7fffffff + bic pc, pc, #0x7fffffff + @ destination register is optional bic r1, #0xf @@ -548,6 +556,9 @@ Lforward: @ CHECK: bic r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0xc7,0xe1] @ CHECK: bic r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0xc7,0xe1] @ CHECK: bic r10, r1, r6, rrx @ encoding: [0x66,0xa0,0xc1,0xe1] +@ CHECK: and r2, r3, #-2147483648 @ encoding: [0x02,0x21,0x03,0xe2] +@ CHECK: and sp, sp, #-2147483648 @ encoding: [0x02,0xd1,0x0d,0xe2] +@ CHECK: and pc, pc, #-2147483648 @ encoding: [0x02,0xf1,0x0f,0xe2] @ CHECK: bic r1, r1, #15 @ encoding: [0x0f,0x10,0xc1,0xe3] From 81bf65619f565510552f650b118e0098e047f1a4 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 8 Dec 2015 18:31:35 +0000 Subject: [PATCH 229/364] X86: produce more friendly errors during MachO relocation handling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255036 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 92 ++++++++++++------- test/MC/X86/macho-reloc-errors-x86.s | 15 +++ test/MC/X86/macho-reloc-errors-x86_64.s | 19 ++++ 3 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 test/MC/X86/macho-reloc-errors-x86.s create mode 100644 test/MC/X86/macho-reloc-errors-x86_64.s diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 614e43864088..191ebeac7265 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol", false); + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. - if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference", - false); + if (IsPCRel) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported pc-relative relocation of difference"); + return; + } // The support for the situation where one or both of the symbols would // require a local relocation is handled just like if the symbols were @@ -168,8 +173,11 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Darwin 'as' doesn't emit correct relocations for this (it ends up with a // single SIGNED relocation); reject it for now. Except the case where both // symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base", false); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } // A subtraction expression where either symbol is undefined is a // non-relocatable expression. 
@@ -245,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation( FixedValue = Res; return; } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of variable '" + + Symbol->getName() + "'"); + return; } } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + return; } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); @@ -267,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation", - false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; } else { Type = MachO::X86_64_RELOC_SIGNED; @@ -293,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation( } } } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation", false); + if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), + "unsupported symbol modifier in branch relocation"); + return; + } Type = MachO::X86_64_RELOC_BRANCH; } @@ -310,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation( Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel", - false); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation", false); - else { + Asm.getContext().reportError( + Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel"); + return; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; + } else { Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) - report_fatal_error("32-bit absolute addressing is not supported in " - "64-bit mode", false); + if (Kind == X86::reloc_signed_4byte) { + Asm.getContext().reportError( + Fixup.getLoc(), + "32-bit absolute addressing is not supported in 64-bit mode"); + return; + } } } } @@ -351,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // See . 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression", - false); + if (!A->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + return false; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -364,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression", - false); + if (!SB->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + return false; + } // Select the appropriate difference relocation type. // diff --git a/test/MC/X86/macho-reloc-errors-x86.s b/test/MC/X86/macho-reloc-errors-x86.s new file mode 100644 index 000000000000..4af202220073 --- /dev/null +++ b/test/MC/X86/macho-reloc-errors-x86.s @@ -0,0 +1,15 @@ +// RUN: not llvm-mc -triple=i686-apple-darwin -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + .space 0x1000000 + mov %eax, thing-thing2 + mov %eax, defined-thing2 + mov %eax, later-defined + + .section __DATA,__tim +defined: + + .section __DATA,__tim2 +later: + +// CHECK-ERROR: 3:9: error: symbol 'thing' can not be undefined in a subtraction expression +// CHECK-ERROR: 4:9: error: symbol 'thing2' can not be undefined in a subtraction expression +// CHECK-ERROR: 5:9: error: Section too large, can't encode r_address (0x100000b) into 24 bits of scattered relocation entry. 
diff --git a/test/MC/X86/macho-reloc-errors-x86_64.s b/test/MC/X86/macho-reloc-errors-x86_64.s new file mode 100644 index 000000000000..05f77c495b24 --- /dev/null +++ b/test/MC/X86/macho-reloc-errors-x86_64.s @@ -0,0 +1,19 @@ +// RUN: not llvm-mc -triple=x86_64-apple-darwin -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + + mov %rax, thing + mov %rax, thing@GOT-thing2@GOT + mov %rax, (thing-thing2)(%rip) + mov %rax, thing-thing + mov %rax, thing-thing2 + mov %rax, thing@PLT + jmp thing@PLT + mov %rax, thing@TLVP + +// CHECK-ERROR: 3:9: error: 32-bit absolute addressing is not supported in 64-bit mode +// CHECK-ERROR: 4:9: error: unsupported relocation of modified symbol +// CHECK-ERROR: 5:9: error: unsupported pc-relative relocation of difference +// CHECK-ERROR: 6:9: error: unsupported relocation with identical base +// CHECK-ERROR: 7:9: error: unsupported relocation with subtraction expression, symbol 'thing' can not be undefined in a subtraction expression +// CHECK-ERROR: 8:9: error: unsupported symbol modifier in relocation +// CHECK-ERROR: 9:9: error: unsupported symbol modifier in branch relocation +// CHECK-ERROR: 10:9: error: TLVP symbol modifier should have been rip-rel From 0cb0e797f0e7b13bed123e14b538abbcbb4709b4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Dec 2015 19:01:29 +0000 Subject: [PATCH 230/364] Add Available Externally linkage type to isWeakForLinker() Per LangRef: "Globals with available_externally linkage are allowed to be discarded at will, and are otherwise the same as linkonce_odr", since linkonce_odr is in this list it makes sense to have available_externally there as well. Reviewers: rafael Differential Revision: http://reviews.llvm.org/D15323 From: Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255043 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/GlobalValue.h | 3 ++- .../available_externally_global_ctors.ll | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/GlobalOpt/available_externally_global_ctors.ll diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 2f9172648a26..3461b9ee8b82 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -256,7 +256,8 @@ class GlobalValue : public Constant { static bool isWeakForLinker(LinkageTypes Linkage) { return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage || Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage || - Linkage == CommonLinkage || Linkage == ExternalWeakLinkage; + Linkage == CommonLinkage || Linkage == ExternalWeakLinkage || + Linkage == AvailableExternallyLinkage; } bool hasExternalLinkage() const { return isExternalLinkage(Linkage); } diff --git a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll new file mode 100644 index 000000000000..39dc054ac227 --- /dev/null +++ b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll @@ -0,0 +1,22 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; RUN: opt -S -globalopt < %s | FileCheck %s + +; Verify that the initialization of the available_externally global is not eliminated +; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }] + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } 
{ i32 65535, void ()* @foo_static_init, i8* null }] +@foo_external = available_externally global void ()* null, align 8 + +define internal void @foo_static_init() { +entry: + store void ()* @foo_impl, void ()** @foo_external, align 8 + ret void +} + +define internal void @foo_impl() { +entry: + ret void +} + From 0a5a3d4acb906d07bdd290b459afcd838f5af932 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Dec 2015 19:02:55 +0000 Subject: [PATCH 231/364] Cleanup test: remove useless alignment From: Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255044 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Transforms/GlobalOpt/available_externally_global_ctors.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll index 39dc054ac227..7092a5ae2226 100644 --- a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll +++ b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll @@ -7,11 +7,11 @@ target triple = "x86_64-apple-macosx10.11.0" ; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }] @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }] -@foo_external = available_externally global void ()* null, align 8 +@foo_external = available_externally global void ()* null define internal void @foo_static_init() { entry: - store void ()* @foo_impl, void ()** @foo_external, align 8 + store void ()* @foo_impl, void ()** @foo_external ret void } From 917e9a38ca5d5fc5cd567fd209864e62d5e641ad Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Dec 2015 19:13:31 +0000 Subject: [PATCH 232/364] Revert "Add Available Externally linkage type to isWeakForLinker()" This reverts r255043, as post-review concerns were raised about its correctness. 
From: Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255045 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/GlobalValue.h | 3 +-- .../available_externally_global_ctors.ll | 22 ------------------- 2 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 test/Transforms/GlobalOpt/available_externally_global_ctors.ll diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 3461b9ee8b82..2f9172648a26 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -256,8 +256,7 @@ class GlobalValue : public Constant { static bool isWeakForLinker(LinkageTypes Linkage) { return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage || Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage || - Linkage == CommonLinkage || Linkage == ExternalWeakLinkage || - Linkage == AvailableExternallyLinkage; + Linkage == CommonLinkage || Linkage == ExternalWeakLinkage; } bool hasExternalLinkage() const { return isExternalLinkage(Linkage); } diff --git a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll deleted file mode 100644 index 7092a5ae2226..000000000000 --- a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll +++ /dev/null @@ -1,22 +0,0 @@ -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.11.0" - -; RUN: opt -S -globalopt < %s | FileCheck %s - -; Verify that the initialization of the available_externally global is not eliminated -; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }] - -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }] -@foo_external = available_externally global void ()* null - -define internal void @foo_static_init() { -entry: - store void ()* @foo_impl, void ()** @foo_external - ret void -} - -define internal void @foo_impl() { -entry: - ret void -} - From d10549743ae65fcc4420a5b07606f97f5cac4bae Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Tue, 8 Dec 2015 19:59:01 +0000 Subject: [PATCH 233/364] Fix ARMv4T (Thumb1) epilogue generation Summary: Before ARMv5T, Thumb1 code could not pop PC, as described at D14357 and D14986; so we need the special fixup in the epilogue. Reviewers: jroelofs, qcolombet Subscribers: aemerson, llvm-commits, rengolin Differential Revision: http://reviews.llvm.org/D15126 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255047 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb1FrameLowering.cpp | 41 ++++++-- test/CodeGen/ARM/debug-frame.ll | 4 +- test/CodeGen/Thumb/large-stack.ll | 20 ++-- test/CodeGen/Thumb/pop-special-fixup.ll | 60 ------------ test/CodeGen/Thumb/thumb-shrink-wrapping.ll | 102 +++++++++++++++++--- 5 files changed, 132 insertions(+), 95 deletions(-) delete mode 100644 test/CodeGen/Thumb/pop-special-fixup.ll diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index fd96af6cb6e0..8771c68e5931 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -406,9 +406,6 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { if (AFI->getArgRegsSaveSize()) return true; - // FIXME: this doesn't make sense, and the following patch will remove it. 
- if (!STI.hasV4TOps()) return false; - // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) @@ -532,10 +529,32 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Kill)); } + bool AddBx = false; if (MBBI == MBB.end()) { MachineInstr& Pop = MBB.back(); assert(Pop.getOpcode() == ARM::tPOP); Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR)); + } else if (MBBI->getOpcode() == ARM::tPOP_RET) { + // We couldn't use the direct restoration above, so + // perform the opposite conversion: tPOP_RET to tPOP. + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + unsigned Popped = 0; + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::PC) { + MIB.addOperand(MO); + if (!MO.isImplicit()) + Popped++; + } + // Is there anything left to pop? + if (!Popped) + MBB.erase(MIB.getInstr()); + // Erase the old instruction. + MBB.erase(MBBI); + MBBI = MBB.end(); + AddBx = true; } assert(PopReg && "Do not know how to get LR"); @@ -554,14 +573,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); - + if (AddBx && !TemporaryReg) { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill)); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } if (TemporaryReg) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); + if (AddBx) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET))); } return true; @@ -628,7 +653,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue - if (STI.hasV4TOps() && !STI.hasV5TOps()) + if (!STI.hasV5TOps()) continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll index 33d389698995..4bd401b68496 100644 --- a/test/CodeGen/ARM/debug-frame.ll +++ b/test/CodeGen/ARM/debug-frame.ll @@ -30,11 +30,11 @@ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -disable-fp-elim -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 0d534589ae0a..c5d1044e9d69 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -32,10 +32,10 @@ define void @test100() { ; Smallest stack for which we use a constant pool define void @test2() { ; CHECK-LABEL: test2: -; CHECK: ldr r0, -; CHECK: add sp, r0 -; EABI: ldr r0, -; EABI: add sp, r0 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 @@ 
-44,12 +44,12 @@ define void @test2() { define i32 @test3() { ; CHECK-LABEL: test3: -; CHECK: ldr r1, -; CHECK: add sp, r1 -; CHECK: ldr r1, -; CHECK: add r1, sp -; EABI: ldr r1, -; EABI: add sp, r1 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: ldr [[TEMP]], +; CHECK: add [[TEMP]], sp +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %retval = alloca i32, align 4 diff --git a/test/CodeGen/Thumb/pop-special-fixup.ll b/test/CodeGen/Thumb/pop-special-fixup.ll deleted file mode 100644 index 9ba589d6cec3..000000000000 --- a/test/CodeGen/Thumb/pop-special-fixup.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: llc %s -enable-shrink-wrap=true -o - | FileCheck %s - -target triple = "thumbv6m-none-none-eabi" - -@retval = global i32 0, align 4 - -define i32 @test(i32 %i, i32 %argc, i8** nocapture readonly %argv) { - %1 = icmp sgt i32 %argc, %i - br i1 %1, label %2, label %19 - - %3 = getelementptr inbounds i8*, i8** %argv, i32 %i - %4 = load i8*, i8** %3, align 4 - %5 = load i8, i8* %4, align 1 - %6 = icmp eq i8 %5, 45 - %7 = getelementptr inbounds i8, i8* %4, i32 1 - %. = select i1 %6, i8* %7, i8* %4 - %.1 = select i1 %6, i32 -1, i32 1 - %8 = load i8, i8* %., align 1 - %.off2 = add i8 %8, -48 - %9 = icmp ult i8 %.off2, 10 - %.pre = load i32, i32* @retval, align 4 - br i1 %9, label %.lr.ph.preheader, label %.critedge - -.lr.ph.preheader: ; preds = %2 - br label %.lr.ph - -.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph - %10 = phi i32 [ %14, %.lr.ph ], [ %.pre, %.lr.ph.preheader ] - %11 = phi i8 [ %15, %.lr.ph ], [ %8, %.lr.ph.preheader ] - %valstring.03 = phi i8* [ %13, %.lr.ph ], [ %., %.lr.ph.preheader ] - %12 = zext i8 %11 to i32 - %13 = getelementptr inbounds i8, i8* %valstring.03, i32 1 - %14 = add nsw i32 %10, %12 - store i32 %14, i32* @retval, align 4 - %15 = load i8, i8* %13, align 1 - %.off = add i8 %15, -48 - %16 = icmp ult i8 %.off, 10 - br i1 %16, label %.lr.ph, label %.critedge.loopexit - -.critedge.loopexit: ; preds = %.lr.ph - %.lcssa = phi i32 [ %14, %.lr.ph ] - br label %.critedge - -.critedge: ; preds = %.critedge.loopexit, %2 - %17 = phi i32 [ %.pre, %2 ], [ %.lcssa, %.critedge.loopexit ] - %18 = mul nsw i32 %17, %.1 - store i32 %18, i32* @retval, align 4 - br label %19 - -;