From 78b7d4a492023c21a3c602598785b4048c74997a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 Mar 2019 06:53:43 +0000 Subject: [PATCH 01/27] [X86] Remove a couple unused SDNodeXForms. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356867 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 99252fc2a7aa..2aa5fa45ce92 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -931,17 +931,6 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{ return N->isExactlyValue(+0.0); }]>; -def I8Imm : SDNodeXFormgetZExtValue(), SDLoc(N)); -}]>; - -// BYTE_imm - Transform bit immediates into byte immediates. -def BYTE_imm : SDNodeXForm> 3 - return getI32Imm(N->getZExtValue() >> 3, SDLoc(N)); -}]>; - // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index // to VEXTRACTF128/VEXTRACTI128 imm. def EXTRACT_get_vextract128_imm : SDNodeXForm Date: Mon, 25 Mar 2019 06:53:44 +0000 Subject: [PATCH 02/27] [X86] Remove GetLo8XForm and use GetLo32XForm instead. NFCI We were using this to create an AND32ri8 node from a 64-bit and, but that node normally still uses a 32-bit immediate. So we should just truncate the existing immediate to i32. We already verified it has the same value in bits 31:7. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356868 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 4c06b1765432..7aab8f8f377f 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -19,11 +19,6 @@ def GetLo32XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; -def GetLo8XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); -}]>; - //===----------------------------------------------------------------------===// // Random Pseudo Instructions. @@ -1523,7 +1518,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (i64 0), (AND32ri8 (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo8XForm imm:$imm))), + (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; def : Pat<(and GR64:$src, i64immZExt32:$imm), From 83768b93ba5df47e6d4e8b16aab4627f47636a0d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 Mar 2019 06:53:45 +0000 Subject: [PATCH 03/27] [X86] When selecting (x << C1) op C2 as (x op (C2>>C1)) << C1, use the operation VT for the target constant. Normally when the nodes we use here(AND32ri8 for example) are selected their immediates are just converted from ConstantSDNode to TargetConstantSDNode without changing VT from the original operation VT. So we should still be emitting them with the operation VT. Theoretically this could expose more accurate opportunities for CSE. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356869 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index bfa0df7db347..6290a8f4a8dc 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3561,7 +3561,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } // Emit the smaller op and the shift. - SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT); + // Even though we shrink the constant, the VT should match the operation VT. + SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, NVT); SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); if (ShlVal == 1) CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), From a8f354662a73aeb5425ca6b5351cbe3c5dbfa390 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 Mar 2019 07:22:18 +0000 Subject: [PATCH 04/27] [X86] Update some of the getMachineNode calls from X86ISelDAGToDAG to also include a VT for a EFLAGS result. This makes the nodes consistent with how they would be emitted from the isel table. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356870 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6290a8f4a8dc..e08914c536e3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3167,14 +3167,14 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) }; - SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); + SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); // Update the chain. - ReplaceUses(Input.getValue(1), SDValue(NewNode, 1)); + ReplaceUses(Input.getValue(1), SDValue(NewNode, 2)); // Record the mem-refs CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); } else { - NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New); + NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New); } return NewNode; @@ -3563,12 +3563,13 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // Emit the smaller op and the shift. // Even though we shrink the constant, the VT should match the operation VT. 
SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, NVT); - SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, MVT::i32, + N0->getOperand(0), NewCst); if (ShlVal == 1) - CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + CurDAG->SelectNodeTo(Node, AddOp, NVT, MVT::i32, SDValue(New, 0), SDValue(New, 0)); else - CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), + CurDAG->SelectNodeTo(Node, ShlOp, NVT, MVT::i32, SDValue(New, 0), getI8Imm(ShlVal, dl)); return; } @@ -3969,7 +3970,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned TrailingZeros = countTrailingZeros(Mask); SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, + SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, N0.getOperand(0), Imm), 0); MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift); @@ -3980,7 +3981,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned LeadingZeros = countLeadingZeros(Mask); SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); SDValue Shift = - SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, + SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, N0.getOperand(0), Imm), 0); MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift); From fa60fe0a8f358563cbd4219811de57204036c7b7 Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Mon, 25 Mar 2019 08:54:29 +0000 Subject: [PATCH 05/27] [ARM GlobalISel] 64-bit memops should be aligned We currently use only VLDR/VSTR for all 64-bit loads/stores, so the memory operands must be word-aligned. Mark aligned operations as legal and narrow non-aligned ones to 32 bits. While we're here, also mark non-power-of-2 loads/stores as unsupported. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356872 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLegalizerInfo.cpp | 19 ++-- .../GlobalISel/arm-legalize-load-store.mir | 90 ++++++++++++++++++- test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 30 ------- 3 files changed, 99 insertions(+), 40 deletions(-) diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 82817d3b5ad1..5ab211b791e7 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -129,14 +129,13 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { // We're keeping these builders around because we'll want to add support for // floating point to them. - auto &LoadStoreBuilder = - getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .legalForTypesWithMemDesc({ - {s1, p0, 8, 8}, - {s8, p0, 8, 8}, - {s16, p0, 16, 8}, - {s32, p0, 32, 8}, - {p0, p0, 32, 8}}); + auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE}) + .legalForTypesWithMemDesc({{s1, p0, 8, 8}, + {s8, p0, 8, 8}, + {s16, p0, 16, 8}, + {s32, p0, 32, 8}, + {p0, p0, 32, 8}}) + .unsupportedIfMemSizeNotPow2(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); @@ -155,7 +154,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG}) .legalFor({s32, s64}); - LoadStoreBuilder.legalFor({{s64, p0}}); + LoadStoreBuilder + .legalForTypesWithMemDesc({{s64, p0, 64, 32}}) + .maxScalar(0, s32); PhiBuilder.legalFor({s64}); getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1}, diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index 34ed8b843075..a7d83d549b8a 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -1,10 +1,16 @@ # RUN: llc -mtriple 
arm-- -run-pass=legalizer %s -o - | FileCheck %s -# RUN: llc -mtriple thumb-- -mattr=+v6t2 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple thumbv7-- -run-pass=legalizer %s -o - | FileCheck %s --- | define void @test_legal_loads_stores() { ret void } define void @test_load_from_stack() { ret void } + define void @test_load_store_64_vfp() #0 { ret void } + define void @test_load_store_64_novfp() #1 { ret void } + define void @test_gep() { ret void } + + attributes #0 = { "target-features"="+vfp2" } + attributes #1 = { "target-features"="-vfp2" } ... --- name: test_legal_loads_stores @@ -81,6 +87,88 @@ body: | BX_RET 14, $noreg, implicit $r0 ... --- +name: test_load_store_64_vfp +# CHECK-LABEL: name: test_load_store_64_vfp +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: $r0 + + ; Can't use the VFP support for unaligned operations, we need to use 32-bits + ; operations instead. + ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) + %0(p0) = COPY $r0 + %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) + G_STORE %1(s64), %0(p0) :: (store 8, align 1) + + ; For word-aligned we can use VFP operations. 
+ ; CHECK: [[V:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8, align 4) + ; CHECK: G_STORE [[V]](s64), %0(p0) :: (store 8, align 4) + %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) + G_STORE %2(s64), %0(p0) :: (store 8, align 4) + + BX_RET 14, $noreg +... +--- +name: test_load_store_64_novfp +# CHECK-LABEL: name: test_load_store_64_novfp +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: $r0 + + ; When we don't have VFP support, we need to use 32-bit operations. + ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) + %0(p0) = COPY $r0 + %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) + G_STORE %1(s64), %0(p0) :: (store 8, align 1) + + ; CHECK: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4) + ; CHECK-NEXT: [[OFFCOPY:%[0-9]+]]:_(s32) = COPY [[OFF]] + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFCOPY]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4) + ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4) + %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) + G_STORE %2(s64), %0(p0) :: (store 8, align 4) + + BX_RET 14, 
$noreg +... +--- name: test_gep # CHECK-LABEL: name: test_gep legalized: false diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index ae4e94904ec9..f4408adce960 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -1,7 +1,5 @@ # RUN: llc -mtriple arm-- -run-pass=legalizer %s -o - | FileCheck %s --- | - define void @test_load_store_64() #0 { ret void } - define void @test_constants_s64() { ret void } define void @test_phi_s64() #0 { ret void } @@ -9,34 +7,6 @@ attributes #0 = { "target-features"="+vfp2" } ... --- -name: test_load_store_64 -# CHECK-LABEL: name: test_load_store_64 -legalized: false -# CHECK: legalized: true -regBankSelected: false -selected: false -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } - - { id: 6, class: _ } -body: | - bb.0: - liveins: $r0 - - ; These are legal, so we should find them unchanged in the output - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s64), %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s64) = G_LOAD %0(p0) - %0(p0) = COPY $r0 - %1(s64) = G_LOAD %0(p0) :: (load 8) - G_STORE %1(s64), %0(p0) :: (store 8) - BX_RET 14, $noreg -... ---- name: test_constants_s64 # CHECK-LABEL: name: test_constants_s64 legalized: false From a355fbf680800618640081aefabc6c906f935e73 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Mon, 25 Mar 2019 08:54:47 +0000 Subject: [PATCH 06/27] [TTI] Move getIntrinsicCost to allow functions to be overridden. NFC Moving this to base class TargetTransformInfoImplCRTPBase allows static_cast to a subtarget so that calls to e.g. getMemcpyCost actually go the overridden functions. 
Differential revision: https://reviews.llvm.org/D59706 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356873 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 88 +++++++++---------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 6b209ed2d715..a1e1f9b07aaf 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -144,50 +144,6 @@ class TargetTransformInfoImplBase { return TTI::TCC_Expensive; } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U) { - switch (IID) { - default: - // Intrinsics rarely (if ever) have normal argument setup constraints. - // Model them as having a basic instruction cost. - return TTI::TCC_Basic; - - // TODO: other libc intrinsics. - case Intrinsic::memcpy: - return getMemcpyCost(dyn_cast(U)); - - case Intrinsic::annotation: - case Intrinsic::assume: - case Intrinsic::sideeffect: - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: - case Intrinsic::is_constant: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result: - case Intrinsic::experimental_gc_relocate: - case Intrinsic::coro_alloc: - case Intrinsic::coro_begin: - case Intrinsic::coro_free: - case Intrinsic::coro_end: - case Intrinsic::coro_frame: - case Intrinsic::coro_size: - case Intrinsic::coro_suspend: - case Intrinsic::coro_param: - case Intrinsic::coro_subfn_addr: - // These intrinsics don't actually represent code after lowering. 
- return TTI::TCC_Free; - } - } - bool hasBranchDivergence() { return false; } bool isSourceOfDivergence(const Value *V) { return false; } @@ -786,7 +742,49 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TTI::TCC_Basic; } - using BaseT::getIntrinsicCost; + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys, const User *U) { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + return TTI::TCC_Basic; + + // TODO: other libc intrinsics. + case Intrinsic::memcpy: + return static_cast(this)->getMemcpyCost(dyn_cast(U)); + + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_param: + case Intrinsic::coro_subfn_addr: + // These intrinsics don't actually represent code after lowering. 
+ return TTI::TCC_Free; + } + } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments, const User *U) { From 6e821f01513dd49ed1668b65d10b2dbd07801d39 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 25 Mar 2019 09:27:42 +0000 Subject: [PATCH 07/27] Fix the build with GCC 4.8 after r356783 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356875 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/MemorySSA.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index ea68faa7fc7f..5630a01824dc 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -665,7 +665,7 @@ template class ClobberWalker { struct generic_def_path_iterator : public iterator_facade_base, std::forward_iterator_tag, T *> { - generic_def_path_iterator() = default; + generic_def_path_iterator() {} generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} T &operator*() const { return curNode(); } From 75ebb082a21d5e14710b428bc6a24a656083c30b Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Mon, 25 Mar 2019 11:02:49 +0000 Subject: [PATCH 08/27] [llvm-readobj] Separate `Symbol Version` dumpers into `LLVM style` and `GNU style` Summary: Currently, llvm-readobj can dump symbol version sections only in LLVM style. In this patch, I would like to separate these dumpers into GNU style and LLVM style for future implementation. 
Reviewers: grimar, jhenderson, mattd, rupprecht Reviewed By: jhenderson, rupprecht Subscribers: ormris, dyung, RKSimon, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59186 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356881 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-readobj/elf-versioninfo.test | 211 ++++++------ test/tools/yaml2obj/verdef-section.yaml | 2 +- test/tools/yaml2obj/verneed-section.yaml | 2 +- test/tools/yaml2obj/versym-section.yaml | 2 +- tools/llvm-readobj/ELFDumper.cpp | 320 +++++++++++-------- 5 files changed, 298 insertions(+), 239 deletions(-) diff --git a/test/tools/llvm-readobj/elf-versioninfo.test b/test/tools/llvm-readobj/elf-versioninfo.test index 82029cf82fd3..7ef599e1370b 100644 --- a/test/tools/llvm-readobj/elf-versioninfo.test +++ b/test/tools/llvm-readobj/elf-versioninfo.test @@ -1,106 +1,119 @@ // Test that llvm-readobj dumps version info tags correctly. -RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s +RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERDEF +RUN: llvm-readelf -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERDEF -CHECK: 0x000000006FFFFFF0 VERSYM 0x24C -CHECK: 0x000000006FFFFFFC VERDEF 0x25C -CHECK: 0x000000006FFFFFFD VERDEFNUM 3 +LLVM-VERDEF: 0x000000006FFFFFF0 VERSYM 0x24C +LLVM-VERDEF: 0x000000006FFFFFFC VERDEF 0x25C +LLVM-VERDEF: 0x000000006FFFFFFD VERDEFNUM 3 -CHECK: Version symbols { -CHECK-NEXT: Section Name: .gnu.version (20) -CHECK-NEXT: Address: 0x24C -CHECK-NEXT: Offset: 0x24C -CHECK-NEXT: Link: 1 -CHECK-NEXT: Symbols [ -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 0 -CHECK-NEXT: Name: {{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: _end{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: _edata{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 3 -CHECK-NEXT: 
Name: goo@@VERSION2 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Name: __bss_start{{$}} -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 2 -CHECK-NEXT: Name: foo@@VERSION1 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 2 -CHECK-NEXT: Name: VERSION1@@VERSION1 -CHECK-NEXT: } -CHECK-NEXT: Symbol { -CHECK-NEXT: Version: 3 -CHECK-NEXT: Name: VERSION2@@VERSION2 -CHECK-NEXT: } -CHECK-NEXT: ] -CHECK-NEXT: } +LLVM-VERDEF: Version symbols { +LLVM-VERDEF-NEXT: Section Name: .gnu.version (20) +LLVM-VERDEF-NEXT: Address: 0x24C +LLVM-VERDEF-NEXT: Offset: 0x24C +LLVM-VERDEF-NEXT: Link: 1 +LLVM-VERDEF-NEXT: Symbols [ +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 0 +LLVM-VERDEF-NEXT: Name: {{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: _end{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: _edata{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 3 +LLVM-VERDEF-NEXT: Name: goo@@VERSION2 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Name: __bss_start{{$}} +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 2 +LLVM-VERDEF-NEXT: Name: foo@@VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 2 +LLVM-VERDEF-NEXT: Name: VERSION1@@VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Symbol { +LLVM-VERDEF-NEXT: Version: 3 +LLVM-VERDEF-NEXT: Name: VERSION2@@VERSION2 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: ] +LLVM-VERDEF-NEXT: } -CHECK: SHT_GNU_verdef { -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: Base (0x1) -CHECK-NEXT: Index: 1 -CHECK-NEXT: Hash: 430712 -CHECK-NEXT: Name: blah -CHECK-NEXT: } -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: 0x0 -CHECK-NEXT: Index: 2 -CHECK-NEXT: Hash: 175630257 -CHECK-NEXT: Name: VERSION1 
-CHECK-NEXT: } -CHECK-NEXT: Definition { -CHECK-NEXT: Version: 1 -CHECK-NEXT: Flags: 0x0 -CHECK-NEXT: Index: 3 -CHECK-NEXT: Hash: 175630258 -CHECK-NEXT: Name: VERSION2 -CHECK-NEXT: Predecessor: VERSION1 -CHECK-NEXT: } -CHECK-NEXT: } +LLVM-VERDEF: SHT_GNU_verdef { +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: Base (0x1) +LLVM-VERDEF-NEXT: Index: 1 +LLVM-VERDEF-NEXT: Hash: 430712 +LLVM-VERDEF-NEXT: Name: blah +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: 0x0 +LLVM-VERDEF-NEXT: Index: 2 +LLVM-VERDEF-NEXT: Hash: 175630257 +LLVM-VERDEF-NEXT: Name: VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: Definition { +LLVM-VERDEF-NEXT: Version: 1 +LLVM-VERDEF-NEXT: Flags: 0x0 +LLVM-VERDEF-NEXT: Index: 3 +LLVM-VERDEF-NEXT: Hash: 175630258 +LLVM-VERDEF-NEXT: Name: VERSION2 +LLVM-VERDEF-NEXT: Predecessor: VERSION1 +LLVM-VERDEF-NEXT: } +LLVM-VERDEF-NEXT: } -RUN: llvm-readobj -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=VERNEED +GNU-VERDEF: 0x000000006ffffff0 VERSYM 0x24c +GNU-VERDEF: 0x000000006ffffffc VERDEF 0x25c +GNU-VERDEF: 0x000000006ffffffd VERDEFNUM 3 + +GNU-VERDEF: Dumper for .gnu.version is not implemented +GNU-VERDEF: Dumper for .gnu.version_d is not implemented + +RUN: llvm-readobj -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERNEED +RUN: llvm-readelf -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERNEED + +LLVM-VERNEED: SHT_GNU_verneed { +LLVM-VERNEED-NEXT: Dependency { +LLVM-VERNEED-NEXT: Version: 1 +LLVM-VERNEED-NEXT: Count: 2 +LLVM-VERNEED-NEXT: FileName: verneed1.so.0 +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1938 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 3 +LLVM-VERNEED-NEXT: Name: v2 +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1939 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 2 +LLVM-VERNEED-NEXT: Name: v3 +LLVM-VERNEED-NEXT: } 
+LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: Dependency { +LLVM-VERNEED-NEXT: Version: 1 +LLVM-VERNEED-NEXT: Count: 1 +LLVM-VERNEED-NEXT: FileName: verneed2.so.0 +LLVM-VERNEED-NEXT: Entry { +LLVM-VERNEED-NEXT: Hash: 1937 +LLVM-VERNEED-NEXT: Flags: 0x0 +LLVM-VERNEED-NEXT: Index: 4 +LLVM-VERNEED-NEXT: Name: v1 +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: } +LLVM-VERNEED-NEXT: } + +GNU-VERNEED: Dumper for .gnu.version is not implemented +GNU-VERNEED: Dumper for .gnu.version_r is not implemented -VERNEED: SHT_GNU_verneed { -VERNEED-NEXT: Dependency { -VERNEED-NEXT: Version: 1 -VERNEED-NEXT: Count: 2 -VERNEED-NEXT: FileName: verneed1.so.0 -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1938 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 3 -VERNEED-NEXT: Name: v2 -VERNEED-NEXT: } -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1939 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 2 -VERNEED-NEXT: Name: v3 -VERNEED-NEXT: } -VERNEED-NEXT: } -VERNEED-NEXT: Dependency { -VERNEED-NEXT: Version: 1 -VERNEED-NEXT: Count: 1 -VERNEED-NEXT: FileName: verneed2.so.0 -VERNEED-NEXT: Entry { -VERNEED-NEXT: Hash: 1937 -VERNEED-NEXT: Flags: 0x0 -VERNEED-NEXT: Index: 4 -VERNEED-NEXT: Name: v1 -VERNEED-NEXT: } -VERNEED-NEXT: } -VERNEED-NEXT: } diff --git a/test/tools/yaml2obj/verdef-section.yaml b/test/tools/yaml2obj/verdef-section.yaml index f81bcf196f6f..deac6e736c0b 100644 --- a/test/tools/yaml2obj/verdef-section.yaml +++ b/test/tools/yaml2obj/verdef-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s # Check we are able to handle the SHT_GNU_verdef sections. 
diff --git a/test/tools/yaml2obj/verneed-section.yaml b/test/tools/yaml2obj/verneed-section.yaml index 436e54ba0893..2fc58ad64f0f 100644 --- a/test/tools/yaml2obj/verneed-section.yaml +++ b/test/tools/yaml2obj/verneed-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s # Check we are able to handle the SHT_GNU_verneed sections. diff --git a/test/tools/yaml2obj/versym-section.yaml b/test/tools/yaml2obj/versym-section.yaml index 31dfecfa297c..3c08ddd63d2e 100644 --- a/test/tools/yaml2obj/versym-section.yaml +++ b/test/tools/yaml2obj/versym-section.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-readelf -V %t | FileCheck %s +# RUN: llvm-readobj -V %t | FileCheck %s ## Check we are able to produce a valid SHT_GNU_versym ## section from its description. diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 8c1c6fef89ec..091456329f91 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -342,6 +342,12 @@ template class DumpStyle { virtual void printProgramHeaders(const ELFFile *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; + virtual void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; + virtual void printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; + virtual void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) = 0; virtual void printHashHistogram(const ELFFile *Obj) = 0; virtual void printCGProfile(const ELFFile *Obj) = 0; virtual void printAddrsig(const ELFFile *Obj) = 0; @@ -376,6 +382,12 @@ template class GNUStyle : public DumpStyle { size_t Offset) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; + void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const 
ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; void printHashHistogram(const ELFFile *Obj) override; void printCGProfile(const ELFFile *Obj) override; void printAddrsig(const ELFFile *Obj) override; @@ -470,6 +482,12 @@ template class LLVMStyle : public DumpStyle { void printDynamicRelocations(const ELFO *Obj) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; + void printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; + void printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) override; void printHashHistogram(const ELFFile *Obj) override; void printCGProfile(const ELFFile *Obj) override; void printAddrsig(const ELFFile *Obj) override; @@ -607,143 +625,6 @@ template void ELFDumper::LoadVersionMap() const { LoadVersionNeeds(dot_gnu_version_r_sec); } -template -static void printVersionSymbolSection(ELFDumper *Dumper, const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - DictScope SS(W, "Version symbols"); - if (!Sec) - return; - StringRef Name = unwrapOrError(Obj->getSectionName(Sec)); - W.printNumber("Section Name", Name, Sec->sh_name); - W.printHex("Address", Sec->sh_addr); - W.printHex("Offset", Sec->sh_offset); - W.printNumber("Link", Sec->sh_link); - - const uint8_t *P = (const uint8_t *)Obj->base() + Sec->sh_offset; - StringRef StrTable = Dumper->getDynamicStringTable(); - - // Same number of entries in the dynamic symbol table (DT_SYMTAB). 
- ListScope Syms(W, "Symbols"); - for (const typename ELFO::Elf_Sym &Sym : Dumper->dynamic_symbols()) { - DictScope S(W, "Symbol"); - std::string FullSymbolName = - Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */); - W.printNumber("Version", *P); - W.printString("Name", FullSymbolName); - P += sizeof(typename ELFO::Elf_Half); - } -} - -static const EnumEntry SymVersionFlags[] = { - {"Base", "BASE", VER_FLG_BASE}, - {"Weak", "WEAK", VER_FLG_WEAK}, - {"Info", "INFO", VER_FLG_INFO}}; - -template -static void printVersionDefinitionSection(ELFDumper *Dumper, - const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - using VerDef = typename ELFO::Elf_Verdef; - using VerdAux = typename ELFO::Elf_Verdaux; - - DictScope SD(W, "SHT_GNU_verdef"); - if (!Sec) - return; - - const uint8_t *SecStartAddress = - (const uint8_t *)Obj->base() + Sec->sh_offset; - const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; - const uint8_t *P = SecStartAddress; - const typename ELFO::Elf_Shdr *StrTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - - unsigned VerDefsNum = Sec->sh_info; - while (VerDefsNum--) { - if (P + sizeof(VerDef) > SecEndAddress) - report_fatal_error("invalid offset in the section"); - - auto *VD = reinterpret_cast(P); - DictScope Def(W, "Definition"); - W.printNumber("Version", VD->vd_version); - W.printEnum("Flags", VD->vd_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", VD->vd_ndx); - W.printNumber("Hash", VD->vd_hash); - W.printString("Name", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - VD->getAux()->vda_name))); - if (!VD->vd_cnt) - report_fatal_error("at least one definition string must exist"); - if (VD->vd_cnt > 2) - report_fatal_error("more than one predecessor is not expected"); - - if (VD->vd_cnt == 2) { - const uint8_t *PAux = P + VD->vd_aux + VD->getAux()->vda_next; - const VerdAux *Aux = reinterpret_cast(PAux); - W.printString("Predecessor", - StringRef((const char 
*)(Obj->base() + StrTab->sh_offset + - Aux->vda_name))); - } - - P += VD->vd_next; - } -} - -template -static void printVersionDependencySection(ELFDumper *Dumper, - const ELFO *Obj, - const typename ELFO::Elf_Shdr *Sec, - ScopedPrinter &W) { - using VerNeed = typename ELFO::Elf_Verneed; - using VernAux = typename ELFO::Elf_Vernaux; - - DictScope SD(W, "SHT_GNU_verneed"); - if (!Sec) - return; - - const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset; - const typename ELFO::Elf_Shdr *StrTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - - const uint8_t *P = SecData; - unsigned VerNeedNum = Sec->sh_info; - for (unsigned I = 0; I < VerNeedNum; ++I) { - const VerNeed *Need = reinterpret_cast(P); - DictScope Entry(W, "Dependency"); - W.printNumber("Version", Need->vn_version); - W.printNumber("Count", Need->vn_cnt); - W.printString("FileName", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - Need->vn_file))); - - const uint8_t *PAux = P + Need->vn_aux; - for (unsigned J = 0; J < Need->vn_cnt; ++J) { - const VernAux *Aux = reinterpret_cast(PAux); - DictScope Entry(W, "Entry"); - W.printNumber("Hash", Aux->vna_hash); - W.printEnum("Flags", Aux->vna_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", Aux->vna_other); - W.printString("Name", - StringRef((const char *)(Obj->base() + StrTab->sh_offset + - Aux->vna_name))); - PAux += Aux->vna_next; - } - P += Need->vn_next; - } -} - -template void ELFDumper::printVersionInfo() { - // Dump version symbol section. - printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W); - - // Dump version definition section. - printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W); - - // Dump version dependency section. 
- printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W); -} - template StringRef ELFDumper::getSymbolVersion(StringRef StrTab, const Elf_Sym *symb, @@ -925,6 +806,11 @@ static const EnumEntry ElfOSABI[] = { {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE} }; +static const EnumEntry SymVersionFlags[] = { + {"Base", "BASE", VER_FLG_BASE}, + {"Weak", "WEAK", VER_FLG_WEAK}, + {"Info", "INFO", VER_FLG_INFO}}; + static const EnumEntry AMDGPUElfOSABI[] = { {"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA}, {"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL}, @@ -1616,6 +1502,20 @@ void ELFDumper::printProgramHeaders( PrintSectionMapping); } +template void ELFDumper::printVersionInfo() { + // Dump version symbol section. + ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(), + dot_gnu_version_sec); + + // Dump version definition section. + ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(), + dot_gnu_version_d_sec); + + // Dump version dependency section. 
+ ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(), + dot_gnu_version_r_sec); +} + template void ELFDumper::printDynamicRelocations() { ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile()); } @@ -3460,6 +3360,36 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { } } +template +void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + +template +void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + +template +void GNUStyle::printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + OS << "Dumper for " << SecName << " is not implemented\n"; +} + // Hash histogram shows statistics of how efficient the hash was for the // dynamic symbol table. The table shows number of hash buckets for different // lengths of chains as absolute number and percentage of the total buckets. 
@@ -4538,6 +4468,122 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { } } +template +void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SS(W, "Version symbols"); + if (!Sec) + return; + + StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + W.printNumber("Section Name", SecName, Sec->sh_name); + W.printHex("Address", Sec->sh_addr); + W.printHex("Offset", Sec->sh_offset); + W.printNumber("Link", Sec->sh_link); + + const uint8_t *VersymBuf = (const uint8_t *)Obj->base() + Sec->sh_offset; + const ELFDumper *Dumper = this->dumper(); + StringRef StrTable = Dumper->getDynamicStringTable(); + + // Same number of entries in the dynamic symbol table (DT_SYMTAB). + ListScope Syms(W, "Symbols"); + for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) { + DictScope S(W, "Symbol"); + const Elf_Versym *Versym = reinterpret_cast(VersymBuf); + std::string FullSymbolName = + Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */); + W.printNumber("Version", Versym->vs_index & VERSYM_VERSION); + W.printString("Name", FullSymbolName); + VersymBuf += sizeof(Elf_Versym); + } +} + +template +void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SD(W, "SHT_GNU_verdef"); + if (!Sec) + return; + + const uint8_t *SecStartAddress = + (const uint8_t *)Obj->base() + Sec->sh_offset; + const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; + const uint8_t *VerdefBuf = SecStartAddress; + const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + + unsigned VerDefsNum = Sec->sh_info; + while (VerDefsNum--) { + if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress) + // FIXME: report_fatal_error is not a good way to report error. We should + // emit a parsing error here and below. 
+ report_fatal_error("invalid offset in the section"); + + const Elf_Verdef *Verdef = reinterpret_cast(VerdefBuf); + DictScope Def(W, "Definition"); + W.printNumber("Version", Verdef->vd_version); + W.printEnum("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", Verdef->vd_ndx); + W.printNumber("Hash", Verdef->vd_hash); + W.printString("Name", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verdef->getAux()->vda_name))); + if (!Verdef->vd_cnt) + report_fatal_error("at least one definition string must exist"); + if (Verdef->vd_cnt > 2) + report_fatal_error("more than one predecessor is not expected"); + + if (Verdef->vd_cnt == 2) { + const uint8_t *VerdauxBuf = + VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next; + const Elf_Verdaux *Verdaux = + reinterpret_cast(VerdauxBuf); + W.printString("Predecessor", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verdaux->vda_name))); + } + VerdefBuf += Verdef->vd_next; + } +} + +template +void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, + const Elf_Shdr *Sec) { + DictScope SD(W, "SHT_GNU_verneed"); + if (!Sec) + return; + + const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset; + const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + + const uint8_t *VerneedBuf = SecData; + unsigned VerneedNum = Sec->sh_info; + for (unsigned I = 0; I < VerneedNum; ++I) { + const Elf_Verneed *Verneed = + reinterpret_cast(VerneedBuf); + DictScope Entry(W, "Dependency"); + W.printNumber("Version", Verneed->vn_version); + W.printNumber("Count", Verneed->vn_cnt); + W.printString("FileName", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Verneed->vn_file))); + + const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; + for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { + const Elf_Vernaux *Vernaux = + reinterpret_cast(VernauxBuf); + DictScope Entry(W, "Entry"); + W.printNumber("Hash", Vernaux->vna_hash); + 
W.printEnum("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", Vernaux->vna_other); + W.printString("Name", + StringRef((const char *)(Obj->base() + StrTab->sh_offset + + Vernaux->vna_name))); + VernauxBuf += Vernaux->vna_next; + } + VerneedBuf += Verneed->vn_next; + } +} + template void LLVMStyle::printHashHistogram(const ELFFile *Obj) { W.startLine() << "Hash Histogram not implemented!\n"; From ce7cb683c293bd4b63b4eeca0da381c1e9892056 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 25 Mar 2019 11:23:41 +0000 Subject: [PATCH 09/27] [MIPS GlobalISel] Lower float and double arguments in registers Lower float and double arguments in registers for MIPS32. When float/double argument is passed through gpr registers select appropriate move instruction. Differential Revision: https://reviews.llvm.org/D59642 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCallLowering.cpp | 124 +++++++--- lib/Target/Mips/MipsCallLowering.h | 10 +- .../GlobalISel/irtranslator/float_args.ll | 211 ++++++++++++++++++ 3 files changed, 309 insertions(+), 36 deletions(-) create mode 100644 test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp index fd0f7921ba3a..0cee6e732ec0 100644 --- a/lib/Target/Mips/MipsCallLowering.cpp +++ b/lib/Target/Mips/MipsCallLowering.cpp @@ -23,10 +23,10 @@ using namespace llvm; MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI) : CallLowering(&TLI) {} -bool MipsCallLowering::MipsHandler::assign(unsigned VReg, - const CCValAssign &VA) { +bool MipsCallLowering::MipsHandler::assign(unsigned VReg, const CCValAssign &VA, + const EVT &VT) { if (VA.isRegLoc()) { - assignValueToReg(VReg, VA); + assignValueToReg(VReg, VA, VT); } else if (VA.isMemLoc()) { assignValueToAddress(VReg, VA); } else { @@ -37,9 +37,10 @@ bool 
MipsCallLowering::MipsHandler::assign(unsigned VReg, bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef VRegs, ArrayRef ArgLocs, - unsigned ArgLocsStartIndex) { + unsigned ArgLocsStartIndex, + const EVT &VT) { for (unsigned i = 0; i < VRegs.size(); ++i) - if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i])) + if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i], VT)) return false; return true; } @@ -71,10 +72,10 @@ bool MipsCallLowering::MipsHandler::handle( for (unsigned i = 0; i < SplitLength; ++i) VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT})); - if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg)) + if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg, VT)) return false; } else { - if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex])) + if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex], VT)) return false; } } @@ -88,7 +89,8 @@ class IncomingValueHandler : public MipsCallLowering::MipsHandler { : MipsHandler(MIRBuilder, MRI) {} private: - void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override; + void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) override; unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) override; @@ -97,7 +99,7 @@ class IncomingValueHandler : public MipsCallLowering::MipsHandler { bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) override; + unsigned ArgsReg, const EVT &VT) override; virtual void markPhysRegUsed(unsigned PhysReg) { MIRBuilder.getMBB().addLiveIn(PhysReg); @@ -127,21 +129,47 @@ class CallReturnHandler : public IncomingValueHandler { } // end anonymous namespace void IncomingValueHandler::assignValueToReg(unsigned ValVReg, - const CCValAssign &VA) { + const CCValAssign &VA, + const EVT &VT) { + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); unsigned PhysReg = VA.getLocReg(); - switch (VA.getLocInfo()) { - case 
CCValAssign::LocInfo::SExt: - case CCValAssign::LocInfo::ZExt: - case CCValAssign::LocInfo::AExt: { - auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); - MIRBuilder.buildTrunc(ValVReg, Copy); - break; - } - default: - MIRBuilder.buildCopy(ValVReg, PhysReg); - break; + if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::BuildPairF64_64 + : Mips::BuildPairF64) + .addDef(ValVReg) + .addUse(PhysReg + (STI.isLittle() ? 0 : 1)) + .addUse(PhysReg + (STI.isLittle() ? 1 : 0)) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + markPhysRegUsed(PhysReg); + markPhysRegUsed(PhysReg + 1); + } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder.buildInstr(Mips::MTC1) + .addDef(ValVReg) + .addUse(PhysReg) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + markPhysRegUsed(PhysReg); + } else { + switch (VA.getLocInfo()) { + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + } + markPhysRegUsed(PhysReg); } - markPhysRegUsed(PhysReg); } unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA, @@ -180,8 +208,8 @@ void IncomingValueHandler::assignValueToAddress(unsigned ValVReg, bool IncomingValueHandler::handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) { - if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex)) + unsigned ArgsReg, const EVT &VT) { + if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT)) return false; setLeastSignificantFirst(VRegs); MIRBuilder.buildMerge(ArgsReg, VRegs); @@ -196,7 +224,8 @@ class 
OutgoingValueHandler : public MipsCallLowering::MipsHandler { : MipsHandler(MIRBuilder, MRI), MIB(MIB) {} private: - void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override; + void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) override; unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) override; @@ -205,7 +234,7 @@ class OutgoingValueHandler : public MipsCallLowering::MipsHandler { bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) override; + unsigned ArgsReg, const EVT &VT) override; unsigned extendRegister(unsigned ValReg, const CCValAssign &VA); @@ -214,11 +243,40 @@ class OutgoingValueHandler : public MipsCallLowering::MipsHandler { } // end anonymous namespace void OutgoingValueHandler::assignValueToReg(unsigned ValVReg, - const CCValAssign &VA) { + const CCValAssign &VA, + const EVT &VT) { unsigned PhysReg = VA.getLocReg(); - unsigned ExtReg = extendRegister(ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - MIB.addUse(PhysReg, RegState::Implicit); + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + + if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64 + : Mips::ExtractElementF64) + .addDef(PhysReg + (STI.isLittle() ? 1 : 0)) + .addUse(ValVReg) + .addImm(1) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + MIRBuilder + .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64 + : Mips::ExtractElementF64) + .addDef(PhysReg + (STI.isLittle() ? 
0 : 1)) + .addUse(ValVReg) + .addImm(0) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) { + MIRBuilder.buildInstr(Mips::MFC1) + .addDef(PhysReg) + .addUse(ValVReg) + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } else { + unsigned ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildCopy(PhysReg, ExtReg); + MIB.addUse(PhysReg, RegState::Implicit); + } } unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA, @@ -286,10 +344,10 @@ unsigned OutgoingValueHandler::extendRegister(unsigned ValReg, bool OutgoingValueHandler::handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, unsigned ArgLocsStartIndex, - unsigned ArgsReg) { + unsigned ArgsReg, const EVT &VT) { MIRBuilder.buildUnmerge(VRegs, ArgsReg); setLeastSignificantFirst(VRegs); - if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex)) + if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT)) return false; return true; @@ -300,6 +358,8 @@ static bool isSupportedType(Type *T) { return true; if (T->isPointerTy()) return true; + if (T->isFloatingPointTy()) + return true; return false; } diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h index dc04d83733e7..05c703b60bd6 100644 --- a/lib/Target/Mips/MipsCallLowering.h +++ b/lib/Target/Mips/MipsCallLowering.h @@ -35,7 +35,7 @@ class MipsCallLowering : public CallLowering { protected: bool assignVRegs(ArrayRef VRegs, ArrayRef ArgLocs, - unsigned Index); + unsigned ArgLocsStartIndex, const EVT &VT); void setLeastSignificantFirst(SmallVectorImpl &VRegs); @@ -43,19 +43,21 @@ class MipsCallLowering : public CallLowering { MachineRegisterInfo &MRI; private: - bool assign(unsigned VReg, const CCValAssign &VA); + bool assign(unsigned VReg, const CCValAssign &VA, const EVT &VT); virtual unsigned getStackAddress(const CCValAssign &VA, MachineMemOperand *&MMO) = 0; - 
virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0; + virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA, + const EVT &VT) = 0; virtual void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) = 0; virtual bool handleSplit(SmallVectorImpl &VRegs, ArrayRef ArgLocs, - unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0; + unsigned ArgLocsStartIndex, unsigned ArgsReg, + const EVT &VT) = 0; }; MipsCallLowering(const MipsTargetLowering &TLI); diff --git a/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll b/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll new file mode 100644 index 000000000000..24cfcd895a78 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32 +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 + +define float @float_in_fpr(float %a, float %b) { + ; FP32-LABEL: name: float_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 +entry: + ret float %b +} + +define double @double_in_fpr(double %a, double %b) { + ; FP32-LABEL: name: double_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; FP32: $d0 = COPY 
[[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $d12_64, $d14_64 + ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64 + ; FP64: $d0_64 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + ret double %b +} + +define float @float_in_gpr(i32 %a, float %b) { + ; FP32-LABEL: name: float_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]](s32) + ; FP64: RetRA implicit $f0 +entry: + ret float %b +} + +define double @double_in_gpr(i32 %a, double %b) { + ; FP32-LABEL: name: double_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[BuildPairF64_64_:%[0-9]+]]:fgr64(s64) = BuildPairF64_64 $a2, $a3 + ; FP64: $d0_64 = COPY [[BuildPairF64_64_]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + ret double %b +} + +define float @call_float_in_fpr(float %a, float %b) { + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]](s32) + ; FP32: $f14 = COPY [[COPY1]](s32) + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, 
implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]](s32) + ; FP64: $f14 = COPY [[COPY1]](s32) + ; FP64: JAL @float_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]](s32) + ; FP64: RetRA implicit $f0 +entry: + %call = call float @float_in_fpr(float %a, float %b) + ret float %call +} + +define double @call_double_in_fpr(double %a, double %b) { + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: bb.1.entry: + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]](s64) + ; FP32: $d7 = COPY [[COPY1]](s64) + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: bb.1.entry: + ; FP64: liveins: $d12_64, $d14_64 + ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d12_64 = COPY [[COPY]](s64) + ; FP64: $d14_64 = COPY [[COPY1]](s64) + ; FP64: JAL @double_in_fpr, 
csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit $d14_64, implicit-def $d0_64 + ; FP64: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0_64 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0_64 = COPY [[COPY2]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + %call = call double @double_in_fpr(double %a, double %b) + ret double %call +} + +define float @call_float_in_gpr(i32 %a, float %b) { + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a1 = MFC1 [[MTC1_]](s32) + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a1 = MFC1 [[MTC1_]](s32) + ; FP64: JAL @float_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 +entry: + %call = call float @float_in_gpr(i32 %a, float %b) + ret float %call +} + + +define double @call_double_in_gpr(i32 %a, double %b) { + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: bb.1.entry: + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; 
FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: bb.1.entry: + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; FP64: [[BuildPairF64_64_:%[0-9]+]]:fgr64(s64) = BuildPairF64_64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a3 = ExtractElementF64_64 [[BuildPairF64_64_]](s64), 1 + ; FP64: $a2 = ExtractElementF64_64 [[BuildPairF64_64_]](s64), 0 + ; FP64: JAL @double_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0_64 + ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d0_64 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0_64 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0_64 +entry: + %call = call double @double_in_gpr(i32 %a, double %b) + ret double %call +} From f636c26bde8b2185dfd83fb70fe74dd37884a93c Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 25 Mar 2019 11:30:46 +0000 Subject: [PATCH 10/27] [MIPS GlobalISel] Add floating point register bank Add floating point register bank for MIPS32. Implement getRegBankFromRegClass for float register classes. 
Differential Revision: https://reviews.llvm.org/D59643 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356883 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterBankInfo.cpp | 5 + lib/Target/Mips/MipsRegisterBanks.td | 2 + .../GlobalISel/regbankselect/float_args.mir | 296 ++++++++++++++++++ 3 files changed, 303 insertions(+) create mode 100644 test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp index 214dd106869e..08c33a4119ce 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -62,6 +62,11 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass( case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID: case Mips::SP32RegClassID: return getRegBank(Mips::GPRBRegBankID); + case Mips::FGRCCRegClassID: + case Mips::FGR64RegClassID: + case Mips::AFGR64RegClassID: + case Mips::AFGR64_and_OddSPRegClassID: + return getRegBank(Mips::FPRBRegBankID); default: llvm_unreachable("Register class not supported"); } diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td index b591841dfef2..14a0181f8f11 100644 --- a/lib/Target/Mips/MipsRegisterBanks.td +++ b/lib/Target/Mips/MipsRegisterBanks.td @@ -10,3 +10,5 @@ //===----------------------------------------------------------------------===// def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>; + +def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>; diff --git a/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir b/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir new file mode 100644 index 000000000000..ba4d28ca53ac --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/regbankselect/float_args.mir @@ -0,0 +1,296 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - 
| FileCheck %s -check-prefixes=FP32 +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 + +--- | + + define void @float_in_fpr() {entry: ret void} + define void @double_in_fpr() {entry: ret void} + define void @float_in_gpr() {entry: ret void} + define void @double_in_gpr() {entry: ret void} + define void @call_float_in_fpr() {entry: ret void} + define void @call_double_in_fpr() {entry: ret void} + define void @call_float_in_gpr() {entry: ret void} + define void @call_double_in_gpr() {entry: ret void} + +... +--- +name: float_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP32: $f0 = COPY [[COPY]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP64: $f0 = COPY [[COPY]](s32) + ; FP64: RetRA implicit $f0 + %1:_(s32) = COPY $f14 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP32: $d0 = COPY [[COPY]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP64: $d0 = COPY [[COPY]](s64) + ; FP64: RetRA implicit $d0 + %1:_(s64) = COPY $d7 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... 
+--- +name: float_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]](s32) + ; FP64: RetRA implicit $f0 + %1:fgr32(s32) = MTC1 $a1 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP64: $d0 = COPY [[BuildPairF64_]](s64) + ; FP64: RetRA implicit $d0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... 
+--- +name: call_float_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]](s32) + ; FP32: $f14 = COPY [[COPY1]](s32) + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]](s32) + ; FP64: $f14 = COPY [[COPY1]](s32) + ; FP64: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]](s32) + ; FP64: RetRA implicit $f0 + %0:_(s32) = COPY $f12 + %1:_(s32) = COPY $f14 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $f12 = COPY %0(s32) + $f14 = COPY %1(s32) + JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + %2:_(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... 
+--- +name: call_double_in_fpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]](s64) + ; FP32: $d7 = COPY [[COPY1]](s64) + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d6 = COPY [[COPY]](s64) + ; FP64: $d7 = COPY [[COPY1]](s64) + ; FP64: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP64: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY2]](s64) + ; FP64: RetRA implicit $d0 + %0:_(s64) = COPY $d6 + %1:_(s64) = COPY $d7 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $d6 = COPY %0(s64) + $d7 = COPY %1(s64) + JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + %2:_(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... 
+--- +name: call_float_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a1 = MFC1 [[MTC1_]](s32) + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]](s32) + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32(s32) = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a1 = MFC1 [[MTC1_]](s32) + ; FP64: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]](s32) + ; FP64: RetRA implicit $f0 + %0:_(s32) = COPY $a0 + %1:fgr32(s32) = MTC1 $a1 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a1 = MFC1 %1(s32) + JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + %2:_(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... 
+--- +name: call_double_in_gpr +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]](s32) + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]](s64) + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64(s64) = BuildPairF64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]](s32) + ; FP64: $a3 = ExtractElementF64 [[BuildPairF64_]](s64), 1 + ; FP64: $a2 = ExtractElementF64 [[BuildPairF64_]](s64), 0 + ; FP64: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP64: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY1]](s64) + ; FP64: RetRA implicit $d0 + %0:_(s32) = COPY $a0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a3 = ExtractElementF64 %1(s64), 1 + $a2 = ExtractElementF64 %1(s64), 0 + JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + %2:_(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... 
+ From af179cc68ee0288582ce01686a2e7361737fb2be Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 25 Mar 2019 11:32:27 +0000 Subject: [PATCH 11/27] gn build: Let get.py keep zip file in memory instead of using a temp file The zip is small, and it's a bit less code this way. No intended behavior change. Differential Revision: https://reviews.llvm.org/D59677 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356884 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/gn/get.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/utils/gn/get.py b/utils/gn/get.py index 4015d5986c9d..c39649df78a4 100755 --- a/utils/gn/get.py +++ b/utils/gn/get.py @@ -3,27 +3,20 @@ from __future__ import print_function +import io import os import urllib2 import sys -import tempfile import zipfile -def download_url(url, output_file): - """Download url into output_file.""" +def download_and_unpack(url, output_dir, gn): + """Download an archive from url and extract gn from it into output_dir.""" print('downloading %s ...' 
% url, end='') sys.stdout.flush() - output_file.write(urllib2.urlopen(url).read()) + data = urllib2.urlopen(url).read() print(' done') - - -def download_and_unpack(url, output_dir, gn): - """Download an archive from url and extract gn from it into output_dir.""" - with tempfile.TemporaryFile() as f: - download_url(url, f) - f.seek(0) - zipfile.ZipFile(f).extract(gn, path=output_dir) + zipfile.ZipFile(io.BytesIO(data)).extract(gn, path=output_dir) def set_executable_bit(path): From a0a20fdec38de7a543953665d0a958d7afc4b951 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 25 Mar 2019 11:33:19 +0000 Subject: [PATCH 12/27] gn build: Clean up README.rst a bit - Make introduction a bit shorter - Add a `git clone` step to Quick start - Put command to run first in each of the Quick start steps - Use ``code`` instead of `label` throughout; this is .rst not .md Differential Revision: https://reviews.llvm.org/D59600 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356885 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/gn/README.rst | 71 ++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/utils/gn/README.rst b/utils/gn/README.rst index 3c40846689c2..7ffa144aafb3 100644 --- a/utils/gn/README.rst +++ b/utils/gn/README.rst @@ -22,16 +22,10 @@ build. creates ninja files, but it can create some IDE projects (MSVC, Xcode, ...) which then shell out to ninja for the actual build. -Its main features are that GN is very fast (it currently produces ninja files -for LLVM's build in 35ms on the author's laptop, compared to 66s for CMake) -- -a 2000x difference), and since it's so fast it doesn't aggressively cache, -making it possible to switch e.g. between release and debug builds in one build -directory. - The main motivation behind the GN build is that some people find it more convenient for day-to-day hacking on LLVM than CMake. 
Distribution, building -just parts of LLVM, and embedding the LLVM GN build from other builds are a -non-goal for the GN build. +just parts of LLVM, and embedding the LLVM GN build from other builds are +non-goals for the GN build. This is a `good overview of GN `_. @@ -42,39 +36,42 @@ Quick start GN only works in the monorepo layout. -#. Obtain a gn binary. If gn is not already on your PATH, run - `llvm/utils/gn/get.py` to download a prebuilt gn binary if you're on a 64-bit - X86 system running Linux, macOS, or Windows, or `build gn yourself +#. ``git clone https://github.com/llvm/llvm-project.git; cd llvm-project`` if + you don't have a monorepo checkout yet. + +#. ``llvm/utils/gn/get.py`` to download a prebuilt gn binary if you're on a + 64-bit X86 system running Linux, macOS, or Windows. `Build gn yourself `_ if you're on a different platform or don't want to trust prebuilt binaries. -#. In the root of the monorepo, run `llvm/utils/gn/gn.py gen out/gn`. - `out/gn` is the build directory, it can have any name, and you can have as - many as you want, each with different build settings. (The `gn.py` script - adds `--dotfile=llvm/utils/gn/.gn --root=.` and just runs regular `gn`; +#. ``llvm/utils/gn/gn.py gen out/gn`` to run GN and create build files. + ``out/gn`` is the build directory, it can have any name, and you can have as + many as you want, each with different build settings. (The ``gn.py`` script + adds ``--dotfile=llvm/utils/gn/.gn --root=.`` and just runs regular ``gn``; you can manually pass these parameters and not use the wrapper if you prefer.) -#. Run e.g. `ninja -C out/gn check-lld` to build all prerequisites for and - run the LLD tests. +#. ``ninja -C out/gn check-lld`` to build all prerequisites for and run the LLD + tests. By default, you get a release build with assertions enabled that targets -the host arch. You can set various build options by editing `out/gn/args.gn`, -for example putting `is_debug = true` in there gives you a debug build. 
Run -`llvm/utils/gn/gn.py args --list out/gn` to see a list of all possible -options. After touching `out/gn/args.gn`, just run ninja, it will re-invoke gn +the host arch. You can set build options by editing ``out/gn/args.gn``, for +example putting ``is_debug = true`` in there gives you a debug build. Run +``llvm/utils/gn/gn.py args --list out/gn`` to see a list of all possible +options. After touching ``out/gn/args.gn`` just run ninja: it will re-invoke gn before starting the build. -GN has extensive built-in help; try e.g. `gn help gen` to see the help -for the `gen` command. The full GN reference is also `available online -`_. +GN has extensive built-in help; try e.g. ``llvm/utils/gn/gn.py help gen`` to see +the help for the ``gen`` command. The full GN reference is also `available +online `_. -GN has an autoformatter: `git ls-files '*.gn' '*.gni' | xargs -n 1 gn format` +GN has an autoformatter: +``git ls-files '*.gn' '*.gni' | xargs llvm/utils/gn/gn.py format`` after making GN build changes is your friend. -To not put `BUILD.gn` into the main tree, they are all below -`utils/gn/secondary`. For example, the build file for `llvm/lib/Support` is in -`utils/gn/secondary/llvm/lib/Support`. +To not put ``BUILD.gn`` files into the main tree, they are all below +``utils/gn/secondary``. For example, the build file for ``llvm/lib/Support`` +is in ``utils/gn/secondary/llvm/lib/Support``. .. _Syncing GN files from CMake files: @@ -83,15 +80,15 @@ Syncing GN files from CMake files Sometimes after pulling in the latest changes, the GN build doesn't work. Most of the time this is due to someone adding a file to CMakeLists.txt file. -Run `llvm/utils/gn/build/sync_source_lists_from_cmake.py` to print a report -of which files need to be added to or removed from `BUILD.gn` files to -match the corresponding `CMakeLists.txt`. 
You have to manually read the output +Run ``llvm/utils/gn/build/sync_source_lists_from_cmake.py`` to print a report +of which files need to be added to or removed from ``BUILD.gn`` files to +match the corresponding ``CMakeLists.txt``. You have to manually read the output of the script and implement its suggestions. -If new `CMakeLists.txt` files have been added, you have to manually create -a new corresponding `BUILD.gn` file below `llvm/utils/gn/secondary/`. +If new ``CMakeLists.txt`` files have been added, you have to manually create +a new corresponding ``BUILD.gn`` file below ``llvm/utils/gn/secondary/``. -If the dependencies in a `CMakeLists.txt` file have been changed, you have to +If the dependencies in a ``CMakeLists.txt`` file have been changed, you have to manually analyze and fix. .. _Philosophy: @@ -133,9 +130,9 @@ configure is used for three classes of feature checks: config.h in a build step). For the last two points, it would be nice if LLVM didn't have a single -`config.h` header, but one header per toggle. That way, when e.g. -`llvm_enable_terminfo` is toggled, only the 3 files caring about that setting -would need to be rebuilt, instead of everything including `config.h`. +``config.h`` header, but one header per toggle. That way, when e.g. +``llvm_enable_terminfo`` is toggled, only the 3 files caring about that setting +would need to be rebuilt, instead of everything including ``config.h``. GN doesn't believe in users setting arbitrary cflags from an environment variable, it wants the build to be controlled by .gn files. From 29ab38eb24e08f6bfc74196cf6327e8c3f5474c5 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 25 Mar 2019 11:38:06 +0000 Subject: [PATCH 13/27] [MIPS GlobalISel] Select copy for arguments from FPRBRegBank Move selectCopy into MipsInstructionSelector class. Select copy for arguments from FPRBRegBank for MIPS32. 
Differential Revision: https://reviews.llvm.org/D59644 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356886 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstructionSelector.cpp | 20 +- .../instruction-select/float_args.mir | 303 ++++++++++++++++++ .../Mips/GlobalISel/llvm-ir/float_args.ll | 147 +++++++++ 3 files changed, 465 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir create mode 100644 test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp index 36aea2983591..ded8c1c1fbc0 100644 --- a/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/lib/Target/Mips/MipsInstructionSelector.cpp @@ -38,6 +38,7 @@ class MipsInstructionSelector : public InstructionSelector { bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool materialize32BitImm(unsigned DestReg, APInt Imm, MachineIRBuilder &B) const; + bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; const MipsTargetMachine &TM; const MipsSubtarget &STI; @@ -75,15 +76,24 @@ MipsInstructionSelector::MipsInstructionSelector( { } -static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { +bool MipsInstructionSelector::selectCopy(MachineInstr &I, + MachineRegisterInfo &MRI) const { unsigned DstReg = I.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) return true; - const TargetRegisterClass *RC = &Mips::GPR32RegClass; + const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); + const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + if (RegBank->getID() == Mips::FPRBRegBankID) { + if (DstSize == 32) + RC = &Mips::FGR32RegClass; + else if (DstSize == 64) + RC = STI.isFP64bit() ? 
&Mips::FGR64RegClass : &Mips::AFGR64RegClass; + else + llvm_unreachable("Unsupported destination size"); + } if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"); @@ -162,7 +172,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, if (!isPreISelGenericOpcode(I.getOpcode())) { if (I.isCopy()) - return selectCopy(I, TII, MRI, TRI, RBI); + return selectCopy(I, MRI); return true; } diff --git a/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir b/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir new file mode 100644 index 000000000000..a81888ab49b3 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/instruction-select/float_args.mir @@ -0,0 +1,303 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32 +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64 +--- | + + define void @float_in_fpr() {entry: ret void} + define void @double_in_fpr() {entry: ret void} + define void @float_in_gpr() {entry: ret void} + define void @double_in_gpr() {entry: ret void} + define void @call_float_in_fpr() {entry: ret void} + define void @call_double_in_fpr() {entry: ret void} + define void @call_float_in_gpr() {entry: ret void} + define void @call_double_in_gpr() {entry: ret void} + +... 
+--- +name: float_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f14 + ; FP32: $f0 = COPY [[COPY]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f14 + ; FP64: $f0 = COPY [[COPY]] + ; FP64: RetRA implicit $f0 + %1:fprb(s32) = COPY $f14 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... +--- +name: double_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d7 + ; FP32: $d0 = COPY [[COPY]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d7 + ; FP64: $d0 = COPY [[COPY]] + ; FP64: RetRA implicit $d0 + %1:fprb(s64) = COPY $d7 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: float_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP32: $f0 = COPY [[MTC1_]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP64: $f0 = COPY [[MTC1_]] + ; FP64: RetRA implicit $f0 + %1:fgr32(s32) = MTC1 $a1 + $f0 = COPY %1(s32) + RetRA implicit $f0 + +... 
+--- +name: double_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP32: $d0 = COPY [[BuildPairF64_]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP64: $d0 = COPY [[BuildPairF64_]] + ; FP64: RetRA implicit $d0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + $d0 = COPY %1(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $f12, $f14 + + ; FP32-LABEL: name: call_float_in_fpr + ; FP32: liveins: $f12, $f14 + ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f12 = COPY [[COPY]] + ; FP32: $f14 = COPY [[COPY1]] + ; FP32: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP32: [[COPY2:%[0-9]+]]:fgr32 = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY2]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_fpr + ; FP64: liveins: $f12, $f14 + ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f12 = COPY [[COPY]] + ; FP64: $f14 = COPY [[COPY1]] + ; FP64: JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + ; FP64: [[COPY2:%[0-9]+]]:fgr32 = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY2]] + ; FP64: RetRA implicit $f0 + %0:fprb(s32) = COPY $f12 + 
%1:fprb(s32) = COPY $f14 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $f12 = COPY %0(s32) + $f14 = COPY %1(s32) + JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0 + %2:fprb(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: call_double_in_fpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d6, $d7 + + ; FP32-LABEL: name: call_double_in_fpr + ; FP32: liveins: $d6, $d7 + ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 + ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d6 = COPY [[COPY]] + ; FP32: $d7 = COPY [[COPY1]] + ; FP32: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP32: [[COPY2:%[0-9]+]]:afgr64 = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY2]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_fpr + ; FP64: liveins: $d6, $d7 + ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6 + ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d6 = COPY [[COPY]] + ; FP64: $d7 = COPY [[COPY1]] + ; FP64: JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + ; FP64: [[COPY2:%[0-9]+]]:fgr64 = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY2]] + ; FP64: RetRA implicit $d0 + %0:fprb(s64) = COPY $d6 + %1:fprb(s64) = COPY $d7 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $d6 = COPY %0(s64) + $d7 = COPY %1(s64) + JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0 + %2:fprb(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def 
$sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... +--- +name: call_float_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; FP32-LABEL: name: call_float_in_gpr + ; FP32: liveins: $a0, $a1 + ; FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP32: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]] + ; FP32: $a1 = MFC1 [[MTC1_]] + ; FP32: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $f0 = COPY [[COPY1]] + ; FP32: RetRA implicit $f0 + ; FP64-LABEL: name: call_float_in_gpr + ; FP64: liveins: $a0, $a1 + ; FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP64: [[MTC1_:%[0-9]+]]:fgr32 = MTC1 $a1 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]] + ; FP64: $a1 = MFC1 [[MTC1_]] + ; FP64: JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $f0 = COPY [[COPY1]] + ; FP64: RetRA implicit $f0 + %0:gprb(s32) = COPY $a0 + %1:fgr32(s32) = MTC1 $a1 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a1 = MFC1 %1(s32) + JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $f0 + %2:fprb(s32) = COPY $f0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... 
+--- +name: call_double_in_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a2, $a3 + + ; FP32-LABEL: name: call_double_in_gpr + ; FP32: liveins: $a0, $a2, $a3 + ; FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP32: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP32: $a0 = COPY [[COPY]] + ; FP32: $a3 = ExtractElementF64 [[BuildPairF64_]], 1 + ; FP32: $a2 = ExtractElementF64 [[BuildPairF64_]], 0 + ; FP32: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d0 + ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP32: $d0 = COPY [[COPY1]] + ; FP32: RetRA implicit $d0 + ; FP64-LABEL: name: call_double_in_gpr + ; FP64: liveins: $a0, $a2, $a3 + ; FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; FP64: [[BuildPairF64_:%[0-9]+]]:afgr64 = BuildPairF64 $a2, $a3 + ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + ; FP64: $a0 = COPY [[COPY]] + ; FP64: $a3 = ExtractElementF64 [[BuildPairF64_]], 1 + ; FP64: $a2 = ExtractElementF64 [[BuildPairF64_]], 0 + ; FP64: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d0 + ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + ; FP64: $d0 = COPY [[COPY1]] + ; FP64: RetRA implicit $d0 + %0:gprb(s32) = COPY $a0 + %1:afgr64(s64) = BuildPairF64 $a2, $a3 + ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp + $a0 = COPY %0(s32) + $a3 = ExtractElementF64 %1(s64), 1 + $a2 = ExtractElementF64 %1(s64), 0 + JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0 + %2:fprb(s64) = COPY $d0 + ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp + $d0 = COPY %2(s64) + RetRA implicit $d0 + +... 
+ diff --git a/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll b/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll new file mode 100644 index 000000000000..e46b7e64acd2 --- /dev/null +++ b/test/CodeGen/Mips/GlobalISel/llvm-ir/float_args.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP32 +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP64 + +define float @float_in_fpr(float %a, float %b) { +; MIPS32-LABEL: float_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.s $f0, $f14 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret float %b +} + +define double @double_in_fpr(double %a, double %b) { +; MIPS32-LABEL: double_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.d $f0, $f14 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret double %b +} + +define float @float_in_gpr(i32 %a, float %b) { +; MIPS32-LABEL: float_in_gpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mtc1 $5, $f0 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + ret float %b +} + +define double @double_in_gpr(i32 %a, double %b) { +; FP32-LABEL: double_in_gpr: +; FP32: # %bb.0: # %entry +; FP32-NEXT: mtc1 $6, $f0 +; FP32-NEXT: mtc1 $7, $f1 +; FP32-NEXT: jr $ra +; FP32-NEXT: nop +; +; FP64-LABEL: double_in_gpr: +; FP64: # %bb.0: # %entry +; FP64-NEXT: mtc1 $6, $f0 +; FP64-NEXT: mthc1 $7, $f0 +; FP64-NEXT: jr $ra +; FP64-NEXT: nop +entry: + ret double %b +} + +define float @call_float_in_fpr(float %a, float %b) { +; MIPS32-LABEL: call_float_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: jal float_in_fpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw 
$ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call float @float_in_fpr(float %a, float %b) + ret float %call +} + +define double @call_double_in_fpr(double %a, double %b) { +; MIPS32-LABEL: call_double_in_fpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: jal double_in_fpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call double @double_in_fpr(double %a, double %b) + ret double %call +} + +define float @call_float_in_gpr(i32 %a, float %b) { +; MIPS32-LABEL: call_float_in_gpr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: mtc1 $5, $f0 +; MIPS32-NEXT: mfc1 $5, $f0 +; MIPS32-NEXT: jal float_in_gpr +; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: addiu $sp, $sp, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %call = call float @float_in_gpr(i32 %a, float %b) + ret float %call +} + + +define double @call_double_in_gpr(i32 %a, double %b) { +; FP32-LABEL: call_double_in_gpr: +; FP32: # %bb.0: # %entry +; FP32-NEXT: addiu $sp, $sp, -24 +; FP32-NEXT: .cfi_def_cfa_offset 24 +; FP32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; FP32-NEXT: .cfi_offset 31, -4 +; FP32-NEXT: mtc1 $6, $f0 +; FP32-NEXT: mtc1 $7, $f1 +; FP32-NEXT: mfc1 $7, $f1 +; FP32-NEXT: mfc1 $6, $f0 +; FP32-NEXT: jal double_in_gpr +; FP32-NEXT: nop +; FP32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; FP32-NEXT: addiu $sp, $sp, 24 +; FP32-NEXT: jr $ra +; FP32-NEXT: nop +; +; FP64-LABEL: call_double_in_gpr: +; FP64: # %bb.0: # 
%entry +; FP64-NEXT: addiu $sp, $sp, -24 +; FP64-NEXT: .cfi_def_cfa_offset 24 +; FP64-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; FP64-NEXT: .cfi_offset 31, -4 +; FP64-NEXT: mtc1 $6, $f0 +; FP64-NEXT: mthc1 $7, $f0 +; FP64-NEXT: mfhc1 $7, $f0 +; FP64-NEXT: mfc1 $6, $f0 +; FP64-NEXT: jal double_in_gpr +; FP64-NEXT: nop +; FP64-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; FP64-NEXT: addiu $sp, $sp, 24 +; FP64-NEXT: jr $ra +; FP64-NEXT: nop +entry: + %call = call double @double_in_gpr(i32 %a, double %b) + ret double %call +} From b05515fb57086e1884321d9a62241410e2556d51 Mon Sep 17 00:00:00 2001 From: George Rimar Date: Mon, 25 Mar 2019 12:34:25 +0000 Subject: [PATCH 14/27] [llvm-objcopy] - Refactor the code. NFC. The idea of the patch is about to move out the code to a new helper static functions (to reduce the size of 'handleArgs' and to isolate the parts of it's logic). Differential revision: https://reviews.llvm.org/D59762 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356889 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 202 ++++++++++++++------------ 1 file changed, 106 insertions(+), 96 deletions(-) diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index fc1ac38ec872..1ae802ff14b3 100644 --- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -298,110 +298,94 @@ static bool isUnneededSymbol(const Symbol &Sym) { Sym.Type != STT_FILE && Sym.Type != STT_SECTION; } -// This function handles the high level operations of GNU objcopy including -// handling command line options. It's important to outline certain properties -// we expect to hold of the command line operations. Any operation that "keeps" -// should keep regardless of a remove. Additionally any removal should respect -// any previous removals. 
Lastly whether or not something is removed shouldn't -// depend a) on the order the options occur in or b) on some opaque priority -// system. The only priority is that keeps/copies overrule removes. -static Error handleArgs(const CopyConfig &Config, Object &Obj, - const Reader &Reader, ElfType OutputElfType) { - - if (!Config.SplitDWO.empty()) - if (Error E = - splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) - return E; - - if (Config.OutputArch) { - Obj.Machine = Config.OutputArch.getValue().EMachine; - Obj.OSABI = Config.OutputArch.getValue().OSABI; - } - +static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // TODO: update or remove symbols only if there is an option that affects // them. - if (Obj.SymbolTable) { - Obj.SymbolTable->updateSymbols([&](Symbol &Sym) { - // Common and undefined symbols don't make sense as local symbols, and can - // even cause crashes if we localize those, so skip them. - if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && - ((Config.LocalizeHidden && - (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || - is_contained(Config.SymbolsToLocalize, Sym.Name))) - Sym.Binding = STB_LOCAL; - - // Note: these two globalize flags have very similar names but different - // meanings: - // - // --globalize-symbol: promote a symbol to global - // --keep-global-symbol: all symbols except for these should be made local - // - // If --globalize-symbol is specified for a given symbol, it will be - // global in the output file even if it is not included via - // --keep-global-symbol. Because of that, make sure to check - // --globalize-symbol second. 
- if (!Config.SymbolsToKeepGlobal.empty() && - !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_LOCAL; - - if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_GLOBAL; - - if (is_contained(Config.SymbolsToWeaken, Sym.Name) && - Sym.Binding == STB_GLOBAL) - Sym.Binding = STB_WEAK; - - if (Config.Weaken && Sym.Binding == STB_GLOBAL && - Sym.getShndx() != SHN_UNDEF) - Sym.Binding = STB_WEAK; - - const auto I = Config.SymbolsToRename.find(Sym.Name); - if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); - - if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) - Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); - }); - - // The purpose of this loop is to mark symbols referenced by sections - // (like GroupSection or RelocationSection). This way, we know which - // symbols are still 'needed' and which are not. - if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty()) { - for (auto &Section : Obj.sections()) - Section.markSymbols(); - } + if (!Obj.SymbolTable) + return Error::success(); + + Obj.SymbolTable->updateSymbols([&](Symbol &Sym) { + // Common and undefined symbols don't make sense as local symbols, and can + // even cause crashes if we localize those, so skip them. + if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && + ((Config.LocalizeHidden && + (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) || + is_contained(Config.SymbolsToLocalize, Sym.Name))) + Sym.Binding = STB_LOCAL; + + // Note: these two globalize flags have very similar names but different + // meanings: + // + // --globalize-symbol: promote a symbol to global + // --keep-global-symbol: all symbols except for these should be made local + // + // If --globalize-symbol is specified for a given symbol, it will be + // global in the output file even if it is not included via + // --keep-global-symbol. 
Because of that, make sure to check + // --globalize-symbol second. + if (!Config.SymbolsToKeepGlobal.empty() && + !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_LOCAL; + + if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_GLOBAL; + + if (is_contained(Config.SymbolsToWeaken, Sym.Name) && + Sym.Binding == STB_GLOBAL) + Sym.Binding = STB_WEAK; + + if (Config.Weaken && Sym.Binding == STB_GLOBAL && + Sym.getShndx() != SHN_UNDEF) + Sym.Binding = STB_WEAK; + + const auto I = Config.SymbolsToRename.find(Sym.Name); + if (I != Config.SymbolsToRename.end()) + Sym.Name = I->getValue(); + + if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION) + Sym.Name = (Config.SymbolsPrefix + Sym.Name).str(); + }); + + // The purpose of this loop is to mark symbols referenced by sections + // (like GroupSection or RelocationSection). This way, we know which + // symbols are still 'needed' and which are not. 
+ if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty()) { + for (auto &Section : Obj.sections()) + Section.markSymbols(); + } - auto RemoveSymbolsPred = [&](const Symbol &Sym) { - if (is_contained(Config.SymbolsToKeep, Sym.Name) || - (Config.KeepFileSymbols && Sym.Type == STT_FILE)) - return false; + auto RemoveSymbolsPred = [&](const Symbol &Sym) { + if (is_contained(Config.SymbolsToKeep, Sym.Name) || + (Config.KeepFileSymbols && Sym.Type == STT_FILE)) + return false; - if ((Config.DiscardMode == DiscardType::All || - (Config.DiscardMode == DiscardType::Locals && - StringRef(Sym.Name).startswith(".L"))) && - Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF && - Sym.Type != STT_FILE && Sym.Type != STT_SECTION) - return true; + if ((Config.DiscardMode == DiscardType::All || + (Config.DiscardMode == DiscardType::Locals && + StringRef(Sym.Name).startswith(".L"))) && + Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF && + Sym.Type != STT_FILE && Sym.Type != STT_SECTION) + return true; - if (Config.StripAll || Config.StripAllGNU) - return true; + if (Config.StripAll || Config.StripAllGNU) + return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) - return true; + if (is_contained(Config.SymbolsToRemove, Sym.Name)) + return true; - if ((Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && - isUnneededSymbol(Sym)) - return true; + if ((Config.StripUnneeded || + is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && + isUnneededSymbol(Sym)) + return true; - return false; - }; - if (Error E = Obj.removeSymbols(RemoveSymbolsPred)) - return E; - } + return false; + }; + + return Obj.removeSymbols(RemoveSymbolsPred); +} +static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { SectionPred RemovePred = [](const SectionBase &) { return false; }; // Removes: @@ -535,7 +519,33 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, return &Obj.addSection(*CS); }); - if (Error 
E = Obj.removeSections(RemovePred)) + return Obj.removeSections(RemovePred); +} + +// This function handles the high level operations of GNU objcopy including +// handling command line options. It's important to outline certain properties +// we expect to hold of the command line operations. Any operation that "keeps" +// should keep regardless of a remove. Additionally any removal should respect +// any previous removals. Lastly whether or not something is removed shouldn't +// depend a) on the order the options occur in or b) on some opaque priority +// system. The only priority is that keeps/copies overrule removes. +static Error handleArgs(const CopyConfig &Config, Object &Obj, + const Reader &Reader, ElfType OutputElfType) { + + if (!Config.SplitDWO.empty()) + if (Error E = + splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType)) + return E; + + if (Config.OutputArch) { + Obj.Machine = Config.OutputArch.getValue().EMachine; + Obj.OSABI = Config.OutputArch.getValue().OSABI; + } + + if (Error E = updateAndRemoveSymbols(Config, Obj)) + return E; + + if (Error E = replaceAndRemoveSections(Config, Obj)) return E; if (!Config.SectionsToRename.empty()) { From f694319233c7cda4ed5fe51f85121cff480a83a5 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Mon, 25 Mar 2019 12:36:30 +0000 Subject: [PATCH 15/27] [clang-tidy] Separate the check-facing interface Summary: Move ClangTidyCheck to a separate header/.cpp Switch checks to #include "ClangTidyCheck.h" Mention ClangTidyCheck.h in the docs Reviewers: hokein, gribozavr, aaron.ballman Reviewed By: hokein Subscribers: mgorny, javed.absar, xazax.hun, arphaman, jdoerfert, llvm-commits, cfe-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D59714 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356890 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git 
a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 2b0bb2b7c05d..415e0fc7f29d 100644 --- a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -28,6 +28,7 @@ static_library("clang-tidy") { sources = [ "ClangTidy.cpp", + "ClangTidyCheck.cpp", "ClangTidyDiagnosticConsumer.cpp", "ClangTidyModule.cpp", "ClangTidyOptions.cpp", From 198d1a1ac3845c0604ab32eb099dd86b7abb70dc Mon Sep 17 00:00:00 2001 From: Brock Wyma Date: Mon, 25 Mar 2019 13:50:26 +0000 Subject: [PATCH 16/27] [DebugInfo] IntelJitEventListener follow up for "add SectionedAddress ..." Following r354972 the Intel JIT Listener would not report line table information because the section indices did not match. There was a similar issue with the PerfJitEventListener. This change performs the section index lookup when building the object address used to query the line table information. Differential Revision: https://reviews.llvm.org/D59490 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356895 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../IntelJITEvents/IntelJITEventListener.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 2b89f9d16fae..1ebc820a8b49 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -141,15 +141,25 @@ void IntelJITEventListener::notifyObjectLoaded( uint64_t Addr = *AddrOrErr; uint64_t Size = P.second; + auto SecOrErr = Sym.getSection(); + if (!SecOrErr) { + // TODO: Actually report errors helpfully. 
+ consumeError(SecOrErr.takeError()); + continue; + } + object::section_iterator Sec = *SecOrErr; + if (Sec == Obj.section_end()) + continue; + uint64_t Index = Sec->getIndex(); + // Record this address in a local vector Functions.push_back((void*)Addr); // Build the function loaded notification message iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); - // TODO: it is neccessary to set proper SectionIndex here. - // object::SectionedAddress::UndefSection works for only absolute addresses. - DILineInfoTable Lines = Context->getLineInfoForAddressRange({Addr, object::SectionedAddress::UndefSection}, Size); + DILineInfoTable Lines = + Context->getLineInfoForAddressRange({Addr, Index}, Size); DILineInfoTable::iterator Begin = Lines.begin(); DILineInfoTable::iterator End = Lines.end(); for (DILineInfoTable::iterator It = Begin; It != End; ++It) { From dabd4d53f4e2ae51e4ff71501075f0896863178b Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 25 Mar 2019 14:28:32 +0000 Subject: [PATCH 17/27] [RegAlloc] Simplify MIR test Remove the IR part from test/CodeGen/X86/regalloc-copy-hints.mir (added by r355854). To make the test remain functional, the parts of the MBB names referring to BB names have been removed, as well as all machine memory operands. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356899 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/regalloc-copy-hints.mir | 458 ++++------------------- 1 file changed, 80 insertions(+), 378 deletions(-) diff --git a/test/CodeGen/X86/regalloc-copy-hints.mir b/test/CodeGen/X86/regalloc-copy-hints.mir index 6287066e64fe..3a5cd41b852f 100644 --- a/test/CodeGen/X86/regalloc-copy-hints.mir +++ b/test/CodeGen/X86/regalloc-copy-hints.mir @@ -3,310 +3,12 @@ # REQUIRES: asserts --- | - %0 = type { %1 } - %1 = type { %2, %23, %23*, %27*, %28*, %29, %33*, %34, %42, i8, i32, i32, i32 } - %2 = type { %3, %6, %14, %14, i8, i8*, i8*, %16 } - %3 = type { i32 (...)**, %4*, %5* } - %4 = type { i32 (...)**, %3* } - %5 = type { i32 (...)** } - %6 = type { %7 } - %7 = type { %8, i32, %12 } - %8 = type { %9**, %9**, %9**, %10 } - %9 = type { i32, i32, i32, i8* } - %10 = type { %11 } - %11 = type { %9** } - %12 = type { %13 } - %13 = type { i32 } - %14 = type { i32, %15* } - %15 = type { i32, i32, i8* } - %16 = type { %17 } - %17 = type { %18*, %20, %22 } - %18 = type { %19* } - %19 = type <{ %18, %19*, %18*, i8, [3 x i8] }> - %20 = type { %21 } - %21 = type { %18 } - %22 = type { %13 } - %23 = type { %24 } - %24 = type { %18*, %25, %26 } - %25 = type { %21 } - %26 = type { %13 } - %27 = type { i32 (...)** } - %28 = type { i32 (...)** } - %29 = type { %30 } - %30 = type { %18*, %31, %32 } - %31 = type { %21 } - %32 = type { %13 } - %33 = type { i32 (...)** } - %34 = type { %35 } - %35 = type { %36 } - %36 = type { %37, i32, %41 } - %37 = type { %38**, %38**, %38**, %39 } - %38 = type { %42, i32 } - %39 = type { %40 } - %40 = type { %38** } - %41 = type { %13 } - %42 = type { %43 } - %43 = type { %18*, %44, %45 } - %44 = type { %21 } - %45 = type { %13 } - %46 = type { %47, %48 } - %47 = type <{ %18, %19*, %18*, i8 }> - %48 = type { %49 } - %49 = type { i32, %50 } - %50 = type { { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 } } - - 
define void @fun(%0* %arg) local_unnamed_addr #0 align 2 personality i32 (...)* @__gxx_personality_v0 { - bb: - %tmp = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1 - %tmp1 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0 - br i1 undef, label %bb5, label %bb6 - - bb5: ; preds = %bb - unreachable - - bb6: ; preds = %bb - %tmp8 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 8, i32 0, i32 1, i32 0, i32 0 - br i1 undef, label %bb10, label %bb9 - - bb9: ; preds = %bb6 - unreachable - - bb10: ; preds = %bb6 - store %18* %tmp8, %18** undef - br i1 undef, label %bb14, label %bb13 - - bb13: ; preds = %bb10 - unreachable - - bb14: ; preds = %bb10 - br i1 undef, label %bb17, label %bb18 - - bb17: ; preds = %bb14 - unreachable - - bb18: ; preds = %bb14 - br i1 undef, label %bb20, label %bb19 - - bb19: ; preds = %bb18 - unreachable - - bb20: ; preds = %bb18 - br i1 undef, label %bb25, label %bb24 - - bb24: ; preds = %bb20 - unreachable - - bb25: ; preds = %bb20 - br i1 undef, label %bb29, label %bb30 - - bb29: ; preds = %bb25 - unreachable - - bb30: ; preds = %bb25 - br i1 undef, label %bb38, label %bb31 - - bb31: ; preds = %bb30 - %tmp32 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 - br i1 undef, label %bb34, label %bb35 - - bb34: ; preds = %bb31 - unreachable - - bb35: ; preds = %bb31 - br i1 undef, label %bb40, label %bb36 - - bb36: ; preds = %bb35 - unreachable - - bb38: ; preds = %bb30 - %tmp391 = bitcast %18* %tmp1 to %19** - br label %bb40 - - bb40: ; preds = %bb35, %bb38 - %tmp41 = phi %18* [ %tmp1, %bb38 ], [ null, %bb35 ] - %tmp42 = phi %19** [ %tmp391, %bb38 ], [ %tmp32, %bb35 ] - br i1 undef, label %bb43, label %bb48 - - bb43: ; preds = %bb40 - %tmp44 = tail call i8* @_Znwj() - store %18* %tmp41, %18** undef - %tmp46 = bitcast %19** %tmp42 to i8** - store i8* %tmp44, i8** %tmp46 - %0 = bitcast i8* %tmp44 to %46* - tail call void 
@_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() - br label %bb48 - - bb48: ; preds = %bb43, %bb40 - %tmp49 = phi %46* [ %0, %bb43 ], [ undef, %bb40 ] - %tmp50 = getelementptr inbounds %46, %46* %tmp49, i32 0, i32 1, i32 0, i32 1, i32 4, i32 0 - store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv to i32), i32* %tmp50 - br i1 undef, label %bb52, label %bb53 - - bb52: ; preds = %bb48 - unreachable - - bb53: ; preds = %bb48 - br i1 undef, label %bb55, label %bb54 - - bb54: ; preds = %bb53 - unreachable - - bb55: ; preds = %bb53 - br i1 undef, label %bb59, label %bb58 - - bb58: ; preds = %bb55 - unreachable - - bb59: ; preds = %bb55 - br i1 undef, label %bb62, label %bb61 - - bb61: ; preds = %bb59 - unreachable - - bb62: ; preds = %bb59 - br i1 undef, label %bb64, label %bb65 - - bb64: ; preds = %bb62 - unreachable - - bb65: ; preds = %bb62 - %tmp66 = icmp eq %46* null, null - br i1 %tmp66, label %bb72, label %bb67 - - bb67: ; preds = %bb65 - %tmp68 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 - br i1 undef, label %bb70, label %bb74 - - bb70: ; preds = %bb67 - unreachable - - bb72: ; preds = %bb65 - %tmp732 = bitcast %18* %tmp1 to %19** - br label %bb74 - - bb74: ; preds = %bb67, %bb72 - %tmp75 = phi %18* [ %tmp1, %bb72 ], [ null, %bb67 ] - %tmp76 = phi %19** [ %tmp732, %bb72 ], [ %tmp68, %bb67 ] - %tmp77 = tail call i8* @_Znwj() - store %18* %tmp75, %18** undef - %tmp79 = bitcast %19** %tmp76 to i8** - store i8* %tmp77, i8** %tmp79 - %1 = bitcast i8* %tmp77 to %46* - tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() - %tmp81 = getelementptr inbounds %46, %46* %1, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0 - store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv to i32), i32* %tmp81 - store %18* %tmp8, %18** undef - %2 = bitcast %0* %arg to i8* - %sunkaddr = 
getelementptr i8, i8* %2, i32 140 - %3 = bitcast i8* %sunkaddr to %18** - %tmp85 = load %18*, %18** %3 - %tmp864 = bitcast %18* %tmp85 to %19** - %tmp87 = load %19*, %19** %tmp864 - %tmp88 = icmp eq %19* %tmp87, null - br i1 %tmp88, label %bb90, label %bb89 - - bb89: ; preds = %bb74 - unreachable - - bb90: ; preds = %bb74 - br i1 undef, label %bb94, label %bb92 - - bb92: ; preds = %bb90 - br i1 undef, label %bb96, label %bb97 - - bb94: ; preds = %bb90 - unreachable - - bb96: ; preds = %bb92 - unreachable - - bb97: ; preds = %bb92 - br i1 undef, label %bb101, label %bb102 - - bb101: ; preds = %bb97 - unreachable - - bb102: ; preds = %bb97 - br i1 undef, label %bb104, label %bb103 - - bb103: ; preds = %bb102 - unreachable - - bb104: ; preds = %bb102 - br i1 undef, label %bb109, label %bb108 - - bb108: ; preds = %bb104 - unreachable - - bb109: ; preds = %bb104 - br i1 undef, label %bb111, label %bb112 - - bb111: ; preds = %bb109 - unreachable - - bb112: ; preds = %bb109 - br i1 undef, label %bb118, label %bb117 - - bb117: ; preds = %bb112 - unreachable - - bb118: ; preds = %bb112 - br i1 undef, label %bb120, label %bb121 - - bb120: ; preds = %bb118 - unreachable - - bb121: ; preds = %bb118 - br i1 undef, label %bb124, label %bb125 - - bb124: ; preds = %bb121 - unreachable - - bb125: ; preds = %bb121 - %4 = bitcast %18* %tmp1 to %46** - %tmp126 = load %46*, %46** %4 - %tmp127 = icmp eq %46* %tmp126, null - br i1 %tmp127, label %bb135, label %bb128 - - bb128: ; preds = %bb125 - br label %bb129 - - bb129: ; preds = %bb131, %bb128 - %tmp130 = icmp ugt i32 undef, 95406324 - br i1 %tmp130, label %bb131, label %bb133 - - bb131: ; preds = %bb129 - br label %bb129 - - bb133: ; preds = %bb129 - unreachable - - bb135: ; preds = %bb125 - br i1 undef, label %bb137, label %bb138 - - bb137: ; preds = %bb135 - unreachable - - bb138: ; preds = %bb135 - unreachable - } - - declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv(%0*) #0 - - declare 
zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv(%0*) #0 align 2 - - declare i32 @__gxx_personality_v0(...) #0 - - declare noalias nonnull i8* @_Znwj() local_unnamed_addr #0 - - declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() local_unnamed_addr #0 - - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 - - attributes #0 = { "target-cpu"="i486" } - attributes #1 = { nounwind } + define void @fun() { ret void } + declare noalias nonnull i8* @_Znwj() + declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv() + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv() ... --- # A physreg should always only be hinted once per getRegAllocationHints() query. @@ -405,7 +107,7 @@ frameInfo: fixedStack: - { id: 0, size: 4, alignment: 4, stack-id: 0, isImmutable: true } body: | - bb.0.bb: + bb.0: successors: %bb.1(0x00000001), %bb.2(0x7fffffff) %13:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -413,11 +115,11 @@ body: | JNE_1 %bb.2, implicit killed $eflags JMP_1 %bb.1 - bb.1.bb5: + bb.1: successors: - bb.2.bb6: + bb.2: successors: %bb.4(0x7fffffff), %bb.3(0x00000001) %15:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -425,26 +127,26 @@ body: | JNE_1 %bb.4, implicit killed $eflags JMP_1 %bb.3 - bb.3.bb9: + bb.3: successors: - bb.4.bb10: + bb.4: successors: %bb.6(0x7fffffff), %bb.5(0x00000001) - %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg %1:gr32 = LEA32r %12, 1, $noreg, 144, $noreg - MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 %18:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %18.sub_8bit, %18.sub_8bit, implicit-def 
$eflags JNE_1 %bb.6, implicit killed $eflags JMP_1 %bb.5 - bb.5.bb13: + bb.5: successors: - bb.6.bb14: + bb.6: successors: %bb.7(0x00000001), %bb.8(0x7fffffff) %20:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -452,11 +154,11 @@ body: | JNE_1 %bb.8, implicit killed $eflags JMP_1 %bb.7 - bb.7.bb17: + bb.7: successors: - bb.8.bb18: + bb.8: successors: %bb.10(0x7fffffff), %bb.9(0x00000001) %22:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -464,11 +166,11 @@ body: | JNE_1 %bb.10, implicit killed $eflags JMP_1 %bb.9 - bb.9.bb19: + bb.9: successors: - bb.10.bb20: + bb.10: successors: %bb.12(0x7fffffff), %bb.11(0x00000001) %24:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -476,11 +178,11 @@ body: | JNE_1 %bb.12, implicit killed $eflags JMP_1 %bb.11 - bb.11.bb24: + bb.11: successors: - bb.12.bb25: + bb.12: successors: %bb.13(0x00000001), %bb.14(0x7fffffff) %26:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -488,18 +190,18 @@ body: | JNE_1 %bb.14, implicit killed $eflags JMP_1 %bb.13 - bb.13.bb29: + bb.13: successors: - bb.14.bb30: + bb.14: %0:gr32 = LEA32r %12, 1, $noreg, 80, $noreg %28:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %28.sub_8bit, %28.sub_8bit, implicit-def $eflags JNE_1 %bb.20, implicit killed $eflags JMP_1 %bb.15 - bb.15.bb31: + bb.15: successors: %bb.16(0x00000001), %bb.17(0x7fffffff) %78:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -507,11 +209,11 @@ body: | JNE_1 %bb.17, implicit killed $eflags JMP_1 %bb.16 - bb.16.bb34: + bb.16: successors: - bb.17.bb35: + bb.17: successors: %bb.18(0x7fffffff), %bb.19(0x00000001) TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags @@ -521,15 +223,15 @@ body: | %79:gr32 = LEA32r %12, 1, $noreg, 80, $noreg JMP_1 %bb.21 - bb.19.bb36: + bb.19: successors: - bb.20.bb38: + bb.20: %78:gr32_abcd = COPY %0 %79:gr32 = COPY %0 - bb.21.bb40: + bb.21: successors: %bb.22, %bb.23 %35:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -538,31 +240,31 @@ body: | JNE_1 %bb.23, implicit killed $eflags JMP_1 
%bb.22 - bb.22.bb43: + bb.22: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %80:gr32 = COPY killed $eax - MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 :: (store 4 into `%18** undef`) - MOV32mr %79, 1, $noreg, 0, $noreg, %80 :: (store 4 into %ir.tmp46) + MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 + MOV32mr %79, 1, $noreg, 0, $noreg, %80 ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp - bb.23.bb48: + bb.23: successors: %bb.24(0x00000001), %bb.25(0x7fffffff) - MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv :: (store 4 into %ir.tmp50) + MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv %39:gr32_abcd = MOV32r0 implicit-def dead $eflags TEST8rr %39.sub_8bit, %39.sub_8bit, implicit-def $eflags JNE_1 %bb.25, implicit killed $eflags JMP_1 %bb.24 - bb.24.bb52: + bb.24: successors: - bb.25.bb53: + bb.25: successors: %bb.27(0x7fffffff), %bb.26(0x00000001) %41:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -570,11 +272,11 @@ body: | JNE_1 %bb.27, implicit killed $eflags JMP_1 %bb.26 - bb.26.bb54: + bb.26: successors: - bb.27.bb55: + bb.27: successors: %bb.29(0x7fffffff), %bb.28(0x00000001) %43:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -582,11 +284,11 
@@ body: | JNE_1 %bb.29, implicit killed $eflags JMP_1 %bb.28 - bb.28.bb58: + bb.28: successors: - bb.29.bb59: + bb.29: successors: %bb.31(0x7fffffff), %bb.30(0x00000001) %45:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -594,11 +296,11 @@ body: | JNE_1 %bb.31, implicit killed $eflags JMP_1 %bb.30 - bb.30.bb61: + bb.30: successors: - bb.31.bb62: + bb.31: successors: %bb.32(0x00000001), %bb.33(0x7fffffff) %47:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -606,11 +308,11 @@ body: | JNE_1 %bb.33, implicit killed $eflags JMP_1 %bb.32 - bb.32.bb64: + bb.32: successors: - bb.33.bb65: + bb.33: successors: %bb.37(0x30000000), %bb.34(0x50000000) %49:gr8 = MOV8ri 1 @@ -618,7 +320,7 @@ body: | JNE_1 %bb.37, implicit killed $eflags JMP_1 %bb.34 - bb.34.bb67: + bb.34: successors: %bb.36(0x00000001), %bb.35(0x7fffffff) %81:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -629,38 +331,38 @@ body: | %82:gr32 = LEA32r %12, 1, $noreg, 80, $noreg JMP_1 %bb.38 - bb.36.bb70: + bb.36: successors: - bb.37.bb72: + bb.37: %81:gr32_abcd = COPY %0 %82:gr32 = COPY %0 - bb.38.bb74: + bb.38: successors: %bb.40(0x7fffffff), %bb.39(0x00000001) ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %52:gr32 = COPY killed $eax - MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 :: (store 4 into `%18** undef`) - MOV32mr %82, 1, $noreg, 0, $noreg, %52 :: (store 4 into %ir.tmp79) + MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 + MOV32mr %82, 1, $noreg, 0, $noreg, %52 ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CALLpcrel32 
@_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp - MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv :: (store 4 into %ir.tmp81) - MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) - %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg :: (load 4 from %ir.3) - CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tmp864) + MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv + MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 + %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg + CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags JE_1 %bb.40, implicit killed $eflags JMP_1 %bb.39 - bb.39.bb89: + bb.39: successors: - bb.40.bb90: + bb.40: successors: %bb.42(0x00000001), %bb.41(0x7fffffff) %56:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -668,7 +370,7 @@ body: | JNE_1 %bb.42, implicit killed $eflags JMP_1 %bb.41 - bb.41.bb92: + bb.41: successors: %bb.43(0x00000001), %bb.44(0x7fffffff) %58:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -676,15 +378,15 @@ body: | JNE_1 %bb.43, implicit killed $eflags JMP_1 %bb.44 - bb.42.bb94: + bb.42: successors: - bb.43.bb96: + bb.43: successors: - bb.44.bb97: + bb.44: successors: %bb.45(0x00000001), %bb.46(0x7fffffff) %60:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -692,11 +394,11 @@ body: | JNE_1 %bb.46, implicit killed $eflags JMP_1 %bb.45 - bb.45.bb101: + bb.45: successors: - bb.46.bb102: + bb.46: successors: %bb.48(0x7fffffff), %bb.47(0x00000001) %62:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -704,11 +406,11 @@ body: | JNE_1 %bb.48, implicit killed $eflags JMP_1 %bb.47 - bb.47.bb103: + bb.47: successors: - bb.48.bb104: + 
bb.48: successors: %bb.50(0x7fffffff), %bb.49(0x00000001) %64:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -716,11 +418,11 @@ body: | JNE_1 %bb.50, implicit killed $eflags JMP_1 %bb.49 - bb.49.bb108: + bb.49: successors: - bb.50.bb109: + bb.50: successors: %bb.51(0x00000001), %bb.52(0x7fffffff) %66:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -728,11 +430,11 @@ body: | JNE_1 %bb.52, implicit killed $eflags JMP_1 %bb.51 - bb.51.bb111: + bb.51: successors: - bb.52.bb112: + bb.52: successors: %bb.54(0x7fffffff), %bb.53(0x00000001) %68:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -740,11 +442,11 @@ body: | JNE_1 %bb.54, implicit killed $eflags JMP_1 %bb.53 - bb.53.bb117: + bb.53: successors: - bb.54.bb118: + bb.54: successors: %bb.55(0x00000001), %bb.56(0x7fffffff) %70:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -752,11 +454,11 @@ body: | JNE_1 %bb.56, implicit killed $eflags JMP_1 %bb.55 - bb.55.bb120: + bb.55: successors: - bb.56.bb121: + bb.56: successors: %bb.57(0x00000001), %bb.58(0x7fffffff) %72:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -764,31 +466,31 @@ body: | JNE_1 %bb.58, implicit killed $eflags JMP_1 %bb.57 - bb.57.bb124: + bb.57: successors: - bb.58.bb125: + bb.58: successors: %bb.62(0x00000001), %bb.59(0x7fffffff) - CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.4) + CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags JE_1 %bb.62, implicit killed $eflags JMP_1 %bb.59 - bb.59.bb128: + bb.59: - bb.60.bb129: + bb.60: successors: %bb.60(0x7fffffff), %bb.61(0x00000001) CMP32ri undef %75:gr32, 95406325, implicit-def $eflags JB_1 %bb.61, implicit killed $eflags JMP_1 %bb.60 - bb.61.bb133: + bb.61: successors: - bb.62.bb135: + bb.62: successors: %bb.63, %bb.64 %76:gr32_abcd = MOV32r0 implicit-def dead $eflags @@ -796,10 +498,10 @@ body: | JNE_1 %bb.64, implicit killed $eflags JMP_1 %bb.63 - bb.63.bb137: + bb.63: successors: - bb.64.bb138: + bb.64: ... 
From af815f178f79d0460951e952b2db3e094e701d49 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 25 Mar 2019 14:45:31 +0000 Subject: [PATCH 18/27] MinidumpYAML.cpp: Fix some code standard violations missed during review functions should begin with lower case letters. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356901 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ObjectYAML/MinidumpYAML.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp index e578e0591f90..bd017c82fb4d 100644 --- a/lib/ObjectYAML/MinidumpYAML.cpp +++ b/lib/ObjectYAML/MinidumpYAML.cpp @@ -17,7 +17,7 @@ class BlobAllocator { public: size_t tell() const { return NextOffset; } - size_t AllocateCallback(size_t Size, + size_t allocateCallback(size_t Size, std::function Callback) { size_t Offset = NextOffset; NextOffset += Size; @@ -25,18 +25,18 @@ class BlobAllocator { return Offset; } - size_t AllocateBytes(ArrayRef Data) { - return AllocateCallback( + size_t allocateBytes(ArrayRef Data) { + return allocateCallback( Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); } - template size_t AllocateArray(ArrayRef Data) { - return AllocateBytes({reinterpret_cast(Data.data()), + template size_t allocateArray(ArrayRef Data) { + return allocateBytes({reinterpret_cast(Data.data()), sizeof(T) * Data.size()}); } - template size_t AllocateObject(const T &Data) { - return AllocateArray(makeArrayRef(Data)); + template size_t allocateObject(const T &Data) { + return allocateArray(makeArrayRef(Data)); } void writeTo(raw_ostream &OS) const; @@ -340,7 +340,7 @@ static Directory layout(BlobAllocator &File, Stream &S) { switch (S.Kind) { case Stream::StreamKind::RawContent: { RawContentStream &Raw = cast(S); - File.AllocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { + File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { Raw.Content.writeAsBinary(OS); 
assert(Raw.Content.binary_size() <= Raw.Size); OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); @@ -348,10 +348,10 @@ static Directory layout(BlobAllocator &File, Stream &S) { break; } case Stream::StreamKind::SystemInfo: - File.AllocateObject(cast(S).Info); + File.allocateObject(cast(S).Info); break; case Stream::StreamKind::TextContent: - File.AllocateArray(arrayRefFromStringRef(cast(S).Text)); + File.allocateArray(arrayRefFromStringRef(cast(S).Text)); break; } Result.Location.DataSize = File.tell() - Result.Location.RVA; @@ -360,11 +360,11 @@ static Directory layout(BlobAllocator &File, Stream &S) { void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) { BlobAllocator File; - File.AllocateObject(Obj.Header); + File.allocateObject(Obj.Header); std::vector StreamDirectory(Obj.Streams.size()); Obj.Header.StreamDirectoryRVA = - File.AllocateArray(makeArrayRef(StreamDirectory)); + File.allocateArray(makeArrayRef(StreamDirectory)); Obj.Header.NumberOfStreams = StreamDirectory.size(); for (auto &Stream : enumerate(Obj.Streams)) From c72a66877f345149757c39edc1de228d9f287437 Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Mon, 25 Mar 2019 15:14:15 +0000 Subject: [PATCH 19/27] Python 2/3 compat: queue vs Queue Differential Revision: https://reviews.llvm.org/D59590 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356905 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-config/llvm-config.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/llvm-config/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp index 7ef7c46a2627..25f1c28e5f0c 100644 --- a/tools/llvm-config/llvm-config.cpp +++ b/tools/llvm-config/llvm-config.cpp @@ -268,7 +268,6 @@ int main(int argc, char **argv) { // tree. 
bool IsInDevelopmentTree; enum { CMakeStyle, CMakeBuildModeStyle } DevelopmentTreeLayout; - llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0])); std::string CurrentExecPrefix; std::string ActiveObjRoot; @@ -279,11 +278,18 @@ int main(int argc, char **argv) { build_mode = CMAKE_CFG_INTDIR; #endif - // Create an absolute path, and pop up one directory (we expect to be inside a - // bin dir). - sys::fs::make_absolute(CurrentPath); - CurrentExecPrefix = - sys::path::parent_path(sys::path::parent_path(CurrentPath)).str(); + // Create an absolute path, and pop up as much directory as in + // LLVM_TOOLS_INSTALL_DIR + { + llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0])); + sys::fs::make_absolute(CurrentPath); + for (auto iter = sys::path::begin(LLVM_TOOLS_INSTALL_DIR), + end = sys::path::end(LLVM_TOOLS_INSTALL_DIR); + iter != end; ++iter) { + CurrentPath = sys::path::parent_path(CurrentPath).str(); + } + CurrentExecPrefix = sys::path::parent_path(CurrentPath).str(); + } // Check to see if we are inside a development tree by comparing to possible // locations (prefix style or CMake style). From 8ee86696d74ac7955b3998bb4d5362f3ac8e3134 Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Mon, 25 Mar 2019 15:18:55 +0000 Subject: [PATCH 20/27] Revert 356905 Committed from wrong directory... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356907 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-config/llvm-config.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tools/llvm-config/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp index 25f1c28e5f0c..7ef7c46a2627 100644 --- a/tools/llvm-config/llvm-config.cpp +++ b/tools/llvm-config/llvm-config.cpp @@ -268,6 +268,7 @@ int main(int argc, char **argv) { // tree.
bool IsInDevelopmentTree; enum { CMakeStyle, CMakeBuildModeStyle } DevelopmentTreeLayout; + llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0])); std::string CurrentExecPrefix; std::string ActiveObjRoot; @@ -278,18 +279,11 @@ int main(int argc, char **argv) { build_mode = CMAKE_CFG_INTDIR; #endif - // Create an absolute path, and pop up as much directory as in - // LLVM_TOOLS_INSTALL_DIR - { - llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0])); - sys::fs::make_absolute(CurrentPath); - for (auto iter = sys::path::begin(LLVM_TOOLS_INSTALL_DIR), - end = sys::path::end(LLVM_TOOLS_INSTALL_DIR); - iter != end; ++iter) { - CurrentPath = sys::path::parent_path(CurrentPath).str(); - } - CurrentExecPrefix = sys::path::parent_path(CurrentPath).str(); - } + // Create an absolute path, and pop up one directory (we expect to be inside a + // bin dir). + sys::fs::make_absolute(CurrentPath); + CurrentExecPrefix = + sys::path::parent_path(sys::path::parent_path(CurrentPath)).str(); // Check to see if we are inside a development tree by comparing to possible // locations (prefix style or CMake style). From ce23689b40c282c4f851d485beb6187e9c51e13a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 Mar 2019 15:53:55 +0000 Subject: [PATCH 21/27] [SLPVectorizer] reorderInputsAccordingToOpcode - remove non-Instruction canonicalization Remove attempts to commute non-Instructions to the LHS - the codegen changes appear to rely on chance more than anything else and also have a tendency to fight existing instcombine canonicalization which moves constants to the RHS of commutable binary ops. This is prep work towards: (a) reusing reorderInputsAccordingToOpcode for alt-shuffles and removing the similar reorderAltShuffleOperands (b) improving reordering to optimized cases with commutable and non-commutable instructions to still find splat/consecutive ops. 
Differential Revision: https://reviews.llvm.org/D59738 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356913 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 9 +--- .../LoopVectorize/X86/metadata-enable.ll | 24 ++++----- .../SLPVectorizer/AArch64/ext-trunc.ll | 2 +- .../SLPVectorizer/AArch64/gather-cost.ll | 2 +- .../SLPVectorizer/AArch64/getelementptr.ll | 20 ++++---- test/Transforms/SLPVectorizer/NVPTX/v2f16.ll | 4 +- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 2 +- test/Transforms/SLPVectorizer/X86/PR35777.ll | 2 +- test/Transforms/SLPVectorizer/X86/PR39774.ll | 4 +- test/Transforms/SLPVectorizer/X86/PR40310.ll | 2 +- .../SLPVectorizer/X86/barriercall.ll | 2 +- .../SLPVectorizer/X86/commutativity.ll | 2 +- .../SLPVectorizer/X86/compare-reduce.ll | 4 +- .../SLPVectorizer/X86/crash_cmpop.ll | 4 +- .../SLPVectorizer/X86/crash_mandeltext.ll | 2 +- .../SLPVectorizer/X86/crash_smallpt.ll | 8 +-- .../SLPVectorizer/X86/cross_block_slp.ll | 2 +- test/Transforms/SLPVectorizer/X86/cse.ll | 25 ++++----- .../Transforms/SLPVectorizer/X86/cycle_dup.ll | 2 +- .../SLPVectorizer/X86/external_user.ll | 6 +-- test/Transforms/SLPVectorizer/X86/extract.ll | 6 +-- .../SLPVectorizer/X86/extractcost.ll | 2 +- test/Transforms/SLPVectorizer/X86/hoist.ll | 2 +- .../SLPVectorizer/X86/horizontal.ll | 4 +- .../SLPVectorizer/X86/in-tree-user.ll | 4 +- .../SLPVectorizer/X86/insert-after-bundle.ll | 34 ++++++------- .../X86/insert-element-build-vector.ll | 4 +- .../SLPVectorizer/X86/long_chains.ll | 12 ++--- .../SLPVectorizer/X86/loopinvariant.ll | 2 +- .../SLPVectorizer/X86/multi_block.ll | 4 +- .../SLPVectorizer/X86/multi_user.ll | 2 +- .../SLPVectorizer/X86/operandorder.ll | 2 +- test/Transforms/SLPVectorizer/X86/phi.ll | 51 +++++++++---------- test/Transforms/SLPVectorizer/X86/pr35497.ll | 4 +- .../SLPVectorizer/X86/propagate_ir_flags.ll | 16 +++--- .../Transforms/SLPVectorizer/X86/reduction.ll | 2 +- .../SLPVectorizer/X86/reduction_loads.ll 
| 8 +-- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 4 +- test/Transforms/SLPVectorizer/X86/resched.ll | 2 +- test/Transforms/SLPVectorizer/X86/saxpy.ll | 2 +- .../SLPVectorizer/X86/schedule-bundle.ll | 2 +- .../SLPVectorizer/X86/simple-loop.ll | 4 +- .../Transforms/SLPVectorizer/X86/value-bug.ll | 4 +- .../X86/vect_copyable_in_binops.ll | 18 +++---- .../X86/vectorize-reorder-reuse.ll | 6 +-- 45 files changed, 162 insertions(+), 167 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7ed1183c05f9..983b8fa4af1f 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2922,13 +2922,8 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S, // Peel the first iteration out of the loop since there's nothing // interesting to do anyway and it simplifies the checks in the loop. auto *I = cast(VL[0]); - Value *VLeft = I->getOperand(0); - Value *VRight = I->getOperand(1); - if (!isa(VRight) && isa(VLeft)) - // Favor having instruction to the right. FIXME: why? - std::swap(VLeft, VRight); - Left.push_back(VLeft); - Right.push_back(VRight); + Left.push_back(I->getOperand(0)); + Right.push_back(I->getOperand(1)); } // Keep track if we have instructions with all the same opcode on one side. 
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll index ac535096466c..709e69fbb1da 100644 --- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -2246,84 +2246,84 @@ define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly ; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer -; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] +; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 ; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP7]] +; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 ; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 -; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP11]] +; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw 
<4 x i32> [[TMP11]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 ; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 -; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP15]] +; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP15]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 ; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 -; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP19]] +; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP19]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP21]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 ; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 -; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP23]] +; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>* 
; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 ; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32>* [[TMP26]], align 4 -; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP27]] +; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP27]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 ; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP31:%.*]] = load <4 x i32>, <4 x i32>* [[TMP30]], align 4 -; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP31]] +; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP31]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP32]], <4 x i32>* [[TMP33]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 ; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP35:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 -; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP35]] +; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP35]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], <4 x i32>* [[TMP37]], align 4 ; O3DEFAULT-NEXT: 
[[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 ; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP39:%.*]] = load <4 x i32>, <4 x i32>* [[TMP38]], align 4 -; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP39]] +; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP41]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 ; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP43:%.*]] = load <4 x i32>, <4 x i32>* [[TMP42]], align 4 -; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP43]] +; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP45]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 ; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP47:%.*]] = load <4 x i32>, <4 x i32>* [[TMP46]], align 4 -; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP47]] +; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP47]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP49]], align 4 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = load i32, i32* [[A]], align 4 diff --git 
a/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll index fb3d12d88ba5..8e36a921c758 100644 --- a/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ b/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -66,7 +66,7 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[GEP0]] diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll index 401776aa270d..14a6d0eb72c8 100644 --- a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -21,7 +21,7 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: ret i32 [[TMP10]] diff --git a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index db02f55dcc77..d3bbf3df8582 100644 --- 
a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -65,7 +65,7 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]] @@ -86,10 +86,10 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP18]], [[TMP20]] +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] @@ -184,7 +184,7 @@ define i32 
@getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP1]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]] @@ -194,10 +194,10 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 % ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] diff --git a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll index 7038b0f8e276..6c474705abcf 100644 --- 
a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -16,8 +16,8 @@ define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %a ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> , [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>* ; CHECK-NEXT: store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index e8a83fa4b7a6..712ff040a918 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -14,7 +14,7 @@ define void @test() #0 { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], ; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 diff --git a/test/Transforms/SLPVectorizer/X86/PR35777.ll b/test/Transforms/SLPVectorizer/X86/PR35777.ll index adfe77f89f52..4a403e7b9a3f 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35777.ll 
@@ -10,7 +10,7 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] diff --git a/test/Transforms/SLPVectorizer/X86/PR39774.ll b/test/Transforms/SLPVectorizer/X86/PR39774.ll index 67717a54659c..ae4a6b88bd31 100644 --- a/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -10,7 +10,7 @@ define void @Test(i32) { ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] @@ -100,7 +100,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> ; 
FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[SHUFFLE]] +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], ; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] diff --git a/test/Transforms/SLPVectorizer/X86/PR40310.ll b/test/Transforms/SLPVectorizer/X86/PR40310.ll index ad1434146a5b..2a0b66ee2817 100644 --- a/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -12,7 +12,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]] ; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]] diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll index 7378b8bcb1c9..2ea29ed95c77 100644 --- a/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -15,7 +15,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x 
i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/commutativity.ll b/test/Transforms/SLPVectorizer/X86/commutativity.ll index 9af59efd3453..ad566cb3411e 100644 --- a/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -96,7 +96,7 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]] ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index ec29f8413ace..c16ac5385598 100644 --- a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -20,8 +20,8 @@ define void @reduce_compare(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 ; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] diff --git a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll 
b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 47e89df5ab6c..550b8314d606 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -68,12 +68,12 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 ; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1 ; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]] +; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer ; AVX-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]] ; AVX-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], ; AVX-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> ; AVX-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], -; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]] +; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer ; AVX-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> , <2 x float> [[TMP14]] ; AVX-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 ; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll index f12de2ad199c..8f57a820197e 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -99,7 +99,7 @@ define void @zot(%struct.hoge* %arg) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] +; 
CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index e2d36376f5ea..5c753091f95c 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -31,10 +31,10 @@ define void @main() #0 { ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[ADD_I276_US]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0xBFA5CC2D1960285F, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8 diff --git a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll index 98db3edd90ea..a3d98e39ce1b 100644 --- a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll +++ b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll @@ -22,7 +22,7 @@ define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <2 x float>* ; 
CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[G:%.*]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 5860a24906be..d2512dcd615f 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -18,20 +18,21 @@ define i32 @test(double* nocapture %G) { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[ADD8:%.*]] = fadd double [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 -; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP6]], 4.000000e+00 -; CHECK-NEXT: [[ADD12:%.*]] = fadd double [[MUL11]], 8.000000e+00 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double 
[[MUL11]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 -; CHECK-NEXT: store double [[ADD12]], double* [[ARRAYIDX13]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX9]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -72,13 +73,13 @@ define i32 @foo(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef @@ -135,7 +136,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { ; CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP13]], align 8 @@ -146,7 +147,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { ; CHECK-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], 3.000000e+00 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP15]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[TMP23]], align 8 @@ -203,13 +204,13 @@ define i32 @foo4(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: 
ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll index ac6933304780..2ba0a15fed22 100644 --- a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll +++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll @@ -24,7 +24,7 @@ define i32 @foo(i32* nocapture %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ [[TMP4:%.*]], [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> , [[TMP3]] +; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_029]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll index 8ee644f939ba..1e47f7a51fd1 100644 --- a/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -32,9 +32,9 @@ define double @ext_user(double* noalias nocapture %B, double* noalias nocapture ; CHECK: for.body: ; CHECK-NEXT: [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_020]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/test/Transforms/SLPVectorizer/X86/extract.ll 
b/test/Transforms/SLPVectorizer/X86/extract.ll index 24cf83ca405d..9a741cbb4cfd 100644 --- a/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/test/Transforms/SLPVectorizer/X86/extract.ll @@ -8,7 +8,7 @@ define void @fextr(double* %ptr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[LD]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -32,7 +32,7 @@ define void @fextr1(double* %ptr) { ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[REORDER_SHUFFLE]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[REORDER_SHUFFLE]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -59,7 +59,7 @@ define void @fextr2(double* %ptr) { ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/extractcost.ll 
b/test/Transforms/SLPVectorizer/X86/extractcost.ll index c9fae4460e57..834f5a089b50 100644 --- a/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -14,7 +14,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll index 885d11acfa15..5abf85f319b9 100644 --- a/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -25,7 +25,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4 diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll index 986da9fa52b9..311d8a476c71 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -36,7 +36,7 @@ define i32 @add_red(float* %A, i32 %n) { ; 
CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> @@ -76,7 +76,7 @@ define i32 @add_red(float* %A, i32 %n) { ; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll index 493e09a1d609..7e0cfb77c6b7 100644 --- a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -21,8 +21,8 @@ define void @in_tree_user(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x 
double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll index 429ad84f8a6a..2a4d457f1063 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -132,7 +132,7 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon ; CHECK-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]] ; CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP15]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]] ; CHECK-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8> ; CHECK-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 ; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* @@ -413,52 +413,52 @@ define i32 @foo1() local_unnamed_addr #0 { ; CHECK-LABEL: @foo1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* 
getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], ; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], ; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16 -; CHECK-NEXT: 
[[TMP13:%.*]] = xor <4 x i32> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], ; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], ; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> , [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], ; CHECK-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], ; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 
16 ; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> , [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], ; CHECK-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> , [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], ; CHECK-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> , [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], ; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> , [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], ; CHECK-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> , [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = 
xor <4 x i32> [[TMP30]], ; CHECK-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16 ; CHECK-NEXT: br label [[FOR_BODY5:%.*]] ; CHECK: for.cond3: diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index e26eeec63087..5f6e8f143f4e 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -627,7 +627,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 @@ -645,7 +645,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; ZEROTHRESH-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 diff --git 
a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll index 99b340addb92..ffbdd9f1d148 100644 --- a/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -11,22 +11,22 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], ; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[TMP13]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP14]], ; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP16]], ; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 
x double> [[TMP17]], <2 x double>* [[TMP18]], align 8 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll index 1b19aeae0377..020b50d54632 100644 --- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -36,7 +36,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP8]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll index d0216103d42a..f785926cc412 100644 --- a/test/Transforms/SLPVectorizer/X86/multi_block.ll +++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll @@ -26,10 +26,10 @@ define i32 @bar(double* nocapture %A, i32 %d) { ; CHECK-NEXT: br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]] ; CHECK: [[TMP6:%.*]] = tail call i32 (...) 
@foo() ; CHECK-NEXT: br label [[TMP7]] -; CHECK: [[TMP8:%.*]] = fadd <2 x float> , [[TMP3]] +; CHECK: [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8 ; CHECK-NEXT: [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll index ce8594ea84d7..9268adf9481c 100644 --- a/test/Transforms/SLPVectorizer/X86/multi_user.ll +++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -19,7 +19,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> , [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll index 2354ebd29879..1b959f19883a 100644 --- a/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -14,7 +14,7 @@ define void @shuffle_operands1(double * noalias %from, double * noalias %to, ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, 
double [[V1:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll index a0a13b2b5aac..fe604e2652d2 100644 --- a/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/test/Transforms/SLPVectorizer/X86/phi.ll @@ -81,9 +81,9 @@ define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_019]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -150,9 +150,9 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: 
[[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2 @@ -163,27 +163,26 @@ define float @foo3(float* nocapture readonly %A) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4 ; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> , float [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP12]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float 8.000000e+00, i32 3 -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP11]], [[TMP17]] -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13]] = extractelement <2 
x float> [[REORDER_SHUFFLE1]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP16]], +; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP23]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -255,7 +254,7 @@ define float @sort_phi_type(float* nocapture readonly %A) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = 
extractelement <4 x float> [[TMP0]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> , [[TMP8]] +; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] diff --git a/test/Transforms/SLPVectorizer/X86/pr35497.ll b/test/Transforms/SLPVectorizer/X86/pr35497.ll index c6989c384e01..bdb37b28d58c 100644 --- a/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -55,7 +55,7 @@ define void @pr35497() local_unnamed_addr #0 { ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 @@ -63,7 +63,7 @@ define void @pr35497() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], 
align 1 diff --git a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll index 7cc0194c7302..380f58fe5dc8 100644 --- a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -88,7 +88,7 @@ define void @nsw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -124,7 +124,7 @@ define void @not_nsw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -160,7 +160,7 @@ define void @nuw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -196,7 +196,7 @@ define void @not_nuw(i32* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr 
inbounds i32, i32* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -232,7 +232,7 @@ define void @nnan(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_nnan(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -304,7 +304,7 @@ define void @only_fast(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] 
= bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void @@ -340,7 +340,7 @@ define void @only_arcp(float* %x) { ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/reduction.ll b/test/Transforms/SLPVectorizer/X86/reduction.ll index 03b7f67ae4ca..e9f8e7f7c884 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -23,7 +23,7 @@ define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[MUL]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP3]], [[TMP4]] diff --git a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index 47a6a44611d8..0f0bbf9a2ad2 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -14,10 +14,10 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = 
getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] @@ -32,11 +32,11 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[SUM]] +; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[BIN_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_EXTRA]] ; entry: %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1 diff --git a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index d1c7e6e851f5..13884efd98dd 100644 --- a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -18,7 +18,7 @@ define void @hoge() { ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[REORDER_SHUFFLE]] ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = 
add <4 x i32> , [[SHUFFLE8]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef @@ -37,7 +37,7 @@ define void @hoge() { ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef diff --git a/test/Transforms/SLPVectorizer/X86/resched.ll b/test/Transforms/SLPVectorizer/X86/resched.ll index b8b1ff00db41..28bc95e2f4ca 100644 --- a/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/test/Transforms/SLPVectorizer/X86/resched.ll @@ -72,7 +72,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15 ; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8> -; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> , [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]], ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 ; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1 diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll index f2f858e3c7dd..7e9109a4ef00 100644 --- 
a/test/Transforms/SLPVectorizer/X86/saxpy.ll +++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll @@ -15,7 +15,7 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]] diff --git a/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll index 3abde37048fd..bff947e28cae 100644 --- a/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll +++ b/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll @@ -12,7 +12,7 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([1 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([1 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0), align 4 ; CHECK-NEXT: [[DOTLOBIT_4:%.*]] = lshr i32 [[TMP3]], 31 diff --git a/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/test/Transforms/SLPVectorizer/X86/simple-loop.ll index 975a1af7576a..59b94cad17e4 100644 --- a/test/Transforms/SLPVectorizer/X86/simple-loop.ll +++ b/test/Transforms/SLPVectorizer/X86/simple-loop.ll @@ -14,8 +14,8 @@ define i32 @rollable(i32* noalias 
nocapture %in, i32* noalias nocapture %out, i6 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4 diff --git a/test/Transforms/SLPVectorizer/X86/value-bug.ll b/test/Transforms/SLPVectorizer/X86/value-bug.ll index c2f4b981af9e..78df5a1d68dd 100644 --- a/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -33,9 +33,9 @@ define void @test() { ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double> -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> , [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], ; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> undef, [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef ; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; diff --git a/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index 2b593b78652f..8bf3f362f03c 100644 --- a/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -12,7 +12,7 @@ define void @add0(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: 
[[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -136,7 +136,7 @@ define void @sub1(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -174,7 +174,7 @@ define void @sub2(i32* noalias %dst, i32* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -422,7 +422,7 @@ define void @add0f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , 
[[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -546,7 +546,7 @@ define void @sub1f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -584,7 +584,7 @@ define void @sub2f(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -751,7 +751,7 @@ define void @add0fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x 
float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -875,7 +875,7 @@ define void @sub1fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void @@ -913,7 +913,7 @@ define void @sub2fn(float* noalias %dst, float* noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index fd23381949d7..889bba80b7a4 100644 --- a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -17,7 +17,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: 
[[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef @@ -91,7 +91,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef @@ -169,7 +169,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef From 2b324fde545d4f8365658621d2d7d946f01d47b1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 25 Mar 2019 15:54:34 +0000 Subject: [PATCH 22/27] [x86] add tests for vector zext; NFC The AVX1 lowering is poor. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356914 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vector-zext.ll | 157 ++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 4e436f61e833..c21fc6a73a88 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -2563,3 +2563,160 @@ entry: %e = zext <8 x i6> %d to <8 x i64> ret <8 x i64> %e } + +define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { +; SSE2-LABEL: splatshuf_zext_v4i64: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v4i64: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v4i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v4i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v4i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v4i64: +; 
AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq + %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer + %ext = zext <4 x i32> %shuf to <4 x i64> + ret <4 x i64> %ext +} + +define <8 x i32> @splatshuf_zext_v8i32(<8 x i16> %x) { +; SSE2-LABEL: splatshuf_zext_v8i32: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,5,4] +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v8i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v8i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; 
AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v8i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq + %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> + %ext = zext <8 x i16> %shuf to <8 x i32> + ret <8 x i32> %ext +} + +define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) { +; SSE2-LABEL: splatshuf_zext_v16i16: +; SSE2: # %bb.0: +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,3] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: splatshuf_zext_v16i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: splatshuf_zext_v16i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = 
xmm1[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] +; SSE41-NEXT: retq +; +; AVX1-LABEL: splatshuf_zext_v16i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatshuf_zext_v16i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: splatshuf_zext_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512-NEXT: retq + %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> + %ext = zext <16 x i8> %shuf to <16 x i16> + ret <16 x 
i16> %ext +} From acd323e334aa98799deee5c52a257e588cf3a902 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 25 Mar 2019 16:14:21 +0000 Subject: [PATCH 23/27] [SLPVectorizer] Update file missed in rL356913 Differential Revision: https://reviews.llvm.org/D59738 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356915 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/SLPVectorizer/X86/alternate-int.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 2a49864ca1e4..a04beed1a45b 100644 --- a/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -536,12 +536,12 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R7]] ; From 
ea6301340f3886e8c7e87b0274449cbc4288269a Mon Sep 17 00:00:00 2001 From: James Henderson Date: Mon, 25 Mar 2019 16:36:26 +0000 Subject: [PATCH 24/27] [llvm-objcopy]Preserve data in segments not covered by sections llvm-objcopy previously knew nothing about data in segments that wasn't covered by section headers, meaning that it wrote zeroes instead of what was there. As it is possible for this data to be useful to the loader, this patch causes llvm-objcopy to start preserving this data. Data in sections that are explicitly removed continues to be written as zeroes. This fixes https://bugs.llvm.org/show_bug.cgi?id=41005. Reviewed by: jakehehrlich, rupprecht Differential Revision: https://reviews.llvm.org/D59483 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356919 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../preserve-segment-contents-ehdr-phdrs.test | 41 ++ .../ELF/preserve-segment-contents.test | 639 ++++++++++++++++++ tools/llvm-objcopy/ELF/Object.cpp | 38 +- tools/llvm-objcopy/ELF/Object.h | 14 +- 4 files changed, 727 insertions(+), 5 deletions(-) create mode 100644 test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test create mode 100644 test/tools/llvm-objcopy/ELF/preserve-segment-contents.test diff --git a/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test b/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test new file mode 100644 index 000000000000..9dc63d753f60 --- /dev/null +++ b/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test @@ -0,0 +1,41 @@ +## Show that llvm-objcopy correctly updates the elf header and program header +## table when they are within a segment. + +# RUN: yaml2obj %s -o %t.in +## Validate that the properties are different before the removal. 
+# RUN: llvm-readobj --file-headers --program-headers %t.in | FileCheck %s --check-prefix=BEFORE +# RUN: llvm-objcopy %t.in %t.out -R .remove_me +# RUN: llvm-readobj --file-headers --program-headers %t.out | FileCheck %s --check-prefix=AFTER + +# BEFORE: SectionHeaderCount: 6 +# BEFORE: Type: PT_LOAD +# BEFORE-NEXT: Offset: 0x0 +# BEFORE: Type: PT_LOAD +# BEFORE-NEXT: Offset: 0x240 + +# AFTER: SectionHeaderCount: 5 +# AFTER: Type: PT_LOAD +# AFTER-NEXT: Offset: 0x0 +# AFTER: Type: PT_LOAD +# AFTER-NEXT: Offset: 0xB0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .remove_me + Type: SHT_PROGBITS + Size: 0x10 + - Name: .keep_me + Type: SHT_PROGBITS + Size: 0x10 +ProgramHeaders: + - Type: PT_LOAD + Offset: 0 + FileSize: 176 # sizeof(Elf64_Ehdr) + 2 * sizeof(Elf64_Phdr) + - Type: PT_LOAD + Sections: + - Section: .keep_me diff --git a/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test b/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test new file mode 100644 index 000000000000..4de3ac9700e5 --- /dev/null +++ b/test/tools/llvm-objcopy/ELF/preserve-segment-contents.test @@ -0,0 +1,639 @@ +# We want to preserve areas in segments that are not covered by section headers. +# This test shows that we do this for areas at the start of a segment, between +# sections in a segment, and after all sections in a segment. +# To create inputs with arbitrary data in segments, not covered by sections, we +# use yaml2obj to create segments with sections covering all areas, then remove +# some sections in those segments, and finally write over the areas of the +# removed sections using python. + +# blob* sections are the sections that will be removed to create unlabelled +# areas and then overwritten with data to show we preserve the data. 
+ +# RUN: yaml2obj %s -o %t.base +# RUN: llvm-objcopy %t.base %t.stripped --regex -R blob.* +# Show that the removal leaves the bytes as zeroes, as desired, for all our +# test cases. +# RUN: od -t x1 -j 0x2000 -N 24 %t.stripped | FileCheck %s --check-prefix=CHECK1 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2100 -N 12 %t.stripped | FileCheck %s --check-prefix=CHECK2 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2200 -N 4 %t.stripped | FileCheck %s --check-prefix=CHECK3 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x2300 -N 12 %t.stripped | FileCheck %s --check-prefix=CHECK4 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x3000 -N 68 %t.stripped | FileCheck %s --check-prefix=CHECK5 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x4000 -N 60 %t.stripped | FileCheck %s --check-prefix=CHECK6 -DPATTERN="00 00 00 00" +# RUN: od -t x1 -j 0x5000 -N 60 %t.stripped | FileCheck %s --check-prefix=CHECK7 -DPATTERN="00 00 00 00" + +# RUN: cp %t.stripped %t.in +# RUN: echo "with open('%/t.in', 'r+') as input:" > %t.py +# RUN: echo " for offset in [" >> %t.py +# RUN: echo " 0x2000, 0x2008, 0x200C, 0x2014, 0x2104, 0x2300," >> %t.py +# RUN: echo " 0x3008, 0x3010, 0x3018, 0x3020, 0x3028, 0x302C, 0x3034, 0x303C," >> %t.py +# RUN: echo " 0x4000, 0x4008, 0x4010, 0x4014, 0x401C, 0x4024, 0x4034," >> %t.py +# RUN: echo " 0x5000, 0x5008, 0x5010, 0x501C, 0x5024, 0x502C, 0x5030, 0x5038]:" >> %t.py +# RUN: echo " input.seek(offset)" >> %t.py +# RUN: echo " input.write('\xDE\xAD\xBE\xEF')" >> %t.py +# RUN: %python %t.py +# RUN: llvm-objcopy %t.in %t.out +# RUN: od -t x1 -j 0x2000 -N 24 %t.out | FileCheck %s --check-prefix=CHECK1 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2100 -N 12 %t.out | FileCheck %s --check-prefix=CHECK2 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2200 -N 4 %t.out | FileCheck %s --check-prefix=CHECK3 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x2300 -N 12 %t.out | FileCheck %s --check-prefix=CHECK4 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x3000 -N 68 %t.out | 
FileCheck %s --check-prefix=CHECK5 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x4000 -N 60 %t.out | FileCheck %s --check-prefix=CHECK6 -DPATTERN="de ad be ef" +# RUN: od -t x1 -j 0x5000 -N 60 %t.out | FileCheck %s --check-prefix=CHECK7 -DPATTERN="de ad be ef" + +# CHECK1: [[PATTERN]] 11 22 33 44 [[PATTERN]] [[PATTERN]] +# CHECK1-NEXT: 55 66 77 88 [[PATTERN]] +# CHECK2: 99 00 aa bb [[PATTERN]] cc dd ee ff +# CHECK3: fe fe fe fe +# CHECK4: [[PATTERN]] 00 00 00 00 00 00 00 00 +# CHECK5: ff ff ee ee dd dd cc cc [[PATTERN]] bb bb aa aa +# CHECK5-NEXT: [[PATTERN]] 00 00 99 99 [[PATTERN]] 88 88 77 77 +# CHECK5-NEXT: [[PATTERN]] 66 66 55 55 [[PATTERN]] [[PATTERN]] +# CHECK5-NEXT: 44 44 33 33 [[PATTERN]] 22 22 11 11 [[PATTERN]] +# CHECK5-NEXT: 00 11 22 33 +# CHECK6: [[PATTERN]] 44 55 66 77 [[PATTERN]] 88 99 aa bb +# CHECK6-NEXT: [[PATTERN]] [[PATTERN]] cc dd ee ff [[PATTERN]] +# CHECK6-NEXT: ff ee dd cc [[PATTERN]] bb aa 99 88 77 66 55 44 +# CHECK6-NEXT: 33 22 11 00 [[PATTERN]] 11 11 11 11 +# CHECK7: [[PATTERN]] 12 34 56 78 [[PATTERN]] 90 ab cd ef +# CHECK7-NEXT: [[PATTERN]] fe dc ba 09 87 65 43 21 [[PATTERN]] +# CHECK7-NEXT: 22 22 22 22 [[PATTERN]] 33 33 33 33 [[PATTERN]] +# CHECK7-NEXT: [[PATTERN]] 44 44 44 44 [[PATTERN]] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: blob1 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2000 + AddressAlign: 0x2000 + - Name: section1 + Type: SHT_PROGBITS + Address: 0x2004 + Content: '11223344' + - Name: blob2 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2008 + - Name: section2 + Type: SHT_NOBITS + Size: 4 + Address: 0x200C + - Name: blob3 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2010 + - Name: section3 + Type: SHT_PROGBITS + Content: '55667788' + Address: 0x2014 + - Name: blob4 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2018 + - Name: section4 + Type: SHT_PROGBITS + Content: '9900aabb' + Address: 0x2100 + 
AddressAlign: 0x100 + - Name: blob5 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2104 + - Name: section5 + Type: SHT_PROGBITS + Address: 0x2108 + Content: 'ccddeeff' + - Name: section6 + Type: SHT_PROGBITS + Content: 'fefefefe' + Address: 0x2200 + AddressAlign: 0x100 + - Name: blob6 + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x2300 + AddressAlign: 0x100 + - Name: sectionA + Type: SHT_PROGBITS + Content: 'ffffeeee' + Address: 0x3000 + AddressAlign: 0x1000 + - Name: sectionB + Type: SHT_PROGBITS + Content: 'ddddcccc' + Address: 0x3004 + - Name: blobA + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3008 + - Name: sectionC + Type: SHT_PROGBITS + Content: 'bbbbaaaa' + Address: 0x300C + - Name: blobB + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3010 + - Name: sectionD + Type: SHT_PROGBITS + Content: '00009999' + Address: 0x3014 + - Name: blobC + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3018 + - Name: sectionE + Type: SHT_PROGBITS + Content: '88887777' + Address: 0x301C + - Name: blobD + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3020 + - Name: sectionF + Type: SHT_PROGBITS + Content: '66665555' + Address: 0x3024 + - Name: blobE + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3028 + - Name: blobF + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x302C + - Name: sectionG + Type: SHT_PROGBITS + Content: '44443333' + Address: 0x3030 + - Name: blobG + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x3034 + - Name: sectionH + Type: SHT_PROGBITS + Content: '22221111' + Address: 0x3038 + - Name: blobH + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x303C + - Name: sectionI + Type: SHT_PROGBITS + Content: '00112233' + Address: 0x3040 + - Name: blobz + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4000 + AddressAlign: 0x1000 + - Name: sectionz + Type: SHT_PROGBITS + Content: '44556677' + Address: 0x4004 + - Name: bloby + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4008 + - 
Name: sectiony + Type: SHT_PROGBITS + Content: '8899aabb' + Address: 0x400C + - Name: blobx + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4010 + - Name: blobw + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4014 + - Name: sectionx + Type: SHT_PROGBITS + Content: 'ccddeeff' + Address: 0x4018 + - Name: blobv + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x401C + - Name: sectionw + Type: SHT_PROGBITS + Content: 'ffeeddcc' + Address: 0x4020 + - Name: blobu + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4024 + - Name: sectionv + Type: SHT_PROGBITS + Content: 'bbaa9988' + Address: 0x4028 + - Name: sectionu + Type: SHT_PROGBITS + Content: '77665544' + Address: 0x402C + - Name: sectiont + Type: SHT_PROGBITS + Content: '33221100' + Address: 0x4030 + - Name: blobt + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x4034 + - Name: sections + Type: SHT_PROGBITS + Content: '11111111' + Address: 0x4038 + - Name: bloba + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5000 + AddressAlign: 0x1000 + - Name: sectiona + Type: SHT_PROGBITS + Content: '12345678' + Address: 0x5004 + - Name: blobb + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5008 + - Name: sectionb + Type: SHT_PROGBITS + Content: '90abcdef' + Address: 0x500C + - Name: blobc + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5010 + - Name: sectionc + Type: SHT_PROGBITS + Content: 'fedcba09' + Address: 0x5014 + - Name: sectiond + Type: SHT_PROGBITS + Content: '87654321' + Address: 0x5018 + - Name: blobd + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x501C + - Name: sectione + Type: SHT_PROGBITS + Content: '22222222' + Address: 0x5020 + - Name: blobe + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5024 + - Name: sectionf + Type: SHT_PROGBITS + Content: '33333333' + Address: 0x5028 + - Name: blobf + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x502C + - Name: blobg + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5030 + - Name: 
sectiong + Type: SHT_PROGBITS + Content: '44444444' + Address: 0x5034 + - Name: blobh + Type: SHT_PROGBITS + Content: 'abbababa' + Address: 0x5038 +ProgramHeaders: + # First segment has unlabelled space at start and end. + - Type: 0x6ABCDEF0 # Non-specific segment type. + VAddr: 0x2000 + PAddr: 0x2000 + Align: 0x2000 + Sections: + - Section: blob1 + - Section: section1 + - Section: blob2 + - Section: section2 # nobits + - Section: blob3 + - Section: section3 + - Section: blob4 + # Second segment has sections at start and end. + - Type: 0x6ABCDEF0 + VAddr: 0x2100 + PAddr: 0x2100 + Align: 0x100 + Sections: + - Section: section4 + - Section: blob5 + - Section: section5 + # Third segment is all covered by a section. + - Type: 0x6ABCDEF0 + VAddr: 0x2200 + PAddr: 0x2200 + Align: 0x100 + Sections: + - Section: section6 + # Fourth segment has no sections (after removing blob headers). + - Type: 0x6ABCDEF0 + VAddr: 0x2300 + PAddr: 0x2300 + Align: 0x100 + Sections: + - Section: blob6 + # Fifth segment is empty. + - Type: 0x6ABCDEF0 + VAddr: 0x2308 + PAddr: 0x2308 + Offset: 0x2308 + + # The next few segments test behaviour of fully nested segments. + # Sixth segment is the "parent" segment. + - Type: 0x6ABCDEF0 + VAddr: 0x3000 + PAddr: 0x3000 + Align: 0x1000 + Sections: + - Section: sectionA + - Section: sectionB + - Section: blobA + - Section: sectionC + - Section: blobB + - Section: sectionD + - Section: blobC + - Section: sectionE + - Section: blobD + - Section: sectionF + - Section: blobE + - Section: blobF + - Section: sectionG + - Section: blobG + - Section: sectionH + - Section: blobH + - Section: sectionI + # Seventh segment is empty and nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3002 + PAddr: 0x3002 + Offset: 0x3002 + # Eighth segment contains only a section and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3004 + PAddr: 0x3004 + Sections: + - Section: sectionB + # Ninth segment contains only unlabelled space and is nested. 
+ - Type: 0x6ABCDEF0 + VAddr: 0x3008 + PAddr: 0x3008 + Sections: + - Section: blobA + # Tenth segment contains two sections with space between and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x300C + PAddr: 0x300C + Sections: + - Section: sectionC + - Section: blobB + - Section: sectionD + # Eleventh segment contains two sections with space between and at ends and is nested. + - Type: 0x6ABCDEF0 + VAddr: 0x3018 + PAddr: 0x3018 + Sections: + - Section: blobC + - Section: sectionE + - Section: blobD + - Section: sectionF + - Section: blobE + # Twelfth segment contains one section with space at ends adjacent to space in parent segment. + - Type: 0x6ABCDEF0 + VAddr: 0x302E + PAddr: 0x302E + Offset: 0x302E + FileSize: 8 + Sections: + - Section: sectionG + # Thirteenth segment contains overlaps sections at either end in parent segment. + - Type: 0x6ABCDEF0 + VAddr: 0x303A + PAddr: 0x303A + Offset: 0x303A + FileSize: 0x8 + Sections: + - Section: blobH + + # The next batch of segments are segments that only partially overlap other segments. 
+ + # Segment14: |-unlabelled-|-Sec-| + # Segment15: |--|-Sec-|-unlabelled-| + - Type: 0x6ABCDEF0 + VAddr: 0x4000 + PAddr: 0x4000 + Sections: + - Section: blobz + - Section: sectionz + - Type: 0x6ABCDEF0 + VAddr: 0x4002 + PAddr: 0x4002 + Offset: 0x4002 + Sections: + - Section: sectionz + - Section: bloby + + # Segment16: |-Sec-|--| + # Segment17: |--|----unlabelled---| + - Type: 0x6ABCDEF0 + VAddr: 0x400C + PAddr: 0x400C + FileSize: 6 + Sections: + - Section: sectiony + - Type: 0x6ABCDEF0 + VAddr: 0x400E + PAddr: 0x400E + Offset: 0x400E + Sections: + - Section: blobx + + # Segment18: |-unlabelled-|-Sec-| + # Segment19: |-Sec-|-unlabelled-| + - Type: 0x6ABCDEF0 + VAddr: 0x4014 + PAddr: 0x4014 + Sections: + - Section: blobw + - Section: sectionx + - Type: 0x6ABCDEF0 + VAddr: 0x4018 + PAddr: 0x4018 + Sections: + - Section: sectionx + - Section: blobv + + # Segment20: |-Sec-| + # Segment21: |--|-unlabelled-|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x4020 + PAddr: 0x4020 + Sections: + - Section: sectionw + - Type: 0x6ABCDEF0 + VAddr: 0x4022 + PAddr: 0x4022 + Offset: 0x4022 + Sections: + - Section: blobu + - Section: sectionv + + # Segment22: |-Sec-| + # Segment23: |--|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x402C + PAddr: 0x402C + Sections: + - Section: sectionu + - Type: 0x6ABCDEF0 + VAddr: 0x402E + PAddr: 0x402E + Offset: 0x402E + Sections: + - Section: sectiont + + # Segment24: |-unlabelled-|--| + # Segment25: |--Sec--| + - Type: 0x6ABCDEF0 + VAddr: 0x4034 + PAddr: 0x4034 + FileSize: 6 + Sections: + - Section: blobt + - Type: 0x6ABCDEF0 + VAddr: 0x4038 + PAddr: 0x4038 + Sections: + - Section: sections + + # The next batch of segments represent groups of three nested/overlapping segments, + # with one parent segment containing two overlapping segments. 
+ + # Segment26: |-unlabelled-|-Sec-|-unlabelled-| + # Segment27: |------------|--| + # Segment28: |-Sec-|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5000 + PAddr: 0x5000 + Align: 0x1000 + Sections: + - Section: bloba + - Section: sectiona + - Section: blobb + - Type: 0x6ABCDEF0 + VAddr: 0x5000 + PAddr: 0x5000 + FileSize: 6 + Sections: + - Section: bloba + - Type: 0x6ABCDEF0 + VAddr: 0x5004 + PAddr: 0x5004 + Sections: + - Section: sectiona + - Section: blobb + + # Segment29: |-Sec-|-unlabelled-|-Sec-| + # Segment30: |-Sec-|--------| + # Segment31: |---------|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x500C + PAddr: 0x500C + Sections: + - Section: sectionb + - Section: blobc + - Section: sectionc + - Type: 0x6ABCDEF0 + VAddr: 0x500C + PAddr: 0x500C + FileSize: 7 + Sections: + - Section: sectionb + - Type: 0x6ABCDEF0 + VAddr: 0x5011 + PAddr: 0x5011 + Offset: 0x5011 + Sections: + - Section: sectionc + + # Segment32: |-Sec-|-unlabelled-|-Sec-| + # Segment33: |-Sec-|------------| + # Segment34: |------------|-Sec-| + - Type: 0x6ABCDEF0 + VAddr: 0x5018 + PAddr: 0x5018 + Sections: + - Section: sectiond + - Section: blobd + - Section: sectione + - Type: 0x6ABCDEF0 + VAddr: 0x5018 + PAddr: 0x5018 + Sections: + - Section: sectiond + - Section: blobd + - Type: 0x6ABCDEF0 + VAddr: 0x501C + PAddr: 0x501C + Sections: + - Section: blobd + - Section: sectione + + # Segment35: |-unlabelled-|-Sec-|-unlabelled-| + # Segment36: |------------|-Sec-| + # Segment37: |-Sec-|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5024 + PAddr: 0x5024 + Sections: + - Section: blobe + - Section: sectionf + - Section: blobf + - Type: 0x6ABCDEF0 + VAddr: 0x5024 + PAddr: 0x5024 + Sections: + - Section: blobe + - Section: sectionf + - Type: 0x6ABCDEF0 + VAddr: 0x5028 + PAddr: 0x5028 + Sections: + - Section: sectionf + - Section: blobf + + # Segment38: |-unlabelled-|-Sec-|-unlabelled-| + # Segment39: |------------|---| + # Segment40: |---|------------| + - Type: 0x6ABCDEF0 + VAddr: 0x5030 + PAddr: 0x5030 + 
Sections: + - Section: blobg + - Section: sectiong + - Section: blobh + - Type: 0x6ABCDEF0 + VAddr: 0x5030 + PAddr: 0x5030 + FileSize: 7 + Sections: + - Section: blobg + - Type: 0x6ABCDEF0 + VAddr: 0x5035 + PAddr: 0x5035 + Offset: 0x5035 + Sections: + - Section: blobh diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp index 4639d9053943..7cceb70ca63b 100644 --- a/tools/llvm-objcopy/ELF/Object.cpp +++ b/tools/llvm-objcopy/ELF/Object.cpp @@ -906,7 +906,9 @@ template void ELFBuilder::setParentSegment(Segment &Child) { template void ELFBuilder::readProgramHeaders() { uint32_t Index = 0; for (const auto &Phdr : unwrapOrError(ElfFile.program_headers())) { - Segment &Seg = Obj.addSegment(); + ArrayRef Data{ElfFile.base() + Phdr.p_offset, + (size_t)Phdr.p_filesz}; + Segment &Seg = Obj.addSegment(Data); Seg.Type = Phdr.p_type; Seg.Flags = Phdr.p_flags; Seg.OriginalOffset = Phdr.p_offset; @@ -1350,7 +1352,31 @@ template void ELFWriter::writeShdrs() { template void ELFWriter::writeSectionData() { for (auto &Sec : Obj.sections()) - Sec.accept(*SecWriter); + // Segments are responsible for writing their contents, so only write the + // section data if the section is not in a segment. Note that this renders + // sections in segments effectively immutable. + if (Sec.ParentSegment == nullptr) + Sec.accept(*SecWriter); +} + +template void ELFWriter::writeSegmentData() { + for (Segment &Seg : Obj.segments()) { + uint8_t *B = Buf.getBufferStart() + Seg.Offset; + assert(Seg.FileSize == Seg.getContents().size() && + "Segment size must match contents size"); + std::memcpy(B, Seg.getContents().data(), Seg.FileSize); + } + + // Iterate over removed sections and overwrite their old data with zeroes. 
+ for (auto &Sec : Obj.removedSections()) { + Segment *Parent = Sec.ParentSegment; + if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0) + continue; + uint64_t Offset = + Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset; + uint8_t *B = Buf.getBufferStart(); + std::memset(B + Offset, 0, Sec.Size); + } } Error Object::removeSections( @@ -1396,7 +1422,10 @@ Error Object::removeSections( return E; } - // Now finally get rid of them all togethor. + // Transfer removed sections into the Object RemovedSections container for use + // later. + std::move(Iter, Sections.end(), std::back_inserter(RemovedSections)); + // Now finally get rid of them all together. Sections.erase(Iter, std::end(Sections)); return Error::success(); } @@ -1542,6 +1571,9 @@ template size_t ELFWriter::totalSize() const { } template Error ELFWriter::write() { + // Segment data must be written first, so that the ELF header and program + // header tables can overwrite it, if covered by a segment. + writeSegmentData(); writeEhdr(); writePhdrs(); writeSectionData(); diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h index e892d066a6cd..26d6a122c468 100644 --- a/tools/llvm-objcopy/ELF/Object.h +++ b/tools/llvm-objcopy/ELF/Object.h @@ -215,6 +215,7 @@ template class ELFWriter : public Writer { void writePhdrs(); void writeShdrs(); void writeSectionData(); + void writeSegmentData(); void assignOffsets(); @@ -312,6 +313,10 @@ class Segment { uint32_t Index; uint64_t OriginalOffset; Segment *ParentSegment = nullptr; + ArrayRef Contents; + + explicit Segment(ArrayRef Data) : Contents(Data) {} + Segment() {} const SectionBase *firstSection() const { if (!Sections.empty()) @@ -321,6 +326,8 @@ class Segment { void removeSection(const SectionBase *Sec) { Sections.erase(Sec); } void addSection(const SectionBase *Sec) { Sections.insert(Sec); } + + ArrayRef getContents() const { return Contents; } }; class Section : public SectionBase { @@ -773,6 +780,7 @@ class 
Object { std::vector Sections; std::vector Segments; + std::vector RemovedSections; public: template @@ -815,6 +823,8 @@ class Object { find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); return SecIt == Sections.end() ? nullptr : SecIt->get(); } + SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } + Range segments() { return make_pointee_range(Segments); } ConstRange segments() const { return make_pointee_range(Segments); } @@ -827,8 +837,8 @@ class Object { Ptr->Index = Sections.size(); return *Ptr; } - Segment &addSegment() { - Segments.emplace_back(llvm::make_unique()); + Segment &addSegment(ArrayRef Data) { + Segments.emplace_back(llvm::make_unique(Data)); return *Segments.back(); } }; From 6375a63fcb5c41a2a1a20f0a7654b50b23073266 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 25 Mar 2019 16:47:42 +0000 Subject: [PATCH 25/27] AMDGPU: Preserve LiveIntervals in WQM This seems to already be done, but wasn't marked. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356922 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIWholeQuadMode.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index dae7d455d826..03c0353390f0 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -200,6 +200,8 @@ class SIWholeQuadMode : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } From 08e5a72001161796fbb808a4dd5badc95473b56c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 25 Mar 2019 17:01:29 +0000 Subject: [PATCH 26/27] merge-request.sh: Update 8.0 metabug for 8.0.1 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356924 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/release/merge-request.sh | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/release/merge-request.sh b/utils/release/merge-request.sh index 6a4ee16d788a..0a2bf7661fac 100755 --- a/utils/release/merge-request.sh +++ b/utils/release/merge-request.sh @@ -101,7 +101,7 @@ case $stable_version in release_metabug="39106" ;; 8.0) - release_metabug="40331" + release_metabug="41221" ;; *) echo "error: invalid stable version" From 58dc6ce2d062d8eafd253900ff3ab254040650a1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 25 Mar 2019 17:15:44 +0000 Subject: [PATCH 27/27] MISched: Don't schedule regions with 0 instructions I think this is correct, but may not necessarily be the correct fix for the assertion I'm really trying to solve. If a scheduling region was found that only has dbg_value instructions, the RegPressure tracker would end up in an inconsistent state because it would skip over any debug instructions and point to an instruction outside of the scheduling region. It may still be possible for this to happen if there are some real schedulable instructions between dbg_values, but I haven't managed to break this. The testcase is extremely sensitive and I'm not sure how to make it more resistant to future scheduler changes that would avoid stressing this situation. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 8 +- ...ched-assert-onlydbg-value-empty-region.mir | 115 ++++++++++++++++++ 2 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 66d3a281d42d..88f0630f014f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -486,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB, MachineInstr &MI = *std::prev(I); if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!MI.isDebugInstr()) + if (!MI.isDebugInstr()) { // MBB::size() uses instr_iterator to count. Here we need a bundle to // count as a single instruction. ++NumRegionInstrs; + } } - Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + // It's possible we found a scheduling region that only has debug + // instructions. Don't bother scheduling these. + if (NumRegionInstrs != 0) + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); } if (RegionsTopDown) diff --git a/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir new file mode 100644 index 000000000000..d60abaf7c27b --- /dev/null +++ b/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir @@ -0,0 +1,115 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s + +# The sequence of DBG_VALUEs forms a scheduling region with 0 real +# instructions. 
The RegPressure tracker would end up skipping over any +# debug instructions, so it would point to the instruction +# before/outside of the region, hitting this assert: +# assert((BotRPTracker.getPos() == RegionEnd || +# (RegionEnd->isDebugInstr() && +# BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && +# "Can't find the region bottom"); + +--- +name: only_dbg_value_sched_region +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true +body: | + ; CHECK-LABEL: name: only_dbg_value_sched_region + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, implicit $exec + ; CHECK: undef %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec + ; CHECK: %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec + ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD 
[[COPY1]], 0, 0, 0, implicit $exec + ; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec + ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec + ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec + ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, implicit $exec + ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, implicit $exec + ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec + ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, implicit $exec + ; CHECK: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 2, [[DEF2]], implicit $exec + ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, implicit $exec + ; CHECK: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]] + ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: DBG_VALUE + ; CHECK: DBG_VALUE + ; CHECK: DBG_VALUE + ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: bb.2: + ; CHECK: S_NOP 0, implicit [[COPY]] + ; CHECK: S_NOP 0, implicit [[DEF8]] + ; CHECK: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64 = IMPLICIT_DEF + %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, implicit $exec + undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec + %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + %5:vreg_64 = COPY %2 + undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec + %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vreg_64 = IMPLICIT_DEF + 
%10:vreg_64 = IMPLICIT_DEF + undef %11.sub1:vreg_64 = IMPLICIT_DEF + %12:vgpr_32 = IMPLICIT_DEF + %13:vgpr_32 = IMPLICIT_DEF + %14:vreg_64 = IMPLICIT_DEF + %15:vreg_64 = IMPLICIT_DEF + %16:vgpr_32 = IMPLICIT_DEF + %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec + %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec + GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec + %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, implicit $exec + %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, implicit $exec + %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec + %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, implicit $exec + %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, implicit $exec + %23:vreg_64 = V_LSHLREV_B64 2, %8, implicit $exec + S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17 + GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, implicit $exec + + bb.1: + S_SETREG_IMM32_B32 0, 1 + DBG_VALUE + DBG_VALUE + DBG_VALUE + S_SETREG_IMM32_B32 0, 1 + + bb.2: + S_NOP 0, implicit %0 + S_NOP 0, implicit %16 + S_ENDPGM 0 + +...