SLPVectorizer: after calling vectorizeTree(TE, /*PostponedPHIs=*/false), if the
returned value is an instruction that lives in the builder's current insert
block and the insertion point comes before it, move that instruction to the
insertion point. Adds the regression test X86/perfect-matched-reused-bv.ll.

NOTE(review): this patch text was recovered from a copy in which line breaks
and all angle-bracketed tokens had been stripped. The template argument of
dyn_cast and the two vector constants on the shufflevector CHECK line are
reconstructed, as is the indentation of the C++ context lines - confirm all
three against the upstream sources before applying.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 51841a842ce0b0..50590e51ae0ed7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16215,6 +16215,11 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
       }
       Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
       Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
+      if (auto *VecI = dyn_cast<Instruction>(Vec);
+          VecI && VecI->getParent() == Builder.GetInsertBlock() &&
+          Builder.GetInsertPoint()->comesBefore(VecI))
+        VecI->moveBeforePreserving(*Builder.GetInsertBlock(),
+                                   Builder.GetInsertPoint());
       if (Vec->getType() != PrevVec->getType()) {
         assert(Vec->getType()->isIntOrIntVectorTy() &&
                PrevVec->getType()->isIntOrIntVectorTy() &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll
new file mode 100644
index 00000000000000..1053e0fc10669e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB4:.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    br i1 false, label %[[BB7:.*]], label %[[BB4]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP4]] = add <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    br i1 false, label %[[BB7]], label %[[BB1]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x i32> [ [[TMP1]], %[[BB1]] ], [ [[TMP3]], %[[BB4]] ]
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ 0, %bb ], [ %add6, %bb4 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %add, %bb4 ]
+  %or = or i32 %phi2, 0
+  %or3 = or i32 %phi, 0
+  br i1 false, label %bb7, label %bb4
+
+bb4:
+  %add = add i32 0, 0
+  %add5 = add i32 0, 0
+  %add6 = add i32 %phi, 0
+  br i1 false, label %bb7, label %bb1
+
+bb7:
+  %phi8 = phi i32 [ %or, %bb1 ], [ %add5, %bb4 ]
+  %phi9 = phi i32 [ %or3, %bb1 ], [ %add6, %bb4 ]
+  ret void
+}