From f810cf143cba518d71e03c3d8acecfc482303fb9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 8 Jan 2024 10:49:06 -0800 Subject: [PATCH] [RISCV] Use getELen() instead of hardcoded 64 in lowerBUILD_VECTOR. This is needed to properly support Zve32x. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 448 +++++++++++++----- 2 files changed, 320 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 79c16cf4c4c361..835ea4618904f6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3657,10 +3657,10 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, // would require bit-manipulation instructions to construct the splat value. SmallVector Sequence; const auto *BV = cast(Op); - if (VT.isInteger() && EltBitSize < 64 && + if (VT.isInteger() && EltBitSize < Subtarget.getELen() && ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && BV->getRepeatedSequence(Sequence) && - (Sequence.size() * EltBitSize) <= 64) { + (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { unsigned SeqLen = Sequence.size(); MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 5dfa3835cad020..faeca5ef801a54 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V +; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32 define void @buildvec_vid_v16i8(ptr %x) { ; CHECK-LABEL: buildvec_vid_v16i8: @@ -296,11 +297,22 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() { ; RV32-NEXT: vsext.vf4 v8, v10 ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_vid_step1_add0_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_vid_step1_add0_v4i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: li a1, 3 +; RV64ZVE32-NEXT: sd a1, 24(a0) +; RV64ZVE32-NEXT: li a1, 2 +; RV64ZVE32-NEXT: sd a1, 16(a0) +; RV64ZVE32-NEXT: li a1, 1 +; RV64ZVE32-NEXT: sd a1, 8(a0) +; RV64ZVE32-NEXT: sd zero, 0(a0) +; RV64ZVE32-NEXT: ret ret <4 x i64> } @@ -314,12 +326,23 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() { ; RV32-NEXT: vsext.vf4 v8, v10 ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_vid_step2_add0_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_vid_step2_add0_v4i64: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: vadd.vv v8, v8, v8 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: li a1, 6 +; RV64ZVE32-NEXT: sd a1, 24(a0) +; RV64ZVE32-NEXT: li a1, 4 +; RV64ZVE32-NEXT: sd a1, 16(a0) +; RV64ZVE32-NEXT: li a1, 2 +; RV64ZVE32-NEXT: sd a1, 8(a0) +; RV64ZVE32-NEXT: sd zero, 0(a0) +; RV64ZVE32-NEXT: ret ret <4 x i64> } @@ -420,21 +443,47 @@ define <2 x i8> @buildvec_dominant0_v2i8() { } define <2 x i8> @buildvec_dominant1_v2i8() { -; CHECK-LABEL: buildvec_dominant1_v2i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: ret +; RV32-LABEL: buildvec_dominant1_v2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: ret +; +; RV64V-LABEL: buildvec_dominant1_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64V-NEXT: vmv.v.i v8, -1 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_dominant1_v2i8: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32-NEXT: vmv.v.i v8, -1 +; RV64ZVE32-NEXT: ret ret <2 x i8> } define <2 x i8> @buildvec_dominant2_v2i8() { -; CHECK-LABEL: buildvec_dominant2_v2i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vrsub.vi v8, v8, 0 -; CHECK-NEXT: ret +; RV32-LABEL: buildvec_dominant2_v2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vrsub.vi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64V-LABEL: buildvec_dominant2_v2i8: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: vrsub.vi v8, v8, 0 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_dominant2_v2i8: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32-NEXT: vid.v v8 +; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0 +; RV64ZVE32-NEXT: ret ret <2 x i8> } @@ -448,16 +497,25 @@ define void @buildvec_dominant0_v2i32(ptr %x) { ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_dominant0_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI38_0) -; RV64-NEXT: ld a1, %lo(.LCPI38_0)(a1) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, -1 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_dominant0_v2i32: +; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, %hi(.LCPI38_0) +; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.i v8, -1 +; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; RV64V-NEXT: vmv.s.x v8, a1 +; RV64V-NEXT: vse64.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_dominant0_v2i32: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0) +; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64ZVE32-NEXT: li a2, -1 +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void } @@ -472,14 +530,23 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_dominant1_optsize_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI39_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI39_0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_dominant1_optsize_v2i32: +; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, %hi(.LCPI39_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0) +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vle64.v v8, (a1) +; RV64V-NEXT: vse64.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0) +; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1) +; RV64ZVE32-NEXT: li a2, -1 +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void } @@ -497,15 +564,35 @@ define void @buildvec_seq_v8i8_v4i16(ptr %x) { } define void @buildvec_seq_v8i8_v2i32(ptr %x) { -; CHECK-LABEL: buildvec_seq_v8i8_v2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 48 -; CHECK-NEXT: addi a1, a1, 513 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: buildvec_seq_v8i8_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 48 +; RV32-NEXT: addi a1, a1, 513 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: ret +; +; RV64V-LABEL: buildvec_seq_v8i8_v2i32: +; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, 48 +; RV64V-NEXT: addi a1, a1, 513 +; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vse8.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: lui a1, 48 +; RV64ZVE32-NEXT: addi a1, a1, 513 +; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32-NEXT: vse8.v v8, (a0) +; RV64ZVE32-NEXT: ret store <8 x i8> , ptr %x ret void } @@ -520,15 +607,24 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) { ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_seq_v16i8_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI42_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI42_0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vlse64.v v8, (a1), zero -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_seq_v16i8_v2i64: +; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, %hi(.LCPI42_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI42_0) +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vlse64.v v8, (a1), zero +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64V-NEXT: vse8.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0) +; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0) +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64ZVE32-NEXT: vle8.v v8, (a1) +; RV64ZVE32-NEXT: vse8.v v8, (a0) +; RV64ZVE32-NEXT: ret store <16 x i8> , ptr %x ret void } @@ -544,36 +640,79 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) { ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; -; RV64-LABEL: buildvec_seq2_v16i8_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 528432 -; RV64-NEXT: addiw a1, a1, 513 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: ret +; RV64V-LABEL: buildvec_seq2_v16i8_v2i64: +; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, 528432 +; RV64V-NEXT: addiw a1, a1, 513 +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64V-NEXT: vse8.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0) +; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0) +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64ZVE32-NEXT: vle8.v v8, (a1) +; RV64ZVE32-NEXT: vse8.v v8, (a0) +; RV64ZVE32-NEXT: ret store <16 x i8> , ptr %x ret void } define void @buildvec_seq_v9i8(ptr %x) { -; CHECK-LABEL: buildvec_seq_v9i8: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 73 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 3 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: li a1, 146 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 -; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: buildvec_seq_v9i8: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 73 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmv.v.i v8, 3 +; RV32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV32-NEXT: li a1, 146 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV32-NEXT: vmerge.vim v8, v8, 2, v0 +; RV32-NEXT: vsetivli zero, 9, e8, m1, ta, ma +; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: ret +; +; RV64V-LABEL: buildvec_seq_v9i8: +; RV64V: # %bb.0: +; RV64V-NEXT: li a1, 73 +; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64V-NEXT: vmv.s.x v0, a1 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64V-NEXT: vmv.v.i v8, 3 +; RV64V-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64V-NEXT: li a1, 146 +; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-NEXT: vmv.s.x v0, a1 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64V-NEXT: vmerge.vim v8, v8, 2, v0 +; RV64V-NEXT: vsetivli zero, 9, e8, m1, ta, ma +; RV64V-NEXT: vse8.v v8, (a0) +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_seq_v9i8: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: li a1, 73 +; RV64ZVE32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64ZVE32-NEXT: vmv.v.i v8, 3 +; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64ZVE32-NEXT: li a1, 146 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64ZVE32-NEXT: vmerge.vim v8, v8, 2, v0 +; RV64ZVE32-NEXT: vsetivli zero, 9, e8, m1, ta, ma +; RV64ZVE32-NEXT: vse8.v v8, (a0) +; RV64ZVE32-NEXT: ret store <9 x i8> , ptr %x ret void } @@ -863,14 +1002,22 @@ define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) ; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: ret ; -; RV64-LABEL: v4xi64_exact: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v9, v8, a3 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: ret +; RV64V-LABEL: v4xi64_exact: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a3 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: v4xi64_exact: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: sd a4, 24(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret %v1 = insertelement <4 x i64> poison, i64 %a, i32 0 %v2 = insertelement <4 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <4 x i64> %v2, i64 %c, i32 2 @@ -907,18 +1054,31 @@ define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i ; RV32-NEXT: vslide1down.vx v11, v11, t0 ; RV32-NEXT: ret ; -; RV64-LABEL: v8xi64_exact: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v9, v8, a3 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vmv.v.x v10, a4 -; RV64-NEXT: vslide1down.vx v10, v10, a5 -; RV64-NEXT: vmv.v.x v11, a6 -; RV64-NEXT: vslide1down.vx v11, v11, a7 -; RV64-NEXT: ret +; RV64V-LABEL: v8xi64_exact: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a3 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: vmv.v.x v10, a4 +; RV64V-NEXT: vslide1down.vx v10, v10, a5 +; RV64V-NEXT: vmv.v.x v11, a6 +; RV64V-NEXT: vslide1down.vx v11, v11, a7 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: v8xi64_exact: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: ld t0, 0(sp) +; RV64ZVE32-NEXT: sd t0, 56(a0) +; RV64ZVE32-NEXT: sd a7, 48(a0) +; RV64ZVE32-NEXT: sd a6, 40(a0) +; RV64ZVE32-NEXT: sd a5, 32(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 @@ -946,16 +1106,28 @@ define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; RV32-NEXT: vmv.v.v v11, v9 ; RV32-NEXT: ret ; -; RV64-LABEL: v8xi64_exact_equal_halves: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v9, v8, a3 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vmv.v.v v10, v8 -; RV64-NEXT: vmv.v.v v11, v9 -; RV64-NEXT: ret +; RV64V-LABEL: v8xi64_exact_equal_halves: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a3 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: vmv.v.v v10, v8 +; RV64V-NEXT: vmv.v.v v11, v9 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: v8xi64_exact_equal_halves: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: sd a4, 56(a0) +; RV64ZVE32-NEXT: sd a3, 48(a0) +; RV64ZVE32-NEXT: sd a2, 40(a0) +; RV64ZVE32-NEXT: sd a1, 32(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 @@ -981,14 +1153,22 @@ define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: ret ; -; RV64-LABEL: v8xi64_exact_undef_suffix: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v9, v8, a3 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: ret +; RV64V-LABEL: v8xi64_exact_undef_suffix: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a3 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: sd a4, 24(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 @@ -1010,14 +1190,22 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; RV32-NEXT: vslide1down.vx v10, v8, a3 ; RV32-NEXT: ret ; -; RV64-LABEL: v8xi64_exact_undef_prefix: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v11, v8, a3 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: ret +; RV64V-LABEL: v8xi64_exact_undef_prefix: +; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64V-NEXT: vmv.v.x v8, a2 +; RV64V-NEXT: vslide1down.vx v11, v8, a3 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v10, v8, a1 +; RV64V-NEXT: ret +; +; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: sd a4, 56(a0) +; RV64ZVE32-NEXT: sd a3, 48(a0) +; RV64ZVE32-NEXT: sd a2, 40(a0) +; RV64ZVE32-NEXT: sd a1, 32(a0) +; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 4 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 5 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 6