From 201629ff7d56b8fc874c7cb3a60246a13895b330 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Fri, 3 Jan 2025 13:18:38 -0800 Subject: [PATCH] Revert "[RISCV][VLOPT] Enable the RISCVVLOptimizer by default (#119461)" This reverts commit 169c32eb49fa9b559d388b9b8f4374ff9e1be9be. --- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 3 + llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 537 +++++++++++++++++- 2 files changed, 539 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index edd2ae5f94231e8..78ed8f16979affc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -781,10 +781,12 @@ define void @copysign_v6bf16(ptr %x, ptr %y) { ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vand.vx v9, v9, a1 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -1203,6 +1205,7 @@ define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) { ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a2, a1, -1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9 ; CHECK-NEXT: vxor.vx v9, v10, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index d3f63b76605d7d5..c86496f43e67734 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -4198,7 +4198,17 @@ define @vfnmadd_vf_nxv1f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t @@ -4229,7 +4239,17 @@ define @vfnmadd_vf_nxv1f16_neg_splat_commute( @vfnmadd_vf_nxv1f16_neg_splat_unmasked( @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t @@ -4577,6 +4622,19 @@ define @vfnmsub_vf_nxv1f16_neg_splat_commute( @vfnmsub_vf_nxv1f16_neg_splat_unmasked( @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv2f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t @@ -5096,7 +5170,17 @@ define @vfnmadd_vf_nxv2f16_neg_splat_commute( @vfnmadd_vf_nxv2f16_neg_splat_unmasked( @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t @@ -5444,6 +5553,19 @@ define @vfnmsub_vf_nxv2f16_neg_splat_commute( @vfnmsub_vf_nxv2f16_neg_splat_unmasked( @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv4f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t @@ -5963,7 +6101,17 @@ define @vfnmadd_vf_nxv4f16_neg_splat_commute( @vfnmadd_vf_nxv4f16_neg_splat_unmasked( @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t @@ -6311,6 +6484,19 @@ define @vfnmsub_vf_nxv4f16_neg_splat_commute( @vfnmsub_vf_nxv4f16_neg_splat_unmasked( @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv8f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t @@ -6830,7 +7032,17 @@ define @vfnmadd_vf_nxv8f16_neg_splat_commute( @vfnmadd_vf_nxv8f16_neg_splat_unmasked( @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v10, v12, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t @@ -7178,6 +7415,19 @@ define @vfnmsub_vf_nxv8f16_neg_splat_commute( @vfnmsub_vf_nxv8f16_neg_splat_unmasked( @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv16f16_neg_splat( ; ZVFHMIN-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v4, v16, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vxor.vx v4, v16, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t @@ -7795,6 +8066,8 @@ define @vfnmadd_vf_nxv16f16_neg_splat_commute( @vfnmadd_vf_nxv16f16_neg_splat_commute( @vfnmadd_vf_nxv16f16_neg_splat_unmasked( @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv16f16_neg_splat( ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v4, v8 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v12, v16, a1, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv4r.v v4, v8 ; ZVFHMIN-NEXT: fmv.x.h a0, fa0 ; ZVFHMIN-NEXT: vmv.v.x v16, a0 ; ZVFHMIN-NEXT: lui a0, 8 @@ -8210,6 +8515,36 @@ define @vfnmsub_vf_nxv16f16_neg_splat_commute( @vfnmsub_vf_nxv16f16_neg_splat_unmasked( @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: lui a4, 8 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a4, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a4, v0.t +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v4, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB294_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB294_2: +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb @@ -10475,6 +10862,20 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_unmasked( @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: lui a3, 8 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a2 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: vmv4r.v v20, v28 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB306_2: +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v4, v12 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb @@ -12165,6 +12630,64 @@ define @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(